1481 files changed, 116028 insertions, 53932 deletions
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index d0aad06b3872..f245bf35bedb 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -145,6 +145,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
 	{ "AMD0010", APD_ADDR(cz_i2c_desc) },
 	{ "AMDI0010", APD_ADDR(cz_i2c_desc) },
 	{ "AMD0020", APD_ADDR(cz_uart_desc) },
+	{ "AMDI0020", APD_ADDR(cz_uart_desc) },
 	{ "AMD0030", },
 #endif
 #ifdef CONFIG_ARM64
diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 2aee41655ce9..f2fd3fee588a 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -816,6 +816,7 @@ struct fwnode_handle *acpi_get_next_subnode(struct device *dev,
 			next = adev->node.next;
 			if (next == head) {
 				child = NULL;
+				adev = ACPI_COMPANION(dev);
 				goto nondev;
 			}
 			adev = list_entry(next, struct acpi_device, node);
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index d02fd53042a5..56241eb341f4 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -27,8 +27,20 @@
 
 #ifdef CONFIG_X86
 #define valid_IRQ(i) (((i) != 0) && ((i) != 2))
+static inline bool acpi_iospace_resource_valid(struct resource *res)
+{
+	/* On X86 IO space is limited to the [0 - 64K] IO port range */
+	return res->end < 0x10003;
+}
 #else
 #define valid_IRQ(i) (true)
+/*
+ * ACPI IO descriptors on arches other than X86 contain MMIO CPU physical
+ * addresses mapping IO space in CPU physical address space, IO space
+ * resources can be placed anywhere in the 64-bit physical address space.
+ */
+static inline bool
+acpi_iospace_resource_valid(struct resource *res) { return true; }
 #endif
 
 static bool acpi_dev_resource_len_valid(u64 start, u64 end, u64 len, bool io)
@@ -127,7 +139,7 @@ static void acpi_dev_ioresource_flags(struct resource *res, u64 len,
 	if (!acpi_dev_resource_len_valid(res->start, res->end, len, true))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
-	if (res->end >= 0x10003)
+	if (!acpi_iospace_resource_valid(res))
 		res->flags |= IORESOURCE_DISABLED | IORESOURCE_UNSET;
 
 	if (io_decode == ACPI_DECODE_16)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index fbfcce3b5227..2a8b59644297 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -748,6 +748,7 @@ static int acpi_hibernation_enter(void)
 
 static void acpi_hibernation_leave(void)
 {
+	pm_set_resume_via_firmware();
 	/*
 	 * If ACPI is not enabled by the BIOS and the boot kernel, we need to
 	 * enable it here.
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index f12a72428aac..050673f0c0b3 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -692,7 +692,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs)
 		mask = obj->integer.value;
 	else if (obj->type == ACPI_TYPE_BUFFER)
 		for (i = 0; i < obj->buffer.length && i < 8; i++)
-			mask |= (((u8)obj->buffer.pointer[i]) << (i * 8));
+			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
 	/*
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index 87b808374888..bdf28f7dd5e8 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -2,6 +2,7 @@
  * Coherent per-device memory handling.
  * Borrowed from i386
  */
+#include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -31,7 +32,10 @@ static bool dma_init_coherent_memory(
 	if (!size)
 		goto out;
 
-	mem_base = ioremap(phys_addr, size);
+	if (flags & DMA_MEMORY_MAP)
+		mem_base = memremap(phys_addr, size, MEMREMAP_WC);
+	else
+		mem_base = ioremap(phys_addr, size);
 	if (!mem_base)
 		goto out;
 
@@ -54,8 +58,12 @@ static bool dma_init_coherent_memory(
 
 out:
 	kfree(dma_mem);
-	if (mem_base)
-		iounmap(mem_base);
+	if (mem_base) {
+		if (flags & DMA_MEMORY_MAP)
+			memunmap(mem_base);
+		else
+			iounmap(mem_base);
+	}
 	return false;
 }
 
@@ -63,7 +71,11 @@ static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
 {
 	if (!mem)
 		return;
-	iounmap(mem->virt_base);
+
+	if (mem->flags & DMA_MEMORY_MAP)
+		memunmap(mem->virt_base);
+	else
+		iounmap(mem->virt_base);
 	kfree(mem->bitmap);
 	kfree(mem);
 }
@@ -175,7 +187,10 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
 	 */
 	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
-	memset(*ret, 0, size);
+	if (mem->flags & DMA_MEMORY_MAP)
+		memset(*ret, 0, size);
+	else
+		memset_io(*ret, 0, size);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 
 	return 1;
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 272a52ebafc0..0e64a1b5e62a 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -137,6 +137,62 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk)
 	return __pm_clk_add(dev, NULL, clk);
 }
 
+
+/**
+ * of_pm_clk_add_clks - Start using device clock(s) for power management.
+ * @dev: Device whose clock(s) is going to be used for power management.
+ *
+ * Add a series of clocks described in the 'clocks' device-tree node for
+ * a device to the list of clocks used for the power management of @dev.
+ * On success, returns the number of clocks added. Returns a negative
+ * error code if there are no clocks in the device node for the device
+ * or if adding a clock fails.
+ */
+int of_pm_clk_add_clks(struct device *dev)
+{
+	struct clk **clks;
+	unsigned int i, count;
+	int ret;
+
+	if (!dev || !dev->of_node)
+		return -EINVAL;
+
+	count = of_count_phandle_with_args(dev->of_node, "clocks",
+					   "#clock-cells");
+	if (count == 0)
+		return -ENODEV;
+
+	clks = kcalloc(count, sizeof(*clks), GFP_KERNEL);
+	if (!clks)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		clks[i] = of_clk_get(dev->of_node, i);
+		if (IS_ERR(clks[i])) {
+			ret = PTR_ERR(clks[i]);
+			goto error;
+		}
+
+		ret = pm_clk_add_clk(dev, clks[i]);
+		if (ret) {
+			clk_put(clks[i]);
+			goto error;
+		}
+	}
+
+	kfree(clks);
+
+	return i;
+
+error:
+	while (i--)
+		pm_clk_remove_clk(dev, clks[i]);
+
+	kfree(clks);
+
+	return ret;
+}
+
 /**
  * __pm_clk_remove - Destroy PM clock entry.
  * @ce: PM clock entry to destroy.
@@ -198,6 +254,39 @@ void pm_clk_remove(struct device *dev, const char *con_id)
 }
 
 /**
+ * pm_clk_remove_clk - Stop using a device clock for power management.
+ * @dev: Device whose clock should not be used for PM any more.
+ * @clk: Clock pointer
+ *
+ * Remove the clock pointed to by @clk from the list of clocks used for
+ * the power management of @dev.
+ */
+void pm_clk_remove_clk(struct device *dev, struct clk *clk)
+{
+	struct pm_subsys_data *psd = dev_to_psd(dev);
+	struct pm_clock_entry *ce;
+
+	if (!psd || !clk)
+		return;
+
+	spin_lock_irq(&psd->lock);
+
+	list_for_each_entry(ce, &psd->clock_list, node) {
+		if (clk == ce->clk)
+			goto remove;
+	}
+
+	spin_unlock_irq(&psd->lock);
+	return;
+
+ remove:
+	list_del(&ce->node);
+	spin_unlock_irq(&psd->lock);
+
+	__pm_clk_remove(ce);
+}
+
+/**
  * pm_clk_init - Initialize a device's list of power management clocks.
  * @dev: Device to initialize the list of PM clocks for.
  *
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index cc2e71d0a77f..25824c1697c5 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2051,7 +2051,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
 					 outbuf,
 					 taskout,
 					 DMA_TO_DEVICE);
-		if (outbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, outbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}
@@ -2068,7 +2068,7 @@ static int exec_drive_taskfile(struct driver_data *dd,
 		inbuf_dma = pci_map_single(dd->pdev,
 					 inbuf,
 					 taskin, DMA_FROM_DEVICE);
-		if (inbuf_dma == 0) {
+		if (pci_dma_mapping_error(dd->pdev, inbuf_dma)) {
 			err = -ENOMEM;
 			goto abort;
 		}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 64a7b5971b57..cab97593ba54 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -742,10 +742,11 @@ static int null_add_dev(void)
 
 	add_disk(disk);
 
+done:
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
-done:
+
 	return 0;
 
 out_cleanup_lightnvm:
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4a876785b68c..9c6234428607 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1847,14 +1847,12 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	if (osd_req->r_result < 0)
 		obj_request->result = osd_req->r_result;
 
-	rbd_assert(osd_req->r_num_ops <= CEPH_OSD_MAX_OP);
-
 	/*
 	 * We support a 64-bit length, but ultimately it has to be
 	 * passed to the block layer, which just supports a 32-bit
 	 * length field.
 	 */
-	obj_request->xferred = osd_req->r_reply_op_len[0];
+	obj_request->xferred = osd_req->r_ops[0].outdata_len;
 	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
 
 	opcode = osd_req->r_ops[0].op;
@@ -5643,18 +5641,12 @@ static void rbd_sysfs_cleanup(void)
 static int rbd_slab_init(void)
 {
 	rbd_assert(!rbd_img_request_cache);
-	rbd_img_request_cache = kmem_cache_create("rbd_img_request",
-					sizeof (struct rbd_img_request),
-					__alignof__(struct rbd_img_request),
-					0, NULL);
+	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
 	if (!rbd_img_request_cache)
 		return -ENOMEM;
 
 	rbd_assert(!rbd_obj_request_cache);
-	rbd_obj_request_cache = kmem_cache_create("rbd_obj_request",
-					sizeof (struct rbd_obj_request),
-					__alignof__(struct rbd_obj_request),
-					0, NULL);
+	rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0);
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 9a92c072a485..d4a3a3133da5 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -34,15 +34,15 @@ config ARM_CCI400_PORT_CTRL
 	  Low level power management driver for CCI400 cache coherent
 	  interconnect for ARM platforms.
 
-config ARM_CCI500_PMU
-	bool "ARM CCI500 PMU support"
+config ARM_CCI5xx_PMU
+	bool "ARM CCI-500/CCI-550 PMU support"
 	depends on (ARM && CPU_V7) || ARM64
 	depends on PERF_EVENTS
 	select ARM_CCI_PMU
 	help
-	  Support for PMU events monitoring on the ARM CCI-500 cache coherent
-	  interconnect. CCI-500 provides 8 independent event counters, which
-	  can count events pertaining to the slave/master interfaces as well
+	  Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache
+	  coherent interconnects. Both of them provide 8 independent event counters,
+	  which can count events pertaining to the slave/master interfaces as well
 	  as the internal events to the CCI.
 
 	  If unsure, say Y
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 577cc4bf6a9d..a49b28378d59 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -52,8 +52,9 @@ static const struct of_device_id arm_cci_matches[] = {
 #ifdef CONFIG_ARM_CCI400_COMMON
 	{.compatible = "arm,cci-400", .data = CCI400_PORTS_DATA },
 #endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 	{ .compatible = "arm,cci-500", },
+	{ .compatible = "arm,cci-550", },
 #endif
 	{},
 };
@@ -92,7 +93,7 @@ static const struct of_device_id arm_cci_matches[] = {
 enum {
 	CCI_IF_SLAVE,
 	CCI_IF_MASTER,
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 	CCI_IF_GLOBAL,
 #endif
 	CCI_IF_MAX,
@@ -121,13 +122,12 @@ struct cci_pmu_model {
 	u32 fixed_hw_cntrs;
 	u32 num_hw_cntrs;
 	u32 cntr_size;
-	u64 nformat_attrs;
-	u64 nevent_attrs;
-	struct dev_ext_attribute *format_attrs;
-	struct dev_ext_attribute *event_attrs;
+	struct attribute **format_attrs;
+	struct attribute **event_attrs;
 	struct event_range event_ranges[CCI_IF_MAX];
 	int (*validate_hw_event)(struct cci_pmu *, unsigned long);
 	int (*get_event_idx)(struct cci_pmu *, struct cci_pmu_hw_events *, unsigned long);
+	void (*write_counters)(struct cci_pmu *, unsigned long *);
 };
 
 static struct cci_pmu_model cci_pmu_models[];
@@ -155,19 +155,24 @@ enum cci_models {
 	CCI400_R0,
 	CCI400_R1,
 #endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 	CCI500_R0,
+	CCI550_R0,
 #endif
 	CCI_MODEL_MAX
 };
 
+static void pmu_write_counters(struct cci_pmu *cci_pmu,
+				 unsigned long *mask);
 static ssize_t cci_pmu_format_show(struct device *dev,
 			struct device_attribute *attr, char *buf);
 static ssize_t cci_pmu_event_show(struct device *dev,
 			struct device_attribute *attr, char *buf);
 
-#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \
-	{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config }
+#define CCI_EXT_ATTR_ENTRY(_name, _func, _config) 				\
+	&((struct dev_ext_attribute[]) {					\
+		{ __ATTR(_name, S_IRUGO, _func, NULL), (void *)_config }	\
+	})[0].attr.attr
 
 #define CCI_FORMAT_EXT_ATTR_ENTRY(_name, _config) \
 	CCI_EXT_ATTR_ENTRY(_name, cci_pmu_format_show, (char *)_config)
@@ -242,12 +247,13 @@ enum cci400_perf_events {
 static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
 			struct device_attribute *attr, char *buf);
 
-static struct dev_ext_attribute cci400_pmu_format_attrs[] = {
+static struct attribute *cci400_pmu_format_attrs[] = {
 	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
 	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-7"),
+	NULL
 };
 
-static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = {
+static struct attribute *cci400_r0_pmu_event_attrs[] = {
 	/* Slave events */
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
@@ -279,9 +285,10 @@ static struct dev_ext_attribute cci400_r0_pmu_event_attrs[] = {
 	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_stall_tt_full, 0x1A),
 	/* Special event for cycles counter */
 	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+	NULL
 };
 
-static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = {
+static struct attribute *cci400_r1_pmu_event_attrs[] = {
 	/* Slave events */
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_any, 0x0),
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_device, 0x01),
@@ -325,6 +332,7 @@ static struct dev_ext_attribute cci400_r1_pmu_event_attrs[] = {
 	CCI_EVENT_EXT_ATTR_ENTRY(mi_wrq_unique_or_line_unique_addr_hazard, 0x11),
 	/* Special event for cycles counter */
 	CCI400_CYCLE_EVENT_EXT_ATTR_ENTRY(cycles, 0xff),
+	NULL
 };
 
 static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
@@ -420,72 +428,68 @@ static inline struct cci_pmu_model *probe_cci_model(struct platform_device *pdev
 }
 #endif	/* CONFIG_ARM_CCI400_PMU */
 
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 
 /*
- * CCI500 provides 8 independent event counters that can count
- * any of the events available.
- *
- * CCI500 PMU event id is an 9-bit value made of two parts.
+ * CCI5xx PMU event id is an 9-bit value made of two parts.
  *	 bits [8:5] - Source for the event
- *		      0x0-0x6 - Slave interfaces
- *		      0x8-0xD - Master interfaces
- *		      0xf     - Global Events
- *		      0x7,0xe - Reserved
- *
  *	 bits [4:0] - Event code (specific to type of interface)
+ *
+ *
  */
 
 /* Port ids */
-#define CCI500_PORT_S0			0x0
-#define CCI500_PORT_S1			0x1
-#define CCI500_PORT_S2			0x2
-#define CCI500_PORT_S3			0x3
-#define CCI500_PORT_S4			0x4
-#define CCI500_PORT_S5			0x5
-#define CCI500_PORT_S6			0x6
-
-#define CCI500_PORT_M0			0x8
-#define CCI500_PORT_M1			0x9
-#define CCI500_PORT_M2			0xa
-#define CCI500_PORT_M3			0xb
-#define CCI500_PORT_M4			0xc
-#define CCI500_PORT_M5			0xd
-
-#define CCI500_PORT_GLOBAL 		0xf
-
-#define CCI500_PMU_EVENT_MASK		0x1ffUL
-#define CCI500_PMU_EVENT_SOURCE_SHIFT	0x5
-#define CCI500_PMU_EVENT_SOURCE_MASK	0xf
-#define CCI500_PMU_EVENT_CODE_SHIFT	0x0
-#define CCI500_PMU_EVENT_CODE_MASK	0x1f
-
-#define CCI500_PMU_EVENT_SOURCE(event)	\
-	((event >> CCI500_PMU_EVENT_SOURCE_SHIFT) & CCI500_PMU_EVENT_SOURCE_MASK)
-#define CCI500_PMU_EVENT_CODE(event)	\
-	((event >> CCI500_PMU_EVENT_CODE_SHIFT) & CCI500_PMU_EVENT_CODE_MASK)
-
-#define CCI500_SLAVE_PORT_MIN_EV	0x00
-#define CCI500_SLAVE_PORT_MAX_EV	0x1f
-#define CCI500_MASTER_PORT_MIN_EV	0x00
-#define CCI500_MASTER_PORT_MAX_EV	0x06
-#define CCI500_GLOBAL_PORT_MIN_EV	0x00
-#define CCI500_GLOBAL_PORT_MAX_EV	0x0f
-
-
-#define CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
-	CCI_EXT_ATTR_ENTRY(_name, cci500_pmu_global_event_show, \
+#define CCI5xx_PORT_S0			0x0
+#define CCI5xx_PORT_S1			0x1
+#define CCI5xx_PORT_S2			0x2
+#define CCI5xx_PORT_S3			0x3
+#define CCI5xx_PORT_S4			0x4
+#define CCI5xx_PORT_S5			0x5
+#define CCI5xx_PORT_S6			0x6
+
+#define CCI5xx_PORT_M0			0x8
+#define CCI5xx_PORT_M1			0x9
+#define CCI5xx_PORT_M2			0xa
+#define CCI5xx_PORT_M3			0xb
+#define CCI5xx_PORT_M4			0xc
+#define CCI5xx_PORT_M5			0xd
+#define CCI5xx_PORT_M6			0xe
+
+#define CCI5xx_PORT_GLOBAL		0xf
+
+#define CCI5xx_PMU_EVENT_MASK		0x1ffUL
+#define CCI5xx_PMU_EVENT_SOURCE_SHIFT	0x5
+#define CCI5xx_PMU_EVENT_SOURCE_MASK	0xf
+#define CCI5xx_PMU_EVENT_CODE_SHIFT	0x0
+#define CCI5xx_PMU_EVENT_CODE_MASK	0x1f
+
+#define CCI5xx_PMU_EVENT_SOURCE(event)	\
+	((event >> CCI5xx_PMU_EVENT_SOURCE_SHIFT) & CCI5xx_PMU_EVENT_SOURCE_MASK)
+#define CCI5xx_PMU_EVENT_CODE(event)	\
+	((event >> CCI5xx_PMU_EVENT_CODE_SHIFT) & CCI5xx_PMU_EVENT_CODE_MASK)
+
+#define CCI5xx_SLAVE_PORT_MIN_EV	0x00
+#define CCI5xx_SLAVE_PORT_MAX_EV	0x1f
+#define CCI5xx_MASTER_PORT_MIN_EV	0x00
+#define CCI5xx_MASTER_PORT_MAX_EV	0x06
+#define CCI5xx_GLOBAL_PORT_MIN_EV	0x00
+#define CCI5xx_GLOBAL_PORT_MAX_EV	0x0f
+
+
+#define CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(_name, _config) \
+	CCI_EXT_ATTR_ENTRY(_name, cci5xx_pmu_global_event_show, \
 					(unsigned long) _config)
 
-static ssize_t cci500_pmu_global_event_show(struct device *dev,
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
 				struct device_attribute *attr, char *buf);
 
-static struct dev_ext_attribute cci500_pmu_format_attrs[] = {
+static struct attribute *cci5xx_pmu_format_attrs[] = {
 	CCI_FORMAT_EXT_ATTR_ENTRY(event, "config:0-4"),
 	CCI_FORMAT_EXT_ATTR_ENTRY(source, "config:5-8"),
+	NULL,
 };
 
-static struct dev_ext_attribute cci500_pmu_event_attrs[] = {
+static struct attribute *cci5xx_pmu_event_attrs[] = {
 	/* Slave events */
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_hs_arvalid, 0x0),
 	CCI_EVENT_EXT_ATTR_ENTRY(si_rrq_dev, 0x1),
@@ -530,63 +534,73 @@ static struct dev_ext_attribute cci500_pmu_event_attrs[] = {
 	CCI_EVENT_EXT_ATTR_ENTRY(mi_w_resp_stall, 0x6),
 
 	/* Global events */
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
-	CCI500_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_0_1, 0x0),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_2_3, 0x1),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_4_5, 0x2),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_filter_bank_6_7, 0x3),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_0_1, 0x4),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_2_3, 0x5),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_4_5, 0x6),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_access_miss_filter_bank_6_7, 0x7),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_back_invalidation, 0x8),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_alloc_busy, 0x9),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_stall_tt_full, 0xA),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
+	NULL
 };
 
-static ssize_t cci500_pmu_global_event_show(struct device *dev,
+static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	struct dev_ext_attribute *eattr = container_of(attr,
 					struct dev_ext_attribute, attr);
 	/* Global events have single fixed source code */
 	return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
-				(unsigned long)eattr->var, CCI500_PORT_GLOBAL);
+				(unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
 }
 
+/*
+ * CCI500 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI500 PMU event source ids
+ *	0x0-0x6 - Slave interfaces
+ *	0x8-0xD - Master interfaces
+ *	0xf     - Global Events
+ *	0x7,0xe - Reserved
+ */
 static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
 					unsigned long hw_event)
 {
-	u32 ev_source = CCI500_PMU_EVENT_SOURCE(hw_event);
-	u32 ev_code = CCI500_PMU_EVENT_CODE(hw_event);
+	u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+	u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
 	int if_type;
 
-	if (hw_event & ~CCI500_PMU_EVENT_MASK)
+	if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
 		return -ENOENT;
 
 	switch (ev_source) {
-	case CCI500_PORT_S0:
-	case CCI500_PORT_S1:
-	case CCI500_PORT_S2:
-	case CCI500_PORT_S3:
-	case CCI500_PORT_S4:
-	case CCI500_PORT_S5:
-	case CCI500_PORT_S6:
+	case CCI5xx_PORT_S0:
+	case CCI5xx_PORT_S1:
+	case CCI5xx_PORT_S2:
+	case CCI5xx_PORT_S3:
+	case CCI5xx_PORT_S4:
+	case CCI5xx_PORT_S5:
+	case CCI5xx_PORT_S6:
 		if_type = CCI_IF_SLAVE;
 		break;
-	case CCI500_PORT_M0:
-	case CCI500_PORT_M1:
-	case CCI500_PORT_M2:
-	case CCI500_PORT_M3:
-	case CCI500_PORT_M4:
-	case CCI500_PORT_M5:
+	case CCI5xx_PORT_M0:
+	case CCI5xx_PORT_M1:
+	case CCI5xx_PORT_M2:
+	case CCI5xx_PORT_M3:
+	case CCI5xx_PORT_M4:
+	case CCI5xx_PORT_M5:
 		if_type = CCI_IF_MASTER;
 		break;
-	case CCI500_PORT_GLOBAL:
+	case CCI5xx_PORT_GLOBAL:
 		if_type = CCI_IF_GLOBAL;
 		break;
 	default:
@@ -599,7 +613,118 @@ static int cci500_validate_hw_event(struct cci_pmu *cci_pmu,
 
 	return -ENOENT;
 }
-#endif	/* CONFIG_ARM_CCI500_PMU */
+
+/*
+ * CCI550 provides 8 independent event counters that can count
+ * any of the events available.
+ * CCI550 PMU event source ids
+ *	0x0-0x6 - Slave interfaces
+ *	0x8-0xe - Master interfaces
+ *	0xf     - Global Events
+ *	0x7	- Reserved
+ */
+static int cci550_validate_hw_event(struct cci_pmu *cci_pmu,
+					unsigned long hw_event)
+{
+	u32 ev_source = CCI5xx_PMU_EVENT_SOURCE(hw_event);
+	u32 ev_code = CCI5xx_PMU_EVENT_CODE(hw_event);
+	int if_type;
+
+	if (hw_event & ~CCI5xx_PMU_EVENT_MASK)
+		return -ENOENT;
+
+	switch (ev_source) {
+	case CCI5xx_PORT_S0:
+	case CCI5xx_PORT_S1:
+	case CCI5xx_PORT_S2:
+	case CCI5xx_PORT_S3:
+	case CCI5xx_PORT_S4:
+	case CCI5xx_PORT_S5:
+	case CCI5xx_PORT_S6:
+		if_type = CCI_IF_SLAVE;
+		break;
+	case CCI5xx_PORT_M0:
+	case CCI5xx_PORT_M1:
+	case CCI5xx_PORT_M2:
+	case CCI5xx_PORT_M3:
+	case CCI5xx_PORT_M4:
+	case CCI5xx_PORT_M5:
+	case CCI5xx_PORT_M6:
+		if_type = CCI_IF_MASTER;
+		break;
+	case CCI5xx_PORT_GLOBAL:
+		if_type = CCI_IF_GLOBAL;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	if (ev_code >= cci_pmu->model->event_ranges[if_type].min &&
+		ev_code <= cci_pmu->model->event_ranges[if_type].max)
+		return hw_event;
+
+	return -ENOENT;
+}
+
+#endif	/* CONFIG_ARM_CCI5xx_PMU */
+
+/*
+ * Program the CCI PMU counters which have PERF_HES_ARCH set
+ * with the event period and mark them ready before we enable
+ * PMU.
+ */
+static void cci_pmu_sync_counters(struct cci_pmu *cci_pmu)
+{
+	int i;
+	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
+
+	DECLARE_BITMAP(mask, cci_pmu->num_cntrs);
+
+	bitmap_zero(mask, cci_pmu->num_cntrs);
+	for_each_set_bit(i, cci_pmu->hw_events.used_mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_hw->events[i];
+
+		if (WARN_ON(!event))
+			continue;
+
+		/* Leave the events which are not counting */
+		if (event->hw.state & PERF_HES_STOPPED)
+			continue;
+		if (event->hw.state & PERF_HES_ARCH) {
+			set_bit(i, mask);
+			event->hw.state &= ~PERF_HES_ARCH;
+		}
+	}
+
+	pmu_write_counters(cci_pmu, mask);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_enable_nosync(struct cci_pmu *cci_pmu)
+{
+	u32 val;
+
+	/* Enable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_enable_sync(struct cci_pmu *cci_pmu)
+{
+	cci_pmu_sync_counters(cci_pmu);
+	__cci_pmu_enable_nosync(cci_pmu);
+}
+
+/* Should be called with cci_pmu->hw_events->pmu_lock held */
+static void __cci_pmu_disable(void)
+{
+	u32 val;
+
+	/* Disable all the PMU counters. */
+	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
+	writel(val, cci_ctrl_base + CCI_PMCR);
+}
 
 static ssize_t cci_pmu_format_show(struct device *dev,
 			struct device_attribute *attr, char *buf)
@@ -633,8 +758,8 @@ static u32 pmu_read_register(struct cci_pmu *cci_pmu, int idx, unsigned int offs
 static void pmu_write_register(struct cci_pmu *cci_pmu, u32 value,
 			       int idx, unsigned int offset)
 {
-	return writel_relaxed(value, cci_pmu->base +
-			      CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
+	writel_relaxed(value, cci_pmu->base +
+		       CCI_PMU_CNTR_BASE(cci_pmu->model, idx) + offset);
 }
 
 static void pmu_disable_counter(struct cci_pmu *cci_pmu, int idx)
@@ -647,12 +772,56 @@ static void pmu_enable_counter(struct cci_pmu *cci_pmu, int idx)
 	pmu_write_register(cci_pmu, 1, idx, CCI_PMU_CNTR_CTRL);
 }
 
+static bool __maybe_unused
+pmu_counter_is_enabled(struct cci_pmu *cci_pmu, int idx)
+{
+	return (pmu_read_register(cci_pmu, idx, CCI_PMU_CNTR_CTRL) & 0x1) != 0;
+}
+
 static void pmu_set_event(struct cci_pmu *cci_pmu, int idx, unsigned long event)
 {
 	pmu_write_register(cci_pmu, event, idx, CCI_PMU_EVT_SEL);
 }
 
 /*
+ * For all counters on the CCI-PMU, disable any 'enabled' counters,
+ * saving the changed counters in the mask, so that we can restore
+ * it later using pmu_restore_counters. The mask is private to the
+ * caller. We cannot rely on the used_mask maintained by the CCI_PMU
+ * as it only tells us if the counter is assigned to perf_event or not.
+ * The state of the perf_event cannot be locked by the PMU layer, hence
+ * we check the individual counter status (which can be locked by
+ * cci_pm->hw_events->pmu_lock).
+ *
+ * @mask should be initialised to empty by the caller.
+ */
+static void __maybe_unused
+pmu_save_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+
+	for (i = 0; i < cci_pmu->num_cntrs; i++) {
+		if (pmu_counter_is_enabled(cci_pmu, i)) {
+			set_bit(i, mask);
+			pmu_disable_counter(cci_pmu, i);
+		}
+	}
+}
+
+/*
+ * Restore the status of the counters. Reversal of the pmu_save_counters().
+ * For each counter set in the mask, enable the counter back.
+ */
+static void __maybe_unused
+pmu_restore_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs)
+		pmu_enable_counter(cci_pmu, i);
+}
+
+/*
  * Returns the number of programmable counters actually implemented
  * by the cci
  */
@@ -754,18 +923,98 @@ static u32 pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static void pmu_write_counter(struct perf_event *event, u32 value)
+static void pmu_write_counter(struct cci_pmu *cci_pmu, u32 value, int idx)
 {
-	struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
-	struct hw_perf_event *hw_counter = &event->hw;
-	int idx = hw_counter->idx;
+	pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
+}
 
-	if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
-		dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+static void __pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+	struct cci_pmu_hw_events *cci_hw = &cci_pmu->hw_events;
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_hw->events[i];
+
+		if (WARN_ON(!event))
+			continue;
+		pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
+	}
+}
+
+static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	if (cci_pmu->model->write_counters)
+		cci_pmu->model->write_counters(cci_pmu, mask);
 	else
-		pmu_write_register(cci_pmu, value, idx, CCI_PMU_CNTR);
+		__pmu_write_counters(cci_pmu, mask);
 }
 
+#ifdef CONFIG_ARM_CCI5xx_PMU
+
+/*
+ * CCI-500/CCI-550 has advanced power saving policies, which could gate the
+ * clocks to the PMU counters, which makes the writes to them ineffective.
+ * The only way to write to those counters is when the global counters
+ * are enabled and the particular counter is enabled.
+ *
+ * So we do the following :
+ *
+ * 1) Disable all the PMU counters, saving their current state
+ * 2) Enable the global PMU profiling, now that all counters are
+ *    disabled.
+ *
+ * For each counter to be programmed, repeat steps 3-7:
+ *
+ * 3) Write an invalid event code to the event control register for the
+      counter, so that the counters are not modified.
+ * 4) Enable the counter control for the counter.
+ * 5) Set the counter value
+ * 6) Disable the counter
+ * 7) Restore the event in the target counter
+ *
+ * 8) Disable the global PMU.
+ * 9) Restore the status of the rest of the counters.
+ *
+ * We choose an event which for CCI-5xx is guaranteed not to count.
+ * We use the highest possible event code (0x1f) for the master interface 0.
+ */
+#define CCI5xx_INVALID_EVENT	((CCI5xx_PORT_M0 << CCI5xx_PMU_EVENT_SOURCE_SHIFT) | \
+				 (CCI5xx_PMU_EVENT_CODE_MASK << CCI5xx_PMU_EVENT_CODE_SHIFT))
+static void cci5xx_pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask)
+{
+	int i;
+	DECLARE_BITMAP(saved_mask, cci_pmu->num_cntrs);
+
+	bitmap_zero(saved_mask, cci_pmu->num_cntrs);
+	pmu_save_counters(cci_pmu, saved_mask);
+
+	/*
+	 * Now that all the counters are disabled, we can safely turn the PMU on,
+	 * without syncing the status of the counters
+	 */
+	__cci_pmu_enable_nosync(cci_pmu);
+
+	for_each_set_bit(i, mask, cci_pmu->num_cntrs) {
+		struct perf_event *event = cci_pmu->hw_events.events[i];
+
+		if (WARN_ON(!event))
+			continue;
+
+		pmu_set_event(cci_pmu, i, CCI5xx_INVALID_EVENT);
+		pmu_enable_counter(cci_pmu, i);
+		pmu_write_counter(cci_pmu, local64_read(&event->hw.prev_count), i);
+		pmu_disable_counter(cci_pmu, i);
+		pmu_set_event(cci_pmu, i, event->hw.config_base);
+	}
+
+	__cci_pmu_disable();
+
+	pmu_restore_counters(cci_pmu, saved_mask);
+}
+
+#endif	/* CONFIG_ARM_CCI5xx_PMU */
+
 static u64 pmu_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -789,7 +1038,7 @@ static void pmu_read(struct perf_event *event)
 	pmu_event_update(event);
 }
 
-void pmu_event_set_period(struct perf_event *event)
+static void pmu_event_set_period(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	/*
@@ -800,7 +1049,14 @@ void pmu_event_set_period(struct perf_event *event)
 	 */
 	u64 val = 1ULL << 31;
 	local64_set(&hwc->prev_count, val);
-	pmu_write_counter(event, val);
+
+	/*
+	 * CCI PMU uses PERF_HES_ARCH to keep track of the counters, whose
+	 * values needs to be sync-ed with the s/w state before the PMU is
+	 * enabled.
+	 * Mark this counter for sync.
+	 */
+	hwc->state |= PERF_HES_ARCH;
 }
 
 static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
@@ -811,6 +1067,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 	int idx, handled = IRQ_NONE;
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable the PMU while we walk through the counters */
+	__cci_pmu_disable();
 	/*
 	 * Iterate over counters and update the corresponding perf events.
 	 * This should work regardless of whether we have per-counter overflow
@@ -818,13 +1077,10 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 	 */
 	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++) {
 		struct perf_event *event = events->events[idx];
-		struct hw_perf_event *hw_counter;
 
 		if (!event)
 			continue;
 
-		hw_counter = &event->hw;
-
 		/* Did this counter overflow? */
 		if (!(pmu_read_register(cci_pmu, idx, CCI_PMU_OVRFLW) &
 		      CCI_PMU_OVRFLW_FLAG))
@@ -837,6 +1093,9 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 		pmu_event_set_period(event);
 		handled = IRQ_HANDLED;
 	}
+
+	/* Enable the PMU and sync possibly overflowed counters */
+	__cci_pmu_enable_sync(cci_pmu);
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 
 	return IRQ_RETVAL(handled);
@@ -875,16 +1134,12 @@ static void cci_pmu_enable(struct pmu *pmu)
 	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
 	int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_cntrs);
 	unsigned long flags;
-	u32 val;
 
 	if (!enabled)
 		return;
 
 	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
-
-	/* Enable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	__cci_pmu_enable_sync(cci_pmu);
 	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 
 }
@@ -894,13 +1149,9 @@ static void cci_pmu_disable(struct pmu *pmu)
 	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
 	struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
 	unsigned long flags;
-	u32 val;
 
 	raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
-
-	/* Disable all the PMU counters. */
-	val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
-	writel(val, cci_ctrl_base + CCI_PMCR);
+	__cci_pmu_disable();
 	raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
 }
 
@@ -1176,9 +1427,8 @@ static int cci_pmu_event_init(struct perf_event *event)
 static ssize_t pmu_cpumask_attr_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
-	struct dev_ext_attribute *eattr = container_of(attr,
-					struct dev_ext_attribute, attr);
-	struct cci_pmu *cci_pmu = eattr->var;
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
 
 	int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
 			  cpumask_pr_args(&cci_pmu->cpus));
@@ -1187,13 +1437,11 @@ static ssize_t pmu_cpumask_attr_show(struct device *dev,
 	return n;
 }
 
-static struct dev_ext_attribute pmu_cpumask_attr = {
-	__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL),
-	NULL,		/* Populated in cci_pmu_init */
-};
+static struct device_attribute pmu_cpumask_attr =
+	__ATTR(cpumask, S_IRUGO, pmu_cpumask_attr_show, NULL);
 
 static struct attribute *pmu_attrs[] = {
-	&pmu_cpumask_attr.attr.attr,
+	&pmu_cpumask_attr.attr,
 	NULL,
 };
 
@@ -1218,60 +1466,14 @@ static const struct attribute_group *pmu_attr_groups[] = {
 	NULL
 };
 
-static struct attribute **alloc_attrs(struct platform_device *pdev,
-				int n, struct dev_ext_attribute *source)
-{
-	int i;
-	struct attribute **attrs;
-
-	/* Alloc n + 1 (for terminating NULL) */
-	attrs  = devm_kcalloc(&pdev->dev, n + 1, sizeof(struct attribute *),
-								GFP_KERNEL);
-	if (!attrs)
-		return attrs;
-	for(i = 0; i < n; i++)
-		attrs[i] = &source[i].attr.attr;
-	return attrs;
-}
-
-static int cci_pmu_init_attrs(struct cci_pmu *cci_pmu, struct platform_device *pdev)
-{
-	const struct cci_pmu_model *model = cci_pmu->model;
-	struct attribute **attrs;
-
-	/*
-	 * All allocations below are managed, hence doesn't need to be
-	 * free'd explicitly in case of an error.
-	 */
-
-	if (model->nevent_attrs) {
-		attrs = alloc_attrs(pdev, model->nevent_attrs,
-						model->event_attrs);
-		if (!attrs)
-			return -ENOMEM;
-		pmu_event_attr_group.attrs = attrs;
-	}
-	if (model->nformat_attrs) {
-		attrs = alloc_attrs(pdev, model->nformat_attrs,
-						 model->format_attrs);
-		if (!attrs)
-			return -ENOMEM;
-		pmu_format_attr_group.attrs = attrs;
-	}
-	pmu_cpumask_attr.var = cci_pmu;
-
-	return 0;
-}
-
 static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
 {
-	char *name = cci_pmu->model->name;
+	const struct cci_pmu_model *model = cci_pmu->model;
+	char *name = model->name;
 	u32 num_cntrs;
-	int rc;
 
-	rc = cci_pmu_init_attrs(cci_pmu, pdev);
-	if (rc)
-		return rc;
+	pmu_event_attr_group.attrs = model->event_attrs;
+	pmu_format_attr_group.attrs = model->format_attrs;
 
 	cci_pmu->pmu = (struct pmu) {
 		.name		= cci_pmu->model->name,
@@ -1314,7 +1516,7 @@ static int cci_pmu_cpu_notifier(struct notifier_block *self,
 		if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
 			break;
 		target = cpumask_any_but(cpu_online_mask, cpu);
-		if (target < 0) // UP, last CPU
+		if (target >= nr_cpu_ids) // UP, last CPU
 			break;
 		/*
 		 * TODO: migrate context once core races on event->ctx have
@@ -1336,9 +1538,7 @@ static struct cci_pmu_model cci_pmu_models[] = {
 		.num_hw_cntrs = 4,
 		.cntr_size = SZ_4K,
 		.format_attrs = cci400_pmu_format_attrs,
-		.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
 		.event_attrs = cci400_r0_pmu_event_attrs,
-		.nevent_attrs = ARRAY_SIZE(cci400_r0_pmu_event_attrs),
 		.event_ranges = {
 			[CCI_IF_SLAVE] = {
 				CCI400_R0_SLAVE_PORT_MIN_EV,
@@ -1358,9 +1558,7 @@ static struct cci_pmu_model cci_pmu_models[] = {
 		.num_hw_cntrs = 4,
 		.cntr_size = SZ_4K,
 		.format_attrs = cci400_pmu_format_attrs,
-		.nformat_attrs = ARRAY_SIZE(cci400_pmu_format_attrs),
 		.event_attrs = cci400_r1_pmu_event_attrs,
-		.nevent_attrs = ARRAY_SIZE(cci400_r1_pmu_event_attrs),
 		.event_ranges = {
 			[CCI_IF_SLAVE] = {
 				CCI400_R1_SLAVE_PORT_MIN_EV,
@@ -1375,31 +1573,54 @@ static struct cci_pmu_model cci_pmu_models[] = {
 		.get_event_idx = cci400_get_event_idx,
 	},
 #endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 	[CCI500_R0] = {
 		.name = "CCI_500",
 		.fixed_hw_cntrs = 0,
 		.num_hw_cntrs = 8,
 		.cntr_size = SZ_64K,
-		.format_attrs = cci500_pmu_format_attrs,
-		.nformat_attrs = ARRAY_SIZE(cci500_pmu_format_attrs),
-		.event_attrs = cci500_pmu_event_attrs,
-		.nevent_attrs = ARRAY_SIZE(cci500_pmu_event_attrs),
+		.format_attrs = cci5xx_pmu_format_attrs,
+		.event_attrs = cci5xx_pmu_event_attrs,
 		.event_ranges = {
 			[CCI_IF_SLAVE] = {
-				CCI500_SLAVE_PORT_MIN_EV,
-				CCI500_SLAVE_PORT_MAX_EV,
+				CCI5xx_SLAVE_PORT_MIN_EV,
+				CCI5xx_SLAVE_PORT_MAX_EV,
 			},
 			[CCI_IF_MASTER] = {
-				CCI500_MASTER_PORT_MIN_EV,
-				CCI500_MASTER_PORT_MAX_EV,
+				CCI5xx_MASTER_PORT_MIN_EV,
+				CCI5xx_MASTER_PORT_MAX_EV,
 			},
 			[CCI_IF_GLOBAL] = {
-				CCI500_GLOBAL_PORT_MIN_EV,
-				CCI500_GLOBAL_PORT_MAX_EV,
+				CCI5xx_GLOBAL_PORT_MIN_EV,
+				CCI5xx_GLOBAL_PORT_MAX_EV,
 			},
 		},
 		.validate_hw_event = cci500_validate_hw_event,
+		.write_counters	= cci5xx_pmu_write_counters,
+	},
+	[CCI550_R0] = {
+		.name = "CCI_550",
+		.fixed_hw_cntrs = 0,
+		.num_hw_cntrs = 8,
+		.cntr_size = SZ_64K,
+		.format_attrs = cci5xx_pmu_format_attrs,
+		.event_attrs = cci5xx_pmu_event_attrs,
+		.event_ranges = {
+			[CCI_IF_SLAVE] = {
+				CCI5xx_SLAVE_PORT_MIN_EV,
+				CCI5xx_SLAVE_PORT_MAX_EV,
+			},
+			[CCI_IF_MASTER] = {
+				CCI5xx_MASTER_PORT_MIN_EV,
+				CCI5xx_MASTER_PORT_MAX_EV,
+			},
+			[CCI_IF_GLOBAL] = {
+				CCI5xx_GLOBAL_PORT_MIN_EV,
+				CCI5xx_GLOBAL_PORT_MAX_EV,
+			},
+		},
+		.validate_hw_event = cci550_validate_hw_event,
+		.write_counters	= cci5xx_pmu_write_counters,
 	},
 #endif
 };
@@ -1419,11 +1640,15 @@ static const struct of_device_id arm_cci_pmu_matches[] = {
 		.data	= &cci_pmu_models[CCI400_R1],
 	},
 #endif
-#ifdef CONFIG_ARM_CCI500_PMU
+#ifdef CONFIG_ARM_CCI5xx_PMU
 	{
 		.compatible = "arm,cci-500-pmu,r0",
 		.data = &cci_pmu_models[CCI500_R0],
 	},
+	{
+		.compatible = "arm,cci-550-pmu,r0",
+		.data = &cci_pmu_models[CCI550_R0],
+	},
 #endif
 	{},
 };
diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index e98d15eaa799..1827fc4d15c1 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -150,7 +150,7 @@ static int __init weim_parse_dt(struct platform_device *pdev,
 			return ret;
 	}
 
-	for_each_child_of_node(pdev->dev.of_node, child) {
+	for_each_available_child_of_node(pdev->dev.of_node, child) {
 		if (!child->name)
 			continue;
 
diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c
index 25996e256110..795c9d9c96a6 100644
--- a/drivers/bus/sunxi-rsb.c
+++ b/drivers/bus/sunxi-rsb.c
@@ -330,7 +330,7 @@ static int sunxi_rsb_read(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
 		cmd = RSB_CMD_RD32;
 		break;
 	default:
-		dev_err(rsb->dev, "Invalid access width: %d\n", len);
+		dev_err(rsb->dev, "Invalid access width: %zd\n", len);
 		return -EINVAL;
 	}
 
@@ -372,7 +372,7 @@ static int sunxi_rsb_write(struct sunxi_rsb *rsb, u8 rtaddr, u8 addr,
 		cmd = RSB_CMD_WR32;
 		break;
 	default:
-		dev_err(rsb->dev, "Invalid access width: %d\n", len);
+		dev_err(rsb->dev, "Invalid access width: %zd\n", len);
 		return -EINVAL;
 	}
 
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index 09f17eb73486..0f64d149c98d 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -156,7 +156,7 @@ static pgprot_t agp_convert_mmap_flags(int prot)
 {
 	unsigned long prot_bits;
 
-	prot_bits = calc_vm_prot_bits(prot) | VM_SHARED;
+	prot_bits = calc_vm_prot_bits(prot, 0) | VM_SHARED;
 	return vm_get_page_prot(prot_bits);
 }
 
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1341a94cc779..aef87fdbd187 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -555,8 +555,10 @@ static unsigned int intel_gtt_mappable_entries(void)
 static void intel_gtt_teardown_scratch_page(void)
 {
 	set_pages_wb(intel_private.scratch_page, 1);
-	pci_unmap_page(intel_private.pcidev, intel_private.scratch_page_dma,
-		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (intel_private.needs_dmar)
+		pci_unmap_page(intel_private.pcidev,
+			       intel_private.scratch_page_dma,
+			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 	__free_page(intel_private.scratch_page);
 }
 
@@ -1346,16 +1348,6 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 {
 	int i, mask;
 
-	/*
-	 * Can be called from the fake agp driver but also directly from
-	 * drm/i915.ko. Hence we need to check whether everything is set up
-	 * already.
-	 */
-	if (intel_private.driver) {
-		intel_private.refcount++;
-		return 1;
-	}
-
 	for (i = 0; intel_gtt_chipsets[i].name != NULL; i++) {
 		if (gpu_pdev) {
 			if (gpu_pdev->device ==
@@ -1376,16 +1368,26 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
 	if (!intel_private.driver)
 		return 0;
 
-	intel_private.refcount++;
-
 #if IS_ENABLED(CONFIG_AGP_INTEL)
 	if (bridge) {
+		if (INTEL_GTT_GEN > 1)
+			return 0;
+
 		bridge->driver = &intel_fake_agp_driver;
 		bridge->dev_private_data = &intel_private;
 		bridge->dev = bridge_pdev;
 	}
 #endif
 
+
+	/*
+	 * Can be called from the fake agp driver but also directly from
+	 * drm/i915.ko. Hence we need to check whether everything is set up
+	 * already.
+	 */
+	if (intel_private.refcount++)
+		return 1;
+
 	intel_private.bridge_dev = pci_dev_get(bridge_pdev);
 
 	dev_info(&bridge_pdev->dev, "Intel %s Chipset\n", intel_gtt_chipsets[i].name);
@@ -1430,6 +1432,8 @@ void intel_gmch_remove(void)
 	if (--intel_private.refcount)
 		return;
 
+	if (intel_private.scratch_page)
+		intel_gtt_teardown_scratch_page();
 	if (intel_private.pcidev)
 		pci_dev_put(intel_private.pcidev);
 	if (intel_private.bridge_dev)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 096f0cef4da1..4facc7517a6a 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1140,7 +1140,7 @@ ipmi_nmi(unsigned int val, struct pt_regs *regs)
 		   the timer.   So do so. */
 		pretimeout_since_last_heartbeat = 1;
 		if (atomic_inc_and_test(&preop_panic_excl))
-			panic(PFX "pre-timeout");
+			nmi_panic(regs, PFX "pre-timeout");
 	}
 
 	return NMI_HANDLED;
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index d23368874710..f8a483c67b07 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -286,7 +286,7 @@ static int register_device(int minor, struct pp_struct *pp)
 	struct parport *port;
 	struct pardevice *pdev = NULL;
 	char *name;
-	struct pardev_cb ppdev_cb;
+	int fl;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -299,11 +299,9 @@ static int register_device(int minor, struct pp_struct *pp)
 		return -ENXIO;
 	}
 
-	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
-	ppdev_cb.irq_func = pp_irq;
-	ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
-	ppdev_cb.private = pp;
-	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
+	fl = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
+	pdev = parport_register_device(port, name, NULL,
+				       NULL, pp_irq, fl, pp);
 	parport_put_port(port);
 
 	if (!pdev) {
@@ -801,23 +799,10 @@ static void pp_detach(struct parport *port)
 	device_destroy(ppdev_class, MKDEV(PP_MAJOR, port->number));
 }
 
-static int pp_probe(struct pardevice *par_dev)
-{
-	struct device_driver *drv = par_dev->dev.driver;
-	int len = strlen(drv->name);
-
-	if (strncmp(par_dev->name, drv->name, len))
-		return -ENODEV;
-
-	return 0;
-}
-
 static struct parport_driver pp_driver = {
 	.name		= CHRDEV,
-	.probe		= pp_probe,
-	.match_port	= pp_attach,
+	.attach		= pp_attach,
 	.detach		= pp_detach,
-	.devmodel	= true,
 };
 
 static int __init ppdev_init(void)
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index eca8e019e005..16f7d33421d8 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -6,9 +6,6 @@ config CLKDEV_LOOKUP
 config HAVE_CLK_PREPARE
 	bool
 
-config HAVE_MACH_CLKDEV
-	bool
-
 config COMMON_CLK
 	bool
 	select HAVE_CLK_PREPARE
@@ -99,6 +96,14 @@ config COMMON_CLK_SI570
 	  This driver supports Silicon Labs 570/571/598/599 programmable
 	  clock generators.
 
+config COMMON_CLK_CDCE706
+	tristate "Clock driver for TI CDCE706 clock synthesizer"
+	depends on I2C
+	select REGMAP_I2C
+	select RATIONAL
+	---help---
+	  This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
+
 config COMMON_CLK_CDCE925
 	tristate "Clock driver for TI CDCE925 devices"
 	depends on I2C
@@ -190,23 +195,14 @@ config COMMON_CLK_PWM
 config COMMON_CLK_PXA
 	def_bool COMMON_CLK && ARCH_PXA
 	---help---
-	  Sypport for the Marvell PXA SoC.
-
-config COMMON_CLK_CDCE706
-	tristate "Clock driver for TI CDCE706 clock synthesizer"
-	depends on I2C
-	select REGMAP_I2C
-	select RATIONAL
-	---help---
-	  This driver supports TI CDCE706 programmable 3-PLL clock synthesizer.
+	  Support for the Marvell PXA SoC.
 
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
-source "drivers/clk/qcom/Kconfig"
-
-endmenu
-
 source "drivers/clk/mvebu/Kconfig"
-
+source "drivers/clk/qcom/Kconfig"
 source "drivers/clk/samsung/Kconfig"
 source "drivers/clk/tegra/Kconfig"
+source "drivers/clk/ti/Kconfig"
+
+endmenu
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index bae4be6501df..46869d696e4d 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -70,15 +70,14 @@ obj-$(CONFIG_COMMON_CLK_PXA)		+= pxa/
 obj-$(CONFIG_COMMON_CLK_QCOM)		+= qcom/
 obj-$(CONFIG_ARCH_ROCKCHIP)		+= rockchip/
 obj-$(CONFIG_COMMON_CLK_SAMSUNG)	+= samsung/
-obj-$(CONFIG_ARCH_SHMOBILE_MULTI)	+= shmobile/
-obj-$(CONFIG_ARCH_RENESAS)		+= shmobile/
+obj-$(CONFIG_ARCH_RENESAS)		+= renesas/
 obj-$(CONFIG_ARCH_SIRF)			+= sirf/
 obj-$(CONFIG_ARCH_SOCFPGA)		+= socfpga/
 obj-$(CONFIG_PLAT_SPEAR)		+= spear/
 obj-$(CONFIG_ARCH_STI)			+= st/
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
-obj-$(CONFIG_ARCH_OMAP2PLUS)		+= ti/
+obj-y					+= ti/
 obj-$(CONFIG_ARCH_U8500)		+= ux500/
 obj-$(CONFIG_COMMON_CLK_VERSATILE)	+= versatile/
 obj-$(CONFIG_X86)			+= x86/
diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
index abc80949e1dd..e1aa210dd7aa 100644
--- a/drivers/clk/at91/clk-generated.c
+++ b/drivers/clk/at91/clk-generated.c
@@ -15,8 +15,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -28,8 +28,9 @@
 
 struct clk_generated {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 	struct clk_range range;
+	spinlock_t *lock;
 	u32 id;
 	u32 gckdiv;
 	u8 parent_id;
@@ -41,49 +42,52 @@ struct clk_generated {
 static int clk_generated_enable(struct clk_hw *hw)
 {
 	struct clk_generated *gck = to_clk_generated(hw);
-	struct at91_pmc *pmc = gck->pmc;
-	u32 tmp;
+	unsigned long flags;
 
 	pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n",
 		 __func__, gck->gckdiv, gck->parent_id);
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR) &
-			~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK);
-	pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id)
-					 | AT91_PMC_PCR_CMD
-					 | AT91_PMC_PCR_GCKDIV(gck->gckdiv)
-					 | AT91_PMC_PCR_GCKEN);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(gck->lock, flags);
+	regmap_write(gck->regmap, AT91_PMC_PCR,
+		     (gck->id & AT91_PMC_PCR_PID_MASK));
+	regmap_update_bits(gck->regmap, AT91_PMC_PCR,
+			   AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK |
+			   AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
+			   AT91_PMC_PCR_GCKCSS(gck->parent_id) |
+			   AT91_PMC_PCR_CMD |
+			   AT91_PMC_PCR_GCKDIV(gck->gckdiv) |
+			   AT91_PMC_PCR_GCKEN);
+	spin_unlock_irqrestore(gck->lock, flags);
 	return 0;
 }
 
 static void clk_generated_disable(struct clk_hw *hw)
 {
 	struct clk_generated *gck = to_clk_generated(hw);
-	struct at91_pmc *pmc = gck->pmc;
-	u32 tmp;
-
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN;
-	pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
-	pmc_unlock(pmc);
+	unsigned long flags;
+
+	spin_lock_irqsave(gck->lock, flags);
+	regmap_write(gck->regmap, AT91_PMC_PCR,
+		     (gck->id & AT91_PMC_PCR_PID_MASK));
+	regmap_update_bits(gck->regmap, AT91_PMC_PCR,
+			   AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN,
+			   AT91_PMC_PCR_CMD);
+	spin_unlock_irqrestore(gck->lock, flags);
 }
 
 static int clk_generated_is_enabled(struct clk_hw *hw)
 {
 	struct clk_generated *gck = to_clk_generated(hw);
-	struct at91_pmc *pmc = gck->pmc;
-	int ret;
+	unsigned long flags;
+	unsigned int status;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
-	ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(gck->lock, flags);
+	regmap_write(gck->regmap, AT91_PMC_PCR,
+		     (gck->id & AT91_PMC_PCR_PID_MASK));
+	regmap_read(gck->regmap, AT91_PMC_PCR, &status);
+	spin_unlock_irqrestore(gck->lock, flags);
 
-	return ret;
+	return status & AT91_PMC_PCR_GCKEN ? 1 : 0;
 }
 
 static unsigned long
@@ -214,13 +218,14 @@ static const struct clk_ops generated_ops = {
  */
 static void clk_generated_startup(struct clk_generated *gck)
 {
-	struct at91_pmc *pmc = gck->pmc;
 	u32 tmp;
+	unsigned long flags;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(gck->lock, flags);
+	regmap_write(gck->regmap, AT91_PMC_PCR,
+		     (gck->id & AT91_PMC_PCR_PID_MASK));
+	regmap_read(gck->regmap, AT91_PMC_PCR, &tmp);
+	spin_unlock_irqrestore(gck->lock, flags);
 
 	gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK)
 					>> AT91_PMC_PCR_GCKCSS_OFFSET;
@@ -229,8 +234,8 @@ static void clk_generated_startup(struct clk_generated *gck)
 }
 
 static struct clk * __init
-at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
-			    const char **parent_names, u8 num_parents,
+at91_clk_register_generated(struct regmap *regmap,  spinlock_t *lock, const char
+			    *name, const char **parent_names, u8 num_parents,
 			    u8 id, const struct clk_range *range)
 {
 	struct clk_generated *gck;
@@ -249,7 +254,8 @@ at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
 
 	gck->id = id;
 	gck->hw.init = &init;
-	gck->pmc = pmc;
+	gck->regmap = regmap;
+	gck->lock = lock;
 	gck->range = *range;
 
 	clk = clk_register(NULL, &gck->hw);
@@ -261,20 +267,20 @@ at91_clk_register_generated(struct at91_pmc *pmc, const char *name,
 	return clk;
 }
 
-void __init of_sama5d2_clk_generated_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
+void __init of_sama5d2_clk_generated_setup(struct device_node *np)
 {
 	int num;
 	u32 id;
 	const char *name;
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[GENERATED_SOURCE_MAX];
 	struct device_node *gcknp;
 	struct clk_range range = CLK_RANGE(0, 0);
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > GENERATED_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
@@ -283,6 +289,10 @@ void __init of_sama5d2_clk_generated_setup(struct device_node *np,
 	if (!num || num > PERIPHERAL_MAX)
 		return;
 
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
 	for_each_child_of_node(np, gcknp) {
 		if (of_property_read_u32(gcknp, "reg", &id))
 			continue;
@@ -296,11 +306,14 @@ void __init of_sama5d2_clk_generated_setup(struct device_node *np,
 		of_at91_get_clk_range(gcknp, "atmel,clk-output-range",
 				      &range);
 
-		clk = at91_clk_register_generated(pmc, name, parent_names,
-						  num_parents, id, &range);
+		clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
+						  parent_names, num_parents,
+						  id, &range);
 		if (IS_ERR(clk))
 			continue;
 
 		of_clk_add_provider(gcknp, of_clk_src_simple_get, clk);
 	}
 }
+CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated",
+	       of_sama5d2_clk_generated_setup);
diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c
index 61566bcefa53..819f5842fa66 100644
--- a/drivers/clk/at91/clk-h32mx.c
+++ b/drivers/clk/at91/clk-h32mx.c
@@ -15,15 +15,9 @@
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
-#include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
 
 #include "pmc.h"
 
@@ -31,7 +25,7 @@
 
 struct clk_sama5d4_h32mx {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 #define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw)
@@ -40,8 +34,10 @@ static unsigned long clk_sama5d4_h32mx_recalc_rate(struct clk_hw *hw,
 						 unsigned long parent_rate)
 {
 	struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
+	unsigned int mckr;
 
-	if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV)
+	regmap_read(h32mxclk->regmap, AT91_PMC_MCKR, &mckr);
+	if (mckr & AT91_PMC_H32MXDIV)
 		return parent_rate / 2;
 
 	if (parent_rate > H32MX_MAX_FREQ)
@@ -70,18 +66,16 @@ static int clk_sama5d4_h32mx_set_rate(struct clk_hw *hw, unsigned long rate,
 				    unsigned long parent_rate)
 {
 	struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw);
-	struct at91_pmc *pmc = h32mxclk->pmc;
-	u32 tmp;
+	u32 mckr = 0;
 
 	if (parent_rate != rate && (parent_rate / 2) != rate)
 		return -EINVAL;
 
-	pmc_lock(pmc);
-	tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV;
 	if ((parent_rate / 2) == rate)
-		tmp |= AT91_PMC_H32MXDIV;
-	pmc_write(pmc, AT91_PMC_MCKR, tmp);
-	pmc_unlock(pmc);
+		mckr = AT91_PMC_H32MXDIV;
+
+	regmap_update_bits(h32mxclk->regmap, AT91_PMC_MCKR,
+			   AT91_PMC_H32MXDIV, mckr);
 
 	return 0;
 }
@@ -92,14 +86,18 @@ static const struct clk_ops h32mx_ops = {
 	.set_rate = clk_sama5d4_h32mx_set_rate,
 };
 
-void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
-				     struct at91_pmc *pmc)
+static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np)
 {
 	struct clk_sama5d4_h32mx *h32mxclk;
 	struct clk_init_data init;
 	const char *parent_name;
+	struct regmap *regmap;
 	struct clk *clk;
 
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
 	h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL);
 	if (!h32mxclk)
 		return;
@@ -113,7 +111,7 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
 	init.flags = CLK_SET_RATE_GATE;
 
 	h32mxclk->hw.init = &init;
-	h32mxclk->pmc = pmc;
+	h32mxclk->regmap = regmap;
 
 	clk = clk_register(NULL, &h32mxclk->hw);
 	if (!clk) {
@@ -123,3 +121,5 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np,
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx",
+	       of_sama5d4_clk_h32mx_setup);
diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c
index fd7247deabdc..58b5baca670c 100644
--- a/drivers/clk/at91/clk-main.c
+++ b/drivers/clk/at91/clk-main.c
@@ -13,13 +13,8 @@
 #include <linux/clk/at91_pmc.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -34,18 +29,14 @@
 
 struct clk_main_osc {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 };
 
 #define to_clk_main_osc(hw) container_of(hw, struct clk_main_osc, hw)
 
 struct clk_main_rc_osc {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 	unsigned long frequency;
 	unsigned long accuracy;
 };
@@ -54,51 +45,47 @@ struct clk_main_rc_osc {
 
 struct clk_rm9200_main {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 #define to_clk_rm9200_main(hw) container_of(hw, struct clk_rm9200_main, hw)
 
 struct clk_sam9x5_main {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 	u8 parent;
 };
 
 #define to_clk_sam9x5_main(hw) container_of(hw, struct clk_sam9x5_main, hw)
 
-static irqreturn_t clk_main_osc_irq_handler(int irq, void *dev_id)
+static inline bool clk_main_osc_ready(struct regmap *regmap)
 {
-	struct clk_main_osc *osc = dev_id;
+	unsigned int status;
 
-	wake_up(&osc->wait);
-	disable_irq_nosync(osc->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & AT91_PMC_MOSCS;
 }
 
 static int clk_main_osc_prepare(struct clk_hw *hw)
 {
 	struct clk_main_osc *osc = to_clk_main_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
+	struct regmap *regmap = osc->regmap;
 	u32 tmp;
 
-	tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
+	regmap_read(regmap, AT91_CKGR_MOR, &tmp);
+	tmp &= ~MOR_KEY_MASK;
+
 	if (tmp & AT91_PMC_OSCBYPASS)
 		return 0;
 
 	if (!(tmp & AT91_PMC_MOSCEN)) {
 		tmp |= AT91_PMC_MOSCEN | AT91_PMC_KEY;
-		pmc_write(pmc, AT91_CKGR_MOR, tmp);
+		regmap_write(regmap, AT91_CKGR_MOR, tmp);
 	}
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS)) {
-		enable_irq(osc->irq);
-		wait_event(osc->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS);
-	}
+	while (!clk_main_osc_ready(regmap))
+		cpu_relax();
 
 	return 0;
 }
@@ -106,9 +93,10 @@ static int clk_main_osc_prepare(struct clk_hw *hw)
 static void clk_main_osc_unprepare(struct clk_hw *hw)
 {
 	struct clk_main_osc *osc = to_clk_main_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
-	u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
+	struct regmap *regmap = osc->regmap;
+	u32 tmp;
 
+	regmap_read(regmap, AT91_CKGR_MOR, &tmp);
 	if (tmp & AT91_PMC_OSCBYPASS)
 		return;
 
@@ -116,20 +104,22 @@ static void clk_main_osc_unprepare(struct clk_hw *hw)
 		return;
 
 	tmp &= ~(AT91_PMC_KEY | AT91_PMC_MOSCEN);
-	pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
+	regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
 }
 
 static int clk_main_osc_is_prepared(struct clk_hw *hw)
 {
 	struct clk_main_osc *osc = to_clk_main_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
-	u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
+	struct regmap *regmap = osc->regmap;
+	u32 tmp, status;
 
+	regmap_read(regmap, AT91_CKGR_MOR, &tmp);
 	if (tmp & AT91_PMC_OSCBYPASS)
 		return 1;
 
-	return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS) &&
-		  (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN));
+	regmap_read(regmap, AT91_PMC_SR, &status);
+
+	return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN);
 }
 
 static const struct clk_ops main_osc_ops = {
@@ -139,18 +129,16 @@ static const struct clk_ops main_osc_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_main_osc(struct at91_pmc *pmc,
-			   unsigned int irq,
+at91_clk_register_main_osc(struct regmap *regmap,
 			   const char *name,
 			   const char *parent_name,
 			   bool bypass)
 {
-	int ret;
 	struct clk_main_osc *osc;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !irq || !name || !parent_name)
+	if (!name || !parent_name)
 		return ERR_PTR(-EINVAL);
 
 	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
@@ -164,85 +152,70 @@ at91_clk_register_main_osc(struct at91_pmc *pmc,
 	init.flags = CLK_IGNORE_UNUSED;
 
 	osc->hw.init = &init;
-	osc->pmc = pmc;
-	osc->irq = irq;
-
-	init_waitqueue_head(&osc->wait);
-	irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
-	ret = request_irq(osc->irq, clk_main_osc_irq_handler,
-			  IRQF_TRIGGER_HIGH, name, osc);
-	if (ret) {
-		kfree(osc);
-		return ERR_PTR(ret);
-	}
+	osc->regmap = regmap;
 
 	if (bypass)
-		pmc_write(pmc, AT91_CKGR_MOR,
-			  (pmc_read(pmc, AT91_CKGR_MOR) &
-			   ~(MOR_KEY_MASK | AT91_PMC_MOSCEN)) |
-			  AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
+		regmap_update_bits(regmap,
+				   AT91_CKGR_MOR, MOR_KEY_MASK |
+				   AT91_PMC_MOSCEN,
+				   AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
 
 	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk)) {
-		free_irq(irq, osc);
+	if (IS_ERR(clk))
 		kfree(osc);
-	}
 
 	return clk;
 }
 
-void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np,
-					     struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np)
 {
 	struct clk *clk;
-	unsigned int irq;
 	const char *name = np->name;
 	const char *parent_name;
+	struct regmap *regmap;
 	bool bypass;
 
 	of_property_read_string(np, "clock-output-names", &name);
 	bypass = of_property_read_bool(np, "atmel,osc-bypass");
 	parent_name = of_clk_get_parent_name(np, 0);
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_main_osc(pmc, irq, name, parent_name, bypass);
+	clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc",
+	       of_at91rm9200_clk_main_osc_setup);
 
-static irqreturn_t clk_main_rc_osc_irq_handler(int irq, void *dev_id)
+static bool clk_main_rc_osc_ready(struct regmap *regmap)
 {
-	struct clk_main_rc_osc *osc = dev_id;
+	unsigned int status;
 
-	wake_up(&osc->wait);
-	disable_irq_nosync(osc->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & AT91_PMC_MOSCRCS;
 }
 
 static int clk_main_rc_osc_prepare(struct clk_hw *hw)
 {
 	struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
-	u32 tmp;
+	struct regmap *regmap = osc->regmap;
+	unsigned int mor;
 
-	tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
+	regmap_read(regmap, AT91_CKGR_MOR, &mor);
 
-	if (!(tmp & AT91_PMC_MOSCRCEN)) {
-		tmp |= AT91_PMC_MOSCRCEN | AT91_PMC_KEY;
-		pmc_write(pmc, AT91_CKGR_MOR, tmp);
-	}
+	if (!(mor & AT91_PMC_MOSCRCEN))
+		regmap_update_bits(regmap, AT91_CKGR_MOR,
+				   MOR_KEY_MASK | AT91_PMC_MOSCRCEN,
+				   AT91_PMC_MOSCRCEN | AT91_PMC_KEY);
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS)) {
-		enable_irq(osc->irq);
-		wait_event(osc->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS);
-	}
+	while (!clk_main_rc_osc_ready(regmap))
+		cpu_relax();
 
 	return 0;
 }
@@ -250,23 +223,28 @@ static int clk_main_rc_osc_prepare(struct clk_hw *hw)
 static void clk_main_rc_osc_unprepare(struct clk_hw *hw)
 {
 	struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
-	u32 tmp = pmc_read(pmc, AT91_CKGR_MOR);
+	struct regmap *regmap = osc->regmap;
+	unsigned int mor;
+
+	regmap_read(regmap, AT91_CKGR_MOR, &mor);
 
-	if (!(tmp & AT91_PMC_MOSCRCEN))
+	if (!(mor & AT91_PMC_MOSCRCEN))
 		return;
 
-	tmp &= ~(MOR_KEY_MASK | AT91_PMC_MOSCRCEN);
-	pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY);
+	regmap_update_bits(regmap, AT91_CKGR_MOR,
+			   MOR_KEY_MASK | AT91_PMC_MOSCRCEN, AT91_PMC_KEY);
 }
 
 static int clk_main_rc_osc_is_prepared(struct clk_hw *hw)
 {
 	struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw);
-	struct at91_pmc *pmc = osc->pmc;
+	struct regmap *regmap = osc->regmap;
+	unsigned int mor, status;
 
-	return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS) &&
-		  (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCRCEN));
+	regmap_read(regmap, AT91_CKGR_MOR, &mor);
+	regmap_read(regmap, AT91_PMC_SR, &status);
+
+	return (mor & AT91_PMC_MOSCRCEN) && (status & AT91_PMC_MOSCRCS);
 }
 
 static unsigned long clk_main_rc_osc_recalc_rate(struct clk_hw *hw,
@@ -294,17 +272,15 @@ static const struct clk_ops main_rc_osc_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_main_rc_osc(struct at91_pmc *pmc,
-			      unsigned int irq,
+at91_clk_register_main_rc_osc(struct regmap *regmap,
 			      const char *name,
 			      u32 frequency, u32 accuracy)
 {
-	int ret;
 	struct clk_main_rc_osc *osc;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !irq || !name || !frequency)
+	if (!name || !frequency)
 		return ERR_PTR(-EINVAL);
 
 	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
@@ -315,66 +291,56 @@ at91_clk_register_main_rc_osc(struct at91_pmc *pmc,
 	init.ops = &main_rc_osc_ops;
 	init.parent_names = NULL;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
+	init.flags = CLK_IGNORE_UNUSED;
 
 	osc->hw.init = &init;
-	osc->pmc = pmc;
-	osc->irq = irq;
+	osc->regmap = regmap;
 	osc->frequency = frequency;
 	osc->accuracy = accuracy;
 
-	init_waitqueue_head(&osc->wait);
-	irq_set_status_flags(osc->irq, IRQ_NOAUTOEN);
-	ret = request_irq(osc->irq, clk_main_rc_osc_irq_handler,
-			  IRQF_TRIGGER_HIGH, name, osc);
-	if (ret)
-		return ERR_PTR(ret);
-
 	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk)) {
-		free_irq(irq, osc);
+	if (IS_ERR(clk))
 		kfree(osc);
-	}
 
 	return clk;
 }
 
-void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
-						struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np)
 {
 	struct clk *clk;
-	unsigned int irq;
 	u32 frequency = 0;
 	u32 accuracy = 0;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	of_property_read_string(np, "clock-output-names", &name);
 	of_property_read_u32(np, "clock-frequency", &frequency);
 	of_property_read_u32(np, "clock-accuracy", &accuracy);
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_main_rc_osc(pmc, irq, name, frequency,
-					    accuracy);
+	clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc",
+	       of_at91sam9x5_clk_main_rc_osc_setup);
 
 
-static int clk_main_probe_frequency(struct at91_pmc *pmc)
+static int clk_main_probe_frequency(struct regmap *regmap)
 {
 	unsigned long prep_time, timeout;
-	u32 tmp;
+	unsigned int mcfr;
 
 	timeout = jiffies + usecs_to_jiffies(MAINFRDY_TIMEOUT);
 	do {
 		prep_time = jiffies;
-		tmp = pmc_read(pmc, AT91_CKGR_MCFR);
-		if (tmp & AT91_PMC_MAINRDY)
+		regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
+		if (mcfr & AT91_PMC_MAINRDY)
 			return 0;
 		usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
 	} while (time_before(prep_time, timeout));
@@ -382,34 +348,37 @@ static int clk_main_probe_frequency(struct at91_pmc *pmc)
 	return -ETIMEDOUT;
 }
 
-static unsigned long clk_main_recalc_rate(struct at91_pmc *pmc,
+static unsigned long clk_main_recalc_rate(struct regmap *regmap,
 					  unsigned long parent_rate)
 {
-	u32 tmp;
+	unsigned int mcfr;
 
 	if (parent_rate)
 		return parent_rate;
 
 	pr_warn("Main crystal frequency not set, using approximate value\n");
-	tmp = pmc_read(pmc, AT91_CKGR_MCFR);
-	if (!(tmp & AT91_PMC_MAINRDY))
+	regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
+	if (!(mcfr & AT91_PMC_MAINRDY))
 		return 0;
 
-	return ((tmp & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
+	return ((mcfr & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV;
 }
 
 static int clk_rm9200_main_prepare(struct clk_hw *hw)
 {
 	struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
 
-	return clk_main_probe_frequency(clkmain->pmc);
+	return clk_main_probe_frequency(clkmain->regmap);
 }
 
 static int clk_rm9200_main_is_prepared(struct clk_hw *hw)
 {
 	struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
+	unsigned int status;
+
+	regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status);
 
-	return !!(pmc_read(clkmain->pmc, AT91_CKGR_MCFR) & AT91_PMC_MAINRDY);
+	return status & AT91_PMC_MAINRDY ? 1 : 0;
 }
 
 static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw,
@@ -417,7 +386,7 @@ static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw,
 {
 	struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw);
 
-	return clk_main_recalc_rate(clkmain->pmc, parent_rate);
+	return clk_main_recalc_rate(clkmain->regmap, parent_rate);
 }
 
 static const struct clk_ops rm9200_main_ops = {
@@ -427,7 +396,7 @@ static const struct clk_ops rm9200_main_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_rm9200_main(struct at91_pmc *pmc,
+at91_clk_register_rm9200_main(struct regmap *regmap,
 			      const char *name,
 			      const char *parent_name)
 {
@@ -435,7 +404,7 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc,
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !name)
+	if (!name)
 		return ERR_PTR(-EINVAL);
 
 	if (!parent_name)
@@ -452,7 +421,7 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc,
 	init.flags = 0;
 
 	clkmain->hw.init = &init;
-	clkmain->pmc = pmc;
+	clkmain->regmap = regmap;
 
 	clk = clk_register(NULL, &clkmain->hw);
 	if (IS_ERR(clk))
@@ -461,52 +430,54 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc,
 	return clk;
 }
 
-void __init of_at91rm9200_clk_main_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_main_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_name;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91_clk_register_rm9200_main(pmc, name, parent_name);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
+	clk = at91_clk_register_rm9200_main(regmap, name, parent_name);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main",
+	       of_at91rm9200_clk_main_setup);
 
-static irqreturn_t clk_sam9x5_main_irq_handler(int irq, void *dev_id)
+static inline bool clk_sam9x5_main_ready(struct regmap *regmap)
 {
-	struct clk_sam9x5_main *clkmain = dev_id;
+	unsigned int status;
 
-	wake_up(&clkmain->wait);
-	disable_irq_nosync(clkmain->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & AT91_PMC_MOSCSELS ? 1 : 0;
 }
 
 static int clk_sam9x5_main_prepare(struct clk_hw *hw)
 {
 	struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
-	struct at91_pmc *pmc = clkmain->pmc;
+	struct regmap *regmap = clkmain->regmap;
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
-		enable_irq(clkmain->irq);
-		wait_event(clkmain->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
-	}
+	while (!clk_sam9x5_main_ready(regmap))
+		cpu_relax();
 
-	return clk_main_probe_frequency(pmc);
+	return clk_main_probe_frequency(regmap);
 }
 
 static int clk_sam9x5_main_is_prepared(struct clk_hw *hw)
 {
 	struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
 
-	return !!(pmc_read(clkmain->pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
+	return clk_sam9x5_main_ready(clkmain->regmap);
 }
 
 static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw,
@@ -514,30 +485,28 @@ static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw,
 {
 	struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
 
-	return clk_main_recalc_rate(clkmain->pmc, parent_rate);
+	return clk_main_recalc_rate(clkmain->regmap, parent_rate);
 }
 
 static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
-	struct at91_pmc *pmc = clkmain->pmc;
-	u32 tmp;
+	struct regmap *regmap = clkmain->regmap;
+	unsigned int tmp;
 
 	if (index > 1)
 		return -EINVAL;
 
-	tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK;
+	regmap_read(regmap, AT91_CKGR_MOR, &tmp);
+	tmp &= ~MOR_KEY_MASK;
 
 	if (index && !(tmp & AT91_PMC_MOSCSEL))
-		pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
+		regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL);
 	else if (!index && (tmp & AT91_PMC_MOSCSEL))
-		pmc_write(pmc, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
+		regmap_write(regmap, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL);
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) {
-		enable_irq(clkmain->irq);
-		wait_event(clkmain->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS);
-	}
+	while (!clk_sam9x5_main_ready(regmap))
+		cpu_relax();
 
 	return 0;
 }
@@ -545,8 +514,11 @@ static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index)
 static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw)
 {
 	struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw);
+	unsigned int status;
+
+	regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
 
-	return !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN);
+	return status & AT91_PMC_MOSCEN ? 1 : 0;
 }
 
 static const struct clk_ops sam9x5_main_ops = {
@@ -558,18 +530,17 @@ static const struct clk_ops sam9x5_main_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_sam9x5_main(struct at91_pmc *pmc,
-			      unsigned int irq,
+at91_clk_register_sam9x5_main(struct regmap *regmap,
 			      const char *name,
 			      const char **parent_names,
 			      int num_parents)
 {
-	int ret;
 	struct clk_sam9x5_main *clkmain;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
+	unsigned int status;
 
-	if (!pmc || !irq || !name)
+	if (!name)
 		return ERR_PTR(-EINVAL);
 
 	if (!parent_names || !num_parents)
@@ -586,51 +557,42 @@ at91_clk_register_sam9x5_main(struct at91_pmc *pmc,
 	init.flags = CLK_SET_PARENT_GATE;
 
 	clkmain->hw.init = &init;
-	clkmain->pmc = pmc;
-	clkmain->irq = irq;
-	clkmain->parent = !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) &
-			     AT91_PMC_MOSCEN);
-	init_waitqueue_head(&clkmain->wait);
-	irq_set_status_flags(clkmain->irq, IRQ_NOAUTOEN);
-	ret = request_irq(clkmain->irq, clk_sam9x5_main_irq_handler,
-			  IRQF_TRIGGER_HIGH, name, clkmain);
-	if (ret)
-		return ERR_PTR(ret);
+	clkmain->regmap = regmap;
+	regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
+	clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
 
 	clk = clk_register(NULL, &clkmain->hw);
-	if (IS_ERR(clk)) {
-		free_irq(clkmain->irq, clkmain);
+	if (IS_ERR(clk))
 		kfree(clkmain);
-	}
 
 	return clk;
 }
 
-void __init of_at91sam9x5_clk_main_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_main_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
-	unsigned int irq;
+	unsigned int num_parents;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > 2)
+	if (num_parents == 0 || num_parents > 2)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
-		return;
-
-	clk = at91_clk_register_sam9x5_main(pmc, irq, name, parent_names,
+	clk = at91_clk_register_sam9x5_main(regmap, name, parent_names,
 					    num_parents);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main",
+	       of_at91sam9x5_clk_main_setup);
diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c
index 620ea323356b..d1021e106191 100644
--- a/drivers/clk/at91/clk-master.c
+++ b/drivers/clk/at91/clk-master.c
@@ -12,13 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -44,32 +39,26 @@ struct clk_master_layout {
 
 struct clk_master {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 	const struct clk_master_layout *layout;
 	const struct clk_master_characteristics *characteristics;
 };
 
-static irqreturn_t clk_master_irq_handler(int irq, void *dev_id)
+static inline bool clk_master_ready(struct regmap *regmap)
 {
-	struct clk_master *master = (struct clk_master *)dev_id;
+	unsigned int status;
 
-	wake_up(&master->wait);
-	disable_irq_nosync(master->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & AT91_PMC_MCKRDY ? 1 : 0;
 }
+
 static int clk_master_prepare(struct clk_hw *hw)
 {
 	struct clk_master *master = to_clk_master(hw);
-	struct at91_pmc *pmc = master->pmc;
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY)) {
-		enable_irq(master->irq);
-		wait_event(master->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
-	}
+	while (!clk_master_ready(master->regmap))
+		cpu_relax();
 
 	return 0;
 }
@@ -78,7 +67,7 @@ static int clk_master_is_prepared(struct clk_hw *hw)
 {
 	struct clk_master *master = to_clk_master(hw);
 
-	return !!(pmc_read(master->pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY);
+	return clk_master_ready(master->regmap);
 }
 
 static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
@@ -88,18 +77,16 @@ static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
 	u8 div;
 	unsigned long rate = parent_rate;
 	struct clk_master *master = to_clk_master(hw);
-	struct at91_pmc *pmc = master->pmc;
 	const struct clk_master_layout *layout = master->layout;
 	const struct clk_master_characteristics *characteristics =
 						master->characteristics;
-	u32 tmp;
+	unsigned int mckr;
 
-	pmc_lock(pmc);
-	tmp = pmc_read(pmc, AT91_PMC_MCKR) & layout->mask;
-	pmc_unlock(pmc);
+	regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
+	mckr &= layout->mask;
 
-	pres = (tmp >> layout->pres_shift) & MASTER_PRES_MASK;
-	div = (tmp >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
+	pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK;
+	div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK;
 
 	if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX)
 		rate /= 3;
@@ -119,9 +106,11 @@ static unsigned long clk_master_recalc_rate(struct clk_hw *hw,
 static u8 clk_master_get_parent(struct clk_hw *hw)
 {
 	struct clk_master *master = to_clk_master(hw);
-	struct at91_pmc *pmc = master->pmc;
+	unsigned int mckr;
 
-	return pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_CSS;
+	regmap_read(master->regmap, AT91_PMC_MCKR, &mckr);
+
+	return mckr & AT91_PMC_CSS;
 }
 
 static const struct clk_ops master_ops = {
@@ -132,18 +121,17 @@ static const struct clk_ops master_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq,
+at91_clk_register_master(struct regmap *regmap,
 		const char *name, int num_parents,
 		const char **parent_names,
 		const struct clk_master_layout *layout,
 		const struct clk_master_characteristics *characteristics)
 {
-	int ret;
 	struct clk_master *master;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !irq || !name || !num_parents || !parent_names)
+	if (!name || !num_parents || !parent_names)
 		return ERR_PTR(-EINVAL);
 
 	master = kzalloc(sizeof(*master), GFP_KERNEL);
@@ -159,20 +147,10 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq,
 	master->hw.init = &init;
 	master->layout = layout;
 	master->characteristics = characteristics;
-	master->pmc = pmc;
-	master->irq = irq;
-	init_waitqueue_head(&master->wait);
-	irq_set_status_flags(master->irq, IRQ_NOAUTOEN);
-	ret = request_irq(master->irq, clk_master_irq_handler,
-			  IRQF_TRIGGER_HIGH, "clk-master", master);
-	if (ret) {
-		kfree(master);
-		return ERR_PTR(ret);
-	}
+	master->regmap = regmap;
 
 	clk = clk_register(NULL, &master->hw);
 	if (IS_ERR(clk)) {
-		free_irq(master->irq, master);
 		kfree(master);
 	}
 
@@ -217,18 +195,18 @@ out_free_characteristics:
 }
 
 static void __init
-of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc,
+of_at91_clk_master_setup(struct device_node *np,
 			 const struct clk_master_layout *layout)
 {
 	struct clk *clk;
-	int num_parents;
-	unsigned int irq;
+	unsigned int num_parents;
 	const char *parent_names[MASTER_SOURCE_MAX];
 	const char *name = np->name;
 	struct clk_master_characteristics *characteristics;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > MASTER_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
@@ -239,11 +217,11 @@ of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc,
 	if (!characteristics)
 		return;
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
-		goto out_free_characteristics;
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
 
-	clk = at91_clk_register_master(pmc, irq, name, num_parents,
+	clk = at91_clk_register_master(regmap, name, num_parents,
 				       parent_names, layout,
 				       characteristics);
 	if (IS_ERR(clk))
@@ -256,14 +234,16 @@ out_free_characteristics:
 	kfree(characteristics);
 }
 
-void __init of_at91rm9200_clk_master_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_master_setup(struct device_node *np)
 {
-	of_at91_clk_master_setup(np, pmc, &at91rm9200_master_layout);
+	of_at91_clk_master_setup(np, &at91rm9200_master_layout);
 }
+CLK_OF_DECLARE(at91rm9200_clk_master, "atmel,at91rm9200-clk-master",
+	       of_at91rm9200_clk_master_setup);
 
-void __init of_at91sam9x5_clk_master_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_master_setup(struct device_node *np)
 {
-	of_at91_clk_master_setup(np, pmc, &at91sam9x5_master_layout);
+	of_at91_clk_master_setup(np, &at91sam9x5_master_layout);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_master, "atmel,at91sam9x5-clk-master",
+	       of_at91sam9x5_clk_master_setup);
diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c
index 58f3b568e9cb..fd160728e990 100644
--- a/drivers/clk/at91/clk-peripheral.c
+++ b/drivers/clk/at91/clk-peripheral.c
@@ -12,11 +12,13 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
+DEFINE_SPINLOCK(pmc_pcr_lock);
+
 #define PERIPHERAL_MAX		64
 
 #define PERIPHERAL_AT91RM9200	0
@@ -33,7 +35,7 @@
 
 struct clk_peripheral {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 	u32 id;
 };
 
@@ -41,8 +43,9 @@ struct clk_peripheral {
 
 struct clk_sam9x5_peripheral {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 	struct clk_range range;
+	spinlock_t *lock;
 	u32 id;
 	u32 div;
 	bool auto_div;
@@ -54,7 +57,6 @@ struct clk_sam9x5_peripheral {
 static int clk_peripheral_enable(struct clk_hw *hw)
 {
 	struct clk_peripheral *periph = to_clk_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
 	int offset = AT91_PMC_PCER;
 	u32 id = periph->id;
 
@@ -62,14 +64,14 @@ static int clk_peripheral_enable(struct clk_hw *hw)
 		return 0;
 	if (id > PERIPHERAL_ID_MAX)
 		offset = AT91_PMC_PCER1;
-	pmc_write(pmc, offset, PERIPHERAL_MASK(id));
+	regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
+
 	return 0;
 }
 
 static void clk_peripheral_disable(struct clk_hw *hw)
 {
 	struct clk_peripheral *periph = to_clk_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
 	int offset = AT91_PMC_PCDR;
 	u32 id = periph->id;
 
@@ -77,21 +79,23 @@ static void clk_peripheral_disable(struct clk_hw *hw)
 		return;
 	if (id > PERIPHERAL_ID_MAX)
 		offset = AT91_PMC_PCDR1;
-	pmc_write(pmc, offset, PERIPHERAL_MASK(id));
+	regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id));
 }
 
 static int clk_peripheral_is_enabled(struct clk_hw *hw)
 {
 	struct clk_peripheral *periph = to_clk_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
 	int offset = AT91_PMC_PCSR;
+	unsigned int status;
 	u32 id = periph->id;
 
 	if (id < PERIPHERAL_ID_MIN)
 		return 1;
 	if (id > PERIPHERAL_ID_MAX)
 		offset = AT91_PMC_PCSR1;
-	return !!(pmc_read(pmc, offset) & PERIPHERAL_MASK(id));
+	regmap_read(periph->regmap, offset, &status);
+
+	return status & PERIPHERAL_MASK(id) ? 1 : 0;
 }
 
 static const struct clk_ops peripheral_ops = {
@@ -101,14 +105,14 @@ static const struct clk_ops peripheral_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name,
+at91_clk_register_peripheral(struct regmap *regmap, const char *name,
 			     const char *parent_name, u32 id)
 {
 	struct clk_peripheral *periph;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !name || !parent_name || id > PERIPHERAL_ID_MAX)
+	if (!name || !parent_name || id > PERIPHERAL_ID_MAX)
 		return ERR_PTR(-EINVAL);
 
 	periph = kzalloc(sizeof(*periph), GFP_KERNEL);
@@ -123,7 +127,7 @@ at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name,
 
 	periph->id = id;
 	periph->hw.init = &init;
-	periph->pmc = pmc;
+	periph->regmap = regmap;
 
 	clk = clk_register(NULL, &periph->hw);
 	if (IS_ERR(clk))
@@ -160,53 +164,58 @@ static void clk_sam9x5_peripheral_autodiv(struct clk_sam9x5_peripheral *periph)
 static int clk_sam9x5_peripheral_enable(struct clk_hw *hw)
 {
 	struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
-	u32 tmp;
+	unsigned long flags;
 
 	if (periph->id < PERIPHERAL_ID_MIN)
 		return 0;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_DIV_MASK;
-	pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_DIV(periph->div)
-					 | AT91_PMC_PCR_CMD
-					 | AT91_PMC_PCR_EN);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(periph->lock, flags);
+	regmap_write(periph->regmap, AT91_PMC_PCR,
+		     (periph->id & AT91_PMC_PCR_PID_MASK));
+	regmap_update_bits(periph->regmap, AT91_PMC_PCR,
+			   AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD |
+			   AT91_PMC_PCR_EN,
+			   AT91_PMC_PCR_DIV(periph->div) |
+			   AT91_PMC_PCR_CMD |
+			   AT91_PMC_PCR_EN);
+	spin_unlock_irqrestore(periph->lock, flags);
+
 	return 0;
 }
 
 static void clk_sam9x5_peripheral_disable(struct clk_hw *hw)
 {
 	struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
-	u32 tmp;
+	unsigned long flags;
 
 	if (periph->id < PERIPHERAL_ID_MIN)
 		return;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_EN;
-	pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(periph->lock, flags);
+	regmap_write(periph->regmap, AT91_PMC_PCR,
+		     (periph->id & AT91_PMC_PCR_PID_MASK));
+	regmap_update_bits(periph->regmap, AT91_PMC_PCR,
+			   AT91_PMC_PCR_EN | AT91_PMC_PCR_CMD,
+			   AT91_PMC_PCR_CMD);
+	spin_unlock_irqrestore(periph->lock, flags);
 }
 
 static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw)
 {
 	struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
-	int ret;
+	unsigned long flags;
+	unsigned int status;
 
 	if (periph->id < PERIPHERAL_ID_MIN)
 		return 1;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
-	ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(periph->lock, flags);
+	regmap_write(periph->regmap, AT91_PMC_PCR,
+		     (periph->id & AT91_PMC_PCR_PID_MASK));
+	regmap_read(periph->regmap, AT91_PMC_PCR, &status);
+	spin_unlock_irqrestore(periph->lock, flags);
 
-	return ret;
+	return status & AT91_PMC_PCR_EN ? 1 : 0;
 }
 
 static unsigned long
@@ -214,19 +223,20 @@ clk_sam9x5_peripheral_recalc_rate(struct clk_hw *hw,
 				  unsigned long parent_rate)
 {
 	struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw);
-	struct at91_pmc *pmc = periph->pmc;
-	u32 tmp;
+	unsigned long flags;
+	unsigned int status;
 
 	if (periph->id < PERIPHERAL_ID_MIN)
 		return parent_rate;
 
-	pmc_lock(pmc);
-	pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK));
-	tmp = pmc_read(pmc, AT91_PMC_PCR);
-	pmc_unlock(pmc);
+	spin_lock_irqsave(periph->lock, flags);
+	regmap_write(periph->regmap, AT91_PMC_PCR,
+		     (periph->id & AT91_PMC_PCR_PID_MASK));
+	regmap_read(periph->regmap, AT91_PMC_PCR, &status);
+	spin_unlock_irqrestore(periph->lock, flags);
 
-	if (tmp & AT91_PMC_PCR_EN) {
-		periph->div = PERIPHERAL_RSHIFT(tmp);
+	if (status & AT91_PMC_PCR_EN) {
+		periph->div = PERIPHERAL_RSHIFT(status);
 		periph->auto_div = false;
 	} else {
 		clk_sam9x5_peripheral_autodiv(periph);
@@ -318,15 +328,15 @@ static const struct clk_ops sam9x5_peripheral_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
-				    const char *parent_name, u32 id,
-				    const struct clk_range *range)
+at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock,
+				    const char *name, const char *parent_name,
+				    u32 id, const struct clk_range *range)
 {
 	struct clk_sam9x5_peripheral *periph;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !name || !parent_name)
+	if (!name || !parent_name)
 		return ERR_PTR(-EINVAL);
 
 	periph = kzalloc(sizeof(*periph), GFP_KERNEL);
@@ -342,7 +352,8 @@ at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
 	periph->id = id;
 	periph->hw.init = &init;
 	periph->div = 0;
-	periph->pmc = pmc;
+	periph->regmap = regmap;
+	periph->lock = lock;
 	periph->auto_div = true;
 	periph->range = *range;
 
@@ -356,7 +367,7 @@ at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name,
 }
 
 static void __init
-of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
+of_at91_clk_periph_setup(struct device_node *np, u8 type)
 {
 	int num;
 	u32 id;
@@ -364,6 +375,7 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
 	const char *parent_name;
 	const char *name;
 	struct device_node *periphclknp;
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 	if (!parent_name)
@@ -373,6 +385,10 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
 	if (!num || num > PERIPHERAL_MAX)
 		return;
 
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
 	for_each_child_of_node(np, periphclknp) {
 		if (of_property_read_u32(periphclknp, "reg", &id))
 			continue;
@@ -384,7 +400,7 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
 			name = periphclknp->name;
 
 		if (type == PERIPHERAL_AT91RM9200) {
-			clk = at91_clk_register_peripheral(pmc, name,
+			clk = at91_clk_register_peripheral(regmap, name,
 							   parent_name, id);
 		} else {
 			struct clk_range range = CLK_RANGE(0, 0);
@@ -393,7 +409,9 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
 					      "atmel,clk-output-range",
 					      &range);
 
-			clk = at91_clk_register_sam9x5_peripheral(pmc, name,
+			clk = at91_clk_register_sam9x5_peripheral(regmap,
+								  &pmc_pcr_lock,
+								  name,
 								  parent_name,
 								  id, &range);
 		}
@@ -405,14 +423,17 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type)
 	}
 }
 
-void __init of_at91rm9200_clk_periph_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_periph_setup(struct device_node *np)
 {
-	of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91RM9200);
+	of_at91_clk_periph_setup(np, PERIPHERAL_AT91RM9200);
 }
+CLK_OF_DECLARE(at91rm9200_clk_periph, "atmel,at91rm9200-clk-peripheral",
+	       of_at91rm9200_clk_periph_setup);
 
-void __init of_at91sam9x5_clk_periph_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_periph_setup(struct device_node *np)
 {
-	of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91SAM9X5);
+	of_at91_clk_periph_setup(np, PERIPHERAL_AT91SAM9X5);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_periph, "atmel,at91sam9x5-clk-peripheral",
+	       of_at91sam9x5_clk_periph_setup);
+
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 18b60f4895a6..fb2e0b56d4b7 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -12,14 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -58,9 +52,7 @@ struct clk_pll_layout {
 
 struct clk_pll {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 	u8 id;
 	u8 div;
 	u8 range;
@@ -69,20 +61,19 @@ struct clk_pll {
 	const struct clk_pll_characteristics *characteristics;
 };
 
-static irqreturn_t clk_pll_irq_handler(int irq, void *dev_id)
+static inline bool clk_pll_ready(struct regmap *regmap, int id)
 {
-	struct clk_pll *pll = (struct clk_pll *)dev_id;
+	unsigned int status;
 
-	wake_up(&pll->wait);
-	disable_irq_nosync(pll->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & PLL_STATUS_MASK(id) ? 1 : 0;
 }
 
 static int clk_pll_prepare(struct clk_hw *hw)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
-	struct at91_pmc *pmc = pll->pmc;
+	struct regmap *regmap = pll->regmap;
 	const struct clk_pll_layout *layout = pll->layout;
 	const struct clk_pll_characteristics *characteristics =
 							pll->characteristics;
@@ -90,39 +81,34 @@ static int clk_pll_prepare(struct clk_hw *hw)
 	u32 mask = PLL_STATUS_MASK(id);
 	int offset = PLL_REG(id);
 	u8 out = 0;
-	u32 pllr, icpr;
+	unsigned int pllr;
+	unsigned int status;
 	u8 div;
 	u16 mul;
 
-	pllr = pmc_read(pmc, offset);
+	regmap_read(regmap, offset, &pllr);
 	div = PLL_DIV(pllr);
 	mul = PLL_MUL(pllr, layout);
 
-	if ((pmc_read(pmc, AT91_PMC_SR) & mask) &&
+	regmap_read(regmap, AT91_PMC_SR, &status);
+	if ((status & mask) &&
 	    (div == pll->div && mul == pll->mul))
 		return 0;
 
 	if (characteristics->out)
 		out = characteristics->out[pll->range];
-	if (characteristics->icpll) {
-		icpr = pmc_read(pmc, AT91_PMC_PLLICPR) & ~PLL_ICPR_MASK(id);
-		icpr |= (characteristics->icpll[pll->range] <<
-			PLL_ICPR_SHIFT(id));
-		pmc_write(pmc, AT91_PMC_PLLICPR, icpr);
-	}
 
-	pllr &= ~layout->pllr_mask;
-	pllr |= layout->pllr_mask &
-	       (pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
-		(out << PLL_OUT_SHIFT) |
-		((pll->mul & layout->mul_mask) << layout->mul_shift));
-	pmc_write(pmc, offset, pllr);
-
-	while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
-		enable_irq(pll->irq);
-		wait_event(pll->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & mask);
-	}
+	if (characteristics->icpll)
+		regmap_update_bits(regmap, AT91_PMC_PLLICPR, PLL_ICPR_MASK(id),
+			characteristics->icpll[pll->range] << PLL_ICPR_SHIFT(id));
+
+	regmap_update_bits(regmap, offset, layout->pllr_mask,
+			pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) |
+			(out << PLL_OUT_SHIFT) |
+			((pll->mul & layout->mul_mask) << layout->mul_shift));
+
+	while (!clk_pll_ready(regmap, pll->id))
+		cpu_relax();
 
 	return 0;
 }
@@ -130,32 +116,35 @@ static int clk_pll_prepare(struct clk_hw *hw)
 static int clk_pll_is_prepared(struct clk_hw *hw)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
-	struct at91_pmc *pmc = pll->pmc;
 
-	return !!(pmc_read(pmc, AT91_PMC_SR) &
-		  PLL_STATUS_MASK(pll->id));
+	return clk_pll_ready(pll->regmap, pll->id);
 }
 
 static void clk_pll_unprepare(struct clk_hw *hw)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
-	struct at91_pmc *pmc = pll->pmc;
-	const struct clk_pll_layout *layout = pll->layout;
-	int offset = PLL_REG(pll->id);
-	u32 tmp = pmc_read(pmc, offset) & ~(layout->pllr_mask);
+	unsigned int mask = pll->layout->pllr_mask;
 
-	pmc_write(pmc, offset, tmp);
+	regmap_update_bits(pll->regmap, PLL_REG(pll->id), mask, ~mask);
 }
 
 static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
 					 unsigned long parent_rate)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
+	unsigned int pllr;
+	u16 mul;
+	u8 div;
 
-	if (!pll->div || !pll->mul)
+	regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
+
+	div = PLL_DIV(pllr);
+	mul = PLL_MUL(pllr, pll->layout);
+
+	if (!div || !mul)
 		return 0;
 
-	return (parent_rate / pll->div) * (pll->mul + 1);
+	return (parent_rate / div) * (mul + 1);
 }
 
 static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
@@ -308,7 +297,7 @@ static const struct clk_ops pll_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
+at91_clk_register_pll(struct regmap *regmap, const char *name,
 		      const char *parent_name, u8 id,
 		      const struct clk_pll_layout *layout,
 		      const struct clk_pll_characteristics *characteristics)
@@ -316,9 +305,8 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
 	struct clk_pll *pll;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
-	int ret;
 	int offset = PLL_REG(id);
-	u32 tmp;
+	unsigned int pllr;
 
 	if (id > PLL_MAX_ID)
 		return ERR_PTR(-EINVAL);
@@ -337,23 +325,13 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name,
 	pll->hw.init = &init;
 	pll->layout = layout;
 	pll->characteristics = characteristics;
-	pll->pmc = pmc;
-	pll->irq = irq;
-	tmp = pmc_read(pmc, offset) & layout->pllr_mask;
-	pll->div = PLL_DIV(tmp);
-	pll->mul = PLL_MUL(tmp, layout);
-	init_waitqueue_head(&pll->wait);
-	irq_set_status_flags(pll->irq, IRQ_NOAUTOEN);
-	ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH,
-			  id ? "clk-pllb" : "clk-plla", pll);
-	if (ret) {
-		kfree(pll);
-		return ERR_PTR(ret);
-	}
+	pll->regmap = regmap;
+	regmap_read(regmap, offset, &pllr);
+	pll->div = PLL_DIV(pllr);
+	pll->mul = PLL_MUL(pllr, layout);
 
 	clk = clk_register(NULL, &pll->hw);
 	if (IS_ERR(clk)) {
-		free_irq(pll->irq, pll);
 		kfree(pll);
 	}
 
@@ -483,12 +461,12 @@ out_free_characteristics:
 }
 
 static void __init
-of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc,
+of_at91_clk_pll_setup(struct device_node *np,
 		      const struct clk_pll_layout *layout)
 {
 	u32 id;
-	unsigned int irq;
 	struct clk *clk;
+	struct regmap *regmap;
 	const char *parent_name;
 	const char *name = np->name;
 	struct clk_pll_characteristics *characteristics;
@@ -500,15 +478,15 @@ of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc,
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	characteristics = of_at91_clk_pll_get_characteristics(np);
-	if (!characteristics)
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
 		return;
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
+	characteristics = of_at91_clk_pll_get_characteristics(np);
+	if (!characteristics)
 		return;
 
-	clk = at91_clk_register_pll(pmc, irq, name, parent_name, id, layout,
+	clk = at91_clk_register_pll(regmap, name, parent_name, id, layout,
 				    characteristics);
 	if (IS_ERR(clk))
 		goto out_free_characteristics;
@@ -520,26 +498,30 @@ out_free_characteristics:
 	kfree(characteristics);
 }
 
-void __init of_at91rm9200_clk_pll_setup(struct device_node *np,
-					       struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_pll_setup(struct device_node *np)
 {
-	of_at91_clk_pll_setup(np, pmc, &at91rm9200_pll_layout);
+	of_at91_clk_pll_setup(np, &at91rm9200_pll_layout);
 }
+CLK_OF_DECLARE(at91rm9200_clk_pll, "atmel,at91rm9200-clk-pll",
+	       of_at91rm9200_clk_pll_setup);
 
-void __init of_at91sam9g45_clk_pll_setup(struct device_node *np,
-						struct at91_pmc *pmc)
+static void __init of_at91sam9g45_clk_pll_setup(struct device_node *np)
 {
-	of_at91_clk_pll_setup(np, pmc, &at91sam9g45_pll_layout);
+	of_at91_clk_pll_setup(np, &at91sam9g45_pll_layout);
 }
+CLK_OF_DECLARE(at91sam9g45_clk_pll, "atmel,at91sam9g45-clk-pll",
+	       of_at91sam9g45_clk_pll_setup);
 
-void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np,
-						 struct at91_pmc *pmc)
+static void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np)
 {
-	of_at91_clk_pll_setup(np, pmc, &at91sam9g20_pllb_layout);
+	of_at91_clk_pll_setup(np, &at91sam9g20_pllb_layout);
 }
+CLK_OF_DECLARE(at91sam9g20_clk_pllb, "atmel,at91sam9g20-clk-pllb",
+	       of_at91sam9g20_clk_pllb_setup);
 
-void __init of_sama5d3_clk_pll_setup(struct device_node *np,
-					    struct at91_pmc *pmc)
+static void __init of_sama5d3_clk_pll_setup(struct device_node *np)
 {
-	of_at91_clk_pll_setup(np, pmc, &sama5d3_pll_layout);
+	of_at91_clk_pll_setup(np, &sama5d3_pll_layout);
 }
+CLK_OF_DECLARE(sama5d3_clk_pll, "atmel,sama5d3-clk-pll",
+	       of_sama5d3_clk_pll_setup);
diff --git a/drivers/clk/at91/clk-plldiv.c b/drivers/clk/at91/clk-plldiv.c
index ea226562bb40..2bed26481027 100644
--- a/drivers/clk/at91/clk-plldiv.c
+++ b/drivers/clk/at91/clk-plldiv.c
@@ -12,8 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -21,16 +21,18 @@
 
 struct clk_plldiv {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 static unsigned long clk_plldiv_recalc_rate(struct clk_hw *hw,
 					    unsigned long parent_rate)
 {
 	struct clk_plldiv *plldiv = to_clk_plldiv(hw);
-	struct at91_pmc *pmc = plldiv->pmc;
+	unsigned int mckr;
 
-	if (pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_PLLADIV2)
+	regmap_read(plldiv->regmap, AT91_PMC_MCKR, &mckr);
+
+	if (mckr & AT91_PMC_PLLADIV2)
 		return parent_rate / 2;
 
 	return parent_rate;
@@ -57,18 +59,12 @@ static int clk_plldiv_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct clk_plldiv *plldiv = to_clk_plldiv(hw);
-	struct at91_pmc *pmc = plldiv->pmc;
-	u32 tmp;
 
-	if (parent_rate != rate && (parent_rate / 2) != rate)
+	if ((parent_rate != rate) && (parent_rate / 2 != rate))
 		return -EINVAL;
 
-	pmc_lock(pmc);
-	tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_PLLADIV2;
-	if ((parent_rate / 2) == rate)
-		tmp |= AT91_PMC_PLLADIV2;
-	pmc_write(pmc, AT91_PMC_MCKR, tmp);
-	pmc_unlock(pmc);
+	regmap_update_bits(plldiv->regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2,
+			   parent_rate != rate ? AT91_PMC_PLLADIV2 : 0);
 
 	return 0;
 }
@@ -80,7 +76,7 @@ static const struct clk_ops plldiv_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
+at91_clk_register_plldiv(struct regmap *regmap, const char *name,
 			 const char *parent_name)
 {
 	struct clk_plldiv *plldiv;
@@ -98,7 +94,7 @@ at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
 	init.flags = CLK_SET_RATE_GATE;
 
 	plldiv->hw.init = &init;
-	plldiv->pmc = pmc;
+	plldiv->regmap = regmap;
 
 	clk = clk_register(NULL, &plldiv->hw);
 
@@ -109,27 +105,27 @@ at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name,
 }
 
 static void __init
-of_at91_clk_plldiv_setup(struct device_node *np, struct at91_pmc *pmc)
+of_at91sam9x5_clk_plldiv_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_name;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91_clk_register_plldiv(pmc, name, parent_name);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
 
+	clk = at91_clk_register_plldiv(regmap, name, parent_name);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 	return;
 }
-
-void __init of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
-					   struct at91_pmc *pmc)
-{
-	of_at91_clk_plldiv_setup(np, pmc);
-}
+CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv",
+	       of_at91sam9x5_clk_plldiv_setup);
diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 14b270b85fec..10f846cc8db1 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c
@@ -12,10 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -24,6 +22,7 @@
 
 #define PROG_STATUS_MASK(id)	(1 << ((id) + 8))
 #define PROG_PRES_MASK		0x7
+#define PROG_PRES(layout, pckr)	((pckr >> layout->pres_shift) & PROG_PRES_MASK)
 #define PROG_MAX_RM9200_CSS	3
 
 struct clk_programmable_layout {
@@ -34,7 +33,7 @@ struct clk_programmable_layout {
 
 struct clk_programmable {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 	u8 id;
 	const struct clk_programmable_layout *layout;
 };
@@ -44,14 +43,12 @@ struct clk_programmable {
 static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw,
 						  unsigned long parent_rate)
 {
-	u32 pres;
 	struct clk_programmable *prog = to_clk_programmable(hw);
-	struct at91_pmc *pmc = prog->pmc;
-	const struct clk_programmable_layout *layout = prog->layout;
+	unsigned int pckr;
+
+	regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
 
-	pres = (pmc_read(pmc, AT91_PMC_PCKR(prog->id)) >> layout->pres_shift) &
-	       PROG_PRES_MASK;
-	return parent_rate >> pres;
+	return parent_rate >> PROG_PRES(prog->layout, pckr);
 }
 
 static int clk_programmable_determine_rate(struct clk_hw *hw,
@@ -101,36 +98,36 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct clk_programmable *prog = to_clk_programmable(hw);
 	const struct clk_programmable_layout *layout = prog->layout;
-	struct at91_pmc *pmc = prog->pmc;
-	u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & ~layout->css_mask;
+	unsigned int mask = layout->css_mask;
+	unsigned int pckr = 0;
 
 	if (layout->have_slck_mck)
-		tmp &= AT91_PMC_CSSMCK_MCK;
+		mask |= AT91_PMC_CSSMCK_MCK;
 
 	if (index > layout->css_mask) {
-		if (index > PROG_MAX_RM9200_CSS && layout->have_slck_mck) {
-			tmp |= AT91_PMC_CSSMCK_MCK;
-			return 0;
-		} else {
+		if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck)
 			return -EINVAL;
-		}
+
+		pckr |= AT91_PMC_CSSMCK_MCK;
 	}
 
-	pmc_write(pmc, AT91_PMC_PCKR(prog->id), tmp | index);
+	regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), mask, pckr);
+
 	return 0;
 }
 
 static u8 clk_programmable_get_parent(struct clk_hw *hw)
 {
-	u32 tmp;
-	u8 ret;
 	struct clk_programmable *prog = to_clk_programmable(hw);
-	struct at91_pmc *pmc = prog->pmc;
 	const struct clk_programmable_layout *layout = prog->layout;
+	unsigned int pckr;
+	u8 ret;
+
+	regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
+
+	ret = pckr & layout->css_mask;
 
-	tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id));
-	ret = tmp & layout->css_mask;
-	if (layout->have_slck_mck && (tmp & AT91_PMC_CSSMCK_MCK) && !ret)
+	if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret)
 		ret = PROG_MAX_RM9200_CSS + 1;
 
 	return ret;
@@ -140,26 +137,27 @@ static int clk_programmable_set_rate(struct clk_hw *hw, unsigned long rate,
 				     unsigned long parent_rate)
 {
 	struct clk_programmable *prog = to_clk_programmable(hw);
-	struct at91_pmc *pmc = prog->pmc;
 	const struct clk_programmable_layout *layout = prog->layout;
 	unsigned long div = parent_rate / rate;
+	unsigned int pckr;
 	int shift = 0;
-	u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) &
-		  ~(PROG_PRES_MASK << layout->pres_shift);
+
+	regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr);
 
 	if (!div)
 		return -EINVAL;
 
 	shift = fls(div) - 1;
 
-	if (div != (1<<shift))
+	if (div != (1 << shift))
 		return -EINVAL;
 
 	if (shift >= PROG_PRES_MASK)
 		return -EINVAL;
 
-	pmc_write(pmc, AT91_PMC_PCKR(prog->id),
-		  tmp | (shift << layout->pres_shift));
+	regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id),
+			   PROG_PRES_MASK << layout->pres_shift,
+			   shift << layout->pres_shift);
 
 	return 0;
 }
@@ -173,7 +171,7 @@ static const struct clk_ops programmable_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_programmable(struct at91_pmc *pmc,
+at91_clk_register_programmable(struct regmap *regmap,
 			       const char *name, const char **parent_names,
 			       u8 num_parents, u8 id,
 			       const struct clk_programmable_layout *layout)
@@ -198,7 +196,7 @@ at91_clk_register_programmable(struct at91_pmc *pmc,
 	prog->id = id;
 	prog->layout = layout;
 	prog->hw.init = &init;
-	prog->pmc = pmc;
+	prog->regmap = regmap;
 
 	clk = clk_register(NULL, &prog->hw);
 	if (IS_ERR(clk))
@@ -226,19 +224,20 @@ static const struct clk_programmable_layout at91sam9x5_programmable_layout = {
 };
 
 static void __init
-of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
+of_at91_clk_prog_setup(struct device_node *np,
 		       const struct clk_programmable_layout *layout)
 {
 	int num;
 	u32 id;
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[PROG_SOURCE_MAX];
 	const char *name;
 	struct device_node *progclknp;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > PROG_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
@@ -247,6 +246,10 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
 	if (!num || num > (PROG_ID_MAX + 1))
 		return;
 
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
 	for_each_child_of_node(np, progclknp) {
 		if (of_property_read_u32(progclknp, "reg", &id))
 			continue;
@@ -254,7 +257,7 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
 		if (of_property_read_string(np, "clock-output-names", &name))
 			name = progclknp->name;
 
-		clk = at91_clk_register_programmable(pmc, name,
+		clk = at91_clk_register_programmable(regmap, name,
 						     parent_names, num_parents,
 						     id, layout);
 		if (IS_ERR(clk))
@@ -265,20 +268,23 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc,
 }
 
 
-void __init of_at91rm9200_clk_prog_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_prog_setup(struct device_node *np)
 {
-	of_at91_clk_prog_setup(np, pmc, &at91rm9200_programmable_layout);
+	of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout);
 }
+CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable",
+	       of_at91rm9200_clk_prog_setup);
 
-void __init of_at91sam9g45_clk_prog_setup(struct device_node *np,
-					  struct at91_pmc *pmc)
+static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np)
 {
-	of_at91_clk_prog_setup(np, pmc, &at91sam9g45_programmable_layout);
+	of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout);
 }
+CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable",
+	       of_at91sam9g45_clk_prog_setup);
 
-void __init of_at91sam9x5_clk_prog_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np)
 {
-	of_at91_clk_prog_setup(np, pmc, &at91sam9x5_programmable_layout);
+	of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable",
+	       of_at91sam9x5_clk_prog_setup);
diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c
index 6f99a530ead6..61090b1146cf 100644
--- a/drivers/clk/at91/clk-slow.c
+++ b/drivers/clk/at91/clk-slow.c
@@ -12,17 +12,11 @@
 
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
-#include <linux/slab.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 #include "sckc.h"
@@ -58,7 +52,7 @@ struct clk_slow_rc_osc {
 
 struct clk_sam9260_slow {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
@@ -251,7 +245,7 @@ at91_clk_register_slow_rc_osc(void __iomem *sckcr,
 	init.ops = &slow_rc_osc_ops;
 	init.parent_names = NULL;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
+	init.flags = CLK_IGNORE_UNUSED;
 
 	osc->hw.init = &init;
 	osc->sckcr = sckcr;
@@ -366,11 +360,11 @@ void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
+	unsigned int num_parents;
 	const char *name = np->name;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > 2)
+	if (num_parents == 0 || num_parents > 2)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
@@ -388,8 +382,11 @@ void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
 static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
 {
 	struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
+	unsigned int status;
+
+	regmap_read(slowck->regmap, AT91_PMC_SR, &status);
 
-	return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL);
+	return status & AT91_PMC_OSCSEL ? 1 : 0;
 }
 
 static const struct clk_ops sam9260_slow_ops = {
@@ -397,7 +394,7 @@ static const struct clk_ops sam9260_slow_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
+at91_clk_register_sam9260_slow(struct regmap *regmap,
 			       const char *name,
 			       const char **parent_names,
 			       int num_parents)
@@ -406,7 +403,7 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
 	struct clk *clk = NULL;
 	struct clk_init_data init;
 
-	if (!pmc || !name)
+	if (!name)
 		return ERR_PTR(-EINVAL);
 
 	if (!parent_names || !num_parents)
@@ -423,7 +420,7 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
 	init.flags = 0;
 
 	slowck->hw.init = &init;
-	slowck->pmc = pmc;
+	slowck->regmap = regmap;
 
 	clk = clk_register(NULL, &slowck->hw);
 	if (IS_ERR(clk))
@@ -432,26 +429,32 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
 	return clk;
 }
 
-void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
-					  struct at91_pmc *pmc)
+static void __init of_at91sam9260_clk_slow_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_names[2];
-	int num_parents;
+	unsigned int num_parents;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
 	if (num_parents != 2)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91_clk_register_sam9260_slow(pmc, name, parent_names,
+	clk = at91_clk_register_sam9260_slow(regmap, name, parent_names,
 					     num_parents);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+
+CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow",
+	       of_at91sam9260_clk_slow_setup);
diff --git a/drivers/clk/at91/clk-smd.c b/drivers/clk/at91/clk-smd.c
index a7f8501cfa05..3c04b069d5b8 100644
--- a/drivers/clk/at91/clk-smd.c
+++ b/drivers/clk/at91/clk-smd.c
@@ -12,8 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -24,7 +24,7 @@
 
 struct at91sam9x5_clk_smd {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 #define to_at91sam9x5_clk_smd(hw) \
@@ -33,13 +33,13 @@ struct at91sam9x5_clk_smd {
 static unsigned long at91sam9x5_clk_smd_recalc_rate(struct clk_hw *hw,
 						    unsigned long parent_rate)
 {
-	u32 tmp;
-	u8 smddiv;
 	struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
-	struct at91_pmc *pmc = smd->pmc;
+	unsigned int smdr;
+	u8 smddiv;
+
+	regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
+	smddiv = (smdr & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
 
-	tmp = pmc_read(pmc, AT91_PMC_SMD);
-	smddiv = (tmp & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT;
 	return parent_rate / (smddiv + 1);
 }
 
@@ -67,40 +67,38 @@ static long at91sam9x5_clk_smd_round_rate(struct clk_hw *hw, unsigned long rate,
 
 static int at91sam9x5_clk_smd_set_parent(struct clk_hw *hw, u8 index)
 {
-	u32 tmp;
 	struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
-	struct at91_pmc *pmc = smd->pmc;
 
 	if (index > 1)
 		return -EINVAL;
-	tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS;
-	if (index)
-		tmp |= AT91_PMC_SMDS;
-	pmc_write(pmc, AT91_PMC_SMD, tmp);
+
+	regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMDS,
+			   index ? AT91_PMC_SMDS : 0);
+
 	return 0;
 }
 
 static u8 at91sam9x5_clk_smd_get_parent(struct clk_hw *hw)
 {
 	struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
-	struct at91_pmc *pmc = smd->pmc;
+	unsigned int smdr;
 
-	return pmc_read(pmc, AT91_PMC_SMD) & AT91_PMC_SMDS;
+	regmap_read(smd->regmap, AT91_PMC_SMD, &smdr);
+
+	return smdr & AT91_PMC_SMDS;
 }
 
 static int at91sam9x5_clk_smd_set_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long parent_rate)
 {
-	u32 tmp;
 	struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw);
-	struct at91_pmc *pmc = smd->pmc;
 	unsigned long div = parent_rate / rate;
 
 	if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1))
 		return -EINVAL;
-	tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMD_DIV;
-	tmp |= (div - 1) << SMD_DIV_SHIFT;
-	pmc_write(pmc, AT91_PMC_SMD, tmp);
+
+	regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV,
+			   (div - 1) << SMD_DIV_SHIFT);
 
 	return 0;
 }
@@ -114,7 +112,7 @@ static const struct clk_ops at91sam9x5_smd_ops = {
 };
 
 static struct clk * __init
-at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
+at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name,
 			    const char **parent_names, u8 num_parents)
 {
 	struct at91sam9x5_clk_smd *smd;
@@ -132,7 +130,7 @@ at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
 	init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
 
 	smd->hw.init = &init;
-	smd->pmc = pmc;
+	smd->regmap = regmap;
 
 	clk = clk_register(NULL, &smd->hw);
 	if (IS_ERR(clk))
@@ -141,26 +139,32 @@ at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name,
 	return clk;
 }
 
-void __init of_at91sam9x5_clk_smd_setup(struct device_node *np,
-					struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
 {
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[SMD_SOURCE_MAX];
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > SMD_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91sam9x5_clk_register_smd(pmc, name, parent_names,
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
+	clk = at91sam9x5_clk_register_smd(regmap, name, parent_names,
 					  num_parents);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd",
+	       of_at91sam9x5_clk_smd_setup);
diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c
index 3f5314344286..8f35d8172909 100644
--- a/drivers/clk/at91/clk-system.c
+++ b/drivers/clk/at91/clk-system.c
@@ -12,13 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/of_irq.h>
-#include <linux/interrupt.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -29,9 +24,7 @@
 #define to_clk_system(hw) container_of(hw, struct clk_system, hw)
 struct clk_system {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 	u8 id;
 };
 
@@ -39,58 +32,54 @@ static inline int is_pck(int id)
 {
 	return (id >= 8) && (id <= 15);
 }
-static irqreturn_t clk_system_irq_handler(int irq, void *dev_id)
+
+static inline bool clk_system_ready(struct regmap *regmap, int id)
 {
-	struct clk_system *sys = (struct clk_system *)dev_id;
+	unsigned int status;
 
-	wake_up(&sys->wait);
-	disable_irq_nosync(sys->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & (1 << id) ? 1 : 0;
 }
 
 static int clk_system_prepare(struct clk_hw *hw)
 {
 	struct clk_system *sys = to_clk_system(hw);
-	struct at91_pmc *pmc = sys->pmc;
-	u32 mask = 1 << sys->id;
 
-	pmc_write(pmc, AT91_PMC_SCER, mask);
+	regmap_write(sys->regmap, AT91_PMC_SCER, 1 << sys->id);
 
 	if (!is_pck(sys->id))
 		return 0;
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) {
-		if (sys->irq) {
-			enable_irq(sys->irq);
-			wait_event(sys->wait,
-				   pmc_read(pmc, AT91_PMC_SR) & mask);
-		} else
-			cpu_relax();
-	}
+	while (!clk_system_ready(sys->regmap, sys->id))
+		cpu_relax();
+
 	return 0;
 }
 
 static void clk_system_unprepare(struct clk_hw *hw)
 {
 	struct clk_system *sys = to_clk_system(hw);
-	struct at91_pmc *pmc = sys->pmc;
 
-	pmc_write(pmc, AT91_PMC_SCDR, 1 << sys->id);
+	regmap_write(sys->regmap, AT91_PMC_SCDR, 1 << sys->id);
 }
 
 static int clk_system_is_prepared(struct clk_hw *hw)
 {
 	struct clk_system *sys = to_clk_system(hw);
-	struct at91_pmc *pmc = sys->pmc;
+	unsigned int status;
+
+	regmap_read(sys->regmap, AT91_PMC_SCSR, &status);
 
-	if (!(pmc_read(pmc, AT91_PMC_SCSR) & (1 << sys->id)))
+	if (!(status & (1 << sys->id)))
 		return 0;
 
 	if (!is_pck(sys->id))
 		return 1;
 
-	return !!(pmc_read(pmc, AT91_PMC_SR) & (1 << sys->id));
+	regmap_read(sys->regmap, AT91_PMC_SR, &status);
+
+	return status & (1 << sys->id) ? 1 : 0;
 }
 
 static const struct clk_ops system_ops = {
@@ -100,13 +89,12 @@ static const struct clk_ops system_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_system(struct at91_pmc *pmc, const char *name,
-			 const char *parent_name, u8 id, int irq)
+at91_clk_register_system(struct regmap *regmap, const char *name,
+			 const char *parent_name, u8 id)
 {
 	struct clk_system *sys;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
-	int ret;
 
 	if (!parent_name || id > SYSTEM_MAX_ID)
 		return ERR_PTR(-EINVAL);
@@ -123,44 +111,33 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name,
 
 	sys->id = id;
 	sys->hw.init = &init;
-	sys->pmc = pmc;
-	sys->irq = irq;
-	if (irq) {
-		init_waitqueue_head(&sys->wait);
-		irq_set_status_flags(sys->irq, IRQ_NOAUTOEN);
-		ret = request_irq(sys->irq, clk_system_irq_handler,
-				IRQF_TRIGGER_HIGH, name, sys);
-		if (ret) {
-			kfree(sys);
-			return ERR_PTR(ret);
-		}
-	}
+	sys->regmap = regmap;
 
 	clk = clk_register(NULL, &sys->hw);
-	if (IS_ERR(clk)) {
-		if (irq)
-			free_irq(sys->irq, sys);
+	if (IS_ERR(clk))
 		kfree(sys);
-	}
 
 	return clk;
 }
 
-static void __init
-of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_sys_setup(struct device_node *np)
 {
 	int num;
-	int irq = 0;
 	u32 id;
 	struct clk *clk;
 	const char *name;
 	struct device_node *sysclknp;
 	const char *parent_name;
+	struct regmap *regmap;
 
 	num = of_get_child_count(np);
 	if (num > (SYSTEM_MAX_ID + 1))
 		return;
 
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
 	for_each_child_of_node(np, sysclknp) {
 		if (of_property_read_u32(sysclknp, "reg", &id))
 			continue;
@@ -168,21 +145,14 @@ of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc)
 		if (of_property_read_string(np, "clock-output-names", &name))
 			name = sysclknp->name;
 
-		if (is_pck(id))
-			irq = irq_of_parse_and_map(sysclknp, 0);
-
 		parent_name = of_clk_get_parent_name(sysclknp, 0);
 
-		clk = at91_clk_register_system(pmc, name, parent_name, id, irq);
+		clk = at91_clk_register_system(regmap, name, parent_name, id);
 		if (IS_ERR(clk))
 			continue;
 
 		of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk);
 	}
 }
-
-void __init of_at91rm9200_clk_sys_setup(struct device_node *np,
-					struct at91_pmc *pmc)
-{
-	of_at91_clk_sys_setup(np, pmc);
-}
+CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system",
+	       of_at91rm9200_clk_sys_setup);
diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c
index 8ab8502778a2..d80bdb0a8b02 100644
--- a/drivers/clk/at91/clk-usb.c
+++ b/drivers/clk/at91/clk-usb.c
@@ -12,8 +12,8 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -27,7 +27,7 @@
 
 struct at91sam9x5_clk_usb {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 };
 
 #define to_at91sam9x5_clk_usb(hw) \
@@ -35,7 +35,7 @@ struct at91sam9x5_clk_usb {
 
 struct at91rm9200_clk_usb {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
+	struct regmap *regmap;
 	u32 divisors[4];
 };
 
@@ -45,13 +45,12 @@ struct at91rm9200_clk_usb {
 static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw,
 						    unsigned long parent_rate)
 {
-	u32 tmp;
-	u8 usbdiv;
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
+	unsigned int usbr;
+	u8 usbdiv;
 
-	tmp = pmc_read(pmc, AT91_PMC_USB);
-	usbdiv = (tmp & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
+	regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
+	usbdiv = (usbr & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT;
 
 	return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1));
 }
@@ -109,33 +108,31 @@ static int at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw,
 
 static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index)
 {
-	u32 tmp;
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
 
 	if (index > 1)
 		return -EINVAL;
-	tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS;
-	if (index)
-		tmp |= AT91_PMC_USBS;
-	pmc_write(pmc, AT91_PMC_USB, tmp);
+
+	regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
+			   index ? AT91_PMC_USBS : 0);
+
 	return 0;
 }
 
 static u8 at91sam9x5_clk_usb_get_parent(struct clk_hw *hw)
 {
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
+	unsigned int usbr;
 
-	return pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS;
+	regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
+
+	return usbr & AT91_PMC_USBS;
 }
 
 static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long parent_rate)
 {
-	u32 tmp;
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
 	unsigned long div;
 
 	if (!rate)
@@ -145,9 +142,8 @@ static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (div > SAM9X5_USB_MAX_DIV + 1 || !div)
 		return -EINVAL;
 
-	tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_OHCIUSBDIV;
-	tmp |= (div - 1) << SAM9X5_USB_DIV_SHIFT;
-	pmc_write(pmc, AT91_PMC_USB, tmp);
+	regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_OHCIUSBDIV,
+			   (div - 1) << SAM9X5_USB_DIV_SHIFT);
 
 	return 0;
 }
@@ -163,28 +159,28 @@ static const struct clk_ops at91sam9x5_usb_ops = {
 static int at91sam9n12_clk_usb_enable(struct clk_hw *hw)
 {
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
 
-	pmc_write(pmc, AT91_PMC_USB,
-		  pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS);
+	regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS,
+			   AT91_PMC_USBS);
+
 	return 0;
 }
 
 static void at91sam9n12_clk_usb_disable(struct clk_hw *hw)
 {
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
 
-	pmc_write(pmc, AT91_PMC_USB,
-		  pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS);
+	regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, 0);
 }
 
 static int at91sam9n12_clk_usb_is_enabled(struct clk_hw *hw)
 {
 	struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
+	unsigned int usbr;
 
-	return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS);
+	regmap_read(usb->regmap, AT91_PMC_USB, &usbr);
+
+	return usbr & AT91_PMC_USBS;
 }
 
 static const struct clk_ops at91sam9n12_usb_ops = {
@@ -197,7 +193,7 @@ static const struct clk_ops at91sam9n12_usb_ops = {
 };
 
 static struct clk * __init
-at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
+at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name,
 			    const char **parent_names, u8 num_parents)
 {
 	struct at91sam9x5_clk_usb *usb;
@@ -216,7 +212,7 @@ at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
 		     CLK_SET_RATE_PARENT;
 
 	usb->hw.init = &init;
-	usb->pmc = pmc;
+	usb->regmap = regmap;
 
 	clk = clk_register(NULL, &usb->hw);
 	if (IS_ERR(clk))
@@ -226,7 +222,7 @@ at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name,
 }
 
 static struct clk * __init
-at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name,
+at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name,
 			     const char *parent_name)
 {
 	struct at91sam9x5_clk_usb *usb;
@@ -244,7 +240,7 @@ at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name,
 	init.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT;
 
 	usb->hw.init = &init;
-	usb->pmc = pmc;
+	usb->regmap = regmap;
 
 	clk = clk_register(NULL, &usb->hw);
 	if (IS_ERR(clk))
@@ -257,12 +253,12 @@ static unsigned long at91rm9200_clk_usb_recalc_rate(struct clk_hw *hw,
 						    unsigned long parent_rate)
 {
 	struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
-	u32 tmp;
+	unsigned int pllbr;
 	u8 usbdiv;
 
-	tmp = pmc_read(pmc, AT91_CKGR_PLLBR);
-	usbdiv = (tmp & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
+	regmap_read(usb->regmap, AT91_CKGR_PLLBR, &pllbr);
+
+	usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT;
 	if (usb->divisors[usbdiv])
 		return parent_rate / usb->divisors[usbdiv];
 
@@ -310,10 +306,8 @@ static long at91rm9200_clk_usb_round_rate(struct clk_hw *hw, unsigned long rate,
 static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long parent_rate)
 {
-	u32 tmp;
 	int i;
 	struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw);
-	struct at91_pmc *pmc = usb->pmc;
 	unsigned long div;
 
 	if (!rate)
@@ -323,10 +317,10 @@ static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate,
 
 	for (i = 0; i < RM9200_USB_DIV_TAB_SIZE; i++) {
 		if (usb->divisors[i] == div) {
-			tmp = pmc_read(pmc, AT91_CKGR_PLLBR) &
-			      ~AT91_PMC_USBDIV;
-			tmp |= i << RM9200_USB_DIV_SHIFT;
-			pmc_write(pmc, AT91_CKGR_PLLBR, tmp);
+			regmap_update_bits(usb->regmap, AT91_CKGR_PLLBR,
+					   AT91_PMC_USBDIV,
+					   i << RM9200_USB_DIV_SHIFT);
+
 			return 0;
 		}
 	}
@@ -341,7 +335,7 @@ static const struct clk_ops at91rm9200_usb_ops = {
 };
 
 static struct clk * __init
-at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
+at91rm9200_clk_register_usb(struct regmap *regmap, const char *name,
 			    const char *parent_name, const u32 *divisors)
 {
 	struct at91rm9200_clk_usb *usb;
@@ -359,7 +353,7 @@ at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
 	init.flags = CLK_SET_RATE_PARENT;
 
 	usb->hw.init = &init;
-	usb->pmc = pmc;
+	usb->regmap = regmap;
 	memcpy(usb->divisors, divisors, sizeof(usb->divisors));
 
 	clk = clk_register(NULL, &usb->hw);
@@ -369,35 +363,42 @@ at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name,
 	return clk;
 }
 
-void __init of_at91sam9x5_clk_usb_setup(struct device_node *np,
-					struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
 {
 	struct clk *clk;
-	int num_parents;
+	unsigned int num_parents;
 	const char *parent_names[USB_SOURCE_MAX];
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	num_parents = of_clk_get_parent_count(np);
-	if (num_parents <= 0 || num_parents > USB_SOURCE_MAX)
+	if (num_parents == 0 || num_parents > USB_SOURCE_MAX)
 		return;
 
 	of_clk_parent_fill(np, parent_names, num_parents);
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91sam9x5_clk_register_usb(pmc, name, parent_names, num_parents);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
+	clk = at91sam9x5_clk_register_usb(regmap, name, parent_names,
+					  num_parents);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb",
+	       of_at91sam9x5_clk_usb_setup);
 
-void __init of_at91sam9n12_clk_usb_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
+static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_name;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 	if (!parent_name)
@@ -405,20 +406,26 @@ void __init of_at91sam9n12_clk_usb_setup(struct device_node *np,
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91sam9n12_clk_register_usb(pmc, name, parent_name);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
+	clk = at91sam9n12_clk_register_usb(regmap, name, parent_name);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb",
+	       of_at91sam9n12_clk_usb_setup);
 
-void __init of_at91rm9200_clk_usb_setup(struct device_node *np,
-					struct at91_pmc *pmc)
+static void __init of_at91rm9200_clk_usb_setup(struct device_node *np)
 {
 	struct clk *clk;
 	const char *parent_name;
 	const char *name = np->name;
 	u32 divisors[4] = {0, 0, 0, 0};
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 	if (!parent_name)
@@ -430,9 +437,15 @@ void __init of_at91rm9200_clk_usb_setup(struct device_node *np,
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91rm9200_clk_register_usb(pmc, name, parent_name, divisors);
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
+		return;
+
+	clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
+CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb",
+	       of_at91rm9200_clk_usb_setup);
diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c
index ca561e90a60f..61fcf399e58c 100644
--- a/drivers/clk/at91/clk-utmi.c
+++ b/drivers/clk/at91/clk-utmi.c
@@ -11,14 +11,9 @@
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/io.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include "pmc.h"
 
@@ -26,37 +21,30 @@
 
 struct clk_utmi {
 	struct clk_hw hw;
-	struct at91_pmc *pmc;
-	unsigned int irq;
-	wait_queue_head_t wait;
+	struct regmap *regmap;
 };
 
 #define to_clk_utmi(hw) container_of(hw, struct clk_utmi, hw)
 
-static irqreturn_t clk_utmi_irq_handler(int irq, void *dev_id)
+static inline bool clk_utmi_ready(struct regmap *regmap)
 {
-	struct clk_utmi *utmi = (struct clk_utmi *)dev_id;
+	unsigned int status;
 
-	wake_up(&utmi->wait);
-	disable_irq_nosync(utmi->irq);
+	regmap_read(regmap, AT91_PMC_SR, &status);
 
-	return IRQ_HANDLED;
+	return status & AT91_PMC_LOCKU;
 }
 
 static int clk_utmi_prepare(struct clk_hw *hw)
 {
 	struct clk_utmi *utmi = to_clk_utmi(hw);
-	struct at91_pmc *pmc = utmi->pmc;
-	u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) | AT91_PMC_UPLLEN |
-		  AT91_PMC_UPLLCOUNT | AT91_PMC_BIASEN;
+	unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
+			    AT91_PMC_BIASEN;
 
-	pmc_write(pmc, AT91_CKGR_UCKR, tmp);
+	regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, uckr, uckr);
 
-	while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU)) {
-		enable_irq(utmi->irq);
-		wait_event(utmi->wait,
-			   pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
-	}
+	while (!clk_utmi_ready(utmi->regmap))
+		cpu_relax();
 
 	return 0;
 }
@@ -64,18 +52,15 @@ static int clk_utmi_prepare(struct clk_hw *hw)
 static int clk_utmi_is_prepared(struct clk_hw *hw)
 {
 	struct clk_utmi *utmi = to_clk_utmi(hw);
-	struct at91_pmc *pmc = utmi->pmc;
 
-	return !!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU);
+	return clk_utmi_ready(utmi->regmap);
 }
 
 static void clk_utmi_unprepare(struct clk_hw *hw)
 {
 	struct clk_utmi *utmi = to_clk_utmi(hw);
-	struct at91_pmc *pmc = utmi->pmc;
-	u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) & ~AT91_PMC_UPLLEN;
 
-	pmc_write(pmc, AT91_CKGR_UCKR, tmp);
+	regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, AT91_PMC_UPLLEN, 0);
 }
 
 static unsigned long clk_utmi_recalc_rate(struct clk_hw *hw,
@@ -93,10 +78,9 @@ static const struct clk_ops utmi_ops = {
 };
 
 static struct clk * __init
-at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq,
+at91_clk_register_utmi(struct regmap *regmap,
 		       const char *name, const char *parent_name)
 {
-	int ret;
 	struct clk_utmi *utmi;
 	struct clk *clk = NULL;
 	struct clk_init_data init;
@@ -112,52 +96,36 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq,
 	init.flags = CLK_SET_RATE_GATE;
 
 	utmi->hw.init = &init;
-	utmi->pmc = pmc;
-	utmi->irq = irq;
-	init_waitqueue_head(&utmi->wait);
-	irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN);
-	ret = request_irq(utmi->irq, clk_utmi_irq_handler,
-			  IRQF_TRIGGER_HIGH, "clk-utmi", utmi);
-	if (ret) {
-		kfree(utmi);
-		return ERR_PTR(ret);
-	}
+	utmi->regmap = regmap;
 
 	clk = clk_register(NULL, &utmi->hw);
-	if (IS_ERR(clk)) {
-		free_irq(utmi->irq, utmi);
+	if (IS_ERR(clk))
 		kfree(utmi);
-	}
 
 	return clk;
 }
 
-static void __init
-of_at91_clk_utmi_setup(struct device_node *np, struct at91_pmc *pmc)
+static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np)
 {
-	unsigned int irq;
 	struct clk *clk;
 	const char *parent_name;
 	const char *name = np->name;
+	struct regmap *regmap;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	irq = irq_of_parse_and_map(np, 0);
-	if (!irq)
+	regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_utmi(pmc, irq, name, parent_name);
+	clk = at91_clk_register_utmi(regmap, name, parent_name);
 	if (IS_ERR(clk))
 		return;
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 	return;
 }
-
-void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np,
-					 struct at91_pmc *pmc)
-{
-	of_at91_clk_utmi_setup(np, pmc);
-}
+CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi",
+	       of_at91sam9x5_clk_utmi_setup);
diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
index 8476b570779b..526df5ba042d 100644
--- a/drivers/clk/at91/pmc.c
+++ b/drivers/clk/at91/pmc.c
@@ -12,36 +12,13 @@
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/irqchip/chained_irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of_irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 
 #include <asm/proc-fns.h>
 
 #include "pmc.h"
 
-void __iomem *at91_pmc_base;
-EXPORT_SYMBOL_GPL(at91_pmc_base);
-
-void at91rm9200_idle(void)
-{
-	/*
-	 * Disable the processor clock.  The processor will be automatically
-	 * re-enabled by an interrupt or by a reset.
-	 */
-	at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
-}
-
-void at91sam9_idle(void)
-{
-	at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK);
-	cpu_do_idle();
-}
-
 int of_at91_get_clk_range(struct device_node *np, const char *propname,
 			  struct clk_range *range)
 {
@@ -64,402 +41,3 @@ int of_at91_get_clk_range(struct device_node *np, const char *propname,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_at91_get_clk_range);
-
-static void pmc_irq_mask(struct irq_data *d)
-{
-	struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
-
-	pmc_write(pmc, AT91_PMC_IDR, 1 << d->hwirq);
-}
-
-static void pmc_irq_unmask(struct irq_data *d)
-{
-	struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
-
-	pmc_write(pmc, AT91_PMC_IER, 1 << d->hwirq);
-}
-
-static int pmc_irq_set_type(struct irq_data *d, unsigned type)
-{
-	if (type != IRQ_TYPE_LEVEL_HIGH) {
-		pr_warn("PMC: type not supported (support only IRQ_TYPE_LEVEL_HIGH type)\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void pmc_irq_suspend(struct irq_data *d)
-{
-	struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
-
-	pmc->imr = pmc_read(pmc, AT91_PMC_IMR);
-	pmc_write(pmc, AT91_PMC_IDR, pmc->imr);
-}
-
-static void pmc_irq_resume(struct irq_data *d)
-{
-	struct at91_pmc *pmc = irq_data_get_irq_chip_data(d);
-
-	pmc_write(pmc, AT91_PMC_IER, pmc->imr);
-}
-
-static struct irq_chip pmc_irq = {
-	.name = "PMC",
-	.irq_disable = pmc_irq_mask,
-	.irq_mask = pmc_irq_mask,
-	.irq_unmask = pmc_irq_unmask,
-	.irq_set_type = pmc_irq_set_type,
-	.irq_suspend = pmc_irq_suspend,
-	.irq_resume = pmc_irq_resume,
-};
-
-static struct lock_class_key pmc_lock_class;
-
-static int pmc_irq_map(struct irq_domain *h, unsigned int virq,
-		       irq_hw_number_t hw)
-{
-	struct at91_pmc	*pmc = h->host_data;
-
-	irq_set_lockdep_class(virq, &pmc_lock_class);
-
-	irq_set_chip_and_handler(virq, &pmc_irq,
-				 handle_level_irq);
-	irq_set_chip_data(virq, pmc);
-
-	return 0;
-}
-
-static int pmc_irq_domain_xlate(struct irq_domain *d,
-				struct device_node *ctrlr,
-				const u32 *intspec, unsigned int intsize,
-				irq_hw_number_t *out_hwirq,
-				unsigned int *out_type)
-{
-	struct at91_pmc *pmc = d->host_data;
-	const struct at91_pmc_caps *caps = pmc->caps;
-
-	if (WARN_ON(intsize < 1))
-		return -EINVAL;
-
-	*out_hwirq = intspec[0];
-
-	if (!(caps->available_irqs & (1 << *out_hwirq)))
-		return -EINVAL;
-
-	*out_type = IRQ_TYPE_LEVEL_HIGH;
-
-	return 0;
-}
-
-static const struct irq_domain_ops pmc_irq_ops = {
-	.map	= pmc_irq_map,
-	.xlate	= pmc_irq_domain_xlate,
-};
-
-static irqreturn_t pmc_irq_handler(int irq, void *data)
-{
-	struct at91_pmc *pmc = (struct at91_pmc *)data;
-	unsigned long sr;
-	int n;
-
-	sr = pmc_read(pmc, AT91_PMC_SR) & pmc_read(pmc, AT91_PMC_IMR);
-	if (!sr)
-		return IRQ_NONE;
-
-	for_each_set_bit(n, &sr, BITS_PER_LONG)
-		generic_handle_irq(irq_find_mapping(pmc->irqdomain, n));
-
-	return IRQ_HANDLED;
-}
-
-static const struct at91_pmc_caps at91rm9200_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
-			  AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
-			  AT91_PMC_PCK3RDY,
-};
-
-static const struct at91_pmc_caps at91sam9260_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
-			  AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY,
-};
-
-static const struct at91_pmc_caps at91sam9g45_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
-			  AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY,
-};
-
-static const struct at91_pmc_caps at91sam9n12_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB |
-			  AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
-			  AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
-};
-
-static const struct at91_pmc_caps at91sam9x5_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
-			  AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS |
-			  AT91_PMC_MOSCRCS | AT91_PMC_CFDEV,
-};
-
-static const struct at91_pmc_caps sama5d2_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
-			  AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
-			  AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
-			  AT91_PMC_CFDEV | AT91_PMC_GCKRDY,
-};
-
-static const struct at91_pmc_caps sama5d3_caps = {
-	.available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY |
-			  AT91_PMC_LOCKU | AT91_PMC_PCK0RDY |
-			  AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY |
-			  AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS |
-			  AT91_PMC_CFDEV,
-};
-
-static struct at91_pmc *__init at91_pmc_init(struct device_node *np,
-					     void __iomem *regbase, int virq,
-					     const struct at91_pmc_caps *caps)
-{
-	struct at91_pmc *pmc;
-
-	if (!regbase || !virq ||  !caps)
-		return NULL;
-
-	at91_pmc_base = regbase;
-
-	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
-	if (!pmc)
-		return NULL;
-
-	spin_lock_init(&pmc->lock);
-	pmc->regbase = regbase;
-	pmc->virq = virq;
-	pmc->caps = caps;
-
-	pmc->irqdomain = irq_domain_add_linear(np, 32, &pmc_irq_ops, pmc);
-
-	if (!pmc->irqdomain)
-		goto out_free_pmc;
-
-	pmc_write(pmc, AT91_PMC_IDR, 0xffffffff);
-	if (request_irq(pmc->virq, pmc_irq_handler,
-			IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc))
-		goto out_remove_irqdomain;
-
-	return pmc;
-
-out_remove_irqdomain:
-	irq_domain_remove(pmc->irqdomain);
-out_free_pmc:
-	kfree(pmc);
-
-	return NULL;
-}
-
-static const struct of_device_id pmc_clk_ids[] __initconst = {
-	/* Slow oscillator */
-	{
-		.compatible = "atmel,at91sam9260-clk-slow",
-		.data = of_at91sam9260_clk_slow_setup,
-	},
-	/* Main clock */
-	{
-		.compatible = "atmel,at91rm9200-clk-main-osc",
-		.data = of_at91rm9200_clk_main_osc_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-main-rc-osc",
-		.data = of_at91sam9x5_clk_main_rc_osc_setup,
-	},
-	{
-		.compatible = "atmel,at91rm9200-clk-main",
-		.data = of_at91rm9200_clk_main_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-main",
-		.data = of_at91sam9x5_clk_main_setup,
-	},
-	/* PLL clocks */
-	{
-		.compatible = "atmel,at91rm9200-clk-pll",
-		.data = of_at91rm9200_clk_pll_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9g45-clk-pll",
-		.data = of_at91sam9g45_clk_pll_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9g20-clk-pllb",
-		.data = of_at91sam9g20_clk_pllb_setup,
-	},
-	{
-		.compatible = "atmel,sama5d3-clk-pll",
-		.data = of_sama5d3_clk_pll_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-plldiv",
-		.data = of_at91sam9x5_clk_plldiv_setup,
-	},
-	/* Master clock */
-	{
-		.compatible = "atmel,at91rm9200-clk-master",
-		.data = of_at91rm9200_clk_master_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-master",
-		.data = of_at91sam9x5_clk_master_setup,
-	},
-	/* System clocks */
-	{
-		.compatible = "atmel,at91rm9200-clk-system",
-		.data = of_at91rm9200_clk_sys_setup,
-	},
-	/* Peripheral clocks */
-	{
-		.compatible = "atmel,at91rm9200-clk-peripheral",
-		.data = of_at91rm9200_clk_periph_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-peripheral",
-		.data = of_at91sam9x5_clk_periph_setup,
-	},
-	/* Programmable clocks */
-	{
-		.compatible = "atmel,at91rm9200-clk-programmable",
-		.data = of_at91rm9200_clk_prog_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9g45-clk-programmable",
-		.data = of_at91sam9g45_clk_prog_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-programmable",
-		.data = of_at91sam9x5_clk_prog_setup,
-	},
-	/* UTMI clock */
-#if defined(CONFIG_HAVE_AT91_UTMI)
-	{
-		.compatible = "atmel,at91sam9x5-clk-utmi",
-		.data = of_at91sam9x5_clk_utmi_setup,
-	},
-#endif
-	/* USB clock */
-#if defined(CONFIG_HAVE_AT91_USB_CLK)
-	{
-		.compatible = "atmel,at91rm9200-clk-usb",
-		.data = of_at91rm9200_clk_usb_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9x5-clk-usb",
-		.data = of_at91sam9x5_clk_usb_setup,
-	},
-	{
-		.compatible = "atmel,at91sam9n12-clk-usb",
-		.data = of_at91sam9n12_clk_usb_setup,
-	},
-#endif
-	/* SMD clock */
-#if defined(CONFIG_HAVE_AT91_SMD)
-	{
-		.compatible = "atmel,at91sam9x5-clk-smd",
-		.data = of_at91sam9x5_clk_smd_setup,
-	},
-#endif
-#if defined(CONFIG_HAVE_AT91_H32MX)
-	{
-		.compatible = "atmel,sama5d4-clk-h32mx",
-		.data = of_sama5d4_clk_h32mx_setup,
-	},
-#endif
-#if defined(CONFIG_HAVE_AT91_GENERATED_CLK)
-	{
-		.compatible = "atmel,sama5d2-clk-generated",
-		.data = of_sama5d2_clk_generated_setup,
-	},
-#endif
-	{ /*sentinel*/ }
-};
-
-static void __init of_at91_pmc_setup(struct device_node *np,
-				     const struct at91_pmc_caps *caps)
-{
-	struct at91_pmc *pmc;
-	struct device_node *childnp;
-	void (*clk_setup)(struct device_node *, struct at91_pmc *);
-	const struct of_device_id *clk_id;
-	void __iomem *regbase = of_iomap(np, 0);
-	int virq;
-
-	if (!regbase)
-		return;
-
-	virq = irq_of_parse_and_map(np, 0);
-	if (!virq)
-		return;
-
-	pmc = at91_pmc_init(np, regbase, virq, caps);
-	if (!pmc)
-		return;
-	for_each_child_of_node(np, childnp) {
-		clk_id = of_match_node(pmc_clk_ids, childnp);
-		if (!clk_id)
-			continue;
-		clk_setup = clk_id->data;
-		clk_setup(childnp, pmc);
-	}
-}
-
-static void __init of_at91rm9200_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &at91rm9200_caps);
-}
-CLK_OF_DECLARE(at91rm9200_clk_pmc, "atmel,at91rm9200-pmc",
-	       of_at91rm9200_pmc_setup);
-
-static void __init of_at91sam9260_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &at91sam9260_caps);
-}
-CLK_OF_DECLARE(at91sam9260_clk_pmc, "atmel,at91sam9260-pmc",
-	       of_at91sam9260_pmc_setup);
-
-static void __init of_at91sam9g45_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &at91sam9g45_caps);
-}
-CLK_OF_DECLARE(at91sam9g45_clk_pmc, "atmel,at91sam9g45-pmc",
-	       of_at91sam9g45_pmc_setup);
-
-static void __init of_at91sam9n12_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &at91sam9n12_caps);
-}
-CLK_OF_DECLARE(at91sam9n12_clk_pmc, "atmel,at91sam9n12-pmc",
-	       of_at91sam9n12_pmc_setup);
-
-static void __init of_at91sam9x5_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &at91sam9x5_caps);
-}
-CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc",
-	       of_at91sam9x5_pmc_setup);
-
-static void __init of_sama5d2_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &sama5d2_caps);
-}
-CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc",
-	       of_sama5d2_pmc_setup);
-
-static void __init of_sama5d3_pmc_setup(struct device_node *np)
-{
-	of_at91_pmc_setup(np, &sama5d3_caps);
-}
-CLK_OF_DECLARE(sama5d3_clk_pmc, "atmel,sama5d3-pmc",
-	       of_sama5d3_pmc_setup);
diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h
index f65739272779..5771fff0ee3f 100644
--- a/drivers/clk/at91/pmc.h
+++ b/drivers/clk/at91/pmc.h
@@ -14,8 +14,11 @@
 
 #include <linux/io.h>
 #include <linux/irqdomain.h>
+#include <linux/regmap.h>
 #include <linux/spinlock.h>
 
+extern spinlock_t pmc_pcr_lock;
+
 struct clk_range {
 	unsigned long min;
 	unsigned long max;
@@ -23,102 +26,7 @@ struct clk_range {
 
 #define CLK_RANGE(MIN, MAX) {.min = MIN, .max = MAX,}
 
-struct at91_pmc_caps {
-	u32 available_irqs;
-};
-
-struct at91_pmc {
-	void __iomem *regbase;
-	int virq;
-	spinlock_t lock;
-	const struct at91_pmc_caps *caps;
-	struct irq_domain *irqdomain;
-	u32 imr;
-};
-
-static inline void pmc_lock(struct at91_pmc *pmc)
-{
-	spin_lock(&pmc->lock);
-}
-
-static inline void pmc_unlock(struct at91_pmc *pmc)
-{
-	spin_unlock(&pmc->lock);
-}
-
-static inline u32 pmc_read(struct at91_pmc *pmc, int offset)
-{
-	return readl(pmc->regbase + offset);
-}
-
-static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value)
-{
-	writel(value, pmc->regbase + offset);
-}
-
 int of_at91_get_clk_range(struct device_node *np, const char *propname,
 			  struct clk_range *range);
 
-void of_at91sam9260_clk_slow_setup(struct device_node *np,
-				   struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_main_osc_setup(struct device_node *np,
-				      struct at91_pmc *pmc);
-void of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
-					 struct at91_pmc *pmc);
-void of_at91rm9200_clk_main_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-void of_at91sam9x5_clk_main_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_pll_setup(struct device_node *np,
-				 struct at91_pmc *pmc);
-void of_at91sam9g45_clk_pll_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-void of_at91sam9g20_clk_pllb_setup(struct device_node *np,
-				   struct at91_pmc *pmc);
-void of_sama5d3_clk_pll_setup(struct device_node *np,
-			      struct at91_pmc *pmc);
-void of_at91sam9x5_clk_plldiv_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_master_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-void of_at91sam9x5_clk_master_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_sys_setup(struct device_node *np,
-				 struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_periph_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-void of_at91sam9x5_clk_periph_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_prog_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-void of_at91sam9g45_clk_prog_setup(struct device_node *np,
-				   struct at91_pmc *pmc);
-void of_at91sam9x5_clk_prog_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-
-void of_at91sam9x5_clk_utmi_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-
-void of_at91rm9200_clk_usb_setup(struct device_node *np,
-				 struct at91_pmc *pmc);
-void of_at91sam9x5_clk_usb_setup(struct device_node *np,
-				 struct at91_pmc *pmc);
-void of_at91sam9n12_clk_usb_setup(struct device_node *np,
-				  struct at91_pmc *pmc);
-
-void of_at91sam9x5_clk_smd_setup(struct device_node *np,
-				 struct at91_pmc *pmc);
-
-void of_sama5d4_clk_h32mx_setup(struct device_node *np,
-				struct at91_pmc *pmc);
-
-void of_sama5d2_clk_generated_setup(struct device_node *np,
-				    struct at91_pmc *pmc);
-
 #endif /* __PMC_H_ */
diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index e4f89e28b5ec..3a177ade6e6c 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -38,8 +38,8 @@ static int bcm2835_aux_clk_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	reg = devm_ioremap_resource(dev, res);
-	if (!reg)
-		return -ENODEV;
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
 
 	onecell = devm_kmalloc(dev, sizeof(*onecell), GFP_KERNEL);
 	if (!onecell)
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 015e687ffabe..c74ed3fd496d 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -88,10 +88,23 @@
 #define CM_HSMDIV		0x08c
 #define CM_OTPCTL		0x090
 #define CM_OTPDIV		0x094
+#define CM_PCMCTL		0x098
+#define CM_PCMDIV		0x09c
 #define CM_PWMCTL		0x0a0
 #define CM_PWMDIV		0x0a4
+#define CM_SLIMCTL		0x0a8
+#define CM_SLIMDIV		0x0ac
 #define CM_SMICTL		0x0b0
 #define CM_SMIDIV		0x0b4
+/* no definition for 0x0b8  and 0x0bc */
+#define CM_TCNTCTL		0x0c0
+#define CM_TCNTDIV		0x0c4
+#define CM_TECCTL		0x0c8
+#define CM_TECDIV		0x0cc
+#define CM_TD0CTL		0x0d0
+#define CM_TD0DIV		0x0d4
+#define CM_TD1CTL		0x0d8
+#define CM_TD1DIV		0x0dc
 #define CM_TSENSCTL		0x0e0
 #define CM_TSENSDIV		0x0e4
 #define CM_TIMERCTL		0x0e8
@@ -311,21 +324,18 @@ void __init bcm2835_init_clocks(void)
 	struct clk *clk;
 	int ret;
 
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, CLK_IS_ROOT,
-					126000000);
+	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 126000000);
 	if (IS_ERR(clk))
 		pr_err("apb_pclk not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart0_pclk", NULL, CLK_IS_ROOT,
-					3000000);
+	clk = clk_register_fixed_rate(NULL, "uart0_pclk", NULL, 0, 3000000);
 	if (IS_ERR(clk))
 		pr_err("uart0_pclk not registered\n");
 	ret = clk_register_clkdev(clk, NULL, "20201000.uart");
 	if (ret)
 		pr_err("uart0_pclk alias not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart1_pclk", NULL, CLK_IS_ROOT,
-					125000000);
+	clk = clk_register_fixed_rate(NULL, "uart1_pclk", NULL, 0, 125000000);
 	if (IS_ERR(clk))
 		pr_err("uart1_pclk not registered\n");
 	ret = clk_register_clkdev(clk, NULL, "20215000.uart");
@@ -1060,16 +1070,7 @@ static long bcm2835_pll_divider_round_rate(struct clk_hw *hw,
 static unsigned long bcm2835_pll_divider_get_rate(struct clk_hw *hw,
 						  unsigned long parent_rate)
 {
-	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
-	struct bcm2835_cprman *cprman = divider->cprman;
-	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 div = cprman_read(cprman, data->a2w_reg);
-
-	div &= (1 << A2W_PLL_DIV_BITS) - 1;
-	if (div == 0)
-		div = 256;
-
-	return parent_rate / div;
+	return clk_divider_ops.recalc_rate(hw, parent_rate);
 }
 
 static void bcm2835_pll_divider_off(struct clk_hw *hw)
@@ -1107,13 +1108,15 @@ static int bcm2835_pll_divider_set_rate(struct clk_hw *hw,
 	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
 	struct bcm2835_cprman *cprman = divider->cprman;
 	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 cm;
-	int ret;
+	u32 cm, div, max_div = 1 << A2W_PLL_DIV_BITS;
 
-	ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
-	if (ret)
-		return ret;
+	div = DIV_ROUND_UP_ULL(parent_rate, rate);
+
+	div = min(div, max_div);
+	if (div == max_div)
+		div = 0;
 
+	cprman_write(cprman, data->a2w_reg, div);
 	cm = cprman_read(cprman, data->cm_reg);
 	cprman_write(cprman, data->cm_reg, cm | data->load_mask);
 	cprman_write(cprman, data->cm_reg, cm & ~data->load_mask);
@@ -1428,7 +1431,7 @@ bcm2835_register_pll_divider(struct bcm2835_cprman *cprman,
 	divider->div.reg = cprman->regs + data->a2w_reg;
 	divider->div.shift = A2W_PLL_DIV_SHIFT;
 	divider->div.width = A2W_PLL_DIV_BITS;
-	divider->div.flags = 0;
+	divider->div.flags = CLK_DIVIDER_MAX_AT_ZERO;
 	divider->div.lock = &cprman->regs_lock;
 	divider->div.hw.init = &init;
 	divider->div.table = NULL;
diff --git a/drivers/clk/bcm/clk-cygnus.c b/drivers/clk/bcm/clk-cygnus.c
index 3a228b6d4fee..464fdc4bc66b 100644
--- a/drivers/clk/bcm/clk-cygnus.c
+++ b/drivers/clk/bcm/clk-cygnus.c
@@ -268,3 +268,62 @@ static void __init cygnus_asiu_init(struct device_node *node)
 	iproc_asiu_setup(node, asiu_div, asiu_gate, ARRAY_SIZE(asiu_div));
 }
 CLK_OF_DECLARE(cygnus_asiu_clk, "brcm,cygnus-asiu-clk", cygnus_asiu_init);
+
+/*
+ * AUDIO PLL VCO frequency parameter table
+ *
+ * PLL output frequency = ((ndiv_int + ndiv_frac / 2^20) *
+ * (parent clock rate / pdiv)
+ *
+ * On Cygnus, parent is the 25MHz oscillator
+ */
+static const struct iproc_pll_vco_param audiopll_vco_params[] = {
+	/* rate (Hz) ndiv_int ndiv_frac pdiv */
+	{ 1354750204UL,  54,     199238,   1 },
+	{ 1769470191UL,  70,     816639,   1 },
+};
+
+static const struct iproc_pll_ctrl audiopll = {
+	.flags = IPROC_CLK_PLL_NEEDS_SW_CFG | IPROC_CLK_PLL_HAS_NDIV_FRAC |
+		IPROC_CLK_PLL_USER_MODE_ON | IPROC_CLK_PLL_RESET_ACTIVE_LOW,
+	.reset = RESET_VAL(0x5c, 0, 1),
+	.dig_filter = DF_VAL(0x48, 0, 3, 6, 4, 3, 3),
+	.sw_ctrl = SW_CTRL_VAL(0x4, 0),
+	.ndiv_int = REG_VAL(0x8, 0, 10),
+	.ndiv_frac = REG_VAL(0x8, 10, 20),
+	.pdiv = REG_VAL(0x44, 0, 4),
+	.vco_ctrl = VCO_CTRL_VAL(0x0c, 0x10),
+	.status = REG_VAL(0x54, 0, 1),
+	.macro_mode = REG_VAL(0x0, 0, 3),
+};
+
+static const struct iproc_clk_ctrl audiopll_clk[] = {
+	[BCM_CYGNUS_AUDIOPLL_CH0] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH0,
+		.flags = IPROC_CLK_AON |
+				IPROC_CLK_MCLK_DIV_BY_2,
+		.enable = ENABLE_VAL(0x14, 8, 10, 9),
+		.mdiv = REG_VAL(0x14, 0, 8),
+	},
+	[BCM_CYGNUS_AUDIOPLL_CH1] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH1,
+		.flags = IPROC_CLK_AON,
+		.enable = ENABLE_VAL(0x18, 8, 10, 9),
+		.mdiv = REG_VAL(0x18, 0, 8),
+	},
+	[BCM_CYGNUS_AUDIOPLL_CH2] = {
+		.channel = BCM_CYGNUS_AUDIOPLL_CH2,
+		.flags = IPROC_CLK_AON,
+		.enable = ENABLE_VAL(0x1c, 8, 10, 9),
+		.mdiv = REG_VAL(0x1c, 0, 8),
+	},
+};
+
+static void __init cygnus_audiopll_clk_init(struct device_node *node)
+{
+	iproc_pll_clk_setup(node, &audiopll, audiopll_vco_params,
+			    ARRAY_SIZE(audiopll_vco_params), audiopll_clk,
+			    ARRAY_SIZE(audiopll_clk));
+}
+CLK_OF_DECLARE(cygnus_audiopll, "brcm,cygnus-audiopll",
+			cygnus_audiopll_clk_init);
diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index afd5891ac9e6..fd492a5dad12 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -25,6 +25,12 @@
 #define PLL_VCO_HIGH_SHIFT 19
 #define PLL_VCO_LOW_SHIFT  30
 
+/*
+ * PLL MACRO_SELECT modes 0 to 5 choose pre-calculated PLL output frequencies
+ * from a look-up table. Mode 7 allows user to manipulate PLL clock dividers
+ */
+#define PLL_USER_MODE 7
+
 /* number of delay loops waiting for PLL to lock */
 #define LOCK_DELAY 100
 
@@ -215,7 +221,10 @@ static void __pll_put_in_reset(struct iproc_pll *pll)
 	const struct iproc_pll_reset_ctrl *reset = &ctrl->reset;
 
 	val = readl(pll->control_base + reset->offset);
-	val &= ~(1 << reset->reset_shift | 1 << reset->p_reset_shift);
+	if (ctrl->flags & IPROC_CLK_PLL_RESET_ACTIVE_LOW)
+		val |= BIT(reset->reset_shift) | BIT(reset->p_reset_shift);
+	else
+		val &= ~(BIT(reset->reset_shift) | BIT(reset->p_reset_shift));
 	iproc_pll_write(pll, pll->control_base, reset->offset, val);
 }
 
@@ -236,7 +245,10 @@ static void __pll_bring_out_reset(struct iproc_pll *pll, unsigned int kp,
 	iproc_pll_write(pll, pll->control_base, dig_filter->offset, val);
 
 	val = readl(pll->control_base + reset->offset);
-	val |= 1 << reset->reset_shift | 1 << reset->p_reset_shift;
+	if (ctrl->flags & IPROC_CLK_PLL_RESET_ACTIVE_LOW)
+		val &= ~(BIT(reset->reset_shift) | BIT(reset->p_reset_shift));
+	else
+		val |= BIT(reset->reset_shift) | BIT(reset->p_reset_shift);
 	iproc_pll_write(pll, pll->control_base, reset->offset, val);
 }
 
@@ -292,6 +304,16 @@ static int pll_set_rate(struct iproc_clk *clk, unsigned int rate_index,
 	/* put PLL in reset */
 	__pll_put_in_reset(pll);
 
+	/* set PLL in user mode before modifying PLL controls */
+	if (ctrl->flags & IPROC_CLK_PLL_USER_MODE_ON) {
+		val = readl(pll->control_base + ctrl->macro_mode.offset);
+		val &= ~(bit_mask(ctrl->macro_mode.width) <<
+			ctrl->macro_mode.shift);
+		val |= PLL_USER_MODE << ctrl->macro_mode.shift;
+		iproc_pll_write(pll, pll->control_base,
+			ctrl->macro_mode.offset, val);
+	}
+
 	iproc_pll_write(pll, pll->control_base, ctrl->vco_ctrl.u_offset, 0);
 
 	val = readl(pll->control_base + ctrl->vco_ctrl.l_offset);
@@ -505,7 +527,10 @@ static unsigned long iproc_clk_recalc_rate(struct clk_hw *hw,
 	if (mdiv == 0)
 		mdiv = 256;
 
-	clk->rate = parent_rate / mdiv;
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		clk->rate = parent_rate / (mdiv * 2);
+	else
+		clk->rate = parent_rate / mdiv;
 
 	return clk->rate;
 }
@@ -543,7 +568,10 @@ static int iproc_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (rate == 0 || parent_rate == 0)
 		return -EINVAL;
 
-	div = DIV_ROUND_UP(parent_rate, rate);
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		div = DIV_ROUND_UP(parent_rate, rate * 2);
+	else
+		div = DIV_ROUND_UP(parent_rate, rate);
 	if (div > 256)
 		return -EINVAL;
 
@@ -555,7 +583,10 @@ static int iproc_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 		val |= div << ctrl->mdiv.shift;
 	}
 	iproc_pll_write(pll, pll->control_base, ctrl->mdiv.offset, val);
-	clk->rate = parent_rate / div;
+	if (ctrl->flags & IPROC_CLK_MCLK_DIV_BY_2)
+		clk->rate = parent_rate / (div * 2);
+	else
+		clk->rate = parent_rate / div;
 
 	return 0;
 }
diff --git a/drivers/clk/bcm/clk-iproc.h b/drivers/clk/bcm/clk-iproc.h
index 8988de70a98c..2148b4ea9f28 100644
--- a/drivers/clk/bcm/clk-iproc.h
+++ b/drivers/clk/bcm/clk-iproc.h
@@ -61,6 +61,26 @@
 #define IPROC_CLK_PLL_SPLIT_STAT_CTRL BIT(6)
 
 /*
+ * Some PLLs have an additional divide by 2 in master clock calculation;
+ * MCLK = VCO_freq / (Mdiv * 2). Identify this to let the driver know
+ * of modified calculations
+ */
+#define IPROC_CLK_MCLK_DIV_BY_2 BIT(7)
+
+/*
+ * Some PLLs provide a look up table for the leaf clock frequencies and
+ * auto calculates VCO frequency parameters based on the provided leaf
+ * clock frequencies. They have a user mode that allows the divider
+ * controls to be determined by the user
+ */
+#define IPROC_CLK_PLL_USER_MODE_ON BIT(8)
+
+/*
+ * Some PLLs have an active low reset
+ */
+#define IPROC_CLK_PLL_RESET_ACTIVE_LOW BIT(9)
+
+/*
  * Parameters for VCO frequency configuration
  *
  * VCO frequency =
@@ -149,6 +169,7 @@ struct iproc_pll_ctrl {
 	struct iproc_clk_reg_op pdiv;
 	struct iproc_pll_vco_ctrl vco_ctrl;
 	struct iproc_clk_reg_op status;
+	struct iproc_clk_reg_op macro_mode;
 };
 
 /*
@@ -183,16 +204,16 @@ struct iproc_asiu_div {
 	unsigned int low_width;
 };
 
-void __init iproc_armpll_setup(struct device_node *node);
-void __init iproc_pll_clk_setup(struct device_node *node,
-				const struct iproc_pll_ctrl *pll_ctrl,
-				const struct iproc_pll_vco_param *vco,
-				unsigned int num_vco_entries,
-				const struct iproc_clk_ctrl *clk_ctrl,
-				unsigned int num_clks);
-void __init iproc_asiu_setup(struct device_node *node,
-			     const struct iproc_asiu_div *div,
-			     const struct iproc_asiu_gate *gate,
-			     unsigned int num_clks);
+void iproc_armpll_setup(struct device_node *node);
+void iproc_pll_clk_setup(struct device_node *node,
+			 const struct iproc_pll_ctrl *pll_ctrl,
+			 const struct iproc_pll_vco_param *vco,
+			 unsigned int num_vco_entries,
+			 const struct iproc_clk_ctrl *clk_ctrl,
+			 unsigned int num_clks);
+void iproc_asiu_setup(struct device_node *node,
+		      const struct iproc_asiu_div *div,
+		      const struct iproc_asiu_gate *gate,
+		      unsigned int num_clks);
 
 #endif /* _CLK_IPROC_H */
diff --git a/drivers/clk/clk-axi-clkgen.c b/drivers/clk/clk-axi-clkgen.c
index 3bcd42fbb55e..3294db3b4e4e 100644
--- a/drivers/clk/clk-axi-clkgen.c
+++ b/drivers/clk/clk-axi-clkgen.c
@@ -16,19 +16,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 
-#define AXI_CLKGEN_V1_REG_UPDATE_ENABLE	0x04
-#define AXI_CLKGEN_V1_REG_CLK_OUT1	0x08
-#define AXI_CLKGEN_V1_REG_CLK_OUT2	0x0c
-#define AXI_CLKGEN_V1_REG_CLK_DIV	0x10
-#define AXI_CLKGEN_V1_REG_CLK_FB1	0x14
-#define AXI_CLKGEN_V1_REG_CLK_FB2	0x18
-#define AXI_CLKGEN_V1_REG_LOCK1		0x1c
-#define AXI_CLKGEN_V1_REG_LOCK2		0x20
-#define AXI_CLKGEN_V1_REG_LOCK3		0x24
-#define AXI_CLKGEN_V1_REG_FILTER1	0x28
-#define AXI_CLKGEN_V1_REG_FILTER2	0x2c
-
 #define AXI_CLKGEN_V2_REG_RESET		0x40
+#define AXI_CLKGEN_V2_REG_CLKSEL	0x44
 #define AXI_CLKGEN_V2_REG_DRP_CNTRL	0x70
 #define AXI_CLKGEN_V2_REG_DRP_STATUS	0x74
 
@@ -51,40 +40,11 @@
 #define MMCM_REG_FILTER1	0x4e
 #define MMCM_REG_FILTER2	0x4f
 
-struct axi_clkgen;
-
-struct axi_clkgen_mmcm_ops {
-	void (*enable)(struct axi_clkgen *axi_clkgen, bool enable);
-	int (*write)(struct axi_clkgen *axi_clkgen, unsigned int reg,
-		     unsigned int val, unsigned int mask);
-	int (*read)(struct axi_clkgen *axi_clkgen, unsigned int reg,
-		    unsigned int *val);
-};
-
 struct axi_clkgen {
 	void __iomem *base;
-	const struct axi_clkgen_mmcm_ops *mmcm_ops;
 	struct clk_hw clk_hw;
 };
 
-static void axi_clkgen_mmcm_enable(struct axi_clkgen *axi_clkgen,
-	bool enable)
-{
-	axi_clkgen->mmcm_ops->enable(axi_clkgen, enable);
-}
-
-static int axi_clkgen_mmcm_write(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int val, unsigned int mask)
-{
-	return axi_clkgen->mmcm_ops->write(axi_clkgen, reg, val, mask);
-}
-
-static int axi_clkgen_mmcm_read(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int *val)
-{
-	return axi_clkgen->mmcm_ops->read(axi_clkgen, reg, val);
-}
-
 static uint32_t axi_clkgen_lookup_filter(unsigned int m)
 {
 	switch (m) {
@@ -207,70 +167,6 @@ static void axi_clkgen_read(struct axi_clkgen *axi_clkgen,
 	*val = readl(axi_clkgen->base + reg);
 }
 
-static unsigned int axi_clkgen_v1_map_mmcm_reg(unsigned int reg)
-{
-	switch (reg) {
-	case MMCM_REG_CLKOUT0_1:
-		return AXI_CLKGEN_V1_REG_CLK_OUT1;
-	case MMCM_REG_CLKOUT0_2:
-		return AXI_CLKGEN_V1_REG_CLK_OUT2;
-	case MMCM_REG_CLK_FB1:
-		return AXI_CLKGEN_V1_REG_CLK_FB1;
-	case MMCM_REG_CLK_FB2:
-		return AXI_CLKGEN_V1_REG_CLK_FB2;
-	case MMCM_REG_CLK_DIV:
-		return AXI_CLKGEN_V1_REG_CLK_DIV;
-	case MMCM_REG_LOCK1:
-		return AXI_CLKGEN_V1_REG_LOCK1;
-	case MMCM_REG_LOCK2:
-		return AXI_CLKGEN_V1_REG_LOCK2;
-	case MMCM_REG_LOCK3:
-		return AXI_CLKGEN_V1_REG_LOCK3;
-	case MMCM_REG_FILTER1:
-		return AXI_CLKGEN_V1_REG_FILTER1;
-	case MMCM_REG_FILTER2:
-		return AXI_CLKGEN_V1_REG_FILTER2;
-	default:
-		return 0;
-	}
-}
-
-static int axi_clkgen_v1_mmcm_write(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int val, unsigned int mask)
-{
-	reg = axi_clkgen_v1_map_mmcm_reg(reg);
-	if (reg == 0)
-		return -EINVAL;
-
-	axi_clkgen_write(axi_clkgen, reg, val);
-
-	return 0;
-}
-
-static int axi_clkgen_v1_mmcm_read(struct axi_clkgen *axi_clkgen,
-	unsigned int reg, unsigned int *val)
-{
-	reg = axi_clkgen_v1_map_mmcm_reg(reg);
-	if (reg == 0)
-		return -EINVAL;
-
-	axi_clkgen_read(axi_clkgen, reg, val);
-
-	return 0;
-}
-
-static void axi_clkgen_v1_mmcm_enable(struct axi_clkgen *axi_clkgen,
-	bool enable)
-{
-	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V1_REG_UPDATE_ENABLE, enable);
-}
-
-static const struct axi_clkgen_mmcm_ops axi_clkgen_v1_mmcm_ops = {
-	.write = axi_clkgen_v1_mmcm_write,
-	.read = axi_clkgen_v1_mmcm_read,
-	.enable = axi_clkgen_v1_mmcm_enable,
-};
-
 static int axi_clkgen_wait_non_busy(struct axi_clkgen *axi_clkgen)
 {
 	unsigned int timeout = 10000;
@@ -286,7 +182,7 @@ static int axi_clkgen_wait_non_busy(struct axi_clkgen *axi_clkgen)
 	return val & 0xffff;
 }
 
-static int axi_clkgen_v2_mmcm_read(struct axi_clkgen *axi_clkgen,
+static int axi_clkgen_mmcm_read(struct axi_clkgen *axi_clkgen,
 	unsigned int reg, unsigned int *val)
 {
 	unsigned int reg_val;
@@ -310,7 +206,7 @@ static int axi_clkgen_v2_mmcm_read(struct axi_clkgen *axi_clkgen,
 	return 0;
 }
 
-static int axi_clkgen_v2_mmcm_write(struct axi_clkgen *axi_clkgen,
+static int axi_clkgen_mmcm_write(struct axi_clkgen *axi_clkgen,
 	unsigned int reg, unsigned int val, unsigned int mask)
 {
 	unsigned int reg_val = 0;
@@ -321,7 +217,7 @@ static int axi_clkgen_v2_mmcm_write(struct axi_clkgen *axi_clkgen,
 		return ret;
 
 	if (mask != 0xffff) {
-		axi_clkgen_v2_mmcm_read(axi_clkgen, reg, &reg_val);
+		axi_clkgen_mmcm_read(axi_clkgen, reg, &reg_val);
 		reg_val &= ~mask;
 	}
 
@@ -332,7 +228,7 @@ static int axi_clkgen_v2_mmcm_write(struct axi_clkgen *axi_clkgen,
 	return 0;
 }
 
-static void axi_clkgen_v2_mmcm_enable(struct axi_clkgen *axi_clkgen,
+static void axi_clkgen_mmcm_enable(struct axi_clkgen *axi_clkgen,
 	bool enable)
 {
 	unsigned int val = AXI_CLKGEN_V2_RESET_ENABLE;
@@ -343,12 +239,6 @@ static void axi_clkgen_v2_mmcm_enable(struct axi_clkgen *axi_clkgen,
 	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V2_REG_RESET, val);
 }
 
-static const struct axi_clkgen_mmcm_ops axi_clkgen_v2_mmcm_ops = {
-	.write = axi_clkgen_v2_mmcm_write,
-	.read = axi_clkgen_v2_mmcm_read,
-	.enable = axi_clkgen_v2_mmcm_enable,
-};
-
 static struct axi_clkgen *clk_hw_to_axi_clkgen(struct clk_hw *clk_hw)
 {
 	return container_of(clk_hw, struct axi_clkgen, clk_hw);
@@ -438,10 +328,7 @@ static unsigned long axi_clkgen_recalc_rate(struct clk_hw *clk_hw,
 	tmp = (unsigned long long)(parent_rate / d) * m;
 	do_div(tmp, dout);
 
-	if (tmp > ULONG_MAX)
-		return ULONG_MAX;
-
-	return tmp;
+	return min_t(unsigned long long, tmp, ULONG_MAX);
 }
 
 static int axi_clkgen_enable(struct clk_hw *clk_hw)
@@ -460,21 +347,38 @@ static void axi_clkgen_disable(struct clk_hw *clk_hw)
 	axi_clkgen_mmcm_enable(axi_clkgen, false);
 }
 
+static int axi_clkgen_set_parent(struct clk_hw *clk_hw, u8 index)
+{
+	struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw);
+
+	axi_clkgen_write(axi_clkgen, AXI_CLKGEN_V2_REG_CLKSEL, index);
+
+	return 0;
+}
+
+static u8 axi_clkgen_get_parent(struct clk_hw *clk_hw)
+{
+	struct axi_clkgen *axi_clkgen = clk_hw_to_axi_clkgen(clk_hw);
+	unsigned int parent;
+
+	axi_clkgen_read(axi_clkgen, AXI_CLKGEN_V2_REG_CLKSEL, &parent);
+
+	return parent;
+}
+
 static const struct clk_ops axi_clkgen_ops = {
 	.recalc_rate = axi_clkgen_recalc_rate,
 	.round_rate = axi_clkgen_round_rate,
 	.set_rate = axi_clkgen_set_rate,
 	.enable = axi_clkgen_enable,
 	.disable = axi_clkgen_disable,
+	.set_parent = axi_clkgen_set_parent,
+	.get_parent = axi_clkgen_get_parent,
 };
 
 static const struct of_device_id axi_clkgen_ids[] = {
 	{
-		.compatible = "adi,axi-clkgen-1.00.a",
-		.data = &axi_clkgen_v1_mmcm_ops
-	}, {
 		.compatible = "adi,axi-clkgen-2.00.a",
-		.data = &axi_clkgen_v2_mmcm_ops,
 	},
 	{ },
 };
@@ -485,10 +389,11 @@ static int axi_clkgen_probe(struct platform_device *pdev)
 	const struct of_device_id *id;
 	struct axi_clkgen *axi_clkgen;
 	struct clk_init_data init;
-	const char *parent_name;
+	const char *parent_names[2];
 	const char *clk_name;
 	struct resource *mem;
 	struct clk *clk;
+	unsigned int i;
 
 	if (!pdev->dev.of_node)
 		return -ENODEV;
@@ -501,26 +406,29 @@ static int axi_clkgen_probe(struct platform_device *pdev)
 	if (!axi_clkgen)
 		return -ENOMEM;
 
-	axi_clkgen->mmcm_ops = id->data;
-
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	axi_clkgen->base = devm_ioremap_resource(&pdev->dev, mem);
 	if (IS_ERR(axi_clkgen->base))
 		return PTR_ERR(axi_clkgen->base);
 
-	parent_name = of_clk_get_parent_name(pdev->dev.of_node, 0);
-	if (!parent_name)
+	init.num_parents = of_clk_get_parent_count(pdev->dev.of_node);
+	if (init.num_parents < 1 || init.num_parents > 2)
 		return -EINVAL;
 
+	for (i = 0; i < init.num_parents; i++) {
+		parent_names[i] = of_clk_get_parent_name(pdev->dev.of_node, i);
+		if (!parent_names[i])
+			return -EINVAL;
+	}
+
 	clk_name = pdev->dev.of_node->name;
 	of_property_read_string(pdev->dev.of_node, "clock-output-names",
 		&clk_name);
 
 	init.name = clk_name;
 	init.ops = &axi_clkgen_ops;
-	init.flags = CLK_SET_RATE_GATE;
-	init.parent_names = &parent_name;
-	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
+	init.parent_names = parent_names;
 
 	axi_clkgen_mmcm_enable(axi_clkgen, false);
 
diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index 4735de0660cc..1f903e1f86a2 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -19,8 +19,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 
-#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw)
-
 static u8 clk_composite_get_parent(struct clk_hw *hw)
 {
 	struct clk_composite *composite = to_clk_composite(hw);
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index ded3ff4b91b9..00e035b51c69 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -28,8 +28,6 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
-
 #define div_mask(width)	((1 << (width)) - 1)
 
 static unsigned int _get_table_maxdiv(const struct clk_div_table *table,
@@ -305,9 +303,8 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 	 */
 	maxdiv = min(ULONG_MAX / rate, maxdiv);
 
-	for (i = 1; i <= maxdiv; i = _next_div(table, i, flags)) {
-		if (!_is_valid_div(table, i, flags))
-			continue;
+	for (i = _next_div(table, 0, flags); i <= maxdiv;
+					     i = _next_div(table, i, flags)) {
 		if (rate * i == parent_rate_saved) {
 			/*
 			 * It's the most ideal case if the requested rate can be
@@ -423,6 +420,12 @@ const struct clk_ops clk_divider_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
 
+const struct clk_ops clk_divider_ro_ops = {
+	.recalc_rate = clk_divider_recalc_rate,
+	.round_rate = clk_divider_round_rate,
+};
+EXPORT_SYMBOL_GPL(clk_divider_ro_ops);
+
 static struct clk *_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -446,7 +449,10 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
-	init.ops = &clk_divider_ops;
+	if (clk_divider_flags & CLK_DIVIDER_READ_ONLY)
+		init.ops = &clk_divider_ro_ops;
+	else
+		init.ops = &clk_divider_ops;
 	init.flags = flags | CLK_IS_BASIC;
 	init.parent_names = (parent_name ? &parent_name: NULL);
 	init.num_parents = (parent_name ? 1 : 0);
diff --git a/drivers/clk/clk-efm32gg.c b/drivers/clk/clk-efm32gg.c
index bac4553f04b8..22e4c659704e 100644
--- a/drivers/clk/clk-efm32gg.c
+++ b/drivers/clk/clk-efm32gg.c
@@ -36,7 +36,7 @@ static void __init efm32gg_cmu_init(struct device_node *np)
 	}
 
 	clk[clk_HFXO] = clk_register_fixed_rate(NULL, "HFXO", NULL,
-			CLK_IS_ROOT, 48000000);
+			0, 48000000);
 
 	clk[clk_HFPERCLKUSART0] = clk_register_gate(NULL, "HFPERCLK.USART0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 0, 0, NULL);
diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
index 83de57aeceea..053448e2453d 100644
--- a/drivers/clk/clk-fixed-factor.c
+++ b/drivers/clk/clk-fixed-factor.c
@@ -23,8 +23,6 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw)
-
 static unsigned long clk_factor_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
@@ -102,6 +100,19 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
 }
 EXPORT_SYMBOL_GPL(clk_register_fixed_factor);
 
+void clk_unregister_fixed_factor(struct clk *clk)
+{
+	struct clk_hw *hw;
+
+	hw = __clk_get_hw(clk);
+	if (!hw)
+		return;
+
+	clk_unregister(clk);
+	kfree(to_clk_fixed_factor(hw));
+}
+EXPORT_SYMBOL_GPL(clk_unregister_fixed_factor);
+
 #ifdef CONFIG_OF
 /**
  * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index f85ec8d1711f..cd9dc925b3f8 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -26,8 +26,6 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define to_clk_fixed_rate(_hw) container_of(_hw, struct clk_fixed_rate, hw)
-
 static unsigned long clk_fixed_rate_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
@@ -106,6 +104,19 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name,
 }
 EXPORT_SYMBOL_GPL(clk_register_fixed_rate);
 
+void clk_unregister_fixed_rate(struct clk *clk)
+{
+	struct clk_hw *hw;
+
+	hw = __clk_get_hw(clk);
+	if (!hw)
+		return;
+
+	clk_unregister(clk);
+	kfree(to_clk_fixed_rate(hw));
+}
+EXPORT_SYMBOL_GPL(clk_unregister_fixed_rate);
+
 #ifdef CONFIG_OF
 /**
  * of_fixed_clk_setup() - Setup function for simple fixed rate clock
@@ -125,8 +136,7 @@ void of_fixed_clk_setup(struct device_node *node)
 	of_property_read_string(node, "clock-output-names", &clk_name);
 
 	clk = clk_register_fixed_rate_with_accuracy(NULL, clk_name, NULL,
-						    CLK_IS_ROOT, rate,
-						    accuracy);
+						    0, rate, accuracy);
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }
diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c
index 5c4955e33f7a..1abcd76b4993 100644
--- a/drivers/clk/clk-fractional-divider.c
+++ b/drivers/clk/clk-fractional-divider.c
@@ -16,8 +16,6 @@
 #include <linux/slab.h>
 #include <linux/rational.h>
 
-#define to_clk_fd(_hw) container_of(_hw, struct clk_fractional_divider, hw)
-
 static unsigned long clk_fd_recalc_rate(struct clk_hw *hw,
 					unsigned long parent_rate)
 {
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index de0b322f5f58..d0d8ec8e1f1b 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -26,8 +26,6 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
-
 /*
  * It works on following logic:
  *
diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 7b09a265d79f..08f65acc5d57 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -20,6 +20,8 @@
 #include <linux/of_gpio.h>
 #include <linux/err.h>
 #include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
 
 /**
  * DOC: basic gpio gated clock which can be enabled and disabled
@@ -31,8 +33,6 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define to_clk_gpio(_hw) container_of(_hw, struct clk_gpio, hw)
-
 static int clk_gpio_gate_enable(struct clk_hw *hw)
 {
 	struct clk_gpio *clk = to_clk_gpio(hw);
@@ -201,134 +201,69 @@ struct clk *clk_register_gpio_mux(struct device *dev, const char *name,
 }
 EXPORT_SYMBOL_GPL(clk_register_gpio_mux);
 
-#ifdef CONFIG_OF
-/**
- * clk_register_get() has to be delayed, because -EPROBE_DEFER
- * can not be handled properly at of_clk_init() call time.
- */
-
-struct clk_gpio_delayed_register_data {
-	const char *gpio_name;
-	int num_parents;
-	const char **parent_names;
-	struct device_node *node;
-	struct mutex lock;
-	struct clk *clk;
-	struct clk *(*clk_register_get)(const char *name,
-			const char * const *parent_names, u8 num_parents,
-			unsigned gpio, bool active_low);
-};
-
-static struct clk *of_clk_gpio_delayed_register_get(
-		struct of_phandle_args *clkspec, void *_data)
+static int gpio_clk_driver_probe(struct platform_device *pdev)
 {
-	struct clk_gpio_delayed_register_data *data = _data;
-	struct clk *clk;
+	struct device_node *node = pdev->dev.of_node;
+	const char **parent_names, *gpio_name;
+	unsigned int num_parents;
 	int gpio;
 	enum of_gpio_flags of_flags;
+	struct clk *clk;
+	bool active_low, is_mux;
 
-	mutex_lock(&data->lock);
+	num_parents = of_clk_get_parent_count(node);
+	if (num_parents) {
+		parent_names = devm_kcalloc(&pdev->dev, num_parents,
+					    sizeof(char *), GFP_KERNEL);
+		if (!parent_names)
+			return -ENOMEM;
 
-	if (data->clk) {
-		mutex_unlock(&data->lock);
-		return data->clk;
+		of_clk_parent_fill(node, parent_names, num_parents);
+	} else {
+		parent_names = NULL;
 	}
 
-	gpio = of_get_named_gpio_flags(data->node, data->gpio_name, 0,
-			&of_flags);
+	is_mux = of_device_is_compatible(node, "gpio-mux-clock");
+
+	gpio_name = is_mux ? "select-gpios" : "enable-gpios";
+	gpio = of_get_named_gpio_flags(node, gpio_name, 0, &of_flags);
 	if (gpio < 0) {
-		mutex_unlock(&data->lock);
 		if (gpio == -EPROBE_DEFER)
 			pr_debug("%s: %s: GPIOs not yet available, retry later\n",
-					data->node->name, __func__);
+					node->name, __func__);
 		else
 			pr_err("%s: %s: Can't get '%s' DT property\n",
-					data->node->name, __func__,
-					data->gpio_name);
-		return ERR_PTR(gpio);
+					node->name, __func__,
+					gpio_name);
+		return gpio;
 	}
 
-	clk = data->clk_register_get(data->node->name, data->parent_names,
-			data->num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
-	if (IS_ERR(clk))
-		goto out;
-
-	data->clk = clk;
-out:
-	mutex_unlock(&data->lock);
-
-	return clk;
-}
-
-static struct clk *of_clk_gpio_gate_delayed_register_get(const char *name,
-		const char * const *parent_names, u8 num_parents,
-		unsigned gpio, bool active_low)
-{
-	return clk_register_gpio_gate(NULL, name, parent_names ?
-			parent_names[0] : NULL, gpio, active_low, 0);
-}
-
-static struct clk *of_clk_gpio_mux_delayed_register_get(const char *name,
-		const char * const *parent_names, u8 num_parents, unsigned gpio,
-		bool active_low)
-{
-	return clk_register_gpio_mux(NULL, name, parent_names, num_parents,
-			gpio, active_low, 0);
-}
-
-static void __init of_gpio_clk_setup(struct device_node *node,
-		const char *gpio_name,
-		struct clk *(*clk_register_get)(const char *name,
-				const char * const *parent_names,
-				u8 num_parents,
-				unsigned gpio, bool active_low))
-{
-	struct clk_gpio_delayed_register_data *data;
-	const char **parent_names;
-	int i, num_parents;
-
-	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 0)
-		num_parents = 0;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return;
-
-	if (num_parents) {
-		parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
-		if (!parent_names) {
-			kfree(data);
-			return;
-		}
-
-		for (i = 0; i < num_parents; i++)
-			parent_names[i] = of_clk_get_parent_name(node, i);
-	} else {
-		parent_names = NULL;
-	}
+	active_low = of_flags & OF_GPIO_ACTIVE_LOW;
 
-	data->num_parents = num_parents;
-	data->parent_names = parent_names;
-	data->node = node;
-	data->gpio_name = gpio_name;
-	data->clk_register_get = clk_register_get;
-	mutex_init(&data->lock);
+	if (is_mux)
+		clk = clk_register_gpio_mux(&pdev->dev, node->name,
+				parent_names, num_parents, gpio, active_low, 0);
+	else
+		clk = clk_register_gpio_gate(&pdev->dev, node->name,
+				parent_names ?  parent_names[0] : NULL, gpio,
+				active_low, 0);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
 
-	of_clk_add_provider(node, of_clk_gpio_delayed_register_get, data);
+	return of_clk_add_provider(node, of_clk_src_simple_get, clk);
 }
 
-static void __init of_gpio_gate_clk_setup(struct device_node *node)
-{
-	of_gpio_clk_setup(node, "enable-gpios",
-		of_clk_gpio_gate_delayed_register_get);
-}
-CLK_OF_DECLARE(gpio_gate_clk, "gpio-gate-clock", of_gpio_gate_clk_setup);
+static const struct of_device_id gpio_clk_match_table[] = {
+	{ .compatible = "gpio-mux-clock" },
+	{ .compatible = "gpio-gate-clock" },
+	{ }
+};
 
-void __init of_gpio_mux_clk_setup(struct device_node *node)
-{
-	of_gpio_clk_setup(node, "select-gpios",
-		of_clk_gpio_mux_delayed_register_get);
-}
-CLK_OF_DECLARE(gpio_mux_clk, "gpio-mux-clock", of_gpio_mux_clk_setup);
-#endif
+static struct platform_driver gpio_clk_driver = {
+	.probe		= gpio_clk_driver_probe,
+	.driver		= {
+		.name	= "gpio-clk",
+		.of_match_table = gpio_clk_match_table,
+	},
+};
+builtin_platform_driver(gpio_clk_driver);
diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c
index 446c2fe76dc2..9b6f2772e948 100644
--- a/drivers/clk/clk-max77686.c
+++ b/drivers/clk/clk-max77686.c
@@ -38,17 +38,14 @@ static struct clk_init_data max77686_clks_init[MAX77686_CLKS_NUM] = {
 	[MAX77686_CLK_AP] = {
 		.name = "32khz_ap",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77686_CLK_CP] = {
 		.name = "32khz_cp",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77686_CLK_PMIC] = {
 		.name = "32khz_pmic",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 
diff --git a/drivers/clk/clk-max77802.c b/drivers/clk/clk-max77802.c
index 4a89f7979ba0..355dd2e522c3 100644
--- a/drivers/clk/clk-max77802.c
+++ b/drivers/clk/clk-max77802.c
@@ -39,12 +39,10 @@ static struct clk_init_data max77802_clks_init[MAX77802_CLKS_NUM] = {
 	[MAX77802_CLK_32K_AP] = {
 		.name = "32khz_ap",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[MAX77802_CLK_32K_CP] = {
 		.name = "32khz_cp",
 		.ops = &max_gen_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 
diff --git a/drivers/clk/clk-mb86s7x.c b/drivers/clk/clk-mb86s7x.c
index f39c25a22f43..e0817754ca3e 100644
--- a/drivers/clk/clk-mb86s7x.c
+++ b/drivers/clk/clk-mb86s7x.c
@@ -217,7 +217,7 @@ static struct clk *crg11_get(struct of_phandle_args *clkspec, void *data)
 	init.name = clkp;
 	init.num_parents = 0;
 	init.ops = &crg_port_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	crgclk->hw.init = &init;
 	crgclk->cntrlr = cntrlr;
 	crgclk->domain = domain;
@@ -341,7 +341,7 @@ struct clk *mb86s7x_clclk_register(struct device *cpu_dev)
 
 	init.name = dev_name(cpu_dev);
 	init.ops = &clk_clc_ops;
-	init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE;
+	init.flags = CLK_GET_RATE_NOCACHE;
 	init.num_parents = 0;
 
 	return devm_clk_register(cpu_dev, &clc->hw);
diff --git a/drivers/clk/clk-multiplier.c b/drivers/clk/clk-multiplier.c
index fe7806506bf3..9e449c7b751c 100644
--- a/drivers/clk/clk-multiplier.c
+++ b/drivers/clk/clk-multiplier.c
@@ -14,8 +14,6 @@
 #include <linux/of.h>
 #include <linux/slab.h>
 
-#define to_clk_multiplier(_hw) container_of(_hw, struct clk_multiplier, hw)
-
 static unsigned long __get_mult(struct clk_multiplier *mult,
 				unsigned long rate,
 				unsigned long parent_rate)
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 5ed03c8a8df9..252188fd8bcd 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -26,8 +26,6 @@
  * parent - parent is adjustable through clk_set_parent
  */
 
-#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw)
-
 static u8 clk_mux_get_parent(struct clk_hw *hw)
 {
 	struct clk_mux *mux = to_clk_mux(hw);
diff --git a/drivers/clk/clk-palmas.c b/drivers/clk/clk-palmas.c
index 8e3039f0c3f9..9c0b8e6b1ab3 100644
--- a/drivers/clk/clk-palmas.c
+++ b/drivers/clk/clk-palmas.c
@@ -44,7 +44,7 @@ struct palmas_clock_info {
 	struct clk *clk;
 	struct clk_hw hw;
 	struct palmas *palmas;
-	struct palmas_clk32k_desc *clk_desc;
+	const struct palmas_clk32k_desc *clk_desc;
 	int ext_control_pin;
 };
 
@@ -125,10 +125,10 @@ static struct clk_ops palmas_clks_ops = {
 
 struct palmas_clks_of_match_data {
 	struct clk_init_data init;
-	struct palmas_clk32k_desc desc;
+	const struct palmas_clk32k_desc desc;
 };
 
-static struct palmas_clks_of_match_data palmas_of_clk32kg = {
+static const struct palmas_clks_of_match_data palmas_of_clk32kg = {
 	.init = {
 		.name = "clk32kg",
 		.ops = &palmas_clks_ops,
@@ -144,7 +144,7 @@ static struct palmas_clks_of_match_data palmas_of_clk32kg = {
 	},
 };
 
-static struct palmas_clks_of_match_data palmas_of_clk32kgaudio = {
+static const struct palmas_clks_of_match_data palmas_of_clk32kgaudio = {
 	.init = {
 		.name = "clk32kgaudio",
 		.ops = &palmas_clks_ops,
@@ -240,14 +240,14 @@ static int palmas_clks_probe(struct platform_device *pdev)
 {
 	struct palmas *palmas = dev_get_drvdata(pdev->dev.parent);
 	struct device_node *node = pdev->dev.of_node;
-	struct palmas_clks_of_match_data *match_data;
-	const struct of_device_id *match;
+	const struct palmas_clks_of_match_data *match_data;
 	struct palmas_clock_info *cinfo;
 	struct clk *clk;
 	int ret;
 
-	match = of_match_device(palmas_clks_of_match, &pdev->dev);
-	match_data = (struct palmas_clks_of_match_data *)match->data;
+	match_data = of_device_get_match_data(&pdev->dev);
+	if (!match_data)
+		return 1;
 
 	cinfo = devm_kzalloc(&pdev->dev, sizeof(*cinfo), GFP_KERNEL);
 	if (!cinfo)
diff --git a/drivers/clk/clk-pwm.c b/drivers/clk/clk-pwm.c
index 328fcfcefd8c..883045814dac 100644
--- a/drivers/clk/clk-pwm.c
+++ b/drivers/clk/clk-pwm.c
@@ -95,7 +95,7 @@ static int clk_pwm_probe(struct platform_device *pdev)
 
 	init.name = clk_name;
 	init.ops = &clk_pwm_ops;
-	init.flags = CLK_IS_BASIC | CLK_IS_ROOT;
+	init.flags = CLK_IS_BASIC;
 	init.num_parents = 0;
 
 	clk_pwm->pwm = pwm;
diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index d266299dfdb1..f8c83977c7fa 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c
@@ -28,11 +28,6 @@
 #include <linux/mfd/samsung/s5m8767.h>
 #include <linux/mfd/samsung/core.h>
 
-#define s2mps11_name(a) (a->hw.init->name)
-
-static struct clk **clk_table;
-static struct clk_onecell_data clk_data;
-
 enum {
 	S2MPS11_CLK_AP = 0,
 	S2MPS11_CLK_CP,
@@ -99,52 +94,19 @@ static struct clk_ops s2mps11_clk_ops = {
 	.recalc_rate	= s2mps11_clk_recalc_rate,
 };
 
+/* This s2mps11_clks_init tructure is common to s2mps11, s2mps13 and s2mps14 */
 static struct clk_init_data s2mps11_clks_init[S2MPS11_CLKS_NUM] = {
 	[S2MPS11_CLK_AP] = {
 		.name = "s2mps11_ap",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[S2MPS11_CLK_CP] = {
 		.name = "s2mps11_cp",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 	[S2MPS11_CLK_BT] = {
 		.name = "s2mps11_bt",
 		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
-	},
-};
-
-static struct clk_init_data s2mps13_clks_init[S2MPS11_CLKS_NUM] = {
-	[S2MPS11_CLK_AP] = {
-		.name = "s2mps13_ap",
-		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
-	},
-	[S2MPS11_CLK_CP] = {
-		.name = "s2mps13_cp",
-		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
-	},
-	[S2MPS11_CLK_BT] = {
-		.name = "s2mps13_bt",
-		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
-	},
-};
-
-static struct clk_init_data s2mps14_clks_init[S2MPS11_CLKS_NUM] = {
-	[S2MPS11_CLK_AP] = {
-		.name = "s2mps14_ap",
-		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
-	},
-	[S2MPS11_CLK_BT] = {
-		.name = "s2mps14_bt",
-		.ops = &s2mps11_clk_ops,
-		.flags = CLK_IS_ROOT,
 	},
 };
 
@@ -164,12 +126,9 @@ static struct device_node *s2mps11_clk_parse_dt(struct platform_device *pdev,
 		return ERR_PTR(-EINVAL);
 	}
 
-	for (i = 0; i < S2MPS11_CLKS_NUM; i++) {
-		if (!clks_init[i].name)
-			continue; /* Skip clocks not present in some devices */
+	for (i = 0; i < S2MPS11_CLKS_NUM; i++)
 		of_property_read_string_index(clk_np, "clock-output-names", i,
 				&clks_init[i].name);
-	}
 
 	return clk_np;
 }
@@ -177,39 +136,38 @@ static struct device_node *s2mps11_clk_parse_dt(struct platform_device *pdev,
 static int s2mps11_clk_probe(struct platform_device *pdev)
 {
 	struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	struct s2mps11_clk *s2mps11_clks, *s2mps11_clk;
+	struct s2mps11_clk *s2mps11_clks;
+	struct clk_onecell_data *clk_data;
 	unsigned int s2mps11_reg;
-	struct clk_init_data *clks_init;
 	int i, ret = 0;
+	enum sec_device_type hwid = platform_get_device_id(pdev)->driver_data;
 
 	s2mps11_clks = devm_kcalloc(&pdev->dev, S2MPS11_CLKS_NUM,
-				sizeof(*s2mps11_clk), GFP_KERNEL);
+				sizeof(*s2mps11_clks), GFP_KERNEL);
 	if (!s2mps11_clks)
 		return -ENOMEM;
 
-	s2mps11_clk = s2mps11_clks;
+	clk_data = devm_kzalloc(&pdev->dev, sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
 
-	clk_table = devm_kcalloc(&pdev->dev, S2MPS11_CLKS_NUM,
+	clk_data->clks = devm_kcalloc(&pdev->dev, S2MPS11_CLKS_NUM,
 				sizeof(struct clk *), GFP_KERNEL);
-	if (!clk_table)
+	if (!clk_data->clks)
 		return -ENOMEM;
 
-	switch(platform_get_device_id(pdev)->driver_data) {
+	switch (hwid) {
 	case S2MPS11X:
 		s2mps11_reg = S2MPS11_REG_RTC_CTRL;
-		clks_init = s2mps11_clks_init;
 		break;
 	case S2MPS13X:
 		s2mps11_reg = S2MPS13_REG_RTCCTRL;
-		clks_init = s2mps13_clks_init;
 		break;
 	case S2MPS14X:
 		s2mps11_reg = S2MPS14_REG_RTCCTRL;
-		clks_init = s2mps14_clks_init;
 		break;
 	case S5M8767X:
 		s2mps11_reg = S5M8767_REG_CTRL1;
-		clks_init = s2mps11_clks_init;
 		break;
 	default:
 		dev_err(&pdev->dev, "Invalid device type\n");
@@ -217,46 +175,39 @@ static int s2mps11_clk_probe(struct platform_device *pdev)
 	}
 
 	/* Store clocks of_node in first element of s2mps11_clks array */
-	s2mps11_clks->clk_np = s2mps11_clk_parse_dt(pdev, clks_init);
+	s2mps11_clks->clk_np = s2mps11_clk_parse_dt(pdev, s2mps11_clks_init);
 	if (IS_ERR(s2mps11_clks->clk_np))
 		return PTR_ERR(s2mps11_clks->clk_np);
 
-	for (i = 0; i < S2MPS11_CLKS_NUM; i++, s2mps11_clk++) {
-		if (!clks_init[i].name)
+	for (i = 0; i < S2MPS11_CLKS_NUM; i++) {
+		if (i == S2MPS11_CLK_CP && hwid == S2MPS14X)
 			continue; /* Skip clocks not present in some devices */
-		s2mps11_clk->iodev = iodev;
-		s2mps11_clk->hw.init = &clks_init[i];
-		s2mps11_clk->mask = 1 << i;
-		s2mps11_clk->reg = s2mps11_reg;
-
-		s2mps11_clk->clk = devm_clk_register(&pdev->dev,
-							&s2mps11_clk->hw);
-		if (IS_ERR(s2mps11_clk->clk)) {
+		s2mps11_clks[i].iodev = iodev;
+		s2mps11_clks[i].hw.init = &s2mps11_clks_init[i];
+		s2mps11_clks[i].mask = 1 << i;
+		s2mps11_clks[i].reg = s2mps11_reg;
+
+		s2mps11_clks[i].clk = devm_clk_register(&pdev->dev,
+							&s2mps11_clks[i].hw);
+		if (IS_ERR(s2mps11_clks[i].clk)) {
 			dev_err(&pdev->dev, "Fail to register : %s\n",
-						s2mps11_name(s2mps11_clk));
-			ret = PTR_ERR(s2mps11_clk->clk);
+						s2mps11_clks_init[i].name);
+			ret = PTR_ERR(s2mps11_clks[i].clk);
 			goto err_reg;
 		}
 
-		s2mps11_clk->lookup = clkdev_create(s2mps11_clk->clk,
-					s2mps11_name(s2mps11_clk), NULL);
-		if (!s2mps11_clk->lookup) {
+		s2mps11_clks[i].lookup = clkdev_create(s2mps11_clks[i].clk,
+					s2mps11_clks_init[i].name, NULL);
+		if (!s2mps11_clks[i].lookup) {
 			ret = -ENOMEM;
 			goto err_reg;
 		}
+		clk_data->clks[i] = s2mps11_clks[i].clk;
 	}
 
-	for (i = 0; i < S2MPS11_CLKS_NUM; i++) {
-		/* Skip clocks not present on S2MPS14 */
-		if (!clks_init[i].name)
-			continue;
-		clk_table[i] = s2mps11_clks[i].clk;
-	}
-
-	clk_data.clks = clk_table;
-	clk_data.clk_num = S2MPS11_CLKS_NUM;
+	clk_data->clk_num = S2MPS11_CLKS_NUM;
 	of_clk_add_provider(s2mps11_clks->clk_np, of_clk_src_onecell_get,
-			&clk_data);
+			clk_data);
 
 	platform_set_drvdata(pdev, s2mps11_clks);
 
diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index 89e9ca78bb94..6962ee5d1e9a 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c
@@ -155,7 +155,7 @@ scpi_clk_ops_init(struct device *dev, const struct of_device_id *match,
 	unsigned long min = 0, max = 0;
 
 	init.name = name;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	init.ops = match->data;
 	sclk->hw.init = &init;
diff --git a/drivers/clk/clk-si514.c b/drivers/clk/clk-si514.c
index 6af7dce54241..ceef25b0990b 100644
--- a/drivers/clk/clk-si514.c
+++ b/drivers/clk/clk-si514.c
@@ -313,7 +313,7 @@ static int si514_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	init.ops = &si514_clk_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	data->hw.init = &init;
 	data->i2c_client = client;
diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index 850316ac8831..b1bc12c045d3 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -1495,7 +1495,7 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	if (drvdata->variant == SI5351_VARIANT_B) {
 		init.name = si5351_pll_names[2];
 		init.ops = &si5351_vxco_ops;
-		init.flags = CLK_IS_ROOT;
+		init.flags = 0;
 		init.parent_names = NULL;
 		init.num_parents = 0;
 	} else {
diff --git a/drivers/clk/clk-si570.c b/drivers/clk/clk-si570.c
index cf478aa9fa5d..d56648521a95 100644
--- a/drivers/clk/clk-si570.c
+++ b/drivers/clk/clk-si570.c
@@ -418,7 +418,7 @@ static int si570_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	init.ops = &si570_clk_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 	data->hw.init = &init;
 	data->i2c_client = client;
diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c
index 37e928846ec5..b0f76a84f1e9 100644
--- a/drivers/clk/clk-vt8500.c
+++ b/drivers/clk/clk-vt8500.c
@@ -355,7 +355,7 @@ CLK_OF_DECLARE(vt8500_device, "via,vt8500-device-clock", vtwm_device_clk_init);
 #define WM8850_BITS_TO_VAL(m, d1, d2)					\
 		((((m / 2) - 1) << 16) | ((d1 - 1) << 8) | d2)
 
-static void vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *prediv)
 {
 	unsigned long tclk;
@@ -365,7 +365,7 @@ static void vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 		pr_err("%s: requested rate out of range\n", __func__);
 		*multiplier = 0;
 		*prediv = 1;
-		return;
+		return -EINVAL;
 	}
 	if (rate <= parent_rate * 31)
 		/* use the prediv to double the resolution */
@@ -379,12 +379,15 @@ static void vt8500_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 	if (tclk != rate)
 		pr_warn("%s: requested rate %lu, found rate %lu\n", __func__,
 								rate, tclk);
+
+	return 0;
 }
 
-static void wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul, div1;
+	int div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -403,7 +406,7 @@ static void wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -414,12 +417,19 @@ static void wm8650_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
 static u32 wm8750_get_filter(u32 parent_rate, u32 divisor1)
@@ -449,10 +459,11 @@ static u32 wm8750_get_filter(u32 parent_rate, u32 divisor1)
 	return 0;
 }
 
-static void wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *filter, u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul;
+	int div1, div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -472,7 +483,7 @@ static void wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -483,6 +494,11 @@ static void wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
@@ -491,12 +507,15 @@ static void wm8750_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
-static void wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
+static int wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				u32 *multiplier, u32 *divisor1, u32 *divisor2)
 {
-	u32 mul, div1, div2;
+	u32 mul;
+	int div1, div2;
 	u32 best_mul, best_div1, best_div2;
 	unsigned long tclk, rate_err, best_err;
 
@@ -516,7 +535,7 @@ static void wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 					*multiplier = mul;
 					*divisor1 = div1;
 					*divisor2 = div2;
-					return;
+					return 0;
 				}
 
 				if (rate_err < best_err) {
@@ -527,6 +546,11 @@ static void wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 				}
 			}
 
+	if (best_err == (unsigned long)-1) {
+		pr_warn("%s: impossible rate %lu\n", __func__, rate);
+		return -EINVAL;
+	}
+
 	/* if we got here, it wasn't an exact match */
 	pr_warn("%s: requested rate %lu, found rate %lu\n", __func__, rate,
 							rate - best_err);
@@ -534,6 +558,8 @@ static void wm8850_find_pll_bits(unsigned long rate, unsigned long parent_rate,
 	*multiplier = best_mul;
 	*divisor1 = best_div1;
 	*divisor2 = best_div2;
+
+	return 0;
 }
 
 static int vtwm_pll_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -543,31 +569,39 @@ static int vtwm_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 	u32 filter, mul, div1, div2;
 	u32 pll_val;
 	unsigned long flags = 0;
+	int ret;
 
 	/* sanity check */
 
 	switch (pll->type) {
 	case PLL_TYPE_VT8500:
-		vt8500_find_pll_bits(rate, parent_rate, &mul, &div1);
-		pll_val = VT8500_BITS_TO_VAL(mul, div1);
+		ret = vt8500_find_pll_bits(rate, parent_rate, &mul, &div1);
+		if (!ret)
+			pll_val = VT8500_BITS_TO_VAL(mul, div1);
 		break;
 	case PLL_TYPE_WM8650:
-		wm8650_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
-		pll_val = WM8650_BITS_TO_VAL(mul, div1, div2);
+		ret = wm8650_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8650_BITS_TO_VAL(mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8750:
-		wm8750_find_pll_bits(rate, parent_rate, &filter, &mul, &div1, &div2);
-		pll_val = WM8750_BITS_TO_VAL(filter, mul, div1, div2);
+		ret = wm8750_find_pll_bits(rate, parent_rate, &filter, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8750_BITS_TO_VAL(filter, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8850:
-		wm8850_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
-		pll_val = WM8850_BITS_TO_VAL(mul, div1, div2);
+		ret = wm8850_find_pll_bits(rate, parent_rate, &mul, &div1, &div2);
+		if (!ret)
+			pll_val = WM8850_BITS_TO_VAL(mul, div1, div2);
 		break;
 	default:
 		pr_err("%s: invalid pll type\n", __func__);
-		return 0;
+		ret = -EINVAL;
 	}
 
+	if (ret)
+		return ret;
+
 	spin_lock_irqsave(pll->lock, flags);
 
 	vt8500_pmc_wait_busy();
@@ -585,28 +619,36 @@ static long vtwm_pll_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct clk_pll *pll = to_clk_pll(hw);
 	u32 filter, mul, div1, div2;
 	long round_rate;
+	int ret;
 
 	switch (pll->type) {
 	case PLL_TYPE_VT8500:
-		vt8500_find_pll_bits(rate, *prate, &mul, &div1);
-		round_rate = VT8500_BITS_TO_FREQ(*prate, mul, div1);
+		ret = vt8500_find_pll_bits(rate, *prate, &mul, &div1);
+		if (!ret)
+			round_rate = VT8500_BITS_TO_FREQ(*prate, mul, div1);
 		break;
 	case PLL_TYPE_WM8650:
-		wm8650_find_pll_bits(rate, *prate, &mul, &div1, &div2);
-		round_rate = WM8650_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8650_find_pll_bits(rate, *prate, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8650_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8750:
-		wm8750_find_pll_bits(rate, *prate, &filter, &mul, &div1, &div2);
-		round_rate = WM8750_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8750_find_pll_bits(rate, *prate, &filter, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8750_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	case PLL_TYPE_WM8850:
-		wm8850_find_pll_bits(rate, *prate, &mul, &div1, &div2);
-		round_rate = WM8850_BITS_TO_FREQ(*prate, mul, div1, div2);
+		ret = wm8850_find_pll_bits(rate, *prate, &mul, &div1, &div2);
+		if (!ret)
+			round_rate = WM8850_BITS_TO_FREQ(*prate, mul, div1, div2);
 		break;
 	default:
-		round_rate = 0;
+		ret = -EINVAL;
 	}
 
+	if (ret)
+		return ret;
+
 	return round_rate;
 }
 
diff --git a/drivers/clk/clk-xgene.c b/drivers/clk/clk-xgene.c
index 10224b01b97c..d73450b60b28 100644
--- a/drivers/clk/clk-xgene.c
+++ b/drivers/clk/clk-xgene.c
@@ -29,7 +29,9 @@
 #include <linux/of_address.h>
 
 /* Register SCU_PCPPLL bit fields */
-#define N_DIV_RD(src)			(((src) & 0x000001ff))
+#define N_DIV_RD(src)			((src) & 0x000001ff)
+#define SC_N_DIV_RD(src)		((src) & 0x0000007f)
+#define SC_OUTDIV2(src)			(((src) & 0x00000100) >> 8)
 
 /* Register SCU_SOCPLL bit fields */
 #define CLKR_RD(src)			(((src) & 0x07000000)>>24)
@@ -48,7 +50,7 @@ static inline u32 xgene_clk_read(void __iomem *csr)
 
 static inline void xgene_clk_write(u32 data, void __iomem *csr)
 {
-	return writel_relaxed(data, csr);
+	writel_relaxed(data, csr);
 }
 
 /* PLL Clock */
@@ -63,6 +65,7 @@ struct xgene_clk_pll {
 	spinlock_t	*lock;
 	u32		pll_offset;
 	enum xgene_pll_type	type;
+	int		version;
 };
 
 #define to_xgene_clk_pll(_hw) container_of(_hw, struct xgene_clk_pll, hw)
@@ -92,27 +95,37 @@ static unsigned long xgene_clk_pll_recalc_rate(struct clk_hw *hw,
 
 	pll = xgene_clk_read(pllclk->reg + pllclk->pll_offset);
 
-	if (pllclk->type == PLL_TYPE_PCP) {
-		/*
-		 * PLL VCO = Reference clock * NF
-		 * PCP PLL = PLL_VCO / 2
-		 */
-		nout = 2;
-		fvco = parent_rate * (N_DIV_RD(pll) + 4);
+	if (pllclk->version <= 1) {
+		if (pllclk->type == PLL_TYPE_PCP) {
+			/*
+			* PLL VCO = Reference clock * NF
+			* PCP PLL = PLL_VCO / 2
+			*/
+			nout = 2;
+			fvco = parent_rate * (N_DIV_RD(pll) + 4);
+		} else {
+			/*
+			* Fref = Reference Clock / NREF;
+			* Fvco = Fref * NFB;
+			* Fout = Fvco / NOUT;
+			*/
+			nref = CLKR_RD(pll) + 1;
+			nout = CLKOD_RD(pll) + 1;
+			nfb = CLKF_RD(pll);
+			fref = parent_rate / nref;
+			fvco = fref * nfb;
+		}
 	} else {
 		/*
-		 * Fref = Reference Clock / NREF;
-		 * Fvco = Fref * NFB;
-		 * Fout = Fvco / NOUT;
+		 * fvco = Reference clock * FBDIVC
+		 * PLL freq = fvco / NOUT
 		 */
-		nref = CLKR_RD(pll) + 1;
-		nout = CLKOD_RD(pll) + 1;
-		nfb = CLKF_RD(pll);
-		fref = parent_rate / nref;
-		fvco = fref * nfb;
+		nout = SC_OUTDIV2(pll) ? 2 : 3;
+		fvco = parent_rate * SC_N_DIV_RD(pll);
 	}
-	pr_debug("%s pll recalc rate %ld parent %ld\n", clk_hw_get_name(hw),
-		fvco / nout, parent_rate);
+	pr_debug("%s pll recalc rate %ld parent %ld version %d\n",
+		 clk_hw_get_name(hw), fvco / nout, parent_rate,
+		 pllclk->version);
 
 	return fvco / nout;
 }
@@ -125,7 +138,7 @@ static const struct clk_ops xgene_clk_pll_ops = {
 static struct clk *xgene_register_clk_pll(struct device *dev,
 	const char *name, const char *parent_name,
 	unsigned long flags, void __iomem *reg, u32 pll_offset,
-	u32 type, spinlock_t *lock)
+	u32 type, spinlock_t *lock, int version)
 {
 	struct xgene_clk_pll *apmclk;
 	struct clk *clk;
@@ -144,6 +157,7 @@ static struct clk *xgene_register_clk_pll(struct device *dev,
 	init.parent_names = parent_name ? &parent_name : NULL;
 	init.num_parents = parent_name ? 1 : 0;
 
+	apmclk->version = version;
 	apmclk->reg = reg;
 	apmclk->lock = lock;
 	apmclk->pll_offset = pll_offset;
@@ -160,26 +174,37 @@ static struct clk *xgene_register_clk_pll(struct device *dev,
 	return clk;
 }
 
+static int xgene_pllclk_version(struct device_node *np)
+{
+	if (of_device_is_compatible(np, "apm,xgene-socpll-clock"))
+		return 1;
+	if (of_device_is_compatible(np, "apm,xgene-pcppll-clock"))
+		return 1;
+	return 2;
+}
+
 static void xgene_pllclk_init(struct device_node *np, enum xgene_pll_type pll_type)
 {
-        const char *clk_name = np->full_name;
-        struct clk *clk;
-        void __iomem *reg;
+	const char *clk_name = np->full_name;
+	struct clk *clk;
+	void __iomem *reg;
+	int version = xgene_pllclk_version(np);
 
-        reg = of_iomap(np, 0);
-        if (reg == NULL) {
-                pr_err("Unable to map CSR register for %s\n", np->full_name);
-                return;
-        }
-        of_property_read_string(np, "clock-output-names", &clk_name);
-        clk = xgene_register_clk_pll(NULL,
-                        clk_name, of_clk_get_parent_name(np, 0),
-                        CLK_IS_ROOT, reg, 0, pll_type, &clk_lock);
-        if (!IS_ERR(clk)) {
-                of_clk_add_provider(np, of_clk_src_simple_get, clk);
-                clk_register_clkdev(clk, clk_name, NULL);
-                pr_debug("Add %s clock PLL\n", clk_name);
-        }
+	reg = of_iomap(np, 0);
+	if (reg == NULL) {
+		pr_err("Unable to map CSR register for %s\n", np->full_name);
+		return;
+	}
+	of_property_read_string(np, "clock-output-names", &clk_name);
+	clk = xgene_register_clk_pll(NULL,
+			clk_name, of_clk_get_parent_name(np, 0),
+			CLK_IS_ROOT, reg, 0, pll_type, &clk_lock,
+			version);
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(np, of_clk_src_simple_get, clk);
+		clk_register_clkdev(clk, clk_name, NULL);
+		pr_debug("Add %s clock PLL\n", clk_name);
+	}
 }
 
 static void xgene_socpllclk_init(struct device_node *np)
@@ -351,8 +376,8 @@ static int xgene_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 		/* Set new divider */
 		data = xgene_clk_read(pclk->param.divider_reg +
 				pclk->param.reg_divider_offset);
-		data &= ~((1 << pclk->param.reg_divider_width) - 1)
-				<< pclk->param.reg_divider_shift;
+		data &= ~(((1 << pclk->param.reg_divider_width) - 1)
+				<< pclk->param.reg_divider_shift);
 		data |= divider;
 		xgene_clk_write(data, pclk->param.divider_reg +
 					pclk->param.reg_divider_offset);
@@ -460,7 +485,7 @@ static void __init xgene_devclk_init(struct device_node *np)
 		rc = of_address_to_resource(np, i, &res);
 		if (rc != 0) {
 			if (i == 0) {
-				pr_err("no DTS register for %s\n", 
+				pr_err("no DTS register for %s\n",
 					np->full_name);
 				return;
 			}
@@ -518,4 +543,8 @@ err:
 
 CLK_OF_DECLARE(xgene_socpll_clock, "apm,xgene-socpll-clock", xgene_socpllclk_init);
 CLK_OF_DECLARE(xgene_pcppll_clock, "apm,xgene-pcppll-clock", xgene_pcppllclk_init);
+CLK_OF_DECLARE(xgene_socpll_v2_clock, "apm,xgene-socpll-v2-clock",
+	       xgene_socpllclk_init);
+CLK_OF_DECLARE(xgene_pcppll_v2_clock, "apm,xgene-pcppll-v2-clock",
+	       xgene_pcppllclk_init);
 CLK_OF_DECLARE(xgene_dev_clock, "apm,xgene-device-clock", xgene_devclk_init);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b4db67a446c8..fb74dc1f7520 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -350,13 +350,12 @@ static struct clk_core *clk_core_get_parent_by_index(struct clk_core *core,
 {
 	if (!core || index >= core->num_parents)
 		return NULL;
-	else if (!core->parents)
-		return clk_core_lookup(core->parent_names[index]);
-	else if (!core->parents[index])
-		return core->parents[index] =
-			clk_core_lookup(core->parent_names[index]);
-	else
-		return core->parents[index];
+
+	if (!core->parents[index])
+		core->parents[index] =
+				clk_core_lookup(core->parent_names[index]);
+
+	return core->parents[index];
 }
 
 struct clk_hw *
@@ -386,7 +385,7 @@ static unsigned long clk_core_get_rate_nolock(struct clk_core *core)
 
 	ret = core->rate;
 
-	if (core->flags & CLK_IS_ROOT)
+	if (!core->num_parents)
 		goto out;
 
 	if (!core->parent)
@@ -1067,30 +1066,12 @@ static int clk_fetch_parent_index(struct clk_core *core,
 {
 	int i;
 
-	if (!core->parents) {
-		core->parents = kcalloc(core->num_parents,
-					sizeof(struct clk *), GFP_KERNEL);
-		if (!core->parents)
-			return -ENOMEM;
-	}
-
-	/*
-	 * find index of new parent clock using cached parent ptrs,
-	 * or if not yet cached, use string name comparison and cache
-	 * them now to avoid future calls to clk_core_lookup.
-	 */
-	for (i = 0; i < core->num_parents; i++) {
-		if (core->parents[i] == parent)
-			return i;
-
-		if (core->parents[i])
-			continue;
+	if (!parent)
+		return -EINVAL;
 
-		if (!strcmp(core->parent_names[i], parent->name)) {
-			core->parents[i] = clk_core_lookup(parent->name);
+	for (i = 0; i < core->num_parents; i++)
+		if (clk_core_get_parent_by_index(core, i) == parent)
 			return i;
-		}
-	}
 
 	return -EINVAL;
 }
@@ -1677,56 +1658,14 @@ struct clk *clk_get_parent(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_get_parent);
 
-/*
- * .get_parent is mandatory for clocks with multiple possible parents.  It is
- * optional for single-parent clocks.  Always call .get_parent if it is
- * available and WARN if it is missing for multi-parent clocks.
- *
- * For single-parent clocks without .get_parent, first check to see if the
- * .parents array exists, and if so use it to avoid an expensive tree
- * traversal.  If .parents does not exist then walk the tree.
- */
 static struct clk_core *__clk_init_parent(struct clk_core *core)
 {
-	struct clk_core *ret = NULL;
-	u8 index;
+	u8 index = 0;
 
-	/* handle the trivial cases */
+	if (core->num_parents > 1 && core->ops->get_parent)
+		index = core->ops->get_parent(core->hw);
 
-	if (!core->num_parents)
-		goto out;
-
-	if (core->num_parents == 1) {
-		if (IS_ERR_OR_NULL(core->parent))
-			core->parent = clk_core_lookup(core->parent_names[0]);
-		ret = core->parent;
-		goto out;
-	}
-
-	if (!core->ops->get_parent) {
-		WARN(!core->ops->get_parent,
-			"%s: multi-parent clocks must implement .get_parent\n",
-			__func__);
-		goto out;
-	}
-
-	/*
-	 * Do our best to cache parent clocks in core->parents.  This prevents
-	 * unnecessary and expensive lookups.  We don't set core->parent here;
-	 * that is done by the calling function.
-	 */
-
-	index = core->ops->get_parent(core->hw);
-
-	if (!core->parents)
-		core->parents =
-			kcalloc(core->num_parents, sizeof(struct clk *),
-					GFP_KERNEL);
-
-	ret = clk_core_get_parent_by_index(core, index);
-
-out:
-	return ret;
+	return clk_core_get_parent_by_index(core, index);
 }
 
 static void clk_core_reparent(struct clk_core *core,
@@ -1809,13 +1748,13 @@ static int clk_core_set_parent(struct clk_core *core, struct clk_core *parent)
 	/* try finding the new parent index */
 	if (parent) {
 		p_index = clk_fetch_parent_index(core, parent);
-		p_rate = parent->rate;
 		if (p_index < 0) {
 			pr_debug("%s: clk %s can not be parent of clk %s\n",
 					__func__, parent->name, core->name);
 			ret = p_index;
 			goto out;
 		}
+		p_rate = parent->rate;
 	}
 
 	/* propagate PRE_RATE_CHANGE notifications */
@@ -1902,6 +1841,10 @@ int clk_set_phase(struct clk *clk, int degrees)
 
 	clk_prepare_lock();
 
+	/* bail early if nothing to do */
+	if (degrees == clk->core->phase)
+		goto out;
+
 	trace_clk_set_phase(clk->core, degrees);
 
 	if (clk->core->ops->set_phase)
@@ -1912,6 +1855,7 @@ int clk_set_phase(struct clk *clk, int degrees)
 	if (!ret)
 		clk->core->phase = degrees;
 
+out:
 	clk_prepare_unlock();
 
 	return ret;
@@ -2218,7 +2162,7 @@ unlock:
  *
  * Dynamically removes a clk and all its child nodes from the
  * debugfs clk directory if clk->dentry points to debugfs created by
- * clk_debug_register in __clk_init.
+ * clk_debug_register in __clk_core_init.
  */
 static void clk_debug_unregister(struct clk_core *core)
 {
@@ -2303,26 +2247,22 @@ static inline void clk_debug_unregister(struct clk_core *core)
 #endif
 
 /**
- * __clk_init - initialize the data structures in a struct clk
- * @dev:	device initializing this clk, placeholder for now
- * @clk:	clk being initialized
+ * __clk_core_init - initialize the data structures in a struct clk_core
+ * @core:	clk_core being initialized
  *
  * Initializes the lists in struct clk_core, queries the hardware for the
  * parent and rate and sets them both.
  */
-static int __clk_init(struct device *dev, struct clk *clk_user)
+static int __clk_core_init(struct clk_core *core)
 {
 	int i, ret = 0;
 	struct clk_core *orphan;
 	struct hlist_node *tmp2;
-	struct clk_core *core;
 	unsigned long rate;
 
-	if (!clk_user)
+	if (!core)
 		return -EINVAL;
 
-	core = clk_user->core;
-
 	clk_prepare_lock();
 
 	/* check to see if a clock with this name is already registered */
@@ -2337,22 +2277,29 @@ static int __clk_init(struct device *dev, struct clk *clk_user)
 	if (core->ops->set_rate &&
 	    !((core->ops->round_rate || core->ops->determine_rate) &&
 	      core->ops->recalc_rate)) {
-		pr_warning("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
-				__func__, core->name);
+		pr_err("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
+		       __func__, core->name);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (core->ops->set_parent && !core->ops->get_parent) {
-		pr_warning("%s: %s must implement .get_parent & .set_parent\n",
-				__func__, core->name);
+		pr_err("%s: %s must implement .get_parent & .set_parent\n",
+		       __func__, core->name);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (core->num_parents > 1 && !core->ops->get_parent) {
+		pr_err("%s: %s must implement .get_parent as it has multi parents\n",
+		       __func__, core->name);
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (core->ops->set_rate_and_parent &&
 			!(core->ops->set_parent && core->ops->set_rate)) {
-		pr_warn("%s: %s must implement .set_parent & .set_rate\n",
+		pr_err("%s: %s must implement .set_parent & .set_rate\n",
 				__func__, core->name);
 		ret = -EINVAL;
 		goto out;
@@ -2364,37 +2311,12 @@ static int __clk_init(struct device *dev, struct clk *clk_user)
 				"%s: invalid NULL in %s's .parent_names\n",
 				__func__, core->name);
 
-	/*
-	 * Allocate an array of struct clk *'s to avoid unnecessary string
-	 * look-ups of clk's possible parents.  This can fail for clocks passed
-	 * in to clk_init during early boot; thus any access to core->parents[]
-	 * must always check for a NULL pointer and try to populate it if
-	 * necessary.
-	 *
-	 * If core->parents is not NULL we skip this entire block.  This allows
-	 * for clock drivers to statically initialize core->parents.
-	 */
-	if (core->num_parents > 1 && !core->parents) {
-		core->parents = kcalloc(core->num_parents, sizeof(struct clk *),
-					GFP_KERNEL);
-		/*
-		 * clk_core_lookup returns NULL for parents that have not been
-		 * clk_init'd; thus any access to clk->parents[] must check
-		 * for a NULL pointer.  We can always perform lazy lookups for
-		 * missing parents later on.
-		 */
-		if (core->parents)
-			for (i = 0; i < core->num_parents; i++)
-				core->parents[i] =
-					clk_core_lookup(core->parent_names[i]);
-	}
-
 	core->parent = __clk_init_parent(core);
 
 	/*
-	 * Populate core->parent if parent has already been __clk_init'd.  If
-	 * parent has not yet been __clk_init'd then place clk in the orphan
-	 * list.  If clk has set the CLK_IS_ROOT flag then place it in the root
+	 * Populate core->parent if parent has already been clk_core_init'd. If
+	 * parent has not yet been clk_core_init'd then place clk in the orphan
+	 * list.  If clk doesn't have any parents then place it in the root
 	 * clk list.
 	 *
 	 * Every time a new clk is clk_init'd then we walk the list of orphan
@@ -2405,7 +2327,7 @@ static int __clk_init(struct device *dev, struct clk *clk_user)
 		hlist_add_head(&core->child_node,
 				&core->parent->children);
 		core->orphan = core->parent->orphan;
-	} else if (core->flags & CLK_IS_ROOT) {
+	} else if (!core->num_parents) {
 		hlist_add_head(&core->child_node, &clk_root_list);
 		core->orphan = false;
 	} else {
@@ -2454,24 +2376,15 @@ static int __clk_init(struct device *dev, struct clk *clk_user)
 	core->rate = core->req_rate = rate;
 
 	/*
-	 * walk the list of orphan clocks and reparent any that are children of
-	 * this clock
+	 * walk the list of orphan clocks and reparent any that newly finds a
+	 * parent.
 	 */
 	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
-		if (orphan->num_parents && orphan->ops->get_parent) {
-			i = orphan->ops->get_parent(orphan->hw);
-			if (i >= 0 && i < orphan->num_parents &&
-			    !strcmp(core->name, orphan->parent_names[i]))
-				clk_core_reparent(orphan, core);
-			continue;
-		}
+		struct clk_core *parent = __clk_init_parent(orphan);
 
-		for (i = 0; i < orphan->num_parents; i++)
-			if (!strcmp(core->name, orphan->parent_names[i])) {
-				clk_core_reparent(orphan, core);
-				break;
-			}
-	 }
+		if (parent)
+			clk_core_reparent(orphan, parent);
+	}
 
 	/*
 	 * optional platform-specific magic
@@ -2585,21 +2498,31 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 		}
 	}
 
+	/* avoid unnecessary string look-ups of clk_core's possible parents. */
+	core->parents = kcalloc(core->num_parents, sizeof(*core->parents),
+				GFP_KERNEL);
+	if (!core->parents) {
+		ret = -ENOMEM;
+		goto fail_parents;
+	};
+
 	INIT_HLIST_HEAD(&core->clks);
 
 	hw->clk = __clk_create_clk(hw, NULL, NULL);
 	if (IS_ERR(hw->clk)) {
 		ret = PTR_ERR(hw->clk);
-		goto fail_parent_names_copy;
+		goto fail_parents;
 	}
 
-	ret = __clk_init(dev, hw->clk);
+	ret = __clk_core_init(core);
 	if (!ret)
 		return hw->clk;
 
 	__clk_free_clk(hw->clk);
 	hw->clk = NULL;
 
+fail_parents:
+	kfree(core->parents);
 fail_parent_names_copy:
 	while (--i >= 0)
 		kfree_const(core->parent_names[i]);
@@ -2683,7 +2606,7 @@ void clk_unregister(struct clk *clk)
 	if (clk->core->ops == &clk_nodrv_ops) {
 		pr_err("%s: unregistered clock: %s\n", __func__,
 		       clk->core->name);
-		return;
+		goto unlock;
 	}
 	/*
 	 * Assign empty clock ops for consumers that might still hold
@@ -2709,7 +2632,7 @@ void clk_unregister(struct clk *clk)
 		pr_warn("%s: unregistering prepared clock: %s\n",
 					__func__, clk->core->name);
 	kref_put(&clk->core->ref, __clk_release);
-
+unlock:
 	clk_prepare_unlock();
 }
 EXPORT_SYMBOL_GPL(clk_unregister);
@@ -3061,10 +2984,23 @@ struct clk *of_clk_get_from_provider(struct of_phandle_args *clkspec)
 {
 	return __of_clk_get_from_provider(clkspec, NULL, __func__);
 }
+EXPORT_SYMBOL_GPL(of_clk_get_from_provider);
 
-int of_clk_get_parent_count(struct device_node *np)
+/**
+ * of_clk_get_parent_count() - Count the number of clocks a device node has
+ * @np: device node to count
+ *
+ * Returns: The number of clocks that are possible parents of this node
+ */
+unsigned int of_clk_get_parent_count(struct device_node *np)
 {
-	return of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	int count;
+
+	count = of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	if (count < 0)
+		return 0;
+
+	return count;
 }
 EXPORT_SYMBOL_GPL(of_clk_get_parent_count);
 
@@ -3214,6 +3150,9 @@ void __init of_clk_init(const struct of_device_id *matches)
 	for_each_matching_node_and_match(np, matches, &match) {
 		struct clock_provider *parent;
 
+		if (!of_device_is_available(np))
+			continue;
+
 		parent = kzalloc(sizeof(*parent), GFP_KERNEL);
 		if (!parent) {
 			list_for_each_entry_safe(clk_provider, next,
diff --git a/drivers/clk/h8300/clk-div.c b/drivers/clk/h8300/clk-div.c
index d71d01157dbb..4bf44a25d950 100644
--- a/drivers/clk/h8300/clk-div.c
+++ b/drivers/clk/h8300/clk-div.c
@@ -13,7 +13,7 @@ static DEFINE_SPINLOCK(clklock);
 
 static void __init h8300_div_clk_setup(struct device_node *node)
 {
-	int num_parents;
+	unsigned int num_parents;
 	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -22,7 +22,7 @@ static void __init h8300_div_clk_setup(struct device_node *node)
 	int offset;
 
 	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("%s: no parent found", clk_name);
 		return;
 	}
@@ -34,7 +34,7 @@ static void __init h8300_div_clk_setup(struct device_node *node)
 	}
 	offset = (unsigned long)divcr & 3;
 	offset = (3 - offset) * 8;
-	divcr = (void *)((unsigned long)divcr & ~3);
+	divcr = (void __iomem *)((unsigned long)divcr & ~3);
 
 	parent_name = of_clk_get_parent_name(node, 0);
 	of_property_read_u32(node, "renesas,width", &width);
diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c
index 6cf38dc1c929..c9c2fd575ef7 100644
--- a/drivers/clk/h8300/clk-h8s2678.c
+++ b/drivers/clk/h8300/clk-h8s2678.c
@@ -83,7 +83,7 @@ static const struct clk_ops pll_ops = {
 
 static void __init h8s2678_pll_clk_setup(struct device_node *node)
 {
-	int num_parents;
+	unsigned int num_parents;
 	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -91,7 +91,7 @@ static void __init h8s2678_pll_clk_setup(struct device_node *node)
 	struct clk_init_data init;
 
 	num_parents = of_clk_get_parent_count(node);
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("%s: no parent found", clk_name);
 		return;
 	}
diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index 7d03fe17d66f..d04a104ce1b4 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c
@@ -78,15 +78,15 @@ static const char *const mmc3_mux_p[] __initconst = { "armpll2", "armpll3", };
 
 /* fixed rate clocks */
 static struct hisi_fixed_rate_clock hi3620_fixed_rate_clks[] __initdata = {
-	{ HI3620_OSC32K,   "osc32k",   NULL, CLK_IS_ROOT, 32768, },
-	{ HI3620_OSC26M,   "osc26m",   NULL, CLK_IS_ROOT, 26000000, },
-	{ HI3620_PCLK,     "pclk",     NULL, CLK_IS_ROOT, 26000000, },
-	{ HI3620_PLL_ARM0, "armpll0",  NULL, CLK_IS_ROOT, 1600000000, },
-	{ HI3620_PLL_ARM1, "armpll1",  NULL, CLK_IS_ROOT, 1600000000, },
-	{ HI3620_PLL_PERI, "armpll2",  NULL, CLK_IS_ROOT, 1440000000, },
-	{ HI3620_PLL_USB,  "armpll3",  NULL, CLK_IS_ROOT, 1440000000, },
-	{ HI3620_PLL_HDMI, "armpll4",  NULL, CLK_IS_ROOT, 1188000000, },
-	{ HI3620_PLL_GPU,  "armpll5",  NULL, CLK_IS_ROOT, 1300000000, },
+	{ HI3620_OSC32K,   "osc32k",   NULL, 0, 32768, },
+	{ HI3620_OSC26M,   "osc26m",   NULL, 0, 26000000, },
+	{ HI3620_PCLK,     "pclk",     NULL, 0, 26000000, },
+	{ HI3620_PLL_ARM0, "armpll0",  NULL, 0, 1600000000, },
+	{ HI3620_PLL_ARM1, "armpll1",  NULL, 0, 1600000000, },
+	{ HI3620_PLL_PERI, "armpll2",  NULL, 0, 1440000000, },
+	{ HI3620_PLL_USB,  "armpll3",  NULL, 0, 1440000000, },
+	{ HI3620_PLL_HDMI, "armpll4",  NULL, 0, 1188000000, },
+	{ HI3620_PLL_GPU,  "armpll5",  NULL, 0, 1300000000, },
 };
 
 /* fixed factor clocks */
diff --git a/drivers/clk/hisilicon/clk-hi6220-stub.c b/drivers/clk/hisilicon/clk-hi6220-stub.c
index 8afb40ef40ce..329a09214d12 100644
--- a/drivers/clk/hisilicon/clk-hi6220-stub.c
+++ b/drivers/clk/hisilicon/clk-hi6220-stub.c
@@ -235,7 +235,7 @@ static int hi6220_stub_clk_probe(struct platform_device *pdev)
 	init.name = "acpu0";
 	init.ops = &hi6220_stub_clk_ops;
 	init.num_parents = 0;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 
 	clk = devm_clk_register(dev, &stub_clk->hw);
 	if (IS_ERR(clk))
diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c
index 4563343b6420..f02cb41d40a4 100644
--- a/drivers/clk/hisilicon/clk-hi6220.c
+++ b/drivers/clk/hisilicon/clk-hi6220.c
@@ -26,19 +26,19 @@
 
 /* clocks in AO (always on) controller */
 static struct hisi_fixed_rate_clock hi6220_fixed_rate_clks[] __initdata = {
-	{ HI6220_REF32K,	"ref32k",	NULL, CLK_IS_ROOT, 32764,     },
-	{ HI6220_CLK_TCXO,	"clk_tcxo",	NULL, CLK_IS_ROOT, 19200000,  },
-	{ HI6220_MMC1_PAD,	"mmc1_pad",	NULL, CLK_IS_ROOT, 100000000, },
-	{ HI6220_MMC2_PAD,	"mmc2_pad",	NULL, CLK_IS_ROOT, 100000000, },
-	{ HI6220_MMC0_PAD,	"mmc0_pad",	NULL, CLK_IS_ROOT, 200000000, },
-	{ HI6220_PLL_BBP,	"bbppll0",	NULL, CLK_IS_ROOT, 245760000, },
-	{ HI6220_PLL_GPU,	"gpupll",	NULL, CLK_IS_ROOT, 1000000000,},
-	{ HI6220_PLL1_DDR,	"ddrpll1",	NULL, CLK_IS_ROOT, 1066000000,},
-	{ HI6220_PLL_SYS,	"syspll",	NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_PLL_SYS_MEDIA,	"media_syspll",	NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_DDR_SRC,	"ddr_sel_src",  NULL, CLK_IS_ROOT, 1200000000,},
-	{ HI6220_PLL_MEDIA,	"media_pll",    NULL, CLK_IS_ROOT, 1440000000,},
-	{ HI6220_PLL_DDR,	"ddrpll0",      NULL, CLK_IS_ROOT, 1600000000,},
+	{ HI6220_REF32K,	"ref32k",	NULL, 0, 32764,     },
+	{ HI6220_CLK_TCXO,	"clk_tcxo",	NULL, 0, 19200000,  },
+	{ HI6220_MMC1_PAD,	"mmc1_pad",	NULL, 0, 100000000, },
+	{ HI6220_MMC2_PAD,	"mmc2_pad",	NULL, 0, 100000000, },
+	{ HI6220_MMC0_PAD,	"mmc0_pad",	NULL, 0, 200000000, },
+	{ HI6220_PLL_BBP,	"bbppll0",	NULL, 0, 245760000, },
+	{ HI6220_PLL_GPU,	"gpupll",	NULL, 0, 1000000000,},
+	{ HI6220_PLL1_DDR,	"ddrpll1",	NULL, 0, 1066000000,},
+	{ HI6220_PLL_SYS,	"syspll",	NULL, 0, 1200000000,},
+	{ HI6220_PLL_SYS_MEDIA,	"media_syspll",	NULL, 0, 1200000000,},
+	{ HI6220_DDR_SRC,	"ddr_sel_src",  NULL, 0, 1200000000,},
+	{ HI6220_PLL_MEDIA,	"media_pll",    NULL, 0, 1440000000,},
+	{ HI6220_PLL_DDR,	"ddrpll0",      NULL, 0, 1600000000,},
 };
 
 static struct hisi_fixed_factor_clock hi6220_fixed_factor_clks[] __initdata = {
diff --git a/drivers/clk/hisilicon/clk-hip04.c b/drivers/clk/hisilicon/clk-hip04.c
index 8ca967308343..b38e03da1d02 100644
--- a/drivers/clk/hisilicon/clk-hip04.c
+++ b/drivers/clk/hisilicon/clk-hip04.c
@@ -36,9 +36,9 @@
 
 /* fixed rate clocks */
 static struct hisi_fixed_rate_clock hip04_fixed_rate_clks[] __initdata = {
-	{ HIP04_OSC50M,   "osc50m",   NULL, CLK_IS_ROOT, 50000000, },
-	{ HIP04_CLK_50M,  "clk50m",   NULL, CLK_IS_ROOT, 50000000, },
-	{ HIP04_CLK_168M, "clk168m",  NULL, CLK_IS_ROOT, 168750000, },
+	{ HIP04_OSC50M,   "osc50m",   NULL, 0, 50000000, },
+	{ HIP04_CLK_50M,  "clk50m",   NULL, 0, 50000000, },
+	{ HIP04_CLK_168M, "clk168m",  NULL, 0, 168750000, },
 };
 
 static void __init hip04_clk_init(struct device_node *np)
diff --git a/drivers/clk/hisilicon/clk-hix5hd2.c b/drivers/clk/hisilicon/clk-hix5hd2.c
index 0aaf29da8491..14b05efa3c2a 100644
--- a/drivers/clk/hisilicon/clk-hix5hd2.c
+++ b/drivers/clk/hisilicon/clk-hix5hd2.c
@@ -14,36 +14,36 @@
 #include "clk.h"
 
 static struct hisi_fixed_rate_clock hix5hd2_fixed_rate_clks[] __initdata = {
-	{ HIX5HD2_FIXED_1200M, "1200m", NULL, CLK_IS_ROOT, 1200000000, },
-	{ HIX5HD2_FIXED_400M, "400m", NULL, CLK_IS_ROOT, 400000000, },
-	{ HIX5HD2_FIXED_48M, "48m", NULL, CLK_IS_ROOT, 48000000, },
-	{ HIX5HD2_FIXED_24M, "24m", NULL, CLK_IS_ROOT, 24000000, },
-	{ HIX5HD2_FIXED_600M, "600m", NULL, CLK_IS_ROOT, 600000000, },
-	{ HIX5HD2_FIXED_300M, "300m", NULL, CLK_IS_ROOT, 300000000, },
-	{ HIX5HD2_FIXED_75M, "75m", NULL, CLK_IS_ROOT, 75000000, },
-	{ HIX5HD2_FIXED_200M, "200m", NULL, CLK_IS_ROOT, 200000000, },
-	{ HIX5HD2_FIXED_100M, "100m", NULL, CLK_IS_ROOT, 100000000, },
-	{ HIX5HD2_FIXED_40M, "40m", NULL, CLK_IS_ROOT, 40000000, },
-	{ HIX5HD2_FIXED_150M, "150m", NULL, CLK_IS_ROOT, 150000000, },
-	{ HIX5HD2_FIXED_1728M, "1728m", NULL, CLK_IS_ROOT, 1728000000, },
-	{ HIX5HD2_FIXED_28P8M, "28p8m", NULL, CLK_IS_ROOT, 28000000, },
-	{ HIX5HD2_FIXED_432M, "432m", NULL, CLK_IS_ROOT, 432000000, },
-	{ HIX5HD2_FIXED_345P6M, "345p6m", NULL, CLK_IS_ROOT, 345000000, },
-	{ HIX5HD2_FIXED_288M, "288m", NULL, CLK_IS_ROOT, 288000000, },
-	{ HIX5HD2_FIXED_60M,	"60m", NULL, CLK_IS_ROOT, 60000000, },
-	{ HIX5HD2_FIXED_750M, "750m", NULL, CLK_IS_ROOT, 750000000, },
-	{ HIX5HD2_FIXED_500M, "500m", NULL, CLK_IS_ROOT, 500000000, },
-	{ HIX5HD2_FIXED_54M,	"54m", NULL, CLK_IS_ROOT, 54000000, },
-	{ HIX5HD2_FIXED_27M, "27m", NULL, CLK_IS_ROOT, 27000000, },
-	{ HIX5HD2_FIXED_1500M, "1500m", NULL, CLK_IS_ROOT, 1500000000, },
-	{ HIX5HD2_FIXED_375M, "375m", NULL, CLK_IS_ROOT, 375000000, },
-	{ HIX5HD2_FIXED_187M, "187m", NULL, CLK_IS_ROOT, 187000000, },
-	{ HIX5HD2_FIXED_250M, "250m", NULL, CLK_IS_ROOT, 250000000, },
-	{ HIX5HD2_FIXED_125M, "125m", NULL, CLK_IS_ROOT, 125000000, },
-	{ HIX5HD2_FIXED_2P02M, "2m", NULL, CLK_IS_ROOT, 2000000, },
-	{ HIX5HD2_FIXED_50M, "50m", NULL, CLK_IS_ROOT, 50000000, },
-	{ HIX5HD2_FIXED_25M, "25m", NULL, CLK_IS_ROOT, 25000000, },
-	{ HIX5HD2_FIXED_83M, "83m", NULL, CLK_IS_ROOT, 83333333, },
+	{ HIX5HD2_FIXED_1200M, "1200m", NULL, 0, 1200000000, },
+	{ HIX5HD2_FIXED_400M, "400m", NULL, 0, 400000000, },
+	{ HIX5HD2_FIXED_48M, "48m", NULL, 0, 48000000, },
+	{ HIX5HD2_FIXED_24M, "24m", NULL, 0, 24000000, },
+	{ HIX5HD2_FIXED_600M, "600m", NULL, 0, 600000000, },
+	{ HIX5HD2_FIXED_300M, "300m", NULL, 0, 300000000, },
+	{ HIX5HD2_FIXED_75M, "75m", NULL, 0, 75000000, },
+	{ HIX5HD2_FIXED_200M, "200m", NULL, 0, 200000000, },
+	{ HIX5HD2_FIXED_100M, "100m", NULL, 0, 100000000, },
+	{ HIX5HD2_FIXED_40M, "40m", NULL, 0, 40000000, },
+	{ HIX5HD2_FIXED_150M, "150m", NULL, 0, 150000000, },
+	{ HIX5HD2_FIXED_1728M, "1728m", NULL, 0, 1728000000, },
+	{ HIX5HD2_FIXED_28P8M, "28p8m", NULL, 0, 28000000, },
+	{ HIX5HD2_FIXED_432M, "432m", NULL, 0, 432000000, },
+	{ HIX5HD2_FIXED_345P6M, "345p6m", NULL, 0, 345000000, },
+	{ HIX5HD2_FIXED_288M, "288m", NULL, 0, 288000000, },
+	{ HIX5HD2_FIXED_60M,	"60m", NULL, 0, 60000000, },
+	{ HIX5HD2_FIXED_750M, "750m", NULL, 0, 750000000, },
+	{ HIX5HD2_FIXED_500M, "500m", NULL, 0, 500000000, },
+	{ HIX5HD2_FIXED_54M,	"54m", NULL, 0, 54000000, },
+	{ HIX5HD2_FIXED_27M, "27m", NULL, 0, 27000000, },
+	{ HIX5HD2_FIXED_1500M, "1500m", NULL, 0, 1500000000, },
+	{ HIX5HD2_FIXED_375M, "375m", NULL, 0, 375000000, },
+	{ HIX5HD2_FIXED_187M, "187m", NULL, 0, 187000000, },
+	{ HIX5HD2_FIXED_250M, "250m", NULL, 0, 250000000, },
+	{ HIX5HD2_FIXED_125M, "125m", NULL, 0, 125000000, },
+	{ HIX5HD2_FIXED_2P02M, "2m", NULL, 0, 2000000, },
+	{ HIX5HD2_FIXED_50M, "50m", NULL, 0, 50000000, },
+	{ HIX5HD2_FIXED_25M, "25m", NULL, 0, 25000000, },
+	{ HIX5HD2_FIXED_83M, "83m", NULL, 0, 83333333, },
 };
 
 static const char *const sfc_mux_p[] __initconst = {
diff --git a/drivers/clk/imx/clk-busy.c b/drivers/clk/imx/clk-busy.c
index 4bb1bc419b79..5cc99590f9a3 100644
--- a/drivers/clk/imx/clk-busy.c
+++ b/drivers/clk/imx/clk-busy.c
@@ -38,7 +38,7 @@ struct clk_busy_divider {
 
 static inline struct clk_busy_divider *to_clk_busy_divider(struct clk_hw *hw)
 {
-	struct clk_divider *div = container_of(hw, struct clk_divider, hw);
+	struct clk_divider *div = to_clk_divider(hw);
 
 	return container_of(div, struct clk_busy_divider, div);
 }
@@ -123,7 +123,7 @@ struct clk_busy_mux {
 
 static inline struct clk_busy_mux *to_clk_busy_mux(struct clk_hw *hw)
 {
-	struct clk_mux *mux = container_of(hw, struct clk_mux, hw);
+	struct clk_mux *mux = to_clk_mux(hw);
 
 	return container_of(mux, struct clk_busy_mux, mux);
 }
diff --git a/drivers/clk/imx/clk-fixup-div.c b/drivers/clk/imx/clk-fixup-div.c
index 21db020b1f2d..ce5722732715 100644
--- a/drivers/clk/imx/clk-fixup-div.c
+++ b/drivers/clk/imx/clk-fixup-div.c
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include "clk.h"
 
-#define to_clk_div(_hw) container_of(_hw, struct clk_divider, hw)
 #define div_mask(d)	((1 << (d->width)) - 1)
 
 /**
@@ -35,7 +34,7 @@ struct clk_fixup_div {
 
 static inline struct clk_fixup_div *to_clk_fixup_div(struct clk_hw *hw)
 {
-	struct clk_divider *divider = to_clk_div(hw);
+	struct clk_divider *divider = to_clk_divider(hw);
 
 	return container_of(divider, struct clk_fixup_div, divider);
 }
@@ -60,7 +59,7 @@ static int clk_fixup_div_set_rate(struct clk_hw *hw, unsigned long rate,
 			    unsigned long parent_rate)
 {
 	struct clk_fixup_div *fixup_div = to_clk_fixup_div(hw);
-	struct clk_divider *div = to_clk_div(hw);
+	struct clk_divider *div = to_clk_divider(hw);
 	unsigned int divider, value;
 	unsigned long flags = 0;
 	u32 val;
diff --git a/drivers/clk/imx/clk-fixup-mux.c b/drivers/clk/imx/clk-fixup-mux.c
index 0d40b35c557c..c9b327e0a8dd 100644
--- a/drivers/clk/imx/clk-fixup-mux.c
+++ b/drivers/clk/imx/clk-fixup-mux.c
@@ -15,8 +15,6 @@
 #include <linux/slab.h>
 #include "clk.h"
 
-#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw)
-
 /**
  * struct clk_fixup_mux - imx integer fixup multiplexer clock
  * @mux: the parent class
diff --git a/drivers/clk/imx/clk-gate-exclusive.c b/drivers/clk/imx/clk-gate-exclusive.c
index c12f5f2e04dc..3bd9dee618b2 100644
--- a/drivers/clk/imx/clk-gate-exclusive.c
+++ b/drivers/clk/imx/clk-gate-exclusive.c
@@ -31,7 +31,7 @@ struct clk_gate_exclusive {
 
 static int clk_gate_exclusive_enable(struct clk_hw *hw)
 {
-	struct clk_gate *gate = container_of(hw, struct clk_gate, hw);
+	struct clk_gate *gate = to_clk_gate(hw);
 	struct clk_gate_exclusive *exgate = container_of(gate,
 					struct clk_gate_exclusive, gate);
 	u32 val = readl(gate->reg);
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index f0efc6feeec2..02e18182fcb5 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c
@@ -34,7 +34,9 @@ static const char *periph2_sels[]	= { "periph2_pre", "periph2_clk2", };
 static const char *axi_sels[]		= { "periph", "pll2_pfd2_396m", "periph", "pll3_pfd1_540m", };
 static const char *audio_sels[]	= { "pll4_audio_div", "pll3_pfd2_508m", "pll3_pfd3_454m", "pll3_usb_otg", };
 static const char *gpu_axi_sels[]	= { "axi", "ahb", };
+static const char *pre_axi_sels[]	= { "axi", "ahb", };
 static const char *gpu2d_core_sels[]	= { "axi", "pll3_usb_otg", "pll2_pfd0_352m", "pll2_pfd2_396m", };
+static const char *gpu2d_core_sels_2[]	= { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll3_pfd0_720m",};
 static const char *gpu3d_core_sels[]	= { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll2_pfd2_396m", };
 static const char *gpu3d_shader_sels[] = { "mmdc_ch0_axi", "pll3_usb_otg", "pll2_pfd1_594m", "pll3_pfd0_720m", };
 static const char *ipu_sels[]		= { "mmdc_ch0_axi", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", };
@@ -44,15 +46,24 @@ static const char *ipu1_di0_sels[]	= { "ipu1_di0_pre", "dummy", "dummy", "ldb_di
 static const char *ipu1_di1_sels[]	= { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
 static const char *ipu2_di0_sels[]	= { "ipu2_di0_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
 static const char *ipu2_di1_sels[]	= { "ipu2_di1_pre", "dummy", "dummy", "ldb_di0", "ldb_di1", };
+static const char *ipu1_di0_sels_2[]	= { "ipu1_di0_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu1_di1_sels_2[]	= { "ipu1_di1_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu2_di0_sels_2[]	= { "ipu2_di0_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
+static const char *ipu2_di1_sels_2[]	= { "ipu2_di1_pre", "dummy", "dummy", "ldb_di0_podf", "ldb_di1_podf", };
 static const char *hsi_tx_sels[]	= { "pll3_120m", "pll2_pfd2_396m", };
 static const char *pcie_axi_sels[]	= { "axi", "ahb", };
 static const char *ssi_sels[]		= { "pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_audio_div", };
 static const char *usdhc_sels[]	= { "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *enfc_sels[]	= { "pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", };
+static const char *enfc_sels_2[] = {"pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", "pll3_pfd3_454m", "dummy", };
 static const char *eim_sels[]		= { "pll2_pfd2_396m", "pll3_usb_otg", "axi", "pll2_pfd0_352m", };
 static const char *eim_slow_sels[]      = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *vdo_axi_sels[]	= { "axi", "ahb", };
 static const char *vpu_axi_sels[]	= { "axi", "pll2_pfd2_396m", "pll2_pfd0_352m", };
+static const char *uart_sels[] = { "pll3_80m", "osc", };
+static const char *ipg_per_sels[] = { "ipg", "osc", };
+static const char *ecspi_sels[] = { "pll3_60m", "osc", };
+static const char *can_sels[] = { "pll3_60m", "osc", "pll3_80m", };
 static const char *cko1_sels[]	= { "pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div",
 				    "dummy", "axi", "enfc", "ipu1_di0", "ipu1_di1", "ipu2_di0",
 				    "ipu2_di1", "ahb", "ipg", "ipg_per", "ckil", "pll4_audio_div", };
@@ -121,12 +132,19 @@ static unsigned int share_count_ssi2;
 static unsigned int share_count_ssi3;
 static unsigned int share_count_mipi_core_cfg;
 static unsigned int share_count_spdif;
+static unsigned int share_count_prg0;
+static unsigned int share_count_prg1;
 
 static inline int clk_on_imx6q(void)
 {
 	return of_machine_is_compatible("fsl,imx6q");
 }
 
+static inline int clk_on_imx6qp(void)
+{
+	return of_machine_is_compatible("fsl,imx6qp");
+}
+
 static inline int clk_on_imx6dl(void)
 {
 	return of_machine_is_compatible("fsl,imx6dl");
@@ -265,7 +283,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_TWD]       = imx_clk_fixed_factor("twd",       "arm",            1, 2);
 	clk[IMX6QDL_CLK_GPT_3M]    = imx_clk_fixed_factor("gpt_3m",    "osc",            1, 8);
 	clk[IMX6QDL_CLK_VIDEO_27M] = imx_clk_fixed_factor("video_27m", "pll3_pfd1_540m", 1, 20);
-	if (clk_on_imx6dl()) {
+	if (clk_on_imx6dl() || clk_on_imx6qp()) {
 		clk[IMX6QDL_CLK_GPU2D_AXI] = imx_clk_fixed_factor("gpu2d_axi", "mmdc_ch0_axi_podf", 1, 1);
 		clk[IMX6QDL_CLK_GPU3D_AXI] = imx_clk_fixed_factor("gpu3d_axi", "mmdc_ch0_axi_podf", 1, 1);
 	}
@@ -294,7 +312,15 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 		clk[IMX6QDL_CLK_GPU2D_AXI]        = imx_clk_mux("gpu2d_axi",        base + 0x18, 0,  1, gpu_axi_sels,      ARRAY_SIZE(gpu_axi_sels));
 		clk[IMX6QDL_CLK_GPU3D_AXI]        = imx_clk_mux("gpu3d_axi",        base + 0x18, 1,  1, gpu_axi_sels,      ARRAY_SIZE(gpu_axi_sels));
 	}
-	clk[IMX6QDL_CLK_GPU2D_CORE_SEL]   = imx_clk_mux("gpu2d_core_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_CAN_SEL]   = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
+		clk[IMX6QDL_CLK_ECSPI_SEL] = imx_clk_mux("ecspi_sel",	base + 0x38, 18, 1, ecspi_sels,  ARRAY_SIZE(ecspi_sels));
+		clk[IMX6QDL_CLK_IPG_PER_SEL] = imx_clk_mux("ipg_per_sel", base + 0x1c, 6, 1, ipg_per_sels, ARRAY_SIZE(ipg_per_sels));
+		clk[IMX6QDL_CLK_UART_SEL] = imx_clk_mux("uart_sel", base + 0x24, 6, 1, uart_sels, ARRAY_SIZE(uart_sels));
+		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel", base + 0x18, 16, 2, gpu2d_core_sels_2, ARRAY_SIZE(gpu2d_core_sels_2));
+	} else {
+		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
+	}
 	clk[IMX6QDL_CLK_GPU3D_CORE_SEL]   = imx_clk_mux("gpu3d_core_sel",   base + 0x18, 4,  2, gpu3d_core_sels,   ARRAY_SIZE(gpu3d_core_sels));
 	clk[IMX6QDL_CLK_GPU3D_SHADER_SEL] = imx_clk_mux("gpu3d_shader_sel", base + 0x18, 8,  2, gpu3d_shader_sels, ARRAY_SIZE(gpu3d_shader_sels));
 	clk[IMX6QDL_CLK_IPU1_SEL]         = imx_clk_mux("ipu1_sel",         base + 0x3c, 9,  2, ipu_sels,          ARRAY_SIZE(ipu_sels));
@@ -305,22 +331,40 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_IPU1_DI1_PRE_SEL] = imx_clk_mux_flags("ipu1_di1_pre_sel", base + 0x34, 15, 3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_IPU2_DI0_PRE_SEL] = imx_clk_mux_flags("ipu2_di0_pre_sel", base + 0x38, 6,  3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_IPU2_DI1_PRE_SEL] = imx_clk_mux_flags("ipu2_di1_pre_sel", base + 0x38, 15, 3, ipu_di_pre_sels,   ARRAY_SIZE(ipu_di_pre_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels,     ARRAY_SIZE(ipu1_di0_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels,     ARRAY_SIZE(ipu1_di1_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels,     ARRAY_SIZE(ipu2_di0_sels), CLK_SET_RATE_PARENT);
-	clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels,     ARRAY_SIZE(ipu2_di1_sels), CLK_SET_RATE_PARENT);
 	clk[IMX6QDL_CLK_HSI_TX_SEL]       = imx_clk_mux("hsi_tx_sel",       base + 0x30, 28, 1, hsi_tx_sels,       ARRAY_SIZE(hsi_tx_sels));
 	clk[IMX6QDL_CLK_PCIE_AXI_SEL]     = imx_clk_mux("pcie_axi_sel",     base + 0x18, 10, 1, pcie_axi_sels,     ARRAY_SIZE(pcie_axi_sels));
-	clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_fixup_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_fixup_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_fixup_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_fixup_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_fixup_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_fixup_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_fixup_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
-	clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_fixup_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels), imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_fixup_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels), imx_cscmr1_fixup);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels_2,     ARRAY_SIZE(ipu1_di0_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels_2,     ARRAY_SIZE(ipu1_di1_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels_2,     ARRAY_SIZE(ipu2_di0_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels_2,     ARRAY_SIZE(ipu2_di1_sels_2), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels));
+		clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
+		clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 15, 3, enfc_sels_2,         ARRAY_SIZE(enfc_sels_2));
+		clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels));
+		clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels));
+		clk[IMX6QDL_CLK_PRE_AXI]	  = imx_clk_mux("pre_axi",	base + 0x18, 1,  1, pre_axi_sels,    ARRAY_SIZE(pre_axi_sels));
+	} else {
+		clk[IMX6QDL_CLK_IPU1_DI0_SEL]     = imx_clk_mux_flags("ipu1_di0_sel",     base + 0x34, 0,  3, ipu1_di0_sels,     ARRAY_SIZE(ipu1_di0_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU1_DI1_SEL]     = imx_clk_mux_flags("ipu1_di1_sel",     base + 0x34, 9,  3, ipu1_di1_sels,     ARRAY_SIZE(ipu1_di1_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI0_SEL]     = imx_clk_mux_flags("ipu2_di0_sel",     base + 0x38, 0,  3, ipu2_di0_sels,     ARRAY_SIZE(ipu2_di0_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_IPU2_DI1_SEL]     = imx_clk_mux_flags("ipu2_di1_sel",     base + 0x38, 9,  3, ipu2_di1_sels,     ARRAY_SIZE(ipu2_di1_sels), CLK_SET_RATE_PARENT);
+		clk[IMX6QDL_CLK_SSI1_SEL]         = imx_clk_fixup_mux("ssi1_sel",   base + 0x1c, 10, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_SSI2_SEL]         = imx_clk_fixup_mux("ssi2_sel",   base + 0x1c, 12, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_SSI3_SEL]         = imx_clk_fixup_mux("ssi3_sel",   base + 0x1c, 14, 2, ssi_sels,          ARRAY_SIZE(ssi_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC1_SEL]       = imx_clk_fixup_mux("usdhc1_sel", base + 0x1c, 16, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC2_SEL]       = imx_clk_fixup_mux("usdhc2_sel", base + 0x1c, 17, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC3_SEL]       = imx_clk_fixup_mux("usdhc3_sel", base + 0x1c, 18, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_USDHC4_SEL]       = imx_clk_fixup_mux("usdhc4_sel", base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_ENFC_SEL]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
+		clk[IMX6QDL_CLK_EIM_SEL]          = imx_clk_fixup_mux("eim_sel",      base + 0x1c, 27, 2, eim_sels,        ARRAY_SIZE(eim_sels), imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_EIM_SLOW_SEL]     = imx_clk_fixup_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels,   ARRAY_SIZE(eim_slow_sels), imx_cscmr1_fixup);
+	}
 	clk[IMX6QDL_CLK_VDO_AXI_SEL]      = imx_clk_mux("vdo_axi_sel",      base + 0x18, 11, 1, vdo_axi_sels,      ARRAY_SIZE(vdo_axi_sels));
 	clk[IMX6QDL_CLK_VPU_AXI_SEL]      = imx_clk_mux("vpu_axi_sel",      base + 0x18, 14, 2, vpu_axi_sels,      ARRAY_SIZE(vpu_axi_sels));
 	clk[IMX6QDL_CLK_CKO1_SEL]         = imx_clk_mux("cko1_sel",         base + 0x60, 0,  4, cko1_sels,         ARRAY_SIZE(cko1_sels));
@@ -335,23 +379,33 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_PERIPH_CLK2]      = imx_clk_divider("periph_clk2",      "periph_clk2_sel",   base + 0x14, 27, 3);
 	clk[IMX6QDL_CLK_PERIPH2_CLK2]     = imx_clk_divider("periph2_clk2",     "periph2_clk2_sel",  base + 0x14, 0,  3);
 	clk[IMX6QDL_CLK_IPG]              = imx_clk_divider("ipg",              "ahb",               base + 0x14, 8,  2);
-	clk[IMX6QDL_CLK_IPG_PER]          = imx_clk_fixup_divider("ipg_per",    "ipg",               base + 0x1c, 0,  6, imx_cscmr1_fixup);
 	clk[IMX6QDL_CLK_ESAI_PRED]        = imx_clk_divider("esai_pred",        "esai_sel",          base + 0x28, 9,  3);
 	clk[IMX6QDL_CLK_ESAI_PODF]        = imx_clk_divider("esai_podf",        "esai_pred",         base + 0x28, 25, 3);
 	clk[IMX6QDL_CLK_ASRC_PRED]        = imx_clk_divider("asrc_pred",        "asrc_sel",          base + 0x30, 12, 3);
 	clk[IMX6QDL_CLK_ASRC_PODF]        = imx_clk_divider("asrc_podf",        "asrc_pred",         base + 0x30, 9,  3);
 	clk[IMX6QDL_CLK_SPDIF_PRED]       = imx_clk_divider("spdif_pred",       "spdif_sel",         base + 0x30, 25, 3);
 	clk[IMX6QDL_CLK_SPDIF_PODF]       = imx_clk_divider("spdif_podf",       "spdif_pred",        base + 0x30, 22, 3);
-	clk[IMX6QDL_CLK_CAN_ROOT]         = imx_clk_divider("can_root",         "pll3_60m",          base + 0x20, 2,  6);
-	clk[IMX6QDL_CLK_ECSPI_ROOT]       = imx_clk_divider("ecspi_root",       "pll3_60m",          base + 0x38, 19, 6);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_IPG_PER] = imx_clk_divider("ipg_per", "ipg_per_sel", base + 0x1c, 0, 6);
+		clk[IMX6QDL_CLK_ECSPI_ROOT] = imx_clk_divider("ecspi_root", "ecspi_sel", base + 0x38, 19, 6);
+		clk[IMX6QDL_CLK_CAN_ROOT] = imx_clk_divider("can_root", "can_sel", base + 0x20, 2, 6);
+		clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "uart_sel", base + 0x24, 0, 6);
+		clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0", 2, 7);
+		clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1", 2, 7);
+	} else {
+		clk[IMX6QDL_CLK_ECSPI_ROOT] = imx_clk_divider("ecspi_root", "pll3_60m", base + 0x38, 19, 6);
+		clk[IMX6QDL_CLK_CAN_ROOT] = imx_clk_divider("can_root", "pll3_60", base + 0x20, 2, 6);
+		clk[IMX6QDL_CLK_IPG_PER] = imx_clk_fixup_divider("ipg_per", "ipg", base + 0x1c, 0, 6, imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "pll3_80m",          base + 0x24, 0,  6);
+		clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
+		clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
+	}
 	clk[IMX6QDL_CLK_GPU2D_CORE_PODF]  = imx_clk_divider("gpu2d_core_podf",  "gpu2d_core_sel",    base + 0x18, 23, 3);
 	clk[IMX6QDL_CLK_GPU3D_CORE_PODF]  = imx_clk_divider("gpu3d_core_podf",  "gpu3d_core_sel",    base + 0x18, 26, 3);
 	clk[IMX6QDL_CLK_GPU3D_SHADER]     = imx_clk_divider("gpu3d_shader",     "gpu3d_shader_sel",  base + 0x18, 29, 3);
 	clk[IMX6QDL_CLK_IPU1_PODF]        = imx_clk_divider("ipu1_podf",        "ipu1_sel",          base + 0x3c, 11, 3);
 	clk[IMX6QDL_CLK_IPU2_PODF]        = imx_clk_divider("ipu2_podf",        "ipu2_sel",          base + 0x3c, 16, 3);
-	clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5]  = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
 	clk[IMX6QDL_CLK_LDB_DI0_PODF]     = imx_clk_divider_flags("ldb_di0_podf", "ldb_di0_div_3_5", base + 0x20, 10, 1, 0);
-	clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5]  = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
 	clk[IMX6QDL_CLK_LDB_DI1_PODF]     = imx_clk_divider_flags("ldb_di1_podf", "ldb_di1_div_3_5", base + 0x20, 11, 1, 0);
 	clk[IMX6QDL_CLK_IPU1_DI0_PRE]     = imx_clk_divider("ipu1_di0_pre",     "ipu1_di0_pre_sel",  base + 0x34, 3,  3);
 	clk[IMX6QDL_CLK_IPU1_DI1_PRE]     = imx_clk_divider("ipu1_di1_pre",     "ipu1_di1_pre_sel",  base + 0x34, 12, 3);
@@ -364,15 +418,19 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_SSI2_PODF]        = imx_clk_divider("ssi2_podf",        "ssi2_pred",         base + 0x2c, 0,  6);
 	clk[IMX6QDL_CLK_SSI3_PRED]        = imx_clk_divider("ssi3_pred",        "ssi3_sel",          base + 0x28, 22, 3);
 	clk[IMX6QDL_CLK_SSI3_PODF]        = imx_clk_divider("ssi3_podf",        "ssi3_pred",         base + 0x28, 16, 6);
-	clk[IMX6QDL_CLK_UART_SERIAL_PODF] = imx_clk_divider("uart_serial_podf", "pll3_80m",          base + 0x24, 0,  6);
 	clk[IMX6QDL_CLK_USDHC1_PODF]      = imx_clk_divider("usdhc1_podf",      "usdhc1_sel",        base + 0x24, 11, 3);
 	clk[IMX6QDL_CLK_USDHC2_PODF]      = imx_clk_divider("usdhc2_podf",      "usdhc2_sel",        base + 0x24, 16, 3);
 	clk[IMX6QDL_CLK_USDHC3_PODF]      = imx_clk_divider("usdhc3_podf",      "usdhc3_sel",        base + 0x24, 19, 3);
 	clk[IMX6QDL_CLK_USDHC4_PODF]      = imx_clk_divider("usdhc4_podf",      "usdhc4_sel",        base + 0x24, 22, 3);
 	clk[IMX6QDL_CLK_ENFC_PRED]        = imx_clk_divider("enfc_pred",        "enfc_sel",          base + 0x2c, 18, 3);
 	clk[IMX6QDL_CLK_ENFC_PODF]        = imx_clk_divider("enfc_podf",        "enfc_pred",         base + 0x2c, 21, 6);
-	clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_fixup_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3, imx_cscmr1_fixup);
-	clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_fixup_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3, imx_cscmr1_fixup);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3);
+		clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3);
+	} else {
+		clk[IMX6QDL_CLK_EIM_PODF]         = imx_clk_fixup_divider("eim_podf",   "eim_sel",           base + 0x1c, 20, 3, imx_cscmr1_fixup);
+		clk[IMX6QDL_CLK_EIM_SLOW_PODF]    = imx_clk_fixup_divider("eim_slow_podf", "eim_slow_sel",   base + 0x1c, 23, 3, imx_cscmr1_fixup);
+	}
 	clk[IMX6QDL_CLK_VPU_AXI_PODF]     = imx_clk_divider("vpu_axi_podf",     "vpu_axi_sel",       base + 0x24, 25, 3);
 	clk[IMX6QDL_CLK_CKO1_PODF]        = imx_clk_divider("cko1_podf",        "cko1_sel",          base + 0x60, 4,  3);
 	clk[IMX6QDL_CLK_CKO2_PODF]        = imx_clk_divider("cko2_podf",        "cko2_sel",          base + 0x60, 21, 3);
@@ -380,7 +438,12 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	/*                                                        name                 parent_name    reg        shift width busy: reg, shift */
 	clk[IMX6QDL_CLK_AXI]               = imx_clk_busy_divider("axi",               "axi_sel",     base + 0x14, 16,  3,   base + 0x48, 0);
 	clk[IMX6QDL_CLK_MMDC_CH0_AXI_PODF] = imx_clk_busy_divider("mmdc_ch0_axi_podf", "periph",      base + 0x14, 19,  3,   base + 0x48, 4);
-	clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "periph2",     base + 0x14, 3,   3,   base + 0x48, 2);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_CG] = imx_clk_gate("mmdc_ch1_axi_cg", "periph2", base + 0x4, 18);
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "mmdc_ch1_axi_cg", base + 0x14, 3, 3, base + 0x48, 2);
+	} else {
+		clk[IMX6QDL_CLK_MMDC_CH1_AXI_PODF] = imx_clk_busy_divider("mmdc_ch1_axi_podf", "periph2",     base + 0x14, 3,   3,   base + 0x48, 2);
+	}
 	clk[IMX6QDL_CLK_ARM]               = imx_clk_busy_divider("arm",               "pll1_sw",     base + 0x10, 0,   3,   base + 0x48, 16);
 	clk[IMX6QDL_CLK_AHB]               = imx_clk_busy_divider("ahb",               "periph",      base + 0x14, 10,  3,   base + 0x48, 1);
 
@@ -432,8 +495,13 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_IPU1_DI1]     = imx_clk_gate2("ipu1_di1",      "ipu1_di1_sel",      base + 0x74, 4);
 	clk[IMX6QDL_CLK_IPU2]         = imx_clk_gate2("ipu2",          "ipu2_podf",         base + 0x74, 6);
 	clk[IMX6QDL_CLK_IPU2_DI0]     = imx_clk_gate2("ipu2_di0",      "ipu2_di0_sel",      base + 0x74, 8);
-	clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_podf",      base + 0x74, 12);
-	clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_podf",      base + 0x74, 14);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_sel",      base + 0x74, 12);
+		clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_sel",      base + 0x74, 14);
+	} else {
+		clk[IMX6QDL_CLK_LDB_DI0]      = imx_clk_gate2("ldb_di0",       "ldb_di0_podf",      base + 0x74, 12);
+		clk[IMX6QDL_CLK_LDB_DI1]      = imx_clk_gate2("ldb_di1",       "ldb_di1_podf",      base + 0x74, 14);
+	}
 	clk[IMX6QDL_CLK_IPU2_DI1]     = imx_clk_gate2("ipu2_di1",      "ipu2_di1_sel",      base + 0x74, 10);
 	clk[IMX6QDL_CLK_HSI_TX]       = imx_clk_gate2_shared("hsi_tx", "hsi_tx_podf",       base + 0x74, 16, &share_count_mipi_core_cfg);
 	clk[IMX6QDL_CLK_MIPI_CORE_CFG] = imx_clk_gate2_shared("mipi_core_cfg", "video_27m", base + 0x74, 16, &share_count_mipi_core_cfg);
@@ -482,6 +550,16 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node)
 	clk[IMX6QDL_CLK_EIM_SLOW]     = imx_clk_gate2("eim_slow",      "eim_slow_podf",     base + 0x80, 10);
 	clk[IMX6QDL_CLK_VDO_AXI]      = imx_clk_gate2("vdo_axi",       "vdo_axi_sel",       base + 0x80, 12);
 	clk[IMX6QDL_CLK_VPU_AXI]      = imx_clk_gate2("vpu_axi",       "vpu_axi_podf",      base + 0x80, 14);
+	if (clk_on_imx6qp()) {
+		clk[IMX6QDL_CLK_PRE0] = imx_clk_gate2("pre0",	       "pre_axi",	    base + 0x80, 16);
+		clk[IMX6QDL_CLK_PRE1] = imx_clk_gate2("pre1",	       "pre_axi",	    base + 0x80, 18);
+		clk[IMX6QDL_CLK_PRE2] = imx_clk_gate2("pre2",	       "pre_axi",         base + 0x80, 20);
+		clk[IMX6QDL_CLK_PRE3] = imx_clk_gate2("pre3",	       "pre_axi",	    base + 0x80, 22);
+		clk[IMX6QDL_CLK_PRG0_AXI] = imx_clk_gate2_shared("prg0_axi",  "ipu1_podf",  base + 0x80, 24, &share_count_prg0);
+		clk[IMX6QDL_CLK_PRG1_AXI] = imx_clk_gate2_shared("prg1_axi",  "ipu2_podf",  base + 0x80, 26, &share_count_prg1);
+		clk[IMX6QDL_CLK_PRG0_APB] = imx_clk_gate2_shared("prg0_apb",  "ipg",	    base + 0x80, 24, &share_count_prg0);
+		clk[IMX6QDL_CLK_PRG1_APB] = imx_clk_gate2_shared("prg1_apb",  "ipg",	    base + 0x80, 26, &share_count_prg1);
+	}
 	clk[IMX6QDL_CLK_CKO1]         = imx_clk_gate("cko1",           "cko1_podf",         base + 0x60, 7);
 	clk[IMX6QDL_CLK_CKO2]         = imx_clk_gate("cko2",           "cko2_podf",         base + 0x60, 24);
 
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 08692d74b884..0f1f17a8f3ed 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -157,9 +157,9 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clk_set_parent(clks[IMX6UL_PLL7_BYPASS], clks[IMX6UL_CLK_PLL7]);
 
 	clks[IMX6UL_CLK_PLL1_SYS]	= imx_clk_fixed_factor("pll1_sys",	"pll1_bypass", 1, 1);
-	clks[IMX6UL_CLK_PLL2_BUS]	= imx_clk_gate("pll2_bus", 	"pll2_bypass", base + 0x30, 13);
-	clks[IMX6UL_CLK_PLL3_USB_OTG]	= imx_clk_gate("pll3_usb_otg", 	"pll3_bypass", base + 0x10, 13);
-	clks[IMX6UL_CLK_PLL4_AUDIO]	= imx_clk_gate("pll4_audio", 	"pll4_bypass", base + 0x70, 13);
+	clks[IMX6UL_CLK_PLL2_BUS]	= imx_clk_gate("pll2_bus",	"pll2_bypass", base + 0x30, 13);
+	clks[IMX6UL_CLK_PLL3_USB_OTG]	= imx_clk_gate("pll3_usb_otg",	"pll3_bypass", base + 0x10, 13);
+	clks[IMX6UL_CLK_PLL4_AUDIO]	= imx_clk_gate("pll4_audio",	"pll4_bypass", base + 0x70, 13);
 	clks[IMX6UL_CLK_PLL5_VIDEO]	= imx_clk_gate("pll5_video",	"pll5_bypass", base + 0xa0, 13);
 	clks[IMX6UL_CLK_PLL6_ENET]	= imx_clk_gate("pll6_enet",	"pll6_bypass", base + 0xe0, 13);
 	clks[IMX6UL_CLK_PLL7_USB_HOST]	= imx_clk_gate("pll7_usb_host",	"pll7_bypass", base + 0x20, 13);
@@ -196,8 +196,8 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 			base + 0xe0, 2, 2, 0, clk_enet_ref_table, &imx_ccm_lock);
 
 	clks[IMX6UL_CLK_ENET2_REF_125M] = imx_clk_gate("enet_ref_125m", "enet2_ref", base + 0xe0, 20);
-	clks[IMX6UL_CLK_ENET_PTP_REF] 	= imx_clk_fixed_factor("enet_ptp_ref", "pll6_enet", 1, 20);
-	clks[IMX6UL_CLK_ENET_PTP] 	= imx_clk_gate("enet_ptp", "enet_ptp_ref", base + 0xe0, 21);
+	clks[IMX6UL_CLK_ENET_PTP_REF]	= imx_clk_fixed_factor("enet_ptp_ref", "pll6_enet", 1, 20);
+	clks[IMX6UL_CLK_ENET_PTP]	= imx_clk_gate("enet_ptp", "enet_ptp_ref", base + 0xe0, 21);
 
 	clks[IMX6UL_CLK_PLL4_POST_DIV]  = clk_register_divider_table(NULL, "pll4_post_div", "pll4_audio",
 		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x70, 19, 2, 0, post_div_table, &imx_ccm_lock);
@@ -210,8 +210,8 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 
 	/*						   name		parent_name	 mult  div */
 	clks[IMX6UL_CLK_PLL2_198M] = imx_clk_fixed_factor("pll2_198m", "pll2_pfd2_396m", 1,	2);
-	clks[IMX6UL_CLK_PLL3_80M]  = imx_clk_fixed_factor("pll3_80m",  "pll3_usb_otg",   1, 	6);
-	clks[IMX6UL_CLK_PLL3_60M]  = imx_clk_fixed_factor("pll3_60m",  "pll3_usb_otg",   1, 	8);
+	clks[IMX6UL_CLK_PLL3_80M]  = imx_clk_fixed_factor("pll3_80m",  "pll3_usb_otg",   1,	6);
+	clks[IMX6UL_CLK_PLL3_60M]  = imx_clk_fixed_factor("pll3_60m",  "pll3_usb_otg",   1,	8);
 	clks[IMX6UL_CLK_GPT_3M]	   = imx_clk_fixed_factor("gpt_3m",	"osc",		 1,	8);
 
 	np = ccm_node;
@@ -219,34 +219,34 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	WARN_ON(!base);
 
 	clks[IMX6UL_CA7_SECONDARY_SEL]	  = imx_clk_mux("ca7_secondary_sel", base + 0xc, 3, 1, ca7_secondary_sels, ARRAY_SIZE(ca7_secondary_sels));
-	clks[IMX6UL_CLK_STEP] 	 	  = imx_clk_mux("step", base + 0x0c, 8, 1, step_sels, ARRAY_SIZE(step_sels));
-	clks[IMX6UL_CLK_PLL1_SW] 	  = imx_clk_mux_flags("pll1_sw",   base + 0x0c, 2,  1, pll1_sw_sels, ARRAY_SIZE(pll1_sw_sels), 0);
+	clks[IMX6UL_CLK_STEP]		  = imx_clk_mux("step", base + 0x0c, 8, 1, step_sels, ARRAY_SIZE(step_sels));
+	clks[IMX6UL_CLK_PLL1_SW]	  = imx_clk_mux_flags("pll1_sw",   base + 0x0c, 2,  1, pll1_sw_sels, ARRAY_SIZE(pll1_sw_sels), 0);
 	clks[IMX6UL_CLK_AXI_ALT_SEL]	  = imx_clk_mux("axi_alt_sel",		base + 0x14, 7,  1, axi_alt_sels, ARRAY_SIZE(axi_alt_sels));
-	clks[IMX6UL_CLK_AXI_SEL] 	  = imx_clk_mux_flags("axi_sel", 	base + 0x14, 6,  1, axi_sels, ARRAY_SIZE(axi_sels), 0);
-	clks[IMX6UL_CLK_PERIPH_PRE] 	  = imx_clk_mux("periph_pre",       base + 0x18, 18, 2, periph_pre_sels, ARRAY_SIZE(periph_pre_sels));
-	clks[IMX6UL_CLK_PERIPH2_PRE] 	  = imx_clk_mux("periph2_pre",      base + 0x18, 21, 2, periph2_pre_sels, ARRAY_SIZE(periph2_pre_sels));
+	clks[IMX6UL_CLK_AXI_SEL]	  = imx_clk_mux_flags("axi_sel",	base + 0x14, 6,  1, axi_sels, ARRAY_SIZE(axi_sels), 0);
+	clks[IMX6UL_CLK_PERIPH_PRE]	  = imx_clk_mux("periph_pre",       base + 0x18, 18, 2, periph_pre_sels, ARRAY_SIZE(periph_pre_sels));
+	clks[IMX6UL_CLK_PERIPH2_PRE]	  = imx_clk_mux("periph2_pre",      base + 0x18, 21, 2, periph2_pre_sels, ARRAY_SIZE(periph2_pre_sels));
 	clks[IMX6UL_CLK_PERIPH_CLK2_SEL]  = imx_clk_mux("periph_clk2_sel",  base + 0x18, 12, 2, periph_clk2_sels, ARRAY_SIZE(periph_clk2_sels));
 	clks[IMX6UL_CLK_PERIPH2_CLK2_SEL] = imx_clk_mux("periph2_clk2_sel", base + 0x18, 20, 1, periph2_clk2_sels, ARRAY_SIZE(periph2_clk2_sels));
-	clks[IMX6UL_CLK_EIM_SLOW_SEL] 	  = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels, ARRAY_SIZE(eim_slow_sels));
+	clks[IMX6UL_CLK_EIM_SLOW_SEL]	  = imx_clk_mux("eim_slow_sel", base + 0x1c, 29, 2, eim_slow_sels, ARRAY_SIZE(eim_slow_sels));
 	clks[IMX6UL_CLK_GPMI_SEL]	  = imx_clk_mux("gpmi_sel",     base + 0x1c, 19, 1, gpmi_sels, ARRAY_SIZE(gpmi_sels));
-	clks[IMX6UL_CLK_BCH_SEL]      	  = imx_clk_mux("bch_sel", 	base + 0x1c, 18, 1, bch_sels, ARRAY_SIZE(bch_sels));
+	clks[IMX6UL_CLK_BCH_SEL]	  = imx_clk_mux("bch_sel",	base + 0x1c, 18, 1, bch_sels, ARRAY_SIZE(bch_sels));
 	clks[IMX6UL_CLK_USDHC2_SEL]	  = imx_clk_mux("usdhc2_sel",   base + 0x1c, 17, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
 	clks[IMX6UL_CLK_USDHC1_SEL]	  = imx_clk_mux("usdhc1_sel",   base + 0x1c, 16, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
-	clks[IMX6UL_CLK_SAI3_SEL]     	  = imx_clk_mux("sai3_sel",     base + 0x1c, 14, 2, sai_sels, ARRAY_SIZE(sai_sels));
+	clks[IMX6UL_CLK_SAI3_SEL]	  = imx_clk_mux("sai3_sel",     base + 0x1c, 14, 2, sai_sels, ARRAY_SIZE(sai_sels));
 	clks[IMX6UL_CLK_SAI2_SEL]         = imx_clk_mux("sai2_sel",     base + 0x1c, 12, 2, sai_sels, ARRAY_SIZE(sai_sels));
-	clks[IMX6UL_CLK_SAI1_SEL]    	  = imx_clk_mux("sai1_sel",     base + 0x1c, 10, 2, sai_sels, ARRAY_SIZE(sai_sels));
-	clks[IMX6UL_CLK_QSPI1_SEL] 	  = imx_clk_mux("qspi1_sel",    base + 0x1c, 7,  3, qspi1_sels, ARRAY_SIZE(qspi1_sels));
-	clks[IMX6UL_CLK_PERCLK_SEL] 	  = imx_clk_mux("perclk_sel",	base + 0x1c, 6,  1, perclk_sels, ARRAY_SIZE(perclk_sels));
-	clks[IMX6UL_CLK_CAN_SEL]      	  = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
+	clks[IMX6UL_CLK_SAI1_SEL]	  = imx_clk_mux("sai1_sel",     base + 0x1c, 10, 2, sai_sels, ARRAY_SIZE(sai_sels));
+	clks[IMX6UL_CLK_QSPI1_SEL]	  = imx_clk_mux("qspi1_sel",    base + 0x1c, 7,  3, qspi1_sels, ARRAY_SIZE(qspi1_sels));
+	clks[IMX6UL_CLK_PERCLK_SEL]	  = imx_clk_mux("perclk_sel",	base + 0x1c, 6,  1, perclk_sels, ARRAY_SIZE(perclk_sels));
+	clks[IMX6UL_CLK_CAN_SEL]	  = imx_clk_mux("can_sel",	base + 0x20, 8,  2, can_sels, ARRAY_SIZE(can_sels));
 	clks[IMX6UL_CLK_UART_SEL]	  = imx_clk_mux("uart_sel",	base + 0x24, 6,  1, uart_sels, ARRAY_SIZE(uart_sels));
 	clks[IMX6UL_CLK_ENFC_SEL]	  = imx_clk_mux("enfc_sel",	base + 0x2c, 15, 3, enfc_sels, ARRAY_SIZE(enfc_sels));
 	clks[IMX6UL_CLK_LDB_DI0_SEL]	  = imx_clk_mux("ldb_di0_sel",	base + 0x2c, 9,  3, ldb_di0_sels, ARRAY_SIZE(ldb_di0_sels));
 	clks[IMX6UL_CLK_SPDIF_SEL]	  = imx_clk_mux("spdif_sel",	base + 0x30, 20, 2, spdif_sels, ARRAY_SIZE(spdif_sels));
-	clks[IMX6UL_CLK_SIM_PRE_SEL] 	  = imx_clk_mux("sim_pre_sel",	base + 0x34, 15, 3, sim_pre_sels, ARRAY_SIZE(sim_pre_sels));
-	clks[IMX6UL_CLK_SIM_SEL]	  = imx_clk_mux("sim_sel", 	base + 0x34, 9, 3, sim_sels, ARRAY_SIZE(sim_sels));
+	clks[IMX6UL_CLK_SIM_PRE_SEL]	  = imx_clk_mux("sim_pre_sel",	base + 0x34, 15, 3, sim_pre_sels, ARRAY_SIZE(sim_pre_sels));
+	clks[IMX6UL_CLK_SIM_SEL]	  = imx_clk_mux("sim_sel",	base + 0x34, 9, 3, sim_sels, ARRAY_SIZE(sim_sels));
 	clks[IMX6UL_CLK_ECSPI_SEL]	  = imx_clk_mux("ecspi_sel",	base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels));
 	clks[IMX6UL_CLK_LCDIF_PRE_SEL]	  = imx_clk_mux("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels));
-	clks[IMX6UL_CLK_LCDIF_SEL]	  = imx_clk_mux("lcdif_sel", 	base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
+	clks[IMX6UL_CLK_LCDIF_SEL]	  = imx_clk_mux("lcdif_sel",	base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
 
 	clks[IMX6UL_CLK_LDB_DI0_DIV_SEL]  = imx_clk_mux("ldb_di0", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels));
 	clks[IMX6UL_CLK_LDB_DI1_DIV_SEL]  = imx_clk_mux("ldb_di1", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels));
@@ -259,11 +259,11 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_PERIPH]  = imx_clk_busy_mux("periph",  base + 0x14, 25, 1, base + 0x48, 5, periph_sels, ARRAY_SIZE(periph_sels));
 	clks[IMX6UL_CLK_PERIPH2] = imx_clk_busy_mux("periph2", base + 0x14, 26, 1, base + 0x48, 3, periph2_sels, ARRAY_SIZE(periph2_sels));
 
-	clks[IMX6UL_CLK_PERIPH_CLK2]	= imx_clk_divider("periph_clk2",   "periph_clk2_sel",  	base + 0x14, 27, 3);
-	clks[IMX6UL_CLK_PERIPH2_CLK2]	= imx_clk_divider("periph2_clk2",  "periph2_clk2_sel", 	base + 0x14, 0,  3);
+	clks[IMX6UL_CLK_PERIPH_CLK2]	= imx_clk_divider("periph_clk2",   "periph_clk2_sel",	base + 0x14, 27, 3);
+	clks[IMX6UL_CLK_PERIPH2_CLK2]	= imx_clk_divider("periph2_clk2",  "periph2_clk2_sel",	base + 0x14, 0,  3);
 	clks[IMX6UL_CLK_IPG]		= imx_clk_divider("ipg",	   "ahb",		base + 0x14, 8,	 2);
 	clks[IMX6UL_CLK_LCDIF_PODF]	= imx_clk_divider("lcdif_podf",	   "lcdif_pred",	base + 0x18, 23, 3);
-	clks[IMX6UL_CLK_QSPI1_PDOF] 	= imx_clk_divider("qspi1_podf",	   "qspi1_sel",		base + 0x1c, 26, 3);
+	clks[IMX6UL_CLK_QSPI1_PDOF]	= imx_clk_divider("qspi1_podf",	   "qspi1_sel",		base + 0x1c, 26, 3);
 	clks[IMX6UL_CLK_EIM_SLOW_PODF]	= imx_clk_divider("eim_slow_podf", "eim_slow_sel",	base + 0x1c, 23, 3);
 	clks[IMX6UL_CLK_PERCLK]		= imx_clk_divider("perclk",	   "perclk_sel",	base + 0x1c, 0,  6);
 	clks[IMX6UL_CLK_CAN_PODF]	= imx_clk_divider("can_podf",	   "can_sel",		base + 0x20, 2,  6);
@@ -287,14 +287,14 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_LCDIF_PRED]	= imx_clk_divider("lcdif_pred",	   "lcdif_pre_sel",	base + 0x38, 12, 3);
 	clks[IMX6UL_CLK_CSI_PODF]       = imx_clk_divider("csi_podf",      "csi_sel",           base + 0x3c, 11, 3);
 
-	clks[IMX6UL_CLK_ARM]		= imx_clk_busy_divider("arm", 	    "pll1_sw",	base +	0x10, 0,  3,  base + 0x48, 16);
+	clks[IMX6UL_CLK_ARM]		= imx_clk_busy_divider("arm",	    "pll1_sw",	base +	0x10, 0,  3,  base + 0x48, 16);
 	clks[IMX6UL_CLK_MMDC_PODF]	= imx_clk_busy_divider("mmdc_podf", "periph2",	base +  0x14, 3,  3,  base + 0x48, 2);
 	clks[IMX6UL_CLK_AXI_PODF]	= imx_clk_busy_divider("axi_podf",  "axi_sel",	base +  0x14, 16, 3,  base + 0x48, 0);
 	clks[IMX6UL_CLK_AHB]		= imx_clk_busy_divider("ahb",	    "periph",	base +  0x14, 10, 3,  base + 0x48, 1);
 
 	/* CCGR0 */
-	clks[IMX6UL_CLK_AIPSTZ1]	= imx_clk_gate2("aips_tz1", 	"ahb",		base + 0x68,	0);
-	clks[IMX6UL_CLK_AIPSTZ2]	= imx_clk_gate2("aips_tz2", 	"ahb",		base + 0x68,	2);
+	clks[IMX6UL_CLK_AIPSTZ1]	= imx_clk_gate2("aips_tz1",	"ahb",		base + 0x68,	0);
+	clks[IMX6UL_CLK_AIPSTZ2]	= imx_clk_gate2("aips_tz2",	"ahb",		base + 0x68,	2);
 	clks[IMX6UL_CLK_APBHDMA]	= imx_clk_gate2("apbh_dma",	"bch_podf",	base + 0x68,	4);
 	clks[IMX6UL_CLK_ASRC_IPG]	= imx_clk_gate2_shared("asrc_ipg",	"ahb",	base + 0x68,	6, &share_count_asrc);
 	clks[IMX6UL_CLK_ASRC_MEM]	= imx_clk_gate2_shared("asrc_mem",	"ahb",	base + 0x68,	6, &share_count_asrc);
@@ -302,7 +302,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_CAAM_ACLK]	= imx_clk_gate2("caam_aclk",	"ahb",		base + 0x68,	10);
 	clks[IMX6UL_CLK_CAAM_IPG]	= imx_clk_gate2("caam_ipg",	"ipg",		base + 0x68,	12);
 	clks[IMX6UL_CLK_CAN1_IPG]	= imx_clk_gate2("can1_ipg",	"ipg",		base + 0x68,	14);
-	clks[IMX6UL_CLK_CAN1_SERIAL]	= imx_clk_gate2("can1_serial",	"can_podf",	base + 0x68, 	16);
+	clks[IMX6UL_CLK_CAN1_SERIAL]	= imx_clk_gate2("can1_serial",	"can_podf",	base + 0x68,	16);
 	clks[IMX6UL_CLK_CAN2_IPG]	= imx_clk_gate2("can2_ipg",	"ipg",		base + 0x68,	18);
 	clks[IMX6UL_CLK_CAN2_SERIAL]	= imx_clk_gate2("can2_serial",	"can_podf",	base + 0x68,	20);
 	clks[IMX6UL_CLK_GPT2_BUS]	= imx_clk_gate2("gpt_bus",	"perclk",	base + 0x68,	24);
@@ -331,7 +331,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_CSI]		= imx_clk_gate2("csi",		"csi_podf",		base + 0x70,	2);
 	clks[IMX6UL_CLK_I2C1]		= imx_clk_gate2("i2c1",		"perclk",	base + 0x70,	6);
 	clks[IMX6UL_CLK_I2C2]		= imx_clk_gate2("i2c2",		"perclk",	base + 0x70,	8);
-	clks[IMX6UL_CLK_I2C3] 		= imx_clk_gate2("i2c3",		"perclk",	base + 0x70,	10);
+	clks[IMX6UL_CLK_I2C3]		= imx_clk_gate2("i2c3",		"perclk",	base + 0x70,	10);
 	clks[IMX6UL_CLK_OCOTP]		= imx_clk_gate2("ocotp",	"ipg",		base + 0x70,	12);
 	clks[IMX6UL_CLK_IOMUXC]		= imx_clk_gate2("iomuxc",	"lcdif_podf",	base + 0x70,	14);
 	clks[IMX6UL_CLK_LCDIF_APB]	= imx_clk_gate2("lcdif_apb",	"axi",		base + 0x70,	28);
@@ -365,6 +365,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	/* CCGR5 */
 	clks[IMX6UL_CLK_ROM]		= imx_clk_gate2("rom",		"ahb",		base + 0x7c,	0);
 	clks[IMX6UL_CLK_SDMA]		= imx_clk_gate2("sdma",		"ahb",		base + 0x7c,	6);
+	clks[IMX6UL_CLK_KPP]		= imx_clk_gate2("kpp",		"ipg",		base + 0x7c,	8);
 	clks[IMX6UL_CLK_WDOG2]		= imx_clk_gate2("wdog2",	"ipg",		base + 0x7c,	10);
 	clks[IMX6UL_CLK_SPBA]		= imx_clk_gate2("spba",		"ipg",		base + 0x7c,	12);
 	clks[IMX6UL_CLK_SPDIF]		= imx_clk_gate2_shared("spdif",		"spdif_podf",	base + 0x7c,	14, &share_count_audio);
@@ -391,10 +392,10 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_UART8_IPG]	= imx_clk_gate2("uart8_ipg",	"ipg",		 base + 0x80,	14);
 	clks[IMX6UL_CLK_UART8_SERIAL]	= imx_clk_gate2("uart8_serial", "uart_podf",	 base + 0x80,	14);
 	clks[IMX6UL_CLK_WDOG3]		= imx_clk_gate2("wdog3",	"ipg",		 base + 0x80,	20);
-	clks[IMX6UL_CLK_I2C4]		= imx_clk_gate2("i2c4",		"perclk", 	 base + 0x80,	24);
+	clks[IMX6UL_CLK_I2C4]		= imx_clk_gate2("i2c4",		"perclk",	 base + 0x80,	24);
 	clks[IMX6UL_CLK_PWM5]		= imx_clk_gate2("pwm5",		"perclk",	 base + 0x80,	26);
 	clks[IMX6UL_CLK_PWM6]		= imx_clk_gate2("pwm6",		"perclk",	 base +	0x80,	28);
-	clks[IMX6UL_CLK_PWM7]		= imx_clk_gate2("Pwm7",		"perclk",	 base + 0x80,	30);
+	clks[IMX6UL_CLK_PWM7]		= imx_clk_gate2("pwm7",		"perclk",	 base + 0x80,	30);
 
 	/* mask handshake of mmdc */
 	writel_relaxed(BM_CCM_CCDR_MMDC_CH0_MASK, base + CCDR);
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index c94ac5c26226..d942f5748d08 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -87,7 +87,7 @@ struct clk *imx_clk_fixup_mux(const char *name, void __iomem *reg,
 
 static inline struct clk *imx_clk_fixed(const char *name, int rate)
 {
-	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+	return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 }
 
 static inline struct clk *imx_clk_divider(const char *name, const char *parent,
diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c
index 576bdb7c98b8..2a76901bf04b 100644
--- a/drivers/clk/mediatek/clk-gate.c
+++ b/drivers/clk/mediatek/clk-gate.c
@@ -25,7 +25,7 @@
 
 static int mtk_cg_bit_is_cleared(struct clk_hw *hw)
 {
-	struct mtk_clk_gate *cg = to_clk_gate(hw);
+	struct mtk_clk_gate *cg = to_mtk_clk_gate(hw);
 	u32 val;
 
 	regmap_read(cg->regmap, cg->sta_ofs, &val);
@@ -37,7 +37,7 @@ static int mtk_cg_bit_is_cleared(struct clk_hw *hw)
 
 static int mtk_cg_bit_is_set(struct clk_hw *hw)
 {
-	struct mtk_clk_gate *cg = to_clk_gate(hw);
+	struct mtk_clk_gate *cg = to_mtk_clk_gate(hw);
 	u32 val;
 
 	regmap_read(cg->regmap, cg->sta_ofs, &val);
@@ -49,14 +49,14 @@ static int mtk_cg_bit_is_set(struct clk_hw *hw)
 
 static void mtk_cg_set_bit(struct clk_hw *hw)
 {
-	struct mtk_clk_gate *cg = to_clk_gate(hw);
+	struct mtk_clk_gate *cg = to_mtk_clk_gate(hw);
 
 	regmap_write(cg->regmap, cg->set_ofs, BIT(cg->bit));
 }
 
 static void mtk_cg_clr_bit(struct clk_hw *hw)
 {
-	struct mtk_clk_gate *cg = to_clk_gate(hw);
+	struct mtk_clk_gate *cg = to_mtk_clk_gate(hw);
 
 	regmap_write(cg->regmap, cg->clr_ofs, BIT(cg->bit));
 }
diff --git a/drivers/clk/mediatek/clk-gate.h b/drivers/clk/mediatek/clk-gate.h
index 11e25c992948..b1821603b887 100644
--- a/drivers/clk/mediatek/clk-gate.h
+++ b/drivers/clk/mediatek/clk-gate.h
@@ -29,7 +29,7 @@ struct mtk_clk_gate {
 	u8		bit;
 };
 
-static inline struct mtk_clk_gate *to_clk_gate(struct clk_hw *hw)
+static inline struct mtk_clk_gate *to_mtk_clk_gate(struct clk_hw *hw)
 {
 	return container_of(hw, struct mtk_clk_gate, hw);
 }
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index cf08db6c130c..5ada644e6200 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -58,8 +58,8 @@ void __init mtk_clk_register_fixed_clks(const struct mtk_fixed_clk *clks,
 	for (i = 0; i < num; i++) {
 		const struct mtk_fixed_clk *rc = &clks[i];
 
-		clk = clk_register_fixed_rate(NULL, rc->name, rc->parent,
-				rc->parent ? 0 : CLK_IS_ROOT, rc->rate);
+		clk = clk_register_fixed_rate(NULL, rc->name, rc->parent, 0,
+					      rc->rate);
 
 		if (IS_ERR(clk)) {
 			pr_err("Failed to register clk %s: %ld\n",
@@ -209,12 +209,14 @@ struct clk * __init mtk_clk_register_composite(const struct mtk_composite *mc,
 		mc->flags);
 
 	if (IS_ERR(clk)) {
-		kfree(gate);
-		kfree(mux);
+		ret = PTR_ERR(clk);
+		goto err_out;
 	}
 
 	return clk;
 err_out:
+	kfree(div);
+	kfree(gate);
 	kfree(mux);
 
 	return ERR_PTR(ret);
diff --git a/drivers/clk/meson/clkc.c b/drivers/clk/meson/clkc.c
index c83ae1367abc..d920d410b51d 100644
--- a/drivers/clk/meson/clkc.c
+++ b/drivers/clk/meson/clkc.c
@@ -198,7 +198,7 @@ meson_clk_register_fixed_rate(const struct clk_conf *clk_conf,
 }
 
 void __init meson_clk_register_clks(const struct clk_conf *clk_confs,
-				    size_t nr_confs,
+				    unsigned int nr_confs,
 				    void __iomem *clk_base)
 {
 	unsigned int i;
diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
index 27696255486d..eaee8f099c8c 100644
--- a/drivers/clk/mvebu/Kconfig
+++ b/drivers/clk/mvebu/Kconfig
@@ -11,7 +11,6 @@ config ARMADA_370_CLK
 	bool
 	select MVEBU_CLK_COMMON
 	select MVEBU_CLK_CPU
-	select MVEBU_CLK_COREDIV
 
 config ARMADA_375_CLK
 	bool
@@ -29,7 +28,6 @@ config ARMADA_XP_CLK
 	bool
 	select MVEBU_CLK_COMMON
 	select MVEBU_CLK_CPU
-	select MVEBU_CLK_COREDIV
 
 config DOVE_CLK
 	bool
diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c
index 28aac67e7b92..66be2e0c82b4 100644
--- a/drivers/clk/mvebu/common.c
+++ b/drivers/clk/mvebu/common.c
@@ -137,8 +137,8 @@ void __init mvebu_coreclk_setup(struct device_node *np,
 	of_property_read_string_index(np, "clock-output-names", 0,
 				      &tclk_name);
 	rate = desc->get_tclk_freq(base);
-	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL,
-						   CLK_IS_ROOT, rate);
+	clk_data.clks[0] = clk_register_fixed_rate(NULL, tclk_name, NULL, 0,
+						   rate);
 	WARN_ON(IS_ERR(clk_data.clks[0]));
 
 	/* Register CPU clock */
@@ -150,8 +150,8 @@ void __init mvebu_coreclk_setup(struct device_node *np,
 		&& desc->is_sscg_enabled(base))
 		rate = desc->fix_sscg_deviation(rate);
 
-	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL,
-						   CLK_IS_ROOT, rate);
+	clk_data.clks[1] = clk_register_fixed_rate(NULL, cpuclk_name, NULL, 0,
+						   rate);
 	WARN_ON(IS_ERR(clk_data.clks[1]));
 
 	/* Register fixed-factor clocks derived from CPU clock */
@@ -174,8 +174,7 @@ void __init mvebu_coreclk_setup(struct device_node *np,
 					      2 + desc->num_ratios, &name);
 		rate = desc->get_refclk_freq(base);
 		clk_data.clks[2 + desc->num_ratios] =
-			clk_register_fixed_rate(NULL, name, NULL,
-						CLK_IS_ROOT, rate);
+			clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 		WARN_ON(IS_ERR(clk_data.clks[2 + desc->num_ratios]));
 	}
 
@@ -199,8 +198,6 @@ struct clk_gating_ctrl {
 	u32 saved_reg;
 };
 
-#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
-
 static struct clk_gating_ctrl *ctrl;
 
 static struct clk *clk_gating_get_src(
diff --git a/drivers/clk/mvebu/dove-divider.c b/drivers/clk/mvebu/dove-divider.c
index 3e0b52daa35f..4091f3cfee19 100644
--- a/drivers/clk/mvebu/dove-divider.c
+++ b/drivers/clk/mvebu/dove-divider.c
@@ -225,8 +225,7 @@ static int dove_divider_init(struct device *dev, void __iomem *base,
 	 * Create the core PLL clock.  We treat this as a fixed rate
 	 * clock as we don't know any better, and documentation is sparse.
 	 */
-	clk = clk_register_fixed_rate(dev, core_pll[0], NULL, CLK_IS_ROOT,
-				      2000000000UL);
+	clk = clk_register_fixed_rate(dev, core_pll[0], NULL, 0, 2000000000UL);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c
index 99550f25975e..a2a8d614039d 100644
--- a/drivers/clk/mvebu/kirkwood.c
+++ b/drivers/clk/mvebu/kirkwood.c
@@ -256,8 +256,6 @@ static const struct clk_muxing_soc_desc kirkwood_mux_desc[] __initconst = {
 		11, 1, 0 },
 };
 
-#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw)
-
 static struct clk *clk_muxing_get_src(
 	struct of_phandle_args *clkspec, void *data)
 {
diff --git a/drivers/clk/mxs/clk-div.c b/drivers/clk/mxs/clk-div.c
index 049ee27d5a22..f75e989c578f 100644
--- a/drivers/clk/mxs/clk-div.c
+++ b/drivers/clk/mxs/clk-div.c
@@ -33,7 +33,7 @@ struct clk_div {
 
 static inline struct clk_div *to_clk_div(struct clk_hw *hw)
 {
-	struct clk_divider *divider = container_of(hw, struct clk_divider, hw);
+	struct clk_divider *divider = to_clk_divider(hw);
 
 	return container_of(divider, struct clk_div, divider);
 }
diff --git a/drivers/clk/mxs/clk.h b/drivers/clk/mxs/clk.h
index a4590956d2a2..5a264a486ad9 100644
--- a/drivers/clk/mxs/clk.h
+++ b/drivers/clk/mxs/clk.h
@@ -38,7 +38,7 @@ struct clk *mxs_clk_frac(const char *name, const char *parent_name,
 
 static inline struct clk *mxs_clk_fixed(const char *name, int rate)
 {
-	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+	return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
 }
 
 static inline struct clk *mxs_clk_gate(const char *name,
diff --git a/drivers/clk/nxp/Makefile b/drivers/clk/nxp/Makefile
index 607bd48c6563..d456ee6cc3d3 100644
--- a/drivers/clk/nxp/Makefile
+++ b/drivers/clk/nxp/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-cgu.o
 obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-ccu.o
+obj-$(CONFIG_ARCH_LPC18XX)	+= clk-lpc18xx-creg.o
 obj-$(CONFIG_ARCH_LPC32XX)	+= clk-lpc32xx.o
diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c
index 13aabbb3acbe..f7136b94fd0e 100644
--- a/drivers/clk/nxp/clk-lpc18xx-ccu.c
+++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c
@@ -28,8 +28,6 @@
 #define CCU_BRANCH_IS_BUS	BIT(0)
 #define CCU_BRANCH_HAVE_DIV2	BIT(1)
 
-#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
-
 struct lpc18xx_branch_clk_data {
 	const char **name;
 	int num;
@@ -222,7 +220,7 @@ static void lpc18xx_ccu_register_branch_gate_div(struct lpc18xx_clk_branch *bran
 		div->width = 1;
 
 		div_hw = &div->hw;
-		div_ops = &clk_divider_ops;
+		div_ops = &clk_divider_ro_ops;
 	}
 
 	branch->gate.reg = branch->offset + reg_base;
diff --git a/drivers/clk/nxp/clk-lpc18xx-cgu.c b/drivers/clk/nxp/clk-lpc18xx-cgu.c
index c924572fc9bc..2531174b399e 100644
--- a/drivers/clk/nxp/clk-lpc18xx-cgu.c
+++ b/drivers/clk/nxp/clk-lpc18xx-cgu.c
@@ -605,7 +605,7 @@ static void __init lpc18xx_cgu_register_source_clks(struct device_node *np,
 
 	/* Register the internal 12 MHz RC oscillator (IRC) */
 	clk = clk_register_fixed_rate(NULL, clk_src_names[CLK_SRC_IRC],
-				      NULL, CLK_IS_ROOT, 12000000);
+				      NULL, 0, 12000000);
 	if (IS_ERR(clk))
 		pr_warn("%s: failed to register irc clk\n", __func__);
 
diff --git a/drivers/clk/nxp/clk-lpc18xx-creg.c b/drivers/clk/nxp/clk-lpc18xx-creg.c
new file mode 100644
index 000000000000..d44b61afa2dc
--- /dev/null
+++ b/drivers/clk/nxp/clk-lpc18xx-creg.c
@@ -0,0 +1,226 @@
+/*
+ * Clk driver for NXP LPC18xx/43xx Configuration Registers (CREG)
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#define LPC18XX_CREG_CREG0			0x004
+#define  LPC18XX_CREG_CREG0_EN1KHZ		BIT(0)
+#define  LPC18XX_CREG_CREG0_EN32KHZ		BIT(1)
+#define  LPC18XX_CREG_CREG0_RESET32KHZ		BIT(2)
+#define  LPC18XX_CREG_CREG0_PD32KHZ		BIT(3)
+
+#define to_clk_creg(_hw) container_of(_hw, struct clk_creg_data, hw)
+
+enum {
+	CREG_CLK_1KHZ,
+	CREG_CLK_32KHZ,
+	CREG_CLK_MAX,
+};
+
+struct clk_creg_data {
+	struct clk_hw hw;
+	const char *name;
+	struct regmap *reg;
+	unsigned int en_mask;
+	const struct clk_ops *ops;
+};
+
+#define CREG_CLK(_name, _emask, _ops)		\
+{						\
+	.name = _name,				\
+	.en_mask = LPC18XX_CREG_CREG0_##_emask,	\
+	.ops = &_ops,				\
+}
+
+static int clk_creg_32k_prepare(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	int ret;
+
+	ret = regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+				 LPC18XX_CREG_CREG0_PD32KHZ |
+				 LPC18XX_CREG_CREG0_RESET32KHZ, 0);
+
+	/*
+	 * Powering up the 32k oscillator takes a long while
+	 * and sadly there aren't any status bit to poll.
+	 */
+	msleep(2500);
+
+	return ret;
+}
+
+static void clk_creg_32k_unprepare(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+			   LPC18XX_CREG_CREG0_PD32KHZ,
+			   LPC18XX_CREG_CREG0_PD32KHZ);
+}
+
+static int clk_creg_32k_is_prepared(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	u32 reg;
+
+	regmap_read(creg->reg, LPC18XX_CREG_CREG0, &reg);
+
+	return !(reg & LPC18XX_CREG_CREG0_PD32KHZ) &&
+	       !(reg & LPC18XX_CREG_CREG0_RESET32KHZ);
+}
+
+static unsigned long clk_creg_1k_recalc_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	return parent_rate / 32;
+}
+
+static int clk_creg_enable(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	return regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+				  creg->en_mask, creg->en_mask);
+}
+
+static void clk_creg_disable(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+
+	regmap_update_bits(creg->reg, LPC18XX_CREG_CREG0,
+			   creg->en_mask, 0);
+}
+
+static int clk_creg_is_enabled(struct clk_hw *hw)
+{
+	struct clk_creg_data *creg = to_clk_creg(hw);
+	u32 reg;
+
+	regmap_read(creg->reg, LPC18XX_CREG_CREG0, &reg);
+
+	return !!(reg & creg->en_mask);
+}
+
+static const struct clk_ops clk_creg_32k = {
+	.enable		= clk_creg_enable,
+	.disable	= clk_creg_disable,
+	.is_enabled	= clk_creg_is_enabled,
+	.prepare	= clk_creg_32k_prepare,
+	.unprepare	= clk_creg_32k_unprepare,
+	.is_prepared	= clk_creg_32k_is_prepared,
+};
+
+static const struct clk_ops clk_creg_1k = {
+	.enable		= clk_creg_enable,
+	.disable	= clk_creg_disable,
+	.is_enabled	= clk_creg_is_enabled,
+	.recalc_rate	= clk_creg_1k_recalc_rate,
+};
+
+static struct clk_creg_data clk_creg_clocks[] = {
+	[CREG_CLK_1KHZ]  = CREG_CLK("1khz_clk",  EN1KHZ,  clk_creg_1k),
+	[CREG_CLK_32KHZ] = CREG_CLK("32khz_clk", EN32KHZ, clk_creg_32k),
+};
+
+static struct clk *clk_register_creg_clk(struct device *dev,
+					 struct clk_creg_data *creg_clk,
+					 const char **parent_name,
+					 struct regmap *syscon)
+{
+	struct clk_init_data init;
+
+	init.ops = creg_clk->ops;
+	init.name = creg_clk->name;
+	init.parent_names = parent_name;
+	init.num_parents = 1;
+
+	creg_clk->reg = syscon;
+	creg_clk->hw.init = &init;
+
+	if (dev)
+		return devm_clk_register(dev, &creg_clk->hw);
+
+	return clk_register(NULL, &creg_clk->hw);
+}
+
+static struct clk *clk_creg_early[CREG_CLK_MAX];
+static struct clk_onecell_data clk_creg_early_data = {
+	.clks = clk_creg_early,
+	.clk_num = CREG_CLK_MAX,
+};
+
+static void __init lpc18xx_creg_clk_init(struct device_node *np)
+{
+	const char *clk_32khz_parent;
+	struct regmap *syscon;
+
+	syscon = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(syscon)) {
+		pr_err("%s: syscon lookup failed\n", __func__);
+		return;
+	}
+
+	clk_32khz_parent = of_clk_get_parent_name(np, 0);
+
+	clk_creg_early[CREG_CLK_32KHZ] =
+		clk_register_creg_clk(NULL, &clk_creg_clocks[CREG_CLK_32KHZ],
+				      &clk_32khz_parent, syscon);
+	clk_creg_early[CREG_CLK_1KHZ] = ERR_PTR(-EPROBE_DEFER);
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_creg_early_data);
+}
+CLK_OF_DECLARE(lpc18xx_creg_clk, "nxp,lpc1850-creg-clk", lpc18xx_creg_clk_init);
+
+static struct clk *clk_creg[CREG_CLK_MAX];
+static struct clk_onecell_data clk_creg_data = {
+	.clks = clk_creg,
+	.clk_num = CREG_CLK_MAX,
+};
+
+static int lpc18xx_creg_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct regmap *syscon;
+
+	syscon = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(syscon)) {
+		dev_err(&pdev->dev, "syscon lookup failed\n");
+		return PTR_ERR(syscon);
+	}
+
+	clk_creg[CREG_CLK_32KHZ] = clk_creg_early[CREG_CLK_32KHZ];
+	clk_creg[CREG_CLK_1KHZ] =
+		clk_register_creg_clk(NULL, &clk_creg_clocks[CREG_CLK_1KHZ],
+				      &clk_creg_clocks[CREG_CLK_32KHZ].name,
+				      syscon);
+
+	return of_clk_add_provider(np, of_clk_src_onecell_get, &clk_creg_data);
+}
+
+static const struct of_device_id lpc18xx_creg_clk_of_match[] = {
+	{ .compatible = "nxp,lpc1850-creg-clk" },
+	{},
+};
+
+static struct platform_driver lpc18xx_creg_clk_driver = {
+	.probe = lpc18xx_creg_clk_probe,
+	.driver = {
+		.name = "lpc18xx-creg-clk",
+		.of_match_table = lpc18xx_creg_clk_of_match,
+	},
+};
+builtin_platform_driver(lpc18xx_creg_clk_driver);
diff --git a/drivers/clk/nxp/clk-lpc32xx.c b/drivers/clk/nxp/clk-lpc32xx.c
index 10dd0fdaa474..481b2646b496 100644
--- a/drivers/clk/nxp/clk-lpc32xx.c
+++ b/drivers/clk/nxp/clk-lpc32xx.c
@@ -87,7 +87,7 @@ enum {
 
 enum {
 	/* Start from the last defined clock in dt bindings */
-	LPC32XX_CLK_ADC_DIV = LPC32XX_CLK_ADC + 1,
+	LPC32XX_CLK_ADC_DIV = LPC32XX_CLK_HCLK_PLL + 1,
 	LPC32XX_CLK_ADC_RTC,
 	LPC32XX_CLK_TEST1,
 	LPC32XX_CLK_TEST2,
@@ -96,7 +96,6 @@ enum {
 	LPC32XX_CLK_OSC,
 	LPC32XX_CLK_SYS,
 	LPC32XX_CLK_PLL397X,
-	LPC32XX_CLK_HCLK_PLL,
 	LPC32XX_CLK_HCLK_DIV_PERIPH,
 	LPC32XX_CLK_HCLK_DIV,
 	LPC32XX_CLK_HCLK,
@@ -589,7 +588,8 @@ static long clk_hclk_pll_round_rate(struct clk_hw *hw, unsigned long rate,
 				    unsigned long *parent_rate)
 {
 	struct lpc32xx_pll_clk *clk = to_lpc32xx_pll_clk(hw);
-	u64 m_i, m, n, p, o = rate, i = *parent_rate, d = (u64)rate << 6;
+	u64 m_i, o = rate, i = *parent_rate, d = (u64)rate << 6;
+	u64 m = 0, n = 0, p = 0;
 	int p_i, n_i;
 
 	pr_debug("%s: %lu/%lu\n", clk_hw_get_name(hw), *parent_rate, rate);
@@ -1429,6 +1429,8 @@ static struct clk * __init lpc32xx_clk_register(u32 id)
 			hw = &clk_hw->hw0.div.hw;
 		else if (clk_hw->type == CLK_GATE)
 			hw = &clk_hw->hw0.gate.hw;
+		else
+			return ERR_PTR(-EINVAL);
 
 		hw->init = &clk_init;
 		clk = clk_register(NULL, hw);
@@ -1515,7 +1517,7 @@ static void __init lpc32xx_clk_init(struct device_node *np)
 		return;
 	}
 
-	for (i = 0; i < LPC32XX_CLK_MAX; i++) {
+	for (i = 1; i < LPC32XX_CLK_MAX; i++) {
 		clk[i] = lpc32xx_clk_register(i);
 		if (IS_ERR(clk[i])) {
 			pr_err("failed to register %s clock: %ld\n",
@@ -1526,9 +1528,6 @@ static void __init lpc32xx_clk_init(struct device_node *np)
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
 
-	/* For 13MHz osc valid output range of PLL is from 156MHz to 266.5MHz */
-	clk_set_rate(clk[LPC32XX_CLK_HCLK_PLL], 208000000);
-
 	/* Set 48MHz rate of USB PLL clock */
 	clk_set_rate(clk[LPC32XX_CLK_USB_PLL], 48000000);
 
@@ -1555,7 +1554,7 @@ static void __init lpc32xx_usb_clk_init(struct device_node *np)
 		return;
 	}
 
-	for (i = 0; i < LPC32XX_USB_CLK_MAX; i++) {
+	for (i = 1; i < LPC32XX_USB_CLK_MAX; i++) {
 		usb_clk[i] = lpc32xx_clk_register(i + LPC32XX_CLK_USB_OFFSET);
 		if (IS_ERR(usb_clk[i])) {
 			pr_err("failed to register %s clock: %ld\n",
diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c
index b7747229db9a..a98b98e2a9e4 100644
--- a/drivers/clk/pxa/clk-pxa25x.c
+++ b/drivers/clk/pxa/clk-pxa25x.c
@@ -84,7 +84,7 @@ unsigned int pxa25x_get_clk_frequency_khz(int info)
 static unsigned long clk_pxa25x_memory_get_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
-	unsigned long cccr = CCCR;
+	unsigned long cccr = readl(CCCR);
 	unsigned int m = M_clk_mult[(cccr >> 5) & 0x03];
 
 	return parent_rate / m;
@@ -99,7 +99,7 @@ PARENTS(pxa25x_osc3) = { "osc_3_6864mhz", "osc_3_6864mhz" };
 #define PXA25X_CKEN(dev_id, con_id, parents, mult, div,			\
 		    bit, is_lp, flags)					\
 	PXA_CKEN(dev_id, con_id, bit, parents, mult, div, mult, div,	\
-		 is_lp,  &CKEN, CKEN_ ## bit, flags)
+		 is_lp,  CKEN, CKEN_ ## bit, flags)
 #define PXA25X_PBUS95_CKEN(dev_id, con_id, bit, mult_hp, div_hp, delay)	\
 	PXA25X_CKEN(dev_id, con_id, pxa25x_pbus95_parents, mult_hp,	\
 		    div_hp, bit, NULL, 0)
@@ -112,10 +112,10 @@ PARENTS(pxa25x_osc3) = { "osc_3_6864mhz", "osc_3_6864mhz" };
 
 #define PXA25X_CKEN_1RATE(dev_id, con_id, bit, parents, delay)		\
 	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
-		       &CKEN, CKEN_ ## bit, 0)
+		       CKEN, CKEN_ ## bit, 0)
 #define PXA25X_CKEN_1RATE_AO(dev_id, con_id, bit, parents, delay)	\
 	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
-		       &CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
+		       CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
 
 static struct desc_clk_cken pxa25x_clocks[] __initdata = {
 	PXA25X_PBUS95_CKEN("pxa2xx-mci.0", NULL, MMC, 1, 5, 0),
@@ -162,7 +162,7 @@ MUX_RO_RATE_RO_OPS(clk_pxa25x_core, "core");
 static unsigned long clk_pxa25x_run_get_rate(struct clk_hw *hw,
 					     unsigned long parent_rate)
 {
-	unsigned long cccr = CCCR;
+	unsigned long cccr = readl(CCCR);
 	unsigned int n2 = N2_clk_mult[(cccr >> 7) & 0x07];
 
 	return (parent_rate / n2) * 2;
@@ -173,7 +173,7 @@ RATE_RO_OPS(clk_pxa25x_run, "run");
 static unsigned long clk_pxa25x_cpll_get_rate(struct clk_hw *hw,
 	unsigned long parent_rate)
 {
-	unsigned long clkcfg, cccr = CCCR;
+	unsigned long clkcfg, cccr = readl(CCCR);
 	unsigned int l, m, n2, t;
 
 	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (clkcfg));
@@ -200,12 +200,10 @@ static void __init pxa25x_register_core(void)
 static void __init pxa25x_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_3_6864mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
-				3686400);
+				CLK_GET_RATE_NOCACHE, 3686400);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
-				32768);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+				CLK_GET_RATE_NOCACHE, 32768);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_95_85mhz", "osc_3_6864mhz",
 				  0, 26, 1);
 	clk_register_fixed_factor(NULL, "ppll_147_46mhz", "osc_3_6864mhz",
diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c
index 5b82d30baf9f..c40b1804f58c 100644
--- a/drivers/clk/pxa/clk-pxa27x.c
+++ b/drivers/clk/pxa/clk-pxa27x.c
@@ -85,7 +85,7 @@ unsigned int pxa27x_get_clk_frequency_khz(int info)
 
 bool pxa27x_is_ppll_disabled(void)
 {
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	return ccsr & (1 << CCCR_PPDIS_BIT);
 }
@@ -93,7 +93,7 @@ bool pxa27x_is_ppll_disabled(void)
 #define PXA27X_CKEN(dev_id, con_id, parents, mult_hp, div_hp,		\
 		    bit, is_lp, flags)					\
 	PXA_CKEN(dev_id, con_id, bit, parents, 1, 1, mult_hp, div_hp,	\
-		 is_lp,  &CKEN, CKEN_ ## bit, flags)
+		 is_lp,  CKEN, CKEN_ ## bit, flags)
 #define PXA27X_PBUS_CKEN(dev_id, con_id, bit, mult_hp, div_hp, delay)	\
 	PXA27X_CKEN(dev_id, con_id, pxa27x_pbus_parents, mult_hp,	\
 		    div_hp, bit, pxa27x_is_ppll_disabled, 0)
@@ -106,10 +106,10 @@ PARENTS(pxa27x_membus) = { "lcd_base", "lcd_base" };
 
 #define PXA27X_CKEN_1RATE(dev_id, con_id, bit, parents, delay)		\
 	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
-		       &CKEN, CKEN_ ## bit, 0)
+		       CKEN, CKEN_ ## bit, 0)
 #define PXA27X_CKEN_1RATE_AO(dev_id, con_id, bit, parents, delay)	\
 	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
-		       &CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
+		       CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
 
 static struct desc_clk_cken pxa27x_clocks[] __initdata = {
 	PXA27X_PBUS_CKEN("pxa2xx-uart.0", NULL, FFUART, 2, 42, 1),
@@ -151,7 +151,7 @@ static unsigned long clk_pxa27x_cpll_get_rate(struct clk_hw *hw,
 	unsigned long clkcfg;
 	unsigned int t, ht;
 	unsigned int l, L, n2, N;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (clkcfg));
 	t  = clkcfg & (1 << 0);
@@ -171,8 +171,8 @@ static unsigned long clk_pxa27x_lcd_base_get_rate(struct clk_hw *hw,
 						  unsigned long parent_rate)
 {
 	unsigned int l, osc_forced;
-	unsigned long ccsr = CCSR;
-	unsigned long cccr = CCCR;
+	unsigned long ccsr = readl(CCSR);
+	unsigned long cccr = readl(CCCR);
 
 	l  = ccsr & CCSR_L_MASK;
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
@@ -193,7 +193,7 @@ static unsigned long clk_pxa27x_lcd_base_get_rate(struct clk_hw *hw,
 static u8 clk_pxa27x_lcd_base_get_parent(struct clk_hw *hw)
 {
 	unsigned int osc_forced;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	if (osc_forced)
@@ -208,12 +208,12 @@ MUX_RO_RATE_RO_OPS(clk_pxa27x_lcd_base, "lcd_base");
 static void __init pxa27x_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				32768 * KHz);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_312mhz", "osc_13mhz", 0, 24, 1);
 }
 
@@ -222,7 +222,7 @@ static unsigned long clk_pxa27x_core_get_rate(struct clk_hw *hw,
 {
 	unsigned long clkcfg;
 	unsigned int t, ht, b, osc_forced;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (clkcfg));
@@ -242,7 +242,7 @@ static u8 clk_pxa27x_core_get_parent(struct clk_hw *hw)
 {
 	unsigned long clkcfg;
 	unsigned int t, ht, b, osc_forced;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	if (osc_forced)
@@ -263,7 +263,7 @@ MUX_RO_RATE_RO_OPS(clk_pxa27x_core, "core");
 static unsigned long clk_pxa27x_run_get_rate(struct clk_hw *hw,
 					     unsigned long parent_rate)
 {
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 	unsigned int n2 = (ccsr & CCSR_N2_MASK) >> CCSR_N2_SHIFT;
 
 	return (parent_rate / n2) * 2;
@@ -285,7 +285,7 @@ static unsigned long clk_pxa27x_system_bus_get_rate(struct clk_hw *hw,
 {
 	unsigned long clkcfg;
 	unsigned int b, osc_forced;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (clkcfg));
@@ -302,7 +302,7 @@ static unsigned long clk_pxa27x_system_bus_get_rate(struct clk_hw *hw,
 static u8 clk_pxa27x_system_bus_get_parent(struct clk_hw *hw)
 {
 	unsigned int osc_forced;
-	unsigned long ccsr = CCSR;
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	if (osc_forced)
@@ -318,8 +318,8 @@ static unsigned long clk_pxa27x_memory_get_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
 	unsigned int a, l, osc_forced;
-	unsigned long cccr = CCCR;
-	unsigned long ccsr = CCSR;
+	unsigned long cccr = readl(CCCR);
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	a = cccr & (1 << CCCR_A_BIT);
@@ -337,8 +337,8 @@ static unsigned long clk_pxa27x_memory_get_rate(struct clk_hw *hw,
 static u8 clk_pxa27x_memory_get_parent(struct clk_hw *hw)
 {
 	unsigned int osc_forced, a;
-	unsigned long cccr = CCCR;
-	unsigned long ccsr = CCSR;
+	unsigned long cccr = readl(CCCR);
+	unsigned long ccsr = readl(CCSR);
 
 	osc_forced = ccsr & (1 << CCCR_CPDIS_BIT);
 	a = cccr & (1 << CCCR_A_BIT);
diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c
index 4af4eed5f89f..42bdaa772be0 100644
--- a/drivers/clk/pxa/clk-pxa3xx.c
+++ b/drivers/clk/pxa/clk-pxa3xx.c
@@ -284,15 +284,15 @@ static void __init pxa3xx_register_core(void)
 static void __init pxa3xx_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
 	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				32768);
 	clk_register_fixed_rate(NULL, "ring_osc_120mhz", NULL,
-				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				CLK_GET_RATE_NOCACHE,
 				120 * MHz);
-	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "spll_624mhz", "osc_13mhz", 0, 48, 1);
 	clk_register_fixed_factor(NULL, "ring_osc_60mhz", "ring_osc_120mhz",
 				  0, 1, 2);
@@ -334,8 +334,7 @@ static void __init pxa3xx_base_clocks_init(void)
 	clk_register_clk_pxa3xx_system_bus();
 	clk_register_clk_pxa3xx_ac97();
 	clk_register_clk_pxa3xx_smemc();
-	clk_register_gate(NULL, "CLK_POUT", "osc_13mhz", 0,
-			  (void __iomem *)&OSCC, 11, 0, NULL);
+	clk_register_gate(NULL, "CLK_POUT", "osc_13mhz", 0, OSCC, 11, 0, NULL);
 	clkdev_pxa_register(CLK_OSTIMER, "OSTIMER0", NULL,
 			    clk_register_fixed_factor(NULL, "os-timer0",
 						      "osc_13mhz", 0, 1, 4));
diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index b552eceec2be..95e3b3e0fa1c 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -28,6 +28,14 @@ config APQ_MMCC_8084
 	  Say Y if you want to support multimedia devices such as display,
 	  graphics, video encode/decode, camera, etc.
 
+config IPQ_GCC_4019
+	tristate "IPQ4019 Global Clock Controller"
+	depends on COMMON_CLK_QCOM
+	help
+	  Support for the global clock controller on ipq4019 devices.
+	  Say Y if you want to use peripheral devices such as UART, SPI,
+	  i2c, USB, SD/eMMC, etc.
+
 config IPQ_GCC_806X
 	tristate "IPQ806x Global Clock Controller"
 	depends on COMMON_CLK_QCOM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index dc4280b85db1..2a25f4e75f49 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -14,6 +14,7 @@ clk-qcom-$(CONFIG_QCOM_GDSC) += gdsc.o
 
 obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o
 obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o
+obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o
 obj-$(CONFIG_IPQ_GCC_806X) += gcc-ipq806x.o
 obj-$(CONFIG_IPQ_LCC_806X) += lcc-ipq806x.o
 obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o
diff --git a/drivers/clk/qcom/clk-rcg.c b/drivers/clk/qcom/clk-rcg.c
index bfbb28f450c2..67ce7c146a6a 100644
--- a/drivers/clk/qcom/clk-rcg.c
+++ b/drivers/clk/qcom/clk-rcg.c
@@ -638,7 +638,6 @@ static int clk_rcg_pixel_set_rate(struct clk_hw *hw, unsigned long rate,
 		return ret;
 
 	src = ns_to_src(&rcg->s, ns);
-	f.pre_div = ns_to_pre_div(&rcg->p, ns) + 1;
 
 	for (i = 0; i < num_parents; i++) {
 		if (src == rcg->s.parent_map[i].cfg) {
@@ -647,6 +646,9 @@ static int clk_rcg_pixel_set_rate(struct clk_hw *hw, unsigned long rate,
 		}
 	}
 
+	/* bypass the pre divider */
+	f.pre_div = 1;
+
 	/* let us find appropriate m/n values for this */
 	for (; frac->num; frac++) {
 		request = (rate * frac->den) / frac->num;
diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index c112ebaba70d..f7c226ab4307 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -119,7 +119,6 @@ static int _qcom_cc_register_board_clk(struct device *dev, const char *path,
 		fixed->hw.init = &init_data;
 
 		init_data.name = path;
-		init_data.flags = CLK_IS_ROOT;
 		init_data.ops = &clk_fixed_rate_ops;
 
 		clk = devm_clk_register(dev, &fixed->hw);
@@ -185,6 +184,7 @@ int qcom_cc_really_probe(struct platform_device *pdev,
 	struct clk **clks;
 	struct qcom_reset_controller *reset;
 	struct qcom_cc *cc;
+	struct gdsc_desc *scd;
 	size_t num_clks = desc->num_clks;
 	struct clk_regmap **rclks = desc->clks;
 
@@ -213,7 +213,11 @@ int qcom_cc_really_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	devm_add_action(dev, qcom_cc_del_clk_provider, pdev->dev.of_node);
+	ret = devm_add_action_or_reset(dev, qcom_cc_del_clk_provider,
+				       pdev->dev.of_node);
+
+	if (ret)
+		return ret;
 
 	reset = &cc->reset;
 	reset->rcdev.of_node = dev->of_node;
@@ -227,18 +231,28 @@ int qcom_cc_really_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	devm_add_action(dev, qcom_cc_reset_unregister, &reset->rcdev);
+	ret = devm_add_action_or_reset(dev, qcom_cc_reset_unregister,
+				       &reset->rcdev);
+
+	if (ret)
+		return ret;
 
 	if (desc->gdscs && desc->num_gdscs) {
-		ret = gdsc_register(dev, desc->gdscs, desc->num_gdscs,
-				    &reset->rcdev, regmap);
+		scd = devm_kzalloc(dev, sizeof(*scd), GFP_KERNEL);
+		if (!scd)
+			return -ENOMEM;
+		scd->dev = dev;
+		scd->scs = desc->gdscs;
+		scd->num = desc->num_gdscs;
+		ret = gdsc_register(scd, &reset->rcdev, regmap);
+		if (ret)
+			return ret;
+		ret = devm_add_action_or_reset(dev, qcom_cc_gdsc_unregister,
+					       scd);
 		if (ret)
 			return ret;
 	}
 
-	devm_add_action(dev, qcom_cc_gdsc_unregister, dev);
-
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(qcom_cc_really_probe);
diff --git a/drivers/clk/qcom/gcc-ipq4019.c b/drivers/clk/qcom/gcc-ipq4019.c
new file mode 100644
index 000000000000..5428efb9fbf5
--- /dev/null
+++ b/drivers/clk/qcom/gcc-ipq4019.c
@@ -0,0 +1,1354 @@
+/*
+ * Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-ipq4019.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+
+enum {
+	P_XO,
+	P_FEPLL200,
+	P_FEPLL500,
+	P_DDRPLL,
+	P_FEPLLWCSS2G,
+	P_FEPLLWCSS5G,
+	P_FEPLL125DLY,
+	P_DDRPLLAPSS,
+};
+
+static struct parent_map gcc_xo_200_500_map[] = {
+	{ P_XO, 0 },
+	{ P_FEPLL200, 1 },
+	{ P_FEPLL500, 2 },
+};
+
+static const char * const gcc_xo_200_500[] = {
+	"xo",
+	"fepll200",
+	"fepll500",
+};
+
+static struct parent_map gcc_xo_200_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 1 },
+};
+
+static const char * const gcc_xo_200[] = {
+	"xo",
+	"fepll200",
+};
+
+static struct parent_map gcc_xo_200_spi_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 2 },
+};
+
+static const char * const gcc_xo_200_spi[] = {
+	"xo",
+	"fepll200",
+};
+
+static struct parent_map gcc_xo_sdcc1_500_map[] = {
+	{  P_XO, 0 },
+	{  P_DDRPLL, 1 },
+	{  P_FEPLL500, 2 },
+};
+
+static const char * const gcc_xo_sdcc1_500[] = {
+	"xo",
+	"ddrpll",
+	"fepll500",
+};
+
+static struct parent_map gcc_xo_wcss2g_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLLWCSS2G, 1 },
+};
+
+static const char * const gcc_xo_wcss2g[] = {
+	"xo",
+	"fepllwcss2g",
+};
+
+static struct parent_map gcc_xo_wcss5g_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLLWCSS5G, 1 },
+};
+
+static const char * const gcc_xo_wcss5g[] = {
+	"xo",
+	"fepllwcss5g",
+};
+
+static struct parent_map gcc_xo_125_dly_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL125DLY, 1 },
+};
+
+static const char * const gcc_xo_125_dly[] = {
+	"xo",
+	"fepll125dly",
+};
+
+static struct parent_map gcc_xo_ddr_500_200_map[] = {
+	{  P_XO, 0 },
+	{  P_FEPLL200, 3 },
+	{  P_FEPLL500, 2 },
+	{  P_DDRPLLAPSS, 1 },
+};
+
+static const char * const gcc_xo_ddr_500_200[] = {
+	"xo",
+	"fepll200",
+	"fepll500",
+	"ddrpllapss",
+};
+
+#define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
+#define P_XO 0
+#define FE_PLL_200 1
+#define FE_PLL_500 2
+#define DDRC_PLL_666  3
+
+#define DDRC_PLL_666_SDCC  1
+#define FE_PLL_125_DLY 1
+
+#define FE_PLL_WCSS2G 1
+#define FE_PLL_WCSS5G 1
+
+static const struct freq_tbl ftbl_gcc_audio_pwm_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(200000000, FE_PLL_200, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 audio_clk_src = {
+	.cmd_rcgr = 0x1b000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_audio_pwm_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "audio_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+
+	},
+};
+
+static struct clk_branch gcc_audio_ahb_clk = {
+	.halt_reg = 0x1b010,
+	.clkr = {
+		.enable_reg = 0x1b010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_audio_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_audio_pwm_clk = {
+	.halt_reg = 0x1b00C,
+	.clkr = {
+		.enable_reg = 0x1b00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_audio_pwm_clk",
+			.parent_names = (const char *[]){
+				"audio_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_2_i2c_apps_clk[] = {
+	F(19200000, P_XO, 1, 2, 5),
+	F(24000000, P_XO, 1, 1, 2),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x200c,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_i2c_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup1_i2c_apps_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup1_i2c_apps_clk = {
+	.halt_reg = 0x2008,
+	.clkr = {
+		.enable_reg = 0x2008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup1_i2c_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup1_i2c_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_qup2_i2c_apps_clk_src = {
+	.cmd_rcgr = 0x3000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_i2c_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup2_i2c_apps_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup2_i2c_apps_clk = {
+	.halt_reg = 0x3010,
+	.clkr = {
+		.enable_reg = 0x3010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup2_i2c_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup2_i2c_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_qup1_2_spi_apps_clk[] = {
+	F(960000, P_XO, 12, 1, 4),
+	F(4800000, P_XO, 1, 1, 10),
+	F(9600000, P_XO, 1, 1, 5),
+	F(15000000, P_XO, 1, 1, 3),
+	F(19200000, P_XO, 1, 2, 5),
+	F(24000000, P_XO, 1, 1, 2),
+	F(48000000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_qup1_spi_apps_clk_src = {
+	.cmd_rcgr = 0x2024,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_spi_map,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_spi_apps_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup1_spi_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup1_spi_apps_clk = {
+	.halt_reg = 0x2004,
+	.clkr = {
+		.enable_reg = 0x2004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup1_spi_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup1_spi_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_qup2_spi_apps_clk_src = {
+	.cmd_rcgr = 0x3014,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_qup1_2_spi_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_qup2_spi_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_qup2_spi_apps_clk = {
+	.halt_reg = 0x300c,
+	.clkr = {
+		.enable_reg = 0x300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_qup2_spi_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_qup2_spi_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_blsp1_uart1_2_apps_clk[] = {
+	F(1843200, FE_PLL_200, 1, 144, 15625),
+	F(3686400, FE_PLL_200, 1, 288, 15625),
+	F(7372800, FE_PLL_200, 1, 576, 15625),
+	F(14745600, FE_PLL_200, 1, 1152, 15625),
+	F(16000000, FE_PLL_200, 1, 2, 25),
+	F(24000000, P_XO, 1, 1, 2),
+	F(32000000, FE_PLL_200, 1, 4, 25),
+	F(40000000, FE_PLL_200, 1, 1, 5),
+	F(46400000, FE_PLL_200, 1, 29, 125),
+	F(48000000, P_XO, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 blsp1_uart1_apps_clk_src = {
+	.cmd_rcgr = 0x2044,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_uart1_2_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_uart1_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_uart1_apps_clk = {
+	.halt_reg = 0x203c,
+	.clkr = {
+		.enable_reg = 0x203c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_uart1_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_uart1_apps_clk_src",
+			},
+			.flags = CLK_SET_RATE_PARENT,
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_rcg2 blsp1_uart2_apps_clk_src = {
+	.cmd_rcgr = 0x3034,
+	.mnd_width = 16,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_blsp1_uart1_2_apps_clk,
+	.parent_map = gcc_xo_200_spi_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "blsp1_uart2_apps_clk_src",
+		.parent_names = gcc_xo_200_spi,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_blsp1_uart2_apps_clk = {
+	.halt_reg = 0x302c,
+	.clkr = {
+		.enable_reg = 0x302c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_uart2_apps_clk",
+			.parent_names = (const char *[]){
+				"blsp1_uart2_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_gp_clk[] = {
+	F(1250000,  FE_PLL_200, 1, 16, 0),
+	F(2500000,  FE_PLL_200, 1,  8, 0),
+	F(5000000,  FE_PLL_200, 1,  4, 0),
+	{ }
+};
+
+static struct clk_rcg2 gp1_clk_src = {
+	.cmd_rcgr = 0x8004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp1_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp1_clk = {
+	.halt_reg = 0x8000,
+	.clkr = {
+		.enable_reg = 0x8000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp1_clk",
+			.parent_names = (const char *[]){
+				"gp1_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 gp2_clk_src = {
+	.cmd_rcgr = 0x9004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp2_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp2_clk = {
+	.halt_reg = 0x9000,
+	.clkr = {
+		.enable_reg = 0x9000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp2_clk",
+			.parent_names = (const char *[]){
+				"gp2_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg2 gp3_clk_src = {
+	.cmd_rcgr = 0xa004,
+	.mnd_width = 8,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_gp_clk,
+	.parent_map = gcc_xo_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "gp3_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_gp3_clk = {
+	.halt_reg = 0xa000,
+	.clkr = {
+		.enable_reg = 0xa000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_gp3_clk",
+			.parent_names = (const char *[]){
+				"gp3_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_sdcc1_apps_clk[] = {
+	F(144000,    P_XO,			1,  3, 240),
+	F(400000,    P_XO,			1,  1, 0),
+	F(20000000,  FE_PLL_500,		1,  1, 25),
+	F(25000000,  FE_PLL_500,		1,  1, 20),
+	F(50000000,  FE_PLL_500,		1,  1, 10),
+	F(100000000, FE_PLL_500,		1,  1, 5),
+	F(193000000, DDRC_PLL_666_SDCC,		1,  0, 0),
+	{ }
+};
+
+static struct clk_rcg2  sdcc1_apps_clk_src = {
+	.cmd_rcgr = 0x18004,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_sdcc1_apps_clk,
+	.parent_map = gcc_xo_sdcc1_500_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "sdcc1_apps_clk_src",
+		.parent_names = gcc_xo_sdcc1_500,
+		.num_parents = 3,
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_apps_clk[] = {
+	F(48000000, P_XO,	   1, 0, 0),
+	F(200000000, FE_PLL_200,   1, 0, 0),
+	F(500000000, FE_PLL_500,   1, 0, 0),
+	F(626000000, DDRC_PLL_666, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apps_clk_src = {
+	.cmd_rcgr = 0x1900c,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_apps_clk,
+	.parent_map = gcc_xo_ddr_500_200_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "apps_clk_src",
+		.parent_names = gcc_xo_ddr_500_200,
+		.num_parents = 4,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_apps_ahb_clk[] = {
+	F(48000000, P_XO,	   1, 0, 0),
+	F(100000000, FE_PLL_200,   2, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 apps_ahb_clk_src = {
+	.cmd_rcgr = 0x19014,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_500_map,
+	.freq_tbl = ftbl_gcc_apps_ahb_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "apps_ahb_clk_src",
+		.parent_names = gcc_xo_200_500,
+		.num_parents = 3,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_apss_ahb_clk = {
+	.halt_reg = 0x19004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(14),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_apss_ahb_clk",
+			.parent_names = (const char *[]){
+				"apps_ahb_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_blsp1_ahb_clk = {
+	.halt_reg = 0x1008,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_blsp1_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_dcd_xo_clk = {
+	.halt_reg = 0x2103c,
+	.clkr = {
+		.enable_reg = 0x2103c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_dcd_xo_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_boot_rom_ahb_clk = {
+	.halt_reg = 0x1300c,
+	.clkr = {
+		.enable_reg = 0x1300c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_boot_rom_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_ahb_clk = {
+	.halt_reg = 0x16024,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_axi_clk = {
+	.halt_reg = 0x16020,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(1),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_axi_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_crypto_clk = {
+	.halt_reg = 0x1601c,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_crypto_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_ess_clk = {
+	.halt_reg = 0x12010,
+	.clkr = {
+		.enable_reg = 0x12010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_ess_clk",
+			.parent_names = (const char *[]){
+				"fephy_125m_dly_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_imem_axi_clk = {
+	.halt_reg = 0xe004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(17),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_imem_axi_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_imem_cfg_ahb_clk = {
+	.halt_reg = 0xe008,
+	.clkr = {
+		.enable_reg = 0xe008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_imem_cfg_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_ahb_clk = {
+	.halt_reg = 0x1d00c,
+	.clkr = {
+		.enable_reg = 0x1d00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_axi_m_clk = {
+	.halt_reg = 0x1d004,
+	.clkr = {
+		.enable_reg = 0x1d004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_axi_m_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_pcie_axi_s_clk = {
+	.halt_reg = 0x1d008,
+	.clkr = {
+		.enable_reg = 0x1d008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_pcie_axi_s_clk",
+			.parent_names = (const char *[]){
+				"fepll200",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_prng_ahb_clk = {
+	.halt_reg = 0x13004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_prng_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qpic_ahb_clk = {
+	.halt_reg = 0x1c008,
+	.clkr = {
+		.enable_reg = 0x1c008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qpic_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_qpic_clk = {
+	.halt_reg = 0x1c004,
+	.clkr = {
+		.enable_reg = 0x1c004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_qpic_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_ahb_clk = {
+	.halt_reg = 0x18010,
+	.clkr = {
+		.enable_reg = 0x18010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_sdcc1_apps_clk = {
+	.halt_reg = 0x1800c,
+	.clkr = {
+		.enable_reg = 0x1800c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_sdcc1_apps_clk",
+			.parent_names = (const char *[]){
+				"sdcc1_apps_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_tlmm_ahb_clk = {
+	.halt_reg = 0x5004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.clkr = {
+		.enable_reg = 0x6000,
+		.enable_mask = BIT(5),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_tlmm_ahb_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_master_clk = {
+	.halt_reg = 0x1e00c,
+	.clkr = {
+		.enable_reg = 0x1e00c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_master_clk",
+			.parent_names = (const char *[]){
+				"pcnoc_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_sleep_clk = {
+	.halt_reg = 0x1e010,
+	.clkr = {
+		.enable_reg = 0x1e010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_sleep_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb2_mock_utmi_clk = {
+	.halt_reg = 0x1e014,
+	.clkr = {
+		.enable_reg = 0x1e014,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb2_mock_utmi_clk",
+			.parent_names = (const char *[]){
+				"usb30_mock_utmi_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_usb30_mock_utmi_clk[] = {
+	F(2000000, FE_PLL_200, 10, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 usb30_mock_utmi_clk_src = {
+	.cmd_rcgr = 0x1e000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_200_map,
+	.freq_tbl = ftbl_gcc_usb30_mock_utmi_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "usb30_mock_utmi_clk_src",
+		.parent_names = gcc_xo_200,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_usb3_master_clk = {
+	.halt_reg = 0x1e028,
+	.clkr = {
+		.enable_reg = 0x1e028,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_master_clk",
+			.parent_names = (const char *[]){
+				"fepll125",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_sleep_clk = {
+	.halt_reg = 0x1e02C,
+	.clkr = {
+		.enable_reg = 0x1e02C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_sleep_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_usb3_mock_utmi_clk = {
+	.halt_reg = 0x1e030,
+	.clkr = {
+		.enable_reg = 0x1e030,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_usb3_mock_utmi_clk",
+			.parent_names = (const char *[]){
+				"usb30_mock_utmi_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_fephy_dly_clk[] = {
+	F(125000000, FE_PLL_125_DLY, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 fephy_125m_dly_clk_src = {
+	.cmd_rcgr = 0x12000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_125_dly_map,
+	.freq_tbl = ftbl_gcc_fephy_dly_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "fephy_125m_dly_clk_src",
+		.parent_names = gcc_xo_125_dly,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+
+static const struct freq_tbl ftbl_gcc_wcss2g_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(250000000, FE_PLL_WCSS2G, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 wcss2g_clk_src = {
+	.cmd_rcgr = 0x1f000,
+	.hid_width = 5,
+	.freq_tbl = ftbl_gcc_wcss2g_clk,
+	.parent_map = gcc_xo_wcss2g_map,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "wcss2g_clk_src",
+		.parent_names = gcc_xo_wcss2g,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_branch gcc_wcss2g_clk = {
+	.halt_reg = 0x1f00C,
+	.clkr = {
+		.enable_reg = 0x1f00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_clk",
+			.parent_names = (const char *[]){
+				"wcss2g_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss2g_ref_clk = {
+	.halt_reg = 0x1f00C,
+	.clkr = {
+		.enable_reg = 0x1f00C,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_ref_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss2g_rtc_clk = {
+	.halt_reg = 0x1f010,
+	.clkr = {
+		.enable_reg = 0x1f010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss2g_rtc_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static const struct freq_tbl ftbl_gcc_wcss5g_clk[] = {
+	F(48000000, P_XO, 1, 0, 0),
+	F(250000000, FE_PLL_WCSS5G, 1, 0, 0),
+	{ }
+};
+
+static struct clk_rcg2 wcss5g_clk_src = {
+	.cmd_rcgr = 0x20000,
+	.hid_width = 5,
+	.parent_map = gcc_xo_wcss5g_map,
+	.freq_tbl = ftbl_gcc_wcss5g_clk,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "wcss5g_clk_src",
+		.parent_names = gcc_xo_wcss5g,
+		.num_parents = 2,
+		.ops = &clk_rcg2_ops,
+	},
+};
+
+static struct clk_branch gcc_wcss5g_clk = {
+	.halt_reg = 0x2000c,
+	.clkr = {
+		.enable_reg = 0x2000c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_clk",
+			.parent_names = (const char *[]){
+				"wcss5g_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss5g_ref_clk = {
+	.halt_reg = 0x2000c,
+	.clkr = {
+		.enable_reg = 0x2000c,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_ref_clk",
+			.parent_names = (const char *[]){
+				"xo",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch gcc_wcss5g_rtc_clk = {
+	.halt_reg = 0x20010,
+	.clkr = {
+		.enable_reg = 0x20010,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wcss5g_rtc_clk",
+			.parent_names = (const char *[]){
+				"gcc_sleep_clk_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch2_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap *gcc_ipq4019_clocks[] = {
+	[AUDIO_CLK_SRC] = &audio_clk_src.clkr,
+	[BLSP1_QUP1_I2C_APPS_CLK_SRC] = &blsp1_qup1_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP1_SPI_APPS_CLK_SRC] = &blsp1_qup1_spi_apps_clk_src.clkr,
+	[BLSP1_QUP2_I2C_APPS_CLK_SRC] = &blsp1_qup2_i2c_apps_clk_src.clkr,
+	[BLSP1_QUP2_SPI_APPS_CLK_SRC] = &blsp1_qup2_spi_apps_clk_src.clkr,
+	[BLSP1_UART1_APPS_CLK_SRC] = &blsp1_uart1_apps_clk_src.clkr,
+	[BLSP1_UART2_APPS_CLK_SRC] = &blsp1_uart2_apps_clk_src.clkr,
+	[GCC_USB3_MOCK_UTMI_CLK_SRC] = &usb30_mock_utmi_clk_src.clkr,
+	[GCC_APPS_CLK_SRC] = &apps_clk_src.clkr,
+	[GCC_APPS_AHB_CLK_SRC] = &apps_ahb_clk_src.clkr,
+	[GP1_CLK_SRC] = &gp1_clk_src.clkr,
+	[GP2_CLK_SRC] = &gp2_clk_src.clkr,
+	[GP3_CLK_SRC] = &gp3_clk_src.clkr,
+	[SDCC1_APPS_CLK_SRC] = &sdcc1_apps_clk_src.clkr,
+	[FEPHY_125M_DLY_CLK_SRC] = &fephy_125m_dly_clk_src.clkr,
+	[WCSS2G_CLK_SRC] = &wcss2g_clk_src.clkr,
+	[WCSS5G_CLK_SRC] = &wcss5g_clk_src.clkr,
+	[GCC_APSS_AHB_CLK] = &gcc_apss_ahb_clk.clkr,
+	[GCC_AUDIO_AHB_CLK] = &gcc_audio_ahb_clk.clkr,
+	[GCC_AUDIO_PWM_CLK] = &gcc_audio_pwm_clk.clkr,
+	[GCC_BLSP1_AHB_CLK] = &gcc_blsp1_ahb_clk.clkr,
+	[GCC_BLSP1_QUP1_I2C_APPS_CLK] = &gcc_blsp1_qup1_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP1_SPI_APPS_CLK] = &gcc_blsp1_qup1_spi_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_I2C_APPS_CLK] = &gcc_blsp1_qup2_i2c_apps_clk.clkr,
+	[GCC_BLSP1_QUP2_SPI_APPS_CLK] = &gcc_blsp1_qup2_spi_apps_clk.clkr,
+	[GCC_BLSP1_UART1_APPS_CLK] = &gcc_blsp1_uart1_apps_clk.clkr,
+	[GCC_BLSP1_UART2_APPS_CLK] = &gcc_blsp1_uart2_apps_clk.clkr,
+	[GCC_DCD_XO_CLK] = &gcc_dcd_xo_clk.clkr,
+	[GCC_GP1_CLK] = &gcc_gp1_clk.clkr,
+	[GCC_GP2_CLK] = &gcc_gp2_clk.clkr,
+	[GCC_GP3_CLK] = &gcc_gp3_clk.clkr,
+	[GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr,
+	[GCC_CRYPTO_AHB_CLK] = &gcc_crypto_ahb_clk.clkr,
+	[GCC_CRYPTO_AXI_CLK] = &gcc_crypto_axi_clk.clkr,
+	[GCC_CRYPTO_CLK] = &gcc_crypto_clk.clkr,
+	[GCC_ESS_CLK] = &gcc_ess_clk.clkr,
+	[GCC_IMEM_AXI_CLK] = &gcc_imem_axi_clk.clkr,
+	[GCC_IMEM_CFG_AHB_CLK] = &gcc_imem_cfg_ahb_clk.clkr,
+	[GCC_PCIE_AHB_CLK] = &gcc_pcie_ahb_clk.clkr,
+	[GCC_PCIE_AXI_M_CLK] = &gcc_pcie_axi_m_clk.clkr,
+	[GCC_PCIE_AXI_S_CLK] = &gcc_pcie_axi_s_clk.clkr,
+	[GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+	[GCC_QPIC_AHB_CLK] = &gcc_qpic_ahb_clk.clkr,
+	[GCC_QPIC_CLK] = &gcc_qpic_clk.clkr,
+	[GCC_SDCC1_AHB_CLK] = &gcc_sdcc1_ahb_clk.clkr,
+	[GCC_SDCC1_APPS_CLK] = &gcc_sdcc1_apps_clk.clkr,
+	[GCC_TLMM_AHB_CLK] = &gcc_tlmm_ahb_clk.clkr,
+	[GCC_USB2_MASTER_CLK] = &gcc_usb2_master_clk.clkr,
+	[GCC_USB2_SLEEP_CLK] = &gcc_usb2_sleep_clk.clkr,
+	[GCC_USB2_MOCK_UTMI_CLK] = &gcc_usb2_mock_utmi_clk.clkr,
+	[GCC_USB3_MASTER_CLK] = &gcc_usb3_master_clk.clkr,
+	[GCC_USB3_SLEEP_CLK] = &gcc_usb3_sleep_clk.clkr,
+	[GCC_USB3_MOCK_UTMI_CLK] = &gcc_usb3_mock_utmi_clk.clkr,
+	[GCC_WCSS2G_CLK] = &gcc_wcss2g_clk.clkr,
+	[GCC_WCSS2G_REF_CLK] = &gcc_wcss2g_ref_clk.clkr,
+	[GCC_WCSS2G_RTC_CLK] = &gcc_wcss2g_rtc_clk.clkr,
+	[GCC_WCSS5G_CLK] = &gcc_wcss5g_clk.clkr,
+	[GCC_WCSS5G_REF_CLK] = &gcc_wcss5g_ref_clk.clkr,
+	[GCC_WCSS5G_RTC_CLK] = &gcc_wcss5g_rtc_clk.clkr,
+};
+
+static const struct qcom_reset_map gcc_ipq4019_resets[] = {
+	[WIFI0_CPU_INIT_RESET] = { 0x1f008, 5 },
+	[WIFI0_RADIO_SRIF_RESET] = { 0x1f008, 4 },
+	[WIFI0_RADIO_WARM_RESET] = { 0x1f008, 3 },
+	[WIFI0_RADIO_COLD_RESET] = { 0x1f008, 2 },
+	[WIFI0_CORE_WARM_RESET] = { 0x1f008, 1 },
+	[WIFI0_CORE_COLD_RESET] = { 0x1f008, 0 },
+	[WIFI1_CPU_INIT_RESET] = { 0x20008, 5 },
+	[WIFI1_RADIO_SRIF_RESET] = { 0x20008, 4 },
+	[WIFI1_RADIO_WARM_RESET] = { 0x20008, 3 },
+	[WIFI1_RADIO_COLD_RESET] = { 0x20008, 2 },
+	[WIFI1_CORE_WARM_RESET] = { 0x20008, 1 },
+	[WIFI1_CORE_COLD_RESET] = { 0x20008, 0 },
+	[USB3_UNIPHY_PHY_ARES] = { 0x1e038, 5 },
+	[USB3_HSPHY_POR_ARES] = { 0x1e038, 4 },
+	[USB3_HSPHY_S_ARES] = { 0x1e038, 2 },
+	[USB2_HSPHY_POR_ARES] = { 0x1e01c, 4 },
+	[USB2_HSPHY_S_ARES] = { 0x1e01c, 2 },
+	[PCIE_PHY_AHB_ARES] = { 0x1d010, 11 },
+	[PCIE_AHB_ARES] = { 0x1d010, 10 },
+	[PCIE_PWR_ARES] = { 0x1d010, 9 },
+	[PCIE_PIPE_STICKY_ARES] = { 0x1d010, 8 },
+	[PCIE_AXI_M_STICKY_ARES] = { 0x1d010, 7 },
+	[PCIE_PHY_ARES] = { 0x1d010, 6 },
+	[PCIE_PARF_XPU_ARES] = { 0x1d010, 5 },
+	[PCIE_AXI_S_XPU_ARES] = { 0x1d010, 4 },
+	[PCIE_AXI_M_VMIDMT_ARES] = { 0x1d010, 3 },
+	[PCIE_PIPE_ARES] = { 0x1d010, 2 },
+	[PCIE_AXI_S_ARES] = { 0x1d010, 1 },
+	[PCIE_AXI_M_ARES] = { 0x1d010, 0 },
+	[ESS_RESET] = { 0x12008, 0},
+	[GCC_BLSP1_BCR] = {0x01000, 0},
+	[GCC_BLSP1_QUP1_BCR] = {0x02000, 0},
+	[GCC_BLSP1_UART1_BCR] = {0x02038, 0},
+	[GCC_BLSP1_QUP2_BCR] = {0x03008, 0},
+	[GCC_BLSP1_UART2_BCR] = {0x03028, 0},
+	[GCC_BIMC_BCR] = {0x04000, 0},
+	[GCC_TLMM_BCR] = {0x05000, 0},
+	[GCC_IMEM_BCR] = {0x0E000, 0},
+	[GCC_ESS_BCR] = {0x12008, 0},
+	[GCC_PRNG_BCR] = {0x13000, 0},
+	[GCC_BOOT_ROM_BCR] = {0x13008, 0},
+	[GCC_CRYPTO_BCR] = {0x16000, 0},
+	[GCC_SDCC1_BCR] = {0x18000, 0},
+	[GCC_SEC_CTRL_BCR] = {0x1A000, 0},
+	[GCC_AUDIO_BCR] = {0x1B008, 0},
+	[GCC_QPIC_BCR] = {0x1C000, 0},
+	[GCC_PCIE_BCR] = {0x1D000, 0},
+	[GCC_USB2_BCR] = {0x1E008, 0},
+	[GCC_USB2_PHY_BCR] = {0x1E018, 0},
+	[GCC_USB3_BCR] = {0x1E024, 0},
+	[GCC_USB3_PHY_BCR] = {0x1E034, 0},
+	[GCC_SYSTEM_NOC_BCR] = {0x21000, 0},
+	[GCC_PCNOC_BCR] = {0x2102C, 0},
+	[GCC_DCD_BCR] = {0x21038, 0},
+	[GCC_SNOC_BUS_TIMEOUT0_BCR] = {0x21064, 0},
+	[GCC_SNOC_BUS_TIMEOUT1_BCR] = {0x2106C, 0},
+	[GCC_SNOC_BUS_TIMEOUT2_BCR] = {0x21074, 0},
+	[GCC_SNOC_BUS_TIMEOUT3_BCR] = {0x2107C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT0_BCR] = {0x21084, 0},
+	[GCC_PCNOC_BUS_TIMEOUT1_BCR] = {0x2108C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT2_BCR] = {0x21094, 0},
+	[GCC_PCNOC_BUS_TIMEOUT3_BCR] = {0x2109C, 0},
+	[GCC_PCNOC_BUS_TIMEOUT4_BCR] = {0x210A4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT5_BCR] = {0x210AC, 0},
+	[GCC_PCNOC_BUS_TIMEOUT6_BCR] = {0x210B4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT7_BCR] = {0x210BC, 0},
+	[GCC_PCNOC_BUS_TIMEOUT8_BCR] = {0x210C4, 0},
+	[GCC_PCNOC_BUS_TIMEOUT9_BCR] = {0x210CC, 0},
+	[GCC_TCSR_BCR] = {0x22000, 0},
+	[GCC_MPM_BCR] = {0x24000, 0},
+	[GCC_SPDM_BCR] = {0x25000, 0},
+};
+
+static const struct regmap_config gcc_ipq4019_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x2dfff,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc gcc_ipq4019_desc = {
+	.config = &gcc_ipq4019_regmap_config,
+	.clks = gcc_ipq4019_clocks,
+	.num_clks = ARRAY_SIZE(gcc_ipq4019_clocks),
+	.resets = gcc_ipq4019_resets,
+	.num_resets = ARRAY_SIZE(gcc_ipq4019_resets),
+};
+
+static const struct of_device_id gcc_ipq4019_match_table[] = {
+	{ .compatible = "qcom,gcc-ipq4019" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_ipq4019_match_table);
+
+static int gcc_ipq4019_probe(struct platform_device *pdev)
+{
+	return qcom_cc_probe(pdev, &gcc_ipq4019_desc);
+}
+
+static struct platform_driver gcc_ipq4019_driver = {
+	.probe		= gcc_ipq4019_probe,
+	.driver		= {
+		.name	= "qcom,gcc-ipq4019",
+		.owner	= THIS_MODULE,
+		.of_match_table = gcc_ipq4019_match_table,
+	},
+};
+
+static int __init gcc_ipq4019_init(void)
+{
+	return platform_driver_register(&gcc_ipq4019_driver);
+}
+core_initcall(gcc_ipq4019_init);
+
+static void __exit gcc_ipq4019_exit(void)
+{
+	platform_driver_unregister(&gcc_ipq4019_driver);
+}
+module_exit(gcc_ipq4019_exit);
+
+MODULE_ALIAS("platform:gcc-ipq4019");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("QCOM GCC IPQ4019 driver");
diff --git a/drivers/clk/qcom/gcc-ipq806x.c b/drivers/clk/qcom/gcc-ipq806x.c
index dd5402bac620..52a7d3959875 100644
--- a/drivers/clk/qcom/gcc-ipq806x.c
+++ b/drivers/clk/qcom/gcc-ipq806x.c
@@ -890,7 +890,6 @@ static struct clk_branch gsbi1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -906,7 +905,6 @@ static struct clk_branch gsbi2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -922,7 +920,6 @@ static struct clk_branch gsbi4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -938,7 +935,6 @@ static struct clk_branch gsbi5_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -954,7 +950,6 @@ static struct clk_branch gsbi6_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -970,7 +965,6 @@ static struct clk_branch gsbi7_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1144,7 +1138,6 @@ static struct clk_branch pmem_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1308,7 +1301,6 @@ static struct clk_branch sdc1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1324,7 +1316,6 @@ static struct clk_branch sdc3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1394,7 +1385,6 @@ static struct clk_branch tsif_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1410,7 +1400,6 @@ static struct clk_branch dma_bam_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dma_bam_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1425,7 +1414,6 @@ static struct clk_branch adm0_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1442,7 +1430,6 @@ static struct clk_branch adm0_pbus_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1457,7 +1444,6 @@ static struct clk_branch pmic_arb0_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1472,7 +1458,6 @@ static struct clk_branch pmic_arb1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1487,7 +1472,6 @@ static struct clk_branch pmic_ssbi2_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1504,7 +1488,6 @@ static struct clk_branch rpm_msg_ram_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1563,7 +1546,6 @@ static struct clk_branch pcie_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1577,7 +1559,6 @@ static struct clk_branch pcie_aux_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1591,7 +1572,6 @@ static struct clk_branch pcie_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1605,7 +1585,6 @@ static struct clk_branch pcie_phy_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1659,7 +1638,6 @@ static struct clk_branch pcie1_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1673,7 +1651,6 @@ static struct clk_branch pcie1_aux_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1687,7 +1664,6 @@ static struct clk_branch pcie1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1701,7 +1677,6 @@ static struct clk_branch pcie1_phy_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie1_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1755,7 +1730,6 @@ static struct clk_branch pcie2_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1769,7 +1743,6 @@ static struct clk_branch pcie2_aux_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_aux_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1783,7 +1756,6 @@ static struct clk_branch pcie2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1797,7 +1769,6 @@ static struct clk_branch pcie2_phy_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie2_phy_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1887,7 +1858,6 @@ static struct clk_branch sata_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1901,7 +1871,6 @@ static struct clk_branch sata_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1915,7 +1884,6 @@ static struct clk_branch sfab_sata_s_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sfab_sata_s_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1929,7 +1897,6 @@ static struct clk_branch sata_phy_cfg_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_phy_cfg_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2139,7 +2106,6 @@ static struct clk_branch usb_hs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2218,7 +2184,6 @@ static struct clk_branch usb_fs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2234,7 +2199,6 @@ static struct clk_branch ebi2_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ebi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2248,7 +2212,6 @@ static struct clk_branch ebi2_aon_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ebi2_always_on_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
diff --git a/drivers/clk/qcom/gcc-msm8660.c b/drivers/clk/qcom/gcc-msm8660.c
index ad413036f7c7..6dc55864979c 100644
--- a/drivers/clk/qcom/gcc-msm8660.c
+++ b/drivers/clk/qcom/gcc-msm8660.c
@@ -1479,7 +1479,6 @@ static struct clk_branch pmem_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2027,7 +2026,6 @@ static struct clk_branch gsbi1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2041,7 +2039,6 @@ static struct clk_branch gsbi2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2055,7 +2052,6 @@ static struct clk_branch gsbi3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2069,7 +2065,6 @@ static struct clk_branch gsbi4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2083,7 +2078,6 @@ static struct clk_branch gsbi5_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2097,7 +2091,6 @@ static struct clk_branch gsbi6_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2111,7 +2104,6 @@ static struct clk_branch gsbi7_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2125,7 +2117,6 @@ static struct clk_branch gsbi8_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi8_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2139,7 +2130,6 @@ static struct clk_branch gsbi9_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi9_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2153,7 +2143,6 @@ static struct clk_branch gsbi10_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi10_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2167,7 +2156,6 @@ static struct clk_branch gsbi11_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi11_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2181,7 +2169,6 @@ static struct clk_branch gsbi12_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi12_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2195,7 +2182,6 @@ static struct clk_branch tsif_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2209,7 +2195,6 @@ static struct clk_branch usb_fs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2223,7 +2208,6 @@ static struct clk_branch usb_fs2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2237,7 +2221,6 @@ static struct clk_branch usb_hs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2251,7 +2234,6 @@ static struct clk_branch sdc1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2265,7 +2247,6 @@ static struct clk_branch sdc2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2279,7 +2260,6 @@ static struct clk_branch sdc3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2293,7 +2273,6 @@ static struct clk_branch sdc4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2307,7 +2286,6 @@ static struct clk_branch sdc5_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2322,7 +2300,6 @@ static struct clk_branch adm0_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2337,7 +2314,6 @@ static struct clk_branch adm0_pbus_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2352,7 +2328,6 @@ static struct clk_branch adm1_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm1_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2367,7 +2342,6 @@ static struct clk_branch adm1_pbus_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm1_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2382,7 +2356,6 @@ static struct clk_branch modem_ahb1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "modem_ahb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2397,7 +2370,6 @@ static struct clk_branch modem_ahb2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "modem_ahb2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2412,7 +2384,6 @@ static struct clk_branch pmic_arb0_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2427,7 +2398,6 @@ static struct clk_branch pmic_arb1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2442,7 +2412,6 @@ static struct clk_branch pmic_ssbi2_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2459,7 +2428,6 @@ static struct clk_branch rpm_msg_ram_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
diff --git a/drivers/clk/qcom/gcc-msm8916.c b/drivers/clk/qcom/gcc-msm8916.c
index 8cc9b2868b41..9c29080a84d8 100644
--- a/drivers/clk/qcom/gcc-msm8916.c
+++ b/drivers/clk/qcom/gcc-msm8916.c
@@ -2590,6 +2590,23 @@ static struct clk_branch gcc_mss_cfg_ahb_clk = {
 	},
 };
 
+static struct clk_branch gcc_mss_q6_bimc_axi_clk = {
+	.halt_reg = 0x49004,
+	.clkr = {
+		.enable_reg = 0x49004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_mss_q6_bimc_axi_clk",
+			.parent_names = (const char *[]){
+				"bimc_ddr_clk_src",
+			},
+			.num_parents = 1,
+			.flags = CLK_SET_RATE_PARENT,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
 static struct clk_branch gcc_oxili_ahb_clk = {
 	.halt_reg = 0x59028,
 	.clkr = {
@@ -3227,6 +3244,7 @@ static struct clk_regmap *gcc_msm8916_clocks[] = {
 	[GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK] = &gcc_ultaudio_lpaif_sec_i2s_clk.clkr,
 	[GCC_ULTAUDIO_LPAIF_AUX_I2S_CLK] = &gcc_ultaudio_lpaif_aux_i2s_clk.clkr,
 	[GCC_CODEC_DIGCODEC_CLK] = &gcc_codec_digcodec_clk.clkr,
+	[GCC_MSS_Q6_BIMC_AXI_CLK] = &gcc_mss_q6_bimc_axi_clk.clkr,
 };
 
 static struct gdsc *gcc_msm8916_gdscs[] = {
diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 983dd7dc89a7..eb551c75fba6 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -1546,7 +1546,6 @@ static struct clk_branch pmem_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmem_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2143,7 +2142,6 @@ static struct clk_branch usb_hsic_hsio_cal_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hsic_hsio_cal_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2293,7 +2291,6 @@ static struct clk_branch ce1_core_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ce1_core_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2307,7 +2304,6 @@ static struct clk_branch ce1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ce1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2323,7 +2319,6 @@ static struct clk_branch dma_bam_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dma_bam_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2339,7 +2334,6 @@ static struct clk_branch gsbi1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2355,7 +2349,6 @@ static struct clk_branch gsbi2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2371,7 +2364,6 @@ static struct clk_branch gsbi3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2387,7 +2379,6 @@ static struct clk_branch gsbi4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2403,7 +2394,6 @@ static struct clk_branch gsbi5_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2419,7 +2409,6 @@ static struct clk_branch gsbi6_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi6_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2435,7 +2424,6 @@ static struct clk_branch gsbi7_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi7_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2451,7 +2439,6 @@ static struct clk_branch gsbi8_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi8_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2467,7 +2454,6 @@ static struct clk_branch gsbi9_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi9_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2483,7 +2469,6 @@ static struct clk_branch gsbi10_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi10_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2499,7 +2484,6 @@ static struct clk_branch gsbi11_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi11_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2515,7 +2499,6 @@ static struct clk_branch gsbi12_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gsbi12_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2531,7 +2514,6 @@ static struct clk_branch tsif_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "tsif_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2545,7 +2527,6 @@ static struct clk_branch usb_fs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2559,7 +2540,6 @@ static struct clk_branch usb_fs2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_fs2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2575,7 +2555,6 @@ static struct clk_branch usb_hs1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2589,7 +2568,6 @@ static struct clk_branch usb_hs3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2603,7 +2581,6 @@ static struct clk_branch usb_hs4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hs4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2617,7 +2594,6 @@ static struct clk_branch usb_hsic_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "usb_hsic_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2633,7 +2609,6 @@ static struct clk_branch sdc1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2649,7 +2624,6 @@ static struct clk_branch sdc2_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc2_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2665,7 +2639,6 @@ static struct clk_branch sdc3_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc3_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2681,7 +2654,6 @@ static struct clk_branch sdc4_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc4_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2697,7 +2669,6 @@ static struct clk_branch sdc5_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sdc5_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2712,7 +2683,6 @@ static struct clk_branch adm0_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2729,7 +2699,6 @@ static struct clk_branch adm0_pbus_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "adm0_pbus_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2753,7 +2722,7 @@ static struct clk_rcg ce3_src = {
 	},
 	.freq_tbl = clk_tbl_ce3,
 	.clkr = {
-		.enable_reg = 0x2c08,
+		.enable_reg = 0x36c0,
 		.enable_mask = BIT(7),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_src",
@@ -2769,7 +2738,7 @@ static struct clk_branch ce3_core_clk = {
 	.halt_reg = 0x2fdc,
 	.halt_bit = 5,
 	.clkr = {
-		.enable_reg = 0x36c4,
+		.enable_reg = 0x36cc,
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_core_clk",
@@ -2883,7 +2852,6 @@ static struct clk_branch sata_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2897,7 +2865,6 @@ static struct clk_branch sata_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2911,7 +2878,6 @@ static struct clk_branch sfab_sata_s_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sfab_sata_s_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2925,7 +2891,6 @@ static struct clk_branch sata_phy_cfg_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "sata_phy_cfg_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2939,7 +2904,6 @@ static struct clk_branch pcie_phy_ref_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_phy_ref_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2953,7 +2917,6 @@ static struct clk_branch pcie_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2967,7 +2930,6 @@ static struct clk_branch pcie_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pcie_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2982,7 +2944,6 @@ static struct clk_branch pmic_arb0_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb0_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2997,7 +2958,6 @@ static struct clk_branch pmic_arb1_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_arb1_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3012,7 +2972,6 @@ static struct clk_branch pmic_ssbi2_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "pmic_ssbi2_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3029,7 +2988,6 @@ static struct clk_branch rpm_msg_ram_h_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "rpm_msg_ram_h_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
diff --git a/drivers/clk/qcom/gcc-msm8974.c b/drivers/clk/qcom/gcc-msm8974.c
index 335952db309b..00915209e7c5 100644
--- a/drivers/clk/qcom/gcc-msm8974.c
+++ b/drivers/clk/qcom/gcc-msm8974.c
@@ -1965,7 +1965,6 @@ static struct clk_branch gcc_mss_q6_bimc_axi_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mss_q6_bimc_axi_clk",
-			.flags = CLK_IS_ROOT,
 			.ops = &clk_branch2_ops,
 		},
 	},
diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
index 16d7c323db49..c9b96f318d9c 100644
--- a/drivers/clk/qcom/gcc-msm8996.c
+++ b/drivers/clk/qcom/gcc-msm8996.c
@@ -30,6 +30,7 @@
 #include "clk-rcg.h"
 #include "clk-branch.h"
 #include "reset.h"
+#include "gdsc.h"
 
 #define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
 
@@ -1320,7 +1321,7 @@ static struct clk_branch gcc_mmss_bimc_gfx_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_mmss_bimc_gfx_clk",
-			.flags = CLK_SET_RATE_PARENT | CLK_IS_ROOT,
+			.flags = CLK_SET_RATE_PARENT,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2314,7 +2315,7 @@ static struct clk_branch gcc_bimc_gfx_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_bimc_gfx_clk",
-			.flags = CLK_SET_RATE_PARENT | CLK_IS_ROOT,
+			.flags = CLK_SET_RATE_PARENT,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2814,7 +2815,6 @@ static struct clk_branch gcc_ufs_sys_clk_core_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_sys_clk_core_clk",
 			.ops = &clk_branch2_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2827,7 +2827,6 @@ static struct clk_branch gcc_ufs_tx_symbol_clk_core_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_ufs_tx_symbol_clk_core_clk",
 			.ops = &clk_branch2_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -3059,6 +3058,83 @@ static struct clk_hw *gcc_msm8996_hws[] = {
 	&ufs_ice_core_postdiv_clk_src.hw,
 };
 
+static struct gdsc aggre0_noc_gdsc = {
+	.gdscr = 0x81004,
+	.gds_hw_ctrl = 0x81028,
+	.pd = {
+		.name = "aggre0_noc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_aggre0_noc_gdsc = {
+	.gdscr = 0x7d024,
+	.pd = {
+		.name = "hlos1_vote_aggre0_noc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_lpass_adsp_gdsc = {
+	.gdscr = 0x7d034,
+	.pd = {
+		.name = "hlos1_vote_lpass_adsp",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc hlos1_vote_lpass_core_gdsc = {
+	.gdscr = 0x7d038,
+	.pd = {
+		.name = "hlos1_vote_lpass_core",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc usb30_gdsc = {
+	.gdscr = 0xf004,
+	.pd = {
+		.name = "usb30",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie0_gdsc = {
+	.gdscr = 0x6b004,
+	.pd = {
+		.name = "pcie0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie1_gdsc = {
+	.gdscr = 0x6d004,
+	.pd = {
+		.name = "pcie1",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc pcie2_gdsc = {
+	.gdscr = 0x6e004,
+	.pd = {
+		.name = "pcie2",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc ufs_gdsc = {
+	.gdscr = 0x75004,
+	.pd = {
+		.name = "ufs",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
 static struct clk_regmap *gcc_msm8996_clocks[] = {
 	[GPLL0_EARLY] = &gpll0_early.clkr,
 	[GPLL0] = &gpll0.clkr,
@@ -3245,6 +3321,18 @@ static struct clk_regmap *gcc_msm8996_clocks[] = {
 	[GCC_RX1_USB2_CLKREF_CLK] = &gcc_rx1_usb2_clkref_clk.clkr,
 };
 
+static struct gdsc *gcc_msm8996_gdscs[] = {
+	[AGGRE0_NOC_GDSC] = &aggre0_noc_gdsc,
+	[HLOS1_VOTE_AGGRE0_NOC_GDSC] = &hlos1_vote_aggre0_noc_gdsc,
+	[HLOS1_VOTE_LPASS_ADSP_GDSC] = &hlos1_vote_lpass_adsp_gdsc,
+	[HLOS1_VOTE_LPASS_CORE_GDSC] = &hlos1_vote_lpass_core_gdsc,
+	[USB30_GDSC] = &usb30_gdsc,
+	[PCIE0_GDSC] = &pcie0_gdsc,
+	[PCIE1_GDSC] = &pcie1_gdsc,
+	[PCIE2_GDSC] = &pcie2_gdsc,
+	[UFS_GDSC] = &ufs_gdsc,
+};
+
 static const struct qcom_reset_map gcc_msm8996_resets[] = {
 	[GCC_SYSTEM_NOC_BCR] = { 0x4000 },
 	[GCC_CONFIG_NOC_BCR] = { 0x5000 },
@@ -3363,6 +3451,8 @@ static const struct qcom_cc_desc gcc_msm8996_desc = {
 	.num_clks = ARRAY_SIZE(gcc_msm8996_clocks),
 	.resets = gcc_msm8996_resets,
 	.num_resets = ARRAY_SIZE(gcc_msm8996_resets),
+	.gdscs = gcc_msm8996_gdscs,
+	.num_gdscs = ARRAY_SIZE(gcc_msm8996_gdscs),
 };
 
 static const struct of_device_id gcc_msm8996_match_table[] = {
diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c
index da9fad8b642b..f12d7b2bddd7 100644
--- a/drivers/clk/qcom/gdsc.c
+++ b/drivers/clk/qcom/gdsc.c
@@ -16,6 +16,7 @@
 #include <linux/err.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/ktime.h>
 #include <linux/pm_domain.h>
 #include <linux/regmap.h>
 #include <linux/reset-controller.h>
@@ -42,12 +43,12 @@
 
 #define domain_to_gdsc(domain) container_of(domain, struct gdsc, pd)
 
-static int gdsc_is_enabled(struct gdsc *sc)
+static int gdsc_is_enabled(struct gdsc *sc, unsigned int reg)
 {
 	u32 val;
 	int ret;
 
-	ret = regmap_read(sc->regmap, sc->gdscr, &val);
+	ret = regmap_read(sc->regmap, reg, &val);
 	if (ret)
 		return ret;
 
@@ -58,28 +59,46 @@ static int gdsc_toggle_logic(struct gdsc *sc, bool en)
 {
 	int ret;
 	u32 val = en ? 0 : SW_COLLAPSE_MASK;
-	u32 check = en ? PWR_ON_MASK : 0;
-	unsigned long timeout;
+	ktime_t start;
+	unsigned int status_reg = sc->gdscr;
 
 	ret = regmap_update_bits(sc->regmap, sc->gdscr, SW_COLLAPSE_MASK, val);
 	if (ret)
 		return ret;
 
-	timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
-	do {
-		ret = regmap_read(sc->regmap, sc->gdscr, &val);
-		if (ret)
-			return ret;
+	/* If disabling votable gdscs, don't poll on status */
+	if ((sc->flags & VOTABLE) && !en) {
+		/*
+		 * Add a short delay here to ensure that an enable
+		 * right after it was disabled does not put it in an
+		 * unknown state
+		 */
+		udelay(TIMEOUT_US);
+		return 0;
+	}
 
-		if ((val & PWR_ON_MASK) == check)
-			return 0;
-	} while (time_before(jiffies, timeout));
+	if (sc->gds_hw_ctrl) {
+		status_reg = sc->gds_hw_ctrl;
+		/*
+		 * The gds hw controller asserts/de-asserts the status bit soon
+		 * after it receives a power on/off request from a master.
+		 * The controller then takes around 8 xo cycles to start its
+		 * internal state machine and update the status bit. During
+		 * this time, the status bit does not reflect the true status
+		 * of the core.
+		 * Add a delay of 1 us between writing to the SW_COLLAPSE bit
+		 * and polling the status bit.
+		 */
+		udelay(1);
+	}
 
-	ret = regmap_read(sc->regmap, sc->gdscr, &val);
-	if (ret)
-		return ret;
+	start = ktime_get();
+	do {
+		if (gdsc_is_enabled(sc, status_reg) == en)
+			return 0;
+	} while (ktime_us_delta(ktime_get(), start) < TIMEOUT_US);
 
-	if ((val & PWR_ON_MASK) == check)
+	if (gdsc_is_enabled(sc, status_reg) == en)
 		return 0;
 
 	return -ETIMEDOUT;
@@ -165,6 +184,7 @@ static int gdsc_init(struct gdsc *sc)
 {
 	u32 mask, val;
 	int on, ret;
+	unsigned int reg;
 
 	/*
 	 * Disable HW trigger: collapse/restore occur based on registers writes.
@@ -185,10 +205,18 @@ static int gdsc_init(struct gdsc *sc)
 			return ret;
 	}
 
-	on = gdsc_is_enabled(sc);
+	reg = sc->gds_hw_ctrl ? sc->gds_hw_ctrl : sc->gdscr;
+	on = gdsc_is_enabled(sc, reg);
 	if (on < 0)
 		return on;
 
+	/*
+	 * Votable GDSCs can be ON due to Vote from other masters.
+	 * If a Votable GDSC is ON, make sure we have a Vote.
+	 */
+	if ((sc->flags & VOTABLE) && on)
+		gdsc_enable(&sc->pd);
+
 	if (on || (sc->pwrsts & PWRSTS_RET))
 		gdsc_force_mem_on(sc);
 	else
@@ -201,11 +229,14 @@ static int gdsc_init(struct gdsc *sc)
 	return 0;
 }
 
-int gdsc_register(struct device *dev, struct gdsc **scs, size_t num,
+int gdsc_register(struct gdsc_desc *desc,
 		  struct reset_controller_dev *rcdev, struct regmap *regmap)
 {
 	int i, ret;
 	struct genpd_onecell_data *data;
+	struct device *dev = desc->dev;
+	struct gdsc **scs = desc->scs;
+	size_t num = desc->num;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -228,10 +259,30 @@ int gdsc_register(struct device *dev, struct gdsc **scs, size_t num,
 		data->domains[i] = &scs[i]->pd;
 	}
 
+	/* Add subdomains */
+	for (i = 0; i < num; i++) {
+		if (!scs[i])
+			continue;
+		if (scs[i]->parent)
+			pm_genpd_add_subdomain(scs[i]->parent, &scs[i]->pd);
+	}
+
 	return of_genpd_add_provider_onecell(dev->of_node, data);
 }
 
-void gdsc_unregister(struct device *dev)
+void gdsc_unregister(struct gdsc_desc *desc)
 {
+	int i;
+	struct device *dev = desc->dev;
+	struct gdsc **scs = desc->scs;
+	size_t num = desc->num;
+
+	/* Remove subdomains */
+	for (i = 0; i < num; i++) {
+		if (!scs[i])
+			continue;
+		if (scs[i]->parent)
+			pm_genpd_remove_subdomain(scs[i]->parent, &scs[i]->pd);
+	}
 	of_genpd_del_provider(dev->of_node);
 }
diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h
index 5ded26884f08..3bf497c36bdf 100644
--- a/drivers/clk/qcom/gdsc.h
+++ b/drivers/clk/qcom/gdsc.h
@@ -20,18 +20,12 @@
 struct regmap;
 struct reset_controller_dev;
 
-/* Powerdomain allowable state bitfields */
-#define PWRSTS_OFF		BIT(0)
-#define PWRSTS_RET		BIT(1)
-#define PWRSTS_ON		BIT(2)
-#define PWRSTS_OFF_ON		(PWRSTS_OFF | PWRSTS_ON)
-#define PWRSTS_RET_ON		(PWRSTS_RET | PWRSTS_ON)
-
 /**
  * struct gdsc - Globally Distributed Switch Controller
  * @pd: generic power domain
  * @regmap: regmap for MMIO accesses
  * @gdscr: gsdc control register
+ * @gds_hw_ctrl: gds_hw_ctrl register
  * @cxcs: offsets of branch registers to toggle mem/periph bits in
  * @cxc_count: number of @cxcs
  * @pwrsts: Possible powerdomain power states
@@ -41,28 +35,44 @@ struct reset_controller_dev;
  */
 struct gdsc {
 	struct generic_pm_domain	pd;
+	struct generic_pm_domain	*parent;
 	struct regmap			*regmap;
 	unsigned int			gdscr;
+	unsigned int			gds_hw_ctrl;
 	unsigned int			*cxcs;
 	unsigned int			cxc_count;
 	const u8			pwrsts;
+/* Powerdomain allowable state bitfields */
+#define PWRSTS_OFF		BIT(0)
+#define PWRSTS_RET		BIT(1)
+#define PWRSTS_ON		BIT(2)
+#define PWRSTS_OFF_ON		(PWRSTS_OFF | PWRSTS_ON)
+#define PWRSTS_RET_ON		(PWRSTS_RET | PWRSTS_ON)
+	const u8			flags;
+#define VOTABLE		BIT(0)
 	struct reset_controller_dev	*rcdev;
 	unsigned int			*resets;
 	unsigned int			reset_count;
 };
 
+struct gdsc_desc {
+	struct device *dev;
+	struct gdsc **scs;
+	size_t num;
+};
+
 #ifdef CONFIG_QCOM_GDSC
-int gdsc_register(struct device *, struct gdsc **, size_t n,
-		  struct reset_controller_dev *, struct regmap *);
-void gdsc_unregister(struct device *);
+int gdsc_register(struct gdsc_desc *desc, struct reset_controller_dev *,
+		  struct regmap *);
+void gdsc_unregister(struct gdsc_desc *desc);
 #else
-static inline int gdsc_register(struct device *d, struct gdsc **g, size_t n,
+static inline int gdsc_register(struct gdsc_desc *desc,
 				struct reset_controller_dev *rcdev,
 				struct regmap *r)
 {
 	return -ENOSYS;
 }
 
-static inline void gdsc_unregister(struct device *d) {};
+static inline void gdsc_unregister(struct gdsc_desc *desc) {};
 #endif /* CONFIG_QCOM_GDSC */
 #endif /* __QCOM_GDSC_H__ */
diff --git a/drivers/clk/qcom/mmcc-msm8960.c b/drivers/clk/qcom/mmcc-msm8960.c
index 00e36192a1de..7f21421c87d6 100644
--- a/drivers/clk/qcom/mmcc-msm8960.c
+++ b/drivers/clk/qcom/mmcc-msm8960.c
@@ -1789,7 +1789,6 @@ static struct clk_branch gmem_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gmem_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1805,7 +1804,6 @@ static struct clk_branch ijpeg_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ijpeg_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1821,7 +1819,6 @@ static struct clk_branch mmss_imem_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "mmss_imem_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1835,7 +1832,6 @@ static struct clk_branch jpegd_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "jpegd_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1851,7 +1847,6 @@ static struct clk_branch vcodec_axi_b_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_b_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1867,7 +1862,6 @@ static struct clk_branch vcodec_axi_a_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_a_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1883,7 +1877,6 @@ static struct clk_branch vcodec_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1897,7 +1890,6 @@ static struct clk_branch vfe_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vfe_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1913,7 +1905,6 @@ static struct clk_branch mdp_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "mdp_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1929,7 +1920,6 @@ static struct clk_branch rot_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "rot_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1945,7 +1935,6 @@ static struct clk_branch vcap_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcap_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1961,7 +1950,6 @@ static struct clk_branch vpe_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vpe_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1977,7 +1965,6 @@ static struct clk_branch gfx3d_axi_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx3d_axi_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -1991,7 +1978,6 @@ static struct clk_branch amp_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "amp_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2005,7 +1991,6 @@ static struct clk_branch csi_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "csi_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2019,7 +2004,6 @@ static struct clk_branch dsi_m_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2035,7 +2019,6 @@ static struct clk_branch dsi_s_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2049,7 +2032,6 @@ static struct clk_branch dsi2_m_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi2_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2065,7 +2047,6 @@ static struct clk_branch dsi2_s_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "dsi2_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2425,7 +2406,6 @@ static struct clk_branch gfx2d0_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx2d0_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2441,7 +2421,6 @@ static struct clk_branch gfx2d1_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx2d1_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2457,7 +2436,6 @@ static struct clk_branch gfx3d_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "gfx3d_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2473,7 +2451,6 @@ static struct clk_branch hdmi_m_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "hdmi_m_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2489,7 +2466,6 @@ static struct clk_branch hdmi_s_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "hdmi_s_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2503,7 +2479,6 @@ static struct clk_branch ijpeg_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "ijpeg_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2519,7 +2494,6 @@ static struct clk_branch mmss_imem_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "mmss_imem_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2533,7 +2507,6 @@ static struct clk_branch jpegd_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "jpegd_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2547,7 +2520,6 @@ static struct clk_branch mdp_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "mdp_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2561,7 +2533,6 @@ static struct clk_branch rot_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "rot_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT
 		},
 	},
 };
@@ -2577,7 +2548,6 @@ static struct clk_branch smmu_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "smmu_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2591,7 +2561,6 @@ static struct clk_branch tv_enc_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "tv_enc_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2605,7 +2574,6 @@ static struct clk_branch vcap_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcap_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2621,7 +2589,6 @@ static struct clk_branch vcodec_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vcodec_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2635,7 +2602,6 @@ static struct clk_branch vfe_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vfe_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
@@ -2649,7 +2615,6 @@ static struct clk_branch vpe_ahb_clk = {
 		.hw.init = &(struct clk_init_data){
 			.name = "vpe_ahb_clk",
 			.ops = &clk_branch_ops,
-			.flags = CLK_IS_ROOT,
 		},
 	},
 };
diff --git a/drivers/clk/qcom/mmcc-msm8974.c b/drivers/clk/qcom/mmcc-msm8974.c
index 9d790bcadf25..715e7cd94125 100644
--- a/drivers/clk/qcom/mmcc-msm8974.c
+++ b/drivers/clk/qcom/mmcc-msm8974.c
@@ -2400,6 +2400,7 @@ static struct gdsc oxilicx_gdsc = {
 	.pd = {
 		.name = "oxilicx",
 	},
+	.parent = &oxili_gdsc.pd,
 	.pwrsts = PWRSTS_OFF_ON,
 };
 
@@ -2615,7 +2616,6 @@ MODULE_DEVICE_TABLE(of, mmcc_msm8974_match_table);
 static int mmcc_msm8974_probe(struct platform_device *pdev)
 {
 	struct regmap *regmap;
-	int ret;
 
 	regmap = qcom_cc_map(pdev, &mmcc_msm8974_desc);
 	if (IS_ERR(regmap))
@@ -2624,22 +2624,11 @@ static int mmcc_msm8974_probe(struct platform_device *pdev)
 	clk_pll_configure_sr_hpm_lp(&mmpll1, regmap, &mmpll1_config, true);
 	clk_pll_configure_sr_hpm_lp(&mmpll3, regmap, &mmpll3_config, false);
 
-	ret = qcom_cc_really_probe(pdev, &mmcc_msm8974_desc, regmap);
-	if (ret)
-		return ret;
-
-	return pm_genpd_add_subdomain(&oxili_gdsc.pd, &oxilicx_gdsc.pd);
-}
-
-static int mmcc_msm8974_remove(struct platform_device *pdev)
-{
-	pm_genpd_remove_subdomain(&oxili_gdsc.pd, &oxilicx_gdsc.pd);
-	return 0;
+	return qcom_cc_really_probe(pdev, &mmcc_msm8974_desc, regmap);
 }
 
 static struct platform_driver mmcc_msm8974_driver = {
 	.probe		= mmcc_msm8974_probe,
-	.remove		= mmcc_msm8974_remove,
 	.driver		= {
 		.name	= "mmcc-msm8974",
 		.of_match_table = mmcc_msm8974_match_table,
diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 064f3eaa39d0..6df7ff36b416 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c
@@ -32,6 +32,7 @@
 #include "clk-rcg.h"
 #include "clk-branch.h"
 #include "reset.h"
+#include "gdsc.h"
 
 #define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }
 
@@ -2917,6 +2918,144 @@ static struct clk_hw *mmcc_msm8996_hws[] = {
 	&gpll0_div.hw,
 };
 
+static struct gdsc mmagic_video_gdsc = {
+	.gdscr = 0x119c,
+	.gds_hw_ctrl = 0x120c,
+	.pd = {
+		.name = "mmagic_video",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc mmagic_mdss_gdsc = {
+	.gdscr = 0x247c,
+	.gds_hw_ctrl = 0x2480,
+	.pd = {
+		.name = "mmagic_mdss",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc mmagic_camss_gdsc = {
+	.gdscr = 0x3c4c,
+	.gds_hw_ctrl = 0x3c50,
+	.pd = {
+		.name = "mmagic_camss",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+	.flags = VOTABLE,
+};
+
+static struct gdsc venus_gdsc = {
+	.gdscr = 0x1024,
+	.cxcs = (unsigned int []){ 0x1028, 0x1034, 0x1038 },
+	.cxc_count = 3,
+	.pd = {
+		.name = "venus",
+	},
+	.parent = &mmagic_video_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core0_gdsc = {
+	.gdscr = 0x1040,
+	.cxcs = (unsigned int []){ 0x1048 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core0",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc venus_core1_gdsc = {
+	.gdscr = 0x1044,
+	.cxcs = (unsigned int []){ 0x104c },
+	.cxc_count = 1,
+	.pd = {
+		.name = "venus_core1",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc camss_gdsc = {
+	.gdscr = 0x34a0,
+	.cxcs = (unsigned int []){ 0x36bc, 0x36c4 },
+	.cxc_count = 2,
+	.pd = {
+		.name = "camss",
+	},
+	.parent = &mmagic_camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe0_gdsc = {
+	.gdscr = 0x3664,
+	.cxcs = (unsigned int []){ 0x36a8 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "vfe0",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc vfe1_gdsc = {
+	.gdscr = 0x3674,
+	.cxcs = (unsigned int []){ 0x36ac },
+	.cxc_count = 1,
+	.pd = {
+		.name = "vfe0",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc jpeg_gdsc = {
+	.gdscr = 0x35a4,
+	.cxcs = (unsigned int []){ 0x35a8, 0x35b0, 0x35c0, 0x35b8 },
+	.cxc_count = 4,
+	.pd = {
+		.name = "jpeg",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc cpp_gdsc = {
+	.gdscr = 0x36d4,
+	.cxcs = (unsigned int []){ 0x36b0 },
+	.cxc_count = 1,
+	.pd = {
+		.name = "cpp",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc fd_gdsc = {
+	.gdscr = 0x3b64,
+	.cxcs = (unsigned int []){ 0x3b68, 0x3b6c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "fd",
+	},
+	.parent = &camss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
+static struct gdsc mdss_gdsc = {
+	.gdscr = 0x2304,
+	.cxcs = (unsigned int []){ 0x2310, 0x231c },
+	.cxc_count = 2,
+	.pd = {
+		.name = "mdss",
+	},
+	.parent = &mmagic_mdss_gdsc.pd,
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
 static struct clk_regmap *mmcc_msm8996_clocks[] = {
 	[MMPLL0_EARLY] = &mmpll0_early.clkr,
 	[MMPLL0_PLL] = &mmpll0.clkr,
@@ -3093,6 +3232,22 @@ static struct clk_regmap *mmcc_msm8996_clocks[] = {
 	[FD_AHB_CLK] = &fd_ahb_clk.clkr,
 };
 
+static struct gdsc *mmcc_msm8996_gdscs[] = {
+	[MMAGIC_VIDEO_GDSC] = &mmagic_video_gdsc,
+	[MMAGIC_MDSS_GDSC] = &mmagic_mdss_gdsc,
+	[MMAGIC_CAMSS_GDSC] = &mmagic_camss_gdsc,
+	[VENUS_GDSC] = &venus_gdsc,
+	[VENUS_CORE0_GDSC] = &venus_core0_gdsc,
+	[VENUS_CORE1_GDSC] = &venus_core1_gdsc,
+	[CAMSS_GDSC] = &camss_gdsc,
+	[VFE0_GDSC] = &vfe0_gdsc,
+	[VFE1_GDSC] = &vfe1_gdsc,
+	[JPEG_GDSC] = &jpeg_gdsc,
+	[CPP_GDSC] = &cpp_gdsc,
+	[FD_GDSC] = &fd_gdsc,
+	[MDSS_GDSC] = &mdss_gdsc,
+};
+
 static const struct qcom_reset_map mmcc_msm8996_resets[] = {
 	[MMAGICAHB_BCR] = { 0x5020 },
 	[MMAGIC_CFG_BCR] = { 0x5050 },
@@ -3170,6 +3325,8 @@ static const struct qcom_cc_desc mmcc_msm8996_desc = {
 	.num_clks = ARRAY_SIZE(mmcc_msm8996_clocks),
 	.resets = mmcc_msm8996_resets,
 	.num_resets = ARRAY_SIZE(mmcc_msm8996_resets),
+	.gdscs = mmcc_msm8996_gdscs,
+	.num_gdscs = ARRAY_SIZE(mmcc_msm8996_gdscs),
 };
 
 static const struct of_device_id mmcc_msm8996_match_table[] = {
diff --git a/drivers/clk/shmobile/Makefile b/drivers/clk/renesas/Makefile
index 7e2579b30326..7e2579b30326 100644
--- a/drivers/clk/shmobile/Makefile
+++ b/drivers/clk/renesas/Makefile
diff --git a/drivers/clk/shmobile/clk-div6.c b/drivers/clk/renesas/clk-div6.c
index 999994769450..0627860233cb 100644
--- a/drivers/clk/shmobile/clk-div6.c
+++ b/drivers/clk/renesas/clk-div6.c
@@ -82,9 +82,8 @@ static unsigned long cpg_div6_clock_recalc_rate(struct clk_hw *hw,
 						unsigned long parent_rate)
 {
 	struct div6_clock *clock = to_div6_clock(hw);
-	unsigned int div = (clk_readl(clock->reg) & CPG_DIV6_DIV_MASK) + 1;
 
-	return parent_rate / div;
+	return parent_rate / clock->div;
 }
 
 static unsigned int cpg_div6_clock_calc_div(unsigned long rate,
diff --git a/drivers/clk/shmobile/clk-div6.h b/drivers/clk/renesas/clk-div6.h
index 9a85a95188da..567b31d2bfa5 100644
--- a/drivers/clk/shmobile/clk-div6.h
+++ b/drivers/clk/renesas/clk-div6.h
@@ -1,5 +1,5 @@
-#ifndef __SHMOBILE_CLK_DIV6_H__
-#define __SHMOBILE_CLK_DIV6_H__
+#ifndef __RENESAS_CLK_DIV6_H__
+#define __RENESAS_CLK_DIV6_H__
 
 struct clk *cpg_div6_register(const char *name, unsigned int num_parents,
 			      const char **parent_names, void __iomem *reg);
diff --git a/drivers/clk/shmobile/clk-emev2.c b/drivers/clk/renesas/clk-emev2.c
index a91825471c79..a91825471c79 100644
--- a/drivers/clk/shmobile/clk-emev2.c
+++ b/drivers/clk/renesas/clk-emev2.c
diff --git a/drivers/clk/shmobile/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
index 3b09716ebda2..3d44e183aedd 100644
--- a/drivers/clk/shmobile/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c
@@ -14,7 +14,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/of.h>
diff --git a/drivers/clk/shmobile/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c
index 9326204bed9d..28d204bb659e 100644
--- a/drivers/clk/shmobile/clk-r8a73a4.c
+++ b/drivers/clk/renesas/clk-r8a73a4.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
diff --git a/drivers/clk/shmobile/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c
index 1e6b1da58065..2f7ce6696b6c 100644
--- a/drivers/clk/shmobile/clk-r8a7740.c
+++ b/drivers/clk/renesas/clk-r8a7740.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
diff --git a/drivers/clk/shmobile/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c
index b1741551fff2..40e3a501a50e 100644
--- a/drivers/clk/shmobile/clk-r8a7778.c
+++ b/drivers/clk/renesas/clk-r8a7778.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
 
diff --git a/drivers/clk/shmobile/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c
index 92275c5f2c60..cf2a37df03b1 100644
--- a/drivers/clk/shmobile/clk-r8a7779.c
+++ b/drivers/clk/renesas/clk-r8a7779.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
index 841977240305..00e6aba4b9c0 100644
--- a/drivers/clk/shmobile/clk-rcar-gen2.c
+++ b/drivers/clk/renesas/clk-rcar-gen2.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/math64.h>
diff --git a/drivers/clk/shmobile/clk-rz.c b/drivers/clk/renesas/clk-rz.c
index 9766e3cb595f..f6312c62f16b 100644
--- a/drivers/clk/shmobile/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
diff --git a/drivers/clk/shmobile/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c
index 8966f8bbfd72..eea38f6ea77e 100644
--- a/drivers/clk/shmobile/clk-sh73a0.c
+++ b/drivers/clk/renesas/clk-sh73a0.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clk/shmobile.h>
+#include <linux/clk/renesas.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
diff --git a/drivers/clk/shmobile/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index 13e994772dfd..b2198aef5ed4 100644
--- a/drivers/clk/shmobile/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -20,6 +20,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 
 #include <dt-bindings/clock/r8a7795-cpg-mssr.h>
 
@@ -61,6 +62,7 @@ enum r8a7795_clk_types {
 	CLK_TYPE_GEN3_PLL2,
 	CLK_TYPE_GEN3_PLL3,
 	CLK_TYPE_GEN3_PLL4,
+	CLK_TYPE_GEN3_SD,
 };
 
 static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
@@ -99,11 +101,18 @@ static const struct cpg_core_clk r8a7795_core_clks[] __initconst = {
 	DEF_FIXED("s3d1",       R8A7795_CLK_S3D1,  CLK_S3,         1, 1),
 	DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
+
+	DEF_SD("sd0",           R8A7795_CLK_SD0,   CLK_PLL1_DIV2, 0x0074),
+	DEF_SD("sd1",           R8A7795_CLK_SD1,   CLK_PLL1_DIV2, 0x0078),
+	DEF_SD("sd2",           R8A7795_CLK_SD2,   CLK_PLL1_DIV2, 0x0268),
+	DEF_SD("sd3",           R8A7795_CLK_SD3,   CLK_PLL1_DIV2, 0x026c),
+
 	DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
 
 	DEF_DIV6P1("mso",       R8A7795_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
 	DEF_DIV6P1("hdmi",      R8A7795_CLK_HDMI,  CLK_PLL1_DIV2, 0x250),
+	DEF_DIV6P1("canfd",     R8A7795_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
 };
 
 static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
@@ -120,8 +129,17 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S3D1),
 	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S3D1),
 	DEF_MOD("scif2",		 310,	R8A7795_CLK_S3D4),
+	DEF_MOD("sdif3",		 311,	R8A7795_CLK_SD3),
+	DEF_MOD("sdif2",		 312,	R8A7795_CLK_SD2),
+	DEF_MOD("sdif1",		 313,	R8A7795_CLK_SD1),
+	DEF_MOD("sdif0",		 314,	R8A7795_CLK_SD0),
 	DEF_MOD("pcie1",		 318,	R8A7795_CLK_S3D1),
 	DEF_MOD("pcie0",		 319,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if1",		 327,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb3-if0",		 328,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb-dmac0",		 330,	R8A7795_CLK_S3D1),
+	DEF_MOD("usb-dmac1",		 331,	R8A7795_CLK_S3D1),
+	DEF_MOD("intc-ex",		 407,	R8A7795_CLK_CP),
 	DEF_MOD("intc-ap",		 408,	R8A7795_CLK_S3D1),
 	DEF_MOD("audmac0",		 502,	R8A7795_CLK_S3D4),
 	DEF_MOD("audmac1",		 501,	R8A7795_CLK_S3D4),
@@ -130,6 +148,21 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("hscif2",		 518,	R8A7795_CLK_S3D1),
 	DEF_MOD("hscif1",		 519,	R8A7795_CLK_S3D1),
 	DEF_MOD("hscif0",		 520,	R8A7795_CLK_S3D1),
+	DEF_MOD("fcpvd3",		 600,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd2",		 601,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd1",		 602,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvd0",		 603,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvb1",		 606,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvb0",		 607,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi2",		 609,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi1",		 610,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpvi0",		 611,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf2",		 613,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf1",		 614,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpf0",		 615,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpci1",		 616,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpci0",		 617,	R8A7795_CLK_S2D1),
+	DEF_MOD("fcpcs",		 619,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd3",		 620,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd2",		 621,	R8A7795_CLK_S2D1),
 	DEF_MOD("vspd1",		 622,	R8A7795_CLK_S2D1),
@@ -147,6 +180,7 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("du2",			 722,	R8A7795_CLK_S2D1),
 	DEF_MOD("du1",			 723,	R8A7795_CLK_S2D1),
 	DEF_MOD("du0",			 724,	R8A7795_CLK_S2D1),
+	DEF_MOD("lvds",			 727,	R8A7795_CLK_S2D1),
 	DEF_MOD("hdmi1",		 728,	R8A7795_CLK_HDMI),
 	DEF_MOD("hdmi0",		 729,	R8A7795_CLK_HDMI),
 	DEF_MOD("etheravb",		 812,	R8A7795_CLK_S3D2),
@@ -159,6 +193,9 @@ static const struct mssr_mod_clk r8a7795_mod_clks[] __initconst = {
 	DEF_MOD("gpio2",		 910,	R8A7795_CLK_CP),
 	DEF_MOD("gpio1",		 911,	R8A7795_CLK_CP),
 	DEF_MOD("gpio0",		 912,	R8A7795_CLK_CP),
+	DEF_MOD("can-fd",		 914,	R8A7795_CLK_S3D2),
+	DEF_MOD("can-if1",		 915,	R8A7795_CLK_S3D4),
+	DEF_MOD("can-if0",		 916,	R8A7795_CLK_S3D4),
 	DEF_MOD("i2c6",			 918,	R8A7795_CLK_S3D2),
 	DEF_MOD("i2c5",			 919,	R8A7795_CLK_S3D2),
 	DEF_MOD("i2c4",			 927,	R8A7795_CLK_S3D2),
@@ -198,6 +235,221 @@ static const unsigned int r8a7795_crit_mod_clks[] __initconst = {
 	MOD_CLK_ID(408),	/* INTC-AP (GIC) */
 };
 
+/* -----------------------------------------------------------------------------
+ * SDn Clock
+ *
+ */
+#define CPG_SD_STP_HCK		BIT(9)
+#define CPG_SD_STP_CK		BIT(8)
+
+#define CPG_SD_STP_MASK		(CPG_SD_STP_HCK | CPG_SD_STP_CK)
+#define CPG_SD_FC_MASK		(0x7 << 2 | 0x3 << 0)
+
+#define CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) \
+{ \
+	.val = ((stp_hck) ? CPG_SD_STP_HCK : 0) | \
+	       ((stp_ck) ? CPG_SD_STP_CK : 0) | \
+	       ((sd_srcfc) << 2) | \
+	       ((sd_fc) << 0), \
+	.div = (sd_div), \
+}
+
+struct sd_div_table {
+	u32 val;
+	unsigned int div;
+};
+
+struct sd_clock {
+	struct clk_hw hw;
+	void __iomem *reg;
+	const struct sd_div_table *div_table;
+	unsigned int div_num;
+	unsigned int div_min;
+	unsigned int div_max;
+};
+
+/* SDn divider
+ *                     sd_srcfc   sd_fc   div
+ * stp_hck   stp_ck    (div)      (div)     = sd_srcfc x sd_fc
+ *-------------------------------------------------------------------
+ *  0         0         0 (1)      1 (4)      4
+ *  0         0         1 (2)      1 (4)      8
+ *  1         0         2 (4)      1 (4)     16
+ *  1         0         3 (8)      1 (4)     32
+ *  1         0         4 (16)     1 (4)     64
+ *  0         0         0 (1)      0 (2)      2
+ *  0         0         1 (2)      0 (2)      4
+ *  1         0         2 (4)      0 (2)      8
+ *  1         0         3 (8)      0 (2)     16
+ *  1         0         4 (16)     0 (2)     32
+ */
+static const struct sd_div_table cpg_sd_div_table[] = {
+/*	CPG_SD_DIV_TABLE_DATA(stp_hck,  stp_ck,   sd_srcfc,   sd_fc,  sd_div) */
+	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          1,        4),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          1,        8),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          1,       16),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          1,       32),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          1,       64),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          0,        2),
+	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          0,        4),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          0,        8),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          0,       16),
+	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          0,       32),
+};
+
+#define to_sd_clock(_hw) container_of(_hw, struct sd_clock, hw)
+
+static int cpg_sd_clock_enable(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	u32 val, sd_fc;
+	unsigned int i;
+
+	val = clk_readl(clock->reg);
+
+	sd_fc = val & CPG_SD_FC_MASK;
+	for (i = 0; i < clock->div_num; i++)
+		if (sd_fc == (clock->div_table[i].val & CPG_SD_FC_MASK))
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	val &= ~(CPG_SD_STP_MASK);
+	val |= clock->div_table[i].val & CPG_SD_STP_MASK;
+
+	clk_writel(val, clock->reg);
+
+	return 0;
+}
+
+static void cpg_sd_clock_disable(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+
+	clk_writel(clk_readl(clock->reg) | CPG_SD_STP_MASK, clock->reg);
+}
+
+static int cpg_sd_clock_is_enabled(struct clk_hw *hw)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+
+	return !(clk_readl(clock->reg) & CPG_SD_STP_MASK);
+}
+
+static unsigned long cpg_sd_clock_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned long rate = parent_rate;
+	u32 val, sd_fc;
+	unsigned int i;
+
+	val = clk_readl(clock->reg);
+
+	sd_fc = val & CPG_SD_FC_MASK;
+	for (i = 0; i < clock->div_num; i++)
+		if (sd_fc == (clock->div_table[i].val & CPG_SD_FC_MASK))
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	return DIV_ROUND_CLOSEST(rate, clock->div_table[i].div);
+}
+
+static unsigned int cpg_sd_clock_calc_div(struct sd_clock *clock,
+					  unsigned long rate,
+					  unsigned long parent_rate)
+{
+	unsigned int div;
+
+	if (!rate)
+		rate = 1;
+
+	div = DIV_ROUND_CLOSEST(parent_rate, rate);
+
+	return clamp_t(unsigned int, div, clock->div_min, clock->div_max);
+}
+
+static long cpg_sd_clock_round_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long *parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned int div = cpg_sd_clock_calc_div(clock, rate, *parent_rate);
+
+	return DIV_ROUND_CLOSEST(*parent_rate, div);
+}
+
+static int cpg_sd_clock_set_rate(struct clk_hw *hw, unsigned long rate,
+				   unsigned long parent_rate)
+{
+	struct sd_clock *clock = to_sd_clock(hw);
+	unsigned int div = cpg_sd_clock_calc_div(clock, rate, parent_rate);
+	u32 val;
+	unsigned int i;
+
+	for (i = 0; i < clock->div_num; i++)
+		if (div == clock->div_table[i].div)
+			break;
+
+	if (i >= clock->div_num)
+		return -EINVAL;
+
+	val = clk_readl(clock->reg);
+	val &= ~(CPG_SD_STP_MASK | CPG_SD_FC_MASK);
+	val |= clock->div_table[i].val & (CPG_SD_STP_MASK | CPG_SD_FC_MASK);
+	clk_writel(val, clock->reg);
+
+	return 0;
+}
+
+static const struct clk_ops cpg_sd_clock_ops = {
+	.enable = cpg_sd_clock_enable,
+	.disable = cpg_sd_clock_disable,
+	.is_enabled = cpg_sd_clock_is_enabled,
+	.recalc_rate = cpg_sd_clock_recalc_rate,
+	.round_rate = cpg_sd_clock_round_rate,
+	.set_rate = cpg_sd_clock_set_rate,
+};
+
+static struct clk * __init cpg_sd_clk_register(const struct cpg_core_clk *core,
+					       void __iomem *base,
+					       const char *parent_name)
+{
+	struct clk_init_data init;
+	struct sd_clock *clock;
+	struct clk *clk;
+	unsigned int i;
+
+	clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+	if (!clock)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = core->name;
+	init.ops = &cpg_sd_clock_ops;
+	init.flags = CLK_IS_BASIC | CLK_SET_RATE_PARENT;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	clock->reg = base + core->offset;
+	clock->hw.init = &init;
+	clock->div_table = cpg_sd_div_table;
+	clock->div_num = ARRAY_SIZE(cpg_sd_div_table);
+
+	clock->div_max = clock->div_table[0].div;
+	clock->div_min = clock->div_max;
+	for (i = 1; i < clock->div_num; i++) {
+		clock->div_max = max(clock->div_max, clock->div_table[i].div);
+		clock->div_min = min(clock->div_min, clock->div_table[i].div);
+	}
+
+	clk = clk_register(NULL, &clock->hw);
+	if (IS_ERR(clk))
+		kfree(clock);
+
+	return clk;
+}
 
 #define CPG_PLL0CR	0x00d8
 #define CPG_PLL2CR	0x002c
@@ -323,6 +575,9 @@ struct clk * __init r8a7795_cpg_clk_register(struct device *dev,
 		mult = (((value >> 24) & 0x7f) + 1) * 2;
 		break;
 
+	case CLK_TYPE_GEN3_SD:
+		return cpg_sd_clk_register(core, base, __clk_get_name(parent));
+
 	default:
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/drivers/clk/shmobile/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index 9a4d888164bb..58e24b326a48 100644
--- a/drivers/clk/shmobile/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -348,6 +348,7 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
 #else
 			dev_dbg(dev, "Ignoring MSTP %s to prevent disabling\n",
 				mod->name);
+			kfree(clock);
 			return;
 #endif
 		}
@@ -568,7 +569,11 @@ static int __init cpg_mssr_probe(struct platform_device *pdev)
 	if (error)
 		return error;
 
-	devm_add_action(dev, cpg_mssr_del_clk_provider, np);
+	error = devm_add_action_or_reset(dev,
+					 cpg_mssr_del_clk_provider,
+					 np);
+	if (error)
+		return error;
 
 	error = cpg_mssr_add_clk_domain(dev, info->core_pm_clks,
 					info->num_core_pm_clks);
diff --git a/drivers/clk/shmobile/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index e09f03cbf086..952b6957233b 100644
--- a/drivers/clk/shmobile/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -53,6 +53,8 @@ enum clk_types {
 	DEF_BASE(_name, _id, CLK_TYPE_FF, _parent, .div = _div, .mult = _mult)
 #define DEF_DIV6P1(_name, _id, _parent, _offset)	\
 	DEF_BASE(_name, _id, CLK_TYPE_DIV6P1, _parent, .offset = _offset)
+#define DEF_SD(_name, _id, _parent, _offset)	\
+	DEF_BASE(_name, _id, CLK_TYPE_GEN3_SD, _parent, .offset = _offset)
 
 
     /*
diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index d07374f48caf..4e73ed5cab58 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c
@@ -116,7 +116,7 @@ static void rockchip_cpuclk_set_dividers(struct rockchip_cpuclk *cpuclk,
 
 		pr_debug("%s: setting reg 0x%x to 0x%x\n",
 			 __func__, clksel->reg, clksel->val);
-		writel(clksel->val , cpuclk->reg_base + clksel->reg);
+		writel(clksel->val, cpuclk->reg_base + clksel->reg);
 	}
 }
 
@@ -290,14 +290,14 @@ struct clk *rockchip_clk_register_cpuclk(const char *name,
 		pr_err("%s: could not lookup parent clock %s\n",
 		       __func__, parent_names[0]);
 		ret = -EINVAL;
-		goto free_cpuclk;
+		goto free_alt_parent;
 	}
 
 	ret = clk_notifier_register(clk, &cpuclk->clk_nb);
 	if (ret) {
 		pr_err("%s: failed to register clock notifier for %s\n",
 				__func__, name);
-		goto free_cpuclk;
+		goto free_alt_parent;
 	}
 
 	if (nrates > 0) {
@@ -326,6 +326,8 @@ free_rate_table:
 	kfree(cpuclk->rate_table);
 unregister_notifier:
 	clk_notifier_unregister(clk, &cpuclk->clk_nb);
+free_alt_parent:
+	clk_disable_unprepare(cpuclk->alt_parent);
 free_cpuclk:
 	kfree(cpuclk);
 	return ERR_PTR(ret);
diff --git a/drivers/clk/rockchip/clk-inverter.c b/drivers/clk/rockchip/clk-inverter.c
index 7cbf43beb3c6..dcb6e37f3da1 100644
--- a/drivers/clk/rockchip/clk-inverter.c
+++ b/drivers/clk/rockchip/clk-inverter.c
@@ -90,7 +90,7 @@ struct clk *rockchip_clk_register_inverter(const char *name,
 
 	inv_clock = kmalloc(sizeof(*inv_clock), GFP_KERNEL);
 	if (!inv_clock)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
 	init.num_parents = num_parents;
@@ -106,11 +106,7 @@ struct clk *rockchip_clk_register_inverter(const char *name,
 
 	clk = clk_register(NULL, &inv_clock->hw);
 	if (IS_ERR(clk))
-		goto err_free;
+		kfree(inv_clock);
 
 	return clk;
-
-err_free:
-	kfree(inv_clock);
-	return NULL;
 }
diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index 2685644826a0..e0dc7e83403a 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -150,7 +150,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 
 	mmc_clock = kmalloc(sizeof(*mmc_clock), GFP_KERNEL);
 	if (!mmc_clock)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
 	init.num_parents = num_parents;
@@ -172,11 +172,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 
 	clk = clk_register(NULL, &mmc_clock->hw);
 	if (IS_ERR(clk))
-		goto err_free;
+		kfree(mmc_clock);
 
 	return clk;
-
-err_free:
-	kfree(mmc_clock);
-	return NULL;
 }
diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index b7e66c9dd9f2..5de797e34d54 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c
@@ -94,6 +94,11 @@ static int rockchip_pll_wait_lock(struct rockchip_clk_pll *pll)
 	unsigned int val;
 	int delay = 24000000, ret;
 
+	if (IS_ERR(grf)) {
+		pr_err("%s: grf regmap not available\n", __func__);
+		return PTR_ERR(grf);
+	}
+
 	while (delay > 0) {
 		ret = regmap_read(grf, pll->lock_offset, &val);
 		if (ret) {
diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c
index 53e9c39f5103..7cdb2d61f3e0 100644
--- a/drivers/clk/rockchip/clk-rk3036.c
+++ b/drivers/clk/rockchip/clk-rk3036.c
@@ -177,6 +177,8 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 	GATE(0, "gpll_armclk", "gpll", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(0), 6, GFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	/*
 	 * Clock-Architecture Diagram 2
 	 */
@@ -187,6 +189,7 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 			RK2928_CLKGATE_CON(0), 8, GFLAGS),
 	COMPOSITE_NOGATE(0, "ddrphy2x", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO),
+	FACTOR(0, "ddrphy", "ddrphy2x", 0, 1, 2),
 
 	COMPOSITE_NOMUX(0, "pclk_dbg", "armclk", CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(1), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
@@ -263,6 +266,8 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 	COMPOSITE(0, "aclk_vcodec", mux_pll_src_3plls_p, 0,
 			RK2928_CLKSEL_CON(32), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 11, GFLAGS),
+	FACTOR_GATE(HCLK_VCODEC, "hclk_vcodec", "aclk_vcodec", 0, 1, 4,
+			RK2928_CLKGATE_CON(3), 12, GFLAGS),
 
 	COMPOSITE(0, "aclk_hvec", mux_pll_src_3plls_p, 0,
 			RK2928_CLKSEL_CON(20), 0, 2, MFLAGS, 2, 5, DFLAGS,
@@ -351,6 +356,7 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 	COMPOSITE_NOMUX(SCLK_MAC, "mac_clk", "mac_clk_ref", 0,
 			RK2928_CLKSEL_CON(21), 4, 5, DFLAGS,
 			RK2928_CLKGATE_CON(2), 6, GFLAGS),
+	FACTOR(0, "sclk_macref_out", "hclk_peri_src", 0, 1, 2),
 
 	MUX(SCLK_HDMI, "dclk_hdmi", mux_dclk_p, 0,
 			RK2928_CLKSEL_CON(31), 0, 1, MFLAGS),
@@ -376,11 +382,9 @@ static struct rockchip_clk_branch rk3036_clk_branches[] __initdata = {
 	GATE(ACLK_VIO, "aclk_vio", "aclk_disp1_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 13, GFLAGS),
 	GATE(ACLK_LCDC, "aclk_lcdc", "aclk_disp1_pre", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS),
 
-	GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(6), 12, GFLAGS),
+	GATE(HCLK_VIO_BUS, "hclk_vio_bus", "hclk_disp_pre", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(6), 12, GFLAGS),
 	GATE(HCLK_LCDC, "hclk_lcdc", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(9), 5, GFLAGS),
 
-	/* hclk_video gates */
-	GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_disp_pre", 0, RK2928_CLKGATE_CON(3), 12, GFLAGS),
 
 	/* xin24m gates */
 	GATE(SCLK_PVTM_CORE, "sclk_pvtm_core", "xin24m", 0, RK2928_CLKGATE_CON(10), 0, GFLAGS),
@@ -444,34 +448,11 @@ static void __init rk3036_clk_init(struct device_node *np)
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
 	if (IS_ERR(clk))
 		pr_warn("%s: could not register clock usb480m: %ld\n",
 			__func__, PTR_ERR(clk));
 
-	clk = clk_register_fixed_factor(NULL, "ddrphy", "ddrphy2x", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock ddrphy: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vcodec_pre",
-					"aclk_vcodec", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "sclk_macref_out",
-					"hclk_peri_src", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock sclk_macref_out: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_plls(rk3036_pll_clks,
 				   ARRAY_SIZE(rk3036_pll_clks),
 				   RK3036_GRF_SOC_STATUS0);
diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index 7f7444cbf6fc..40bab3901491 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c
@@ -339,13 +339,15 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = {
 	INVERTER(0, "pclk_cif0", "pclkin_cif0",
 			RK2928_CLKSEL_CON(30), 8, IFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	/*
 	 * the 480m are generated inside the usb block from these clocks,
 	 * but they are also a source for the hsicphy clock.
 	 */
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 5, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 6, GFLAGS),
 
 	COMPOSITE(0, "mac_src", mux_mac_p, 0,
@@ -605,7 +607,7 @@ static struct rockchip_clk_branch rk3066a_clk_branches[] __initdata = {
 	GATE(SCLK_TIMER2, "timer2", "xin24m", 0,
 			RK2928_CLKGATE_CON(3), 2, GFLAGS),
 
-	COMPOSITE_NOMUX(0, "sclk_tsadc", "xin24m", 0,
+	COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0,
 			RK2928_CLKSEL_CON(34), 0, 16, DFLAGS,
 			RK2928_CLKGATE_CON(2), 15, GFLAGS),
 
@@ -662,11 +664,11 @@ static struct clk_div_table div_rk3188_aclk_core_t[] = {
 	{ /* sentinel */ },
 };
 
-PNAME(mux_hsicphy_p)		= { "sclk_otgphy0", "sclk_otgphy1",
+PNAME(mux_hsicphy_p)		= { "sclk_otgphy0_480m", "sclk_otgphy1_480m",
 				    "gpll", "cpll" };
 
 static struct rockchip_clk_branch rk3188_i2s0_fracmux __initdata =
-	MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, 0,
+	MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(3), 8, 2, MFLAGS);
 
 static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = {
@@ -722,7 +724,7 @@ static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = {
 	COMPOSITE_NOMUX(0, "i2s0_pre", "i2s_src", 0,
 			RK2928_CLKSEL_CON(3), 0, 7, DFLAGS,
 			RK2928_CLKGATE_CON(0), 9, GFLAGS),
-	COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", 0,
+	COMPOSITE_FRACMUX(0, "i2s0_frac", "i2s0_pre", CLK_SET_RATE_PARENT,
 			RK2928_CLKSEL_CON(7), 0,
 			RK2928_CLKGATE_CON(0), 10, GFLAGS,
 			&rk3188_i2s0_fracmux),
@@ -748,12 +750,12 @@ static const char *const rk3188_critical_clocks[] __initconst = {
 	"hclk_peri",
 	"pclk_cpu",
 	"pclk_peri",
+	"hclk_cpubus"
 };
 
 static void __init rk3188_common_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *clk;
 
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
@@ -763,17 +765,6 @@ static void __init rk3188_common_clk_init(struct device_node *np)
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock usb480m: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_branches(common_clk_branches,
 				  ARRAY_SIZE(common_clk_branches));
 
diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c
index 981a50205339..7702d2855e9c 100644
--- a/drivers/clk/rockchip/clk-rk3228.c
+++ b/drivers/clk/rockchip/clk-rk3228.c
@@ -187,7 +187,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 			RK2928_CLKGATE_CON(7), 1, GFLAGS),
 	GATE(0, "ddrc", "ddrphy_pre", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(8), 5, GFLAGS),
-	GATE(0, "ddrphy", "ddrphy_pre", CLK_IGNORE_UNUSED,
+	FACTOR_GATE(0, "ddrphy", "ddrphy4x", CLK_IGNORE_UNUSED, 1, 4,
 			RK2928_CLKGATE_CON(7), 0, GFLAGS),
 
 	/* PD_CORE */
@@ -240,13 +240,13 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 	COMPOSITE(0, "aclk_vpu_pre", mux_pll_src_4plls_p, 0,
 			RK2928_CLKSEL_CON(32), 5, 2, MFLAGS, 0, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 11, GFLAGS),
-	GATE(0, "hclk_vpu_src", "aclk_vpu_pre", 0,
+	FACTOR_GATE(0, "hclk_vpu_pre", "aclk_vpu_pre", 0, 1, 4,
 			RK2928_CLKGATE_CON(4), 4, GFLAGS),
 
 	COMPOSITE(0, "aclk_rkvdec_pre", mux_pll_src_4plls_p, 0,
 			RK2928_CLKSEL_CON(28), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 2, GFLAGS),
-	GATE(0, "hclk_rkvdec_src", "aclk_rkvdec_pre", 0,
+	FACTOR_GATE(0, "hclk_rkvdec_pre", "aclk_rkvdec_pre", 0, 1, 4,
 			RK2928_CLKGATE_CON(4), 5, GFLAGS),
 
 	COMPOSITE(0, "sclk_vdec_cabac", mux_pll_src_4plls_p, 0,
@@ -285,7 +285,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 			RK2928_CLKSEL_CON(23), 14, 2, MFLAGS, 8, 6, DFLAGS,
 			RK2928_CLKGATE_CON(3), 5, GFLAGS),
 
-	GATE(0, "sclk_hdmi_hdcp", "xin24m", 0,
+	GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0,
 			RK2928_CLKGATE_CON(3), 7, GFLAGS),
 
 	COMPOSITE(0, "sclk_hdmi_cec", mux_sclk_hdmi_cec_p, 0,
@@ -364,13 +364,15 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 			RK2928_CLKGATE_CON(3), 1, GFLAGS),
 	MUX(0, "sclk_vop_src", mux_sclk_vop_src_p, 0,
 			RK2928_CLKSEL_CON(27), 0, 1, MFLAGS),
-	DIV(0, "dclk_hdmiphy", "sclk_vop_src", 0,
+	DIV(DCLK_HDMI_PHY, "dclk_hdmiphy", "sclk_vop_src", 0,
 			RK2928_CLKSEL_CON(29), 0, 3, DFLAGS),
 	DIV(0, "sclk_vop_pre", "sclk_vop_src", 0,
 			RK2928_CLKSEL_CON(27), 8, 8, DFLAGS),
-	MUX(0, "dclk_vop", mux_dclk_vop_p, 0,
+	MUX(DCLK_VOP, "dclk_vop", mux_dclk_vop_p, 0,
 			RK2928_CLKSEL_CON(27), 1, 1, MFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	COMPOSITE(0, "i2s0_src", mux_pll_src_2plls_p, 0,
 			RK2928_CLKSEL_CON(9), 15, 1, MFLAGS, 0, 7, DFLAGS,
 			RK2928_CLKGATE_CON(0), 3, GFLAGS),
@@ -422,7 +424,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 	GATE(0, "sclk_otgphy1", "xin24m", 0,
 			RK2928_CLKGATE_CON(1), 6, GFLAGS),
 
-	COMPOSITE_NOMUX(0, "sclk_tsadc", "xin24m", 0,
+	COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin24m", 0,
 			RK2928_CLKSEL_CON(24), 6, 10, DFLAGS,
 			RK2928_CLKGATE_CON(2), 8, GFLAGS),
 
@@ -503,7 +505,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 	GATE(0, "aclk_iep", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 2, GFLAGS),
 	GATE(0, "aclk_iep_noc", "aclk_iep_pre", 0, RK2928_CLKGATE_CON(13), 9, GFLAGS),
 
-	GATE(0, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS),
+	GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 5, GFLAGS),
 	GATE(0, "aclk_vop_noc", "aclk_vop_pre", 0, RK2928_CLKGATE_CON(13), 12, GFLAGS),
 
 	GATE(0, "aclk_hdcp", "aclk_hdcp_pre", 0, RK2928_CLKGATE_CON(14), 10, GFLAGS),
@@ -511,13 +513,13 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 
 	GATE(0, "hclk_rga", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 1, GFLAGS),
 	GATE(0, "hclk_iep", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 3, GFLAGS),
-	GATE(0, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS),
+	GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 6, GFLAGS),
 	GATE(0, "hclk_vio_ahb_arbi", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 7, GFLAGS),
 	GATE(0, "hclk_vio_noc", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 8, GFLAGS),
 	GATE(0, "hclk_vop_noc", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(13), 13, GFLAGS),
 	GATE(0, "hclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 7, GFLAGS),
 	GATE(0, "hclk_hdcp_mmu", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 12, GFLAGS),
-	GATE(0, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS),
+	GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 6, GFLAGS),
 	GATE(0, "pclk_vio_h2p", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 8, GFLAGS),
 	GATE(0, "pclk_hdcp", "hclk_vio_pre", 0, RK2928_CLKGATE_CON(14), 11, GFLAGS),
 
@@ -582,7 +584,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 	GATE(PCLK_UART0, "pclk_uart0", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 12, GFLAGS),
 	GATE(PCLK_UART1, "pclk_uart1", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 13, GFLAGS),
 	GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 14, GFLAGS),
-	GATE(0, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS),
+	GATE(PCLK_TSADC, "pclk_tsadc", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 15, GFLAGS),
 	GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 0, GFLAGS),
 	GATE(0, "pclk_cru", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 1, GFLAGS),
 	GATE(0, "pclk_sgrf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(10), 2, GFLAGS),
@@ -590,7 +592,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 
 	GATE(0, "pclk_ddrphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 3, GFLAGS),
 	GATE(0, "pclk_acodecphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 5, GFLAGS),
-	GATE(0, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS),
+	GATE(PCLK_HDMI_PHY, "pclk_hdmiphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 7, GFLAGS),
 	GATE(0, "pclk_vdacphy", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 8, GFLAGS),
 	GATE(0, "pclk_phy_noc", "pclk_phy_pre", 0, RK2928_CLKGATE_CON(10), 9, GFLAGS),
 
@@ -605,13 +607,13 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 
 	/* PD_MMC */
 	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RK3228_SDMMC_CON0, 1),
-	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 1),
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3228_SDMMC_CON1, 0),
 
 	MMC(SCLK_SDIO_DRV,     "sdio_drv",     "sclk_sdio",  RK3228_SDIO_CON0,  1),
-	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK3228_SDIO_CON1,  1),
+	MMC(SCLK_SDIO_SAMPLE,  "sdio_sample",  "sclk_sdio",  RK3228_SDIO_CON1,  0),
 
 	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RK3228_EMMC_CON0,  1),
-	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK3228_EMMC_CON1,  1),
+	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK3228_EMMC_CON1,  0),
 };
 
 static const char *const rk3228_critical_clocks[] __initconst = {
@@ -624,7 +626,6 @@ static const char *const rk3228_critical_clocks[] __initconst = {
 static void __init rk3228_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *clk;
 
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
@@ -634,29 +635,6 @@ static void __init rk3228_clk_init(struct device_node *np)
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-				__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "ddrphy_pre", "ddrphy4x", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock ddrphy_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vpu_pre",
-					"hclk_vpu_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vpu_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_rkvdec_pre",
-					"hclk_rkvdec_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_rkvdec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	rockchip_clk_register_plls(rk3228_pll_clks,
 				   ARRAY_SIZE(rk3228_pll_clks),
 				   RK3228_GRF_SOC_STATUS0);
diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c
index 984fc187d12e..3cb72163a512 100644
--- a/drivers/clk/rockchip/clk-rk3288.c
+++ b/drivers/clk/rockchip/clk-rk3288.c
@@ -195,8 +195,8 @@ PNAME(mux_hsadcout_p)	= { "hsadc_src", "ext_hsadc" };
 PNAME(mux_edp_24m_p)	= { "ext_edp_24m", "xin24m" };
 PNAME(mux_tspout_p)	= { "cpll", "gpll", "npll", "xin27m" };
 
-PNAME(mux_usbphy480m_p)		= { "sclk_otgphy1", "sclk_otgphy2",
-				    "sclk_otgphy0" };
+PNAME(mux_usbphy480m_p)		= { "sclk_otgphy1_480m", "sclk_otgphy2_480m",
+				    "sclk_otgphy0_480m" };
 PNAME(mux_hsicphy480m_p)	= { "cpll", "gpll", "usbphy480m_src" };
 PNAME(mux_hsicphy12m_p)		= { "hsicphy12m_xin12m", "hsicphy12m_usbphy" };
 
@@ -333,6 +333,8 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
 	GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 7, GFLAGS),
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	COMPOSITE(0, "i2s_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3288_CLKSEL_CON(4), 15, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3288_CLKGATE_CON(4), 1, GFLAGS),
@@ -399,12 +401,10 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
 	 */
 	GATE(ACLK_VCODEC, "aclk_vcodec", "aclk_vdpu", 0,
 		RK3288_CLKGATE_CON(9), 0, GFLAGS),
-	/*
-	 * We introduce a virtul node of hclk_vodec_pre_v to split one clock
-	 * struct with a gate and a fix divider into two node in software.
-	 */
-	GATE(0, "hclk_vcodec_pre_v", "aclk_vdpu", 0,
+
+	FACTOR_GATE(0, "hclk_vcodec_pre", "aclk_vdpu", 0, 1, 4,
 		RK3288_CLKGATE_CON(3), 10, GFLAGS),
+
 	GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0,
 		RK3288_CLKGATE_CON(9), 1, GFLAGS),
 
@@ -537,11 +537,11 @@ static struct rockchip_clk_branch rk3288_clk_branches[] __initdata = {
 			RK3288_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(4), 10, GFLAGS),
 
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 4, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 5, GFLAGS),
-	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "usb480m", CLK_IGNORE_UNUSED,
+	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "xin24m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 6, GFLAGS),
 	GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 7, GFLAGS),
@@ -888,24 +888,6 @@ static void __init rk3288_clk_init(struct device_node *np)
 
 	rockchip_clk_init(np, rk3288_cru_base, CLK_NR_CLKS);
 
-	/* xin12m is created by an cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-
-	clk = clk_register_fixed_factor(NULL, "usb480m", "xin24m", 0, 20, 1);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock usb480m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_vcodec_pre",
-					"hclk_vcodec_pre_v", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	/* Watchdog pclk is controlled by RK3288_SGRF_SOC_CON0[1]. */
 	clk = clk_register_fixed_factor(NULL, "pclk_wdt", "pclk_pd_alive", 0, 1, 1);
 	if (IS_ERR(clk))
diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c
index 21f3ea909fab..a2bb12200465 100644
--- a/drivers/clk/rockchip/clk-rk3368.c
+++ b/drivers/clk/rockchip/clk-rk3368.c
@@ -121,7 +121,7 @@ PNAME(mux_i2s_2ch_p)		= { "i2s_2ch_src", "i2s_2ch_frac",
 				    "dummy", "xin12m" };
 PNAME(mux_spdif_8ch_p)		= { "spdif_8ch_pre", "spdif_8ch_frac",
 				    "ext_i2s", "xin12m" };
-PNAME(mux_edp_24m_p)		= { "dummy", "xin24m" };
+PNAME(mux_edp_24m_p)		= { "xin24m", "dummy" };
 PNAME(mux_vip_out_p)		= { "vip_src", "xin24m" };
 PNAME(mux_usbphy480m_p)		= { "usbotg_out", "xin24m" };
 PNAME(mux_hsic_usbphy480m_p)	= { "usbotg_out", "dummy" };
@@ -165,7 +165,7 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkb_data = {
 	.core_reg = RK3368_CLKSEL_CON(0),
 	.div_core_shift = 0,
 	.div_core_mask = 0x1f,
-	.mux_core_shift = 15,
+	.mux_core_shift = 7,
 };
 
 static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = {
@@ -218,36 +218,66 @@ static const struct rockchip_cpuclk_reg_data rk3368_cpuclkl_data = {
 	}
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkb_rates[] __initdata = {
-	RK3368_CPUCLKB_RATE(1512000000, 2, 6, 6),
-	RK3368_CPUCLKB_RATE(1488000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1416000000, 2, 5, 5),
-	RK3368_CPUCLKB_RATE(1200000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE(1008000000, 2, 4, 4),
-	RK3368_CPUCLKB_RATE( 816000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKB_RATE( 600000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKB_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKB_RATE(1512000000, 1, 5, 5),
+	RK3368_CPUCLKB_RATE(1488000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1416000000, 1, 4, 4),
+	RK3368_CPUCLKB_RATE(1200000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE(1008000000, 1, 3, 3),
+	RK3368_CPUCLKB_RATE( 816000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKB_RATE( 600000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKB_RATE( 312000000, 1, 1, 1),
 };
 
 static struct rockchip_cpuclk_rate_table rk3368_cpuclkl_rates[] __initdata = {
-	RK3368_CPUCLKL_RATE(1512000000, 2, 7, 7),
-	RK3368_CPUCLKL_RATE(1488000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1416000000, 2, 6, 6),
-	RK3368_CPUCLKL_RATE(1200000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE(1008000000, 2, 5, 5),
-	RK3368_CPUCLKL_RATE( 816000000, 2, 4, 4),
-	RK3368_CPUCLKL_RATE( 696000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 600000000, 2, 3, 3),
-	RK3368_CPUCLKL_RATE( 408000000, 2, 2, 2),
-	RK3368_CPUCLKL_RATE( 312000000, 2, 2, 2),
+	RK3368_CPUCLKL_RATE(1512000000, 1, 6, 6),
+	RK3368_CPUCLKL_RATE(1488000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1416000000, 1, 5, 5),
+	RK3368_CPUCLKL_RATE(1200000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE(1008000000, 1, 4, 4),
+	RK3368_CPUCLKL_RATE( 816000000, 1, 3, 3),
+	RK3368_CPUCLKL_RATE( 696000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 600000000, 1, 2, 2),
+	RK3368_CPUCLKL_RATE( 408000000, 1, 1, 1),
+	RK3368_CPUCLKL_RATE( 312000000, 1, 1, 1),
 };
 
+static struct rockchip_clk_branch rk3368_i2s_8ch_fracmux __initdata =
+	MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(27), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_spdif_8ch_fracmux __initdata =
+	MUX(0, "spdif_8ch_pre", mux_spdif_8ch_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(31), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_i2s_2ch_fracmux __initdata =
+	MUX(0, "i2s_2ch_pre", mux_i2s_2ch_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(53), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart0_fracmux __initdata =
+	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(33), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart1_fracmux __initdata =
+	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(35), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart3_fracmux __initdata =
+	MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(39), 8, 2, MFLAGS);
+
+static struct rockchip_clk_branch rk3368_uart4_fracmux __initdata =
+	MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT,
+	    RK3368_CLKSEL_CON(41), 8, 2, MFLAGS);
+
 static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	/*
 	 * Clock-Architecture Diagram 2
 	 */
 
+	FACTOR(0, "xin12m", "xin24m", 0, 1, 2),
+
 	MUX(SCLK_USBPHY480M, "usbphy_480m", mux_usbphy480m_p, CLK_SET_RATE_PARENT,
 			RK3368_CLKSEL_CON(13), 8, 1, MFLAGS),
 
@@ -299,7 +329,7 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	COMPOSITE_NOGATE_DIVTBL(0, "ddrphy_src", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK3368_CLKSEL_CON(13), 4, 1, MFLAGS, 0, 2, DFLAGS, div_ddrphy_t),
 
-	GATE(0, "sclk_ddr", "ddrphy_div4", CLK_IGNORE_UNUSED,
+	FACTOR_GATE(0, "sclk_ddr", "ddrphy_src", CLK_IGNORE_UNUSED, 1, 4,
 			RK3368_CLKGATE_CON(6), 14, GFLAGS),
 	GATE(0, "sclk_ddr4x", "ddrphy_src", CLK_IGNORE_UNUSED,
 			RK3368_CLKGATE_CON(6), 15, GFLAGS),
@@ -337,11 +367,10 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	COMPOSITE(0, "i2s_8ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(27), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(6), 1, GFLAGS),
-	COMPOSITE_FRAC(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(28), 0,
-			RK3368_CLKGATE_CON(6), 2, GFLAGS),
-	MUX(0, "i2s_8ch_pre", mux_i2s_8ch_pre_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(27), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s_8ch_frac", "i2s_8ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(28), 0,
+			  RK3368_CLKGATE_CON(6), 2, GFLAGS,
+			  &rk3368_i2s_8ch_fracmux),
 	COMPOSITE_NODIV(SCLK_I2S_8CH_OUT, "i2s_8ch_clkout", mux_i2s_8ch_clkout_p, 0,
 			RK3368_CLKSEL_CON(27), 15, 1, MFLAGS,
 			RK3368_CLKGATE_CON(6), 0, GFLAGS),
@@ -350,21 +379,21 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	COMPOSITE(0, "spdif_8ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(31), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(6), 4, GFLAGS),
-	COMPOSITE_FRAC(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(32), 0,
-			RK3368_CLKGATE_CON(6), 5, GFLAGS),
-	COMPOSITE_NODIV(SCLK_SPDIF_8CH, "sclk_spdif_8ch", mux_spdif_8ch_p, 0,
-			RK3368_CLKSEL_CON(31), 8, 2, MFLAGS,
-			RK3368_CLKGATE_CON(6), 6, GFLAGS),
+	COMPOSITE_FRACMUX(0, "spdif_8ch_frac", "spdif_8ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(32), 0,
+			  RK3368_CLKGATE_CON(6), 5, GFLAGS,
+			  &rk3368_spdif_8ch_fracmux),
+	GATE(SCLK_SPDIF_8CH, "sclk_spdif_8ch", "spdif_8ch_pre", CLK_SET_RATE_PARENT,
+	     RK3368_CLKGATE_CON(6), 6, GFLAGS),
 	COMPOSITE(0, "i2s_2ch_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(53), 12, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(5), 13, GFLAGS),
-	COMPOSITE_FRAC(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(54), 0,
-			RK3368_CLKGATE_CON(5), 14, GFLAGS),
-	COMPOSITE_NODIV(SCLK_I2S_2CH, "sclk_i2s_2ch", mux_i2s_2ch_p, 0,
-			RK3368_CLKSEL_CON(53), 8, 2, MFLAGS,
-			RK3368_CLKGATE_CON(5), 15, GFLAGS),
+	COMPOSITE_FRACMUX(0, "i2s_2ch_frac", "i2s_2ch_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(54), 0,
+			  RK3368_CLKGATE_CON(5), 14, GFLAGS,
+			  &rk3368_i2s_2ch_fracmux),
+	GATE(SCLK_I2S_2CH, "sclk_i2s_2ch", "i2s_2ch_pre", CLK_SET_RATE_PARENT,
+	     RK3368_CLKGATE_CON(5), 15, GFLAGS),
 
 	COMPOSITE(0, "sclk_tsp", mux_pll_src_cpll_gpll_npll_p, 0,
 			RK3368_CLKSEL_CON(46), 6, 2, MFLAGS, 0, 5, DFLAGS,
@@ -384,18 +413,18 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	 * Clock-Architecture Diagram 3
 	 */
 
-	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vepu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 6, GFLAGS),
-	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_usb_p, 0,
+	COMPOSITE(0, "aclk_vdpu", mux_pll_src_cpll_gpll_npll_usb_p, 0,
 			RK3368_CLKSEL_CON(15), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK3368_CLKGATE_CON(4), 7, GFLAGS),
 
 	/*
-	 * We introduce a virtual node of hclk_vodec_pre_v to split one clock
-	 * struct with a gate and a fix divider into two node in software.
+	 * We use aclk_vdpu by default ---GRF_SOC_CON0[7] setting in system,
+	 * so we ignore the mux and make clocks nodes as following,
 	 */
-	GATE(0, "hclk_video_pre_v", "aclk_vdpu", 0,
+	FACTOR_GATE(0, "hclk_video_pre", "aclk_vdpu", 0, 1, 4,
 		RK3368_CLKGATE_CON(4), 8, GFLAGS),
 
 	COMPOSITE(0, "sclk_hevc_cabac_src", mux_pll_src_cpll_gpll_npll_usb_p, 0,
@@ -442,7 +471,7 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	GATE(SCLK_HDMI_HDCP, "sclk_hdmi_hdcp", "xin24m", 0,
 			RK3368_CLKGATE_CON(4), 13, GFLAGS),
 	GATE(SCLK_HDMI_CEC, "sclk_hdmi_cec", "xin32k", 0,
-			RK3368_CLKGATE_CON(5), 12, GFLAGS),
+			RK3368_CLKGATE_CON(4), 12, GFLAGS),
 
 	COMPOSITE_NODIV(0, "vip_src", mux_pll_src_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(21), 15, 1, MFLAGS,
@@ -560,38 +589,34 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = {
 	COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gpll_usb_usb_p, 0,
 			RK3368_CLKSEL_CON(33), 12, 2, MFLAGS, 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 0, GFLAGS),
-	COMPOSITE_FRAC(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(34), 0,
-			RK3368_CLKGATE_CON(2), 1, GFLAGS),
-	MUX(SCLK_UART0, "sclk_uart0", mux_uart0_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(33), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart0_frac", "uart0_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(34), 0,
+			  RK3368_CLKGATE_CON(2), 1, GFLAGS,
+			  &rk3368_uart0_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart1_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(35), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 2, GFLAGS),
-	COMPOSITE_FRAC(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(36), 0,
-			RK3368_CLKGATE_CON(2), 3, GFLAGS),
-	MUX(SCLK_UART1, "sclk_uart1", mux_uart1_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(35), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart1_frac", "uart1_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(36), 0,
+			  RK3368_CLKGATE_CON(2), 3, GFLAGS,
+			  &rk3368_uart1_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart3_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(39), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 6, GFLAGS),
-	COMPOSITE_FRAC(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(40), 0,
-			RK3368_CLKGATE_CON(2), 7, GFLAGS),
-	MUX(SCLK_UART3, "sclk_uart3", mux_uart3_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(39), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart3_frac", "uart3_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(40), 0,
+			  RK3368_CLKGATE_CON(2), 7, GFLAGS,
+			  &rk3368_uart3_fracmux),
 
 	COMPOSITE_NOMUX(0, "uart4_src", "uart_src", 0,
 			RK3368_CLKSEL_CON(41), 0, 7, DFLAGS,
 			RK3368_CLKGATE_CON(2), 8, GFLAGS),
-	COMPOSITE_FRAC(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(42), 0,
-			RK3368_CLKGATE_CON(2), 9, GFLAGS),
-	MUX(SCLK_UART4, "sclk_uart4", mux_uart4_p, CLK_SET_RATE_PARENT,
-			RK3368_CLKSEL_CON(41), 8, 2, MFLAGS),
+	COMPOSITE_FRACMUX(0, "uart4_frac", "uart4_src", CLK_SET_RATE_PARENT,
+			  RK3368_CLKSEL_CON(42), 0,
+			  RK3368_CLKGATE_CON(2), 9, GFLAGS,
+			  &rk3368_uart4_fracmux),
 
 	COMPOSITE(0, "mac_pll_src", mux_pll_src_npll_cpll_gpll_p, 0,
 			RK3368_CLKSEL_CON(43), 6, 2, MFLAGS, 0, 5, DFLAGS,
@@ -842,24 +867,6 @@ static void __init rk3368_clk_init(struct device_node *np)
 
 	rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 
-	/* xin12m is created by a cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "xin12m", "xin24m", 0, 1, 2);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	/* ddrphy_div4 is created by a cru-internal divider */
-	clk = clk_register_fixed_factor(NULL, "ddrphy_div4", "ddrphy_src", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock xin12m: %ld\n",
-			__func__, PTR_ERR(clk));
-
-	clk = clk_register_fixed_factor(NULL, "hclk_video_pre",
-					"hclk_video_pre_v", 0, 1, 4);
-	if (IS_ERR(clk))
-		pr_warn("%s: could not register clock hclk_vcodec_pre: %ld\n",
-			__func__, PTR_ERR(clk));
-
 	/* Watchdog pclk is controlled by sgrf_soc_con3[7]. */
 	clk = clk_register_fixed_factor(NULL, "pclk_wdt", "pclk_pd_alive", 0, 1, 1);
 	if (IS_ERR(clk))
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index d9a0b5d4d47f..ec06350c78c4 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -70,7 +70,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (gate_offset >= 0) {
 		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
 		if (!gate)
-			return ERR_PTR(-ENOMEM);
+			goto err_gate;
 
 		gate->flags = gate_flags;
 		gate->reg = base + gate_offset;
@@ -82,7 +82,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (div_width > 0) {
 		div = kzalloc(sizeof(*div), GFP_KERNEL);
 		if (!div)
-			return ERR_PTR(-ENOMEM);
+			goto err_div;
 
 		div->flags = div_flags;
 		div->reg = base + muxdiv_offset;
@@ -90,7 +90,9 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 		div->width = div_width;
 		div->lock = lock;
 		div->table = div_table;
-		div_ops = &clk_divider_ops;
+		div_ops = (div_flags & CLK_DIVIDER_READ_ONLY)
+						? &clk_divider_ro_ops
+						: &clk_divider_ops;
 	}
 
 	clk = clk_register_composite(NULL, name, parent_names, num_parents,
@@ -100,6 +102,11 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 				     flags);
 
 	return clk;
+err_div:
+	kfree(gate);
+err_gate:
+	kfree(mux);
+	return ERR_PTR(-ENOMEM);
 }
 
 struct rockchip_clk_frac {
@@ -260,6 +267,53 @@ static struct clk *rockchip_clk_register_frac_branch(const char *name,
 	return clk;
 }
 
+static struct clk *rockchip_clk_register_factor_branch(const char *name,
+		const char *const *parent_names, u8 num_parents,
+		void __iomem *base, unsigned int mult, unsigned int div,
+		int gate_offset, u8 gate_shift, u8 gate_flags,
+		unsigned long flags, spinlock_t *lock)
+{
+	struct clk *clk;
+	struct clk_gate *gate = NULL;
+	struct clk_fixed_factor *fix = NULL;
+
+	/* without gate, register a simple factor clock */
+	if (gate_offset == 0) {
+		return clk_register_fixed_factor(NULL, name,
+				parent_names[0], flags, mult,
+				div);
+	}
+
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	gate->flags = gate_flags;
+	gate->reg = base + gate_offset;
+	gate->bit_idx = gate_shift;
+	gate->lock = lock;
+
+	fix = kzalloc(sizeof(*fix), GFP_KERNEL);
+	if (!fix) {
+		kfree(gate);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fix->mult = mult;
+	fix->div = div;
+
+	clk = clk_register_composite(NULL, name, parent_names, num_parents,
+				     NULL, NULL,
+				     &fix->hw, &clk_fixed_factor_ops,
+				     &gate->hw, &clk_gate_ops, flags);
+	if (IS_ERR(clk)) {
+		kfree(fix);
+		kfree(gate);
+	}
+
+	return clk;
+}
+
 static DEFINE_SPINLOCK(clk_lock);
 static struct clk **clk_table;
 static void __iomem *reg_base;
@@ -395,6 +449,14 @@ void __init rockchip_clk_register_branches(
 				reg_base + list->muxdiv_offset,
 				list->div_shift, list->div_flags, &clk_lock);
 			break;
+		case branch_factor:
+			clk = rockchip_clk_register_factor_branch(
+				list->name, list->parent_names,
+				list->num_parents, reg_base,
+				list->div_shift, list->div_width,
+				list->gate_offset, list->gate_shift,
+				list->gate_flags, flags, &clk_lock);
+			break;
 		}
 
 		/* none of the cases above matched */
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index ff8bd23a93ec..39c198bbcbee 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -254,6 +254,7 @@ enum rockchip_clk_branch_type {
 	branch_gate,
 	branch_mmc,
 	branch_inverter,
+	branch_factor,
 };
 
 struct rockchip_clk_branch {
@@ -508,6 +509,33 @@ struct rockchip_clk_branch {
 		.div_flags	= if,				\
 	}
 
+#define FACTOR(_id, cname, pname,  f, fm, fd)			\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_factor,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.div_shift	= fm,				\
+		.div_width	= fd,				\
+	}
+
+#define FACTOR_GATE(_id, cname, pname,  f, fm, fd, go, gb, gf)	\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_factor,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.div_shift	= fm,				\
+		.div_width	= fd,				\
+		.gate_offset	= go,				\
+		.gate_shift	= gb,				\
+		.gate_flags	= gf,				\
+	}
+
 void rockchip_clk_init(struct device_node *np, void __iomem *base,
 		       unsigned long nr_clks);
 struct regmap *rockchip_clk_get_grf(void);
diff --git a/drivers/clk/samsung/Kconfig b/drivers/clk/samsung/Kconfig
index 84196ecdaa12..20c5fe92ab4a 100644
--- a/drivers/clk/samsung/Kconfig
+++ b/drivers/clk/samsung/Kconfig
@@ -1,9 +1,17 @@
+# Recent Exynos platforms should just select COMMON_CLK_SAMSUNG:
 config COMMON_CLK_SAMSUNG
-	bool
-	select COMMON_CLK
+	bool "Samsung Exynos clock controller support" if COMPILE_TEST
+	# Clocks on ARM64 SoCs (e.g. Exynos5433, Exynos7) are chosen by
+	# EXYNOS_ARM64_COMMON_CLK to avoid building them on ARMv7:
+	select EXYNOS_ARM64_COMMON_CLK if ARM64 && ARCH_EXYNOS
+
+config EXYNOS_ARM64_COMMON_CLK
+	bool "Samsung Exynos ARMv8-family clock controller support" if COMPILE_TEST
+	depends on COMMON_CLK_SAMSUNG
 
+# For S3C24XX platforms, select following symbols:
 config S3C2410_COMMON_CLK
-	bool
+	bool "Samsung S3C2410 clock controller support" if COMPILE_TEST
 	select COMMON_CLK_SAMSUNG
 	help
 	  Build the s3c2410 clock driver based on the common clock framework.
@@ -17,10 +25,9 @@ config S3C2410_COMMON_DCLK
 	  framework.
 
 config S3C2412_COMMON_CLK
-	bool
+	bool "Samsung S3C2412 clock controller support" if COMPILE_TEST
 	select COMMON_CLK_SAMSUNG
 
 config S3C2443_COMMON_CLK
-	bool
+	bool "Samsung S3C2443 clock controller support" if COMPILE_TEST
 	select COMMON_CLK_SAMSUNG
-
diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
index 5f6833ea355d..fc367d4b2902 100644
--- a/drivers/clk/samsung/Makefile
+++ b/drivers/clk/samsung/Makefile
@@ -10,11 +10,11 @@ obj-$(CONFIG_SOC_EXYNOS5250)	+= clk-exynos5250.o
 obj-$(CONFIG_SOC_EXYNOS5260)	+= clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)	+= clk-exynos5410.o
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5420.o
-obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos5433.o
+obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)	+= clk-exynos5433.o
 obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-audss.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-clkout.o
-obj-$(CONFIG_ARCH_EXYNOS7)	+= clk-exynos7.o
+obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)	+= clk-exynos7.o
 obj-$(CONFIG_S3C2410_COMMON_CLK)+= clk-s3c2410.o
 obj-$(CONFIG_S3C2410_COMMON_DCLK)+= clk-s3c2410-dclk.o
 obj-$(CONFIG_S3C2412_COMMON_CLK)+= clk-s3c2412.o
diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index ac03e4fe2871..7b3d0f975987 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -500,19 +500,19 @@ PNAME(clkout_cpu_p4x12) = { "fout_apll_div_2", "none", "none", "none",
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos4_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_XXTI, "xxti", NULL, CLK_IS_ROOT, 0),
-	FRATE(CLK_XUSBXTI, "xusbxti", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_XXTI, "xxti", NULL, 0, 0),
+	FRATE(CLK_XUSBXTI, "xusbxti", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos4_fixed_rate_clks[] __initdata = {
-	FRATE(0, "sclk_hdmi24m", NULL, CLK_IS_ROOT, 24000000),
+	FRATE(0, "sclk_hdmi24m", NULL, 0, 24000000),
 	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", "hdmi", 0, 27000000),
-	FRATE(0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "sclk_usbphy0", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_rate_clock exynos4210_fixed_rate_clks[] __initdata = {
-	FRATE(0, "sclk_usbphy1", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "sclk_usbphy1", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_factor_clock exynos4_fixed_factor_clks[] __initdata = {
@@ -1251,7 +1251,7 @@ static void __init exynos4_clk_register_finpll(struct samsung_clk_provider *ctx)
 	fclk.id = CLK_FIN_PLL;
 	fclk.name = "fin_pll";
 	fclk.parent_name = NULL;
-	fclk.flags = CLK_IS_ROOT;
+	fclk.flags = 0;
 	fclk.fixed_rate = finpll_f;
 	samsung_clk_register_fixed_rate(ctx, &fclk, 1);
 
diff --git a/drivers/clk/samsung/clk-exynos4415.c b/drivers/clk/samsung/clk-exynos4415.c
index 92c39f6efec8..86ee06b226bd 100644
--- a/drivers/clk/samsung/clk-exynos4415.c
+++ b/drivers/clk/samsung/clk-exynos4415.c
@@ -274,7 +274,7 @@ static struct samsung_fixed_factor_clock exynos4415_fixed_factor_clks[] __initda
 };
 
 static struct samsung_fixed_rate_clock exynos4415_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 27000000),
 };
 
 static struct samsung_mux_clock exynos4415_mux_clks[] __initdata = {
diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c
index 5bebf8cb0d70..837197db4ffb 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -262,15 +262,15 @@ PNAME(mout_spdif_p)	= { "sclk_audio0", "sclk_audio1", "sclk_audio2",
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos5250_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_FIN_PLL, "fin_pll", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_FIN_PLL, "fin_pll", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos5250_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_hdmi27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(0, "sclk_dptxphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_uhostphy", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_hdmi27m", NULL, 0, 27000000),
+	FRATE(0, "sclk_dptxphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_uhostphy", NULL, 0, 48000000),
 };
 
 static struct samsung_fixed_factor_clock exynos5250_fixed_factor_clks[] __initdata = {
diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index d1a29f6c1084..7a7ed075a573 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c
@@ -1432,42 +1432,38 @@ static unsigned long top_clk_regs[] __initdata = {
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock fixed_rate_clks[] __initdata = {
 	FRATE(PHYCLK_DPTX_PHY_CH3_TXD_CLK, "phyclk_dptx_phy_ch3_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH2_TXD_CLK, "phyclk_dptx_phy_ch2_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH1_TXD_CLK, "phyclk_dptx_phy_ch1_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(PHYCLK_DPTX_PHY_CH0_TXD_CLK, "phyclk_dptx_phy_ch0_txd_clk", NULL,
-			CLK_IS_ROOT, 270000000),
+			0, 270000000),
 	FRATE(phyclk_hdmi_phy_tmds_clko, "phyclk_hdmi_phy_tmds_clko", NULL,
-			CLK_IS_ROOT, 250000000),
+			0, 250000000),
 	FRATE(PHYCLK_HDMI_PHY_PIXEL_CLKO, "phyclk_hdmi_phy_pixel_clko", NULL,
-			CLK_IS_ROOT, 1660000000),
+			0, 1660000000),
 	FRATE(PHYCLK_HDMI_LINK_O_TMDS_CLKHI, "phyclk_hdmi_link_o_tmds_clkhi",
-			NULL, CLK_IS_ROOT, 125000000),
+			NULL, 0, 125000000),
 	FRATE(PHYCLK_MIPI_DPHY_4L_M_TXBYTECLKHS,
 			"phyclk_mipi_dphy_4l_m_txbyte_clkhs" , NULL,
-			CLK_IS_ROOT, 187500000),
+			0, 187500000),
 	FRATE(PHYCLK_DPTX_PHY_O_REF_CLK_24M, "phyclk_dptx_phy_o_ref_clk_24m",
-			NULL, CLK_IS_ROOT, 24000000),
+			NULL, 0, 24000000),
 	FRATE(PHYCLK_DPTX_PHY_CLK_DIV2, "phyclk_dptx_phy_clk_div2", NULL,
-			CLK_IS_ROOT, 135000000),
+			0, 135000000),
 	FRATE(PHYCLK_MIPI_DPHY_4L_M_RXCLKESC0,
-			"phyclk_mipi_dphy_4l_m_rxclkesc0", NULL,
-			CLK_IS_ROOT, 20000000),
+			"phyclk_mipi_dphy_4l_m_rxclkesc0", NULL, 0, 20000000),
 	FRATE(PHYCLK_USBHOST20_PHY_PHYCLOCK, "phyclk_usbhost20_phy_phyclock",
-			NULL, CLK_IS_ROOT, 60000000),
+			NULL, 0, 60000000),
 	FRATE(PHYCLK_USBHOST20_PHY_FREECLK, "phyclk_usbhost20_phy_freeclk",
-			NULL, CLK_IS_ROOT, 60000000),
+			NULL, 0, 60000000),
 	FRATE(PHYCLK_USBHOST20_PHY_CLK48MOHCI,
-			"phyclk_usbhost20_phy_clk48mohci",
-			NULL, CLK_IS_ROOT, 48000000),
+			"phyclk_usbhost20_phy_clk48mohci", NULL, 0, 48000000),
 	FRATE(PHYCLK_USBDRD30_UDRD30_PIPE_PCLK,
-			"phyclk_usbdrd30_udrd30_pipe_pclk", NULL,
-			CLK_IS_ROOT, 125000000),
+			"phyclk_usbdrd30_udrd30_pipe_pclk", NULL, 0, 125000000),
 	FRATE(PHYCLK_USBDRD30_UDRD30_PHYCLOCK,
-			"phyclk_usbdrd30_udrd30_phyclock", NULL,
-			CLK_IS_ROOT, 60000000),
+			"phyclk_usbdrd30_udrd30_phyclock", NULL, 0, 60000000),
 };
 
 PNAME(mout_memtop_pll_user_p) = {"fin_pll", "dout_mem_pll"};
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index d048dedd8b72..be03ed0fcb6b 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -480,16 +480,16 @@ PNAME(mout_group15_5800_p)	= { "dout_osc_div", "mout_sw_aclk550_cam" };
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock
 		exynos5x_fixed_rate_ext_clks[] __initdata = {
-	FRATE(CLK_FIN_PLL, "fin_pll", NULL, CLK_IS_ROOT, 0),
+	FRATE(CLK_FIN_PLL, "fin_pll", NULL, 0, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos5x_fixed_rate_clks[] __initdata = {
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_pwi", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(0, "sclk_usbh20", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(0, "mphy_refclk_ixtal24", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(0, "sclk_usbh20_scan_clk", NULL, CLK_IS_ROOT, 480000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 24000000),
+	FRATE(0, "sclk_pwi", NULL, 0, 24000000),
+	FRATE(0, "sclk_usbh20", NULL, 0, 48000000),
+	FRATE(0, "mphy_refclk_ixtal24", NULL, 0, 48000000),
+	FRATE(0, "sclk_usbh20_scan_clk", NULL, 0, 480000000),
 };
 
 static struct samsung_fixed_factor_clock
diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index cee062c588de..128527b8fbeb 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -142,17 +142,6 @@ static unsigned long top_clk_regs[] __initdata = {
 	MUX_ENABLE_TOP_FSYS1,
 	MUX_ENABLE_TOP_PERIC0,
 	MUX_ENABLE_TOP_PERIC1,
-	MUX_STAT_TOP0,
-	MUX_STAT_TOP1,
-	MUX_STAT_TOP2,
-	MUX_STAT_TOP3,
-	MUX_STAT_TOP4,
-	MUX_STAT_TOP_MSCL,
-	MUX_STAT_TOP_CAM1,
-	MUX_STAT_TOP_FSYS0,
-	MUX_STAT_TOP_FSYS1,
-	MUX_STAT_TOP_PERIC0,
-	MUX_STAT_TOP_PERIC1,
 	DIV_TOP0,
 	DIV_TOP1,
 	DIV_TOP2,
@@ -170,22 +159,6 @@ static unsigned long top_clk_regs[] __initdata = {
 	DIV_TOP_PERIC3,
 	DIV_TOP_PERIC4,
 	DIV_TOP_PLL_FREQ_DET,
-	DIV_STAT_TOP0,
-	DIV_STAT_TOP1,
-	DIV_STAT_TOP2,
-	DIV_STAT_TOP3,
-	DIV_STAT_TOP4,
-	DIV_STAT_TOP_MSCL,
-	DIV_STAT_TOP_CAM10,
-	DIV_STAT_TOP_CAM11,
-	DIV_STAT_TOP_FSYS0,
-	DIV_STAT_TOP_FSYS1,
-	DIV_STAT_TOP_FSYS2,
-	DIV_STAT_TOP_PERIC0,
-	DIV_STAT_TOP_PERIC1,
-	DIV_STAT_TOP_PERIC2,
-	DIV_STAT_TOP_PERIC3,
-	DIV_STAT_TOP_PLL_FREQ_DET,
 	ENABLE_ACLK_TOP,
 	ENABLE_SCLK_TOP,
 	ENABLE_SCLK_TOP_MSCL,
@@ -251,18 +224,18 @@ static struct samsung_fixed_factor_clock top_fixed_factor_clks[] __initdata = {
 
 static struct samsung_fixed_rate_clock top_fixed_clks[] __initdata = {
 	/* Xi2s{0|1}CDCLK input clock for I2S/PCM */
-	FRATE(0, "ioclk_audiocdclk1", NULL, CLK_IS_ROOT, 100000000),
-	FRATE(0, "ioclk_audiocdclk0", NULL, CLK_IS_ROOT, 100000000),
+	FRATE(0, "ioclk_audiocdclk1", NULL, 0, 100000000),
+	FRATE(0, "ioclk_audiocdclk0", NULL, 0, 100000000),
 	/* Xi2s1SDI input clock for SPDIF */
-	FRATE(0, "ioclk_spdif_extclk", NULL, CLK_IS_ROOT, 100000000),
+	FRATE(0, "ioclk_spdif_extclk", NULL, 0, 100000000),
 	/* XspiCLK[4:0] input clock for SPI */
-	FRATE(0, "ioclk_spi4_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi3_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi2_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi1_clk_in", NULL, CLK_IS_ROOT, 50000000),
-	FRATE(0, "ioclk_spi0_clk_in", NULL, CLK_IS_ROOT, 50000000),
+	FRATE(0, "ioclk_spi4_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi3_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi2_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi1_clk_in", NULL, 0, 50000000),
+	FRATE(0, "ioclk_spi0_clk_in", NULL, 0, 50000000),
 	/* Xi2s1SCLK input clock for I2S1_BCLK */
-	FRATE(0, "ioclk_i2s1_bclk_in", NULL, CLK_IS_ROOT, 12288000),
+	FRATE(0, "ioclk_i2s1_bclk_in", NULL, 0, 12288000),
 };
 
 static struct samsung_mux_clock top_mux_clks[] __initdata = {
@@ -490,9 +463,9 @@ static struct samsung_div_clock top_div_clks[] __initdata = {
 	DIV(CLK_DIV_SCLK_ISP_SENSOR1_A, "div_sclk_isp_sensor1_a",
 			"mout_sclk_isp_sensor1", DIV_TOP_CAM11, 8, 4),
 	DIV(CLK_DIV_SCLK_ISP_SENSOR0_B, "div_sclk_isp_sensor0_b",
-			"div_sclk_isp_sensor0_a", DIV_TOP_CAM11, 12, 4),
+			"div_sclk_isp_sensor0_a", DIV_TOP_CAM11, 4, 4),
 	DIV(CLK_DIV_SCLK_ISP_SENSOR0_A, "div_sclk_isp_sensor0_a",
-			"mout_sclk_isp_sensor0", DIV_TOP_CAM11, 8, 4),
+			"mout_sclk_isp_sensor0", DIV_TOP_CAM11, 0, 4),
 
 	/* DIV_TOP_FSYS0 */
 	DIV(CLK_DIV_SCLK_MMC1_B, "div_sclk_mmc1_b", "div_sclk_mmc1_a",
@@ -999,26 +972,12 @@ static unsigned long mif_clk_regs[] __initdata = {
 	MUX_ENABLE_MIF5,
 	MUX_ENABLE_MIF6,
 	MUX_ENABLE_MIF7,
-	MUX_STAT_MIF0,
-	MUX_STAT_MIF1,
-	MUX_STAT_MIF2,
-	MUX_STAT_MIF3,
-	MUX_STAT_MIF4,
-	MUX_STAT_MIF5,
-	MUX_STAT_MIF6,
-	MUX_STAT_MIF7,
 	DIV_MIF1,
 	DIV_MIF2,
 	DIV_MIF3,
 	DIV_MIF4,
 	DIV_MIF5,
 	DIV_MIF_PLL_FREQ_DET,
-	DIV_STAT_MIF1,
-	DIV_STAT_MIF2,
-	DIV_STAT_MIF3,
-	DIV_STAT_MIF4,
-	DIV_STAT_MIF5,
-	DIV_STAT_MIF_PLL_FREQ_DET,
 	ENABLE_ACLK_MIF0,
 	ENABLE_ACLK_MIF1,
 	ENABLE_ACLK_MIF2,
@@ -1565,7 +1524,6 @@ CLK_OF_DECLARE(exynos5433_cmu_mif, "samsung,exynos5433-cmu-mif",
 
 static unsigned long peric_clk_regs[] __initdata = {
 	DIV_PERIC,
-	DIV_STAT_PERIC,
 	ENABLE_ACLK_PERIC,
 	ENABLE_PCLK_PERIC0,
 	ENABLE_PCLK_PERIC1,
@@ -2012,11 +1970,6 @@ static unsigned long fsys_clk_regs[] __initdata = {
 	MUX_ENABLE_FSYS2,
 	MUX_ENABLE_FSYS3,
 	MUX_ENABLE_FSYS4,
-	MUX_STAT_FSYS0,
-	MUX_STAT_FSYS1,
-	MUX_STAT_FSYS2,
-	MUX_STAT_FSYS3,
-	MUX_STAT_FSYS4,
 	MUX_IGNORE_FSYS2,
 	MUX_IGNORE_FSYS3,
 	ENABLE_ACLK_FSYS0,
@@ -2031,42 +1984,40 @@ static struct samsung_fixed_rate_clock fsys_fixed_clks[] __initdata = {
 	/* PHY clocks from USBDRD30_PHY */
 	FRATE(CLK_PHYCLK_USBDRD30_UDRD30_PHYCLOCK_PHY,
 			"phyclk_usbdrd30_udrd30_phyclock_phy", NULL,
-			CLK_IS_ROOT, 60000000),
+			0, 60000000),
 	FRATE(CLK_PHYCLK_USBDRD30_UDRD30_PIPE_PCLK_PHY,
 			"phyclk_usbdrd30_udrd30_pipe_pclk_phy", NULL,
-			CLK_IS_ROOT, 125000000),
+			0, 125000000),
 	/* PHY clocks from USBHOST30_PHY */
 	FRATE(CLK_PHYCLK_USBHOST30_UHOST30_PHYCLOCK_PHY,
 			"phyclk_usbhost30_uhost30_phyclock_phy", NULL,
-			CLK_IS_ROOT, 60000000),
+			0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST30_UHOST30_PIPE_PCLK_PHY,
 			"phyclk_usbhost30_uhost30_pipe_pclk_phy", NULL,
-			CLK_IS_ROOT, 125000000),
+			0, 125000000),
 	/* PHY clocks from USBHOST20_PHY */
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_FREECLK_PHY,
-			"phyclk_usbhost20_phy_freeclk_phy", NULL, CLK_IS_ROOT,
-			60000000),
+			"phyclk_usbhost20_phy_freeclk_phy", NULL, 0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_PHYCLOCK_PHY,
-			"phyclk_usbhost20_phy_phyclock_phy", NULL, CLK_IS_ROOT,
-			60000000),
+			"phyclk_usbhost20_phy_phyclock_phy", NULL, 0, 60000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_CLK48MOHCI_PHY,
 			"phyclk_usbhost20_phy_clk48mohci_phy", NULL,
-			CLK_IS_ROOT, 48000000),
+			0, 48000000),
 	FRATE(CLK_PHYCLK_USBHOST20_PHY_HSIC1_PHY,
-			"phyclk_usbhost20_phy_hsic1_phy", NULL, CLK_IS_ROOT,
+			"phyclk_usbhost20_phy_hsic1_phy", NULL, 0,
 			60000000),
 	/* PHY clocks from UFS_PHY */
 	FRATE(CLK_PHYCLK_UFS_TX0_SYMBOL_PHY, "phyclk_ufs_tx0_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_RX0_SYMBOL_PHY, "phyclk_ufs_rx0_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_TX1_SYMBOL_PHY, "phyclk_ufs_tx1_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	FRATE(CLK_PHYCLK_UFS_RX1_SYMBOL_PHY, "phyclk_ufs_rx1_symbol_phy",
-			NULL, CLK_IS_ROOT, 300000000),
+			NULL, 0, 300000000),
 	/* PHY clocks from LLI_PHY */
 	FRATE(CLK_PHYCLK_LLI_MPHY_TO_UFS_PHY, "phyclk_lli_mphy_to_ufs_phy",
-			NULL, CLK_IS_ROOT, 26000000),
+			NULL, 0, 26000000),
 };
 
 static struct samsung_mux_clock fsys_mux_clks[] __initdata = {
@@ -2362,9 +2313,7 @@ CLK_OF_DECLARE(exynos5433_cmu_fsys, "samsung,exynos5433-cmu-fsys",
 static unsigned long g2d_clk_regs[] __initdata = {
 	MUX_SEL_G2D0,
 	MUX_SEL_ENABLE_G2D0,
-	MUX_SEL_STAT_G2D0,
 	DIV_G2D,
-	DIV_STAT_G2D,
 	DIV_ENABLE_ACLK_G2D,
 	DIV_ENABLE_ACLK_G2D_SECURE_SMMU_G2D,
 	DIV_ENABLE_PCLK_G2D,
@@ -2520,16 +2469,9 @@ static unsigned long disp_clk_regs[] __initdata = {
 	MUX_ENABLE_DISP2,
 	MUX_ENABLE_DISP3,
 	MUX_ENABLE_DISP4,
-	MUX_STAT_DISP0,
-	MUX_STAT_DISP1,
-	MUX_STAT_DISP2,
-	MUX_STAT_DISP3,
-	MUX_STAT_DISP4,
 	MUX_IGNORE_DISP2,
 	DIV_DISP,
 	DIV_DISP_PLL_FREQ_DET,
-	DIV_STAT_DISP,
-	DIV_STAT_DISP_PLL_FREQ_DET,
 	ENABLE_ACLK_DISP0,
 	ENABLE_ACLK_DISP1,
 	ENABLE_PCLK_DISP,
@@ -2604,18 +2546,16 @@ static struct samsung_fixed_factor_clock disp_fixed_factor_clks[] __initdata = {
 
 static struct samsung_fixed_rate_clock disp_fixed_clks[] __initdata = {
 	/* PHY clocks from MIPI_DPHY1 */
-	FRATE(0, "phyclk_mipidphy1_bitclkdiv8_phy", NULL, CLK_IS_ROOT,
-			188000000),
-	FRATE(0, "phyclk_mipidphy1_rxclkesc0_phy", NULL, CLK_IS_ROOT,
-			100000000),
+	FRATE(0, "phyclk_mipidphy1_bitclkdiv8_phy", NULL, 0, 188000000),
+	FRATE(0, "phyclk_mipidphy1_rxclkesc0_phy", NULL, 0, 100000000),
 	/* PHY clocks from MIPI_DPHY0 */
-	FRATE(0, "phyclk_mipidphy0_bitclkdiv8_phy", NULL, CLK_IS_ROOT,
-			188000000),
-	FRATE(0, "phyclk_mipidphy0_rxclkesc0_phy", NULL, CLK_IS_ROOT,
-			100000000),
+	FRATE(0, "phyclk_mipidphy0_bitclkdiv8_phy", NULL, 0, 188000000),
+	FRATE(0, "phyclk_mipidphy0_rxclkesc0_phy", NULL, 0, 100000000),
 	/* PHY clocks from HDMI_PHY */
-	FRATE(0, "phyclk_hdmiphy_tmds_clko_phy", NULL, CLK_IS_ROOT, 300000000),
-	FRATE(0, "phyclk_hdmiphy_pixel_clko_phy", NULL, CLK_IS_ROOT, 166000000),
+	FRATE(CLK_PHYCLK_HDMIPHY_TMDS_CLKO_PHY, "phyclk_hdmiphy_tmds_clko_phy",
+			NULL, 0, 300000000),
+	FRATE(CLK_PHYCLK_HDMIPHY_PIXEL_CLKO_PHY, "phyclk_hdmiphy_pixel_clko_phy",
+			NULL, 0, 166000000),
 };
 
 static struct samsung_mux_clock disp_mux_clks[] __initdata = {
@@ -2820,6 +2760,8 @@ static struct samsung_gate_clock disp_gate_clks[] __initdata = {
 			ENABLE_PCLK_DISP, 2, 0, 0),
 	GATE(CLK_PCLK_DECON_TV, "pclk_decon_tv", "div_pclk_disp",
 			ENABLE_PCLK_DISP, 1, 0, 0),
+	GATE(CLK_PCLK_DECON, "pclk_decon", "div_pclk_disp",
+			ENABLE_PCLK_DISP, 0, 0, 0),
 
 	/* ENABLE_SCLK_DISP */
 	GATE(CLK_PHYCLK_MIPIDPHY1_BITCLKDIV8, "phyclk_mipidphy1_bitclkdiv8",
@@ -2919,11 +2861,8 @@ static unsigned long aud_clk_regs[] __initdata = {
 	MUX_SEL_AUD1,
 	MUX_ENABLE_AUD0,
 	MUX_ENABLE_AUD1,
-	MUX_STAT_AUD0,
 	DIV_AUD0,
 	DIV_AUD1,
-	DIV_STAT_AUD0,
-	DIV_STAT_AUD1,
 	ENABLE_ACLK_AUD,
 	ENABLE_PCLK_AUD,
 	ENABLE_SCLK_AUD0,
@@ -2937,9 +2876,9 @@ PNAME(mout_aud_pll_user_aud_p)	= { "oscclk", "fout_aud_pll", };
 PNAME(mout_sclk_aud_pcm_p)	= { "mout_aud_pll_user", "ioclk_audiocdclk0",};
 
 static struct samsung_fixed_rate_clock aud_fixed_clks[] __initdata = {
-	FRATE(0, "ioclk_jtag_tclk", NULL, CLK_IS_ROOT, 33000000),
-	FRATE(0, "ioclk_slimbus_clk", NULL, CLK_IS_ROOT, 25000000),
-	FRATE(0, "ioclk_i2s_bclk", NULL, CLK_IS_ROOT, 50000000),
+	FRATE(0, "ioclk_jtag_tclk", NULL, 0, 33000000),
+	FRATE(0, "ioclk_slimbus_clk", NULL, 0, 25000000),
+	FRATE(0, "ioclk_i2s_bclk", NULL, 0, 50000000),
 };
 
 static struct samsung_mux_clock aud_mux_clks[] __initdata = {
@@ -3087,7 +3026,6 @@ PNAME(mout_aclk_bus2_400_p)	= { "oscclk", "aclk_bus2_400", };
 
 #define CMU_BUS_COMMON_CLK_REGS	\
 	DIV_BUS,		\
-	DIV_STAT_BUS,		\
 	ENABLE_ACLK_BUS,	\
 	ENABLE_PCLK_BUS,	\
 	ENABLE_IP_BUS0,		\
@@ -3100,7 +3038,6 @@ static unsigned long bus01_clk_regs[] __initdata = {
 static unsigned long bus2_clk_regs[] __initdata = {
 	MUX_SEL_BUS2,
 	MUX_ENABLE_BUS2,
-	MUX_STAT_BUS2,
 	CMU_BUS_COMMON_CLK_REGS,
 };
 
@@ -3259,11 +3196,8 @@ static unsigned long g3d_clk_regs[] __initdata = {
 	G3D_PLL_FREQ_DET,
 	MUX_SEL_G3D,
 	MUX_ENABLE_G3D,
-	MUX_STAT_G3D,
 	DIV_G3D,
 	DIV_G3D_PLL_FREQ_DET,
-	DIV_STAT_G3D,
-	DIV_STAT_G3D_PLL_FREQ_DET,
 	ENABLE_ACLK_G3D,
 	ENABLE_PCLK_G3D,
 	ENABLE_SCLK_G3D,
@@ -3379,7 +3313,6 @@ CLK_OF_DECLARE(exynos5433_cmu_g3d, "samsung,exynos5433-cmu-g3d",
 static unsigned long gscl_clk_regs[] __initdata = {
 	MUX_SEL_GSCL,
 	MUX_ENABLE_GSCL,
-	MUX_STAT_GSCL,
 	ENABLE_ACLK_GSCL,
 	ENABLE_ACLK_GSCL_SECURE_SMMU_GSCL0,
 	ENABLE_ACLK_GSCL_SECURE_SMMU_GSCL1,
@@ -3472,11 +3405,11 @@ static struct samsung_gate_clock gscl_gate_clks[] __initdata = {
 
 	/* ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL1 */
 	GATE(CLK_PCLK_SMMU_GSCL1, "pclk_smmu_gscl1", "mout_aclk_gscl_111_user",
-		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL0, 0, 0, 0),
+		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL1, 0, 0, 0),
 
 	/* ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL2 */
 	GATE(CLK_PCLK_SMMU_GSCL2, "pclk_smmu_gscl2", "mout_aclk_gscl_111_user",
-		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL0, 0, 0, 0),
+		ENABLE_PCLK_GSCL_SECURE_SMMU_GSCL2, 0, 0, 0),
 };
 
 static struct samsung_cmu_info gscl_cmu_info __initdata = {
@@ -3543,15 +3476,9 @@ static unsigned long apollo_clk_regs[] __initdata = {
 	MUX_ENABLE_APOLLO0,
 	MUX_ENABLE_APOLLO1,
 	MUX_ENABLE_APOLLO2,
-	MUX_STAT_APOLLO0,
-	MUX_STAT_APOLLO1,
-	MUX_STAT_APOLLO2,
 	DIV_APOLLO0,
 	DIV_APOLLO1,
 	DIV_APOLLO_PLL_FREQ_DET,
-	DIV_STAT_APOLLO0,
-	DIV_STAT_APOLLO1,
-	DIV_STAT_APOLLO_PLL_FREQ_DET,
 	ENABLE_ACLK_APOLLO,
 	ENABLE_PCLK_APOLLO,
 	ENABLE_SCLK_APOLLO,
@@ -3735,15 +3662,9 @@ static unsigned long atlas_clk_regs[] __initdata = {
 	MUX_ENABLE_ATLAS0,
 	MUX_ENABLE_ATLAS1,
 	MUX_ENABLE_ATLAS2,
-	MUX_STAT_ATLAS0,
-	MUX_STAT_ATLAS1,
-	MUX_STAT_ATLAS2,
 	DIV_ATLAS0,
 	DIV_ATLAS1,
 	DIV_ATLAS_PLL_FREQ_DET,
-	DIV_STAT_ATLAS0,
-	DIV_STAT_ATLAS1,
-	DIV_STAT_ATLAS_PLL_FREQ_DET,
 	ENABLE_ACLK_ATLAS,
 	ENABLE_PCLK_ATLAS,
 	ENABLE_SCLK_ATLAS,
@@ -3937,10 +3858,7 @@ static unsigned long mscl_clk_regs[] __initdata = {
 	MUX_SEL_MSCL1,
 	MUX_ENABLE_MSCL0,
 	MUX_ENABLE_MSCL1,
-	MUX_STAT_MSCL0,
-	MUX_STAT_MSCL1,
 	DIV_MSCL,
-	DIV_STAT_MSCL,
 	ENABLE_ACLK_MSCL,
 	ENABLE_ACLK_MSCL_SECURE_SMMU_M2MSCALER0,
 	ENABLE_ACLK_MSCL_SECURE_SMMU_M2MSCALER1,
@@ -4097,9 +4015,7 @@ CLK_OF_DECLARE(exynos5433_cmu_mscl, "samsung,exynos5433-cmu-mscl",
 static unsigned long mfc_clk_regs[] __initdata = {
 	MUX_SEL_MFC,
 	MUX_ENABLE_MFC,
-	MUX_STAT_MFC,
 	DIV_MFC,
-	DIV_STAT_MFC,
 	ENABLE_ACLK_MFC,
 	ENABLE_ACLK_MFC_SECURE_SMMU_MFC,
 	ENABLE_PCLK_MFC,
@@ -4207,9 +4123,7 @@ CLK_OF_DECLARE(exynos5433_cmu_mfc, "samsung,exynos5433-cmu-mfc",
 static unsigned long hevc_clk_regs[] __initdata = {
 	MUX_SEL_HEVC,
 	MUX_ENABLE_HEVC,
-	MUX_STAT_HEVC,
 	DIV_HEVC,
-	DIV_STAT_HEVC,
 	ENABLE_ACLK_HEVC,
 	ENABLE_ACLK_HEVC_SECURE_SMMU_HEVC,
 	ENABLE_PCLK_HEVC,
@@ -4321,9 +4235,7 @@ CLK_OF_DECLARE(exynos5433_cmu_hevc, "samsung,exynos5433-cmu-hevc",
 static unsigned long isp_clk_regs[] __initdata = {
 	MUX_SEL_ISP,
 	MUX_ENABLE_ISP,
-	MUX_STAT_ISP,
 	DIV_ISP,
-	DIV_STAT_ISP,
 	ENABLE_ACLK_ISP0,
 	ENABLE_ACLK_ISP1,
 	ENABLE_ACLK_ISP2,
@@ -4603,20 +4515,11 @@ static unsigned long cam0_clk_regs[] __initdata = {
 	MUX_ENABLE_CAM02,
 	MUX_ENABLE_CAM03,
 	MUX_ENABLE_CAM04,
-	MUX_STAT_CAM00,
-	MUX_STAT_CAM01,
-	MUX_STAT_CAM02,
-	MUX_STAT_CAM03,
-	MUX_STAT_CAM04,
 	MUX_IGNORE_CAM01,
 	DIV_CAM00,
 	DIV_CAM01,
 	DIV_CAM02,
 	DIV_CAM03,
-	DIV_STAT_CAM00,
-	DIV_STAT_CAM01,
-	DIV_STAT_CAM02,
-	DIV_STAT_CAM03,
 	ENABLE_ACLK_CAM00,
 	ENABLE_ACLK_CAM01,
 	ENABLE_ACLK_CAM02,
@@ -4687,9 +4590,9 @@ PNAME(mout_sclk_pixelasync_lite_c_init_a_p) = {
 
 static struct samsung_fixed_rate_clock cam0_fixed_clks[] __initdata = {
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S4_PHY, "phyclk_rxbyteclkhs0_s4_phy",
-			NULL, CLK_IS_ROOT, 100000000),
+			NULL, 0, 100000000),
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S2A_PHY, "phyclk_rxbyteclkhs0_s2a_phy",
-			NULL, CLK_IS_ROOT, 100000000),
+			NULL, 0, 100000000),
 };
 
 static struct samsung_mux_clock cam0_mux_clks[] __initdata = {
@@ -4749,21 +4652,21 @@ static struct samsung_mux_clock cam0_mux_clks[] __initdata = {
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_C, "mout_sclk_lite_freecnt_c",
 			mout_sclk_lite_freecnt_c_p, MUX_SEL_CAM04, 24, 1),
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_B, "mout_sclk_lite_freecnt_b",
-			mout_sclk_lite_freecnt_b_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_lite_freecnt_b_p, MUX_SEL_CAM04, 20, 1),
 	MUX(CLK_MOUT_SCLK_LITE_FREECNT_A, "mout_sclk_lite_freecnt_a",
-			mout_sclk_lite_freecnt_a_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_lite_freecnt_a_p, MUX_SEL_CAM04, 16, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_B, "mout_sclk_pixelasync_lite_c_b",
-			mout_sclk_pixelasync_lite_c_b_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_pixelasync_lite_c_b_p, MUX_SEL_CAM04, 12, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_A, "mout_sclk_pixelasync_lite_c_a",
-			mout_sclk_pixelasync_lite_c_a_p, MUX_SEL_CAM04, 24, 1),
+			mout_sclk_pixelasync_lite_c_a_p, MUX_SEL_CAM04, 8, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_INIT_B,
 			"mout_sclk_pixelasync_lite_c_init_b",
 			mout_sclk_pixelasync_lite_c_init_b_p,
-			MUX_SEL_CAM04, 24, 1),
+			MUX_SEL_CAM04, 4, 1),
 	MUX(CLK_MOUT_SCLK_PIXELASYNC_LITE_C_INIT_A,
 			"mout_sclk_pixelasync_lite_c_init_a",
 			mout_sclk_pixelasync_lite_c_init_a_p,
-			MUX_SEL_CAM04, 24, 1),
+			MUX_SEL_CAM04, 0, 1),
 };
 
 static struct samsung_div_clock cam0_div_clks[] __initdata = {
@@ -5074,14 +4977,9 @@ static unsigned long cam1_clk_regs[] __initdata = {
 	MUX_ENABLE_CAM10,
 	MUX_ENABLE_CAM11,
 	MUX_ENABLE_CAM12,
-	MUX_STAT_CAM10,
-	MUX_STAT_CAM11,
-	MUX_STAT_CAM12,
 	MUX_IGNORE_CAM11,
 	DIV_CAM10,
 	DIV_CAM11,
-	DIV_STAT_CAM10,
-	DIV_STAT_CAM11,
 	ENABLE_ACLK_CAM10,
 	ENABLE_ACLK_CAM11,
 	ENABLE_ACLK_CAM12,
@@ -5120,7 +5018,7 @@ PNAME(mout_aclk_lite_c_a_p)		= { "mout_aclk_cam1_552_user",
 
 static struct samsung_fixed_rate_clock cam1_fixed_clks[] __initdata = {
 	FRATE(CLK_PHYCLK_RXBYTEECLKHS0_S2B, "phyclk_rxbyteclkhs0_s2b_phy", NULL,
-			CLK_IS_ROOT, 100000000),
+			0, 100000000),
 };
 
 static struct samsung_mux_clock cam1_mux_clks[] __initdata = {
@@ -5134,9 +5032,9 @@ static struct samsung_mux_clock cam1_mux_clks[] __initdata = {
 	MUX(CLK_MOUT_ACLK_CAM1_333_USER, "mout_aclk_cam1_333_user",
 			mout_aclk_cam1_333_user_p, MUX_SEL_CAM10, 8, 1),
 	MUX(CLK_MOUT_ACLK_CAM1_400_USER, "mout_aclk_cam1_400_user",
-			mout_aclk_cam1_400_user_p, MUX_SEL_CAM01, 4, 1),
+			mout_aclk_cam1_400_user_p, MUX_SEL_CAM10, 4, 1),
 	MUX(CLK_MOUT_ACLK_CAM1_552_USER, "mout_aclk_cam1_552_user",
-			mout_aclk_cam1_552_user_p, MUX_SEL_CAM01, 0, 1),
+			mout_aclk_cam1_552_user_p, MUX_SEL_CAM10, 0, 1),
 
 	/* MUX_SEL_CAM11 */
 	MUX(CLK_MOUT_PHYCLK_RXBYTECLKHS0_S2B_USER,
@@ -5161,7 +5059,7 @@ static struct samsung_mux_clock cam1_mux_clks[] __initdata = {
 
 static struct samsung_div_clock cam1_div_clks[] __initdata = {
 	/* DIV_CAM10 */
-	DIV(CLK_DIV_SCLK_ISP_WPWM, "div_sclk_isp_wpwm",
+	DIV(CLK_DIV_SCLK_ISP_MPWM, "div_sclk_isp_mpwm",
 			"div_pclk_cam1_83", DIV_CAM10, 16, 2),
 	DIV(CLK_DIV_PCLK_CAM1_83, "div_pclk_cam1_83",
 			"mout_aclk_cam1_333_user", DIV_CAM10, 12, 2),
@@ -5355,7 +5253,7 @@ static struct samsung_gate_clock cam1_gate_clks[] __initdata = {
 			ENABLE_PCLK_CAM1, 5, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_PCLK_ISP_I2C0, "pclk_isp_i2c0", "div_pclk_cam1_83",
 			ENABLE_PCLK_CAM1, 4, CLK_IGNORE_UNUSED, 0),
-	GATE(CLK_PCLK_ISP_MPWM, "pclk_isp_wpwm", "div_pclk_cam1_83",
+	GATE(CLK_PCLK_ISP_MPWM, "pclk_isp_mpwm", "div_pclk_cam1_83",
 			ENABLE_PCLK_CAM1, 3, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_PCLK_FD, "pclk_fd", "div_pclk_fd",
 			ENABLE_PCLK_CAM1, 3, CLK_IGNORE_UNUSED, 0),
@@ -5388,7 +5286,7 @@ static struct samsung_gate_clock cam1_gate_clks[] __initdata = {
 			ENABLE_SCLK_CAM1, 5, 0, 0),
 	GATE(CLK_SCLK_ISP_SPI0, "sclk_isp_spi0", "mout_sclk_isp_spi0_user",
 			ENABLE_SCLK_CAM1, 4, 0, 0),
-	GATE(CLK_SCLK_ISP_MPWM, "sclk_isp_wpwm", "div_sclk_isp_wpwm",
+	GATE(CLK_SCLK_ISP_MPWM, "sclk_isp_mpwm", "div_sclk_isp_mpwm",
 			ENABLE_SCLK_CAM1, 3, 0, 0),
 	GATE(CLK_PCLK_DBG_ISP, "sclk_dbg_isp", "div_pclk_dbg_cam1",
 			ENABLE_SCLK_CAM1, 2, 0, 0),
diff --git a/drivers/clk/samsung/clk-exynos5440.c b/drivers/clk/samsung/clk-exynos5440.c
index 590813871ffe..c57cff1e1798 100644
--- a/drivers/clk/samsung/clk-exynos5440.c
+++ b/drivers/clk/samsung/clk-exynos5440.c
@@ -31,16 +31,16 @@ PNAME(mout_spi_p)	= { "div125", "div200" };
 
 /* fixed rate clocks generated outside the soc */
 static struct samsung_fixed_rate_clock exynos5440_fixed_rate_ext_clks[] __initdata = {
-	FRATE(0, "xtal", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "xtal", NULL, 0, 0),
 };
 
 /* fixed rate clocks */
 static struct samsung_fixed_rate_clock exynos5440_fixed_rate_clks[] __initdata = {
-	FRATE(0, "ppll", NULL, CLK_IS_ROOT, 1000000000),
-	FRATE(0, "usb_phy0", NULL, CLK_IS_ROOT, 60000000),
-	FRATE(0, "usb_phy1", NULL, CLK_IS_ROOT, 60000000),
-	FRATE(0, "usb_ohci12", NULL, CLK_IS_ROOT, 12000000),
-	FRATE(0, "usb_ohci48", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(0, "ppll", NULL, 0, 1000000000),
+	FRATE(0, "usb_phy0", NULL, 0, 60000000),
+	FRATE(0, "usb_phy1", NULL, 0, 60000000),
+	FRATE(0, "usb_ohci12", NULL, 0, 12000000),
+	FRATE(0, "usb_ohci48", NULL, 0, 48000000),
 };
 
 /* fixed factor clocks */
diff --git a/drivers/clk/samsung/clk-exynos7.c b/drivers/clk/samsung/clk-exynos7.c
index 55f8e2e24ab8..ad68d463b12c 100644
--- a/drivers/clk/samsung/clk-exynos7.c
+++ b/drivers/clk/samsung/clk-exynos7.c
@@ -894,10 +894,8 @@ PNAME(mout_phyclk_usbdrd300_udrd30_pipe_pclk_user_p)	= { "fin_pll",
 
 /* fixed rate clocks used in the FSYS0 block */
 static struct samsung_fixed_rate_clock fixed_rate_clks_fsys0[] __initdata = {
-	FRATE(0, "phyclk_usbdrd300_udrd30_phyclock", NULL,
-		CLK_IS_ROOT, 60000000),
-	FRATE(0, "phyclk_usbdrd300_udrd30_pipe_pclk", NULL,
-		CLK_IS_ROOT, 125000000),
+	FRATE(0, "phyclk_usbdrd300_udrd30_phyclock", NULL, 0, 60000000),
+	FRATE(0, "phyclk_usbdrd300_udrd30_pipe_pclk", NULL, 0, 125000000),
 };
 
 static unsigned long fsys0_clk_regs[] __initdata = {
@@ -1009,11 +1007,11 @@ PNAME(mout_phyclk_ufs20_rx1_user_p) = { "fin_pll", "phyclk_ufs20_rx1_symbol" };
 /* fixed rate clocks used in the FSYS1 block */
 static struct samsung_fixed_rate_clock fixed_rate_clks_fsys1[] __initdata = {
 	FRATE(PHYCLK_UFS20_TX0_SYMBOL, "phyclk_ufs20_tx0_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 	FRATE(PHYCLK_UFS20_RX0_SYMBOL, "phyclk_ufs20_rx0_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 	FRATE(PHYCLK_UFS20_RX1_SYMBOL, "phyclk_ufs20_rx1_symbol", NULL,
-			CLK_IS_ROOT, 300000000),
+			0, 300000000),
 };
 
 static unsigned long fsys1_clk_regs[] __initdata = {
diff --git a/drivers/clk/samsung/clk-s3c2410.c b/drivers/clk/samsung/clk-s3c2410.c
index 0945a8852299..d7b011c1fcf8 100644
--- a/drivers/clk/samsung/clk-s3c2410.c
+++ b/drivers/clk/samsung/clk-s3c2410.c
@@ -344,7 +344,7 @@ struct samsung_mux_clock s3c2442_muxes[] __initdata = {
  */
 #define XTI	1
 struct samsung_fixed_rate_clock s3c2410_common_frate_clks[] __initdata = {
-	FRATE(XTI, "xti", NULL, CLK_IS_ROOT, 0),
+	FRATE(XTI, "xti", NULL, 0, 0),
 };
 
 static void __init s3c2410_common_clk_register_fixed_ext(
diff --git a/drivers/clk/samsung/clk-s3c2412.c b/drivers/clk/samsung/clk-s3c2412.c
index 44d6a9f4f5b2..effe3736ec6b 100644
--- a/drivers/clk/samsung/clk-s3c2412.c
+++ b/drivers/clk/samsung/clk-s3c2412.c
@@ -232,8 +232,8 @@ static struct notifier_block s3c2412_restart_handler = {
  */
 #define XTI	1
 struct samsung_fixed_rate_clock s3c2412_common_frate_clks[] __initdata = {
-	FRATE(XTI, "xti", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext", NULL, CLK_IS_ROOT, 0),
+	FRATE(XTI, "xti", NULL, 0, 0),
+	FRATE(0, "ext", NULL, 0, 0),
 };
 
 static void __init s3c2412_common_clk_register_fixed_ext(
diff --git a/drivers/clk/samsung/clk-s3c2443.c b/drivers/clk/samsung/clk-s3c2443.c
index 2c0a1ea3c80c..37562783b25e 100644
--- a/drivers/clk/samsung/clk-s3c2443.c
+++ b/drivers/clk/samsung/clk-s3c2443.c
@@ -371,10 +371,10 @@ static struct notifier_block s3c2443_restart_handler = {
  * Only necessary until the devicetree-move is complete
  */
 struct samsung_fixed_rate_clock s3c2443_common_frate_clks[] __initdata = {
-	FRATE(0, "xti", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext_i2s", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "ext_uart", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "xti", NULL, 0, 0),
+	FRATE(0, "ext", NULL, 0, 0),
+	FRATE(0, "ext_i2s", NULL, 0, 0),
+	FRATE(0, "ext_uart", NULL, 0, 0),
 };
 
 static void __init s3c2443_common_clk_register_fixed_ext(
diff --git a/drivers/clk/samsung/clk-s3c64xx.c b/drivers/clk/samsung/clk-s3c64xx.c
index d325ed1e196b..60aa775bd374 100644
--- a/drivers/clk/samsung/clk-s3c64xx.c
+++ b/drivers/clk/samsung/clk-s3c64xx.c
@@ -176,14 +176,14 @@ PNAME(audio2_p6410)	= { "mout_epll", "dout_mpll", "fin_pll", "iiscdclk2",
 
 /* Fixed rate clocks generated outside the SoC. */
 FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_ext_clks) __initdata = {
-	FRATE(0, "fin_pll", NULL, CLK_IS_ROOT, 0),
-	FRATE(0, "xusbxti", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "fin_pll", NULL, 0, 0),
+	FRATE(0, "xusbxti", NULL, 0, 0),
 };
 
 /* Fixed rate clocks generated inside the SoC. */
 FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_clks) __initdata = {
-	FRATE(CLK27M, "clk27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(CLK48M, "clk48m", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(CLK27M, "clk27m", NULL, 0, 27000000),
+	FRATE(CLK48M, "clk48m", NULL, 0, 48000000),
 };
 
 /* List of clock muxes present on all S3C64xx SoCs. */
diff --git a/drivers/clk/samsung/clk-s5pv210.c b/drivers/clk/samsung/clk-s5pv210.c
index 759aaf342bea..52302262045d 100644
--- a/drivers/clk/samsung/clk-s5pv210.c
+++ b/drivers/clk/samsung/clk-s5pv210.c
@@ -503,15 +503,15 @@ static const struct samsung_mux_clock s5p6442_mux_clks[] __initconst = {
 
 /* S5PV210-specific fixed rate clocks generated inside the SoC. */
 static const struct samsung_fixed_rate_clock s5pv210_frate_clks[] __initconst = {
-	FRATE(SCLK_HDMI27M, "sclk_hdmi27m", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
-	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
-	FRATE(SCLK_USBPHY1, "sclk_usbphy1", NULL, CLK_IS_ROOT, 48000000),
+	FRATE(SCLK_HDMI27M, "sclk_hdmi27m", NULL, 0, 27000000),
+	FRATE(SCLK_HDMIPHY, "sclk_hdmiphy", NULL, 0, 27000000),
+	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, 0, 48000000),
+	FRATE(SCLK_USBPHY1, "sclk_usbphy1", NULL, 0, 48000000),
 };
 
 /* S5P6442-specific fixed rate clocks generated inside the SoC. */
 static const struct samsung_fixed_rate_clock s5p6442_frate_clks[] __initconst = {
-	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 30000000),
+	FRATE(SCLK_USBPHY0, "sclk_usbphy0", NULL, 0, 30000000),
 };
 
 /* Common clock dividers. */
diff --git a/drivers/clk/socfpga/clk-gate-a10.c b/drivers/clk/socfpga/clk-gate-a10.c
index 1cebf253e8fd..c2d572748167 100644
--- a/drivers/clk/socfpga/clk-gate-a10.c
+++ b/drivers/clk/socfpga/clk-gate-a10.c
@@ -115,7 +115,6 @@ static void __init __socfpga_gate_init(struct device_node *node,
 	const char *parent_name[SOCFPGA_MAX_PARENTS];
 	struct clk_init_data init;
 	int rc;
-	int i = 0;
 
 	socfpga_clk = kzalloc(sizeof(*socfpga_clk), GFP_KERNEL);
 	if (WARN_ON(!socfpga_clk))
@@ -167,12 +166,9 @@ static void __init __socfpga_gate_init(struct device_node *node,
 	init.name = clk_name;
 	init.ops = ops;
 	init.flags = 0;
-	while (i < SOCFPGA_MAX_PARENTS && (parent_name[i] =
-			of_clk_get_parent_name(node, i)) != NULL)
-		i++;
 
+	init.num_parents = of_clk_parent_fill(node, parent_name, SOCFPGA_MAX_PARENTS);
 	init.parent_names = parent_name;
-	init.num_parents = i;
 	socfpga_clk->hw.hw.init = &init;
 
 	clk = clk_register(NULL, &socfpga_clk->hw.hw);
diff --git a/drivers/clk/socfpga/clk-periph-a10.c b/drivers/clk/socfpga/clk-periph-a10.c
index 1f397cb72e89..70993f1e88bc 100644
--- a/drivers/clk/socfpga/clk-periph-a10.c
+++ b/drivers/clk/socfpga/clk-periph-a10.c
@@ -74,7 +74,7 @@ static __init void __socfpga_periph_init(struct device_node *node,
 	struct clk *clk;
 	struct socfpga_periph_clk *periph_clk;
 	const char *clk_name = node->name;
-	const char *parent_name;
+	const char *parent_name[SOCFPGA_MAX_PARENTS];
 	struct clk_init_data init;
 	int rc;
 	u32 fixed_div;
@@ -109,9 +109,8 @@ static __init void __socfpga_periph_init(struct device_node *node,
 	init.ops = ops;
 	init.flags = 0;
 
-	parent_name = of_clk_get_parent_name(node, 0);
-	init.num_parents = 1;
-	init.parent_names = &parent_name;
+	init.num_parents = of_clk_parent_fill(node, parent_name, SOCFPGA_MAX_PARENTS);
+	init.parent_names = parent_name;
 
 	periph_clk->hw.hw.init = &init;
 
diff --git a/drivers/clk/socfpga/clk-pll-a10.c b/drivers/clk/socfpga/clk-pll-a10.c
index 402d630bd531..35fabe1a32c3 100644
--- a/drivers/clk/socfpga/clk-pll-a10.c
+++ b/drivers/clk/socfpga/clk-pll-a10.c
@@ -74,7 +74,7 @@ static struct clk_ops clk_pll_ops = {
 	.get_parent = clk_pll_get_parent,
 };
 
-static struct __init clk * __socfpga_pll_init(struct device_node *node,
+static struct clk * __init __socfpga_pll_init(struct device_node *node,
 	const struct clk_ops *ops)
 {
 	u32 reg;
diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c
index 009bd1410cfa..2f86e3f94efa 100644
--- a/drivers/clk/spear/spear1310_clock.c
+++ b/drivers/clk/spear/spear1310_clock.c
@@ -386,24 +386,20 @@ void __init spear1310_clk_init(void __iomem *misc_base, void __iomem *ras_base)
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, CLK_IS_ROOT,
-			25000000);
+	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, 0, 25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
-			125000000);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, 0, 125000000);
 	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
-			CLK_IS_ROOT, 12288000);
+	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, 0,
+				      12288000);
 	clk_register_clkdev(clk, "i2s_src_pad_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */
@@ -897,11 +893,10 @@ void __init spear1310_clk_init(void __iomem *misc_base, void __iomem *ras_base)
 			&_lock);
 	clk_register_clkdev(clk, "ras_apb_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "ras_plclk0_clk", NULL, CLK_IS_ROOT,
+	clk = clk_register_fixed_rate(NULL, "ras_plclk0_clk", NULL, 0,
 			50000000);
 
-	clk = clk_register_fixed_rate(NULL, "ras_tx50_clk", NULL, CLK_IS_ROOT,
-			50000000);
+	clk = clk_register_fixed_rate(NULL, "ras_tx50_clk", NULL, 0, 50000000);
 
 	clk = clk_register_gate(NULL, "can0_clk", "apb_clk", 0,
 			SPEAR1310_RAS_SW_CLK_CTRL, SPEAR1310_CAN0_CLK_ENB, 0,
diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index 9c7abfd951ba..cbb19a90f2d6 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -443,24 +443,20 @@ void __init spear1340_clk_init(void __iomem *misc_base)
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, CLK_IS_ROOT,
-			25000000);
+	clk = clk_register_fixed_rate(NULL, "osc_25m_clk", NULL, 0, 25000000);
 	clk_register_clkdev(clk, "osc_25m_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, CLK_IS_ROOT,
-			125000000);
+	clk = clk_register_fixed_rate(NULL, "gmii_pad_clk", NULL, 0, 125000000);
 	clk_register_clkdev(clk, "gmii_pad_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL,
-			CLK_IS_ROOT, 12288000);
+	clk = clk_register_fixed_rate(NULL, "i2s_src_pad_clk", NULL, 0,
+				      12288000);
 	clk_register_clkdev(clk, "i2s_src_pad_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */
diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c
index 404a55edd613..c403c66b6583 100644
--- a/drivers/clk/spear/spear3xx_clock.c
+++ b/drivers/clk/spear/spear3xx_clock.c
@@ -251,7 +251,7 @@ static void __init spear320_clk_init(void __iomem *soc_config_base,
 	struct clk *clk;
 
 	clk = clk_register_fixed_rate(NULL, "smii_125m_pad_clk", NULL,
-			CLK_IS_ROOT, 125000000);
+			0, 125000000);
 	clk_register_clkdev(clk, "smii_125m_pad", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "clcd_clk", "ras_pll3_clk", 0,
@@ -391,12 +391,10 @@ void __init spear3xx_clk_init(void __iomem *misc_base, void __iomem *soc_config_
 {
 	struct clk *clk, *clk1, *ras_apb_clk;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, CLK_IS_ROOT,
-			24000000);
+	clk = clk_register_fixed_rate(NULL, "osc_24m_clk", NULL, 0, 24000000);
 	clk_register_clkdev(clk, "osc_24m_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */
diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index e24f85cd4300..7c9383c3c2c6 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -117,12 +117,10 @@ void __init spear6xx_clk_init(void __iomem *misc_base)
 {
 	struct clk *clk, *clk1;
 
-	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, CLK_IS_ROOT,
-			32000);
+	clk = clk_register_fixed_rate(NULL, "osc_32k_clk", NULL, 0, 32000);
 	clk_register_clkdev(clk, "osc_32k_clk", NULL);
 
-	clk = clk_register_fixed_rate(NULL, "osc_30m_clk", NULL, CLK_IS_ROOT,
-			30000000);
+	clk = clk_register_fixed_rate(NULL, "osc_30m_clk", NULL, 0, 30000000);
 	clk_register_clkdev(clk, "osc_30m_clk", NULL);
 
 	/* clock derived from 32 KHz osc clk */
diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c
index 24d99594c0b3..627267c7ec5c 100644
--- a/drivers/clk/st/clk-flexgen.c
+++ b/drivers/clk/st/clk-flexgen.c
@@ -244,10 +244,10 @@ static const char ** __init flexgen_get_parents(struct device_node *np,
 						       int *num_parents)
 {
 	const char **parents;
-	int nparents;
+	unsigned int nparents;
 
 	nparents = of_clk_get_parent_count(np);
-	if (WARN_ON(nparents <= 0))
+	if (WARN_ON(!nparents))
 		return NULL;
 
 	parents = kcalloc(nparents, sizeof(const char *), GFP_KERNEL);
diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c
index ccb324d97160..dec4eaaecc00 100644
--- a/drivers/clk/st/clkgen-fsyn.c
+++ b/drivers/clk/st/clkgen-fsyn.c
@@ -574,12 +574,16 @@ static int quadfs_pll_fs660c32_set_rate(struct clk_hw *hw, unsigned long rate,
 	struct stm_fs params;
 	long hwrate = 0;
 	unsigned long flags = 0;
+	int ret;
 
 	if (!rate || !parent_rate)
 		return -EINVAL;
 
-	if (!clk_fs660c32_vco_get_params(parent_rate, rate, &params))
-		clk_fs660c32_vco_get_rate(parent_rate, &params, &hwrate);
+	ret = clk_fs660c32_vco_get_params(parent_rate, rate, &params);
+	if (ret)
+		return ret;
+
+	clk_fs660c32_vco_get_rate(parent_rate, &params, &hwrate);
 
 	pr_debug("%s: %s new rate %ld [ndiv=0x%x]\n",
 		 __func__, clk_hw_get_name(hw),
diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c
index 5dc5ce217960..b1e10ffe7a44 100644
--- a/drivers/clk/st/clkgen-mux.c
+++ b/drivers/clk/st/clkgen-mux.c
@@ -26,10 +26,10 @@ static const char ** __init clkgen_mux_get_parents(struct device_node *np,
 						       int *num_parents)
 {
 	const char **parents;
-	int nparents;
+	unsigned int nparents;
 
 	nparents = of_clk_get_parent_count(np);
-	if (WARN_ON(nparents <= 0))
+	if (WARN_ON(!nparents))
 		return ERR_PTR(-EINVAL);
 
 	parents = kcalloc(nparents, sizeof(const char *), GFP_KERNEL);
@@ -822,11 +822,10 @@ err:
 		if (!clk_data->clks[i])
 			continue;
 
-		composite = container_of(__clk_get_hw(clk_data->clks[i]),
-					 struct clk_composite, hw);
-		kfree(container_of(composite->gate_hw, struct clk_gate, hw));
-		kfree(container_of(composite->rate_hw, struct clk_divider, hw));
-		kfree(container_of(composite->mux_hw, struct clk_mux, hw));
+		composite = to_clk_composite(__clk_get_hw(clk_data->clks[i]));
+		kfree(to_clk_gate(composite->gate_hw));
+		kfree(to_clk_divider(composite->rate_hw));
+		kfree(to_clk_mux(composite->mux_hw));
 	}
 
 	kfree(clk_data->clks);
diff --git a/drivers/clk/sunxi/clk-a10-hosc.c b/drivers/clk/sunxi/clk-a10-hosc.c
index 0481d5d673d6..6b598c6a0213 100644
--- a/drivers/clk/sunxi/clk-a10-hosc.c
+++ b/drivers/clk/sunxi/clk-a10-hosc.c
@@ -15,9 +15,9 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/slab.h>
 
 #define SUNXI_OSC24M_GATE	0
 
@@ -61,7 +61,6 @@ static void __init sun4i_osc_clk_setup(struct device_node *node)
 		goto err_free_gate;
 
 	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
 
 	return;
 
diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c
index 1611b036421c..3437f734c9bf 100644
--- a/drivers/clk/sunxi/clk-a20-gmac.c
+++ b/drivers/clk/sunxi/clk-a20-gmac.c
@@ -17,7 +17,6 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
@@ -107,7 +106,6 @@ static void __init sun7i_a20_gmac_clk_setup(struct device_node *node)
 		goto iounmap_reg;
 
 	of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
 
 	return;
 
diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c
index 59428dbd607a..ddefe9668863 100644
--- a/drivers/clk/sunxi/clk-factors.c
+++ b/drivers/clk/sunxi/clk-factors.c
@@ -48,7 +48,7 @@ static unsigned long clk_factors_recalc_rate(struct clk_hw *hw,
 	u32 reg;
 	unsigned long rate;
 	struct clk_factors *factors = to_clk_factors(hw);
-	struct clk_factors_config *config = factors->config;
+	const struct clk_factors_config *config = factors->config;
 
 	/* Fetch the register value */
 	reg = readl(factors->reg);
@@ -63,18 +63,28 @@ static unsigned long clk_factors_recalc_rate(struct clk_hw *hw,
 	if (config->pwidth != SUNXI_FACTORS_NOT_APPLICABLE)
 		p = FACTOR_GET(config->pshift, config->pwidth, reg);
 
-	/* Calculate the rate */
-	rate = (parent_rate * (n + config->n_start) * (k + 1) >> p) / (m + 1);
+	if (factors->recalc) {
+		struct factors_request factors_req = {
+			.parent_rate = parent_rate,
+			.n = n,
+			.k = k,
+			.m = m,
+			.p = p,
+		};
 
-	return rate;
-}
+		/* get mux details from mux clk structure */
+		if (factors->mux)
+			factors_req.parent_index =
+				(reg >> factors->mux->shift) &
+				factors->mux->mask;
 
-static long clk_factors_round_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long *parent_rate)
-{
-	struct clk_factors *factors = to_clk_factors(hw);
-	factors->get_factors((u32 *)&rate, (u32)*parent_rate,
-			     NULL, NULL, NULL, NULL);
+		factors->recalc(&factors_req);
+
+		return factors_req.rate;
+	}
+
+	/* Calculate the rate */
+	rate = (parent_rate * (n + config->n_start) * (k + 1) >> p) / (m + 1);
 
 	return rate;
 }
@@ -82,6 +92,7 @@ static long clk_factors_round_rate(struct clk_hw *hw, unsigned long rate,
 static int clk_factors_determine_rate(struct clk_hw *hw,
 				      struct clk_rate_request *req)
 {
+	struct clk_factors *factors = to_clk_factors(hw);
 	struct clk_hw *parent, *best_parent = NULL;
 	int i, num_parents;
 	unsigned long parent_rate, best = 0, child_rate, best_child_rate = 0;
@@ -89,6 +100,10 @@ static int clk_factors_determine_rate(struct clk_hw *hw,
 	/* find the parent that can help provide the fastest rate <= rate */
 	num_parents = clk_hw_get_num_parents(hw);
 	for (i = 0; i < num_parents; i++) {
+		struct factors_request factors_req = {
+			.rate = req->rate,
+			.parent_index = i,
+		};
 		parent = clk_hw_get_parent_by_index(hw, i);
 		if (!parent)
 			continue;
@@ -97,8 +112,9 @@ static int clk_factors_determine_rate(struct clk_hw *hw,
 		else
 			parent_rate = clk_hw_get_rate(parent);
 
-		child_rate = clk_factors_round_rate(hw, req->rate,
-						    &parent_rate);
+		factors_req.parent_rate = parent_rate;
+		factors->get_factors(&factors_req);
+		child_rate = factors_req.rate;
 
 		if (child_rate <= req->rate && child_rate > best_child_rate) {
 			best_parent = parent;
@@ -120,13 +136,16 @@ static int clk_factors_determine_rate(struct clk_hw *hw,
 static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate,
 				unsigned long parent_rate)
 {
-	u8 n = 0, k = 0, m = 0, p = 0;
+	struct factors_request req = {
+		.rate = rate,
+		.parent_rate = parent_rate,
+	};
 	u32 reg;
 	struct clk_factors *factors = to_clk_factors(hw);
-	struct clk_factors_config *config = factors->config;
+	const struct clk_factors_config *config = factors->config;
 	unsigned long flags = 0;
 
-	factors->get_factors((u32 *)&rate, (u32)parent_rate, &n, &k, &m, &p);
+	factors->get_factors(&req);
 
 	if (factors->lock)
 		spin_lock_irqsave(factors->lock, flags);
@@ -135,10 +154,10 @@ static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate,
 	reg = readl(factors->reg);
 
 	/* Set up the new factors - macros do not do anything if width is 0 */
-	reg = FACTOR_SET(config->nshift, config->nwidth, reg, n);
-	reg = FACTOR_SET(config->kshift, config->kwidth, reg, k);
-	reg = FACTOR_SET(config->mshift, config->mwidth, reg, m);
-	reg = FACTOR_SET(config->pshift, config->pwidth, reg, p);
+	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req.n);
+	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req.k);
+	reg = FACTOR_SET(config->mshift, config->mwidth, reg, req.m);
+	reg = FACTOR_SET(config->pshift, config->pwidth, reg, req.p);
 
 	/* Apply them now */
 	writel(reg, factors->reg);
@@ -155,7 +174,6 @@ static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate,
 static const struct clk_ops clk_factors_ops = {
 	.determine_rate = clk_factors_determine_rate,
 	.recalc_rate = clk_factors_recalc_rate,
-	.round_rate = clk_factors_round_rate,
 	.set_rate = clk_factors_set_rate,
 };
 
@@ -172,7 +190,7 @@ struct clk *sunxi_factors_register(struct device_node *node,
 	struct clk_hw *mux_hw = NULL;
 	const char *clk_name = node->name;
 	const char *parents[FACTORS_MAX_PARENTS];
-	int i = 0;
+	int ret, i = 0;
 
 	/* if we have a mux, we will have >1 parents */
 	i = of_clk_parent_fill(node, parents, FACTORS_MAX_PARENTS);
@@ -188,21 +206,22 @@ struct clk *sunxi_factors_register(struct device_node *node,
 
 	factors = kzalloc(sizeof(struct clk_factors), GFP_KERNEL);
 	if (!factors)
-		return NULL;
+		goto err_factors;
 
 	/* set up factors properties */
 	factors->reg = reg;
 	factors->config = data->table;
 	factors->get_factors = data->getter;
+	factors->recalc = data->recalc;
 	factors->lock = lock;
 
 	/* Add a gate if this factor clock can be gated */
 	if (data->enable) {
 		gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL);
-		if (!gate) {
-			kfree(factors);
-			return NULL;
-		}
+		if (!gate)
+			goto err_gate;
+
+		factors->gate = gate;
 
 		/* set up gate properties */
 		gate->reg = reg;
@@ -214,11 +233,10 @@ struct clk *sunxi_factors_register(struct device_node *node,
 	/* Add a mux if this factor clock can be muxed */
 	if (data->mux) {
 		mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-		if (!mux) {
-			kfree(factors);
-			kfree(gate);
-			return NULL;
-		}
+		if (!mux)
+			goto err_mux;
+
+		factors->mux = mux;
 
 		/* set up gate properties */
 		mux->reg = reg;
@@ -233,11 +251,44 @@ struct clk *sunxi_factors_register(struct device_node *node,
 			mux_hw, &clk_mux_ops,
 			&factors->hw, &clk_factors_ops,
 			gate_hw, &clk_gate_ops, 0);
+	if (IS_ERR(clk))
+		goto err_register;
 
-	if (!IS_ERR(clk)) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
-	}
+	ret = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (ret)
+		goto err_provider;
 
 	return clk;
+
+err_provider:
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+err_register:
+	kfree(mux);
+err_mux:
+	kfree(gate);
+err_gate:
+	kfree(factors);
+err_factors:
+	return NULL;
+}
+
+void sunxi_factors_unregister(struct device_node *node, struct clk *clk)
+{
+	struct clk_hw *hw = __clk_get_hw(clk);
+	struct clk_factors *factors;
+	const char *name;
+
+	if (!hw)
+		return;
+
+	factors = to_clk_factors(hw);
+	name = clk_hw_get_name(hw);
+
+	of_clk_del_provider(node);
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+	kfree(factors->mux);
+	kfree(factors->gate);
+	kfree(factors);
 }
diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h
index 171085ab5513..1e63c5b2d5f4 100644
--- a/drivers/clk/sunxi/clk-factors.h
+++ b/drivers/clk/sunxi/clk-factors.h
@@ -2,7 +2,6 @@
 #define __MACH_SUNXI_CLK_FACTORS_H
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/spinlock.h>
 
 #define SUNXI_FACTORS_NOT_APPLICABLE	(0)
@@ -19,21 +18,36 @@ struct clk_factors_config {
 	u8 n_start;
 };
 
+struct factors_request {
+	unsigned long rate;
+	unsigned long parent_rate;
+	u8 parent_index;
+	u8 n;
+	u8 k;
+	u8 m;
+	u8 p;
+};
+
 struct factors_data {
 	int enable;
 	int mux;
 	int muxmask;
-	struct clk_factors_config *table;
-	void (*getter) (u32 *rate, u32 parent_rate, u8 *n, u8 *k, u8 *m, u8 *p);
+	const struct clk_factors_config *table;
+	void (*getter)(struct factors_request *req);
+	void (*recalc)(struct factors_request *req);
 	const char *name;
 };
 
 struct clk_factors {
 	struct clk_hw hw;
 	void __iomem *reg;
-	struct clk_factors_config *config;
-	void (*get_factors) (u32 *rate, u32 parent, u8 *n, u8 *k, u8 *m, u8 *p);
+	const struct clk_factors_config *config;
+	void (*get_factors)(struct factors_request *req);
+	void (*recalc)(struct factors_request *req);
 	spinlock_t *lock;
+	/* for cleanup */
+	struct clk_mux *mux;
+	struct clk_gate *gate;
 };
 
 struct clk *sunxi_factors_register(struct device_node *node,
@@ -41,4 +55,6 @@ struct clk *sunxi_factors_register(struct device_node *node,
 				   spinlock_t *lock,
 				   void __iomem *reg);
 
+void sunxi_factors_unregister(struct device_node *node, struct clk *clk);
+
 #endif
diff --git a/drivers/clk/sunxi/clk-mod0.c b/drivers/clk/sunxi/clk-mod0.c
index d167e1efb927..b38d71cec74c 100644
--- a/drivers/clk/sunxi/clk-mod0.c
+++ b/drivers/clk/sunxi/clk-mod0.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
@@ -28,17 +29,16 @@
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun4i_a10_get_mod0_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_a10_get_mod0_factors(struct factors_request *req)
 {
 	u8 div, calcm, calcp;
 
 	/* These clocks can only divide, so we will never be able to achieve
 	 * frequencies higher than the parent frequency */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	if (div < 16)
 		calcp = 0;
@@ -51,18 +51,13 @@ static void sun4i_a10_get_mod0_factors(u32 *freq, u32 parent_rate,
 
 	calcm = DIV_ROUND_UP(div, 1 << calcp);
 
-	*freq = (parent_rate >> calcp) / calcm;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm - 1;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / calcm;
+	req->m = calcm - 1;
+	req->p = calcp;
 }
 
 /* user manual says "n" but it's really "p" */
-static struct clk_factors_config sun4i_a10_mod0_config = {
+static const struct clk_factors_config sun4i_a10_mod0_config = {
 	.mshift = 0,
 	.mwidth = 4,
 	.pshift = 16,
diff --git a/drivers/clk/sunxi/clk-simple-gates.c b/drivers/clk/sunxi/clk-simple-gates.c
index f4da52b5ca0e..a085c3bc127c 100644
--- a/drivers/clk/sunxi/clk-simple-gates.c
+++ b/drivers/clk/sunxi/clk-simple-gates.c
@@ -98,6 +98,8 @@ static void __init sunxi_simple_gates_init(struct device_node *node)
 	sunxi_simple_gates_setup(node, NULL, 0);
 }
 
+CLK_OF_DECLARE(sun4i_a10_gates, "allwinner,sun4i-a10-gates-clk",
+	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun4i_a10_apb0, "allwinner,sun4i-a10-apb0-gates-clk",
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun4i_a10_apb1, "allwinner,sun4i-a10-apb1-gates-clk",
@@ -130,6 +132,8 @@ CLK_OF_DECLARE(sun8i_a23_apb2, "allwinner,sun8i-a23-apb2-gates-clk",
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun8i_a33_ahb1, "allwinner,sun8i-a33-ahb1-gates-clk",
 	       sunxi_simple_gates_init);
+CLK_OF_DECLARE(sun8i_a83t_apb0, "allwinner,sun8i-a83t-apb0-gates-clk",
+	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun9i_a80_ahb0, "allwinner,sun9i-a80-ahb0-gates-clk",
 	       sunxi_simple_gates_init);
 CLK_OF_DECLARE(sun9i_a80_ahb1, "allwinner,sun9i-a80-ahb1-gates-clk",
diff --git a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
index 23d042aabb4f..68021fa5ecd9 100644
--- a/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
+++ b/drivers/clk/sunxi/clk-sun6i-apb0-gates.c
@@ -9,7 +9,6 @@
  */
 
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -87,7 +86,6 @@ static int sun6i_a31_apb0_gates_clk_probe(struct platform_device *pdev)
 						      clk_parent, 0, reg, i,
 						      0, NULL);
 		WARN_ON(IS_ERR(clk_data->clks[i]));
-		clk_register_clkdev(clk_data->clks[i], clk_name, NULL);
 
 		j++;
 	}
diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c
index 20887686bdbe..84a187e55360 100644
--- a/drivers/clk/sunxi/clk-sun6i-ar100.c
+++ b/drivers/clk/sunxi/clk-sun6i-ar100.c
@@ -8,211 +8,97 @@
  *
  */
 
+#include <linux/bitops.h>
 #include <linux/clk-provider.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/spinlock.h>
 
-#define SUN6I_AR100_MAX_PARENTS		4
-#define SUN6I_AR100_SHIFT_MASK		0x3
-#define SUN6I_AR100_SHIFT_MAX		SUN6I_AR100_SHIFT_MASK
-#define SUN6I_AR100_SHIFT_SHIFT		4
-#define SUN6I_AR100_DIV_MASK		0x1f
-#define SUN6I_AR100_DIV_MAX		(SUN6I_AR100_DIV_MASK + 1)
-#define SUN6I_AR100_DIV_SHIFT		8
-#define SUN6I_AR100_MUX_MASK		0x3
-#define SUN6I_AR100_MUX_SHIFT		16
-
-struct ar100_clk {
-	struct clk_hw hw;
-	void __iomem *reg;
-};
-
-static inline struct ar100_clk *to_ar100_clk(struct clk_hw *hw)
-{
-	return container_of(hw, struct ar100_clk, hw);
-}
-
-static unsigned long ar100_recalc_rate(struct clk_hw *hw,
-				       unsigned long parent_rate)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
-	int shift = (val >> SUN6I_AR100_SHIFT_SHIFT) & SUN6I_AR100_SHIFT_MASK;
-	int div = (val >> SUN6I_AR100_DIV_SHIFT) & SUN6I_AR100_DIV_MASK;
-
-	return (parent_rate >> shift) / (div + 1);
-}
-
-static int ar100_determine_rate(struct clk_hw *hw,
-				struct clk_rate_request *req)
-{
-	int nparents = clk_hw_get_num_parents(hw);
-	long best_rate = -EINVAL;
-	int i;
-
-	req->best_parent_hw = NULL;
-
-	for (i = 0; i < nparents; i++) {
-		unsigned long parent_rate;
-		unsigned long tmp_rate;
-		struct clk_hw *parent;
-		unsigned long div;
-		int shift;
-
-		parent = clk_hw_get_parent_by_index(hw, i);
-		parent_rate = clk_hw_get_rate(parent);
-		div = DIV_ROUND_UP(parent_rate, req->rate);
-
-		/*
-		 * The AR100 clk contains 2 divisors:
-		 * - one power of 2 divisor
-		 * - one regular divisor
-		 *
-		 * First check if we can safely shift (or divide by a power
-		 * of 2) without losing precision on the requested rate.
-		 */
-		shift = ffs(div) - 1;
-		if (shift > SUN6I_AR100_SHIFT_MAX)
-			shift = SUN6I_AR100_SHIFT_MAX;
-
-		div >>= shift;
-
-		/*
-		 * Then if the divisor is still bigger than what the HW
-		 * actually supports, use a bigger shift (or power of 2
-		 * divider) value and accept to lose some precision.
-		 */
-		while (div > SUN6I_AR100_DIV_MAX) {
-			shift++;
-			div >>= 1;
-			if (shift > SUN6I_AR100_SHIFT_MAX)
-				break;
-		}
-
-		/*
-		 * If the shift value (or power of 2 divider) is bigger
-		 * than what the HW actually support, skip this parent.
-		 */
-		if (shift > SUN6I_AR100_SHIFT_MAX)
-			continue;
-
-		tmp_rate = (parent_rate >> shift) / div;
-		if (!req->best_parent_hw || tmp_rate > best_rate) {
-			req->best_parent_hw = parent;
-			req->best_parent_rate = parent_rate;
-			best_rate = tmp_rate;
-		}
-	}
-
-	if (best_rate < 0)
-		return best_rate;
-
-	req->rate = best_rate;
-
-	return 0;
-}
-
-static int ar100_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
-
-	if (index >= SUN6I_AR100_MAX_PARENTS)
-		return -EINVAL;
-
-	val &= ~(SUN6I_AR100_MUX_MASK << SUN6I_AR100_MUX_SHIFT);
-	val |= (index << SUN6I_AR100_MUX_SHIFT);
-	writel(val, clk->reg);
-
-	return 0;
-}
+#include "clk-factors.h"
 
-static u8 ar100_get_parent(struct clk_hw *hw)
-{
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	return (readl(clk->reg) >> SUN6I_AR100_MUX_SHIFT) &
-	       SUN6I_AR100_MUX_MASK;
-}
-
-static int ar100_set_rate(struct clk_hw *hw, unsigned long rate,
-			  unsigned long parent_rate)
+/**
+ * sun6i_get_ar100_factors - Calculates factors p, m for AR100
+ *
+ * AR100 rate is calculated as follows
+ * rate = (parent_rate >> p) / (m + 1);
+ */
+static void sun6i_get_ar100_factors(struct factors_request *req)
 {
-	unsigned long div = parent_rate / rate;
-	struct ar100_clk *clk = to_ar100_clk(hw);
-	u32 val = readl(clk->reg);
+	unsigned long div;
 	int shift;
 
-	if (parent_rate % rate)
-		return -EINVAL;
+	/* clock only divides */
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	shift = ffs(div) - 1;
-	if (shift > SUN6I_AR100_SHIFT_MAX)
-		shift = SUN6I_AR100_SHIFT_MAX;
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
-	div >>= shift;
+	if (div < 32)
+		shift = 0;
+	else if (div >> 1 < 32)
+		shift = 1;
+	else if (div >> 2 < 32)
+		shift = 2;
+	else
+		shift = 3;
 
-	if (div > SUN6I_AR100_DIV_MAX)
-		return -EINVAL;
+	div >>= shift;
 
-	val &= ~((SUN6I_AR100_SHIFT_MASK << SUN6I_AR100_SHIFT_SHIFT) |
-		 (SUN6I_AR100_DIV_MASK << SUN6I_AR100_DIV_SHIFT));
-	val |= (shift << SUN6I_AR100_SHIFT_SHIFT) |
-	       (div << SUN6I_AR100_DIV_SHIFT);
-	writel(val, clk->reg);
+	if (div > 32)
+		div = 32;
 
-	return 0;
+	req->rate = (req->parent_rate >> shift) / div;
+	req->m = div - 1;
+	req->p = shift;
 }
 
-static struct clk_ops ar100_ops = {
-	.recalc_rate = ar100_recalc_rate,
-	.determine_rate = ar100_determine_rate,
-	.set_parent = ar100_set_parent,
-	.get_parent = ar100_get_parent,
-	.set_rate = ar100_set_rate,
+static const struct clk_factors_config sun6i_ar100_config = {
+	.mwidth = 5,
+	.mshift = 8,
+	.pwidth = 2,
+	.pshift = 4,
 };
 
+static const struct factors_data sun6i_ar100_data = {
+	.mux = 16,
+	.muxmask = GENMASK(1, 0),
+	.table = &sun6i_ar100_config,
+	.getter = sun6i_get_ar100_factors,
+};
+
+static DEFINE_SPINLOCK(sun6i_ar100_lock);
+
 static int sun6i_a31_ar100_clk_probe(struct platform_device *pdev)
 {
-	const char *parents[SUN6I_AR100_MAX_PARENTS];
 	struct device_node *np = pdev->dev.of_node;
-	const char *clk_name = np->name;
-	struct clk_init_data init;
-	struct ar100_clk *ar100;
 	struct resource *r;
+	void __iomem *reg;
 	struct clk *clk;
-	int nparents;
-
-	ar100 = devm_kzalloc(&pdev->dev, sizeof(*ar100), GFP_KERNEL);
-	if (!ar100)
-		return -ENOMEM;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ar100->reg = devm_ioremap_resource(&pdev->dev, r);
-	if (IS_ERR(ar100->reg))
-		return PTR_ERR(ar100->reg);
+	reg = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
 
-	nparents = of_clk_get_parent_count(np);
-	if (nparents > SUN6I_AR100_MAX_PARENTS)
-		nparents = SUN6I_AR100_MAX_PARENTS;
-
-	of_clk_parent_fill(np, parents, nparents);
+	clk = sunxi_factors_register(np, &sun6i_ar100_data, &sun6i_ar100_lock,
+				     reg);
+	if (!clk)
+		return -ENOMEM;
 
-	of_property_read_string(np, "clock-output-names", &clk_name);
+	platform_set_drvdata(pdev, clk);
 
-	init.name = clk_name;
-	init.ops = &ar100_ops;
-	init.parent_names = parents;
-	init.num_parents = nparents;
-	init.flags = 0;
+	return 0;
+}
 
-	ar100->hw.init = &init;
+static int sun6i_a31_ar100_clk_remove(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct clk *clk = platform_get_drvdata(pdev);
 
-	clk = clk_register(&pdev->dev, &ar100->hw);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
+	sunxi_factors_unregister(np, clk);
 
-	return of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	return 0;
 }
 
 static const struct of_device_id sun6i_a31_ar100_clk_dt_ids[] = {
@@ -227,6 +113,7 @@ static struct platform_driver sun6i_a31_ar100_clk_driver = {
 		.of_match_table = sun6i_a31_ar100_clk_dt_ids,
 	},
 	.probe = sun6i_a31_ar100_clk_probe,
+	.remove = sun6i_a31_ar100_clk_remove,
 };
 module_platform_driver(sun6i_a31_ar100_clk_driver);
 
diff --git a/drivers/clk/sunxi/clk-sun8i-apb0.c b/drivers/clk/sunxi/clk-sun8i-apb0.c
index 7ba61103a6f5..2ea61debffc1 100644
--- a/drivers/clk/sunxi/clk-sun8i-apb0.c
+++ b/drivers/clk/sunxi/clk-sun8i-apb0.c
@@ -36,7 +36,7 @@ static struct clk *sun8i_a23_apb0_register(struct device_node *node,
 
 	/* The A23 APB0 clock is a standard 2 bit wide divider clock */
 	clk = clk_register_divider(NULL, clk_name, clk_parent, 0, reg,
-				   0, 2, CLK_DIVIDER_POWER_OF_TWO, NULL);
+				   0, 2, 0, NULL);
 	if (IS_ERR(clk))
 		return clk;
 
diff --git a/drivers/clk/sunxi/clk-sun8i-bus-gates.c b/drivers/clk/sunxi/clk-sun8i-bus-gates.c
index e32d18ba252b..63fdb790df29 100644
--- a/drivers/clk/sunxi/clk-sun8i-bus-gates.c
+++ b/drivers/clk/sunxi/clk-sun8i-bus-gates.c
@@ -17,7 +17,6 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -110,3 +109,5 @@ err_unmap:
 
 CLK_OF_DECLARE(sun8i_h3_bus_gates, "allwinner,sun8i-h3-bus-gates-clk",
 	       sun8i_h3_bus_gates_init);
+CLK_OF_DECLARE(sun8i_a83t_bus_gates, "allwinner,sun8i-a83t-bus-gates-clk",
+	       sun8i_h3_bus_gates_init);
diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c
index bf117a636d23..411d3033a96e 100644
--- a/drivers/clk/sunxi/clk-sun8i-mbus.c
+++ b/drivers/clk/sunxi/clk-sun8i-mbus.c
@@ -15,74 +15,106 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/of_address.h>
 
-#include "clk-factors.h"
+#define SUN8I_MBUS_ENABLE	31
+#define SUN8I_MBUS_MUX_SHIFT	24
+#define SUN8I_MBUS_MUX_MASK	0x3
+#define SUN8I_MBUS_DIV_SHIFT	0
+#define SUN8I_MBUS_DIV_WIDTH	3
+#define SUN8I_MBUS_MAX_PARENTS	4
 
-/**
- * sun8i_a23_get_mbus_factors() - calculates m factor for MBUS clocks
- * MBUS rate is calculated as follows
- * rate = parent_rate / (m + 1);
- */
+static DEFINE_SPINLOCK(sun8i_a23_mbus_lock);
 
-static void sun8i_a23_get_mbus_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void __init sun8i_a23_mbus_setup(struct device_node *node)
 {
-	u8 div;
+	int num_parents = of_clk_get_parent_count(node);
+	const char **parents;
+	const char *clk_name = node->name;
+	struct resource res;
+	struct clk_divider *div;
+	struct clk_gate *gate;
+	struct clk_mux *mux;
+	struct clk *clk;
+	void __iomem *reg;
+	int err;
 
-	/*
-	 * These clocks can only divide, so we will never be able to
-	 * achieve frequencies higher than the parent frequency
-	 */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
+	parents = kcalloc(num_parents, sizeof(*parents), GFP_KERNEL);
+	if (!parents)
+		return;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (!reg) {
+		pr_err("Could not get registers for sun8i-mbus-clk\n");
+		goto err_free_parents;
+	}
 
-	if (div > 8)
-		div = 8;
+	div = kzalloc(sizeof(*div), GFP_KERNEL);
+	if (!div)
+		goto err_unmap;
 
-	*freq = parent_rate / div;
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		goto err_free_div;
 
-	/* we were called to round the frequency, we can now return */
-	if (m == NULL)
-		return;
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		goto err_free_mux;
 
-	*m = div - 1;
-}
+	of_property_read_string(node, "clock-output-names", &clk_name);
+	of_clk_parent_fill(node, parents, num_parents);
 
-static struct clk_factors_config sun8i_a23_mbus_config = {
-	.mshift = 0,
-	.mwidth = 3,
-};
+	gate->reg = reg;
+	gate->bit_idx = SUN8I_MBUS_ENABLE;
+	gate->lock = &sun8i_a23_mbus_lock;
 
-static const struct factors_data sun8i_a23_mbus_data __initconst = {
-	.enable = 31,
-	.mux = 24,
-	.muxmask = BIT(1) | BIT(0),
-	.table = &sun8i_a23_mbus_config,
-	.getter = sun8i_a23_get_mbus_factors,
-};
+	div->reg = reg;
+	div->shift = SUN8I_MBUS_DIV_SHIFT;
+	div->width = SUN8I_MBUS_DIV_WIDTH;
+	div->lock = &sun8i_a23_mbus_lock;
 
-static DEFINE_SPINLOCK(sun8i_a23_mbus_lock);
-
-static void __init sun8i_a23_mbus_setup(struct device_node *node)
-{
-	struct clk *mbus;
-	void __iomem *reg;
+	mux->reg = reg;
+	mux->shift = SUN8I_MBUS_MUX_SHIFT;
+	mux->mask = SUN8I_MBUS_MUX_MASK;
+	mux->lock = &sun8i_a23_mbus_lock;
 
-	reg = of_iomap(node, 0);
-	if (!reg) {
-		pr_err("Could not get registers for a23-mbus-clk\n");
-		return;
-	}
+	clk = clk_register_composite(NULL, clk_name, parents, num_parents,
+				     &mux->hw, &clk_mux_ops,
+				     &div->hw, &clk_divider_ops,
+				     &gate->hw, &clk_gate_ops,
+				     0);
+	if (IS_ERR(clk))
+		goto err_free_gate;
 
-	mbus = sunxi_factors_register(node, &sun8i_a23_mbus_data,
-				      &sun8i_a23_mbus_lock, reg);
+	err = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (err)
+		goto err_unregister_clk;
 
+	kfree(parents); /* parents is deep copied */
 	/* The MBUS clocks needs to be always enabled */
-	__clk_get(mbus);
-	clk_prepare_enable(mbus);
+	__clk_get(clk);
+	clk_prepare_enable(clk);
+
+	return;
+
+err_unregister_clk:
+	/* TODO: The composite clock stuff will leak a bit here. */
+	clk_unregister(clk);
+err_free_gate:
+	kfree(gate);
+err_free_mux:
+	kfree(mux);
+err_free_div:
+	kfree(div);
+err_unmap:
+	iounmap(reg);
+	of_address_to_resource(node, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+err_free_parents:
+	kfree(parents);
 }
 CLK_OF_DECLARE(sun8i_a23_mbus, "allwinner,sun8i-a23-mbus-clk", sun8i_a23_mbus_setup);
diff --git a/drivers/clk/sunxi/clk-sun9i-core.c b/drivers/clk/sunxi/clk-sun9i-core.c
index 6c4c98324d3c..43f014f85803 100644
--- a/drivers/clk/sunxi/clk-sun9i-core.c
+++ b/drivers/clk/sunxi/clk-sun9i-core.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -32,15 +33,14 @@
  * p and m are named div1 and div2 in Allwinner's SDK
  */
 
-static void sun9i_a80_get_pll4_factors(u32 *freq, u32 parent_rate,
-				       u8 *n_ret, u8 *k, u8 *m_ret, u8 *p_ret)
+static void sun9i_a80_get_pll4_factors(struct factors_request *req)
 {
 	int n;
 	int m = 1;
 	int p = 1;
 
 	/* Normalize value to a 6 MHz multiple (24 MHz / 4) */
-	n = DIV_ROUND_UP(*freq, 6000000);
+	n = DIV_ROUND_UP(req->rate, 6000000);
 
 	/* If n is too large switch to steps of 12 MHz */
 	if (n > 255) {
@@ -60,18 +60,13 @@ static void sun9i_a80_get_pll4_factors(u32 *freq, u32 parent_rate,
 	else if (n < 12)
 		n = 12;
 
-	*freq = ((24000000 * n) >> p) / (m + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n_ret == NULL)
-		return;
-
-	*n_ret = n;
-	*m_ret = m;
-	*p_ret = p;
+	req->rate = ((24000000 * n) >> p) / (m + 1);
+	req->n = n;
+	req->m = m;
+	req->p = p;
 }
 
-static struct clk_factors_config sun9i_a80_pll4_config = {
+static const struct clk_factors_config sun9i_a80_pll4_config = {
 	.mshift = 18,
 	.mwidth = 1,
 	.nshift = 8,
@@ -111,30 +106,24 @@ CLK_OF_DECLARE(sun9i_a80_pll4, "allwinner,sun9i-a80-pll4-clk", sun9i_a80_pll4_se
  * rate = parent_rate / (m + 1);
  */
 
-static void sun9i_a80_get_gt_factors(u32 *freq, u32 parent_rate,
-				     u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_gt_factors(struct factors_request *req)
 {
 	u32 div;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* maximum divider is 4 */
 	if (div > 4)
 		div = 4;
 
-	*freq = parent_rate / div;
-
-	/* we were called to round the frequency, we can now return */
-	if (!m)
-		return;
-
-	*m = div;
+	req->rate = req->parent_rate / div;
+	req->m = div;
 }
 
-static struct clk_factors_config sun9i_a80_gt_config = {
+static const struct clk_factors_config sun9i_a80_gt_config = {
 	.mshift = 0,
 	.mwidth = 2,
 };
@@ -176,30 +165,24 @@ CLK_OF_DECLARE(sun9i_a80_gt, "allwinner,sun9i-a80-gt-clk", sun9i_a80_gt_setup);
  * rate = parent_rate >> p;
  */
 
-static void sun9i_a80_get_ahb_factors(u32 *freq, u32 parent_rate,
-				      u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_ahb_factors(struct factors_request *req)
 {
 	u32 _p;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	_p = order_base_2(DIV_ROUND_UP(parent_rate, *freq));
+	_p = order_base_2(DIV_ROUND_UP(req->parent_rate, req->rate));
 
 	/* maximum p is 3 */
 	if (_p > 3)
 		_p = 3;
 
-	*freq = parent_rate >> _p;
-
-	/* we were called to round the frequency, we can now return */
-	if (!p)
-		return;
-
-	*p = _p;
+	req->rate = req->parent_rate >> _p;
+	req->p = _p;
 }
 
-static struct clk_factors_config sun9i_a80_ahb_config = {
+static const struct clk_factors_config sun9i_a80_ahb_config = {
 	.pshift = 0,
 	.pwidth = 2,
 };
@@ -262,34 +245,25 @@ CLK_OF_DECLARE(sun9i_a80_apb0, "allwinner,sun9i-a80-apb0-clk", sun9i_a80_apb0_se
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun9i_a80_get_apb1_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun9i_a80_get_apb1_factors(struct factors_request *req)
 {
 	u32 div;
-	u8 calcm, calcp;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* Highest possible divider is 256 (p = 3, m = 31) */
 	if (div > 256)
 		div = 256;
 
-	calcp = order_base_2(div);
-	calcm = (parent_rate >> calcp) - 1;
-	*freq = (parent_rate >> calcp) / (calcm + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm;
-	*p = calcp;
+	req->p = order_base_2(div);
+	req->m = (req->parent_rate >> req->p) - 1;
+	req->rate = (req->parent_rate >> req->p) / (req->m + 1);
 }
 
-static struct clk_factors_config sun9i_a80_apb1_config = {
+static const struct clk_factors_config sun9i_a80_apb1_config = {
 	.mshift = 0,
 	.mwidth = 5,
 	.pshift = 16,
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 5ba2188ee99c..91de0a006773 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -28,214 +28,6 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-/**
- * sun6i_a31_ahb1_clk_setup() - Setup function for a31 ahb1 composite clk
- */
-
-#define SUN6I_AHB1_MAX_PARENTS		4
-#define SUN6I_AHB1_MUX_PARENT_PLL6	3
-#define SUN6I_AHB1_MUX_SHIFT		12
-/* un-shifted mask is what mux_clk expects */
-#define SUN6I_AHB1_MUX_MASK		0x3
-#define SUN6I_AHB1_MUX_GET_PARENT(reg)	((reg >> SUN6I_AHB1_MUX_SHIFT) & \
-					 SUN6I_AHB1_MUX_MASK)
-
-#define SUN6I_AHB1_DIV_SHIFT		4
-#define SUN6I_AHB1_DIV_MASK		(0x3 << SUN6I_AHB1_DIV_SHIFT)
-#define SUN6I_AHB1_DIV_GET(reg)		((reg & SUN6I_AHB1_DIV_MASK) >> \
-						SUN6I_AHB1_DIV_SHIFT)
-#define SUN6I_AHB1_DIV_SET(reg, div)	((reg & ~SUN6I_AHB1_DIV_MASK) | \
-						(div << SUN6I_AHB1_DIV_SHIFT))
-#define SUN6I_AHB1_PLL6_DIV_SHIFT	6
-#define SUN6I_AHB1_PLL6_DIV_MASK	(0x3 << SUN6I_AHB1_PLL6_DIV_SHIFT)
-#define SUN6I_AHB1_PLL6_DIV_GET(reg)	((reg & SUN6I_AHB1_PLL6_DIV_MASK) >> \
-						SUN6I_AHB1_PLL6_DIV_SHIFT)
-#define SUN6I_AHB1_PLL6_DIV_SET(reg, div) ((reg & ~SUN6I_AHB1_PLL6_DIV_MASK) | \
-						(div << SUN6I_AHB1_PLL6_DIV_SHIFT))
-
-struct sun6i_ahb1_clk {
-	struct clk_hw hw;
-	void __iomem *reg;
-};
-
-#define to_sun6i_ahb1_clk(_hw) container_of(_hw, struct sun6i_ahb1_clk, hw)
-
-static unsigned long sun6i_ahb1_clk_recalc_rate(struct clk_hw *hw,
-						unsigned long parent_rate)
-{
-	struct sun6i_ahb1_clk *ahb1 = to_sun6i_ahb1_clk(hw);
-	unsigned long rate;
-	u32 reg;
-
-	/* Fetch the register value */
-	reg = readl(ahb1->reg);
-
-	/* apply pre-divider first if parent is pll6 */
-	if (SUN6I_AHB1_MUX_GET_PARENT(reg) == SUN6I_AHB1_MUX_PARENT_PLL6)
-		parent_rate /= SUN6I_AHB1_PLL6_DIV_GET(reg) + 1;
-
-	/* clk divider */
-	rate = parent_rate >> SUN6I_AHB1_DIV_GET(reg);
-
-	return rate;
-}
-
-static long sun6i_ahb1_clk_round(unsigned long rate, u8 *divp, u8 *pre_divp,
-				 u8 parent, unsigned long parent_rate)
-{
-	u8 div, calcp, calcm = 1;
-
-	/*
-	 * clock can only divide, so we will never be able to achieve
-	 * frequencies higher than the parent frequency
-	 */
-	if (parent_rate && rate > parent_rate)
-		rate = parent_rate;
-
-	div = DIV_ROUND_UP(parent_rate, rate);
-
-	/* calculate pre-divider if parent is pll6 */
-	if (parent == SUN6I_AHB1_MUX_PARENT_PLL6) {
-		if (div < 4)
-			calcp = 0;
-		else if (div / 2 < 4)
-			calcp = 1;
-		else if (div / 4 < 4)
-			calcp = 2;
-		else
-			calcp = 3;
-
-		calcm = DIV_ROUND_UP(div, 1 << calcp);
-	} else {
-		calcp = __roundup_pow_of_two(div);
-		calcp = calcp > 3 ? 3 : calcp;
-	}
-
-	/* we were asked to pass back divider values */
-	if (divp) {
-		*divp = calcp;
-		*pre_divp = calcm - 1;
-	}
-
-	return (parent_rate / calcm) >> calcp;
-}
-
-static int sun6i_ahb1_clk_determine_rate(struct clk_hw *hw,
-					 struct clk_rate_request *req)
-{
-	struct clk_hw *parent, *best_parent = NULL;
-	int i, num_parents;
-	unsigned long parent_rate, best = 0, child_rate, best_child_rate = 0;
-
-	/* find the parent that can help provide the fastest rate <= rate */
-	num_parents = clk_hw_get_num_parents(hw);
-	for (i = 0; i < num_parents; i++) {
-		parent = clk_hw_get_parent_by_index(hw, i);
-		if (!parent)
-			continue;
-		if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)
-			parent_rate = clk_hw_round_rate(parent, req->rate);
-		else
-			parent_rate = clk_hw_get_rate(parent);
-
-		child_rate = sun6i_ahb1_clk_round(req->rate, NULL, NULL, i,
-						  parent_rate);
-
-		if (child_rate <= req->rate && child_rate > best_child_rate) {
-			best_parent = parent;
-			best = parent_rate;
-			best_child_rate = child_rate;
-		}
-	}
-
-	if (!best_parent)
-		return -EINVAL;
-
-	req->best_parent_hw = best_parent;
-	req->best_parent_rate = best;
-	req->rate = best_child_rate;
-
-	return 0;
-}
-
-static int sun6i_ahb1_clk_set_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long parent_rate)
-{
-	struct sun6i_ahb1_clk *ahb1 = to_sun6i_ahb1_clk(hw);
-	unsigned long flags;
-	u8 div, pre_div, parent;
-	u32 reg;
-
-	spin_lock_irqsave(&clk_lock, flags);
-
-	reg = readl(ahb1->reg);
-
-	/* need to know which parent is used to apply pre-divider */
-	parent = SUN6I_AHB1_MUX_GET_PARENT(reg);
-	sun6i_ahb1_clk_round(rate, &div, &pre_div, parent, parent_rate);
-
-	reg = SUN6I_AHB1_DIV_SET(reg, div);
-	reg = SUN6I_AHB1_PLL6_DIV_SET(reg, pre_div);
-	writel(reg, ahb1->reg);
-
-	spin_unlock_irqrestore(&clk_lock, flags);
-
-	return 0;
-}
-
-static const struct clk_ops sun6i_ahb1_clk_ops = {
-	.determine_rate	= sun6i_ahb1_clk_determine_rate,
-	.recalc_rate	= sun6i_ahb1_clk_recalc_rate,
-	.set_rate	= sun6i_ahb1_clk_set_rate,
-};
-
-static void __init sun6i_ahb1_clk_setup(struct device_node *node)
-{
-	struct clk *clk;
-	struct sun6i_ahb1_clk *ahb1;
-	struct clk_mux *mux;
-	const char *clk_name = node->name;
-	const char *parents[SUN6I_AHB1_MAX_PARENTS];
-	void __iomem *reg;
-	int i;
-
-	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (IS_ERR(reg))
-		return;
-
-	/* we have a mux, we will have >1 parents */
-	i = of_clk_parent_fill(node, parents, SUN6I_AHB1_MAX_PARENTS);
-	of_property_read_string(node, "clock-output-names", &clk_name);
-
-	ahb1 = kzalloc(sizeof(struct sun6i_ahb1_clk), GFP_KERNEL);
-	if (!ahb1)
-		return;
-
-	mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL);
-	if (!mux) {
-		kfree(ahb1);
-		return;
-	}
-
-	/* set up clock properties */
-	mux->reg = reg;
-	mux->shift = SUN6I_AHB1_MUX_SHIFT;
-	mux->mask = SUN6I_AHB1_MUX_MASK;
-	mux->lock = &clk_lock;
-	ahb1->reg = reg;
-
-	clk = clk_register_composite(NULL, clk_name, parents, i,
-				     &mux->hw, &clk_mux_ops,
-				     &ahb1->hw, &sun6i_ahb1_clk_ops,
-				     NULL, NULL, 0);
-
-	if (!IS_ERR(clk)) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
-	}
-}
-CLK_OF_DECLARE(sun6i_a31_ahb1, "allwinner,sun6i-a31-ahb1-clk", sun6i_ahb1_clk_setup);
-
 /* Maximum number of parents our clocks have */
 #define SUNXI_MAX_PARENTS	5
 
@@ -246,49 +38,45 @@ CLK_OF_DECLARE(sun6i_a31_ahb1, "allwinner,sun6i-a31-ahb1-clk", sun6i_ahb1_clk_se
  * parent_rate is always 24Mhz
  */
 
-static void sun4i_get_pll1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_get_pll1_factors(struct factors_request *req)
 {
 	u8 div;
 
 	/* Normalize value to a 6M multiple */
-	div = *freq / 6000000;
-	*freq = 6000000 * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
+	div = req->rate / 6000000;
+	req->rate = 6000000 * div;
 
 	/* m is always zero for pll1 */
-	*m = 0;
+	req->m = 0;
 
 	/* k is 1 only on these cases */
-	if (*freq >= 768000000 || *freq == 42000000 || *freq == 54000000)
-		*k = 1;
+	if (req->rate >= 768000000 || req->rate == 42000000 ||
+			req->rate == 54000000)
+		req->k = 1;
 	else
-		*k = 0;
+		req->k = 0;
 
 	/* p will be 3 for divs under 10 */
 	if (div < 10)
-		*p = 3;
+		req->p = 3;
 
 	/* p will be 2 for divs between 10 - 20 and odd divs under 32 */
 	else if (div < 20 || (div < 32 && (div & 1)))
-		*p = 2;
+		req->p = 2;
 
 	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
 	 * of divs between 40-62 */
 	else if (div < 40 || (div < 64 && (div & 2)))
-		*p = 1;
+		req->p = 1;
 
 	/* any other entries have p = 0 */
 	else
-		*p = 0;
+		req->p = 0;
 
 	/* calculate a suitable n based on k and p */
-	div <<= *p;
-	div /= (*k + 1);
-	*n = div / 4;
+	div <<= req->p;
+	div /= (req->k + 1);
+	req->n = div / 4;
 }
 
 /**
@@ -297,15 +85,14 @@ static void sun4i_get_pll1_factors(u32 *freq, u32 parent_rate,
  * rate = parent_rate * (n + 1) * (k + 1) / (m + 1);
  * parent_rate should always be 24MHz
  */
-static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun6i_a31_get_pll1_factors(struct factors_request *req)
 {
 	/*
 	 * We can operate only on MHz, this will make our life easier
 	 * later.
 	 */
-	u32 freq_mhz = *freq / 1000000;
-	u32 parent_freq_mhz = parent_rate / 1000000;
+	u32 freq_mhz = req->rate / 1000000;
+	u32 parent_freq_mhz = req->parent_rate / 1000000;
 
 	/*
 	 * Round down the frequency to the closest multiple of either
@@ -319,28 +106,20 @@ static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
 	else
 		freq_mhz = round_freq_16;
 
-	*freq = freq_mhz * 1000000;
-
-	/*
-	 * If the factors pointer are null, we were just called to
-	 * round down the frequency.
-	 * Exit.
-	 */
-	if (n == NULL)
-		return;
+	req->rate = freq_mhz * 1000000;
 
 	/* If the frequency is a multiple of 32 MHz, k is always 3 */
 	if (!(freq_mhz % 32))
-		*k = 3;
+		req->k = 3;
 	/* If the frequency is a multiple of 9 MHz, k is always 2 */
 	else if (!(freq_mhz % 9))
-		*k = 2;
+		req->k = 2;
 	/* If the frequency is a multiple of 8 MHz, k is always 1 */
 	else if (!(freq_mhz % 8))
-		*k = 1;
+		req->k = 1;
 	/* Otherwise, we don't use the k factor */
 	else
-		*k = 0;
+		req->k = 0;
 
 	/*
 	 * If the frequency is a multiple of 2 but not a multiple of
@@ -351,27 +130,28 @@ static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
 	 * somehow relates to this frequency.
 	 */
 	if ((freq_mhz % 6) == 2 || (freq_mhz % 6) == 4)
-		*m = 2;
+		req->m = 2;
 	/*
 	 * If the frequency is a multiple of 6MHz, but the factor is
 	 * odd, m will be 3
 	 */
 	else if ((freq_mhz / 6) & 1)
-		*m = 3;
+		req->m = 3;
 	/* Otherwise, we end up with m = 1 */
 	else
-		*m = 1;
+		req->m = 1;
 
 	/* Calculate n thanks to the above factors we already got */
-	*n = freq_mhz * (*m + 1) / ((*k + 1) * parent_freq_mhz) - 1;
+	req->n = freq_mhz * (req->m + 1) / ((req->k + 1) * parent_freq_mhz)
+		 - 1;
 
 	/*
 	 * If n end up being outbound, and that we can still decrease
 	 * m, do it.
 	 */
-	if ((*n + 1) > 31 && (*m + 1) > 1) {
-		*n = (*n + 1) / 2 - 1;
-		*m = (*m + 1) / 2 - 1;
+	if ((req->n + 1) > 31 && (req->m + 1) > 1) {
+		req->n = (req->n + 1) / 2 - 1;
+		req->m = (req->m + 1) / 2 - 1;
 	}
 }
 
@@ -382,45 +162,41 @@ static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
  * parent_rate is always 24Mhz
  */
 
-static void sun8i_a23_get_pll1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun8i_a23_get_pll1_factors(struct factors_request *req)
 {
 	u8 div;
 
 	/* Normalize value to a 6M multiple */
-	div = *freq / 6000000;
-	*freq = 6000000 * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
+	div = req->rate / 6000000;
+	req->rate = 6000000 * div;
 
 	/* m is always zero for pll1 */
-	*m = 0;
+	req->m = 0;
 
 	/* k is 1 only on these cases */
-	if (*freq >= 768000000 || *freq == 42000000 || *freq == 54000000)
-		*k = 1;
+	if (req->rate >= 768000000 || req->rate == 42000000 ||
+			req->rate == 54000000)
+		req->k = 1;
 	else
-		*k = 0;
+		req->k = 0;
 
 	/* p will be 2 for divs under 20 and odd divs under 32 */
 	if (div < 20 || (div < 32 && (div & 1)))
-		*p = 2;
+		req->p = 2;
 
 	/* p will be 1 for even divs under 32, divs under 40 and odd pairs
 	 * of divs between 40-62 */
 	else if (div < 40 || (div < 64 && (div & 2)))
-		*p = 1;
+		req->p = 1;
 
 	/* any other entries have p = 0 */
 	else
-		*p = 0;
+		req->p = 0;
 
 	/* calculate a suitable n based on k and p */
-	div <<= *p;
-	div /= (*k + 1);
-	*n = div / 4 - 1;
+	div <<= req->p;
+	div /= (req->k + 1);
+	req->n = div / 4 - 1;
 }
 
 /**
@@ -430,29 +206,24 @@ static void sun8i_a23_get_pll1_factors(u32 *freq, u32 parent_rate,
  * parent_rate is always 24Mhz
  */
 
-static void sun4i_get_pll5_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_get_pll5_factors(struct factors_request *req)
 {
 	u8 div;
 
 	/* Normalize value to a parent_rate multiple (24M) */
-	div = *freq / parent_rate;
-	*freq = parent_rate * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
+	div = req->rate / req->parent_rate;
+	req->rate = req->parent_rate * div;
 
 	if (div < 31)
-		*k = 0;
+		req->k = 0;
 	else if (div / 2 < 31)
-		*k = 1;
+		req->k = 1;
 	else if (div / 3 < 31)
-		*k = 2;
+		req->k = 2;
 	else
-		*k = 3;
+		req->k = 3;
 
-	*n = DIV_ROUND_UP(div, (*k+1));
+	req->n = DIV_ROUND_UP(div, (req->k + 1));
 }
 
 /**
@@ -462,24 +233,19 @@ static void sun4i_get_pll5_factors(u32 *freq, u32 parent_rate,
  * parent_rate is always 24Mhz
  */
 
-static void sun6i_a31_get_pll6_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun6i_a31_get_pll6_factors(struct factors_request *req)
 {
 	u8 div;
 
 	/* Normalize value to a parent_rate multiple (24M) */
-	div = *freq / parent_rate;
-	*freq = parent_rate * div;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
+	div = req->rate / req->parent_rate;
+	req->rate = req->parent_rate * div;
 
-	*k = div / 32;
-	if (*k > 3)
-		*k = 3;
+	req->k = div / 32;
+	if (req->k > 3)
+		req->k = 3;
 
-	*n = DIV_ROUND_UP(div, (*k+1)) - 1;
+	req->n = DIV_ROUND_UP(div, (req->k + 1)) - 1;
 }
 
 /**
@@ -488,37 +254,94 @@ static void sun6i_a31_get_pll6_factors(u32 *freq, u32 parent_rate,
  * rate = parent_rate >> p
  */
 
-static void sun5i_a13_get_ahb_factors(u32 *freq, u32 parent_rate,
-				       u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun5i_a13_get_ahb_factors(struct factors_request *req)
 {
 	u32 div;
 
 	/* divide only */
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
 	/*
 	 * user manual says valid speed is 8k ~ 276M, but tests show it
 	 * can work at speeds up to 300M, just after reparenting to pll6
 	 */
-	if (*freq < 8000)
-		*freq = 8000;
-	if (*freq > 300000000)
-		*freq = 300000000;
+	if (req->rate < 8000)
+		req->rate = 8000;
+	if (req->rate > 300000000)
+		req->rate = 300000000;
 
-	div = order_base_2(DIV_ROUND_UP(parent_rate, *freq));
+	div = order_base_2(DIV_ROUND_UP(req->parent_rate, req->rate));
 
 	/* p = 0 ~ 3 */
 	if (div > 3)
 		div = 3;
 
-	*freq = parent_rate >> div;
+	req->rate = req->parent_rate >> div;
 
-	/* we were called to round the frequency, we can now return */
-	if (p == NULL)
-		return;
+	req->p = div;
+}
+
+#define SUN6I_AHB1_PARENT_PLL6	3
+
+/**
+ * sun6i_a31_get_ahb_factors() - calculates m, p factors for AHB
+ * AHB rate is calculated as follows
+ * rate = parent_rate >> p
+ *
+ * if parent is pll6, then
+ * parent_rate = pll6 rate / (m + 1)
+ */
+
+static void sun6i_get_ahb1_factors(struct factors_request *req)
+{
+	u8 div, calcp, calcm = 1;
+
+	/*
+	 * clock can only divide, so we will never be able to achieve
+	 * frequencies higher than the parent frequency
+	 */
+	if (req->parent_rate && req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
+
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
+
+	/* calculate pre-divider if parent is pll6 */
+	if (req->parent_index == SUN6I_AHB1_PARENT_PLL6) {
+		if (div < 4)
+			calcp = 0;
+		else if (div / 2 < 4)
+			calcp = 1;
+		else if (div / 4 < 4)
+			calcp = 2;
+		else
+			calcp = 3;
+
+		calcm = DIV_ROUND_UP(div, 1 << calcp);
+	} else {
+		calcp = __roundup_pow_of_two(div);
+		calcp = calcp > 3 ? 3 : calcp;
+	}
 
-	*p = div;
+	req->rate = (req->parent_rate / calcm) >> calcp;
+	req->p = calcp;
+	req->m = calcm - 1;
+}
+
+/**
+ * sun6i_ahb1_recalc() - calculates AHB clock rate from m, p factors and
+ *			 parent index
+ */
+static void sun6i_ahb1_recalc(struct factors_request *req)
+{
+	req->rate = req->parent_rate;
+
+	/* apply pre-divider first if parent is pll6 */
+	if (req->parent_index == SUN6I_AHB1_PARENT_PLL6)
+		req->rate /= req->m + 1;
+
+	/* clk divider */
+	req->rate >>= req->p;
 }
 
 /**
@@ -527,39 +350,34 @@ static void sun5i_a13_get_ahb_factors(u32 *freq, u32 parent_rate,
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun4i_get_apb1_factors(u32 *freq, u32 parent_rate,
-				   u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun4i_get_apb1_factors(struct factors_request *req)
 {
 	u8 calcm, calcp;
+	int div;
 
-	if (parent_rate < *freq)
-		*freq = parent_rate;
+	if (req->parent_rate < req->rate)
+		req->rate = req->parent_rate;
 
-	parent_rate = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	/* Invalid rate! */
-	if (parent_rate > 32)
+	if (div > 32)
 		return;
 
-	if (parent_rate <= 4)
+	if (div <= 4)
 		calcp = 0;
-	else if (parent_rate <= 8)
+	else if (div <= 8)
 		calcp = 1;
-	else if (parent_rate <= 16)
+	else if (div <= 16)
 		calcp = 2;
 	else
 		calcp = 3;
 
-	calcm = (parent_rate >> calcp) - 1;
+	calcm = (req->parent_rate >> calcp) - 1;
 
-	*freq = (parent_rate >> calcp) / (calcm + 1);
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / (calcm + 1);
+	req->m = calcm;
+	req->p = calcp;
 }
 
 
@@ -571,17 +389,16 @@ static void sun4i_get_apb1_factors(u32 *freq, u32 parent_rate,
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sun7i_a20_get_out_factors(u32 *freq, u32 parent_rate,
-				      u8 *n, u8 *k, u8 *m, u8 *p)
+static void sun7i_a20_get_out_factors(struct factors_request *req)
 {
 	u8 div, calcm, calcp;
 
 	/* These clocks can only divide, so we will never be able to achieve
 	 * frequencies higher than the parent frequency */
-	if (*freq > parent_rate)
-		*freq = parent_rate;
+	if (req->rate > req->parent_rate)
+		req->rate = req->parent_rate;
 
-	div = DIV_ROUND_UP(parent_rate, *freq);
+	div = DIV_ROUND_UP(req->parent_rate, req->rate);
 
 	if (div < 32)
 		calcp = 0;
@@ -594,21 +411,16 @@ static void sun7i_a20_get_out_factors(u32 *freq, u32 parent_rate,
 
 	calcm = DIV_ROUND_UP(div, 1 << calcp);
 
-	*freq = (parent_rate >> calcp) / calcm;
-
-	/* we were called to round the frequency, we can now return */
-	if (n == NULL)
-		return;
-
-	*m = calcm - 1;
-	*p = calcp;
+	req->rate = (req->parent_rate >> calcp) / calcm;
+	req->m = calcm - 1;
+	req->p = calcp;
 }
 
 /**
  * sunxi_factors_clk_setup() - Setup function for factor clocks
  */
 
-static struct clk_factors_config sun4i_pll1_config = {
+static const struct clk_factors_config sun4i_pll1_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -619,7 +431,7 @@ static struct clk_factors_config sun4i_pll1_config = {
 	.pwidth = 2,
 };
 
-static struct clk_factors_config sun6i_a31_pll1_config = {
+static const struct clk_factors_config sun6i_a31_pll1_config = {
 	.nshift	= 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -629,7 +441,7 @@ static struct clk_factors_config sun6i_a31_pll1_config = {
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun8i_a23_pll1_config = {
+static const struct clk_factors_config sun8i_a23_pll1_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -641,14 +453,14 @@ static struct clk_factors_config sun8i_a23_pll1_config = {
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun4i_pll5_config = {
+static const struct clk_factors_config sun4i_pll5_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
 	.kwidth = 2,
 };
 
-static struct clk_factors_config sun6i_a31_pll6_config = {
+static const struct clk_factors_config sun6i_a31_pll6_config = {
 	.nshift	= 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -656,12 +468,19 @@ static struct clk_factors_config sun6i_a31_pll6_config = {
 	.n_start = 1,
 };
 
-static struct clk_factors_config sun5i_a13_ahb_config = {
+static const struct clk_factors_config sun5i_a13_ahb_config = {
 	.pshift = 4,
 	.pwidth = 2,
 };
 
-static struct clk_factors_config sun4i_apb1_config = {
+static const struct clk_factors_config sun6i_ahb1_config = {
+	.mshift = 6,
+	.mwidth = 2,
+	.pshift = 4,
+	.pwidth = 2,
+};
+
+static const struct clk_factors_config sun4i_apb1_config = {
 	.mshift = 0,
 	.mwidth = 5,
 	.pshift = 16,
@@ -669,7 +488,7 @@ static struct clk_factors_config sun4i_apb1_config = {
 };
 
 /* user manual says "n" but it's really "p" */
-static struct clk_factors_config sun7i_a20_out_config = {
+static const struct clk_factors_config sun7i_a20_out_config = {
 	.mshift = 8,
 	.mwidth = 5,
 	.pshift = 20,
@@ -728,6 +547,14 @@ static const struct factors_data sun5i_a13_ahb_data __initconst = {
 	.getter = sun5i_a13_get_ahb_factors,
 };
 
+static const struct factors_data sun6i_ahb1_data __initconst = {
+	.mux = 12,
+	.muxmask = BIT(1) | BIT(0),
+	.table = &sun6i_ahb1_config,
+	.getter = sun6i_get_ahb1_factors,
+	.recalc = sun6i_ahb1_recalc,
+};
+
 static const struct factors_data sun4i_apb1_data __initconst = {
 	.mux = 24,
 	.muxmask = BIT(1) | BIT(0),
@@ -758,6 +585,61 @@ static struct clk * __init sunxi_factors_clk_setup(struct device_node *node,
 	return sunxi_factors_register(node, data, &clk_lock, reg);
 }
 
+static void __init sun4i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun4i_pll1_data);
+}
+CLK_OF_DECLARE(sun4i_pll1, "allwinner,sun4i-a10-pll1-clk",
+	       sun4i_pll1_clk_setup);
+
+static void __init sun6i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun6i_a31_pll1_data);
+}
+CLK_OF_DECLARE(sun6i_pll1, "allwinner,sun6i-a31-pll1-clk",
+	       sun6i_pll1_clk_setup);
+
+static void __init sun8i_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun8i_a23_pll1_data);
+}
+CLK_OF_DECLARE(sun8i_pll1, "allwinner,sun8i-a23-pll1-clk",
+	       sun8i_pll1_clk_setup);
+
+static void __init sun7i_pll4_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun7i_a20_pll4_data);
+}
+CLK_OF_DECLARE(sun7i_pll4, "allwinner,sun7i-a20-pll4-clk",
+	       sun7i_pll4_clk_setup);
+
+static void __init sun5i_ahb_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun5i_a13_ahb_data);
+}
+CLK_OF_DECLARE(sun5i_ahb, "allwinner,sun5i-a13-ahb-clk",
+	       sun5i_ahb_clk_setup);
+
+static void __init sun6i_ahb1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun6i_ahb1_data);
+}
+CLK_OF_DECLARE(sun6i_a31_ahb1, "allwinner,sun6i-a31-ahb1-clk",
+	       sun6i_ahb1_clk_setup);
+
+static void __init sun4i_apb1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun4i_apb1_data);
+}
+CLK_OF_DECLARE(sun4i_apb1, "allwinner,sun4i-a10-apb1-clk",
+	       sun4i_apb1_clk_setup);
+
+static void __init sun7i_out_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun7i_a20_out_data);
+}
+CLK_OF_DECLARE(sun7i_out, "allwinner,sun7i-a20-out-clk",
+	       sun7i_out_clk_setup);
 
 
 /**
@@ -782,8 +664,8 @@ static const struct mux_data sun8i_h3_ahb2_mux_data __initconst = {
 	.shift = 0,
 };
 
-static void __init sunxi_mux_clk_setup(struct device_node *node,
-				       struct mux_data *data)
+static struct clk * __init sunxi_mux_clk_setup(struct device_node *node,
+					       const struct mux_data *data)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
@@ -792,21 +674,71 @@ static void __init sunxi_mux_clk_setup(struct device_node *node,
 	int i;
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for mux-clk: %s\n",
+		       of_node_full_name(node));
+		return NULL;
+	}
 
 	i = of_clk_parent_fill(node, parents, SUNXI_MAX_PARENTS);
-	of_property_read_string(node, "clock-output-names", &clk_name);
+	if (of_property_read_string(node, "clock-output-names", &clk_name)) {
+		pr_err("%s: could not read clock-output-names from \"%s\"\n",
+		       __func__, of_node_full_name(node));
+		goto out_unmap;
+	}
 
 	clk = clk_register_mux(NULL, clk_name, parents, i,
 			       CLK_SET_RATE_PARENT, reg,
 			       data->shift, SUNXI_MUX_GATE_WIDTH,
 			       0, &clk_lock);
 
-	if (clk) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
+	if (IS_ERR(clk)) {
+		pr_err("%s: failed to register mux clock %s: %ld\n", __func__,
+		       clk_name, PTR_ERR(clk));
+		goto out_unmap;
+	}
+
+	if (of_clk_add_provider(node, of_clk_src_simple_get, clk)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		clk_unregister_divider(clk);
+		goto out_unmap;
 	}
+
+	return clk;
+out_unmap:
+	iounmap(reg);
+	return NULL;
+}
+
+static void __init sun4i_cpu_clk_setup(struct device_node *node)
+{
+	struct clk *clk;
+
+	clk = sunxi_mux_clk_setup(node, &sun4i_cpu_mux_data);
+	if (!clk)
+		return;
+
+	/* Protect CPU clock */
+	__clk_get(clk);
+	clk_prepare_enable(clk);
+}
+CLK_OF_DECLARE(sun4i_cpu, "allwinner,sun4i-a10-cpu-clk",
+	       sun4i_cpu_clk_setup);
+
+static void __init sun6i_ahb1_mux_clk_setup(struct device_node *node)
+{
+	sunxi_mux_clk_setup(node, &sun6i_a31_ahb1_mux_data);
 }
+CLK_OF_DECLARE(sun6i_ahb1_mux, "allwinner,sun6i-a31-ahb1-mux-clk",
+	       sun6i_ahb1_mux_clk_setup);
 
+static void __init sun8i_ahb2_clk_setup(struct device_node *node)
+{
+	sunxi_mux_clk_setup(node, &sun8i_h3_ahb2_mux_data);
+}
+CLK_OF_DECLARE(sun8i_ahb2, "allwinner,sun8i-h3-ahb2-clk",
+	       sun8i_ahb2_clk_setup);
 
 
 /**
@@ -865,7 +797,7 @@ static const struct div_data sun4i_apb0_data __initconst = {
 };
 
 static void __init sunxi_divider_clk_setup(struct device_node *node,
-					   struct div_data *data)
+					   const struct div_data *data)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
@@ -873,21 +805,77 @@ static void __init sunxi_divider_clk_setup(struct device_node *node,
 	void __iomem *reg;
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for mux-clk: %s\n",
+		       of_node_full_name(node));
+		return;
+	}
 
 	clk_parent = of_clk_get_parent_name(node, 0);
 
-	of_property_read_string(node, "clock-output-names", &clk_name);
+	if (of_property_read_string(node, "clock-output-names", &clk_name)) {
+		pr_err("%s: could not read clock-output-names from \"%s\"\n",
+		       __func__, of_node_full_name(node));
+		goto out_unmap;
+	}
 
 	clk = clk_register_divider_table(NULL, clk_name, clk_parent, 0,
 					 reg, data->shift, data->width,
 					 data->pow ? CLK_DIVIDER_POWER_OF_TWO : 0,
 					 data->table, &clk_lock);
-	if (clk) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-		clk_register_clkdev(clk, clk_name, NULL);
+	if (IS_ERR(clk)) {
+		pr_err("%s: failed to register divider clock %s: %ld\n",
+		       __func__, clk_name, PTR_ERR(clk));
+		goto out_unmap;
 	}
+
+	if (of_clk_add_provider(node, of_clk_src_simple_get, clk)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		goto out_unregister;
+	}
+
+	if (clk_register_clkdev(clk, clk_name, NULL)) {
+		of_clk_del_provider(node);
+		goto out_unregister;
+	}
+
+	return;
+out_unregister:
+	clk_unregister_divider(clk);
+
+out_unmap:
+	iounmap(reg);
 }
 
+static void __init sun4i_ahb_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_ahb_data);
+}
+CLK_OF_DECLARE(sun4i_ahb, "allwinner,sun4i-a10-ahb-clk",
+	       sun4i_ahb_clk_setup);
+
+static void __init sun4i_apb0_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_apb0_data);
+}
+CLK_OF_DECLARE(sun4i_apb0, "allwinner,sun4i-a10-apb0-clk",
+	       sun4i_apb0_clk_setup);
+
+static void __init sun4i_axi_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun4i_axi_data);
+}
+CLK_OF_DECLARE(sun4i_axi, "allwinner,sun4i-a10-axi-clk",
+	       sun4i_axi_clk_setup);
+
+static void __init sun8i_axi_clk_setup(struct device_node *node)
+{
+	sunxi_divider_clk_setup(node, &sun8i_a23_axi_data);
+}
+CLK_OF_DECLARE(sun8i_axi, "allwinner,sun8i-a23-axi-clk",
+	       sun8i_axi_clk_setup);
+
 
 
 /**
@@ -975,8 +963,8 @@ static const struct divs_data sun6i_a31_pll6_divs_data __initconst = {
  *           |________________________|
  */
 
-static void __init sunxi_divs_clk_setup(struct device_node *node,
-					struct divs_data *data)
+static struct clk ** __init sunxi_divs_clk_setup(struct device_node *node,
+						 const struct divs_data *data)
 {
 	struct clk_onecell_data *clk_data;
 	const char *parent;
@@ -997,13 +985,20 @@ static void __init sunxi_divs_clk_setup(struct device_node *node,
 
 	/* Set up factor clock that we will be dividing */
 	pclk = sunxi_factors_clk_setup(node, data->factors);
+	if (!pclk)
+		return NULL;
 	parent = __clk_get_name(pclk);
 
 	reg = of_iomap(node, 0);
+	if (!reg) {
+		pr_err("Could not map registers for divs-clk: %s\n",
+		       of_node_full_name(node));
+		return NULL;
+	}
 
 	clk_data = kmalloc(sizeof(struct clk_onecell_data), GFP_KERNEL);
 	if (!clk_data)
-		return;
+		goto out_unmap;
 
 	clks = kcalloc(ndivs, sizeof(*clks), GFP_KERNEL);
 	if (!clks)
@@ -1081,146 +1076,54 @@ static void __init sunxi_divs_clk_setup(struct device_node *node,
 						 clkflags);
 
 		WARN_ON(IS_ERR(clk_data->clks[i]));
-		clk_register_clkdev(clks[i], clk_name, NULL);
 	}
 
 	/* Adjust to the real max */
 	clk_data->clk_num = i;
 
-	of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
-
-	return;
+	if (of_clk_add_provider(node, of_clk_src_onecell_get, clk_data)) {
+		pr_err("%s: failed to add clock provider for %s\n",
+		       __func__, clk_name);
+		goto free_gate;
+	}
 
+	return clks;
 free_gate:
 	kfree(gate);
 free_clks:
 	kfree(clks);
 free_clkdata:
 	kfree(clk_data);
+out_unmap:
+	iounmap(reg);
+	return NULL;
 }
 
-
-
-/* Matches for factors clocks */
-static const struct of_device_id clk_factors_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-pll1-clk", .data = &sun4i_pll1_data,},
-	{.compatible = "allwinner,sun6i-a31-pll1-clk", .data = &sun6i_a31_pll1_data,},
-	{.compatible = "allwinner,sun8i-a23-pll1-clk", .data = &sun8i_a23_pll1_data,},
-	{.compatible = "allwinner,sun7i-a20-pll4-clk", .data = &sun7i_a20_pll4_data,},
-	{.compatible = "allwinner,sun5i-a13-ahb-clk", .data = &sun5i_a13_ahb_data,},
-	{.compatible = "allwinner,sun4i-a10-apb1-clk", .data = &sun4i_apb1_data,},
-	{.compatible = "allwinner,sun7i-a20-out-clk", .data = &sun7i_a20_out_data,},
-	{}
-};
-
-/* Matches for divider clocks */
-static const struct of_device_id clk_div_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-axi-clk", .data = &sun4i_axi_data,},
-	{.compatible = "allwinner,sun8i-a23-axi-clk", .data = &sun8i_a23_axi_data,},
-	{.compatible = "allwinner,sun4i-a10-ahb-clk", .data = &sun4i_ahb_data,},
-	{.compatible = "allwinner,sun4i-a10-apb0-clk", .data = &sun4i_apb0_data,},
-	{}
-};
-
-/* Matches for divided outputs */
-static const struct of_device_id clk_divs_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-pll5-clk", .data = &pll5_divs_data,},
-	{.compatible = "allwinner,sun4i-a10-pll6-clk", .data = &pll6_divs_data,},
-	{.compatible = "allwinner,sun6i-a31-pll6-clk", .data = &sun6i_a31_pll6_divs_data,},
-	{}
-};
-
-/* Matches for mux clocks */
-static const struct of_device_id clk_mux_match[] __initconst = {
-	{.compatible = "allwinner,sun4i-a10-cpu-clk", .data = &sun4i_cpu_mux_data,},
-	{.compatible = "allwinner,sun6i-a31-ahb1-mux-clk", .data = &sun6i_a31_ahb1_mux_data,},
-	{.compatible = "allwinner,sun8i-h3-ahb2-clk", .data = &sun8i_h3_ahb2_mux_data,},
-	{}
-};
-
-
-static void __init of_sunxi_table_clock_setup(const struct of_device_id *clk_match,
-					      void *function)
-{
-	struct device_node *np;
-	const struct div_data *data;
-	const struct of_device_id *match;
-	void (*setup_function)(struct device_node *, const void *) = function;
-
-	for_each_matching_node_and_match(np, clk_match, &match) {
-		data = match->data;
-		setup_function(np, data);
-	}
-}
-
-static void __init sunxi_init_clocks(const char *clocks[], int nclocks)
-{
-	unsigned int i;
-
-	/* Register divided output clocks */
-	of_sunxi_table_clock_setup(clk_divs_match, sunxi_divs_clk_setup);
-
-	/* Register factor clocks */
-	of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup);
-
-	/* Register divider clocks */
-	of_sunxi_table_clock_setup(clk_div_match, sunxi_divider_clk_setup);
-
-	/* Register mux clocks */
-	of_sunxi_table_clock_setup(clk_mux_match, sunxi_mux_clk_setup);
-
-	/* Protect the clocks that needs to stay on */
-	for (i = 0; i < nclocks; i++) {
-		struct clk *clk = clk_get(NULL, clocks[i]);
-
-		if (!IS_ERR(clk))
-			clk_prepare_enable(clk);
-	}
-}
-
-static const char *sun4i_a10_critical_clocks[] __initdata = {
-	"pll5_ddr",
-};
-
-static void __init sun4i_a10_init_clocks(struct device_node *node)
+static void __init sun4i_pll5_clk_setup(struct device_node *node)
 {
-	sunxi_init_clocks(sun4i_a10_critical_clocks,
-			  ARRAY_SIZE(sun4i_a10_critical_clocks));
-}
-CLK_OF_DECLARE(sun4i_a10_clk_init, "allwinner,sun4i-a10", sun4i_a10_init_clocks);
+	struct clk **clks;
 
-static const char *sun5i_critical_clocks[] __initdata = {
-	"cpu",
-	"pll5_ddr",
-};
+	clks = sunxi_divs_clk_setup(node, &pll5_divs_data);
+	if (!clks)
+		return;
 
-static void __init sun5i_init_clocks(struct device_node *node)
-{
-	sunxi_init_clocks(sun5i_critical_clocks,
-			  ARRAY_SIZE(sun5i_critical_clocks));
+	/* Protect PLL5_DDR */
+	__clk_get(clks[0]);
+	clk_prepare_enable(clks[0]);
 }
-CLK_OF_DECLARE(sun5i_a10s_clk_init, "allwinner,sun5i-a10s", sun5i_init_clocks);
-CLK_OF_DECLARE(sun5i_a13_clk_init, "allwinner,sun5i-a13", sun5i_init_clocks);
-CLK_OF_DECLARE(sun5i_r8_clk_init, "allwinner,sun5i-r8", sun5i_init_clocks);
-CLK_OF_DECLARE(sun7i_a20_clk_init, "allwinner,sun7i-a20", sun5i_init_clocks);
-
-static const char *sun6i_critical_clocks[] __initdata = {
-	"cpu",
-};
+CLK_OF_DECLARE(sun4i_pll5, "allwinner,sun4i-a10-pll5-clk",
+	       sun4i_pll5_clk_setup);
 
-static void __init sun6i_init_clocks(struct device_node *node)
+static void __init sun4i_pll6_clk_setup(struct device_node *node)
 {
-	sunxi_init_clocks(sun6i_critical_clocks,
-			  ARRAY_SIZE(sun6i_critical_clocks));
+	sunxi_divs_clk_setup(node, &pll6_divs_data);
 }
-CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks);
-CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks);
-CLK_OF_DECLARE(sun8i_h3_clk_init, "allwinner,sun8i-h3", sun6i_init_clocks);
+CLK_OF_DECLARE(sun4i_pll6, "allwinner,sun4i-a10-pll6-clk",
+	       sun4i_pll6_clk_setup);
 
-static void __init sun9i_init_clocks(struct device_node *node)
+static void __init sun6i_pll6_clk_setup(struct device_node *node)
 {
-	sunxi_init_clocks(NULL, 0);
+	sunxi_divs_clk_setup(node, &sun6i_a31_pll6_divs_data);
 }
-CLK_OF_DECLARE(sun9i_a80_clk_init, "allwinner,sun9i-a80", sun9i_init_clocks);
+CLK_OF_DECLARE(sun6i_pll6, "allwinner,sun6i-a31-pll6-clk",
+	       sun6i_pll6_clk_setup);
diff --git a/drivers/clk/sunxi/clk-usb.c b/drivers/clk/sunxi/clk-usb.c
index 67b8e38f4ee9..5432b1c198a4 100644
--- a/drivers/clk/sunxi/clk-usb.c
+++ b/drivers/clk/sunxi/clk-usb.c
@@ -216,6 +216,18 @@ static void __init sun8i_a23_usb_setup(struct device_node *node)
 }
 CLK_OF_DECLARE(sun8i_a23_usb, "allwinner,sun8i-a23-usb-clk", sun8i_a23_usb_setup);
 
+static const struct usb_clk_data sun8i_h3_usb_clk_data __initconst = {
+	.clk_mask =  BIT(19) | BIT(18) | BIT(17) | BIT(16) |
+		     BIT(11) | BIT(10) | BIT(9) | BIT(8),
+	.reset_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
+};
+
+static void __init sun8i_h3_usb_setup(struct device_node *node)
+{
+	sunxi_usb_clk_setup(node, &sun8i_h3_usb_clk_data, &sun4i_a10_usb_lock);
+}
+CLK_OF_DECLARE(sun8i_h3_usb, "allwinner,sun8i-h3-usb-clk", sun8i_h3_usb_setup);
+
 static const struct usb_clk_data sun9i_a80_usb_mod_data __initconst = {
 	.clk_mask = BIT(6) | BIT(5) | BIT(4) | BIT(3) | BIT(2) | BIT(1),
 	.reset_mask = BIT(19) | BIT(18) | BIT(17),
@@ -243,15 +255,3 @@ static void __init sun9i_a80_usb_phy_setup(struct device_node *node)
 	sunxi_usb_clk_setup(node, &sun9i_a80_usb_phy_data, &a80_usb_phy_lock);
 }
 CLK_OF_DECLARE(sun9i_a80_usb_phy, "allwinner,sun9i-a80-usb-phy-clk", sun9i_a80_usb_phy_setup);
-
-static const struct usb_clk_data sun8i_h3_usb_clk_data __initconst = {
-	.clk_mask =  BIT(19) | BIT(18) | BIT(17) | BIT(16) |
-		     BIT(11) | BIT(10) | BIT(9) | BIT(8),
-	.reset_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
-};
-
-static void __init sun8i_h3_usb_setup(struct device_node *node)
-{
-	sunxi_usb_clk_setup(node, &sun8i_h3_usb_clk_data, &sun4i_a10_usb_lock);
-}
-CLK_OF_DECLARE(sun8i_h3_usb, "allwinner,sun8i-h3-usb-clk", sun8i_h3_usb_setup);
diff --git a/drivers/clk/tegra/clk-audio-sync.c b/drivers/clk/tegra/clk-audio-sync.c
index c0f7843e80e6..92d04ce2dee6 100644
--- a/drivers/clk/tegra/clk-audio-sync.c
+++ b/drivers/clk/tegra/clk-audio-sync.c
@@ -72,7 +72,7 @@ struct clk *tegra_clk_register_sync_source(const char *name,
 
 	init.ops = &tegra_clk_sync_source_ops;
 	init.name = name;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.parent_names = NULL;
 	init.num_parents = 0;
 
diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c
index 86a307b17eb0..19bfa07e24b1 100644
--- a/drivers/clk/tegra/clk-dfll.c
+++ b/drivers/clk/tegra/clk-dfll.c
@@ -995,7 +995,6 @@ static const struct clk_ops dfll_clk_ops = {
 };
 
 static struct clk_init_data dfll_clk_init_data = {
-	.flags		= CLK_IS_ROOT,
 	.ops		= &dfll_clk_ops,
 	.num_parents	= 0,
 };
diff --git a/drivers/clk/tegra/clk-tegra-fixed.c b/drivers/clk/tegra/clk-tegra-fixed.c
index da0b5941c89f..d64ec7a1b976 100644
--- a/drivers/clk/tegra/clk-tegra-fixed.c
+++ b/drivers/clk/tegra/clk-tegra-fixed.c
@@ -52,8 +52,7 @@ int __init tegra_osc_clk_init(void __iomem *clk_base, struct tegra_clk *clks,
 		return -EINVAL;
 	}
 
-	osc = clk_register_fixed_rate(NULL, "osc", NULL, CLK_IS_ROOT,
-				      *osc_freq);
+	osc = clk_register_fixed_rate(NULL, "osc", NULL, 0, *osc_freq);
 
 	dt_clk = tegra_lookup_dt_id(tegra_clk_clk_m, clks);
 	if (!dt_clk)
@@ -88,8 +87,7 @@ void __init tegra_fixed_clk_init(struct tegra_clk *tegra_clks)
 	/* clk_32k */
 	dt_clk = tegra_lookup_dt_id(tegra_clk_clk_32k, tegra_clks);
 	if (dt_clk) {
-		clk = clk_register_fixed_rate(NULL, "clk_32k", NULL,
-					CLK_IS_ROOT, 32768);
+		clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, 0, 32768);
 		*dt_clk = clk;
 	}
 
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 4a24aa4bbdea..df47ec3169c3 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -972,8 +972,7 @@ static void __init tegra114_fixed_clk_init(void __iomem *clk_base)
 	struct clk *clk;
 
 	/* clk_32k */
-	clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, CLK_IS_ROOT,
-				      32768);
+	clk = clk_register_fixed_rate(NULL, "clk_32k", NULL, 0, 32768);
 	clks[TEGRA114_CLK_CLK_32K] = clk;
 
 	/* clk_m_div2 */
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index 7a48e986c4c9..7ad63837694f 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -837,15 +837,13 @@ static void __init tegra20_periph_clk_init(void)
 	clks[TEGRA20_CLK_PEX] = clk;
 
 	/* cdev1 */
-	clk = clk_register_fixed_rate(NULL, "cdev1_fixed", NULL, CLK_IS_ROOT,
-				      26000000);
+	clk = clk_register_fixed_rate(NULL, "cdev1_fixed", NULL, 0, 26000000);
 	clk = tegra_clk_register_periph_gate("cdev1", "cdev1_fixed", 0,
 				    clk_base, 0, 94, periph_clk_enb_refcnt);
 	clks[TEGRA20_CLK_CDEV1] = clk;
 
 	/* cdev2 */
-	clk = clk_register_fixed_rate(NULL, "cdev2_fixed", NULL, CLK_IS_ROOT,
-				      26000000);
+	clk = clk_register_fixed_rate(NULL, "cdev2_fixed", NULL, 0, 26000000);
 	clk = tegra_clk_register_periph_gate("cdev2", "cdev2_fixed", 0,
 				    clk_base, 0, 93, periph_clk_enb_refcnt);
 	clks[TEGRA20_CLK_CDEV2] = clk;
@@ -879,8 +877,8 @@ static void __init tegra20_osc_clk_init(void)
 	input_freq = tegra20_clk_measure_input_freq();
 
 	/* clk_m */
-	clk = clk_register_fixed_rate(NULL, "clk_m", NULL, CLK_IS_ROOT |
-				      CLK_IGNORE_UNUSED, input_freq);
+	clk = clk_register_fixed_rate(NULL, "clk_m", NULL, CLK_IGNORE_UNUSED,
+				      input_freq);
 	clks[TEGRA20_CLK_CLK_M] = clk;
 
 	/* pll_ref */
diff --git a/drivers/clk/ti/Kconfig b/drivers/clk/ti/Kconfig
new file mode 100644
index 000000000000..271341787e67
--- /dev/null
+++ b/drivers/clk/ti/Kconfig
@@ -0,0 +1,6 @@
+config COMMON_CLK_TI_ADPLL
+	tristate "Clock driver for dm814x ADPLL"
+	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	default y if SOC_TI81XX
+	---help---
+	  ADPLL clock driver for the dm814x SoC using common clock framework.
diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile
index d4ac96087ccd..0deac9821039 100644
--- a/drivers/clk/ti/Makefile
+++ b/drivers/clk/ti/Makefile
@@ -1,3 +1,5 @@
+ifeq ($(CONFIG_ARCH_OMAP2PLUS), y)
+
 obj-y					+= clk.o autoidle.o clockdomain.o
 clk-common				= dpll.o composite.o divider.o gate.o \
 					  fixed-factor.o mux.o apll.o \
@@ -18,3 +20,7 @@ obj-$(CONFIG_SOC_AM43XX)		+= $(clk-common) dpll3xxx.o clk-43xx.o
 ifdef CONFIG_ATAGS
 obj-$(CONFIG_ARCH_OMAP3)                += clk-3xxx-legacy.o
 endif
+
+endif	# CONFIG_ARCH_OMAP2PLUS
+
+obj-$(CONFIG_COMMON_CLK_TI_ADPLL)	+= adpll.o
diff --git a/drivers/clk/ti/adpll.c b/drivers/clk/ti/adpll.c
new file mode 100644
index 000000000000..255cafb18336
--- /dev/null
+++ b/drivers/clk/ti/adpll.c
@@ -0,0 +1,983 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/string.h>
+
+#define ADPLL_PLLSS_MMR_LOCK_OFFSET	0x00	/* Managed by MPPULL */
+#define ADPLL_PLLSS_MMR_LOCK_ENABLED	0x1f125B64
+#define ADPLL_PLLSS_MMR_UNLOCK_MAGIC	0x1eda4c3d
+
+#define ADPLL_PWRCTRL_OFFSET		0x00
+#define ADPLL_PWRCTRL_PONIN		5
+#define ADPLL_PWRCTRL_PGOODIN		4
+#define ADPLL_PWRCTRL_RET		3
+#define ADPLL_PWRCTRL_ISORET		2
+#define ADPLL_PWRCTRL_ISOSCAN		1
+#define ADPLL_PWRCTRL_OFFMODE		0
+
+#define ADPLL_CLKCTRL_OFFSET		0x04
+#define ADPLL_CLKCTRL_CLKDCOLDOEN	29
+#define ADPLL_CLKCTRL_IDLE		23
+#define ADPLL_CLKCTRL_CLKOUTEN		20
+#define ADPLL_CLKINPHIFSEL_ADPLL_S	19	/* REVISIT: which bit? */
+#define ADPLL_CLKCTRL_CLKOUTLDOEN_ADPLL_LJ 19
+#define ADPLL_CLKCTRL_ULOWCLKEN		18
+#define ADPLL_CLKCTRL_CLKDCOLDOPWDNZ	17
+#define ADPLL_CLKCTRL_M2PWDNZ		16
+#define ADPLL_CLKCTRL_M3PWDNZ_ADPLL_S	15
+#define ADPLL_CLKCTRL_LOWCURRSTDBY_ADPLL_S 13
+#define ADPLL_CLKCTRL_LPMODE_ADPLL_S	12
+#define ADPLL_CLKCTRL_REGM4XEN_ADPLL_S	10
+#define ADPLL_CLKCTRL_SELFREQDCO_ADPLL_LJ 10
+#define ADPLL_CLKCTRL_TINITZ		0
+
+#define ADPLL_TENABLE_OFFSET		0x08
+#define ADPLL_TENABLEDIV_OFFSET		0x8c
+
+#define ADPLL_M2NDIV_OFFSET		0x10
+#define ADPLL_M2NDIV_M2			16
+#define ADPLL_M2NDIV_M2_ADPLL_S_WIDTH	5
+#define ADPLL_M2NDIV_M2_ADPLL_LJ_WIDTH	7
+
+#define ADPLL_MN2DIV_OFFSET		0x14
+#define ADPLL_MN2DIV_N2			16
+
+#define ADPLL_FRACDIV_OFFSET		0x18
+#define ADPLL_FRACDIV_REGSD		24
+#define ADPLL_FRACDIV_FRACTIONALM	0
+#define ADPLL_FRACDIV_FRACTIONALM_MASK	0x3ffff
+
+#define ADPLL_BWCTRL_OFFSET		0x1c
+#define ADPLL_BWCTRL_BWCONTROL		1
+#define ADPLL_BWCTRL_BW_INCR_DECRZ	0
+
+#define ADPLL_RESERVED_OFFSET		0x20
+
+#define ADPLL_STATUS_OFFSET		0x24
+#define ADPLL_STATUS_PONOUT		31
+#define ADPLL_STATUS_PGOODOUT		30
+#define ADPLL_STATUS_LDOPWDN		29
+#define ADPLL_STATUS_RECAL_BSTATUS3	28
+#define ADPLL_STATUS_RECAL_OPPIN	27
+#define ADPLL_STATUS_PHASELOCK		10
+#define ADPLL_STATUS_FREQLOCK		9
+#define ADPLL_STATUS_BYPASSACK		8
+#define ADPLL_STATUS_LOSSREF		6
+#define ADPLL_STATUS_CLKOUTENACK	5
+#define ADPLL_STATUS_LOCK2		4
+#define ADPLL_STATUS_M2CHANGEACK	3
+#define ADPLL_STATUS_HIGHJITTER		1
+#define ADPLL_STATUS_BYPASS		0
+#define ADPLL_STATUS_PREPARED_MASK	(BIT(ADPLL_STATUS_PHASELOCK) | \
+					 BIT(ADPLL_STATUS_FREQLOCK))
+
+#define ADPLL_M3DIV_OFFSET		0x28	/* Only on MPUPLL */
+#define ADPLL_M3DIV_M3			0
+#define ADPLL_M3DIV_M3_WIDTH		5
+#define ADPLL_M3DIV_M3_MASK		0x1f
+
+#define ADPLL_RAMPCTRL_OFFSET		0x2c	/* Only on MPUPLL */
+#define ADPLL_RAMPCTRL_CLKRAMPLEVEL	19
+#define ADPLL_RAMPCTRL_CLKRAMPRATE	16
+#define ADPLL_RAMPCTRL_RELOCK_RAMP_EN	0
+
+#define MAX_ADPLL_INPUTS		3
+#define MAX_ADPLL_OUTPUTS		4
+#define ADPLL_MAX_RETRIES		5
+
+#define to_dco(_hw)	container_of(_hw, struct ti_adpll_dco_data, hw)
+#define to_adpll(_hw)	container_of(_hw, struct ti_adpll_data, dco)
+#define to_clkout(_hw)	container_of(_hw, struct ti_adpll_clkout_data, hw)
+
+enum ti_adpll_clocks {
+	TI_ADPLL_DCO,
+	TI_ADPLL_DCO_GATE,
+	TI_ADPLL_N2,
+	TI_ADPLL_M2,
+	TI_ADPLL_M2_GATE,
+	TI_ADPLL_BYPASS,
+	TI_ADPLL_HIF,
+	TI_ADPLL_DIV2,
+	TI_ADPLL_CLKOUT,
+	TI_ADPLL_CLKOUT2,
+	TI_ADPLL_M3,
+};
+
+#define TI_ADPLL_NR_CLOCKS	(TI_ADPLL_M3 + 1)
+
+enum ti_adpll_inputs {
+	TI_ADPLL_CLKINP,
+	TI_ADPLL_CLKINPULOW,
+	TI_ADPLL_CLKINPHIF,
+};
+
+enum ti_adpll_s_outputs {
+	TI_ADPLL_S_DCOCLKLDO,
+	TI_ADPLL_S_CLKOUT,
+	TI_ADPLL_S_CLKOUTX2,
+	TI_ADPLL_S_CLKOUTHIF,
+};
+
+enum ti_adpll_lj_outputs {
+	TI_ADPLL_LJ_CLKDCOLDO,
+	TI_ADPLL_LJ_CLKOUT,
+	TI_ADPLL_LJ_CLKOUTLDO,
+};
+
+struct ti_adpll_platform_data {
+	const bool is_type_s;
+	const int nr_max_inputs;
+	const int nr_max_outputs;
+	const int output_index;
+};
+
+struct ti_adpll_clock {
+	struct clk *clk;
+	struct clk_lookup *cl;
+	void (*unregister)(struct clk *clk);
+};
+
+struct ti_adpll_dco_data {
+	struct clk_hw hw;
+};
+
+struct ti_adpll_clkout_data {
+	struct ti_adpll_data *adpll;
+	struct clk_gate gate;
+	struct clk_hw hw;
+};
+
+struct ti_adpll_data {
+	struct device *dev;
+	const struct ti_adpll_platform_data *c;
+	struct device_node *np;
+	unsigned long pa;
+	void __iomem *iobase;
+	void __iomem *regs;
+	spinlock_t lock;	/* For ADPLL shared register access */
+	const char *parent_names[MAX_ADPLL_INPUTS];
+	struct clk *parent_clocks[MAX_ADPLL_INPUTS];
+	struct ti_adpll_clock *clocks;
+	struct clk_onecell_data outputs;
+	struct ti_adpll_dco_data dco;
+};
+
+static const char *ti_adpll_clk_get_name(struct ti_adpll_data *d,
+					 int output_index,
+					 const char *postfix)
+{
+	const char *name;
+	int err;
+
+	if (output_index >= 0) {
+		err = of_property_read_string_index(d->np,
+						    "clock-output-names",
+						    output_index,
+						    &name);
+		if (err)
+			return NULL;
+	} else {
+		const char *base_name = "adpll";
+		char *buf;
+
+		buf = devm_kzalloc(d->dev, 8 + 1 + strlen(base_name) + 1 +
+				    strlen(postfix), GFP_KERNEL);
+		if (!buf)
+			return NULL;
+		sprintf(buf, "%08lx.%s.%s", d->pa, base_name, postfix);
+		name = buf;
+	}
+
+	return name;
+}
+
+#define ADPLL_MAX_CON_ID	16	/* See MAX_CON_ID */
+
+static int ti_adpll_setup_clock(struct ti_adpll_data *d, struct clk *clock,
+				int index, int output_index, const char *name,
+				void (*unregister)(struct clk *clk))
+{
+	struct clk_lookup *cl;
+	const char *postfix = NULL;
+	char con_id[ADPLL_MAX_CON_ID];
+
+	d->clocks[index].clk = clock;
+	d->clocks[index].unregister = unregister;
+
+	/* Separate con_id in format "pll040dcoclkldo" to fit MAX_CON_ID */
+	postfix = strrchr(name, '.');
+	if (strlen(postfix) > 1) {
+		if (strlen(postfix) > ADPLL_MAX_CON_ID)
+			dev_warn(d->dev, "clock %s con_id lookup may fail\n",
+				 name);
+		snprintf(con_id, 16, "pll%03lx%s", d->pa & 0xfff, postfix + 1);
+		cl = clkdev_create(clock, con_id, NULL);
+		if (!cl)
+			return -ENOMEM;
+		d->clocks[index].cl = cl;
+	} else {
+		dev_warn(d->dev, "no con_id for clock %s\n", name);
+	}
+
+	if (output_index < 0)
+		return 0;
+
+	d->outputs.clks[output_index] = clock;
+	d->outputs.clk_num++;
+
+	return 0;
+}
+
+static int ti_adpll_init_divider(struct ti_adpll_data *d,
+				 enum ti_adpll_clocks index,
+				 int output_index, char *name,
+				 struct clk *parent_clock,
+				 void __iomem *reg,
+				 u8 shift, u8 width,
+				 u8 clk_divider_flags)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, output_index, name);
+	if (!child_name)
+		return -EINVAL;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_divider(d->dev, child_name, parent_name, 0,
+				     reg, shift, width, clk_divider_flags,
+				     &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register divider %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    clk_unregister_divider);
+}
+
+static int ti_adpll_init_mux(struct ti_adpll_data *d,
+			     enum ti_adpll_clocks index,
+			     char *name, struct clk *clk0,
+			     struct clk *clk1,
+			     void __iomem *reg,
+			     u8 shift)
+{
+	const char *child_name;
+	const char *parents[2];
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, -ENODEV, name);
+	if (!child_name)
+		return -ENOMEM;
+	parents[0] = __clk_get_name(clk0);
+	parents[1] = __clk_get_name(clk1);
+	clock = clk_register_mux(d->dev, child_name, parents, 2, 0,
+				 reg, shift, 1, 0, &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register mux %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, -ENODEV, child_name,
+				    clk_unregister_mux);
+}
+
+static int ti_adpll_init_gate(struct ti_adpll_data *d,
+			      enum ti_adpll_clocks index,
+			      int output_index, char *name,
+			      struct clk *parent_clock,
+			      void __iomem *reg,
+			      u8 bit_idx,
+			      u8 clk_gate_flags)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, output_index, name);
+	if (!child_name)
+		return -EINVAL;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_gate(d->dev, child_name, parent_name, 0,
+				  reg, bit_idx, clk_gate_flags,
+				  &d->lock);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register gate %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    clk_unregister_gate);
+}
+
+static int ti_adpll_init_fixed_factor(struct ti_adpll_data *d,
+				      enum ti_adpll_clocks index,
+				      char *name,
+				      struct clk *parent_clock,
+				      unsigned int mult,
+				      unsigned int div)
+{
+	const char *child_name;
+	const char *parent_name;
+	struct clk *clock;
+
+	child_name = ti_adpll_clk_get_name(d, -ENODEV, name);
+	if (!child_name)
+		return -ENOMEM;
+
+	parent_name = __clk_get_name(parent_clock);
+	clock = clk_register_fixed_factor(d->dev, child_name, parent_name,
+					  0, mult, div);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	return ti_adpll_setup_clock(d, clock, index, -ENODEV, child_name,
+				    clk_unregister);
+}
+
+static void ti_adpll_set_idle_bypass(struct ti_adpll_data *d)
+{
+	unsigned long flags;
+	u32 v;
+
+	spin_lock_irqsave(&d->lock, flags);
+	v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+	v |= BIT(ADPLL_CLKCTRL_IDLE);
+	writel_relaxed(v, d->regs + ADPLL_CLKCTRL_OFFSET);
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+static void ti_adpll_clear_idle_bypass(struct ti_adpll_data *d)
+{
+	unsigned long flags;
+	u32 v;
+
+	spin_lock_irqsave(&d->lock, flags);
+	v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+	v &= ~BIT(ADPLL_CLKCTRL_IDLE);
+	writel_relaxed(v, d->regs + ADPLL_CLKCTRL_OFFSET);
+	spin_unlock_irqrestore(&d->lock, flags);
+}
+
+static bool ti_adpll_clock_is_bypass(struct ti_adpll_data *d)
+{
+	u32 v;
+
+	v = readl_relaxed(d->regs + ADPLL_STATUS_OFFSET);
+
+	return v & BIT(ADPLL_STATUS_BYPASS);
+}
+
+/*
+ * Locked and bypass are not actually mutually exclusive:  if you only care
+ * about the DCO clock and not CLKOUT you can clear M2PWDNZ before enabling
+ * the PLL, resulting in status (FREQLOCK | PHASELOCK | BYPASS) after lock.
+ */
+static bool ti_adpll_is_locked(struct ti_adpll_data *d)
+{
+	u32 v = readl_relaxed(d->regs + ADPLL_STATUS_OFFSET);
+
+	return (v & ADPLL_STATUS_PREPARED_MASK) == ADPLL_STATUS_PREPARED_MASK;
+}
+
+static int ti_adpll_wait_lock(struct ti_adpll_data *d)
+{
+	int retries = ADPLL_MAX_RETRIES;
+
+	do {
+		if (ti_adpll_is_locked(d))
+			return 0;
+		usleep_range(200, 300);
+	} while (retries--);
+
+	dev_err(d->dev, "pll failed to lock\n");
+	return -ETIMEDOUT;
+}
+
+static int ti_adpll_prepare(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	ti_adpll_clear_idle_bypass(d);
+	ti_adpll_wait_lock(d);
+
+	return 0;
+}
+
+static void ti_adpll_unprepare(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	ti_adpll_set_idle_bypass(d);
+}
+
+static int ti_adpll_is_prepared(struct clk_hw *hw)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+
+	return ti_adpll_is_locked(d);
+}
+
+/*
+ * Note that the DCO clock is never subject to bypass: if the PLL is off,
+ * dcoclk is low.
+ */
+static unsigned long ti_adpll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct ti_adpll_dco_data *dco = to_dco(hw);
+	struct ti_adpll_data *d = to_adpll(dco);
+	u32 frac_m, divider, v;
+	u64 rate;
+	unsigned long flags;
+
+	if (ti_adpll_clock_is_bypass(d))
+		return 0;
+
+	spin_lock_irqsave(&d->lock, flags);
+	frac_m = readl_relaxed(d->regs + ADPLL_FRACDIV_OFFSET);
+	frac_m &= ADPLL_FRACDIV_FRACTIONALM_MASK;
+	rate = (u64)readw_relaxed(d->regs + ADPLL_MN2DIV_OFFSET) << 18;
+	rate += frac_m;
+	rate *= parent_rate;
+	divider = (readw_relaxed(d->regs + ADPLL_M2NDIV_OFFSET) + 1) << 18;
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	do_div(rate, divider);
+
+	if (d->c->is_type_s) {
+		v = readl_relaxed(d->regs + ADPLL_CLKCTRL_OFFSET);
+		if (v & BIT(ADPLL_CLKCTRL_REGM4XEN_ADPLL_S))
+			rate *= 4;
+		rate *= 2;
+	}
+
+	return rate;
+}
+
+/* PLL parent is always clkinp, bypass only affects the children */
+static u8 ti_adpll_get_parent(struct clk_hw *hw)
+{
+	return 0;
+}
+
+static struct clk_ops ti_adpll_ops = {
+	.prepare = ti_adpll_prepare,
+	.unprepare = ti_adpll_unprepare,
+	.is_prepared = ti_adpll_is_prepared,
+	.recalc_rate = ti_adpll_recalc_rate,
+	.get_parent = ti_adpll_get_parent,
+};
+
+static int ti_adpll_init_dco(struct ti_adpll_data *d)
+{
+	struct clk_init_data init;
+	struct clk *clock;
+	const char *postfix;
+	int width, err;
+
+	d->outputs.clks = devm_kzalloc(d->dev, sizeof(struct clk *) *
+				       MAX_ADPLL_OUTPUTS,
+				       GFP_KERNEL);
+	if (!d->outputs.clks)
+		return -ENOMEM;
+
+	if (d->c->output_index < 0)
+		postfix = "dco";
+	else
+		postfix = NULL;
+
+	init.name = ti_adpll_clk_get_name(d, d->c->output_index, postfix);
+	if (!init.name)
+		return -EINVAL;
+
+	init.parent_names = d->parent_names;
+	init.num_parents = d->c->nr_max_inputs;
+	init.ops = &ti_adpll_ops;
+	init.flags = CLK_GET_RATE_NOCACHE;
+	d->dco.hw.init = &init;
+
+	if (d->c->is_type_s)
+		width = 5;
+	else
+		width = 4;
+
+	/* Internal input clock divider N2 */
+	err = ti_adpll_init_divider(d, TI_ADPLL_N2, -ENODEV, "n2",
+				    d->parent_clocks[TI_ADPLL_CLKINP],
+				    d->regs + ADPLL_MN2DIV_OFFSET,
+				    ADPLL_MN2DIV_N2, width, 0);
+	if (err)
+		return err;
+
+	clock = devm_clk_register(d->dev, &d->dco.hw);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	return ti_adpll_setup_clock(d, clock, TI_ADPLL_DCO, d->c->output_index,
+				    init.name, NULL);
+}
+
+static int ti_adpll_clkout_enable(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+
+	return clk_gate_ops.enable(gate_hw);
+}
+
+static void ti_adpll_clkout_disable(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+	clk_gate_ops.disable(gate_hw);
+}
+
+static int ti_adpll_clkout_is_enabled(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct clk_hw *gate_hw = &co->gate.hw;
+
+	__clk_hw_set_clk(gate_hw, hw);
+
+	return clk_gate_ops.is_enabled(gate_hw);
+}
+
+/* Setting PLL bypass puts clkout and clkoutx2 into bypass */
+static u8 ti_adpll_clkout_get_parent(struct clk_hw *hw)
+{
+	struct ti_adpll_clkout_data *co = to_clkout(hw);
+	struct ti_adpll_data *d = co->adpll;
+
+	return ti_adpll_clock_is_bypass(d);
+}
+
+static int ti_adpll_init_clkout(struct ti_adpll_data *d,
+				enum ti_adpll_clocks index,
+				int output_index, int gate_bit,
+				char *name, struct clk *clk0,
+				struct clk *clk1)
+{
+	struct ti_adpll_clkout_data *co;
+	struct clk_init_data init;
+	struct clk_ops *ops;
+	const char *parent_names[2];
+	const char *child_name;
+	struct clk *clock;
+	int err;
+
+	co = devm_kzalloc(d->dev, sizeof(*co), GFP_KERNEL);
+	if (!co)
+		return -ENOMEM;
+	co->adpll = d;
+
+	err = of_property_read_string_index(d->np,
+					    "clock-output-names",
+					    output_index,
+					    &child_name);
+	if (err)
+		return err;
+
+	ops = devm_kzalloc(d->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	init.name = child_name;
+	init.ops = ops;
+	init.flags = CLK_IS_BASIC;
+	co->hw.init = &init;
+	parent_names[0] = __clk_get_name(clk0);
+	parent_names[1] = __clk_get_name(clk1);
+	init.parent_names = parent_names;
+	init.num_parents = 2;
+
+	ops->get_parent = ti_adpll_clkout_get_parent;
+	ops->determine_rate = __clk_mux_determine_rate;
+	if (gate_bit) {
+		co->gate.lock = &d->lock;
+		co->gate.reg = d->regs + ADPLL_CLKCTRL_OFFSET;
+		co->gate.bit_idx = gate_bit;
+		ops->enable = ti_adpll_clkout_enable;
+		ops->disable = ti_adpll_clkout_disable;
+		ops->is_enabled = ti_adpll_clkout_is_enabled;
+	}
+
+	clock = devm_clk_register(d->dev, &co->hw);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "failed to register output %s: %li\n",
+			name, PTR_ERR(clock));
+		return PTR_ERR(clock);
+	}
+
+	return ti_adpll_setup_clock(d, clock, index, output_index, child_name,
+				    NULL);
+}
+
+static int ti_adpll_init_children_adpll_s(struct ti_adpll_data *d)
+{
+	int err;
+
+	if (!d->c->is_type_s)
+		return 0;
+
+	/* Internal mux, sources from divider N2 or clkinpulow */
+	err = ti_adpll_init_mux(d, TI_ADPLL_BYPASS, "bypass",
+				d->clocks[TI_ADPLL_N2].clk,
+				d->parent_clocks[TI_ADPLL_CLKINPULOW],
+				d->regs + ADPLL_CLKCTRL_OFFSET,
+				ADPLL_CLKCTRL_ULOWCLKEN);
+	if (err)
+		return err;
+
+	/* Internal divider M2, sources DCO */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M2, -ENODEV, "m2",
+				    d->clocks[TI_ADPLL_DCO].clk,
+				    d->regs + ADPLL_M2NDIV_OFFSET,
+				    ADPLL_M2NDIV_M2,
+				    ADPLL_M2NDIV_M2_ADPLL_S_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Internal fixed divider, after M2 before clkout */
+	err = ti_adpll_init_fixed_factor(d, TI_ADPLL_DIV2, "div2",
+					 d->clocks[TI_ADPLL_M2].clk,
+					 1, 2);
+	if (err)
+		return err;
+
+	/* Output clkout with a mux and gate, sources from div2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT, TI_ADPLL_S_CLKOUT,
+				   ADPLL_CLKCTRL_CLKOUTEN, "clkout",
+				   d->clocks[TI_ADPLL_DIV2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	/* Output clkoutx2 with a mux and gate, sources from M2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT2, TI_ADPLL_S_CLKOUTX2, 0,
+				   "clkout2", d->clocks[TI_ADPLL_M2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	/* Internal mux, sources from DCO and clkinphif */
+	if (d->parent_clocks[TI_ADPLL_CLKINPHIF]) {
+		err = ti_adpll_init_mux(d, TI_ADPLL_HIF, "hif",
+					d->clocks[TI_ADPLL_DCO].clk,
+					d->parent_clocks[TI_ADPLL_CLKINPHIF],
+					d->regs + ADPLL_CLKCTRL_OFFSET,
+					ADPLL_CLKINPHIFSEL_ADPLL_S);
+		if (err)
+			return err;
+	}
+
+	/* Output clkouthif with a divider M3, sources from hif */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M3, TI_ADPLL_S_CLKOUTHIF, "m3",
+				    d->clocks[TI_ADPLL_HIF].clk,
+				    d->regs + ADPLL_M3DIV_OFFSET,
+				    ADPLL_M3DIV_M3,
+				    ADPLL_M3DIV_M3_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Output clock dcoclkldo is the DCO */
+
+	return 0;
+}
+
+static int ti_adpll_init_children_adpll_lj(struct ti_adpll_data *d)
+{
+	int err;
+
+	if (d->c->is_type_s)
+		return 0;
+
+	/* Output clkdcoldo, gated output of DCO */
+	err = ti_adpll_init_gate(d, TI_ADPLL_DCO_GATE, TI_ADPLL_LJ_CLKDCOLDO,
+				 "clkdcoldo", d->clocks[TI_ADPLL_DCO].clk,
+				 d->regs + ADPLL_CLKCTRL_OFFSET,
+				 ADPLL_CLKCTRL_CLKDCOLDOEN, 0);
+	if (err)
+		return err;
+
+	/* Internal divider M2, sources from DCO */
+	err = ti_adpll_init_divider(d, TI_ADPLL_M2, -ENODEV,
+				    "m2", d->clocks[TI_ADPLL_DCO].clk,
+				    d->regs + ADPLL_M2NDIV_OFFSET,
+				    ADPLL_M2NDIV_M2,
+				    ADPLL_M2NDIV_M2_ADPLL_LJ_WIDTH,
+				    CLK_DIVIDER_ONE_BASED);
+	if (err)
+		return err;
+
+	/* Output clkoutldo, gated output of M2 */
+	err = ti_adpll_init_gate(d, TI_ADPLL_M2_GATE, TI_ADPLL_LJ_CLKOUTLDO,
+				 "clkoutldo", d->clocks[TI_ADPLL_M2].clk,
+				 d->regs + ADPLL_CLKCTRL_OFFSET,
+				 ADPLL_CLKCTRL_CLKOUTLDOEN_ADPLL_LJ,
+				 0);
+	if (err)
+		return err;
+
+	/* Internal mux, sources from divider N2 or clkinpulow */
+	err = ti_adpll_init_mux(d, TI_ADPLL_BYPASS, "bypass",
+				d->clocks[TI_ADPLL_N2].clk,
+				d->parent_clocks[TI_ADPLL_CLKINPULOW],
+				d->regs + ADPLL_CLKCTRL_OFFSET,
+				ADPLL_CLKCTRL_ULOWCLKEN);
+	if (err)
+		return err;
+
+	/* Output clkout, sources M2 or bypass */
+	err = ti_adpll_init_clkout(d, TI_ADPLL_CLKOUT, TI_ADPLL_S_CLKOUT,
+				   ADPLL_CLKCTRL_CLKOUTEN, "clkout",
+				   d->clocks[TI_ADPLL_M2].clk,
+				   d->clocks[TI_ADPLL_BYPASS].clk);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void ti_adpll_free_resources(struct ti_adpll_data *d)
+{
+	int i;
+
+	for (i = TI_ADPLL_M3; i >= 0; i--) {
+		struct ti_adpll_clock *ac = &d->clocks[i];
+
+		if (!ac || IS_ERR_OR_NULL(ac->clk))
+			continue;
+		if (ac->cl)
+			clkdev_drop(ac->cl);
+		if (ac->unregister)
+			ac->unregister(ac->clk);
+	}
+}
+
+/* MPU PLL manages the lock register for all PLLs */
+static void ti_adpll_unlock_all(void __iomem *reg)
+{
+	u32 v;
+
+	v = readl_relaxed(reg);
+	if (v == ADPLL_PLLSS_MMR_LOCK_ENABLED)
+		writel_relaxed(ADPLL_PLLSS_MMR_UNLOCK_MAGIC, reg);
+}
+
+static int ti_adpll_init_registers(struct ti_adpll_data *d)
+{
+	int register_offset = 0;
+
+	if (d->c->is_type_s) {
+		register_offset = 8;
+		ti_adpll_unlock_all(d->iobase + ADPLL_PLLSS_MMR_LOCK_OFFSET);
+	}
+
+	d->regs = d->iobase + register_offset + ADPLL_PWRCTRL_OFFSET;
+
+	return 0;
+}
+
+static int ti_adpll_init_inputs(struct ti_adpll_data *d)
+{
+	const char *error = "need at least %i inputs";
+	struct clk *clock;
+	int nr_inputs;
+
+	nr_inputs = of_clk_get_parent_count(d->np);
+	if (nr_inputs < d->c->nr_max_inputs) {
+		dev_err(d->dev, error, nr_inputs);
+		return -EINVAL;
+	}
+	of_clk_parent_fill(d->np, d->parent_names, nr_inputs);
+
+	clock = devm_clk_get(d->dev, d->parent_names[0]);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "could not get clkinp\n");
+		return PTR_ERR(clock);
+	}
+	d->parent_clocks[TI_ADPLL_CLKINP] = clock;
+
+	clock = devm_clk_get(d->dev, d->parent_names[1]);
+	if (IS_ERR(clock)) {
+		dev_err(d->dev, "could not get clkinpulow clock\n");
+		return PTR_ERR(clock);
+	}
+	d->parent_clocks[TI_ADPLL_CLKINPULOW] = clock;
+
+	if (d->c->is_type_s) {
+		clock =  devm_clk_get(d->dev, d->parent_names[2]);
+		if (IS_ERR(clock)) {
+			dev_err(d->dev, "could not get clkinphif clock\n");
+			return PTR_ERR(clock);
+		}
+		d->parent_clocks[TI_ADPLL_CLKINPHIF] = clock;
+	}
+
+	return 0;
+}
+
+static const struct ti_adpll_platform_data ti_adpll_type_s = {
+	.is_type_s = true,
+	.nr_max_inputs = MAX_ADPLL_INPUTS,
+	.nr_max_outputs = MAX_ADPLL_OUTPUTS,
+	.output_index = TI_ADPLL_S_DCOCLKLDO,
+};
+
+static const struct ti_adpll_platform_data ti_adpll_type_lj = {
+	.is_type_s = false,
+	.nr_max_inputs = MAX_ADPLL_INPUTS - 1,
+	.nr_max_outputs = MAX_ADPLL_OUTPUTS - 1,
+	.output_index = -EINVAL,
+};
+
+static const struct of_device_id ti_adpll_match[] = {
+	{ .compatible = "ti,dm814-adpll-s-clock", &ti_adpll_type_s },
+	{ .compatible = "ti,dm814-adpll-lj-clock", &ti_adpll_type_lj },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ti_adpll_match);
+
+static int ti_adpll_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *match;
+	const struct ti_adpll_platform_data *pdata;
+	struct ti_adpll_data *d;
+	struct resource *res;
+	int err;
+
+	match = of_match_device(ti_adpll_match, dev);
+	if (match)
+		pdata = match->data;
+	else
+		return -ENODEV;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+	d->dev = dev;
+	d->np = node;
+	d->c = pdata;
+	dev_set_drvdata(d->dev, d);
+	spin_lock_init(&d->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+	d->pa = res->start;
+
+	d->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(d->iobase)) {
+		dev_err(dev, "could not get IO base: %li\n",
+			PTR_ERR(d->iobase));
+		return PTR_ERR(d->iobase);
+	}
+
+	err = ti_adpll_init_registers(d);
+	if (err)
+		return err;
+
+	err = ti_adpll_init_inputs(d);
+	if (err)
+		return err;
+
+	d->clocks = devm_kzalloc(d->dev, sizeof(struct ti_adpll_clock) *
+				 TI_ADPLL_NR_CLOCKS,
+				 GFP_KERNEL);
+	if (!d->clocks)
+		return -ENOMEM;
+
+	err = ti_adpll_init_dco(d);
+	if (err) {
+		dev_err(dev, "could not register dco: %i\n", err);
+		goto free;
+	}
+
+	err = ti_adpll_init_children_adpll_s(d);
+	if (err)
+		goto free;
+	err = ti_adpll_init_children_adpll_lj(d);
+	if (err)
+		goto free;
+
+	err = of_clk_add_provider(d->np, of_clk_src_onecell_get, &d->outputs);
+	if (err)
+		goto free;
+
+	return 0;
+
+free:
+	WARN_ON(1);
+	ti_adpll_free_resources(d);
+
+	return err;
+}
+
+static int ti_adpll_remove(struct platform_device *pdev)
+{
+	struct ti_adpll_data *d = dev_get_drvdata(&pdev->dev);
+
+	ti_adpll_free_resources(d);
+
+	return 0;
+}
+
+static struct platform_driver ti_adpll_driver = {
+	.driver = {
+		.name = "ti-adpll",
+		.of_match_table = ti_adpll_match,
+	},
+	.probe = ti_adpll_probe,
+	.remove = ti_adpll_remove,
+};
+
+static int __init ti_adpll_init(void)
+{
+	return platform_driver_register(&ti_adpll_driver);
+}
+core_initcall(ti_adpll_init);
+
+static void __exit ti_adpll_exit(void)
+{
+	platform_driver_unregister(&ti_adpll_driver);
+}
+module_exit(ti_adpll_exit);
+
+MODULE_DESCRIPTION("Clock driver for dm814x ADPLL");
+MODULE_ALIAS("platform:dm814-adpll-clock");
+MODULE_AUTHOR("Tony LIndgren <tony@atomide.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index b336a8c11e2a..6411e132faa2 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -140,11 +140,21 @@ static void __init omap_clk_register_apll(struct clk_hw *hw,
 	struct dpll_data *ad = clk_hw->dpll_data;
 	struct clk *clk;
 
-	ad->clk_ref = of_clk_get(node, 0);
-	ad->clk_bypass = of_clk_get(node, 1);
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-ref for %s not ready, retry\n",
+			 node->name);
+		if (!ti_clk_retry_init(node, hw, omap_clk_register_apll))
+			return;
+
+		goto cleanup;
+	}
 
-	if (IS_ERR(ad->clk_ref) || IS_ERR(ad->clk_bypass)) {
-		pr_debug("clk-ref or clk-bypass for %s not ready, retry\n",
+	ad->clk_ref = __clk_get_hw(clk);
+
+	clk = of_clk_get(node, 1);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-bypass for %s not ready, retry\n",
 			 node->name);
 		if (!ti_clk_retry_init(node, hw, omap_clk_register_apll))
 			return;
@@ -152,6 +162,8 @@ static void __init omap_clk_register_apll(struct clk_hw *hw,
 		goto cleanup;
 	}
 
+	ad->clk_bypass = __clk_get_hw(clk);
+
 	clk = clk_register(NULL, &clk_hw->hw);
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
diff --git a/drivers/clk/ti/clk-814x.c b/drivers/clk/ti/clk-814x.c
index 9e85fcc74cc9..52c6efc53731 100644
--- a/drivers/clk/ti/clk-814x.c
+++ b/drivers/clk/ti/clk-814x.c
@@ -5,8 +5,10 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/clk/ti.h>
+#include <linux/of_platform.h>
 
 #include "clock.h"
 
@@ -27,11 +29,62 @@ static struct ti_dt_clk dm814_clks[] = {
 	{ .node_name = NULL },
 };
 
+static bool timer_clocks_initialized;
+
+static int __init dm814x_adpll_early_init(void)
+{
+	struct device_node *np;
+
+	if (!timer_clocks_initialized)
+		return -ENODEV;
+
+	np = of_find_node_by_name(NULL, "pllss");
+	if (!np) {
+		pr_err("Could not find node for plls\n");
+		return -ENODEV;
+	}
+
+	of_platform_populate(np, NULL, NULL, NULL);
+
+	return 0;
+}
+core_initcall(dm814x_adpll_early_init);
+
+static const char * const init_clocks[] = {
+	"pll040clkout",		/* MPU 481c5040.adpll.clkout */
+	"pll290clkout",		/* DDR 481c5290.adpll.clkout */
+};
+
+static int __init dm814x_adpll_enable_init_clocks(void)
+{
+	int i, err;
+
+	if (!timer_clocks_initialized)
+		return -ENODEV;
+
+	for (i = 0; i < ARRAY_SIZE(init_clocks); i++) {
+		struct clk *clock;
+
+		clock = clk_get(NULL, init_clocks[i]);
+		if (WARN(IS_ERR(clock), "could not find init clock %s\n",
+			 init_clocks[i]))
+			continue;
+		err = clk_prepare_enable(clock);
+		if (WARN(err, "could not enable init clock %s\n",
+			 init_clocks[i]))
+			continue;
+	}
+
+	return 0;
+}
+postcore_initcall(dm814x_adpll_enable_init_clocks);
+
 int __init dm814x_dt_clk_init(void)
 {
 	ti_dt_clocks_register(dm814_clks);
 	omap2_clk_disable_autoidle_all();
 	omap2_clk_enable_init_clocks(NULL, 0);
+	timer_clocks_initialized = true;
 
 	return 0;
 }
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index b5bcd77e8d0f..5fcf247759ac 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -305,8 +305,8 @@ struct clk __init *ti_clk_register_clk(struct ti_clk *setup)
 	case TI_CLK_FIXED:
 		fixed = setup->data;
 
-		clk = clk_register_fixed_rate(NULL, setup->name, NULL,
-					      CLK_IS_ROOT, fixed->frequency);
+		clk = clk_register_fixed_rate(NULL, setup->name, NULL, 0,
+					      fixed->frequency);
 		break;
 	case TI_CLK_MUX:
 		clk = ti_clk_register_mux(setup);
diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index b5cc6f66ae5d..032c658a5f5e 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c
@@ -254,7 +254,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 	v >>= __ffs(dd->enable_mask);
 
 	if (_omap2_dpll_is_in_bypass(v))
-		return clk_get_rate(dd->clk_bypass);
+		return clk_hw_get_rate(dd->clk_bypass);
 
 	v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg);
 	dpll_mult = v & dd->mult_mask;
@@ -262,7 +262,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 	dpll_div = v & dd->div1_mask;
 	dpll_div >>= __ffs(dd->div1_mask);
 
-	dpll_clk = (u64)clk_get_rate(dd->clk_ref) * dpll_mult;
+	dpll_clk = (u64)clk_hw_get_rate(dd->clk_ref) * dpll_mult;
 	do_div(dpll_clk, dpll_div + 1);
 
 	return dpll_clk;
@@ -301,7 +301,7 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate,
 
 	dd = clk->dpll_data;
 
-	ref_rate = clk_get_rate(dd->clk_ref);
+	ref_rate = clk_hw_get_rate(dd->clk_ref);
 	clk_name = clk_hw_get_name(hw);
 	pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n",
 		 clk_name, target_rate);
diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c
index b9bc3b8df659..6cf9dd189a92 100644
--- a/drivers/clk/ti/clockdomain.c
+++ b/drivers/clk/ti/clockdomain.c
@@ -109,7 +109,7 @@ static void __init of_ti_clockdomain_setup(struct device_node *node)
 	struct clk_hw *clk_hw;
 	const char *clkdm_name = node->name;
 	int i;
-	int num_clks;
+	unsigned int num_clks;
 
 	num_clks = of_clk_get_parent_count(node);
 
diff --git a/drivers/clk/ti/composite.c b/drivers/clk/ti/composite.c
index dbef218fe5ec..1cf70f452e1e 100644
--- a/drivers/clk/ti/composite.c
+++ b/drivers/clk/ti/composite.c
@@ -28,8 +28,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
-
 static unsigned long ti_composite_recalc_rate(struct clk_hw *hw,
 					      unsigned long parent_rate)
 {
@@ -236,14 +234,14 @@ cleanup:
 
 static void __init of_ti_composite_clk_setup(struct device_node *node)
 {
-	int num_clks;
+	unsigned int num_clks;
 	int i;
 	struct clk_hw_omap_comp *cclk;
 
 	/* Number of component clocks to be put inside this clock */
 	num_clks = of_clk_get_parent_count(node);
 
-	if (num_clks < 1) {
+	if (!num_clks) {
 		pr_err("composite clk %s must have component(s)\n", node->name);
 		return;
 	}
@@ -273,13 +271,13 @@ CLK_OF_DECLARE(ti_composite_clock, "ti,composite-clock",
 int __init ti_clk_add_component(struct device_node *node, struct clk_hw *hw,
 				int type)
 {
-	int num_parents;
+	unsigned int num_parents;
 	const char **parent_names;
 	struct component_clk *clk;
 
 	num_parents = of_clk_get_parent_count(node);
 
-	if (num_parents < 1) {
+	if (!num_parents) {
 		pr_err("component-clock %s must have parent(s)\n", node->name);
 		return -EINVAL;
 	}
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index df2558350fc1..b4e5de16e561 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -26,8 +26,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
-
 #define div_mask(d)	((1 << ((d)->width)) - 1)
 
 static unsigned int _get_table_maxdiv(const struct clk_div_table *table)
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index 5519b386edc0..3bc9959f71c3 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -147,11 +147,22 @@ static void __init _register_dpll(struct clk_hw *hw,
 	struct dpll_data *dd = clk_hw->dpll_data;
 	struct clk *clk;
 
-	dd->clk_ref = of_clk_get(node, 0);
-	dd->clk_bypass = of_clk_get(node, 1);
+	clk = of_clk_get(node, 0);
+	if (IS_ERR(clk)) {
+		pr_debug("clk-ref missing for %s, retry later\n",
+			 node->name);
+		if (!ti_clk_retry_init(node, hw, _register_dpll))
+			return;
 
-	if (IS_ERR(dd->clk_ref) || IS_ERR(dd->clk_bypass)) {
-		pr_debug("clk-ref or clk-bypass missing for %s, retry later\n",
+		goto cleanup;
+	}
+
+	dd->clk_ref = __clk_get_hw(clk);
+
+	clk = of_clk_get(node, 1);
+
+	if (IS_ERR(clk)) {
+		pr_debug("clk-bypass missing for %s, retry later\n",
 			 node->name);
 		if (!ti_clk_retry_init(node, hw, _register_dpll))
 			return;
@@ -159,6 +170,8 @@ static void __init _register_dpll(struct clk_hw *hw,
 		goto cleanup;
 	}
 
+	dd->clk_bypass = __clk_get_hw(clk);
+
 	/* register the clock */
 	clk = clk_register(NULL, &clk_hw->hw);
 
@@ -251,8 +264,8 @@ struct clk *ti_clk_register_dpll(struct ti_clk *setup)
 	dd->recal_en_bit = dpll->recal_en_bit;
 	dd->recal_st_bit = dpll->recal_st_bit;
 
-	dd->clk_ref = clk_ref;
-	dd->clk_bypass = clk_bypass;
+	dd->clk_ref = __clk_get_hw(clk_ref);
+	dd->clk_bypass = __clk_get_hw(clk_bypass);
 
 	if (dpll->flags & CLKF_CORE)
 		ops = &omap3_dpll_core_ck_ops;
@@ -361,7 +374,7 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	init->ops = ops;
 
 	init->num_parents = of_clk_get_parent_count(node);
-	if (init->num_parents < 1) {
+	if (!init->num_parents) {
 		pr_err("%s must have parent(s)\n", node->name);
 		goto cleanup;
 	}
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index cc739291a3ce..88f2ce81ba55 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -98,7 +98,7 @@ static u16 _omap3_dpll_compute_freqsel(struct clk_hw_omap *clk, u8 n)
 	unsigned long fint;
 	u16 f = 0;
 
-	fint = clk_get_rate(clk->dpll_data->clk_ref) / n;
+	fint = clk_hw_get_rate(clk->dpll_data->clk_ref) / n;
 
 	pr_debug("clock: fint is %lu\n", fint);
 
@@ -460,12 +460,11 @@ int omap3_noncore_dpll_enable(struct clk_hw *hw)
 
 	parent = clk_hw_get_parent(hw);
 
-	if (clk_hw_get_rate(hw) ==
-	    clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
-		WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
+	if (clk_hw_get_rate(hw) == clk_hw_get_rate(dd->clk_bypass)) {
+		WARN_ON(parent != dd->clk_bypass);
 		r = _omap3_noncore_dpll_bypass(clk);
 	} else {
-		WARN_ON(parent != __clk_get_hw(dd->clk_ref));
+		WARN_ON(parent != dd->clk_ref);
 		r = _omap3_noncore_dpll_lock(clk);
 	}
 
@@ -513,13 +512,13 @@ int omap3_noncore_dpll_determine_rate(struct clk_hw *hw,
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_get_rate(dd->clk_bypass) == req->rate &&
+	if (clk_hw_get_rate(dd->clk_bypass) == req->rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		req->best_parent_hw = __clk_get_hw(dd->clk_bypass);
+		req->best_parent_hw = dd->clk_bypass;
 	} else {
 		req->rate = omap2_dpll_round_rate(hw, req->rate,
 					  &req->best_parent_rate);
-		req->best_parent_hw = __clk_get_hw(dd->clk_ref);
+		req->best_parent_hw = dd->clk_ref;
 	}
 
 	req->best_parent_rate = req->rate;
@@ -577,7 +576,7 @@ int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_hw_get_parent(hw) != __clk_get_hw(dd->clk_ref))
+	if (clk_hw_get_parent(hw) != dd->clk_ref)
 		return -EINVAL;
 
 	if (dd->last_rounded_rate == 0)
diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c
index 660d7436ac24..82c05b55a7be 100644
--- a/drivers/clk/ti/dpll44xx.c
+++ b/drivers/clk/ti/dpll44xx.c
@@ -94,7 +94,7 @@ static void omap4_dpll_lpmode_recalc(struct dpll_data *dd)
 {
 	long fint, fout;
 
-	fint = clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
+	fint = clk_hw_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1);
 	fout = fint * dd->last_rounded_m;
 
 	if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX))
@@ -212,13 +212,13 @@ int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw,
 	if (!dd)
 		return -EINVAL;
 
-	if (clk_get_rate(dd->clk_bypass) == req->rate &&
+	if (clk_hw_get_rate(dd->clk_bypass) == req->rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		req->best_parent_hw = __clk_get_hw(dd->clk_bypass);
+		req->best_parent_hw = dd->clk_bypass;
 	} else {
 		req->rate = omap4_dpll_regm4xen_round_rate(hw, req->rate,
 						&req->best_parent_rate);
-		req->best_parent_hw = __clk_get_hw(dd->clk_ref);
+		req->best_parent_hw = dd->clk_ref;
 	}
 
 	req->best_parent_rate = req->rate;
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index 5429d3534363..bc05f276f32b 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -24,8 +24,6 @@
 
 #include "clock.h"
 
-#define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
-
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index dab9ba88b9d6..44777ab6fdeb 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -26,8 +26,6 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#define to_clk_mux(_hw) container_of(_hw, struct clk_mux, hw)
-
 static u8 ti_clk_mux_get_parent(struct clk_hw *hw)
 {
 	struct clk_mux *mux = to_clk_mux(hw);
@@ -180,7 +178,7 @@ static void of_mux_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	void __iomem *reg;
-	int num_parents;
+	unsigned int num_parents;
 	const char **parent_names;
 	u8 clk_mux_flags = 0;
 	u32 mask = 0;
@@ -263,7 +261,7 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 static void __init of_ti_composite_mux_clk_setup(struct device_node *node)
 {
 	struct clk_mux *mux;
-	int num_parents;
+	unsigned int num_parents;
 	u32 val;
 
 	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
diff --git a/drivers/clk/ux500/abx500-clk.c b/drivers/clk/ux500/abx500-clk.c
index 222425d08ab6..a07c31e6f26d 100644
--- a/drivers/clk/ux500/abx500-clk.c
+++ b/drivers/clk/ux500/abx500-clk.c
@@ -40,8 +40,7 @@ static int ab8500_reg_clks(struct device *dev)
 		return ret;
 
 	/* ab8500_sysclk */
-	clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("ab8500_sysclk", NULL, PRCMU_SYSCLK, 0);
 	clk_register_clkdev(clk, "sysclk", "ab8500-usb.0");
 	clk_register_clkdev(clk, "sysclk", "ab-iddet.0");
 	clk_register_clkdev(clk, "sysclk", "snd-soc-mop500.0");
@@ -68,7 +67,7 @@ static int ab8500_reg_clks(struct device *dev)
 	clk = clk_reg_sysctrl_gate_fixed_rate(dev, "ulpclk", NULL,
 		AB8500_SYSULPCLKCTRL1, AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
 		AB8500_SYSULPCLKCTRL1_ULPCLKREQ,
-		38400000, 9000, CLK_IS_ROOT);
+		38400000, 9000, 0);
 	clk_register_clkdev(clk, "ulpclk", "snd-soc-mop500.0");
 
 	/* ab8500_intclk */
diff --git a/drivers/clk/ux500/u8500_of_clk.c b/drivers/clk/ux500/u8500_of_clk.c
index 271c09644652..9a736d939806 100644
--- a/drivers/clk/ux500/u8500_of_clk.c
+++ b/drivers/clk/ux500/u8500_of_clk.c
@@ -91,21 +91,21 @@ void u8500_clk_init(void)
 
 	/* Clock sources */
 	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLSOC0] = clk;
 
 	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLSOC1] = clk;
 
 	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_PLLDDR] = clk;
 
 	/* FIXME: Add sys, ulp and int clocks here. */
 
 	rtc_clk = clk_register_fixed_rate(NULL, "rtc32k", "NULL",
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				32768);
 
 	/* PRCMU clocks */
@@ -126,105 +126,101 @@ void u8500_clk_init(void)
 		clk = clk_reg_prcmu_gate("sgclk", sgaclk_parent,
 					PRCMU_SGACLK, 0);
 	else
-		clk = clk_reg_prcmu_gate("sgclk", NULL,
-					PRCMU_SGACLK, CLK_IS_ROOT);
+		clk = clk_reg_prcmu_gate("sgclk", NULL, PRCMU_SGACLK, 0);
 	prcmu_clk[PRCMU_SGACLK] = clk;
 
-	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, 0);
 	prcmu_clk[PRCMU_UARTCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("msp02clk", NULL, PRCMU_MSP02CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp02clk", NULL, PRCMU_MSP02CLK, 0);
 	prcmu_clk[PRCMU_MSP02CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, 0);
 	prcmu_clk[PRCMU_MSP1CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, 0);
 	prcmu_clk[PRCMU_I2CCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, 0);
 	prcmu_clk[PRCMU_SLIMCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, 0);
 	prcmu_clk[PRCMU_PER1CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, 0);
 	prcmu_clk[PRCMU_PER2CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, 0);
 	prcmu_clk[PRCMU_PER3CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, 0);
 	prcmu_clk[PRCMU_PER5CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, 0);
 	prcmu_clk[PRCMU_PER6CLK] = clk;
 
-	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, 0);
 	prcmu_clk[PRCMU_PER7CLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_LCDCLK] = clk;
 
-	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, 0);
 	prcmu_clk[PRCMU_BMLCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HSITXCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HSIRXCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_HDMICLK] = clk;
 
-	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, 0);
 	prcmu_clk[PRCMU_APEATCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("apetraceclk", NULL, PRCMU_APETRACECLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_APETRACECLK] = clk;
 
-	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, 0);
 	prcmu_clk[PRCMU_MCDECLK] = clk;
 
-	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK, 0);
 	prcmu_clk[PRCMU_IPI2CCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK, 0);
 	prcmu_clk[PRCMU_DSIALTCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, 0);
 	prcmu_clk[PRCMU_DMACLK] = clk;
 
-	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, 0);
 	prcmu_clk[PRCMU_B2R2CLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_TVCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, 0);
 	prcmu_clk[PRCMU_SSPCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, 0);
 	prcmu_clk[PRCMU_RNGCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, 0);
 	prcmu_clk[PRCMU_UICCCLK] = clk;
 
-	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
 	prcmu_clk[PRCMU_TIMCLK] = clk;
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL, PRCMU_SDMMCCLK,
-					100000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					100000000, CLK_SET_RATE_GATE);
 	prcmu_clk[PRCMU_SDMMCCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
@@ -252,7 +248,7 @@ void u8500_clk_init(void)
 	prcmu_clk[PRCMU_DSI2ESCCLK] = clk;
 
 	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
-				PRCMU_ARMSS, 0, CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				PRCMU_ARMSS, 0, CLK_IGNORE_UNUSED);
 	prcmu_clk[PRCMU_ARMSS] = clk;
 
 	twd_clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",
diff --git a/drivers/clk/ux500/u8540_clk.c b/drivers/clk/ux500/u8540_clk.c
index d7bcb7a86615..86549e59fb42 100644
--- a/drivers/clk/ux500/u8540_clk.c
+++ b/drivers/clk/ux500/u8540_clk.c
@@ -56,28 +56,28 @@ void u8540_clk_init(void)
 	/* Clock sources. */
 	/* Fixed ClockGen */
 	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "soc0_pll", NULL);
 
 	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "soc1_pll", NULL);
 
 	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "ddr_pll", NULL);
 
 	clk = clk_register_fixed_rate(NULL, "rtc32k", NULL,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				32768);
 	clk_register_clkdev(clk, "clk32k", NULL);
 	clk_register_clkdev(clk, "apb_pclk", "rtc-pl031");
 
 	clk = clk_register_fixed_rate(NULL, "ulp38m4", NULL,
-				CLK_IS_ROOT|CLK_IGNORE_UNUSED,
+				CLK_IGNORE_UNUSED,
 				38400000);
 
-	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, 0);
 	clk_register_clkdev(clk, NULL, "UART");
 
 	/* msp02clk needs a abx500 clk as parent. Handle by abx500 clk driver */
@@ -85,120 +85,116 @@ void u8540_clk_init(void)
 			PRCMU_MSP02CLK, 0);
 	clk_register_clkdev(clk, NULL, "MSP02");
 
-	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, 0);
 	clk_register_clkdev(clk, NULL, "MSP1");
 
-	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, 0);
 	clk_register_clkdev(clk, NULL, "I2C");
 
-	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, 0);
 	clk_register_clkdev(clk, NULL, "slim");
 
-	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH1");
 
-	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH2");
 
-	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH3");
 
-	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH5");
 
-	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH6");
 
-	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, 0);
 	clk_register_clkdev(clk, NULL, "PERIPH7");
 
 	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "lcd");
 	clk_register_clkdev(clk, "lcd", "mcde");
 
-	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, 0);
 	clk_register_clkdev(clk, NULL, "bml");
 
 	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 
 	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 
 	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "hdmi");
 	clk_register_clkdev(clk, "hdmi", "mcde");
 
-	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, 0);
 	clk_register_clkdev(clk, NULL, "apeat");
 
-	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK, 0);
 	clk_register_clkdev(clk, NULL, "apetrace");
 
-	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, 0);
 	clk_register_clkdev(clk, NULL, "mcde");
 	clk_register_clkdev(clk, "mcde", "mcde");
 	clk_register_clkdev(clk, NULL, "dsilink.0");
 	clk_register_clkdev(clk, NULL, "dsilink.1");
 	clk_register_clkdev(clk, NULL, "dsilink.2");
 
-	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK, 0);
 	clk_register_clkdev(clk, NULL, "ipi2");
 
-	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK,
-				CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK, 0);
 	clk_register_clkdev(clk, NULL, "dsialt");
 
-	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, 0);
 	clk_register_clkdev(clk, NULL, "dma40.0");
 
-	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, 0);
 	clk_register_clkdev(clk, NULL, "b2r2");
 	clk_register_clkdev(clk, NULL, "b2r2_core");
 	clk_register_clkdev(clk, NULL, "U8500-B2R2.0");
 	clk_register_clkdev(clk, NULL, "b2r2_1_core");
 
 	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "tv");
 	clk_register_clkdev(clk, "tv", "mcde");
 
-	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, 0);
 	clk_register_clkdev(clk, NULL, "SSP");
 
-	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, 0);
 	clk_register_clkdev(clk, NULL, "rngclk");
 
-	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, 0);
 	clk_register_clkdev(clk, NULL, "uicc");
 
-	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
 	clk_register_clkdev(clk, NULL, "mtu0");
 	clk_register_clkdev(clk, NULL, "mtu1");
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL,
 					PRCMU_SDMMCCLK, 100000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "sdmmc");
 
 	clk = clk_reg_prcmu_opp_volt_scalable("sdmmchclk", NULL,
 					PRCMU_SDMMCHCLK, 400000000,
-					CLK_IS_ROOT|CLK_SET_RATE_GATE);
+					CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, NULL, "sdmmchclk");
 
-	clk = clk_reg_prcmu_gate("hvaclk", NULL, PRCMU_HVACLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("hvaclk", NULL, PRCMU_HVACLK, 0);
 	clk_register_clkdev(clk, NULL, "hva");
 
-	clk = clk_reg_prcmu_gate("g1clk", NULL, PRCMU_G1CLK, CLK_IS_ROOT);
+	clk = clk_reg_prcmu_gate("g1clk", NULL, PRCMU_G1CLK, 0);
 	clk_register_clkdev(clk, NULL, "g1");
 
 	clk = clk_reg_prcmu_scalable("spare1clk", NULL, PRCMU_SPARE1CLK, 0,
-				CLK_IS_ROOT|CLK_SET_RATE_GATE);
+				     CLK_SET_RATE_GATE);
 	clk_register_clkdev(clk, "dsilcd", "mcde");
 
 	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
@@ -244,7 +240,7 @@ void u8540_clk_init(void)
 	clk_register_clkdev(clk, "dsilp2", "mcde");
 
 	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
-				PRCMU_ARMSS, 0, CLK_IS_ROOT|CLK_IGNORE_UNUSED);
+				PRCMU_ARMSS, 0, CLK_IGNORE_UNUSED);
 	clk_register_clkdev(clk, "armss", NULL);
 
 	clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",
diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index 3bca438ecd19..5e9b65278e4c 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c
@@ -170,7 +170,7 @@ static struct clk *icst_clk_setup(struct device *dev,
 
 	init.name = name;
 	init.ops = &icst_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.parent_names = (parent_name ? &parent_name : NULL);
 	init.num_parents = (parent_name ? 1 : 0);
 	icst->map = map;
diff --git a/drivers/clk/versatile/clk-impd1.c b/drivers/clk/versatile/clk-impd1.c
index 65c842a21c62..74c3216dbb00 100644
--- a/drivers/clk/versatile/clk-impd1.c
+++ b/drivers/clk/versatile/clk-impd1.c
@@ -98,8 +98,7 @@ void integrator_impd1_clk_init(void __iomem *base, unsigned int id)
 
 	/* Register the fixed rate PCLK */
 	imc->pclkname = kasprintf(GFP_KERNEL, "lm%x-pclk", id);
-	pclk = clk_register_fixed_rate(NULL, imc->pclkname, NULL,
-				      CLK_IS_ROOT, 0);
+	pclk = clk_register_fixed_rate(NULL, imc->pclkname, NULL, 0, 0);
 	imc->pclk = pclk;
 
 	imc->vco1name = kasprintf(GFP_KERNEL, "lm%x-vco1", id);
diff --git a/drivers/clk/versatile/clk-realview.c b/drivers/clk/versatile/clk-realview.c
index bd4dd2463e23..c56efc70ac16 100644
--- a/drivers/clk/versatile/clk-realview.c
+++ b/drivers/clk/versatile/clk-realview.c
@@ -56,12 +56,11 @@ void __init realview_clk_init(void __iomem *sysbase, bool is_pb1176)
 	struct clk *clk;
 
 	/* APB clock dummy */
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, CLK_IS_ROOT, 0);
+	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 0);
 	clk_register_clkdev(clk, "apb_pclk", NULL);
 
 	/* 24 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk24mhz", NULL, CLK_IS_ROOT,
-				24000000);
+	clk = clk_register_fixed_rate(NULL, "clk24mhz", NULL, 0, 24000000);
 	clk_register_clkdev(clk, NULL, "dev:uart0");
 	clk_register_clkdev(clk, NULL, "dev:uart1");
 	clk_register_clkdev(clk, NULL, "dev:uart2");
@@ -81,8 +80,7 @@ void __init realview_clk_init(void __iomem *sysbase, bool is_pb1176)
 
 
 	/* 1 MHz clock */
-	clk = clk_register_fixed_rate(NULL, "clk1mhz", NULL, CLK_IS_ROOT,
-				      1000000);
+	clk = clk_register_fixed_rate(NULL, "clk1mhz", NULL, 0, 1000000);
 	clk_register_clkdev(clk, NULL, "sp804");
 
 	/* ICST VCO clock */
diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c
index e78755e0ef78..1fe1e8d970cf 100644
--- a/drivers/clk/versatile/clk-sp810.c
+++ b/drivers/clk/versatile/clk-sp810.c
@@ -92,6 +92,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	int num = ARRAY_SIZE(parent_names);
 	char name[12];
 	struct clk_init_data init;
+	static int instance;
 	int i;
 	bool deprecated;
 
@@ -117,7 +118,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	deprecated = !of_find_property(node, "assigned-clock-parents", NULL);
 
 	for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) {
-		snprintf(name, ARRAY_SIZE(name), "timerclken%d", i);
+		snprintf(name, sizeof(name), "sp810_%d_%d", instance, i);
 
 		sp810->timerclken[i].sp810 = sp810;
 		sp810->timerclken[i].channel = i;
@@ -138,5 +139,6 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	}
 
 	of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810);
+	instance++;
 }
 CLK_OF_DECLARE(sp810, "arm,sp810", clk_sp810_of_setup);
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 89c0609e180b..7e5add7d7752 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -94,7 +94,7 @@ static int vexpress_osc_probe(struct platform_device *pdev)
 		init.name = dev_name(&pdev->dev);
 
 	init.ops = &vexpress_osc_ops;
-	init.flags = CLK_IS_ROOT;
+	init.flags = 0;
 	init.num_parents = 0;
 
 	osc->hw.init = &init;
diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpt.c
index f827083defc4..6b40eb89ae19 100644
--- a/drivers/clk/x86/clk-lpt.c
+++ b/drivers/clk/x86/clk-lpt.c
@@ -10,8 +10,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/clk.h>
-#include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -30,7 +28,7 @@ static int lpt_clk_probe(struct platform_device *pdev)
 	/* LPSS free running clock */
 	drvdata->name = "lpss_clk";
 	clk = clk_register_fixed_rate(&pdev->dev, drvdata->name, NULL,
-				      CLK_IS_ROOT, 100000000);
+				      0, 100000000);
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
index 38a65c3e62fc..88a2cab37f62 100644
--- a/drivers/clk/zynq/clkc.c
+++ b/drivers/clk/zynq/clkc.c
@@ -265,8 +265,7 @@ static void __init zynq_clk_setup(struct device_node *np)
 		pr_warn("ps_clk frequency not specified, using 33 MHz.\n");
 		tmp = 33333333;
 	}
-	ps_clk = clk_register_fixed_rate(NULL, "ps_clk", NULL, CLK_IS_ROOT,
-			tmp);
+	ps_clk = clk_register_fixed_rate(NULL, "ps_clk", NULL, 0, tmp);
 
 	/* PLLs */
 	clk = clk_register_zynq_pll("armpll_int", "ps_clk", SLCR_ARMPLL_CTRL,
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index 3269d9ef7a18..376e59bc5fa0 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -163,7 +163,7 @@ static void __init pistachio_clksrc_of_init(struct device_node *node)
 
 	periph_regs = syscon_regmap_lookup_by_phandle(node, "img,cr-periph");
 	if (IS_ERR(periph_regs)) {
-		pr_err("cannot get peripheral regmap (%lu)\n",
+		pr_err("cannot get peripheral regmap (%ld)\n",
 		       PTR_ERR(periph_regs));
 		return;
 	}
@@ -176,7 +176,7 @@ static void __init pistachio_clksrc_of_init(struct device_node *node)
 
 	sys_clk = of_clk_get_by_name(node, "sys");
 	if (IS_ERR(sys_clk)) {
-		pr_err("clock get failed (%lu)\n", PTR_ERR(sys_clk));
+		pr_err("clock get failed (%ld)\n", PTR_ERR(sys_clk));
 		return;
 	}
 
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 59a7b380fbe2..fb5712141040 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -245,7 +245,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
 	}
 }
 
-u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
 {
 	u32 val, dummy;
 
@@ -253,7 +253,7 @@ u32 cpu_freq_read_intel(struct acpi_pct_register *not_used)
 	return val;
 }
 
-void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
 {
 	u32 lo, hi;
 
@@ -262,7 +262,7 @@ void cpu_freq_write_intel(struct acpi_pct_register *not_used, u32 val)
 	wrmsr(MSR_IA32_PERF_CTL, lo, hi);
 }
 
-u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
+static u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
 {
 	u32 val, dummy;
 
@@ -270,12 +270,12 @@ u32 cpu_freq_read_amd(struct acpi_pct_register *not_used)
 	return val;
 }
 
-void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
+static void cpu_freq_write_amd(struct acpi_pct_register *not_used, u32 val)
 {
 	wrmsr(MSR_AMD_PERF_CTL, val, 0);
 }
 
-u32 cpu_freq_read_io(struct acpi_pct_register *reg)
+static u32 cpu_freq_read_io(struct acpi_pct_register *reg)
 {
 	u32 val;
 
@@ -283,7 +283,7 @@ u32 cpu_freq_read_io(struct acpi_pct_register *reg)
 	return val;
 }
 
-void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
+static void cpu_freq_write_io(struct acpi_pct_register *reg, u32 val)
 {
 	acpi_os_write_port(reg->address, val, reg->bit_width);
 }
@@ -514,8 +514,10 @@ static int boost_notify(struct notifier_block *nb, unsigned long action,
 	 */
 
 	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
 		boost_set_msrs(acpi_cpufreq_driver.boost_enabled, cpumask);
 		break;
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4c7825856eab..b87596b591b3 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -76,6 +76,7 @@ static inline bool has_target(void)
 /* internal prototypes */
 static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
+static int cpufreq_start_governor(struct cpufreq_policy *policy);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -964,10 +965,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp
 	cpumask_set_cpu(cpu, policy->cpus);
 
 	if (has_target()) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+		ret = cpufreq_start_governor(policy);
 		if (ret)
 			pr_err("%s: Failed to start governor\n", __func__);
 	}
@@ -1308,10 +1306,7 @@ static void cpufreq_offline(unsigned int cpu)
 	/* Start governor again for active policy */
 	if (!policy_is_inactive(policy)) {
 		if (has_target()) {
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-			if (!ret)
-				ret = cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-
+			ret = cpufreq_start_governor(policy);
 			if (ret)
 				pr_err("%s: Failed to start governor\n", __func__);
 		}
@@ -1401,9 +1396,17 @@ unsigned int cpufreq_quick_get(unsigned int cpu)
 {
 	struct cpufreq_policy *policy;
 	unsigned int ret_freq = 0;
+	unsigned long flags;
 
-	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
-		return cpufreq_driver->get(cpu);
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
+
+	if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get) {
+		ret_freq = cpufreq_driver->get(cpu);
+		read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+		return ret_freq;
+	}
+
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	policy = cpufreq_cpu_get(cpu);
 	if (policy) {
@@ -1484,6 +1487,24 @@ unsigned int cpufreq_get(unsigned int cpu)
 }
 EXPORT_SYMBOL(cpufreq_get);
 
+static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
+{
+	unsigned int new_freq;
+
+	new_freq = cpufreq_driver->get(policy->cpu);
+	if (!new_freq)
+		return 0;
+
+	if (!policy->cur) {
+		pr_debug("cpufreq: Driver did not initialize current freq\n");
+		policy->cur = new_freq;
+	} else if (policy->cur != new_freq && has_target()) {
+		cpufreq_out_of_sync(policy, new_freq);
+	}
+
+	return new_freq;
+}
+
 static struct subsys_interface cpufreq_interface = {
 	.name		= "cpufreq",
 	.subsys		= &cpu_subsys,
@@ -1583,9 +1604,7 @@ void cpufreq_resume(void)
 				policy);
 		} else {
 			down_write(&policy->rwsem);
-			ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-			if (!ret)
-				cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+			ret = cpufreq_start_governor(policy);
 			up_write(&policy->rwsem);
 
 			if (ret)
@@ -1593,17 +1612,6 @@ void cpufreq_resume(void)
 				       __func__, policy);
 		}
 	}
-
-	/*
-	 * schedule call cpufreq_update_policy() for first-online CPU, as that
-	 * wouldn't be hotplugged-out on suspend. It will verify that the
-	 * current freq is in sync with what we believe it to be.
-	 */
-	policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
-	if (WARN_ON(!policy))
-		return;
-
-	schedule_work(&policy->update);
 }
 
 /**
@@ -1927,6 +1935,17 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
 	return ret;
 }
 
+static int cpufreq_start_governor(struct cpufreq_policy *policy)
+{
+	int ret;
+
+	if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
+		cpufreq_update_current_freq(policy);
+
+	ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
+	return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+}
+
 int cpufreq_register_governor(struct cpufreq_governor *governor)
 {
 	int err;
@@ -2063,8 +2082,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 		return cpufreq_driver->setpolicy(new_policy);
 	}
 
-	if (new_policy->governor == policy->governor)
-		goto out;
+	if (new_policy->governor == policy->governor) {
+		pr_debug("cpufreq: governor limits update\n");
+		return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	}
 
 	pr_debug("governor switch\n");
 
@@ -2092,10 +2113,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	policy->governor = new_policy->governor;
 	ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
 	if (!ret) {
-		ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-		if (!ret)
-			goto out;
-
+		ret = cpufreq_start_governor(policy);
+		if (!ret) {
+			pr_debug("cpufreq: governor change\n");
+			return 0;
+		}
 		cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
 	}
 
@@ -2106,14 +2128,10 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 		if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
 			policy->governor = NULL;
 		else
-			cpufreq_governor(policy, CPUFREQ_GOV_START);
+			cpufreq_start_governor(policy);
 	}
 
 	return ret;
-
- out:
-	pr_debug("governor: change or update limits\n");
-	return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 }
 
 /**
@@ -2144,19 +2162,11 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
-		new_policy.cur = cpufreq_driver->get(cpu);
+		new_policy.cur = cpufreq_update_current_freq(policy);
 		if (WARN_ON(!new_policy.cur)) {
 			ret = -EIO;
 			goto unlock;
 		}
-
-		if (!policy->cur) {
-			pr_debug("Driver did not initialize current freq\n");
-			policy->cur = new_policy.cur;
-		} else {
-			if (policy->cur != new_policy.cur && has_target())
-				cpufreq_out_of_sync(policy, new_policy.cur);
-		}
 	}
 
 	ret = cpufreq_set_policy(policy, &new_policy);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 1c25ef405616..10a5cfeae8c5 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -329,7 +329,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
 	struct policy_dbs_info *policy_dbs;
 
 	policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
-	schedule_work(&policy_dbs->work);
+	schedule_work_on(smp_processor_id(), &policy_dbs->work);
 }
 
 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index cb5607495816..4b644526fd59 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -134,7 +134,7 @@ struct pstate_funcs {
 	int (*get_min)(void);
 	int (*get_turbo)(void);
 	int (*get_scaling)(void);
-	void (*set)(struct cpudata*, int pstate);
+	u64 (*get_val)(struct cpudata*, int pstate);
 	void (*get_vid)(struct cpudata *);
 	int32_t (*get_target_pstate)(struct cpudata *);
 };
@@ -565,7 +565,7 @@ static int atom_get_turbo_pstate(void)
 	return value & 0x7F;
 }
 
-static void atom_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 atom_get_val(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 	int32_t vid_fp;
@@ -585,9 +585,7 @@ static void atom_set_pstate(struct cpudata *cpudata, int pstate)
 	if (pstate > cpudata->pstate.max_pstate)
 		vid = cpudata->vid.turbo;
 
-	val |= vid;
-
-	wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
+	return val | vid;
 }
 
 static int silvermont_get_scaling(void)
@@ -711,7 +709,7 @@ static inline int core_get_scaling(void)
 	return 100000;
 }
 
-static void core_set_pstate(struct cpudata *cpudata, int pstate)
+static u64 core_get_val(struct cpudata *cpudata, int pstate)
 {
 	u64 val;
 
@@ -719,7 +717,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
 	if (limits->no_turbo && !limits->turbo_disabled)
 		val |= (u64)1 << 32;
 
-	wrmsrl(MSR_IA32_PERF_CTL, val);
+	return val;
 }
 
 static int knl_get_turbo_pstate(void)
@@ -750,7 +748,7 @@ static struct cpu_defaults core_params = {
 		.get_min = core_get_min_pstate,
 		.get_turbo = core_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
-		.set = core_set_pstate,
+		.get_val = core_get_val,
 		.get_target_pstate = get_target_pstate_use_performance,
 	},
 };
@@ -769,7 +767,7 @@ static struct cpu_defaults silvermont_params = {
 		.get_max_physical = atom_get_max_pstate,
 		.get_min = atom_get_min_pstate,
 		.get_turbo = atom_get_turbo_pstate,
-		.set = atom_set_pstate,
+		.get_val = atom_get_val,
 		.get_scaling = silvermont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -790,7 +788,7 @@ static struct cpu_defaults airmont_params = {
 		.get_max_physical = atom_get_max_pstate,
 		.get_min = atom_get_min_pstate,
 		.get_turbo = atom_get_turbo_pstate,
-		.set = atom_set_pstate,
+		.get_val = atom_get_val,
 		.get_scaling = airmont_get_scaling,
 		.get_vid = atom_get_vid,
 		.get_target_pstate = get_target_pstate_use_cpu_load,
@@ -812,7 +810,7 @@ static struct cpu_defaults knl_params = {
 		.get_min = core_get_min_pstate,
 		.get_turbo = knl_get_turbo_pstate,
 		.get_scaling = core_get_scaling,
-		.set = core_set_pstate,
+		.get_val = core_get_val,
 		.get_target_pstate = get_target_pstate_use_performance,
 	},
 };
@@ -839,25 +837,24 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
-static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
+static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
 {
-	int max_perf, min_perf;
-
-	if (force) {
-		update_turbo_state();
-
-		intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
-
-		pstate = clamp_t(int, pstate, min_perf, max_perf);
-
-		if (pstate == cpu->pstate.current_pstate)
-			return;
-	}
 	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-
 	cpu->pstate.current_pstate = pstate;
+}
 
-	pstate_funcs.set(cpu, pstate);
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+	int pstate = cpu->pstate.min_pstate;
+
+	intel_pstate_record_pstate(cpu, pstate);
+	/*
+	 * Generally, there is no guarantee that this code will always run on
+	 * the CPU being updated, so force the register update to run on the
+	 * right CPU.
+	 */
+	wrmsrl_on_cpu(cpu->cpu, MSR_IA32_PERF_CTL,
+		      pstate_funcs.get_val(cpu, pstate));
 }
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
@@ -870,7 +867,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 
 	if (pstate_funcs.get_vid)
 		pstate_funcs.get_vid(cpu);
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+
+	intel_pstate_set_min_pstate(cpu);
 }
 
 static inline void intel_pstate_calc_busy(struct cpudata *cpu)
@@ -997,6 +995,21 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
 	return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
 }
 
+static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
+{
+	int max_perf, min_perf;
+
+	update_turbo_state();
+
+	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
+	pstate = clamp_t(int, pstate, min_perf, max_perf);
+	if (pstate == cpu->pstate.current_pstate)
+		return;
+
+	intel_pstate_record_pstate(cpu, pstate);
+	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
+}
+
 static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 {
 	int from, target_pstate;
@@ -1006,7 +1019,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
 	target_pstate = pstate_funcs.get_target_pstate(cpu);
 
-	intel_pstate_set_pstate(cpu, target_pstate, true);
+	intel_pstate_update_pstate(cpu, target_pstate);
 
 	sample = &cpu->sample;
 	trace_pstate_sample(fp_toint(sample->core_pct_busy),
@@ -1180,7 +1193,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 	if (hwp_active)
 		return;
 
-	intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate, false);
+	intel_pstate_set_min_pstate(cpu);
 }
 
 static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
@@ -1255,7 +1268,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 	pstate_funcs.get_min   = funcs->get_min;
 	pstate_funcs.get_turbo = funcs->get_turbo;
 	pstate_funcs.get_scaling = funcs->get_scaling;
-	pstate_funcs.set       = funcs->set;
+	pstate_funcs.get_val   = funcs->get_val;
 	pstate_funcs.get_vid   = funcs->get_vid;
 	pstate_funcs.get_target_pstate = funcs->get_target_pstate;
 
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 50bf12033bbc..39ac78c94be0 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -44,7 +44,6 @@
 
 static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1];
 static bool rebooting, throttled, occ_reset;
-static unsigned int *core_to_chip_map;
 
 static const char * const throttle_reason[] = {
 	"No throttling",
@@ -55,6 +54,16 @@ static const char * const throttle_reason[] = {
 	"OCC Reset"
 };
 
+enum throttle_reason_type {
+	NO_THROTTLE = 0,
+	POWERCAP,
+	CPU_OVERTEMP,
+	POWER_SUPPLY_FAILURE,
+	OVERCURRENT,
+	OCC_RESET_THROTTLE,
+	OCC_MAX_REASON
+};
+
 static struct chip {
 	unsigned int id;
 	bool throttled;
@@ -62,9 +71,13 @@ static struct chip {
 	u8 throttle_reason;
 	cpumask_t mask;
 	struct work_struct throttle;
+	int throttle_turbo;
+	int throttle_sub_turbo;
+	int reason[OCC_MAX_REASON];
 } *chips;
 
 static int nr_chips;
+static DEFINE_PER_CPU(struct chip *, chip_info);
 
 /*
  * Note: The set of pstates consists of contiguous integers, the
@@ -196,6 +209,42 @@ static struct freq_attr *powernv_cpu_freq_attr[] = {
 	NULL,
 };
 
+#define throttle_attr(name, member)					\
+static ssize_t name##_show(struct cpufreq_policy *policy, char *buf)	\
+{									\
+	struct chip *chip = per_cpu(chip_info, policy->cpu);		\
+									\
+	return sprintf(buf, "%u\n", chip->member);			\
+}									\
+									\
+static struct freq_attr throttle_attr_##name = __ATTR_RO(name)		\
+
+throttle_attr(unthrottle, reason[NO_THROTTLE]);
+throttle_attr(powercap, reason[POWERCAP]);
+throttle_attr(overtemp, reason[CPU_OVERTEMP]);
+throttle_attr(supply_fault, reason[POWER_SUPPLY_FAILURE]);
+throttle_attr(overcurrent, reason[OVERCURRENT]);
+throttle_attr(occ_reset, reason[OCC_RESET_THROTTLE]);
+throttle_attr(turbo_stat, throttle_turbo);
+throttle_attr(sub_turbo_stat, throttle_sub_turbo);
+
+static struct attribute *throttle_attrs[] = {
+	&throttle_attr_unthrottle.attr,
+	&throttle_attr_powercap.attr,
+	&throttle_attr_overtemp.attr,
+	&throttle_attr_supply_fault.attr,
+	&throttle_attr_overcurrent.attr,
+	&throttle_attr_occ_reset.attr,
+	&throttle_attr_turbo_stat.attr,
+	&throttle_attr_sub_turbo_stat.attr,
+	NULL,
+};
+
+static const struct attribute_group throttle_attr_grp = {
+	.name	= "throttle_stats",
+	.attrs	= throttle_attrs,
+};
+
 /* Helper routines */
 
 /* Access helpers to power mgt SPR */
@@ -324,34 +373,35 @@ static inline unsigned int get_nominal_index(void)
 
 static void powernv_cpufreq_throttle_check(void *data)
 {
+	struct chip *chip;
 	unsigned int cpu = smp_processor_id();
-	unsigned int chip_id = core_to_chip_map[cpu_core_index_of_thread(cpu)];
 	unsigned long pmsr;
-	int pmsr_pmax, i;
+	int pmsr_pmax;
 
 	pmsr = get_pmspr(SPRN_PMSR);
-
-	for (i = 0; i < nr_chips; i++)
-		if (chips[i].id == chip_id)
-			break;
+	chip = this_cpu_read(chip_info);
 
 	/* Check for Pmax Capping */
 	pmsr_pmax = (s8)PMSR_MAX(pmsr);
 	if (pmsr_pmax != powernv_pstate_info.max) {
-		if (chips[i].throttled)
+		if (chip->throttled)
 			goto next;
-		chips[i].throttled = true;
-		if (pmsr_pmax < powernv_pstate_info.nominal)
+		chip->throttled = true;
+		if (pmsr_pmax < powernv_pstate_info.nominal) {
 			pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
-				     cpu, chips[i].id, pmsr_pmax,
+				     cpu, chip->id, pmsr_pmax,
 				     powernv_pstate_info.nominal);
-		trace_powernv_throttle(chips[i].id,
-				      throttle_reason[chips[i].throttle_reason],
+			chip->throttle_sub_turbo++;
+		} else {
+			chip->throttle_turbo++;
+		}
+		trace_powernv_throttle(chip->id,
+				      throttle_reason[chip->throttle_reason],
 				      pmsr_pmax);
-	} else if (chips[i].throttled) {
-		chips[i].throttled = false;
-		trace_powernv_throttle(chips[i].id,
-				      throttle_reason[chips[i].throttle_reason],
+	} else if (chip->throttled) {
+		chip->throttled = false;
+		trace_powernv_throttle(chip->id,
+				      throttle_reason[chip->throttle_reason],
 				      pmsr_pmax);
 	}
 
@@ -411,6 +461,21 @@ static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	for (i = 0; i < threads_per_core; i++)
 		cpumask_set_cpu(base + i, policy->cpus);
 
+	if (!policy->driver_data) {
+		int ret;
+
+		ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
+		if (ret) {
+			pr_info("Failed to create throttle stats directory for cpu %d\n",
+				policy->cpu);
+			return ret;
+		}
+		/*
+		 * policy->driver_data is used as a flag for one-time
+		 * creation of throttle sysfs files.
+		 */
+		policy->driver_data = policy;
+	}
 	return cpufreq_table_validate_and_show(policy, powernv_freqs);
 }
 
@@ -517,8 +582,10 @@ static int powernv_cpufreq_occ_msg(struct notifier_block *nb,
 				break;
 
 		if (omsg.throttle_status >= 0 &&
-		    omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS)
+		    omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) {
 			chips[i].throttle_reason = omsg.throttle_status;
+			chips[i].reason[omsg.throttle_status]++;
+		}
 
 		if (!omsg.throttle_status)
 			chips[i].restore = true;
@@ -558,47 +625,34 @@ static int init_chip_info(void)
 	unsigned int chip[256];
 	unsigned int cpu, i;
 	unsigned int prev_chip_id = UINT_MAX;
-	cpumask_t cpu_mask;
-	int ret = -ENOMEM;
-
-	core_to_chip_map = kcalloc(cpu_nr_cores(), sizeof(unsigned int),
-				   GFP_KERNEL);
-	if (!core_to_chip_map)
-		goto out;
 
-	cpumask_copy(&cpu_mask, cpu_possible_mask);
-	for_each_cpu(cpu, &cpu_mask) {
+	for_each_possible_cpu(cpu) {
 		unsigned int id = cpu_to_chip_id(cpu);
 
 		if (prev_chip_id != id) {
 			prev_chip_id = id;
 			chip[nr_chips++] = id;
 		}
-		core_to_chip_map[cpu_core_index_of_thread(cpu)] = id;
-		cpumask_andnot(&cpu_mask, &cpu_mask, cpu_sibling_mask(cpu));
 	}
 
 	chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL);
 	if (!chips)
-		goto free_chip_map;
+		return -ENOMEM;
 
 	for (i = 0; i < nr_chips; i++) {
 		chips[i].id = chip[i];
 		cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i]));
 		INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn);
+		for_each_cpu(cpu, &chips[i].mask)
+			per_cpu(chip_info, cpu) =  &chips[i];
 	}
 
 	return 0;
-free_chip_map:
-	kfree(core_to_chip_map);
-out:
-	return ret;
 }
 
 static inline void clean_chip_info(void)
 {
 	kfree(chips);
-	kfree(core_to_chip_map);
 }
 
 static inline void unregister_all_notifiers(void)
diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c
index 096377232747..46fee1539cc8 100644
--- a/drivers/cpufreq/pxa2xx-cpufreq.c
+++ b/drivers/cpufreq/pxa2xx-cpufreq.c
@@ -319,7 +319,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, unsigned int idx)
 	local_irq_save(flags);
 
 	/* Set new the CCCR and prepare CCLKCFG */
-	CCCR = pxa_freq_settings[idx].cccr;
+	writel(pxa_freq_settings[idx].cccr, CCCR);
 	cclkcfg = pxa_freq_settings[idx].cclkcfg;
 
 	asm volatile("							\n\
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 051a8a8224cd..a145b319d171 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -576,10 +576,8 @@ static struct cpufreq_driver s5pv210_driver = {
 	.get		= cpufreq_generic_get,
 	.init		= s5pv210_cpu_init,
 	.name		= "s5pv210",
-#ifdef CONFIG_PM
 	.suspend	= cpufreq_generic_suspend,
 	.resume		= cpufreq_generic_suspend, /* We need to set SLEEP FREQ again */
-#endif
 };
 
 static struct notifier_block s5pv210_cpufreq_reboot_notifier = {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 27fc733cb5b9..03d38c291de6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -196,7 +196,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static void get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
 	int i, divisor;
 	unsigned int max, thresh, avg;
@@ -253,9 +253,7 @@ again:
 	if (likely(variance <= U64_MAX/36)) {
 		if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
 							|| variance <= 400) {
-			if (data->next_timer_us > avg)
-				data->predicted_us = avg;
-			return;
+			return avg;
 		}
 	}
 
@@ -269,7 +267,7 @@ again:
 	 * with sporadic activity with a bunch of short pauses.
 	 */
 	if ((divisor * 4) <= INTERVALS * 3)
-		return;
+		return UINT_MAX;
 
 	thresh = max - 1;
 	goto again;
@@ -286,6 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 	unsigned int interactivity_req;
+	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
 
 	if (data->needs_update) {
@@ -312,32 +311,43 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 					 data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	get_typical_interval(data);
-
-	/*
-	 * Performance multiplier defines a minimum predicted idle
-	 * duration / latency ratio. Adjust the latency limit if
-	 * necessary.
-	 */
-	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
-	if (latency_req > interactivity_req)
-		latency_req = interactivity_req;
+	expected_interval = get_typical_interval(data);
+	expected_interval = min(expected_interval, data->next_timer_us);
 
 	if (CPUIDLE_DRIVER_STATE_START > 0) {
-		data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
+		struct cpuidle_state *s = &drv->states[CPUIDLE_DRIVER_STATE_START];
+		unsigned int polling_threshold;
+
 		/*
 		 * We want to default to C1 (hlt), not to busy polling
-		 * unless the timer is happening really really soon.
+		 * unless the timer is happening really really soon, or
+		 * C1's exit latency exceeds the user configured limit.
 		 */
-		if (interactivity_req > 20 &&
-		    !drv->states[CPUIDLE_DRIVER_STATE_START].disabled &&
-			dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable == 0)
+		polling_threshold = max_t(unsigned int, 20, s->target_residency);
+		if (data->next_timer_us > polling_threshold &&
+		    latency_req > s->exit_latency && !s->disabled &&
+		    !dev->states_usage[CPUIDLE_DRIVER_STATE_START].disable)
 			data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
+		else
+			data->last_state_idx = CPUIDLE_DRIVER_STATE_START - 1;
 	} else {
 		data->last_state_idx = CPUIDLE_DRIVER_STATE_START;
 	}
 
 	/*
+	 * Use the lowest expected idle interval to pick the idle state.
+	 */
+	data->predicted_us = min(data->predicted_us, expected_interval);
+
+	/*
+	 * Use the performance multiplier and the user-configurable
+	 * latency_req to determine the maximum exit latency.
+	 */
+	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+	if (latency_req > interactivity_req)
+		latency_req = interactivity_req;
+
+	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
 	 */
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 336e5b780fcb..4dbc18727235 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -53,7 +53,7 @@ static DEFINE_RWLOCK(ccp_unit_lock);
 static LIST_HEAD(ccp_units);
 
 /* Round-robin counter */
-static DEFINE_RWLOCK(ccp_rr_lock);
+static DEFINE_SPINLOCK(ccp_rr_lock);
 static struct ccp_device *ccp_rr;
 
 /* Ever-increasing value to produce unique unit numbers */
@@ -128,14 +128,14 @@ static struct ccp_device *ccp_get_device(void)
 	 */
 	read_lock_irqsave(&ccp_unit_lock, flags);
 	if (!list_empty(&ccp_units)) {
-		write_lock_irqsave(&ccp_rr_lock, flags);
+		spin_lock(&ccp_rr_lock);
 		dp = ccp_rr;
 		if (list_is_last(&ccp_rr->entry, &ccp_units))
 			ccp_rr = list_first_entry(&ccp_units, struct ccp_device,
 						  entry);
 		else
 			ccp_rr = list_next_entry(ccp_rr, entry);
-		write_unlock_irqrestore(&ccp_rr_lock, flags);
+		spin_unlock(&ccp_rr_lock);
 	}
 	read_unlock_irqrestore(&ccp_unit_lock, flags);
 
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index c0656e7f37b5..80239ae69527 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -420,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev)
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
 	cesa->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(cesa->regs))
-		return -ENOMEM;
+		return PTR_ERR(cesa->regs);
 
 	ret = mv_cesa_dev_dma_init(cesa);
 	if (ret)
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index bd985e72520b..74071e45ada0 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -588,6 +588,7 @@ struct mv_cesa_ahash_dma_req {
 	struct mv_cesa_tdma_req base;
 	u8 *padding;
 	dma_addr_t padding_dma;
+	u8 *cache;
 	dma_addr_t cache_dma;
 };
 
@@ -609,7 +610,7 @@ struct mv_cesa_ahash_req {
 		struct mv_cesa_ahash_std_req std;
 	} req;
 	struct mv_cesa_op_ctx op_tmpl;
-	u8 *cache;
+	u8 cache[CESA_MAX_HASH_BLOCK_SIZE];
 	unsigned int cache_ptr;
 	u64 len;
 	int src_nents;
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 683cca9ac3c4..7ca2e0f9dc2e 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -45,69 +45,25 @@ mv_cesa_ahash_req_iter_next_op(struct mv_cesa_ahash_dma_iter *iter)
 	return mv_cesa_req_dma_iter_next_op(&iter->base);
 }
 
-static inline int mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_req *creq,
-						gfp_t flags)
+static inline int
+mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_dma_req *req, gfp_t flags)
 {
-	struct mv_cesa_ahash_dma_req *dreq = &creq->req.dma;
-
-	creq->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags,
-				     &dreq->cache_dma);
-	if (!creq->cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static inline int mv_cesa_ahash_std_alloc_cache(struct mv_cesa_ahash_req *creq,
-						gfp_t flags)
-{
-	creq->cache = kzalloc(CESA_MAX_HASH_BLOCK_SIZE, flags);
-	if (!creq->cache)
+	req->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags,
+				    &req->cache_dma);
+	if (!req->cache)
 		return -ENOMEM;
 
 	return 0;
 }
 
-static int mv_cesa_ahash_alloc_cache(struct ahash_request *req)
-{
-	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
-		      GFP_KERNEL : GFP_ATOMIC;
-	int ret;
-
-	if (creq->cache)
-		return 0;
-
-	if (creq->req.base.type == CESA_DMA_REQ)
-		ret = mv_cesa_ahash_dma_alloc_cache(creq, flags);
-	else
-		ret = mv_cesa_ahash_std_alloc_cache(creq, flags);
-
-	return ret;
-}
-
-static inline void mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_req *creq)
-{
-	dma_pool_free(cesa_dev->dma->cache_pool, creq->cache,
-		      creq->req.dma.cache_dma);
-}
-
-static inline void mv_cesa_ahash_std_free_cache(struct mv_cesa_ahash_req *creq)
-{
-	kfree(creq->cache);
-}
-
-static void mv_cesa_ahash_free_cache(struct mv_cesa_ahash_req *creq)
+static inline void
+mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_dma_req *req)
 {
-	if (!creq->cache)
+	if (!req->cache)
 		return;
 
-	if (creq->req.base.type == CESA_DMA_REQ)
-		mv_cesa_ahash_dma_free_cache(creq);
-	else
-		mv_cesa_ahash_std_free_cache(creq);
-
-	creq->cache = NULL;
+	dma_pool_free(cesa_dev->dma->cache_pool, req->cache,
+		      req->cache_dma);
 }
 
 static int mv_cesa_ahash_dma_alloc_padding(struct mv_cesa_ahash_dma_req *req,
@@ -146,6 +102,7 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req)
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 
 	dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE);
+	mv_cesa_ahash_dma_free_cache(&creq->req.dma);
 	mv_cesa_dma_cleanup(&creq->req.dma.base);
 }
 
@@ -161,8 +118,6 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 
-	mv_cesa_ahash_free_cache(creq);
-
 	if (creq->req.base.type == CESA_DMA_REQ)
 		mv_cesa_ahash_dma_last_cleanup(req);
 }
@@ -445,14 +400,6 @@ static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm)
 static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
-	int ret;
-
-	if (((creq->cache_ptr + req->nbytes) & CESA_HASH_BLOCK_SIZE_MSK) &&
-	    !creq->last_req) {
-		ret = mv_cesa_ahash_alloc_cache(req);
-		if (ret)
-			return ret;
-	}
 
 	if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) {
 		*cached = true;
@@ -505,10 +452,17 @@ mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain,
 			    gfp_t flags)
 {
 	struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma;
+	int ret;
 
 	if (!creq->cache_ptr)
 		return 0;
 
+	ret = mv_cesa_ahash_dma_alloc_cache(ahashdreq, flags);
+	if (ret)
+		return ret;
+
+	memcpy(ahashdreq->cache, creq->cache, creq->cache_ptr);
+
 	return mv_cesa_dma_add_data_transfer(chain,
 					     CESA_SA_DATA_SRAM_OFFSET,
 					     ahashdreq->cache_dma,
@@ -848,10 +802,6 @@ static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash,
 	if (!cache_ptr)
 		return 0;
 
-	ret = mv_cesa_ahash_alloc_cache(req);
-	if (ret)
-		return ret;
-
 	memcpy(creq->cache, cache, cache_ptr);
 	creq->cache_ptr = cache_ptr;
 
@@ -860,9 +810,14 @@ static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash,
 
 static int mv_cesa_md5_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5);
+	creq->state[0] = MD5_H0;
+	creq->state[1] = MD5_H1;
+	creq->state[2] = MD5_H2;
+	creq->state[3] = MD5_H3;
 
 	mv_cesa_ahash_init(req, &tmpl, true);
 
@@ -923,9 +878,15 @@ struct ahash_alg mv_md5_alg = {
 
 static int mv_cesa_sha1_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1);
+	creq->state[0] = SHA1_H0;
+	creq->state[1] = SHA1_H1;
+	creq->state[2] = SHA1_H2;
+	creq->state[3] = SHA1_H3;
+	creq->state[4] = SHA1_H4;
 
 	mv_cesa_ahash_init(req, &tmpl, false);
 
@@ -986,9 +947,18 @@ struct ahash_alg mv_sha1_alg = {
 
 static int mv_cesa_sha256_init(struct ahash_request *req)
 {
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx tmpl = { };
 
 	mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256);
+	creq->state[0] = SHA256_H0;
+	creq->state[1] = SHA256_H1;
+	creq->state[2] = SHA256_H2;
+	creq->state[3] = SHA256_H3;
+	creq->state[4] = SHA256_H4;
+	creq->state[5] = SHA256_H5;
+	creq->state[6] = SHA256_H6;
+	creq->state[7] = SHA256_H7;
 
 	mv_cesa_ahash_init(req, &tmpl, false);
 
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 64281bb2f650..4de78c552251 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -61,7 +61,7 @@ config DEVFREQ_GOV_USERSPACE
 	  Sets the frequency at the user specified one.
 	  This governor returns the user configured frequency if there
 	  has been an input to /sys/devices/.../power/devfreq_set_freq.
-	  Otherwise, the governor does not change the frequnecy
+	  Otherwise, the governor does not change the frequency
 	  given at the initialization.
 
 comment "DEVFREQ Drivers"
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 155c1464948e..4a2c07ee6677 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -34,6 +34,8 @@
 #include <linux/poll.h>
 #include <linux/reservation.h>
 
+#include <uapi/linux/dma-buf.h>
+
 static inline int is_dma_buf_file(struct file *);
 
 struct dma_buf_list {
@@ -251,11 +253,55 @@ out:
 	return events;
 }
 
+static long dma_buf_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_sync sync;
+	enum dma_data_direction direction;
+	int ret;
+
+	dmabuf = file->private_data;
+
+	switch (cmd) {
+	case DMA_BUF_IOCTL_SYNC:
+		if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+			return -EFAULT;
+
+		if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+			return -EINVAL;
+
+		switch (sync.flags & DMA_BUF_SYNC_RW) {
+		case DMA_BUF_SYNC_READ:
+			direction = DMA_FROM_DEVICE;
+			break;
+		case DMA_BUF_SYNC_WRITE:
+			direction = DMA_TO_DEVICE;
+			break;
+		case DMA_BUF_SYNC_RW:
+			direction = DMA_BIDIRECTIONAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (sync.flags & DMA_BUF_SYNC_END)
+			ret = dma_buf_end_cpu_access(dmabuf, direction);
+		else
+			ret = dma_buf_begin_cpu_access(dmabuf, direction);
+
+		return ret;
+	default:
+		return -ENOTTY;
+	}
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
 	.mmap		= dma_buf_mmap_internal,
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,
+	.unlocked_ioctl	= dma_buf_ioctl,
 };
 
 /*
@@ -539,13 +585,11 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
  * preparations. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to prepare cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
  * @direction:	[in]	length of range for cpu access.
  *
  * Can return negative error values, returns 0 on success.
  */
-int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
 			     enum dma_data_direction direction)
 {
 	int ret = 0;
@@ -554,8 +598,7 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
 		return -EINVAL;
 
 	if (dmabuf->ops->begin_cpu_access)
-		ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
-							len, direction);
+		ret = dmabuf->ops->begin_cpu_access(dmabuf, direction);
 
 	return ret;
 }
@@ -567,19 +610,21 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
  * actions. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to complete cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
  * @direction:	[in]	length of range for cpu access.
  *
- * This call must always succeed.
+ * Can return negative error values, returns 0 on success.
  */
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
-			    enum dma_data_direction direction)
+int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+			   enum dma_data_direction direction)
 {
+	int ret = 0;
+
 	WARN_ON(!dmabuf);
 
 	if (dmabuf->ops->end_cpu_access)
-		dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+		ret = dmabuf->ops->end_cpu_access(dmabuf, direction);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
index dc6874424188..6b816878e5e7 100644
--- a/drivers/dma/idma64.h
+++ b/drivers/dma/idma64.h
@@ -16,7 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#include <asm-generic/io-64-nonatomic-lo-hi.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "virt-dma.h"
 
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index debca824bed6..77c1c44009d8 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -122,6 +122,7 @@ struct pxad_chan {
 struct pxad_device {
 	struct dma_device		slave;
 	int				nr_chans;
+	int				nr_requestors;
 	void __iomem			*base;
 	struct pxad_phy			*phys;
 	spinlock_t			phy_lock;	/* Phy association */
@@ -473,7 +474,7 @@ static void pxad_free_phy(struct pxad_chan *chan)
 		return;
 
 	/* clear the channel mapping in DRCMR */
-	if (chan->drcmr <= DRCMR_CHLNUM) {
+	if (chan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(chan->drcmr);
 		writel_relaxed(0, chan->phy->base + reg);
 	}
@@ -509,6 +510,7 @@ static bool is_running_chan_misaligned(struct pxad_chan *chan)
 
 static void phy_enable(struct pxad_phy *phy, bool misaligned)
 {
+	struct pxad_device *pdev;
 	u32 reg, dalgn;
 
 	if (!phy->vchan)
@@ -518,7 +520,8 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned)
 		"%s(); phy=%p(%d) misaligned=%d\n", __func__,
 		phy, phy->idx, misaligned);
 
-	if (phy->vchan->drcmr <= DRCMR_CHLNUM) {
+	pdev = to_pxad_dev(phy->vchan->vc.chan.device);
+	if (phy->vchan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(phy->vchan->drcmr);
 		writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
 	}
@@ -914,6 +917,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 {
 	u32 maxburst = 0, dev_addr = 0;
 	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
 
 	*dcmd = 0;
 	if (dir == DMA_DEV_TO_MEM) {
@@ -922,7 +926,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.src_addr;
 		*dev_src = dev_addr;
 		*dcmd |= PXA_DCMD_INCTRGADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWSRC;
 	}
 	if (dir == DMA_MEM_TO_DEV) {
@@ -931,7 +935,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.dst_addr;
 		*dev_dst = dev_addr;
 		*dcmd |= PXA_DCMD_INCSRCADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWTRG;
 	}
 	if (dir == DMA_MEM_TO_MEM)
@@ -1341,13 +1345,15 @@ static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
 
 static int pxad_init_dmadev(struct platform_device *op,
 			    struct pxad_device *pdev,
-			    unsigned int nr_phy_chans)
+			    unsigned int nr_phy_chans,
+			    unsigned int nr_requestors)
 {
 	int ret;
 	unsigned int i;
 	struct pxad_chan *c;
 
 	pdev->nr_chans = nr_phy_chans;
+	pdev->nr_requestors = nr_requestors;
 	INIT_LIST_HEAD(&pdev->slave.channels);
 	pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
 	pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
@@ -1382,7 +1388,7 @@ static int pxad_probe(struct platform_device *op)
 	const struct of_device_id *of_id;
 	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
 	struct resource *iores;
-	int ret, dma_channels = 0;
+	int ret, dma_channels = 0, nb_requestors = 0;
 	const enum dma_slave_buswidth widths =
 		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1399,13 +1405,23 @@ static int pxad_probe(struct platform_device *op)
 		return PTR_ERR(pdev->base);
 
 	of_id = of_match_device(pxad_dt_ids, &op->dev);
-	if (of_id)
+	if (of_id) {
 		of_property_read_u32(op->dev.of_node, "#dma-channels",
 				     &dma_channels);
-	else if (pdata && pdata->dma_channels)
+		ret = of_property_read_u32(op->dev.of_node, "#dma-requests",
+					   &nb_requestors);
+		if (ret) {
+			dev_warn(pdev->slave.dev,
+				 "#dma-requests set to default 32 as missing in OF: %d",
+				 ret);
+			nb_requestors = 32;
+		};
+	} else if (pdata && pdata->dma_channels) {
 		dma_channels = pdata->dma_channels;
-	else
+		nb_requestors = pdata->nb_requestors;
+	} else {
 		dma_channels = 32;	/* default 32 channel */
+	}
 
 	dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
 	dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
@@ -1423,7 +1439,7 @@ static int pxad_probe(struct platform_device *op)
 	pdev->slave.descriptor_reuse = true;
 
 	pdev->slave.dev = &op->dev;
-	ret = pxad_init_dmadev(op, pdev, dma_channels);
+	ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors);
 	if (ret) {
 		dev_err(pdev->slave.dev, "unable to register\n");
 		return ret;
@@ -1442,7 +1458,8 @@ static int pxad_probe(struct platform_device *op)
 
 	platform_set_drvdata(op, pdev);
 	pxad_init_debugfs(pdev);
-	dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels);
+	dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n",
+		 dma_channels, nb_requestors);
 	return 0;
 }
 
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 36a7c2d89a01..aee149bdf4c0 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -221,7 +221,7 @@ struct inbound_phy_packet_event {
 #ifdef CONFIG_COMPAT
 static void __user *u64_to_uptr(u64 value)
 {
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return compat_ptr(value);
 	else
 		return (void __user *)(unsigned long)value;
@@ -229,7 +229,7 @@ static void __user *u64_to_uptr(u64 value)
 
 static u64 uptr_to_u64(void __user *ptr)
 {
-	if (is_compat_task())
+	if (in_compat_syscall())
 		return ptr_to_compat(ptr);
 	else
 		return (u64)(unsigned long)ptr;
diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c
index 8a46077129ac..631c977b0da5 100644
--- a/drivers/firewire/nosy.c
+++ b/drivers/firewire/nosy.c
@@ -446,14 +446,16 @@ static void
 bus_reset_irq_handler(struct pcilynx *lynx)
 {
 	struct client *client;
-	struct timeval tv;
+	struct timespec64 ts64;
+	u32    timestamp;
 
-	do_gettimeofday(&tv);
+	ktime_get_real_ts64(&ts64);
+	timestamp = ts64.tv_nsec / NSEC_PER_USEC;
 
 	spin_lock(&lynx->client_list_lock);
 
 	list_for_each_entry(client, &lynx->client_list, link)
-		packet_buffer_put(&client->buffer, &tv.tv_usec, 4);
+		packet_buffer_put(&client->buffer, &timestamp, 4);
 
 	spin_unlock(&lynx->client_list_lock);
 }
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index 6174db80c663..7e3e595c9f30 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -80,7 +80,7 @@
 #define FW_REV_MINOR(x)		(((x) & FW_REV_MINOR_MASK) >> FW_REV_MINOR_BITS)
 #define FW_REV_PATCH(x)		((x) & FW_REV_PATCH_MASK)
 
-#define MAX_RX_TIMEOUT		(msecs_to_jiffies(20))
+#define MAX_RX_TIMEOUT		(msecs_to_jiffies(30))
 
 enum scpi_error_codes {
 	SCPI_SUCCESS = 0, /* Success */
@@ -231,7 +231,8 @@ struct _scpi_sensor_info {
 };
 
 struct sensor_value {
-	__le32 val;
+	__le32 lo_val;
+	__le32 hi_val;
 } __packed;
 
 static struct scpi_drvinfo *scpi_info;
@@ -373,7 +374,7 @@ static int scpi_send_message(u8 cmd, void *tx_buf, unsigned int tx_len,
 		ret = -ETIMEDOUT;
 	else
 		/* first status word */
-		ret = le32_to_cpu(msg->status);
+		ret = msg->status;
 out:
 	if (ret < 0 && rx_buf) /* remove entry from the list if timed-out */
 		scpi_process_cmd(scpi_chan, msg->cmd);
@@ -525,15 +526,17 @@ static int scpi_sensor_get_info(u16 sensor_id, struct scpi_sensor_info *info)
 	return ret;
 }
 
-int scpi_sensor_get_value(u16 sensor, u32 *val)
+int scpi_sensor_get_value(u16 sensor, u64 *val)
 {
+	__le16 id = cpu_to_le16(sensor);
 	struct sensor_value buf;
 	int ret;
 
-	ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &sensor, sizeof(sensor),
+	ret = scpi_send_message(SCPI_CMD_SENSOR_VALUE, &id, sizeof(id),
 				&buf, sizeof(buf));
 	if (!ret)
-		*val = le32_to_cpu(buf.val);
+		*val = (u64)le32_to_cpu(buf.hi_val) << 32 |
+			le32_to_cpu(buf.lo_val);
 
 	return ret;
 }
@@ -699,7 +702,7 @@ static int scpi_probe(struct platform_device *pdev)
 		cl->rx_callback = scpi_handle_remote_msg;
 		cl->tx_prepare = scpi_tx_prepare;
 		cl->tx_block = true;
-		cl->tx_tout = 50;
+		cl->tx_tout = 20;
 		cl->knows_txdone = false; /* controller can't ack */
 
 		INIT_LIST_HEAD(&pchan->rx_pending);
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 9e15d571b53c..aa1f743152a2 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -61,8 +61,8 @@ static int __init uefi_init(void)
 	char vendor[100] = "unknown";
 	int i, retval;
 
-	efi.systab = early_memremap(efi_system_table,
-				    sizeof(efi_system_table_t));
+	efi.systab = early_memremap_ro(efi_system_table,
+				       sizeof(efi_system_table_t));
 	if (efi.systab == NULL) {
 		pr_warn("Unable to map EFI system table.\n");
 		return -ENOMEM;
@@ -86,8 +86,8 @@ static int __init uefi_init(void)
 			efi.systab->hdr.revision & 0xffff);
 
 	/* Show what we know for posterity */
-	c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
-			     sizeof(vendor) * sizeof(efi_char16_t));
+	c16 = early_memremap_ro(efi_to_phys(efi.systab->fw_vendor),
+				sizeof(vendor) * sizeof(efi_char16_t));
 	if (c16) {
 		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
 			vendor[i] = c16[i];
@@ -100,8 +100,8 @@ static int __init uefi_init(void)
 		efi.systab->hdr.revision & 0xffff, vendor);
 
 	table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
-	config_tables = early_memremap(efi_to_phys(efi.systab->tables),
-				       table_size);
+	config_tables = early_memremap_ro(efi_to_phys(efi.systab->tables),
+					  table_size);
 	if (config_tables == NULL) {
 		pr_warn("Unable to map EFI config table array.\n");
 		retval = -ENOMEM;
@@ -185,7 +185,7 @@ void __init efi_init(void)
 	efi_system_table = params.system_table;
 
 	memmap.phys_map = params.mmap;
-	memmap.map = early_memremap(params.mmap, params.mmap_size);
+	memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
 	if (memmap.map == NULL) {
 		/*
 		* If we are booting via UEFI, the UEFI memory map is the only
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 2cd37dad67a6..3a69ed5ecfcb 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -182,6 +182,7 @@ static int generic_ops_register(void)
 {
 	generic_ops.get_variable = efi.get_variable;
 	generic_ops.set_variable = efi.set_variable;
+	generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking;
 	generic_ops.get_next_variable = efi.get_next_variable;
 	generic_ops.query_variable_store = efi_query_variable_store;
 
@@ -326,38 +327,6 @@ u64 __init efi_mem_desc_end(efi_memory_desc_t *md)
 	return end;
 }
 
-/*
- * We can't ioremap data in EFI boot services RAM, because we've already mapped
- * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
- * callable after efi_enter_virtual_mode and before efi_free_boot_services.
- */
-void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
-{
-	struct efi_memory_map *map;
-	void *p;
-	map = efi.memmap;
-	if (!map)
-		return NULL;
-	if (WARN_ON(!map->map))
-		return NULL;
-	for (p = map->map; p < map->map_end; p += map->desc_size) {
-		efi_memory_desc_t *md = p;
-		u64 size = md->num_pages << EFI_PAGE_SHIFT;
-		u64 end = md->phys_addr + size;
-		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
-		    md->type != EFI_BOOT_SERVICES_CODE &&
-		    md->type != EFI_BOOT_SERVICES_DATA)
-			continue;
-		if (!md->virt_addr)
-			continue;
-		if (phys_addr >= md->phys_addr && phys_addr < end) {
-			phys_addr += md->virt_addr - md->phys_addr;
-			return (__force void __iomem *)(unsigned long)phys_addr;
-		}
-	}
-	return NULL;
-}
-
 static __initdata efi_config_table_type_t common_tables[] = {
 	{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
 	{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
@@ -586,7 +555,8 @@ static __initdata char memory_type_name[][20] = {
 	"ACPI Memory NVS",
 	"Memory Mapped I/O",
 	"MMIO Port Space",
-	"PAL Code"
+	"PAL Code",
+	"Persistent Memory",
 };
 
 char * __init efi_md_typeattr_format(char *buf, size_t size,
@@ -613,13 +583,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
 	if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 		     EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 		     EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
+		     EFI_MEMORY_NV |
 		     EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
 		snprintf(pos, size, "|attr=0x%016llx]",
 			 (unsigned long long)attr);
 	else
-		snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+		snprintf(pos, size,
+			 "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 			 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 			 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+			 attr & EFI_MEMORY_NV      ? "NV"  : "",
 			 attr & EFI_MEMORY_XP      ? "XP"  : "",
 			 attr & EFI_MEMORY_RP      ? "RP"  : "",
 			 attr & EFI_MEMORY_WP      ? "WP"  : "",
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 10e6774ab2a2..096adcbcb5a9 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -231,7 +231,7 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
 
 static inline bool is_compat(void)
 {
-	if (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())
+	if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
 		return true;
 
 	return false;
@@ -386,7 +386,7 @@ static const struct sysfs_ops efivar_attr_ops = {
 
 static void efivar_release(struct kobject *kobj)
 {
-	struct efivar_entry *var = container_of(kobj, struct efivar_entry, kobj);
+	struct efivar_entry *var = to_efivar_entry(kobj);
 	kfree(var);
 }
 
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 22c5285f7705..75feb3f5829b 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -167,14 +167,11 @@ static struct kset *esrt_kset;
 static int esre_create_sysfs_entry(void *esre, int entry_num)
 {
 	struct esre_entry *entry;
-	char name[20];
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
 
-	sprintf(name, "entry%d", entry_num);
-
 	entry->kobj.kset = esrt_kset;
 
 	if (esrt->fw_resource_version == 1) {
@@ -182,7 +179,7 @@ static int esre_create_sysfs_entry(void *esre, int entry_num)
 
 		entry->esre.esre1 = esre;
 		rc = kobject_init_and_add(&entry->kobj, &esre1_ktype, NULL,
-					  "%s", name);
+					  "entry%d", entry_num);
 		if (rc) {
 			kfree(entry);
 			return rc;
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index ad077944aa0e..da99bbb74aeb 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -23,6 +23,10 @@ KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
 GCOV_PROFILE			:= n
 KASAN_SANITIZE			:= n
 UBSAN_SANITIZE			:= n
+OBJECT_FILES_NON_STANDARD	:= y
+
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT			:= n
 
 lib-y				:= efi-stub-helper.o
 
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 4deb3e7faa0e..414deb85c2e5 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -192,6 +192,10 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 
 	pr_efi(sys_table, "Booting Linux Kernel...\n");
 
+	status = check_platform_features(sys_table);
+	if (status != EFI_SUCCESS)
+		goto fail;
+
 	/*
 	 * Get a handle to the loaded image protocol.  This is used to get
 	 * information about the running image, such as size and the command
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index 495ebd657e38..6f42be4d0084 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -9,6 +9,23 @@
 #include <linux/efi.h>
 #include <asm/efi.h>
 
+efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
+{
+	int block;
+
+	/* non-LPAE kernels can run anywhere */
+	if (!IS_ENABLED(CONFIG_ARM_LPAE))
+		return EFI_SUCCESS;
+
+	/* LPAE kernels need compatible hardware */
+	block = cpuid_feature_extract(CPUID_EXT_MMFR0, 0);
+	if (block < 5) {
+		pr_efi_err(sys_table_arg, "This LPAE kernel is not supported by your CPU\n");
+		return EFI_UNSUPPORTED;
+	}
+	return EFI_SUCCESS;
+}
+
 efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
 				 unsigned long *image_addr,
 				 unsigned long *image_size,
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e0e6b74fef8f..a90f6459f5c6 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -12,18 +12,38 @@
 #include <linux/efi.h>
 #include <asm/efi.h>
 #include <asm/sections.h>
+#include <asm/sysreg.h>
 
 #include "efistub.h"
 
 extern bool __nokaslr;
 
-efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
-					unsigned long *image_addr,
-					unsigned long *image_size,
-					unsigned long *reserve_addr,
-					unsigned long *reserve_size,
-					unsigned long dram_base,
-					efi_loaded_image_t *image)
+efi_status_t check_platform_features(efi_system_table_t *sys_table_arg)
+{
+	u64 tg;
+
+	/* UEFI mandates support for 4 KB granularity, no need to check */
+	if (IS_ENABLED(CONFIG_ARM64_4K_PAGES))
+		return EFI_SUCCESS;
+
+	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
+	if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) {
+		if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
+			pr_efi_err(sys_table_arg, "This 64 KB granular kernel is not supported by your CPU\n");
+		else
+			pr_efi_err(sys_table_arg, "This 16 KB granular kernel is not supported by your CPU\n");
+		return EFI_UNSUPPORTED;
+	}
+	return EFI_SUCCESS;
+}
+
+efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg,
+				 unsigned long *image_addr,
+				 unsigned long *image_size,
+				 unsigned long *reserve_addr,
+				 unsigned long *reserve_size,
+				 unsigned long dram_base,
+				 efi_loaded_image_t *image)
 {
 	efi_status_t status;
 	unsigned long kernel_size, kernel_memsize = 0;
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 5ed3d3f38166..ee49cd23ee63 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -5,6 +5,16 @@
 /* error code which can't be mistaken for valid address */
 #define EFI_ERROR	(~0UL)
 
+/*
+ * __init annotations should not be used in the EFI stub, since the code is
+ * either included in the decompressor (x86, ARM) where they have no effect,
+ * or the whole stub is __init annotated at the section level (arm64), by
+ * renaming the sections, in which case the __init annotation will be
+ * redundant, and will result in section names like .init.init.text, and our
+ * linker script does not expect that.
+ */
+#undef __init
+
 void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
 efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
@@ -50,4 +60,6 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
 			      unsigned long size, unsigned long align,
 			      unsigned long *addr, unsigned long random_seed);
 
+efi_status_t check_platform_features(efi_system_table_t *sys_table_arg);
+
 #endif
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 228bbf910461..de6953039af6 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -61,63 +61,23 @@
  */
 static DEFINE_SPINLOCK(efi_runtime_lock);
 
-/*
- * Some runtime services calls can be reentrant under NMI, even if the table
- * above says they are not. (source: UEFI Specification v2.4A)
- *
- * Table 32. Functions that may be called after Machine Check, INIT and NMI
- * +----------------------------+------------------------------------------+
- * | Function			| Called after Machine Check, INIT and NMI |
- * +----------------------------+------------------------------------------+
- * | GetTime()			| Yes, even if previously busy.		   |
- * | GetVariable()		| Yes, even if previously busy		   |
- * | GetNextVariableName()	| Yes, even if previously busy		   |
- * | QueryVariableInfo()	| Yes, even if previously busy		   |
- * | SetVariable()		| Yes, even if previously busy		   |
- * | UpdateCapsule()		| Yes, even if previously busy		   |
- * | QueryCapsuleCapabilities()	| Yes, even if previously busy		   |
- * | ResetSystem()		| Yes, even if previously busy		   |
- * +----------------------------+------------------------------------------+
- *
- * In order to prevent deadlocks under NMI, the wrappers for these functions
- * may only grab the efi_runtime_lock or rtc_lock spinlocks if !efi_in_nmi().
- * However, not all of the services listed are reachable through NMI code paths,
- * so the the special handling as suggested by the UEFI spec is only implemented
- * for QueryVariableInfo() and SetVariable(), as these can be reached in NMI
- * context through efi_pstore_write().
- */
-
-/*
- * As per commit ef68c8f87ed1 ("x86: Serialize EFI time accesses on rtc_lock"),
- * the EFI specification requires that callers of the time related runtime
- * functions serialize with other CMOS accesses in the kernel, as the EFI time
- * functions may choose to also use the legacy CMOS RTC.
- */
-__weak DEFINE_SPINLOCK(rtc_lock);
-
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&rtc_lock, flags);
 	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(get_time, tm, tc);
 	spin_unlock(&efi_runtime_lock);
-	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
 }
 
 static efi_status_t virt_efi_set_time(efi_time_t *tm)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&rtc_lock, flags);
 	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(set_time, tm);
 	spin_unlock(&efi_runtime_lock);
-	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
 }
 
@@ -125,27 +85,21 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 					     efi_bool_t *pending,
 					     efi_time_t *tm)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&rtc_lock, flags);
 	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
 	spin_unlock(&efi_runtime_lock);
-	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
 }
 
 static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&rtc_lock, flags);
 	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(set_wakeup_time, enabled, tm);
 	spin_unlock(&efi_runtime_lock);
-	spin_unlock_irqrestore(&rtc_lock, flags);
 	return status;
 }
 
@@ -155,13 +109,12 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 					  unsigned long *data_size,
 					  void *data)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(get_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -169,12 +122,11 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 					       efi_char16_t *name,
 					       efi_guid_t *vendor)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(get_next_variable, name_size, name, vendor);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -184,13 +136,12 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 					  unsigned long data_size,
 					  void *data)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -199,15 +150,14 @@ virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
 				  u32 attr, unsigned long data_size,
 				  void *data)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	if (!spin_trylock_irqsave(&efi_runtime_lock, flags))
+	if (!spin_trylock(&efi_runtime_lock))
 		return EFI_NOT_READY;
 
 	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -217,27 +167,45 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 						 u64 *remaining_space,
 						 u64 *max_variable_size)
 {
-	unsigned long flags;
 	efi_status_t status;
 
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(query_variable_info, attr, storage_space,
 			       remaining_space, max_variable_size);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
+	return status;
+}
+
+static efi_status_t
+virt_efi_query_variable_info_nonblocking(u32 attr,
+					 u64 *storage_space,
+					 u64 *remaining_space,
+					 u64 *max_variable_size)
+{
+	efi_status_t status;
+
+	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
+		return EFI_UNSUPPORTED;
+
+	if (!spin_trylock(&efi_runtime_lock))
+		return EFI_NOT_READY;
+
+	status = efi_call_virt(query_variable_info, attr, storage_space,
+			       remaining_space, max_variable_size);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
 static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 {
-	unsigned long flags;
 	efi_status_t status;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(get_next_high_mono_count, count);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -246,26 +214,23 @@ static void virt_efi_reset_system(int reset_type,
 				  unsigned long data_size,
 				  efi_char16_t *data)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	__efi_call_virt(reset_system, reset_type, status, data_size, data);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 }
 
 static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 					    unsigned long count,
 					    unsigned long sg_list)
 {
-	unsigned long flags;
 	efi_status_t status;
 
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(update_capsule, capsules, count, sg_list);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -274,16 +239,15 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 						u64 *max_size,
 						int *reset_type)
 {
-	unsigned long flags;
 	efi_status_t status;
 
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock_irqsave(&efi_runtime_lock, flags);
+	spin_lock(&efi_runtime_lock);
 	status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
 			       reset_type);
-	spin_unlock_irqrestore(&efi_runtime_lock, flags);
+	spin_unlock(&efi_runtime_lock);
 	return status;
 }
 
@@ -300,6 +264,7 @@ void efi_native_runtime_setup(void)
 	efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
 	efi.reset_system = virt_efi_reset_system;
 	efi.query_variable_info = virt_efi_query_variable_info;
+	efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking;
 	efi.update_capsule = virt_efi_update_capsule;
 	efi.query_capsule_caps = virt_efi_query_capsule_caps;
 }
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 7f2ea21c730d..0ac594c0a234 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -300,7 +300,18 @@ check_var_size(u32 attributes, unsigned long size)
 	if (!fops->query_variable_store)
 		return EFI_UNSUPPORTED;
 
-	return fops->query_variable_store(attributes, size);
+	return fops->query_variable_store(attributes, size, false);
+}
+
+static efi_status_t
+check_var_size_nonblocking(u32 attributes, unsigned long size)
+{
+	const struct efivar_operations *fops = __efivars->ops;
+
+	if (!fops->query_variable_store)
+		return EFI_UNSUPPORTED;
+
+	return fops->query_variable_store(attributes, size, true);
 }
 
 static int efi_status_to_err(efi_status_t status)
@@ -681,7 +692,8 @@ efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
 	if (!spin_trylock_irqsave(&__efivars->lock, flags))
 		return -EBUSY;
 
-	status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
+	status = check_var_size_nonblocking(attributes,
+					    size + ucs2_strsize(name, 1024));
 	if (status != EFI_SUCCESS) {
 		spin_unlock_irqrestore(&__efivars->lock, flags);
 		return -ENOSPC;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8ae7ab68cb97..f2a74d0b68ae 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -25,6 +25,14 @@ config DRM_MIPI_DSI
 	bool
 	depends on DRM
 
+config DRM_DP_AUX_CHARDEV
+	bool "DRM DP AUX Interface"
+	depends on DRM
+	help
+	  Choose this option to enable a /dev/drm_dp_auxN node that allows to
+	  read and write values to arbitrary DPCD registers on the DP aux
+	  channel.
+
 config DRM_KMS_HELPER
 	tristate
 	depends on DRM
@@ -106,6 +114,8 @@ config DRM_TDFX
 	  Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
 	  graphics card.  If M is selected, the module will be called tdfx.
 
+source "drivers/gpu/drm/arm/Kconfig"
+
 config DRM_R128
 	tristate "ATI Rage 128"
 	depends on DRM && PCI
@@ -162,6 +172,8 @@ config DRM_AMDGPU
 source "drivers/gpu/drm/amd/amdgpu/Kconfig"
 source "drivers/gpu/drm/amd/powerplay/Kconfig"
 
+source "drivers/gpu/drm/amd/acp/Kconfig"
+
 source "drivers/gpu/drm/nouveau/Kconfig"
 
 config DRM_I810
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 61766dec6a8d..6eb94fc561dc 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -22,10 +22,13 @@ drm-$(CONFIG_OF) += drm_of.o
 drm-$(CONFIG_AGP) += drm_agpsupport.o
 
 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
-		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o
+		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
+		drm_kms_helper_common.o
+
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
 drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
+drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
 
 obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
 
@@ -33,6 +36,7 @@ CFLAGS_drm_trace_points.o := -I$(src)
 
 obj-$(CONFIG_DRM)	+= drm.o
 obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
+obj-$(CONFIG_DRM_ARM)	+= arm/
 obj-$(CONFIG_DRM_TTM)	+= ttm/
 obj-$(CONFIG_DRM_TDFX)	+= tdfx/
 obj-$(CONFIG_DRM_R128)	+= r128/
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
new file mode 100644
index 000000000000..0f734ee05274
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -0,0 +1,10 @@
+menu "ACP Configuration"
+
+config DRM_AMD_ACP
+       bool "Enable ACP IP support"
+       select MFD_CORE
+       select PM_GENERIC_DOMAINS if PM
+       help
+	Choose this option to enable ACP IP support for AMD SOCs.
+
+endmenu
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
new file mode 100644
index 000000000000..8363cb57915b
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the ACP, which is a sub-component
+# of AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP related functionalities.
+
+subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c b/drivers/gpu/drm/amd/acp/acp_hw.c
index 341dc560acbb..7af83f142b4b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Red Hat Inc.
+ * Copyright 2015 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,30 +19,32 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "gf100.h"
-#include "ctxgf100.h"
-
-#include <nvif/class.h>
-
-static const struct gf100_gr_func
-gm206_gr = {
-	.init = gm204_gr_init,
-	.mmio = gm204_gr_pack_mmio,
-	.ppc_nr = 2,
-	.grctx = &gm206_grctx,
-	.sclass = {
-		{ -1, -1, FERMI_TWOD_A },
-		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
-		{ -1, -1, MAXWELL_B, &gf100_fermi },
-		{ -1, -1, MAXWELL_COMPUTE_B },
-		{}
-	}
-};
-
-int
-gm206_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S	0
+#define ACP_MODE_AZ	1
+
+#define mmACP_AZALIA_I2S_SELECT 0x51d4
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor)
 {
-	return gf100_gr_new_(&gm206_gr, device, index, pgr);
+	unsigned int acp_mode = ACP_MODE_I2S;
+
+	if ((acp_version_major == 2) && (acp_version_minor == 2))
+		acp_mode = cgs_read_register(cgs_device,
+					mmACP_AZALIA_I2S_SELECT);
+
+	if (acp_mode != ACP_MODE_I2S)
+		return -ENODEV;
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
new file mode 100644
index 000000000000..bccf47b63899
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include <linux/types.h>
+#include "cgs_linux.h"
+#include "cgs_common.h"
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 20c9539abc36..c7fcdcedaadb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
-	-I$(FULL_AMD_PATH)/powerplay/inc
+	-I$(FULL_AMD_PATH)/powerplay/inc \
+	-I$(FULL_AMD_PATH)/acp/include
 
 amdgpu-y := amdgpu_drv.o
 
@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
-	atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
+	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
 
@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
 amdgpu-y += \
 	../scheduler/gpu_scheduler.o \
 	../scheduler/sched_fence.o \
-	amdgpu_sched.o
+	amdgpu_job.o
+
+# ACP componet
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
 
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5e7770f9a415..c4a21c6428f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -53,6 +53,7 @@
 #include "amdgpu_ucode.h"
 #include "amdgpu_gds.h"
 #include "amd_powerplay.h"
+#include "amdgpu_acp.h"
 
 #include "gpu_scheduler.h"
 
@@ -74,7 +75,6 @@ extern int amdgpu_dpm;
 extern int amdgpu_smc_load_fw;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
-extern int amdgpu_hard_reset;
 extern unsigned amdgpu_ip_block_mask;
 extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
@@ -82,10 +82,8 @@ extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
 extern int amdgpu_vm_fault_stop;
 extern int amdgpu_vm_debug;
-extern int amdgpu_enable_scheduler;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
-extern int amdgpu_enable_semaphores;
 extern int amdgpu_powerplay;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
@@ -108,9 +106,6 @@ extern unsigned amdgpu_pcie_lane_cap;
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES		2
 
-/* number of hw syncs before falling back on blocking */
-#define AMDGPU_NUM_SYNCS			4
-
 /* hardcode that limit for now */
 #define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
 
@@ -146,11 +141,9 @@ extern unsigned amdgpu_pcie_lane_cap;
 #define CIK_CURSOR_HEIGHT 128
 
 struct amdgpu_device;
-struct amdgpu_fence;
 struct amdgpu_ib;
 struct amdgpu_vm;
 struct amdgpu_ring;
-struct amdgpu_semaphore;
 struct amdgpu_cs_parser;
 struct amdgpu_job;
 struct amdgpu_irq_src;
@@ -248,7 +241,7 @@ struct amdgpu_vm_pte_funcs {
 			 unsigned count);
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib,
-			  uint64_t pe,
+			  const dma_addr_t *pages_addr, uint64_t pe,
 			  uint64_t addr, unsigned count,
 			  uint32_t incr, uint32_t flags);
 	/* for linear pte/pde updates without addr mapping */
@@ -256,8 +249,6 @@ struct amdgpu_vm_pte_funcs {
 			    uint64_t pe,
 			    uint64_t addr, unsigned count,
 			    uint32_t incr, uint32_t flags);
-	/* pad the indirect buffer to the necessary number of dw */
-	void (*pad_ib)(struct amdgpu_ib *ib);
 };
 
 /* provided by the gmc block */
@@ -295,12 +286,11 @@ struct amdgpu_ring_funcs {
 			struct amdgpu_ib *ib);
 	void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
 			   uint64_t seq, unsigned flags);
-	bool (*emit_semaphore)(struct amdgpu_ring *ring,
-			       struct amdgpu_semaphore *semaphore,
-			       bool emit_wait);
+	void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
 	void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
 			      uint64_t pd_addr);
 	void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+	void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
 	void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
 				uint32_t gds_base, uint32_t gds_size,
 				uint32_t gws_base, uint32_t gws_size,
@@ -310,6 +300,8 @@ struct amdgpu_ring_funcs {
 	int (*test_ib)(struct amdgpu_ring *ring);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+	/* pad the indirect buffer to the necessary number of dw */
+	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 };
 
 /*
@@ -355,13 +347,15 @@ struct amdgpu_fence_driver {
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t			sync_seq[AMDGPU_MAX_RINGS];
-	atomic64_t			last_seq;
+	uint32_t			sync_seq;
+	atomic_t			last_seq;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
 	struct timer_list		fallback_timer;
-	wait_queue_head_t		fence_queue;
+	unsigned			num_fences_mask;
+	spinlock_t			lock;
+	struct fence			**fences;
 };
 
 /* some special values for the owner field */
@@ -371,19 +365,6 @@ struct amdgpu_fence_driver {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 
-struct amdgpu_fence {
-	struct fence base;
-
-	/* RB, DMA, etc. */
-	struct amdgpu_ring		*ring;
-	uint64_t			seq;
-
-	/* filp or special value for fence creator */
-	void				*owner;
-
-	wait_queue_t			fence_wake;
-};
-
 struct amdgpu_user_fence {
 	/* write-back bo */
 	struct amdgpu_bo 	*bo;
@@ -395,24 +376,18 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
 
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+				  unsigned num_hw_submission);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq_src,
 				   unsigned irq_type);
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
-		      struct amdgpu_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *ring);
-
 /*
  * TTM.
  */
@@ -431,6 +406,8 @@ struct amdgpu_mman {
 	/* buffer handling */
 	const struct amdgpu_buffer_funcs	*buffer_funcs;
 	struct amdgpu_ring			*buffer_funcs_ring;
+	/* Scheduler entity for buffer moves */
+	struct amd_sched_entity			entity;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -445,9 +422,9 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo		*robj;
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
-	unsigned			prefered_domains;
-	unsigned			allowed_domains;
 	uint32_t			priority;
+	struct page			**user_pages;
+	int				user_invalidated;
 };
 
 struct amdgpu_bo_va_mapping {
@@ -459,7 +436,6 @@ struct amdgpu_bo_va_mapping {
 
 /* bo virtual addresses in a specific vm */
 struct amdgpu_bo_va {
-	struct mutex		        mutex;
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
 	struct fence		        *last_pt_update;
@@ -483,7 +459,8 @@ struct amdgpu_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
-	u32				initial_domain;
+	u32				prefered_domains;
+	u32				allowed_domains;
 	struct ttm_place		placements[AMDGPU_GEM_DOMAIN_MAX + 1];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
@@ -505,7 +482,6 @@ struct amdgpu_bo {
 	struct amdgpu_bo		*parent;
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
-	pid_t				pid;
 	struct amdgpu_mn		*mn;
 	struct list_head		mn_list;
 };
@@ -554,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
  * Assumption is that there won't be hole (all object on same
  * alignment).
  */
+
+#define AMDGPU_SA_NUM_FENCE_LISTS	32
+
 struct amdgpu_sa_manager {
 	wait_queue_head_t	wq;
 	struct amdgpu_bo	*bo;
 	struct list_head	*hole;
-	struct list_head	flist[AMDGPU_MAX_RINGS];
+	struct list_head	flist[AMDGPU_SA_NUM_FENCE_LISTS];
 	struct list_head	olist;
 	unsigned		size;
 	uint64_t		gpu_addr;
@@ -580,13 +559,7 @@ struct amdgpu_sa_bo {
 /*
  * GEM objects.
  */
-struct amdgpu_gem {
-	struct mutex		mutex;
-	struct list_head	objects;
-};
-
-int amdgpu_gem_init(struct amdgpu_device *adev);
-void amdgpu_gem_fini(struct amdgpu_device *adev);
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 				int alignment, u32 initial_domain,
 				u64 flags, bool kernel,
@@ -598,32 +571,10 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 			  struct drm_device *dev,
 			  uint32_t handle, uint64_t *offset_p);
-
-/*
- * Semaphores.
- */
-struct amdgpu_semaphore {
-	struct amdgpu_sa_bo	*sa_bo;
-	signed			waiters;
-	uint64_t		gpu_addr;
-};
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
-			    struct amdgpu_semaphore **semaphore);
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
-				  struct amdgpu_semaphore *semaphore);
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
-				struct amdgpu_semaphore *semaphore);
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
-			   struct amdgpu_semaphore **semaphore,
-			   struct fence *fence);
-
 /*
  * Synchronization
  */
 struct amdgpu_sync {
-	struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
-	struct fence		*sync_to[AMDGPU_MAX_RINGS];
 	DECLARE_HASHTABLE(fences, 4);
 	struct fence	        *last_vm_update;
 };
@@ -635,12 +586,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
-		      struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
-void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-		      struct fence *fence);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
 
 /*
  * GART structures, functions & helpers
@@ -758,6 +708,7 @@ struct amdgpu_flip_work {
 	struct fence			*excl;
 	unsigned			shared_count;
 	struct fence			**shared;
+	struct fence_cb			cb;
 };
 
 
@@ -770,12 +721,11 @@ struct amdgpu_ib {
 	uint32_t			length_dw;
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
-	struct amdgpu_ring		*ring;
-	struct amdgpu_fence		*fence;
 	struct amdgpu_user_fence        *user;
 	struct amdgpu_vm		*vm;
+	unsigned			vm_id;
+	uint64_t			vm_pd_addr;
 	struct amdgpu_ctx		*ctx;
-	struct amdgpu_sync		sync;
 	uint32_t			gds_base, gds_size;
 	uint32_t			gws_base, gws_size;
 	uint32_t			oa_base, oa_size;
@@ -794,13 +744,14 @@ enum amdgpu_ring_type {
 
 extern struct amd_sched_backend_ops amdgpu_sched_ops;
 
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring,
-					 struct amdgpu_ib *ibs,
-					 unsigned num_ibs,
-					 int (*free_job)(struct amdgpu_job *),
-					 void *owner,
-					 struct fence **fence);
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+		     struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+			     struct amdgpu_job **job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+		      struct amd_sched_entity *entity, void *owner,
+		      struct fence **f);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -809,7 +760,6 @@ struct amdgpu_ring {
 	struct amd_gpu_scheduler 	sched;
 
 	spinlock_t              fence_lock;
-	struct mutex		*ring_lock;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
@@ -818,7 +768,7 @@ struct amdgpu_ring {
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
-	unsigned		ring_free_dw;
+	unsigned		max_dw;
 	int			count_dw;
 	uint64_t		gpu_addr;
 	uint32_t		align_mask;
@@ -826,8 +776,6 @@ struct amdgpu_ring {
 	bool			ready;
 	u32			nop;
 	u32			idx;
-	u64			last_semaphore_signal_addr;
-	u64			last_semaphore_wait_addr;
 	u32			me;
 	u32			pipe;
 	u32			queue;
@@ -840,7 +788,6 @@ struct amdgpu_ring {
 	struct amdgpu_ctx	*current_ctx;
 	enum amdgpu_ring_type	type;
 	char			name[16];
-	bool                    is_pte_ring;
 };
 
 /*
@@ -884,13 +831,14 @@ struct amdgpu_vm_pt {
 };
 
 struct amdgpu_vm_id {
-	unsigned		id;
-	uint64_t		pd_gpu_addr;
+	struct amdgpu_vm_manager_id	*mgr_id;
+	uint64_t			pd_gpu_addr;
 	/* last flushed PD/PT update */
-	struct fence	        *flushed_updates;
+	struct fence			*flushed_updates;
 };
 
 struct amdgpu_vm {
+	/* tree of virtual addresses mapped */
 	struct rb_root		va;
 
 	/* protecting invalidated */
@@ -915,30 +863,47 @@ struct amdgpu_vm {
 
 	/* for id and flush management per ring */
 	struct amdgpu_vm_id	ids[AMDGPU_MAX_RINGS];
-	/* for interval tree */
-	spinlock_t		it_lock;
+
 	/* protecting freed */
 	spinlock_t		freed_lock;
+
+	/* Scheduler entity for page table updates */
+	struct amd_sched_entity	entity;
+};
+
+struct amdgpu_vm_manager_id {
+	struct list_head	list;
+	struct fence		*active;
+	atomic_long_t		owner;
+
+	uint32_t		gds_base;
+	uint32_t		gds_size;
+	uint32_t		gws_base;
+	uint32_t		gws_size;
+	uint32_t		oa_base;
+	uint32_t		oa_size;
 };
 
 struct amdgpu_vm_manager {
-	struct {
-		struct fence	*active;
-		atomic_long_t	owner;
-	} ids[AMDGPU_NUM_VM];
+	/* Handling of VMIDs */
+	struct mutex				lock;
+	unsigned				num_ids;
+	struct list_head			ids_lru;
+	struct amdgpu_vm_manager_id		ids[AMDGPU_NUM_VM];
 
 	uint32_t				max_pfn;
-	/* number of VMIDs */
-	unsigned				nvm;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* is vm enabled? */
 	bool					enabled;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs        *vm_pte_funcs;
-	struct amdgpu_ring                      *vm_pte_funcs_ring;
+	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
+	unsigned				vm_pte_num_rings;
+	atomic_t				vm_pte_next_ring;
 };
 
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -949,14 +914,15 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync);
+		      struct amdgpu_sync *sync, struct fence *fence,
+		      unsigned *vm_id, uint64_t *vm_pd_addr);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     struct amdgpu_vm *vm,
-		     struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+		     unsigned vm_id, uint64_t pd_addr,
+		     uint32_t gds_base, uint32_t gds_size,
+		     uint32_t gws_base, uint32_t gws_size,
+		     uint32_t oa_base, uint32_t oa_size);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm);
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -982,7 +948,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		       uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
 
 /*
  * context related structures
@@ -1010,10 +975,6 @@ struct amdgpu_ctx_mgr {
 	struct idr		ctx_handles;
 };
 
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
-		    struct amdgpu_ctx *ctx);
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
-
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
@@ -1048,13 +1009,15 @@ struct amdgpu_bo_list {
 	struct amdgpu_bo *gds_obj;
 	struct amdgpu_bo *gws_obj;
 	struct amdgpu_bo *oa_obj;
-	bool has_userptr;
+	unsigned first_userptr;
 	unsigned num_entries;
 	struct amdgpu_bo_list_entry *array;
 };
 
 struct amdgpu_bo_list *
 amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+			     struct list_head *validated);
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
 
@@ -1128,6 +1091,7 @@ struct amdgpu_gca_config {
 	unsigned multi_gpu_tile_size;
 	unsigned mc_arb_ramcfg;
 	unsigned gb_addr_config;
+	unsigned num_rbs;
 
 	uint32_t tile_mode_array[32];
 	uint32_t macrotile_mode_array[16];
@@ -1170,23 +1134,20 @@ struct amdgpu_gfx {
 	unsigned ce_ram_size;
 };
 
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
-		       struct amdgpu_ib *ib, void *owner);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+		       struct amdgpu_ib *ib, struct fence *last_vm_update,
+		       struct fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-/* Ring access between begin & end cannot sleep */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring);
 unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
 			    uint32_t **data);
 int amdgpu_ring_restore(struct amdgpu_ring *ring,
@@ -1196,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
 		     enum amdgpu_ring_type ring_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
 
 /*
  * CS.
@@ -1205,47 +1165,58 @@ struct amdgpu_cs_chunk {
 	uint32_t		chunk_id;
 	uint32_t		length_dw;
 	uint32_t		*kdata;
-	void __user		*user_ptr;
 };
 
 struct amdgpu_cs_parser {
 	struct amdgpu_device	*adev;
 	struct drm_file		*filp;
 	struct amdgpu_ctx	*ctx;
-	struct amdgpu_bo_list *bo_list;
+
 	/* chunks */
 	unsigned		nchunks;
 	struct amdgpu_cs_chunk	*chunks;
-	/* relocations */
-	struct amdgpu_bo_list_entry	vm_pd;
-	struct list_head	validated;
-	struct fence		*fence;
 
-	struct amdgpu_ib	*ibs;
-	uint32_t		num_ibs;
+	/* scheduler job object */
+	struct amdgpu_job	*job;
 
-	struct ww_acquire_ctx	ticket;
+	/* buffer objects */
+	struct ww_acquire_ctx		ticket;
+	struct amdgpu_bo_list		*bo_list;
+	struct amdgpu_bo_list_entry	vm_pd;
+	struct list_head		validated;
+	struct fence			*fence;
+	uint64_t			bytes_moved_threshold;
+	uint64_t			bytes_moved;
 
 	/* user fence */
-	struct amdgpu_user_fence	uf;
 	struct amdgpu_bo_list_entry	uf_entry;
 };
 
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
+	struct amdgpu_ring	*ring;
+	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
+	struct fence		*fence; /* the hw fence */
 	uint32_t		num_ibs;
 	void			*owner;
 	struct amdgpu_user_fence uf;
-	int (*free_job)(struct amdgpu_job *job);
 };
 #define to_amdgpu_job(sched_job)		\
 		container_of((sched_job), struct amdgpu_job, base)
 
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
+static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+				      uint32_t ib_idx, int idx)
+{
+	return p->job->ibs[ib_idx].ptr[idx];
+}
+
+static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
+				       uint32_t ib_idx, int idx,
+				       uint32_t value)
 {
-	return p->ibs[ib_idx].ptr[idx];
+	p->job->ibs[ib_idx].ptr[idx] = value;
 }
 
 /*
@@ -1497,6 +1468,7 @@ enum amdgpu_dpm_forced_level {
 	AMDGPU_DPM_FORCED_LEVEL_AUTO = 0,
 	AMDGPU_DPM_FORCED_LEVEL_LOW = 1,
 	AMDGPU_DPM_FORCED_LEVEL_HIGH = 2,
+	AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3,
 };
 
 struct amdgpu_vce_state {
@@ -1626,6 +1598,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
+	struct amd_sched_entity entity;
 };
 
 /*
@@ -1650,6 +1623,7 @@ struct amdgpu_vce {
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
 	unsigned		harvest_config;
+	struct amd_sched_entity	entity;
 };
 
 /*
@@ -1884,6 +1858,18 @@ void amdgpu_cgs_destroy_device(void *cgs_device);
 
 
 /*
+ * CGS
+ */
+void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(void *cgs_device);
+
+
+/* GPU virtualization */
+struct amdgpu_virtualization {
+	bool supports_sr_iov;
+};
+
+/*
  * Core structure, functions and helpers.
  */
 typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t);
@@ -1903,6 +1889,10 @@ struct amdgpu_device {
 	struct drm_device		*ddev;
 	struct pci_dev			*pdev;
 
+#ifdef CONFIG_DRM_AMD_ACP
+	struct amdgpu_acp		acp;
+#endif
+
 	/* ASIC */
 	enum amd_asic_type		asic_type;
 	uint32_t			family;
@@ -1979,7 +1969,6 @@ struct amdgpu_device {
 
 	/* memory management */
 	struct amdgpu_mman		mman;
-	struct amdgpu_gem		gem;
 	struct amdgpu_vram_scratch	vram_scratch;
 	struct amdgpu_wb		wb;
 	atomic64_t			vram_usage;
@@ -1997,7 +1986,6 @@ struct amdgpu_device {
 
 	/* rings */
 	unsigned			fence_context;
-	struct mutex			ring_lock;
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
@@ -2009,6 +1997,7 @@ struct amdgpu_device {
 	/* powerplay */
 	struct amd_powerplay		powerplay;
 	bool				pp_enabled;
+	bool				pp_force_state_enabled;
 
 	/* dpm */
 	struct amdgpu_pm		pm;
@@ -2025,7 +2014,6 @@ struct amdgpu_device {
 	struct amdgpu_sdma		sdma;
 
 	/* uvd */
-	bool				has_uvd;
 	struct amdgpu_uvd		uvd;
 
 	/* vce */
@@ -2050,8 +2038,7 @@ struct amdgpu_device {
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
-	/* kernel conext for IB submission */
-	struct amdgpu_ctx	kernel_ctx;
+	struct amdgpu_virtualization virtualization;
 };
 
 bool amdgpu_device_is_px(struct drm_device *dev);
@@ -2073,20 +2060,6 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 
 /*
- * Cast helper
- */
-extern const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
-{
-	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
-
-	if (__f->base.ops == &amdgpu_fence_ops)
-		return __f;
-
-	return NULL;
-}
-
-/*
  * Registers read & write functions.
  */
 #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
@@ -2156,7 +2129,6 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 	ring->ring[ring->wptr++] = v;
 	ring->wptr &= ring->ptr_mask;
 	ring->count_dw--;
-	ring->ring_free_dw--;
 }
 
 static inline struct amdgpu_sdma_instance *
@@ -2192,9 +2164,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_pad_ib(adev, ib) ((adev)->vm_manager.vm_pte_funcs->pad_ib((ib)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
@@ -2202,11 +2173,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
 #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_semaphore(r, semaphore, emit_wait) (r)->funcs->emit_semaphore((r), (semaphore), (emit_wait))
 #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
@@ -2298,6 +2271,21 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_get_performance_level(adev) \
 	(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)
 
+#define amdgpu_dpm_get_pp_num_states(adev, data) \
+	(adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)
+
+#define amdgpu_dpm_get_pp_table(adev, table) \
+	(adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)
+
+#define amdgpu_dpm_set_pp_table(adev, buf, size) \
+	(adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)
+
+#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
+	(adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)
+
+#define amdgpu_dpm_force_clock_level(adev, type, level) \
+		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2308,7 +2296,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2316,11 +2303,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       struct amdgpu_ring **out_ring);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 000000000000..d6b0bff510aa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/pm_domain.h>
+#include <linux/platform_device.h>
+#include <sound/designware_i2s.h>
+#include <sound/pcm.h>
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ACP_TILE_ON_MASK                0x03
+#define ACP_TILE_OFF_MASK               0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK     0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK    0x20
+
+#define ACP_TILE_P1_MASK                0x3e
+#define ACP_TILE_P2_MASK                0x3d
+#define ACP_TILE_DSP0_MASK              0x3b
+#define ACP_TILE_DSP1_MASK              0x37
+
+#define ACP_TILE_DSP2_MASK              0x2f
+
+#define ACP_DMA_REGS_END		0x146c0
+#define ACP_I2S_PLAY_REGS_START		0x14840
+#define ACP_I2S_PLAY_REGS_END		0x148b4
+#define ACP_I2S_CAP_REGS_START		0x148b8
+#define ACP_I2S_CAP_REGS_END		0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET	0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET	0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET	0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET	0x68
+
+#define mmACP_PGFSM_RETAIN_REG		0x51c9
+#define mmACP_PGFSM_CONFIG_REG		0x51ca
+#define mmACP_PGFSM_READ_REG_0		0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO	0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI	0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO	0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI	0x51fb
+
+#define ACP_TIMEOUT_LOOP		0x000000FF
+#define ACP_DEVS			3
+#define ACP_SRC_ID			162
+
+enum {
+	ACP_TILE_P1 = 0,
+	ACP_TILE_P2,
+	ACP_TILE_DSP0,
+	ACP_TILE_DSP1,
+	ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->acp.parent = adev->dev;
+
+	adev->acp.cgs_device =
+		amdgpu_cgs_create_device(adev);
+	if (!adev->acp.cgs_device)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int acp_sw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->acp.cgs_device)
+		amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+	return 0;
+}
+
+/* power off a tile/block within ACP */
+static int acp_suspend_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile  < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile : %d to suspend\n", tile);
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val &= ACP_TILE_ON_MASK;
+
+	if (val == 0x0) {
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		val = val | (1 << tile);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x500 + tile);
+
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+								+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == ACP_TILE_OFF_MASK)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+
+		val |= ACP_TILE_OFF_RETAIN_REG_MASK;
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
+
+/* power on a tile/block within ACP */
+static int acp_resume_tile(void *cgs_dev, int tile)
+{
+	u32 val = 0;
+	u32 count = 0;
+
+	if ((tile  < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+		pr_err("Invalid ACP tile to resume\n");
+		return -1;
+	}
+
+	val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+	val = val & ACP_TILE_ON_MASK;
+
+	if (val != 0x0) {
+		cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+					0x600 + tile);
+		count = ACP_TIMEOUT_LOOP;
+		while (true) {
+			val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+							+ tile);
+			val = val & ACP_TILE_ON_MASK;
+			if (val == 0x0)
+				break;
+			if (--count == 0) {
+				pr_err("Timeout reading ACP PGFSM status\n");
+				return -ETIMEDOUT;
+			}
+			udelay(100);
+		}
+		val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+		if (tile == ACP_TILE_P1)
+			val = val & (ACP_TILE_P1_MASK);
+		else if (tile == ACP_TILE_P2)
+			val = val & (ACP_TILE_P2_MASK);
+
+		cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+	}
+	return 0;
+}
+
+struct acp_pm_domain {
+	void *cgs_dev;
+	struct generic_pm_domain gpd;
+};
+
+static int acp_poweroff(struct generic_pm_domain *genpd)
+{
+	int i, ret;
+	struct acp_pm_domain *apd;
+
+	apd = container_of(genpd, struct acp_pm_domain, gpd);
+	if (apd != NULL) {
+		/* Donot return abruptly if any of power tile fails to suspend.
+		 * Log it and continue powering off other tile
+		 */
+		for (i = 4; i >= 0 ; i--) {
+			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+			if (ret)
+				pr_err("ACP tile %d tile suspend failed\n", i);
+		}
+	}
+	return 0;
+}
+
+static int acp_poweron(struct generic_pm_domain *genpd)
+{
+	int i, ret;
+	struct acp_pm_domain *apd;
+
+	apd = container_of(genpd, struct acp_pm_domain, gpd);
+	if (apd != NULL) {
+		for (i = 0; i < 2; i++) {
+			ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+			if (ret) {
+				pr_err("ACP tile %d resume failed\n", i);
+				break;
+			}
+		}
+
+		/* Disable DSPs which are not going to be used */
+		for (i = 0; i < 3; i++) {
+			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
+			/* Continue suspending other DSP, even if one fails */
+			if (ret)
+				pr_err("ACP DSP %d suspend failed\n", i);
+		}
+	}
+	return 0;
+}
+
+static struct device *get_mfd_cell_dev(const char *device_name, int r)
+{
+	char auto_dev_name[25];
+	struct device *dev;
+
+	snprintf(auto_dev_name, sizeof(auto_dev_name),
+		 "%s.%d.auto", device_name, r);
+	dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
+	dev_info(dev, "device %s added to pm domain\n", auto_dev_name);
+
+	return dev;
+}
+
+/**
+ * acp_hw_init - start and test ACP block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static int acp_hw_init(void *handle)
+{
+	int r, i;
+	uint64_t acp_base;
+	struct device *dev;
+	struct i2s_platform_data *i2s_pdata;
+
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	const struct amdgpu_ip_block_version *ip_version =
+		amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+
+	if (!ip_version)
+		return -EINVAL;
+
+	r = amd_acp_hw_init(adev->acp.cgs_device,
+			    ip_version->major, ip_version->minor);
+	/* -ENODEV means board uses AZ rather than ACP */
+	if (r == -ENODEV)
+		return 0;
+	else if (r)
+		return r;
+
+	r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
+			0x5289, 0, &acp_base);
+	if (r == -ENODEV)
+		return 0;
+	else if (r)
+		return r;
+
+	adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+	if (adev->acp.acp_genpd == NULL)
+		return -ENOMEM;
+
+	adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+	adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+	adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+
+
+	adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
+
+	pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+
+	adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS,
+							GFP_KERNEL);
+
+	if (adev->acp.acp_cell == NULL)
+		return -ENOMEM;
+
+	adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL);
+
+	if (adev->acp.acp_res == NULL) {
+		kfree(adev->acp.acp_cell);
+		return -ENOMEM;
+	}
+
+	i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL);
+	if (i2s_pdata == NULL) {
+		kfree(adev->acp.acp_res);
+		kfree(adev->acp.acp_cell);
+		return -ENOMEM;
+	}
+
+	i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+	i2s_pdata[0].cap = DWC_I2S_PLAY;
+	i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+	i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+	i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+
+	i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+				DW_I2S_QUIRK_COMP_PARAM1;
+	i2s_pdata[1].cap = DWC_I2S_RECORD;
+	i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+	i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+	i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+	adev->acp.acp_res[0].name = "acp2x_dma";
+	adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[0].start = acp_base;
+	adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+	adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+	adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+	adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+	adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+	adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+	adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+	adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+	adev->acp.acp_res[3].name = "acp2x_dma_irq";
+	adev->acp.acp_res[3].flags = IORESOURCE_IRQ;
+	adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162);
+	adev->acp.acp_res[3].end = adev->acp.acp_res[3].start;
+
+	adev->acp.acp_cell[0].name = "acp_audio_dma";
+	adev->acp.acp_cell[0].num_resources = 4;
+	adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+
+	adev->acp.acp_cell[1].name = "designware-i2s";
+	adev->acp.acp_cell[1].num_resources = 1;
+	adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+	adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+	adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+	adev->acp.acp_cell[2].name = "designware-i2s";
+	adev->acp.acp_cell[2].num_resources = 1;
+	adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+	adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+	adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+	r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
+								ACP_DEVS);
+	if (r)
+		return r;
+
+	for (i = 0; i < ACP_DEVS ; i++) {
+		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+		r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
+		if (r) {
+			dev_err(dev, "Failed to add dev to genpd\n");
+			return r;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * acp_hw_fini - stop the hardware block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static int acp_hw_fini(void *handle)
+{
+	int i, ret;
+	struct device *dev;
+
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	for (i = 0; i < ACP_DEVS ; i++) {
+		dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+		ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+		/* If removal fails, dont giveup and try rest */
+		if (ret)
+			dev_err(dev, "remove dev from genpd failed\n");
+	}
+
+	mfd_remove_devices(adev->acp.parent);
+	kfree(adev->acp.acp_res);
+	kfree(adev->acp.acp_genpd);
+	kfree(adev->acp.acp_cell);
+
+	return 0;
+}
+
+static int acp_suspend(void *handle)
+{
+	return 0;
+}
+
+static int acp_resume(void *handle)
+{
+	int i, ret;
+	struct acp_pm_domain *apd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* SMU block will power on ACP irrespective of ACP runtime status.
+	 * Power off explicitly based on genpd ACP runtime status so that ACP
+	 * hw and ACP-genpd status are in sync.
+	 * 'suspend_power_off' represents "Power status before system suspend"
+	*/
+	if (adev->acp.acp_genpd->gpd.suspend_power_off == true) {
+		apd = container_of(&adev->acp.acp_genpd->gpd,
+					struct acp_pm_domain, gpd);
+
+		for (i = 4; i >= 0 ; i--) {
+			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+			if (ret)
+				pr_err("ACP tile %d tile suspend failed\n", i);
+		}
+	}
+	return 0;
+}
+
+static int acp_early_init(void *handle)
+{
+	return 0;
+}
+
+static bool acp_is_idle(void *handle)
+{
+	return true;
+}
+
+static int acp_wait_for_idle(void *handle)
+{
+	return 0;
+}
+
+static int acp_soft_reset(void *handle)
+{
+	return 0;
+}
+
+static void acp_print_status(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	dev_info(adev->dev, "ACP STATUS\n");
+}
+
+static int acp_set_clockgating_state(void *handle,
+				     enum amd_clockgating_state state)
+{
+	return 0;
+}
+
+static int acp_set_powergating_state(void *handle,
+				     enum amd_powergating_state state)
+{
+	return 0;
+}
+
+const struct amd_ip_funcs acp_ip_funcs = {
+	.early_init = acp_early_init,
+	.late_init = NULL,
+	.sw_init = acp_sw_init,
+	.sw_fini = acp_sw_fini,
+	.hw_init = acp_hw_init,
+	.hw_fini = acp_hw_fini,
+	.suspend = acp_suspend,
+	.resume = acp_resume,
+	.is_idle = acp_is_idle,
+	.wait_for_idle = acp_wait_for_idle,
+	.soft_reset = acp_soft_reset,
+	.print_status = acp_print_status,
+	.set_clockgating_state = acp_set_clockgating_state,
+	.set_powergating_state = acp_set_powergating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
new file mode 100644
index 000000000000..f6e32a639107
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_ACP_H__
+#define __AMDGPU_ACP_H__
+
+#include <linux/mfd/core.h>
+
+struct amdgpu_acp {
+	struct device *parent;
+	void *cgs_device;
+	struct amd_acp_private *private;
+	struct mfd_cell *acp_cell;
+	struct resource *acp_res;
+	struct acp_pm_domain *acp_genpd;
+};
+
+extern const struct amd_ip_funcs acp_ip_funcs;
+
+#endif /* __AMDGPU_ACP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 84d68d658f8a..32809f749903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,25 +30,38 @@ const struct kfd2kgd_calls *kfd2kgd;
 const struct kgd2kfd_calls *kgd2kfd;
 bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
 
-bool amdgpu_amdkfd_init(void)
+int amdgpu_amdkfd_init(void)
 {
+	int ret;
+
 #if defined(CONFIG_HSA_AMD_MODULE)
-	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
 
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 
 	if (kgd2kfd_init_p == NULL)
-		return false;
+		return -ENOENT;
+
+	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
+	if (ret) {
+		symbol_put(kgd2kfd_init);
+		kgd2kfd = NULL;
+	}
+
+#elif defined(CONFIG_HSA_AMD)
+	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
+	if (ret)
+		kgd2kfd = NULL;
+
+#else
+	ret = -ENOENT;
 #endif
-	return true;
+
+	return ret;
 }
 
 bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
 {
-#if defined(CONFIG_HSA_AMD_MODULE)
-	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-#endif
-
 	switch (rdev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
@@ -62,35 +75,7 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
 		return false;
 	}
 
-#if defined(CONFIG_HSA_AMD_MODULE)
-	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
-	if (kgd2kfd_init_p == NULL) {
-		kfd2kgd = NULL;
-		return false;
-	}
-
-	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
-		symbol_put(kgd2kfd_init);
-		kfd2kgd = NULL;
-		kgd2kfd = NULL;
-
-		return false;
-	}
-
 	return true;
-#elif defined(CONFIG_HSA_AMD)
-	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
-		kfd2kgd = NULL;
-		kgd2kfd = NULL;
-		return false;
-	}
-
-	return true;
-#else
-	kfd2kgd = NULL;
-	return false;
-#endif
 }
 
 void amdgpu_amdkfd_fini(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8be765542e6..de530f68d4e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -36,7 +36,7 @@ struct kgd_mem {
 	void *cpu_ptr;
 };
 
-bool amdgpu_amdkfd_init(void);
+int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
 bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9416e0f5c1db..84b0ce39ee14 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 	return -EINVAL;
 }
 
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
+{
+	int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
+	u8 frev, crev;
+	u16 data_offset, size;
+
+	if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+					  &frev, &crev, &data_offset))
+		return true;
+
+	return false;
+}
+
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
 {
 	uint32_t bios_6_scratch;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 0ebb959ea435..9e1442053fe4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 				      u8 module_index,
 				      struct atom_mc_reg_table *reg_table);
 
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
+
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
 void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 3c895863fcf5..0020a0ea43ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
@@ -552,13 +552,14 @@ static bool amdgpu_atpx_detect(void)
 void amdgpu_register_atpx_handler(void)
 {
 	bool r;
+	enum vga_switcheroo_handler_flags_t handler_flags = 0;
 
 	/* detect if we have any ATPX + 2 VGA in the system */
 	r = amdgpu_atpx_detect();
 	if (!r)
 		return;
 
-	vga_switcheroo_register_handler(&amdgpu_atpx_handler);
+	vga_switcheroo_register_handler(&amdgpu_atpx_handler, handler_flags);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index f82a2dd83874..eacd810fc09b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -32,6 +32,9 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+#define AMDGPU_BO_LIST_MAX_PRIORITY	32u
+#define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1)
+
 static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
 				 struct amdgpu_bo_list **result,
 				 int *id)
@@ -88,8 +91,9 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
 	struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
 
-	bool has_userptr = false;
+	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
+	int r;
 
 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@@ -97,33 +101,43 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
 
 	for (i = 0; i < num_entries; ++i) {
-		struct amdgpu_bo_list_entry *entry = &array[i];
+		struct amdgpu_bo_list_entry *entry;
 		struct drm_gem_object *gobj;
+		struct amdgpu_bo *bo;
+		struct mm_struct *usermm;
 
 		gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
-		if (!gobj)
+		if (!gobj) {
+			r = -ENOENT;
 			goto error_free;
+		}
 
-		entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+		bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 		drm_gem_object_unreference_unlocked(gobj);
-		entry->priority = info[i].bo_priority;
-		entry->prefered_domains = entry->robj->initial_domain;
-		entry->allowed_domains = entry->prefered_domains;
-		if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
-			entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
-		if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) {
-			has_userptr = true;
-			entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
-			entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+		if (usermm) {
+			if (usermm != current->mm) {
+				amdgpu_bo_unref(&bo);
+				r = -EPERM;
+				goto error_free;
+			}
+			entry = &array[--first_userptr];
+		} else {
+			entry = &array[last_entry++];
 		}
+
+		entry->robj = bo;
+		entry->priority = min(info[i].bo_priority,
+				      AMDGPU_BO_LIST_MAX_PRIORITY);
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
 
-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
 			gds_obj = entry->robj;
-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
 			gws_obj = entry->robj;
-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;
 
 		trace_amdgpu_bo_list_set(list, entry->robj);
@@ -137,15 +151,17 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->gds_obj = gds_obj;
 	list->gws_obj = gws_obj;
 	list->oa_obj = oa_obj;
-	list->has_userptr = has_userptr;
+	list->first_userptr = first_userptr;
 	list->array = array;
 	list->num_entries = num_entries;
 
 	return 0;
 
 error_free:
+	while (i--)
+		amdgpu_bo_unref(&array[i].robj);
 	drm_free_large(array);
-	return -ENOENT;
+	return r;
 }
 
 struct amdgpu_bo_list *
@@ -161,6 +177,37 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 	return result;
 }
 
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+			     struct list_head *validated)
+{
+	/* This is based on the bucket sort with O(n) time complexity.
+	 * An item with priority "i" is added to bucket[i]. The lists are then
+	 * concatenated in descending order.
+	 */
+	struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
+	unsigned i;
+
+	for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&bucket[i]);
+
+	/* Since buffers which appear sooner in the relocation list are
+	 * likely to be used more often than buffers which appear later
+	 * in the list, the sort mustn't change the ordering of buffers
+	 * with the same priority, i.e. it must be stable.
+	 */
+	for (i = 0; i < list->num_entries; i++) {
+		unsigned priority = list->array[i].priority;
+
+		list_add_tail(&list->array[i].tv.head,
+			      &bucket[priority]);
+		list->array[i].user_pages = NULL;
+	}
+
+	/* Connect the sorted buckets in the output list. */
+	for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+		list_splice(&bucket[i], validated);
+}
+
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
 	mutex_unlock(&list->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b882e8175615..9392e50a7ba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,52 +25,12 @@
  *    Jerome Glisse <glisse@freedesktop.org>
  */
 #include <linux/list_sort.h>
+#include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-#define AMDGPU_CS_MAX_PRIORITY		32u
-#define AMDGPU_CS_NUM_BUCKETS		(AMDGPU_CS_MAX_PRIORITY + 1)
-
-/* This is based on the bucket sort with O(n) time complexity.
- * An item with priority "i" is added to bucket[i]. The lists are then
- * concatenated in descending order.
- */
-struct amdgpu_cs_buckets {
-	struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
-};
-
-static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
-{
-	unsigned i;
-
-	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
-		INIT_LIST_HEAD(&b->bucket[i]);
-}
-
-static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
-				  struct list_head *item, unsigned priority)
-{
-	/* Since buffers which appear sooner in the relocation list are
-	 * likely to be used more often than buffers which appear later
-	 * in the list, the sort mustn't change the ordering of buffers
-	 * with the same priority, i.e. it must be stable.
-	 */
-	list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
-}
-
-static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
-				       struct list_head *out_list)
-{
-	unsigned i;
-
-	/* Connect the sorted buckets in the output list. */
-	for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
-		list_splice(&b->bucket[i], out_list);
-	}
-}
-
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       u32 ip_instance, u32 ring,
 		       struct amdgpu_ring **out_ring)
@@ -128,6 +88,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 }
 
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
+				      struct amdgpu_user_fence *uf,
 				      struct drm_amdgpu_cs_chunk_fence *fence_data)
 {
 	struct drm_gem_object *gobj;
@@ -139,20 +100,19 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	if (gobj == NULL)
 		return -EINVAL;
 
-	p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
-	p->uf.offset = fence_data->offset;
+	uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+	uf->offset = fence_data->offset;
 
-	if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) {
+	if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
 		drm_gem_object_unreference_unlocked(gobj);
 		return -EINVAL;
 	}
 
-	p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo);
-	p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
-	p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+	p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
+	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -160,11 +120,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
-	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	unsigned size;
+	struct amdgpu_user_fence uf = {};
+	unsigned size, num_ibs = 0;
 	int i;
 	int ret;
 
@@ -181,15 +142,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-
 	/* get chunks */
-	INIT_LIST_HEAD(&p->validated);
 	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
-		goto put_bo_list;
+		goto put_ctx;
 	}
 
 	p->nchunks = cs->in.num_chunks;
@@ -197,7 +155,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			    GFP_KERNEL);
 	if (!p->chunks) {
 		ret = -ENOMEM;
-		goto put_bo_list;
+		goto put_ctx;
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
@@ -217,7 +175,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
-		p->chunks[i].user_ptr = cdata;
 
 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
 		if (p->chunks[i].kdata == NULL) {
@@ -233,7 +190,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 		switch (p->chunks[i].chunk_id) {
 		case AMDGPU_CHUNK_ID_IB:
-			p->num_ibs++;
+			++num_ibs;
 			break;
 
 		case AMDGPU_CHUNK_ID_FENCE:
@@ -243,7 +200,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 				goto free_partial_kdata;
 			}
 
-			ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata);
+			ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
 			if (ret)
 				goto free_partial_kdata;
 
@@ -258,12 +215,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		}
 	}
 
-
-	p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!p->ibs) {
-		ret = -ENOMEM;
+	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+	if (ret)
 		goto free_all_kdata;
-	}
+
+	p->job->uf = uf;
 
 	kfree(chunk_array);
 	return 0;
@@ -274,9 +230,7 @@ free_partial_kdata:
 	for (; i >= 0; i--)
 		drm_free_large(p->chunks[i].kdata);
 	kfree(p->chunks);
-put_bo_list:
-	if (p->bo_list)
-		amdgpu_bo_list_put(p->bo_list);
+put_ctx:
 	amdgpu_ctx_put(p->ctx);
 free_chunk:
 	kfree(chunk_array);
@@ -336,96 +290,198 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	return max(bytes_moved_threshold, 1024*1024ull);
 }
 
-int amdgpu_cs_list_validate(struct amdgpu_device *adev,
-			    struct amdgpu_vm *vm,
+int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			    struct list_head *validated)
 {
 	struct amdgpu_bo_list_entry *lobj;
-	struct amdgpu_bo *bo;
-	u64 bytes_moved = 0, initial_bytes_moved;
-	u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
+	u64 initial_bytes_moved;
 	int r;
 
 	list_for_each_entry(lobj, validated, tv.head) {
-		bo = lobj->robj;
-		if (!bo->pin_count) {
-			u32 domain = lobj->prefered_domains;
-			u32 current_domain =
-				amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
-			/* Check if this buffer will be moved and don't move it
-			 * if we have moved too many buffers for this IB already.
-			 *
-			 * Note that this allows moving at least one buffer of
-			 * any size, because it doesn't take the current "bo"
-			 * into account. We don't want to disallow buffer moves
-			 * completely.
-			 */
-			if ((lobj->allowed_domains & current_domain) != 0 &&
-			    (domain & current_domain) == 0 && /* will be moved */
-			    bytes_moved > bytes_moved_threshold) {
-				/* don't move it */
-				domain = current_domain;
-			}
+		struct amdgpu_bo *bo = lobj->robj;
+		bool binding_userptr = false;
+		struct mm_struct *usermm;
+		uint32_t domain;
+
+		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+		if (usermm && usermm != current->mm)
+			return -EPERM;
+
+		/* Check if we have user pages and nobody bound the BO already */
+		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+			size_t size = sizeof(struct page *);
+
+			size *= bo->tbo.ttm->num_pages;
+			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+			binding_userptr = true;
+		}
 
-		retry:
-			amdgpu_ttm_placement_from_domain(bo, domain);
-			initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-			bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-				       initial_bytes_moved;
-
-			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain != lobj->allowed_domains) {
-					domain = lobj->allowed_domains;
-					goto retry;
-				}
-				return r;
+		if (bo->pin_count)
+			continue;
+
+		/* Avoid moving this one if we have moved too many buffers
+		 * for this IB already.
+		 *
+		 * Note that this allows moving at least one buffer of
+		 * any size, because it doesn't take the current "bo"
+		 * into account. We don't want to disallow buffer moves
+		 * completely.
+		 */
+		if (p->bytes_moved <= p->bytes_moved_threshold)
+			domain = bo->prefered_domains;
+		else
+			domain = bo->allowed_domains;
+
+	retry:
+		amdgpu_ttm_placement_from_domain(bo, domain);
+		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+			       initial_bytes_moved;
+
+		if (unlikely(r)) {
+			if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
+				domain = bo->allowed_domains;
+				goto retry;
 			}
+			return r;
+		}
+
+		if (binding_userptr) {
+			drm_free_large(lobj->user_pages);
+			lobj->user_pages = NULL;
 		}
-		lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
 	}
 	return 0;
 }
 
-static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+				union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_cs_buckets buckets;
+	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
 	bool need_mmap_lock = false;
-	int i, r;
+	unsigned i, tries = 10;
+	int r;
 
-	if (p->bo_list) {
-		need_mmap_lock = p->bo_list->has_userptr;
-		amdgpu_cs_buckets_init(&buckets);
-		for (i = 0; i < p->bo_list->num_entries; i++)
-			amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
-								  p->bo_list->array[i].priority);
+	INIT_LIST_HEAD(&p->validated);
 
-		amdgpu_cs_buckets_get_list(&buckets, &p->validated);
+	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	if (p->bo_list) {
+		need_mmap_lock = p->bo_list->first_userptr !=
+			p->bo_list->num_entries;
+		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	}
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
-	if (p->uf.bo)
+	if (p->job->uf.bo)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-	if (unlikely(r != 0))
-		goto error_reserve;
+	while (1) {
+		struct list_head need_pages;
+		unsigned i;
+
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0))
+			goto error_free_pages;
+
+		/* Without a BO list we don't have userptr BOs */
+		if (!p->bo_list)
+			break;
+
+		INIT_LIST_HEAD(&need_pages);
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+
+			e = &p->bo_list->array[i];
+
+			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+				 &e->user_invalidated) && e->user_pages) {
+
+				/* We acquired a page array, but somebody
+				 * invalidated it. Free it an try again
+				 */
+				release_pages(e->user_pages,
+					      e->robj->tbo.ttm->num_pages,
+					      false);
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+			}
+
+			if (e->robj->tbo.ttm->state != tt_bound &&
+			    !e->user_pages) {
+				list_del(&e->tv.head);
+				list_add(&e->tv.head, &need_pages);
+
+				amdgpu_bo_unreserve(e->robj);
+			}
+		}
+
+		if (list_empty(&need_pages))
+			break;
+
+		/* Unreserve everything again. */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+		/* We tried to often, just abort */
+		if (!--tries) {
+			r = -EDEADLK;
+			goto error_free_pages;
+		}
+
+		/* Fill the page arrays for all useptrs. */
+		list_for_each_entry(e, &need_pages, tv.head) {
+			struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+			e->user_pages = drm_calloc_large(ttm->num_pages,
+							 sizeof(struct page*));
+			if (!e->user_pages) {
+				r = -ENOMEM;
+				goto error_free_pages;
+			}
+
+			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+			if (r) {
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+				goto error_free_pages;
+			}
+		}
+
+		/* And try again. */
+		list_splice(&need_pages, &p->validated);
+	}
 
 	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
 
-	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);
+	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	p->bytes_moved = 0;
+
+	r = amdgpu_cs_list_validate(p, &duplicates);
+	if (r)
+		goto error_validate;
+
+	r = amdgpu_cs_list_validate(p, &p->validated);
 	if (r)
 		goto error_validate;
 
-	r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
+	if (p->bo_list) {
+		struct amdgpu_vm *vm = &fpriv->vm;
+		unsigned i;
+
+		for (i = 0; i < p->bo_list->num_entries; i++) {
+			struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+
+			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
+		}
+	}
 
 error_validate:
 	if (r) {
@@ -433,10 +489,26 @@ error_validate:
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 	}
 
-error_reserve:
+error_free_pages:
+
 	if (need_mmap_lock)
 		up_read(&current->mm->mmap_sem);
 
+	if (p->bo_list) {
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+			e = &p->bo_list->array[i];
+
+			if (!e->user_pages)
+				continue;
+
+			release_pages(e->user_pages,
+				      e->robj->tbo.ttm->num_pages,
+				      false);
+			drm_free_large(e->user_pages);
+		}
+	}
+
 	return r;
 }
 
@@ -447,7 +519,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
 	list_for_each_entry(e, &p->validated, tv.head) {
 		struct reservation_object *resv = e->robj->tbo.resv;
-		r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);
+		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
 
 		if (r)
 			return r;
@@ -510,11 +582,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
-	if (parser->ibs)
-		for (i = 0; i < parser->num_ibs; i++)
-			amdgpu_ib_free(parser->adev, &parser->ibs[i]);
-	kfree(parser->ibs);
-	amdgpu_bo_unref(&parser->uf.bo);
+	if (parser->job)
+		amdgpu_job_free(parser->job);
 	amdgpu_bo_unref(&parser->uf_entry.robj);
 }
 
@@ -530,7 +599,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
 	if (r)
 		return r;
 
@@ -556,14 +625,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 				return r;
 
 			f = bo_va->last_pt_update;
-			r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+			r = amdgpu_sync_fence(adev, &p->job->sync, f);
 			if (r)
 				return r;
 		}
 
 	}
 
-	r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
+	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
@@ -581,29 +650,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
 }
 
 static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
-				 struct amdgpu_cs_parser *parser)
+				 struct amdgpu_cs_parser *p)
 {
-	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	struct amdgpu_ring *ring;
+	struct amdgpu_ring *ring = p->job->ring;
 	int i, r;
 
-	if (parser->num_ibs == 0)
-		return 0;
-
 	/* Only for UVD/VCE VM emulation */
-	for (i = 0; i < parser->num_ibs; i++) {
-		ring = parser->ibs[i].ring;
-		if (ring->funcs->parse_cs) {
-			r = amdgpu_ring_parse_cs(ring, parser, i);
+	if (ring->funcs->parse_cs) {
+		for (i = 0; i < p->job->num_ibs; i++) {
+			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
 	}
 
-	r = amdgpu_bo_vm_update_pte(parser, vm);
+	r = amdgpu_bo_vm_update_pte(p, vm);
 	if (!r)
-		amdgpu_cs_sync_rings(parser);
+		amdgpu_cs_sync_rings(p);
 
 	return r;
 }
@@ -626,14 +691,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	int i, j;
 	int r;
 
-	for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
+	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
 		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 		struct amdgpu_ring *ring;
 
 		chunk = &parser->chunks[i];
-		ib = &parser->ibs[j];
+		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -645,6 +710,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
+		if (parser->job->ring && parser->job->ring != ring)
+			return -EINVAL;
+
+		parser->job->ring = ring;
+
 		if (ring->funcs->parse_cs) {
 			struct amdgpu_bo_va_mapping *m;
 			struct amdgpu_bo *aobj = NULL;
@@ -673,7 +743,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
 			kptr += chunk_ib->va_start - offset;
 
-			r =  amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib);
+			r =  amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
 			if (r) {
 				DRM_ERROR("Failed to get ib !\n");
 				return r;
@@ -682,7 +752,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
 			amdgpu_bo_kunmap(aobj);
 		} else {
-			r =  amdgpu_ib_get(ring, vm, 0, ib);
+			r =  amdgpu_ib_get(adev, vm, 0, ib);
 			if (r) {
 				DRM_ERROR("Failed to get ib !\n");
 				return r;
@@ -697,15 +767,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		j++;
 	}
 
-	if (!parser->num_ibs)
-		return 0;
-
 	/* add GDS resources to first IB */
 	if (parser->bo_list) {
 		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
 		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
 		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
-		struct amdgpu_ib *ib = &parser->ibs[0];
+		struct amdgpu_ib *ib = &parser->job->ibs[0];
 
 		if (gds) {
 			ib->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -721,15 +788,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		}
 	}
 	/* wrap the last IB with user fence */
-	if (parser->uf.bo) {
-		struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
+	if (parser->job->uf.bo) {
+		struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
 
 		/* UVD & VCE fw doesn't support user fences */
-		if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
-		    ib->ring->type == AMDGPU_RING_TYPE_VCE)
+		if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+		    parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
 			return -EINVAL;
 
-		ib->user = &parser->uf;
+		ib->user = &parser->job->uf;
 	}
 
 	return 0;
@@ -739,14 +806,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				  struct amdgpu_cs_parser *p)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_ib *ib;
 	int i, j, r;
 
-	if (!p->num_ibs)
-		return 0;
-
-	/* Add dependencies to first IB */
-	ib = &p->ibs[0];
 	for (i = 0; i < p->nchunks; ++i) {
 		struct drm_amdgpu_cs_chunk_dep *deps;
 		struct amdgpu_cs_chunk *chunk;
@@ -784,7 +845,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				return r;
 
 			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &ib->sync, fence);
+				r = amdgpu_sync_fence(adev, &p->job->sync,
+						      fence);
 				fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
@@ -796,15 +858,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_free_job(struct amdgpu_job *job)
+static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+			    union drm_amdgpu_cs *cs)
 {
-	int i;
-	if (job->ibs)
-		for (i = 0; i < job->num_ibs; i++)
-			amdgpu_ib_free(job->adev, &job->ibs[i]);
-	kfree(job->ibs);
-	if (job->uf.bo)
-		amdgpu_bo_unref(&job->uf.bo);
+	struct amdgpu_ring *ring = p->job->ring;
+	struct amd_sched_fence *fence;
+	struct amdgpu_job *job;
+
+	job = p->job;
+	p->job = NULL;
+
+	job->base.sched = &ring->sched;
+	job->base.s_entity = &p->ctx->rings[ring->idx].entity;
+	job->owner = p->filp;
+
+	fence = amd_sched_fence_create(job->base.s_entity, p->filp);
+	if (!fence) {
+		amdgpu_job_free(job);
+		return -ENOMEM;
+	}
+
+	job->base.s_fence = fence;
+	p->fence = fence_get(&fence->base);
+
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
+					      &fence->base);
+	job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
+
+	trace_amdgpu_cs_ioctl(job);
+	amd_sched_entity_push_job(&job->base);
+
 	return 0;
 }
 
@@ -829,7 +912,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		r = amdgpu_cs_handle_lockup(adev, r);
 		return r;
 	}
-	r = amdgpu_cs_parser_relocs(&parser);
+	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r == -ENOMEM)
 		DRM_ERROR("Not enough memory for command submission!\n");
 	else if (r && r != -ERESTARTSYS)
@@ -848,68 +931,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r)
 		goto out;
 
-	for (i = 0; i < parser.num_ibs; i++)
+	for (i = 0; i < parser.job->num_ibs; i++)
 		trace_amdgpu_cs(&parser, i);
 
 	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
 	if (r)
 		goto out;
 
-	if (amdgpu_enable_scheduler && parser.num_ibs) {
-		struct amdgpu_ring * ring = parser.ibs->ring;
-		struct amd_sched_fence *fence;
-		struct amdgpu_job *job;
-
-		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-		if (!job) {
-			r = -ENOMEM;
-			goto out;
-		}
-
-		job->base.sched = &ring->sched;
-		job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
-		job->adev = parser.adev;
-		job->owner = parser.filp;
-		job->free_job = amdgpu_cs_free_job;
-
-		job->ibs = parser.ibs;
-		job->num_ibs = parser.num_ibs;
-		parser.ibs = NULL;
-		parser.num_ibs = 0;
-
-		if (job->ibs[job->num_ibs - 1].user) {
-			job->uf = parser.uf;
-			job->ibs[job->num_ibs - 1].user = &job->uf;
-			parser.uf.bo = NULL;
-		}
-
-		fence = amd_sched_fence_create(job->base.s_entity,
-					       parser.filp);
-		if (!fence) {
-			r = -ENOMEM;
-			amdgpu_cs_free_job(job);
-			kfree(job);
-			goto out;
-		}
-		job->base.s_fence = fence;
-		parser.fence = fence_get(&fence->base);
-
-		cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
-						      &fence->base);
-		job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
-
-		trace_amdgpu_cs_ioctl(job);
-		amd_sched_entity_push_job(&job->base);
-
-	} else {
-		struct amdgpu_fence *fence;
-
-		r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
-				       parser.filp);
-		fence = parser.ibs[parser.num_ibs - 1].fence;
-		parser.fence = fence_get(&fence->base);
-		cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
-	}
+	r = amdgpu_cs_submit(&parser, cs);
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
@@ -980,30 +1009,36 @@ struct amdgpu_bo_va_mapping *
 amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo)
 {
-	struct amdgpu_bo_list_entry *reloc;
 	struct amdgpu_bo_va_mapping *mapping;
+	unsigned i;
+
+	if (!parser->bo_list)
+		return NULL;
 
 	addr /= AMDGPU_GPU_PAGE_SIZE;
 
-	list_for_each_entry(reloc, &parser->validated, tv.head) {
-		if (!reloc->bo_va)
+	for (i = 0; i < parser->bo_list->num_entries; i++) {
+		struct amdgpu_bo_list_entry *lobj;
+
+		lobj = &parser->bo_list->array[i];
+		if (!lobj->bo_va)
 			continue;
 
-		list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
+		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
 			if (mapping->it.start > addr ||
 			    addr > mapping->it.last)
 				continue;
 
-			*bo = reloc->bo_va->bo;
+			*bo = lobj->bo_va->bo;
 			return mapping;
 		}
 
-		list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
+		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
 			if (mapping->it.start > addr ||
 			    addr > mapping->it.last)
 				continue;
 
-			*bo = reloc->bo_va->bo;
+			*bo = lobj->bo_va->bo;
 			return mapping;
 		}
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 17d1fb12128a..17e13621fae9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,8 +25,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
-		    struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
 	ctx->adev = adev;
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs *
-			 AMDGPU_MAX_RINGS, GFP_KERNEL);
+	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+			      sizeof(struct fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) *
-			amdgpu_sched_jobs * i;
+		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
-	if (amdgpu_enable_scheduler) {
-		/* create context entity for each ring */
-		for (i = 0; i < adev->num_rings; i++) {
-			struct amd_sched_rq *rq;
-			if (pri >= AMD_SCHED_MAX_PRIORITY) {
-				kfree(ctx->fences);
-				return -EINVAL;
-			}
-			rq = &adev->rings[i]->sched.sched_rq[pri];
-			r = amd_sched_entity_init(&adev->rings[i]->sched,
-						  &ctx->rings[i].entity,
-						  rq, amdgpu_sched_jobs);
-			if (r)
-				break;
-		}
-
-		if (i < adev->num_rings) {
-			for (j = 0; j < i; j++)
-				amd_sched_entity_fini(&adev->rings[j]->sched,
-						      &ctx->rings[j].entity);
-			kfree(ctx->fences);
-			return r;
-		}
+	/* create context entity for each ring */
+	for (i = 0; i < adev->num_rings; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		struct amd_sched_rq *rq;
+
+		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
+					  rq, amdgpu_sched_jobs);
+		if (r)
+			break;
+	}
+
+	if (i < adev->num_rings) {
+		for (j = 0; j < i; j++)
+			amd_sched_entity_fini(&adev->rings[j]->sched,
+					      &ctx->rings[j].entity);
+		kfree(ctx->fences);
+		return r;
 	}
 	return 0;
 }
 
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 {
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 			fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);
 
-	if (amdgpu_enable_scheduler) {
-		for (i = 0; i < adev->num_rings; i++)
-			amd_sched_entity_fini(&adev->rings[i]->sched,
-					      &ctx->rings[i].entity);
-	}
+	for (i = 0; i < adev->num_rings; i++)
+		amd_sched_entity_fini(&adev->rings[i]->sched,
+				      &ctx->rings[i].entity);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 		return r;
 	}
 	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx);
+	r = amdgpu_ctx_init(adev, ctx);
 	if (r) {
 		idr_remove(&mgr->ctx_handles, *id);
 		*id = 0;
@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 	id = args->in.ctx_id;
 
 	switch (args->in.op) {
-		case AMDGPU_CTX_OP_ALLOC_CTX:
-			r = amdgpu_ctx_alloc(adev, fpriv, &id);
-			args->out.alloc.ctx_id = id;
-			break;
-		case AMDGPU_CTX_OP_FREE_CTX:
-			r = amdgpu_ctx_free(fpriv, id);
-			break;
-		case AMDGPU_CTX_OP_QUERY_STATE:
-			r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
-			break;
-		default:
-			return -EINVAL;
+	case AMDGPU_CTX_OP_ALLOC_CTX:
+		r = amdgpu_ctx_alloc(adev, fpriv, &id);
+		args->out.alloc.ctx_id = id;
+		break;
+	case AMDGPU_CTX_OP_FREE_CTX:
+		r = amdgpu_ctx_free(fpriv, id);
+		break;
+	case AMDGPU_CTX_OP_QUERY_STATE:
+		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+		break;
+	default:
+		return -EINVAL;
 	}
 
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 51bfc114584e..612117478b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -62,6 +62,12 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -636,31 +642,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_boot_test_post_card - check and possibly initialize the hw
- *
- * @adev: amdgpu_device pointer
- *
- * Check if the asic is initialized and if not, attempt to initialize
- * it (all asics).
- * Returns true if initialized or false if not.
- */
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
-{
-	if (amdgpu_card_posted(adev))
-		return true;
-
-	if (adev->bios) {
-		DRM_INFO("GPU not posted. posting now...\n");
-		if (adev->is_atom_bios)
-			amdgpu_atom_asic_init(adev->mode_info.atom_context);
-		return true;
-	} else {
-		dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-		return false;
-	}
-}
-
-/**
  * amdgpu_dummy_page_init - init dummy page used by the driver
  *
  * @adev: amdgpu_device pointer
@@ -959,12 +940,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 			 amdgpu_sched_jobs);
 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 	}
-	/* vramlimit must be a power of two */
-	if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
-		dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
-				amdgpu_vram_limit);
-		amdgpu_vram_limit = 0;
-	}
 
 	if (amdgpu_gart_size != -1) {
 		/* gtt size must be power of two and greater or equal to 32M */
@@ -1434,7 +1409,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_funcs_ring = NULL;
+	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
 	adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
 
@@ -1455,9 +1430,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	mutex_init(&adev->ring_lock);
+	mutex_init(&adev->vm_manager.lock);
 	atomic_set(&adev->irq.ih.lock, 0);
-	mutex_init(&adev->gem.mutex);
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
 	mutex_init(&adev->srbm_mutex);
@@ -1511,7 +1485,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	if (amdgpu_runtime_pm == 1)
 		runtime = true;
-	if (amdgpu_device_is_px(ddev))
+	if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
 	if (runtime)
@@ -1531,8 +1505,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		return r;
 	}
 
+	/* See if the asic supports SR-IOV */
+	adev->virtualization.supports_sr_iov =
+		amdgpu_atombios_has_gpu_virtualization_table(adev);
+
 	/* Post card if necessary */
-	if (!amdgpu_card_posted(adev)) {
+	if (!amdgpu_card_posted(adev) ||
+	    adev->virtualization.supports_sr_iov) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
@@ -1577,11 +1556,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		return r;
 	}
 
-	r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
-	if (r) {
-		dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
-		return r;
-	}
 	r = amdgpu_ib_ring_tests(adev);
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
@@ -1645,7 +1619,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	adev->shutdown = true;
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
-	amdgpu_ctx_fini(&adev->kernel_ctx);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
@@ -1894,6 +1867,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 
 retry:
 	r = amdgpu_asic_reset(adev);
+	/* post card */
+	amdgpu_atom_asic_init(adev->mode_info.atom_context);
+
 	if (!r) {
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 1846d65b7285..f0ed974bd4e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,32 +35,30 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
-static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
-				   struct fence **f)
+static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
 {
-	struct amdgpu_fence *fence;
-	long r;
+	struct amdgpu_flip_work *work =
+		container_of(cb, struct amdgpu_flip_work, cb);
 
-	if (*f == NULL)
-		return;
+	fence_put(f);
+	schedule_work(&work->flip_work);
+}
 
-	fence = to_amdgpu_fence(*f);
-	if (fence) {
-		r = fence_wait(&fence->base, false);
-		if (r == -EDEADLK)
-			r = amdgpu_gpu_reset(adev);
-	} else
-		r = fence_wait(*f, false);
+static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
+				     struct fence **f)
+{
+	struct fence *fence= *f;
 
-	if (r)
-		DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
+	if (fence == NULL)
+		return false;
 
-	/* We continue with the page flip even if we failed to wait on
-	 * the fence, otherwise the DRM core and userspace will be
-	 * confused about which BO the CRTC is scanning out
-	 */
-	fence_put(*f);
 	*f = NULL;
+
+	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+		return true;
+
+	fence_put(*f);
+	return false;
 }
 
 static void amdgpu_flip_work_func(struct work_struct *__work)
@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	int vpos, hpos, stat, min_udelay = 0;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
-	amdgpu_flip_wait_fence(adev, &work->excl);
+	if (amdgpu_flip_handle_fence(work, &work->excl))
+		return;
+
 	for (i = 0; i < work->shared_count; ++i)
-		amdgpu_flip_wait_fence(adev, &work->shared[i]);
+		if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+			return;
 
 	/* We borrow the event spin lock for protecting flip_status */
 	spin_lock_irqsave(&crtc->dev->event_lock, flags);
@@ -130,12 +131,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 				 vblank->framedur_ns / 1000,
 				 vblank->linedur_ns / 1000, stat, vpos, hpos);
 
-	/* do the flip (mmio) */
-	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 	/* set the flip status */
 	amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
-
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+	/* Do the flip (mmio) */
+	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 }
 
 /*
@@ -254,7 +255,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-	queue_work(amdgpu_crtc->pflip_queue, &work->flip_work);
+	amdgpu_flip_work_func(&work->flip_work);
 	return 0;
 
 vblank_cleanup:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9ef1db87cf26..f1e17d60055a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
 int amdgpu_smc_load_fw = 1;
 int amdgpu_aspm = -1;
 int amdgpu_runtime_pm = -1;
-int amdgpu_hard_reset = 0;
 unsigned amdgpu_ip_block_mask = 0xffffffff;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_exp_hw_support = 0;
-int amdgpu_enable_scheduler = 1;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
-int amdgpu_enable_semaphores = 0;
 int amdgpu_powerplay = -1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
@@ -128,9 +125,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
 MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
 module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
 
-MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
-module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
-
 MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
 module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
 
@@ -155,18 +149,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
 module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
 
-MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
-module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
-
 MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
 module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
 
 MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
 module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 
-MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
-module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
-
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
@@ -330,6 +318,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 		return -ENODEV;
 	}
 
+	/*
+	 * Initialize amdkfd before starting radeon. If it was not loaded yet,
+	 * defer radeon probing
+	 */
+	ret = amdgpu_amdkfd_init();
+	if (ret == -EPROBE_DEFER)
+		return ret;
+
 	/* Get rid of things like offb */
 	ret = amdgpu_kick_out_firmware_fb(pdev);
 	if (ret)
@@ -559,6 +555,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
 
 static int __init amdgpu_init(void)
 {
+	amdgpu_sync_init();
 #ifdef CONFIG_VGA_CONSOLE
 	if (vgacon_text_force()) {
 		DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
@@ -572,8 +569,6 @@ static int __init amdgpu_init(void)
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 
-	amdgpu_amdkfd_init();
-
 	/* let modprobe override vga console setting */
 	return drm_pci_init(driver, pdriver);
 }
@@ -583,6 +578,7 @@ static void __exit amdgpu_exit(void)
 	amdgpu_amdkfd_fini();
 	drm_pci_exit(driver, pdriver);
 	amdgpu_unregister_atpx_handler();
+	amdgpu_sync_fini();
 }
 
 module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3671f9f220bd..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
  * that the the relevant GPU caches have been flushed.
  */
 
+struct amdgpu_fence {
+	struct fence base;
+
+	/* RB, DMA, etc. */
+	struct amdgpu_ring		*ring;
+};
+
 static struct kmem_cache *amdgpu_fence_slab;
 static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
 
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+	if (__f->base.ops == &amdgpu_fence_ops)
+		return __f;
+
+	return NULL;
+}
+
 /**
  * amdgpu_fence_write - write a fence value
  *
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 	if (drv->cpu_addr)
 		seq = le32_to_cpu(*drv->cpu_addr);
 	else
-		seq = lower_32_bits(atomic64_read(&drv->last_seq));
+		seq = atomic_read(&drv->last_seq);
 
 	return seq;
 }
@@ -91,32 +112,41 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
  * @ring: ring the fence is associated with
- * @owner: creator of the fence
- * @fence: amdgpu fence object
+ * @f: resulting fence object
  *
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
-		      struct amdgpu_fence **fence)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_fence *fence;
+	struct fence **ptr;
+	uint32_t seq;
 
-	/* we are protected by the ring emission mutex */
-	*fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
-	if ((*fence) == NULL) {
+	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+	if (fence == NULL)
 		return -ENOMEM;
-	}
-	(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
-	(*fence)->ring = ring;
-	(*fence)->owner = owner;
-	fence_init(&(*fence)->base, &amdgpu_fence_ops,
-		&ring->fence_drv.fence_queue.lock,
-		adev->fence_context + ring->idx,
-		(*fence)->seq);
+
+	seq = ++ring->fence_drv.sync_seq;
+	fence->ring = ring;
+	fence_init(&fence->base, &amdgpu_fence_ops,
+		   &ring->fence_drv.lock,
+		   adev->fence_context + ring->idx,
+		   seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       (*fence)->seq,
-			       AMDGPU_FENCE_FLAG_INT);
+			       seq, AMDGPU_FENCE_FLAG_INT);
+
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+	/* This function can't be called concurrently anyway, otherwise
+	 * emitting the fence would mess up the hardware ring buffer.
+	 */
+	BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+	rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+	*f = &fence->base;
+
 	return 0;
 }
 
@@ -134,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
  *
  * @ring: pointer to struct amdgpu_ring
  *
  * Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
  */
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
-	uint64_t seq, last_seq, last_emitted;
-	unsigned count_loop = 0;
-	bool wake = false;
-
-	/* Note there is a scenario here for an infinite loop but it's
-	 * very unlikely to happen. For it to happen, the current polling
-	 * process need to be interrupted by another process and another
-	 * process needs to update the last_seq btw the atomic read and
-	 * xchg of the current process.
-	 *
-	 * More over for this to go in infinite loop there need to be
-	 * continuously new fence signaled ie amdgpu_fence_read needs
-	 * to return a different value each time for both the currently
-	 * polling process and the other process that xchg the last_seq
-	 * btw atomic read and xchg of the current process. And the
-	 * value the other process set as last seq must be higher than
-	 * the seq value we just read. Which means that current process
-	 * need to be interrupted after amdgpu_fence_read and before
-	 * atomic xchg.
-	 *
-	 * To be even more safe we count the number of time we loop and
-	 * we bail after 10 loop just accepting the fact that we might
-	 * have temporarly set the last_seq not to the true real last
-	 * seq but to an older one.
-	 */
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	uint32_t seq, last_seq;
+	int r;
+
 	do {
-		last_emitted = ring->fence_drv.sync_seq[ring->idx];
+		last_seq = atomic_read(&ring->fence_drv.last_seq);
 		seq = amdgpu_fence_read(ring);
-		seq |= last_seq & 0xffffffff00000000LL;
-		if (seq < last_seq) {
-			seq &= 0xffffffff;
-			seq |= last_emitted & 0xffffffff00000000LL;
-		}
 
-		if (seq <= last_seq || seq > last_emitted) {
-			break;
-		}
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-		if ((count_loop++) > 10) {
-			/* We looped over too many time leave with the
-			 * fact that we might have set an older fence
-			 * seq then the current real last seq as signaled
-			 * by the hw.
-			 */
-			break;
-		}
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
-	if (seq < last_emitted)
+	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
-	return wake;
-}
+	while (last_seq != seq) {
+		struct fence *fence, **ptr;
 
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
-	if (amdgpu_fence_activity(ring))
-		wake_up_all(&ring->fence_drv.fence_queue);
+		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+		/* There is always exactly one thread signaling this fence slot */
+		fence = rcu_dereference_protected(*ptr, 1);
+		rcu_assign_pointer(*ptr, NULL);
+
+		BUG_ON(!fence);
+
+		r = fence_signal(fence);
+		if (!r)
+			FENCE_TRACE(fence, "signaled from irq context\n");
+		else
+			BUG();
+
+		fence_put(fence);
+	}
 }
 
 /**
@@ -234,83 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
 }
 
 /**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value.  Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return true;
-
-	/* poll new last sequence at least once */
-	amdgpu_fence_process(ring);
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return true;
-
-	return false;
-}
-
-/*
- * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EDEADL: GPU hang detected
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
-	bool signaled = false;
-
-	BUG_ON(!ring);
-	if (seq > ring->fence_drv.sync_seq[ring->idx])
-		return -EINVAL;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
-		return 0;
-
-	amdgpu_fence_schedule_fallback(ring);
-	wait_event(ring->fence_drv.fence_queue, (
-		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));
-
-	if (signaled)
-		return 0;
-	else
-		return -EDEADLK;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
-	uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
-	if (seq >= ring->fence_drv.sync_seq[ring->idx])
-		return -ENOENT;
-
-	return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
-/**
  * amdgpu_fence_wait_empty - wait for all fences to signal
  *
  * @adev: amdgpu device pointer
@@ -318,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
  *
  * Wait for all fences on the requested ring to signal (all asics).
  * Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
  */
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
-	uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
+	uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+	struct fence *fence, **ptr;
+	int r;
 
 	if (!seq)
 		return 0;
 
-	return amdgpu_fence_ring_wait_seq(ring, seq);
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+	rcu_read_lock();
+	fence = rcu_dereference(*ptr);
+	if (!fence || !fence_get_rcu(fence)) {
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+
+	r = fence_wait(fence, false);
+	fence_put(fence);
+	return r;
 }
 
 /**
@@ -347,75 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	 * but it's ok to report slightly wrong fence count here.
 	 */
 	amdgpu_fence_process(ring);
-	emitted = ring->fence_drv.sync_seq[ring->idx]
-		- atomic64_read(&ring->fence_drv.last_seq);
-	/* to avoid 32bits warp around */
-	if (emitted > 0x10000000)
-		emitted = 0x10000000;
-
-	return (unsigned)emitted;
-}
-
-/**
- * amdgpu_fence_need_sync - do we need a semaphore
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Check if the fence needs to be synced against another ring
- * (all asics).  If so, we need to emit a semaphore.
- * Returns true if we need to sync with another ring, false if
- * not.
- */
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *dst_ring)
-{
-	struct amdgpu_fence_driver *fdrv;
-
-	if (!fence)
-		return false;
-
-	if (fence->ring == dst_ring)
-		return false;
-
-	/* we are protected by the ring mutex */
-	fdrv = &dst_ring->fence_drv;
-	if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
-		return false;
-
-	return true;
-}
-
-/**
- * amdgpu_fence_note_sync - record the sync point
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Note the sequence number at which point the fence will
- * be synced with the requested ring (all asics).
- */
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *dst_ring)
-{
-	struct amdgpu_fence_driver *dst, *src;
-	unsigned i;
-
-	if (!fence)
-		return;
-
-	if (fence->ring == dst_ring)
-		return;
-
-	/* we are protected by the ring mutex */
-	src = &fence->ring->fence_drv;
-	dst = &dst_ring->fence_drv;
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		if (i == dst_ring->idx)
-			continue;
-
-		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
-	}
+	emitted = 0x100000000ull;
+	emitted -= atomic_read(&ring->fence_drv.last_seq);
+	emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+	return lower_32_bits(emitted);
 }
 
 /**
@@ -447,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
 		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 	}
-	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);
 
 	ring->fence_drv.irq_src = irq_src;
@@ -465,47 +324,55 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
  * for the requested ring.
  *
  * @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
  *
  * Init the fence driver for the requested ring (all asics).
  * Helper function for amdgpu_fence_driver_init().
  */
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+				  unsigned num_hw_submission)
 {
-	int i, r;
+	long timeout;
+	int r;
+
+	/* Check that num_hw_submission is a power of two */
+	if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+		return -EINVAL;
 
 	ring->fence_drv.cpu_addr = NULL;
 	ring->fence_drv.gpu_addr = 0;
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		ring->fence_drv.sync_seq[i] = 0;
-
-	atomic64_set(&ring->fence_drv.last_seq, 0);
+	ring->fence_drv.sync_seq = 0;
+	atomic_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
 		    (unsigned long)ring);
 
-	init_waitqueue_head(&ring->fence_drv.fence_queue);
-
-	if (amdgpu_enable_scheduler) {
-		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
-		if (timeout == 0) {
-			/*
-			 * FIXME:
-			 * Delayed workqueue cannot use it directly,
-			 * so the scheduler will not use delayed workqueue if
-			 * MAX_SCHEDULE_TIMEOUT is set.
-			 * Currently keep it simple and silly.
-			 */
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   amdgpu_sched_hw_submission,
-				   timeout, ring->name);
-		if (r) {
-			DRM_ERROR("Failed to create scheduler on ring %s.\n",
-				  ring->name);
-			return r;
-		}
+	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+	spin_lock_init(&ring->fence_drv.lock);
+	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+					 GFP_KERNEL);
+	if (!ring->fence_drv.fences)
+		return -ENOMEM;
+
+	timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+	if (timeout == 0) {
+		/*
+		 * FIXME:
+		 * Delayed workqueue cannot use it directly,
+		 * so the scheduler will not use delayed workqueue if
+		 * MAX_SCHEDULE_TIMEOUT is set.
+		 * Currently keep it simple and silly.
+		 */
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+	r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+			   num_hw_submission,
+			   timeout, ring->name);
+	if (r) {
+		DRM_ERROR("Failed to create scheduler on ring %s.\n",
+			  ring->name);
+		return r;
 	}
 
 	return 0;
@@ -548,11 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
  */
 void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 {
-	int i, r;
+	unsigned i, j;
+	int r;
 
-	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
-		kmem_cache_destroy(amdgpu_fence_slab);
-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
@@ -563,14 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&ring->fence_drv.fence_queue);
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
 		del_timer_sync(&ring->fence_drv.fallback_timer);
+		for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+			fence_put(ring->fence_drv.fences[i]);
+		kfree(ring->fence_drv.fences);
 		ring->fence_drv.initialized = false;
 	}
-	mutex_unlock(&adev->ring_lock);
+
+	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+		kmem_cache_destroy(amdgpu_fence_slab);
 }
 
 /**
@@ -585,7 +454,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
 {
 	int i, r;
 
-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		if (!ring || !ring->fence_drv.initialized)
@@ -602,7 +470,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 	}
-	mutex_unlock(&adev->ring_lock);
 }
 
 /**
@@ -621,7 +488,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
 {
 	int i;
 
-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		if (!ring || !ring->fence_drv.initialized)
@@ -631,7 +497,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
 		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 	}
-	mutex_unlock(&adev->ring_lock);
 }
 
 /**
@@ -651,7 +516,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
 		if (!ring || !ring->fence_drv.initialized)
 			continue;
 
-		amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
+		amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
 	}
 }
 
@@ -671,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
 }
 
 /**
- * amdgpu_fence_is_signaled - test if fence is signaled
- *
- * @f: fence to test
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
  *
- * Test the fence sequence number if it is already signaled. If it isn't
- * signaled start fence processing. Returns True if the fence is signaled.
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
  */
-static bool amdgpu_fence_is_signaled(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct fence *f)
 {
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
-
-	amdgpu_fence_process(ring);
+	if (!timer_pending(&ring->fence_drv.fallback_timer))
+		amdgpu_fence_schedule_fallback(ring);
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
+	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 
-	return false;
+	return true;
 }
 
 /**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
  *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * Free up the fence memory after the RCU grace period.
  */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_free(struct rcu_head *rcu)
 {
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
+	struct fence *f = container_of(rcu, struct fence, rcu);
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	kmem_cache_free(amdgpu_fence_slab, fence);
 }
 
 /**
- * amdgpu_fence_enable_signaling - enable signalling on fence
+ * amdgpu_fence_release - callback that fence can be freed
+ *
  * @fence: fence
  *
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
  */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	struct amdgpu_ring *ring = fence->ring;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
-	if (!timer_pending(&ring->fence_drv.fallback_timer))
-		amdgpu_fence_schedule_fallback(ring);
-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
-	return true;
-}
-
 static void amdgpu_fence_release(struct fence *f)
 {
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	kmem_cache_free(amdgpu_fence_slab, fence);
+	call_rcu(&f->rcu, amdgpu_fence_free);
 }
 
-const struct fence_ops amdgpu_fence_ops = {
+static const struct fence_ops amdgpu_fence_ops = {
 	.get_driver_name = amdgpu_fence_get_driver_name,
 	.get_timeline_name = amdgpu_fence_get_timeline_name,
 	.enable_signaling = amdgpu_fence_enable_signaling,
-	.signaled = amdgpu_fence_is_signaled,
 	.wait = fence_default_wait,
 	.release = amdgpu_fence_release,
 };
@@ -781,7 +600,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	int i, j;
+	int i;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -791,31 +610,41 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 		amdgpu_fence_process(ring);
 
 		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
-		seq_printf(m, "Last signaled fence 0x%016llx\n",
-			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
-		seq_printf(m, "Last emitted        0x%016llx\n",
-			   ring->fence_drv.sync_seq[i]);
-
-		for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
-			struct amdgpu_ring *other = adev->rings[j];
-			if (i != j && other && other->fence_drv.initialized &&
-			    ring->fence_drv.sync_seq[j])
-				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
-					   j, ring->fence_drv.sync_seq[j]);
-		}
+		seq_printf(m, "Last signaled fence 0x%08x\n",
+			   atomic_read(&ring->fence_drv.last_seq));
+		seq_printf(m, "Last emitted        0x%08x\n",
+			   ring->fence_drv.sync_seq);
 	}
 	return 0;
 }
 
+/**
+ * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
+ *
+ * Manually trigger a gpu reset at the next fence wait.
+ */
+static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_printf(m, "gpu reset\n");
+	amdgpu_gpu_reset(adev);
+
+	return 0;
+}
+
 static struct drm_info_list amdgpu_debugfs_fence_list[] = {
 	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
+	{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
 };
 #endif
 
 int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
-	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
+	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
 #else
 	return 0;
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d20c2a8929cb..fa6a27bff298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,6 +26,7 @@
  *          Jerome Glisse
  */
 #include <linux/ktime.h>
+#include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -83,24 +84,32 @@ retry:
 		return r;
 	}
 	*obj = &robj->gem_base;
-	robj->pid = task_pid_nr(current);
-
-	mutex_lock(&adev->gem.mutex);
-	list_add_tail(&robj->list, &adev->gem.objects);
-	mutex_unlock(&adev->gem.mutex);
 
 	return 0;
 }
 
-int amdgpu_gem_init(struct amdgpu_device *adev)
+void amdgpu_gem_force_release(struct amdgpu_device *adev)
 {
-	INIT_LIST_HEAD(&adev->gem.objects);
-	return 0;
-}
+	struct drm_device *ddev = adev->ddev;
+	struct drm_file *file;
 
-void amdgpu_gem_fini(struct amdgpu_device *adev)
-{
-	amdgpu_bo_force_delete(adev);
+	mutex_lock(&ddev->struct_mutex);
+
+	list_for_each_entry(file, &ddev->filelist, lhead) {
+		struct drm_gem_object *gobj;
+		int handle;
+
+		WARN_ONCE(1, "Still active user space clients!\n");
+		spin_lock(&file->table_lock);
+		idr_for_each_entry(&file->object_idr, gobj, handle) {
+			WARN_ONCE(1, "And also active allocations!\n");
+			drm_gem_object_unreference(gobj);
+		}
+		idr_destroy(&file->object_idr);
+		spin_unlock(&file->table_lock);
+	}
+
+	mutex_unlock(&ddev->struct_mutex);
 }
 
 /*
@@ -132,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
-	struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = rbo->adev;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = bo->adev;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
+
+	struct amdgpu_bo_list_entry vm_pd;
+	struct list_head list, duplicates;
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
-	r = amdgpu_bo_reserve(rbo, true);
+
+	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
+
+	tv.bo = &bo->tbo;
+	tv.shared = true;
+	list_add(&tv.head, &list);
+
+	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
 		return;
 	}
-	bo_va = amdgpu_vm_bo_find(vm, rbo);
+	bo_va = amdgpu_vm_bo_find(vm, bo);
 	if (bo_va) {
 		if (--bo_va->ref_count == 0) {
 			amdgpu_vm_bo_rmv(adev, bo_va);
 		}
 	}
-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);
 }
 
 static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -235,12 +259,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	    AMDGPU_GEM_USERPTR_REGISTER))
 		return -EINVAL;
 
-	if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
-	     !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
-	     !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
+	if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+	     !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
 
-		/* if we want to write to it we must require anonymous
-		   memory and install a MMU notifier */
+		/* if we want to write to it we must install a MMU notifier */
 		return -EACCES;
 	}
 
@@ -252,6 +274,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		goto handle_lockup;
 
 	bo = gem_to_amdgpu_bo(gobj);
+	bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
+	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
 	if (r)
 		goto release_object;
@@ -264,18 +288,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
 		down_read(&current->mm->mmap_sem);
+
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						 bo->tbo.ttm->pages);
+		if (r)
+			goto unlock_mmap_sem;
+
 		r = amdgpu_bo_reserve(bo, true);
-		if (r) {
-			up_read(&current->mm->mmap_sem);
-			goto release_object;
-		}
+		if (r)
+			goto free_pages;
 
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		amdgpu_bo_unreserve(bo);
-		up_read(&current->mm->mmap_sem);
 		if (r)
-			goto release_object;
+			goto free_pages;
+
+		up_read(&current->mm->mmap_sem);
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
@@ -287,6 +316,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	args->handle = handle;
 	return 0;
 
+free_pages:
+	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+	up_read(&current->mm->mmap_sem);
+
 release_object:
 	drm_gem_object_unreference_unlocked(gobj);
 
@@ -308,7 +343,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 		return -ENOENT;
 	}
 	robj = gem_to_amdgpu_bo(gobj);
-	if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) ||
+	if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
 	    (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
 		drm_gem_object_unreference_unlocked(gobj);
 		return -EPERM;
@@ -559,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	tv.shared = true;
 	list_add(&tv.head, &list);
 
-	if (args->operation == AMDGPU_VA_OP_MAP) {
-		tv_pd.bo = &fpriv->vm.page_directory->tbo;
-		tv_pd.shared = true;
-		list_add(&tv_pd.head, &list);
-	}
+	tv_pd.bo = &fpriv->vm.page_directory->tbo;
+	tv_pd.shared = true;
+	list_add(&tv_pd.head, &list);
+
 	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r) {
 		drm_gem_object_unreference_unlocked(gobj);
@@ -629,7 +663,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 
 		info.bo_size = robj->gem_base.size;
 		info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
-		info.domains = robj->initial_domain;
+		info.domains = robj->prefered_domains;
 		info.domain_flags = robj->flags;
 		amdgpu_bo_unreserve(robj);
 		if (copy_to_user(out, &info, sizeof(info)))
@@ -637,14 +671,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
-		if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
+		if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
 			r = -EPERM;
 			amdgpu_bo_unreserve(robj);
 			break;
 		}
-		robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
-						      AMDGPU_GEM_DOMAIN_GTT |
-						      AMDGPU_GEM_DOMAIN_CPU);
+		robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
+							AMDGPU_GEM_DOMAIN_GTT |
+							AMDGPU_GEM_DOMAIN_CPU);
+		robj->allowed_domains = robj->prefered_domains;
+		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -689,38 +727,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 }
 
 #if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
+{
+	struct drm_gem_object *gobj = ptr;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+	struct seq_file *m = data;
+
+	unsigned domain;
+	const char *placement;
+	unsigned pin_count;
+
+	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+	switch (domain) {
+	case AMDGPU_GEM_DOMAIN_VRAM:
+		placement = "VRAM";
+		break;
+	case AMDGPU_GEM_DOMAIN_GTT:
+		placement = " GTT";
+		break;
+	case AMDGPU_GEM_DOMAIN_CPU:
+	default:
+		placement = " CPU";
+		break;
+	}
+	seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
+		   id, amdgpu_bo_size(bo), placement,
+		   amdgpu_bo_gpu_offset(bo));
+
+	pin_count = ACCESS_ONCE(bo->pin_count);
+	if (pin_count)
+		seq_printf(m, " pin count %d", pin_count);
+	seq_printf(m, "\n");
+
+	return 0;
+}
+
 static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_bo *rbo;
-	unsigned i = 0;
+	struct drm_file *file;
+	int r;
+
+	r = mutex_lock_interruptible(&dev->struct_mutex);
+	if (r)
+		return r;
 
-	mutex_lock(&adev->gem.mutex);
-	list_for_each_entry(rbo, &adev->gem.objects, list) {
-		unsigned domain;
-		const char *placement;
+	list_for_each_entry(file, &dev->filelist, lhead) {
+		struct task_struct *task;
 
-		domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type);
-		switch (domain) {
-		case AMDGPU_GEM_DOMAIN_VRAM:
-			placement = "VRAM";
-			break;
-		case AMDGPU_GEM_DOMAIN_GTT:
-			placement = " GTT";
-			break;
-		case AMDGPU_GEM_DOMAIN_CPU:
-		default:
-			placement = " CPU";
-			break;
-		}
-		seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
-			   i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20,
-			   placement, (unsigned long)rbo->pid);
-		i++;
+		/*
+		 * Although we have a valid reference on file->pid, that does
+		 * not guarantee that the task_struct who called get_pid() is
+		 * still alive (e.g. get_pid(current) => fork() => exit()).
+		 * Therefore, we need to protect this ->comm access using RCU.
+		 */
+		rcu_read_lock();
+		task = pid_task(file->pid, PIDTYPE_PID);
+		seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+			   task ? task->comm : "<unknown>");
+		rcu_read_unlock();
+
+		spin_lock(&file->table_lock);
+		idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
+		spin_unlock(&file->table_lock);
 	}
-	mutex_unlock(&adev->gem.mutex);
+
+	mutex_unlock(&dev->struct_mutex);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9e25edafa721..8443cea6821a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
  * suballocator.
  * Returns 0 on success, error on failure.
  */
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib)
 {
-	struct amdgpu_device *adev = ring->adev;
 	int r;
 
 	if (size) {
@@ -75,10 +74,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
 			ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
 	}
 
-	amdgpu_sync_create(&ib->sync);
-
-	ib->ring = ring;
 	ib->vm = vm;
+	ib->vm_id = 0;
 
 	return 0;
 }
@@ -88,15 +85,13 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
  *
  * @adev: amdgpu_device pointer
  * @ib: IB object to free
+ * @f: the fence SA bo need wait on for the ib alloation
  *
  * Free an IB (all asics).
  */
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
 {
-	amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
-	amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
-	if (ib->fence)
-		fence_put(&ib->fence->base);
+	amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
 }
 
 /**
@@ -105,7 +100,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
  * @adev: amdgpu_device pointer
  * @num_ibs: number of IBs to schedule
  * @ibs: IB objects to schedule
- * @owner: owner for creating the fences
+ * @f: fence created during this submission
  *
  * Schedule an IB on the associated ring (all asics).
  * Returns 0 on success, error on failure.
@@ -120,20 +115,21 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
  * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
  * to SI there was just a DE IB.
  */
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
-		       struct amdgpu_ib *ibs, void *owner)
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
+		       struct fence **f)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
-	struct amdgpu_ring *ring;
 	struct amdgpu_ctx *ctx, *old_ctx;
 	struct amdgpu_vm *vm;
+	struct fence *hwf;
 	unsigned i;
 	int r = 0;
 
 	if (num_ibs == 0)
 		return -EINVAL;
 
-	ring = ibs->ring;
 	ctx = ibs->ctx;
 	vm = ibs->vm;
 
@@ -141,42 +137,24 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 		dev_err(adev->dev, "couldn't schedule ib\n");
 		return -EINVAL;
 	}
-	r = amdgpu_sync_wait(&ibs->sync);
-	if (r) {
-		dev_err(adev->dev, "IB sync failed (%d).\n", r);
-		return r;
-	}
-	r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
-	if (r) {
-		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
-	}
 
-	if (vm) {
-		/* grab a vm id if necessary */
-		r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
-		if (r) {
-			amdgpu_ring_unlock_undo(ring);
-			return r;
-		}
+	if (vm && !ibs->vm_id) {
+		dev_err(adev->dev, "VM IB without ID\n");
+		return -EINVAL;
 	}
 
-	r = amdgpu_sync_rings(&ibs->sync, ring);
+	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
 	if (r) {
-		amdgpu_ring_unlock_undo(ring);
-		dev_err(adev->dev, "failed to sync rings (%d)\n", r);
+		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
 
 	if (vm) {
 		/* do context switch */
-		amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);
-
-		if (ring->funcs->emit_gds_switch)
-			amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
-						    ib->gds_base, ib->gds_size,
-						    ib->gws_base, ib->gws_size,
-						    ib->oa_base, ib->oa_size);
+		amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+				ib->gds_base, ib->gds_size,
+				ib->gws_base, ib->gws_size,
+				ib->oa_base, ib->oa_size);
 
 		if (ring->funcs->emit_hdp_flush)
 			amdgpu_ring_emit_hdp_flush(ring);
@@ -186,27 +164,32 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
-		if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
+		if (ib->ctx != ctx || ib->vm != vm) {
 			ring->current_ctx = old_ctx;
-			amdgpu_ring_unlock_undo(ring);
+			if (ib->vm_id)
+				amdgpu_vm_reset_id(adev, ib->vm_id);
+			amdgpu_ring_undo(ring);
 			return -EINVAL;
 		}
 		amdgpu_ring_emit_ib(ring, ib);
 		ring->current_ctx = ctx;
 	}
 
-	r = amdgpu_fence_emit(ring, owner, &ib->fence);
+	if (vm) {
+		if (ring->funcs->emit_hdp_invalidate)
+			amdgpu_ring_emit_hdp_invalidate(ring);
+	}
+
+	r = amdgpu_fence_emit(ring, &hwf);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		ring->current_ctx = old_ctx;
-		amdgpu_ring_unlock_undo(ring);
+		if (ib->vm_id)
+			amdgpu_vm_reset_id(adev, ib->vm_id);
+		amdgpu_ring_undo(ring);
 		return r;
 	}
 
-	if (!amdgpu_enable_scheduler && ib->ctx)
-		ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
-						    &ib->fence->base);
-
 	/* wrap the last IB with fence */
 	if (ib->user) {
 		uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
@@ -215,10 +198,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	if (ib->vm)
-		amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
+	if (f)
+		*f = fence_get(hwf);
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
new file mode 100644
index 000000000000..9c9b19e2f353
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+		     struct amdgpu_job **job)
+{
+	size_t size = sizeof(struct amdgpu_job);
+
+	if (num_ibs == 0)
+		return -EINVAL;
+
+	size += sizeof(struct amdgpu_ib) * num_ibs;
+
+	*job = kzalloc(size, GFP_KERNEL);
+	if (!*job)
+		return -ENOMEM;
+
+	(*job)->adev = adev;
+	(*job)->ibs = (void *)&(*job)[1];
+	(*job)->num_ibs = num_ibs;
+
+	amdgpu_sync_create(&(*job)->sync);
+
+	return 0;
+}
+
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+			     struct amdgpu_job **job)
+{
+	int r;
+
+	r = amdgpu_job_alloc(adev, 1, job);
+	if (r)
+		return r;
+
+	r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
+	if (r)
+		kfree(*job);
+
+	return r;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+	unsigned i;
+	struct fence *f;
+	/* use sched fence if available */
+	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+
+	for (i = 0; i < job->num_ibs; ++i)
+		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
+	fence_put(job->fence);
+
+	amdgpu_bo_unref(&job->uf.bo);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
+}
+
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+		      struct amd_sched_entity *entity, void *owner,
+		      struct fence **f)
+{
+	job->ring = ring;
+	job->base.sched = &ring->sched;
+	job->base.s_entity = entity;
+	job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+	if (!job->base.s_fence)
+		return -ENOMEM;
+
+	*f = fence_get(&job->base.s_fence->base);
+
+	job->owner = owner;
+	amd_sched_entity_push_job(&job->base);
+
+	return 0;
+}
+
+static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
+{
+	struct amdgpu_job *job = to_amdgpu_job(sched_job);
+	struct amdgpu_vm *vm = job->ibs->vm;
+
+	struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+
+	if (fence == NULL && vm && !job->ibs->vm_id) {
+		struct amdgpu_ring *ring = job->ring;
+		unsigned i, vm_id;
+		uint64_t vm_pd_addr;
+		int r;
+
+		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
+				      &job->base.s_fence->base,
+				      &vm_id, &vm_pd_addr);
+		if (r)
+			DRM_ERROR("Error getting VM ID (%d)\n", r);
+		else {
+			for (i = 0; i < job->num_ibs; ++i) {
+				job->ibs[i].vm_id = vm_id;
+				job->ibs[i].vm_pd_addr = vm_pd_addr;
+			}
+		}
+
+		fence = amdgpu_sync_get_fence(&job->sync);
+	}
+
+	return fence;
+}
+
+static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+{
+	struct fence *fence = NULL;
+	struct amdgpu_job *job;
+	int r;
+
+	if (!sched_job) {
+		DRM_ERROR("job is null\n");
+		return NULL;
+	}
+	job = to_amdgpu_job(sched_job);
+
+	r = amdgpu_sync_wait(&job->sync);
+	if (r) {
+		DRM_ERROR("failed to sync wait (%d)\n", r);
+		return NULL;
+	}
+
+	trace_amdgpu_sched_run_job(job);
+	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
+			       job->sync.last_vm_update, &fence);
+	if (r) {
+		DRM_ERROR("Error scheduling IBs (%d)\n", r);
+		goto err;
+	}
+
+err:
+	job->fence = fence;
+	amdgpu_job_free(job);
+	return fence;
+}
+
+struct amd_sched_backend_ops amdgpu_sched_ops = {
+	.dependency = amdgpu_job_dependency,
+	.run_job = amdgpu_job_run,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e23843f4d877..7805a8706af7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
-					adev->gfx.config.max_shader_engines;
+		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
 
 	/* Get associated drm_crtc: */
 	crtc = &adev->mode_info.crtcs[pipe]->base;
+	if (!crtc) {
+		/* This can occur on driver load if some component fails to
+		 * initialize completely and driver is unloaded */
+		DRM_ERROR("Uninitialized crtc %d\n", pipe);
+		return -EINVAL;
+	}
 
 	/* Helper routine in DRM core does all the work: */
 	return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index d4e2780c0796..9f4a45cd2aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -77,8 +77,6 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	hash_del(&rmn->node);
 	rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
 					     it.rb) {
-
-		interval_tree_remove(&node->it, &rmn->objects);
 		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 			bo->mn = NULL;
 			list_del_init(&bo->mn_list);
@@ -87,7 +85,7 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	mutex_unlock(&rmn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister(&rmn->mn, rmn->mm);
+	mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
 	kfree(rmn);
 }
 
@@ -108,6 +106,76 @@ static void amdgpu_mn_release(struct mmu_notifier *mn,
 }
 
 /**
+ * amdgpu_mn_invalidate_node - unmap all BOs of a node
+ *
+ * @node: the node with the BOs to unmap
+ *
+ * We block for all BOs and unmap them by move them
+ * into system domain again.
+ */
+static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
+				      unsigned long start,
+				      unsigned long end)
+{
+	struct amdgpu_bo *bo;
+	long r;
+
+	list_for_each_entry(bo, &node->bos, mn_list) {
+
+		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
+			continue;
+
+		r = amdgpu_bo_reserve(bo, true);
+		if (r) {
+			DRM_ERROR("(%ld) failed to reserve user bo\n", r);
+			continue;
+		}
+
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+			true, false, MAX_SCHEDULE_TIMEOUT);
+		if (r <= 0)
+			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (r)
+			DRM_ERROR("(%ld) failed to validate user bo\n", r);
+
+		amdgpu_bo_unreserve(bo);
+	}
+}
+
+/**
+ * amdgpu_mn_invalidate_page - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @address: address of invalidate page
+ *
+ * Invalidation of a single page. Blocks for all BOs mapping it
+ * and unmap them by move them into system domain again.
+ */
+static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
+				      struct mm_struct *mm,
+				      unsigned long address)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, address, address);
+	if (it) {
+		struct amdgpu_mn_node *node;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		amdgpu_mn_invalidate_node(node, address, address);
+	}
+
+	mutex_unlock(&rmn->lock);
+}
+
+/**
  * amdgpu_mn_invalidate_range_start - callback to notify about mm change
  *
  * @mn: our notifier
@@ -134,36 +202,11 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
-		struct amdgpu_bo *bo;
-		long r;
 
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
-		list_for_each_entry(bo, &node->bos, mn_list) {
-
-			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
-							  end))
-				continue;
-
-			r = amdgpu_bo_reserve(bo, true);
-			if (r) {
-				DRM_ERROR("(%ld) failed to reserve user bo\n", r);
-				continue;
-			}
-
-			r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
-				true, false, MAX_SCHEDULE_TIMEOUT);
-			if (r <= 0)
-				DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-			amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-			if (r)
-				DRM_ERROR("(%ld) failed to validate user bo\n", r);
-
-			amdgpu_bo_unreserve(bo);
-		}
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
 	mutex_unlock(&rmn->lock);
@@ -171,6 +214,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops amdgpu_mn_ops = {
 	.release = amdgpu_mn_release,
+	.invalidate_page = amdgpu_mn_invalidate_page,
 	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
 };
 
@@ -187,8 +231,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	struct amdgpu_mn *rmn;
 	int r;
 
-	down_write(&mm->mmap_sem);
 	mutex_lock(&adev->mn_lock);
+	down_write(&mm->mmap_sem);
 
 	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
 		if (rmn->mm == mm)
@@ -213,14 +257,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
 
 release_locks:
-	mutex_unlock(&adev->mn_lock);
 	up_write(&mm->mmap_sem);
+	mutex_unlock(&adev->mn_lock);
 
 	return rmn;
 
 free_rmn:
-	mutex_unlock(&adev->mn_lock);
 	up_write(&mm->mmap_sem);
+	mutex_unlock(&adev->mn_lock);
 	kfree(rmn);
 
 	return ERR_PTR(r);
@@ -298,6 +342,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	struct list_head *head;
 
 	mutex_lock(&adev->mn_lock);
+
 	rmn = bo->mn;
 	if (rmn == NULL) {
 		mutex_unlock(&adev->mn_lock);
@@ -305,6 +350,7 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	}
 
 	mutex_lock(&rmn->lock);
+
 	/* save the next list entry for later */
 	head = bo->mn_list.next;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index fdc1be8550da..8d432e6901af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -390,7 +390,6 @@ struct amdgpu_crtc {
 	struct drm_display_mode native_mode;
 	u32 pll_id;
 	/* page flipping */
-	struct workqueue_struct *pflip_queue;
 	struct amdgpu_flip_work *pflip_works;
 	enum amdgpu_flip_status pflip_status;
 	int deferred_flip_completion;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b8fbbd7699e4..151a2d42c639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 
 	amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
 
-	mutex_lock(&bo->adev->gem.mutex);
-	list_del_init(&bo->list);
-	mutex_unlock(&bo->adev->gem.mutex);
 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
 	kfree(bo->metadata);
@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	bo->adev = adev;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
-	bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
-				       AMDGPU_GEM_DOMAIN_GTT |
-				       AMDGPU_GEM_DOMAIN_CPU |
-				       AMDGPU_GEM_DOMAIN_GDS |
-				       AMDGPU_GEM_DOMAIN_GWS |
-				       AMDGPU_GEM_DOMAIN_OA);
+	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
+					 AMDGPU_GEM_DOMAIN_GTT |
+					 AMDGPU_GEM_DOMAIN_CPU |
+					 AMDGPU_GEM_DOMAIN_GDS |
+					 AMDGPU_GEM_DOMAIN_GWS |
+					 AMDGPU_GEM_DOMAIN_OA);
+	bo->allowed_domains = bo->prefered_domains;
+	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
 	bo->flags = flags;
 
@@ -308,7 +308,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 {
 	bool is_iomem;
-	int r;
+	long r;
 
 	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
 		return -EPERM;
@@ -319,14 +319,20 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 		}
 		return 0;
 	}
+
+	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+						MAX_SCHEDULE_TIMEOUT);
+	if (r < 0)
+		return r;
+
 	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
-	if (r) {
+	if (r)
 		return r;
-	}
+
 	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
-	if (ptr) {
+	if (ptr)
 		*ptr = bo->kptr;
-	}
+
 	return 0;
 }
 
@@ -367,7 +373,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	int r, i;
 	unsigned fpfn, lpfn;
 
-	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return -EPERM;
 
 	if (WARN_ON_ONCE(min_offset > max_offset))
@@ -470,26 +476,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }
 
-void amdgpu_bo_force_delete(struct amdgpu_device *adev)
-{
-	struct amdgpu_bo *bo, *n;
-
-	if (list_empty(&adev->gem.objects)) {
-		return;
-	}
-	dev_err(adev->dev, "Userspace still has active objects !\n");
-	list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
-		dev_err(adev->dev, "%p %p %lu %lu force free\n",
-			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
-			*((unsigned long *)&bo->gem_base.refcount));
-		mutex_lock(&bo->adev->gem.mutex);
-		list_del_init(&bo->list);
-		mutex_unlock(&bo->adev->gem.mutex);
-		/* this should unref the ttm bo */
-		drm_gem_object_unreference_unlocked(&bo->gem_base);
-	}
-}
-
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 	/* Add an MTRR for the VRAM */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 5107fb291bdb..acc08018c6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 			     u64 *gpu_addr);
 int amdgpu_bo_unpin(struct amdgpu_bo *bo);
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
-void amdgpu_bo_force_delete(struct amdgpu_device *adev);
 int amdgpu_bo_init(struct amdgpu_device *adev);
 void amdgpu_bo_fini(struct amdgpu_device *adev);
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 95a4a25d8df9..ff9597ce268c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -123,7 +123,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 		level = amdgpu_dpm_get_performance_level(adev);
 		return snprintf(buf, PAGE_SIZE, "%s\n",
 				(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
-				(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high");
+				(level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
+				(level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
+				(level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown");
 	} else {
 		enum amdgpu_dpm_forced_level level;
 
@@ -155,6 +157,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 		level = AMDGPU_DPM_FORCED_LEVEL_HIGH;
 	} else if (strncmp("auto", buf, strlen("auto")) == 0) {
 		level = AMDGPU_DPM_FORCED_LEVEL_AUTO;
+	} else if (strncmp("manual", buf, strlen("manual")) == 0) {
+		level = AMDGPU_DPM_FORCED_LEVEL_MANUAL;
 	} else {
 		count = -EINVAL;
 		goto fail;
@@ -180,10 +184,293 @@ fail:
 	return count;
 }
 
+static ssize_t amdgpu_get_pp_num_states(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	struct pp_states_info data;
+	int i, buf_len;
+
+	if (adev->pp_enabled)
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+
+	buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
+	for (i = 0; i < data.nums; i++)
+		buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
+				(data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
+				(data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" :
+				(data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" :
+				(data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? "performance" : "default");
+
+	return buf_len;
+}
+
+static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	struct pp_states_info data;
+	enum amd_pm_state_type pm = 0;
+	int i = 0;
+
+	if (adev->pp_enabled) {
+
+		pm = amdgpu_dpm_get_current_power_state(adev);
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+
+		for (i = 0; i < data.nums; i++) {
+			if (pm == data.states[i])
+				break;
+		}
+
+		if (i == data.nums)
+			i = -EINVAL;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", i);
+}
+
+static ssize_t amdgpu_get_pp_force_state(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	struct pp_states_info data;
+	enum amd_pm_state_type pm = 0;
+	int i;
+
+	if (adev->pp_force_state_enabled && adev->pp_enabled) {
+		pm = amdgpu_dpm_get_current_power_state(adev);
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+
+		for (i = 0; i < data.nums; i++) {
+			if (pm == data.states[i])
+				break;
+		}
+
+		if (i == data.nums)
+			i = -EINVAL;
+
+		return snprintf(buf, PAGE_SIZE, "%d\n", i);
+
+	} else
+		return snprintf(buf, PAGE_SIZE, "\n");
+}
+
+static ssize_t amdgpu_set_pp_force_state(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	enum amd_pm_state_type state = 0;
+	long idx;
+	int ret;
+
+	if (strlen(buf) == 1)
+		adev->pp_force_state_enabled = false;
+	else {
+		ret = kstrtol(buf, 0, &idx);
+
+		if (ret) {
+			count = -EINVAL;
+			goto fail;
+		}
+
+		if (adev->pp_enabled) {
+			struct pp_states_info data;
+			amdgpu_dpm_get_pp_num_states(adev, &data);
+			state = data.states[idx];
+			/* only set user selected power states */
+			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+				state != POWER_STATE_TYPE_DEFAULT) {
+				amdgpu_dpm_dispatch_task(adev,
+						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+				adev->pp_force_state_enabled = true;
+			}
+		}
+	}
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_table(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	char *table = NULL;
+	int size, i;
+
+	if (adev->pp_enabled)
+		size = amdgpu_dpm_get_pp_table(adev, &table);
+	else
+		return 0;
+
+	if (size >= PAGE_SIZE)
+		size = PAGE_SIZE - 1;
+
+	for (i = 0; i < size; i++) {
+		sprintf(buf + i, "%02x", table[i]);
+	}
+	sprintf(buf + i, "\n");
+
+	return size;
+}
+
+static ssize_t amdgpu_set_pp_table(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+
+	if (adev->pp_enabled)
+		amdgpu_dpm_set_pp_table(adev, buf, count);
+
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size = 0;
+
+	if (adev->pp_enabled)
+		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+
+	return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long level;
+
+	ret = kstrtol(buf, 0, &level);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled)
+		amdgpu_dpm_force_clock_level(adev, PP_SCLK, level);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size = 0;
+
+	if (adev->pp_enabled)
+		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+
+	return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long level;
+
+	ret = kstrtol(buf, 0, &level);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled)
+		amdgpu_dpm_force_clock_level(adev, PP_MCLK, level);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	ssize_t size = 0;
+
+	if (adev->pp_enabled)
+		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+
+	return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long level;
+
+	ret = kstrtol(buf, 0, &level);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled)
+		amdgpu_dpm_force_clock_level(adev, PP_PCIE, level);
+fail:
+	return count;
+}
+
 static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
 static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
 		   amdgpu_get_dpm_forced_performance_level,
 		   amdgpu_set_dpm_forced_performance_level);
+static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
+static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
+static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_force_state,
+		amdgpu_set_pp_force_state);
+static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_table,
+		amdgpu_set_pp_table);
+static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_dpm_sclk,
+		amdgpu_set_pp_dpm_sclk);
+static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_dpm_mclk,
+		amdgpu_set_pp_dpm_mclk);
+static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_dpm_pcie,
+		amdgpu_set_pp_dpm_pcie);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -637,14 +924,12 @@ force:
 		amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps);
 	}
 
-	mutex_lock(&adev->ring_lock);
-
 	/* update whether vce is active */
 	ps->vce_active = adev->pm.dpm.vce_active;
 
 	ret = amdgpu_dpm_pre_set_power_state(adev);
 	if (ret)
-		goto done;
+		return;
 
 	/* update display watermarks based on new power state */
 	amdgpu_display_bandwidth_update(adev);
@@ -682,9 +967,6 @@ force:
 			amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level);
 		}
 	}
-
-done:
-	mutex_unlock(&adev->ring_lock);
 }
 
 void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
@@ -785,6 +1067,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 		DRM_ERROR("failed to create device file for dpm state\n");
 		return ret;
 	}
+
+	if (adev->pp_enabled) {
+		ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_num_states\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_cur_state\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_force_state\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_table);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_table\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+			return ret;
+		}
+		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+		if (ret) {
+			DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+			return ret;
+		}
+	}
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -802,6 +1122,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 		hwmon_device_unregister(adev->pm.int_hwmon_dev);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_state);
 	device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
+	if (adev->pp_enabled) {
+		device_remove_file(adev->dev, &dev_attr_pp_num_states);
+		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
+		device_remove_file(adev->dev, &dev_attr_pp_force_state);
+		device_remove_file(adev->dev, &dev_attr_pp_table);
+		device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+		device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	}
 }
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -817,13 +1146,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 		int i = 0;
 
 		amdgpu_display_bandwidth_update(adev);
-		mutex_lock(&adev->ring_lock);
-			for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
-				struct amdgpu_ring *ring = adev->rings[i];
-				if (ring && ring->ready)
-					amdgpu_fence_wait_empty(ring);
-			}
-		mutex_unlock(&adev->ring_lock);
+		for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+			struct amdgpu_ring *ring = adev->rings[i];
+			if (ring && ring->ready)
+				amdgpu_fence_wait_empty(ring);
+		}
 
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
 	} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 59f735a933a9..be6388f73ba2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		return ERR_PTR(ret);
 
-	mutex_lock(&adev->gem.mutex);
-	list_add_tail(&bo->list, &adev->gem.objects);
-	mutex_unlock(&adev->gem.mutex);
-
 	return &bo->gem_base;
 }
 
@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 
-	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return ERR_PTR(-EPERM);
 
 	return drm_gem_prime_export(dev, gobj, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d1f234dd2126..972eed2ef787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -49,28 +49,6 @@
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 
 /**
- * amdgpu_ring_free_size - update the free size
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- *
- * Update the free dw slots in the ring buffer (all asics).
- */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring)
-{
-	uint32_t rptr = amdgpu_ring_get_rptr(ring);
-
-	/* This works because ring_size is a power of 2 */
-	ring->ring_free_dw = rptr + (ring->ring_size / 4);
-	ring->ring_free_dw -= ring->wptr;
-	ring->ring_free_dw &= ring->ptr_mask;
-	if (!ring->ring_free_dw) {
-		/* this is an empty ring */
-		ring->ring_free_dw = ring->ring_size / 4;
-	}
-}
-
-/**
  * amdgpu_ring_alloc - allocate space on the ring buffer
  *
  * @adev: amdgpu_device pointer
@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
  */
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 {
-	int r;
-
-	/* make sure we aren't trying to allocate more space than there is on the ring */
-	if (ndw > (ring->ring_size / 4))
-		return -ENOMEM;
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
-	amdgpu_ring_free_size(ring);
 	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
-	while (ndw > (ring->ring_free_dw - 1)) {
-		amdgpu_ring_free_size(ring);
-		if (ndw < ring->ring_free_dw) {
-			break;
-		}
-		r = amdgpu_fence_wait_next(ring);
-		if (r)
-			return r;
-	}
-	ring->count_dw = ndw;
-	ring->wptr_old = ring->wptr;
-	return 0;
-}
 
-/**
- * amdgpu_ring_lock - lock the ring and allocate space on it
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Lock the ring and allocate @ndw dwords in the ring buffer
- * (all asics).
- * Returns 0 on success, error on failure.
- */
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
-{
-	int r;
+	/* Make sure we aren't trying to allocate more space
+	 * than the maximum for one submission
+	 */
+	if (WARN_ON_ONCE(ndw > ring->max_dw))
+		return -ENOMEM;
 
-	mutex_lock(ring->ring_lock);
-	r = amdgpu_ring_alloc(ring, ndw);
-	if (r) {
-		mutex_unlock(ring->ring_lock);
-		return r;
-	}
+	ring->count_dw = ndw;
+	ring->wptr_old = ring->wptr;
 	return 0;
 }
 
@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 		amdgpu_ring_write(ring, ring->nop);
 }
 
+/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: IB to add NOP packets to
+ *
+ * This is the generic pad_ib function for rings except SDMA
+ */
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+	while (ib->length_dw & ring->align_mask)
+		ib->ptr[ib->length_dw++] = ring->nop;
+}
+
 /**
  * amdgpu_ring_commit - tell the GPU to execute the new
  * commands on the ring buffer
@@ -168,20 +127,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_ring_unlock_commit - tell the GPU to execute the new
- * commands on the ring buffer and unlock it
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_commit() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_commit(ring);
-	mutex_unlock(ring->ring_lock);
-}
-
-/**
  * amdgpu_ring_undo - reset the wptr
  *
  * @ring: amdgpu_ring structure holding ring information
@@ -194,19 +139,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_undo() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_undo(ring);
-	mutex_unlock(ring->ring_lock);
-}
-
-/**
  * amdgpu_ring_backup - Back up the content of a ring
  *
  * @ring: the ring we want to back up
@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
 {
 	unsigned size, ptr, i;
 
-	/* just in case lock the ring */
-	mutex_lock(ring->ring_lock);
 	*data = NULL;
 
-	if (ring->ring_obj == NULL) {
-		mutex_unlock(ring->ring_lock);
+	if (ring->ring_obj == NULL)
 		return 0;
-	}
 
 	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring)) {
-		mutex_unlock(ring->ring_lock);
+	if (!amdgpu_fence_count_emitted(ring))
 		return 0;
-	}
 
 	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
 	size = ring->wptr + (ring->ring_size / 4);
 	size -= ptr;
 	size &= ring->ptr_mask;
-	if (size == 0) {
-		mutex_unlock(ring->ring_lock);
+	if (size == 0)
 		return 0;
-	}
 
 	/* and then save the content of the ring */
 	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data) {
-		mutex_unlock(ring->ring_lock);
+	if (!*data)
 		return 0;
-	}
 	for (i = 0; i < size; ++i) {
 		(*data)[i] = ring->ring[ptr++];
 		ptr &= ring->ptr_mask;
 	}
 
-	mutex_unlock(ring->ring_lock);
 	return size;
 }
 
@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
 		return 0;
 
 	/* restore the saved ring content */
-	r = amdgpu_ring_lock(ring, size);
+	r = amdgpu_ring_alloc(ring, size);
 	if (r)
 		return r;
 
@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, data[i]);
 	}
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	kfree(data);
 	return 0;
 }
@@ -315,7 +236,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		ring->adev = adev;
 		ring->idx = adev->num_rings++;
 		adev->rings[ring->idx] = ring;
-		r = amdgpu_fence_driver_init_ring(ring);
+		r = amdgpu_fence_driver_init_ring(ring,
+			amdgpu_sched_hw_submission);
 		if (r)
 			return r;
 	}
@@ -352,7 +274,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}
 
-	ring->ring_lock = &adev->ring_lock;
 	/* Align ring size */
 	rb_bufsz = order_base_2(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
@@ -389,7 +310,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
-	ring->ring_free_dw = ring->ring_size / 4;
+	ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
+				    amdgpu_sched_hw_submission);
 
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
@@ -410,15 +332,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	int r;
 	struct amdgpu_bo *ring_obj;
 
-	if (ring->ring_lock == NULL)
-		return;
-
-	mutex_lock(ring->ring_lock);
 	ring_obj = ring->ring_obj;
 	ring->ready = false;
 	ring->ring = NULL;
 	ring->ring_obj = NULL;
-	mutex_unlock(ring->ring_lock);
 
 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
@@ -436,30 +353,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	}
 }
 
-/**
- * amdgpu_ring_from_fence - get ring from fence
- *
- * @f: fence structure
- *
- * Extract the ring a fence belongs to. Handles both scheduler as
- * well as hardware fences.
- */
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
-{
-	struct amdgpu_fence *a_fence;
-	struct amd_sched_fence *s_fence;
-
-	s_fence = to_amd_sched_fence(f);
-	if (s_fence)
-		return container_of(s_fence->sched, struct amdgpu_ring, sched);
-
-	a_fence = to_amdgpu_fence(f);
-	if (a_fence)
-		return a_fence->ring;
-
-	return NULL;
-}
-
 /*
  * Debugfs info
  */
@@ -474,29 +367,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
 	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
 
 	uint32_t rptr, wptr, rptr_next;
-	unsigned count, i, j;
-
-	amdgpu_ring_free_size(ring);
-	count = (ring->ring_size / 4) - ring->ring_free_dw;
+	unsigned i;
 
 	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n",
-		   wptr, wptr);
+	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
 
 	rptr = amdgpu_ring_get_rptr(ring);
-	seq_printf(m, "rptr: 0x%08x [%5d]\n",
-		   rptr, rptr);
-
 	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
 
+	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
+
 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);
-	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
-		   ring->last_semaphore_signal_addr);
-	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
-		   ring->last_semaphore_wait_addr);
-	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
-	seq_printf(m, "%u dwords in ring\n", count);
 
 	if (!ring->ready)
 		return 0;
@@ -505,11 +387,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
 	 * packet that is the root issue
 	 */
 	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	for (j = 0; j <= (count + 32); j++) {
+	while (i != rptr) {
+		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
+		if (i == rptr)
+			seq_puts(m, " *");
+		if (i == rptr_next)
+			seq_puts(m, " #");
+		seq_puts(m, "\n");
+		i = (i + 1) & ring->ptr_mask;
+	}
+	while (i != wptr) {
 		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (rptr == i)
+		if (i == rptr)
 			seq_puts(m, " *");
-		if (rptr_next == i)
+		if (i == rptr_next)
 			seq_puts(m, " #");
 		seq_puts(m, "\n");
 		i = (i + 1) & ring->ptr_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index ca72a2e487b9..8bf84efafb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -60,9 +60,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
 	sa_manager->align = align;
 	sa_manager->hole = &sa_manager->olist;
 	INIT_LIST_HEAD(&sa_manager->olist);
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 		INIT_LIST_HEAD(&sa_manager->flist[i]);
-	}
 
 	r = amdgpu_bo_create(adev, size, align, true, domain,
 			     0, NULL, NULL, &sa_manager->bo);
@@ -228,11 +227,9 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
 	unsigned soffset, eoffset, wasted;
 	int i;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		if (!list_empty(&sa_manager->flist[i])) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
+		if (!list_empty(&sa_manager->flist[i]))
 			return true;
-		}
-	}
 
 	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
 	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
@@ -265,12 +262,11 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	/* go over all fence list and try to find the closest sa_bo
 	 * of the current last
 	 */
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
 		struct amdgpu_sa_bo *sa_bo;
 
-		if (list_empty(&sa_manager->flist[i])) {
+		if (list_empty(&sa_manager->flist[i]))
 			continue;
-		}
 
 		sa_bo = list_first_entry(&sa_manager->flist[i],
 					 struct amdgpu_sa_bo, flist);
@@ -299,7 +295,9 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	}
 
 	if (best_bo) {
-		uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
+		uint32_t idx = best_bo->fence->context;
+
+		idx %= AMDGPU_SA_NUM_FENCE_LISTS;
 		++tries[idx];
 		sa_manager->hole = best_bo->olist.prev;
 
@@ -315,14 +313,17 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 		     struct amdgpu_sa_bo **sa_bo,
 		     unsigned size, unsigned align)
 {
-	struct fence *fences[AMDGPU_MAX_RINGS];
-	unsigned tries[AMDGPU_MAX_RINGS];
+	struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
 	unsigned count;
 	int i, r;
 	signed long t;
 
-	BUG_ON(align > sa_manager->align);
-	BUG_ON(size > sa_manager->size);
+	if (WARN_ON_ONCE(align > sa_manager->align))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(size > sa_manager->size))
+		return -EINVAL;
 
 	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
 	if ((*sa_bo) == NULL) {
@@ -335,7 +336,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 
 	spin_lock(&sa_manager->wq.lock);
 	do {
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
 			fences[i] = NULL;
 			tries[i] = 0;
 		}
@@ -352,7 +353,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 			/* see if we can skip over some allocations */
 		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
 
-		for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
 			if (fences[i])
 				fences[count++] = fence_get(fences[i]);
 
@@ -394,8 +395,9 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 	spin_lock(&sa_manager->wq.lock);
 	if (fence && !fence_is_signaled(fence)) {
 		uint32_t idx;
+
 		(*sa_bo)->fence = fence_get(fence);
-		idx = amdgpu_ring_from_fence(fence)->idx;
+		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
 		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
 	} else {
 		amdgpu_sa_bo_remove_locked(*sa_bo);
@@ -407,25 +409,6 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 
 #if defined(CONFIG_DEBUG_FS)
 
-static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
-{
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
-
-	if (a_fence)
-		seq_printf(m, " protected by 0x%016llx on ring %d",
-			   a_fence->seq, a_fence->ring->idx);
-
-	if (s_fence) {
-		struct amdgpu_ring *ring;
-
-
-		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
-		seq_printf(m, " protected by 0x%016x on ring %d",
-			   s_fence->base.seqno, ring->idx);
-	}
-}
-
 void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 				  struct seq_file *m)
 {
@@ -442,8 +425,11 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 		}
 		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
 			   soffset, eoffset, eoffset - soffset);
+
 		if (i->fence)
-			amdgpu_sa_bo_dump_fence(i->fence, m);
+			seq_printf(m, " protected by 0x%08x on context %d",
+				   i->fence->seqno, i->fence->context);
+
 		seq_printf(m, "\n");
 	}
 	spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
deleted file mode 100644
index 438c05254695..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- *
- */
-#include <linux/kthread.h>
-#include <linux/wait.h>
-#include <linux/sched.h>
-#include <drm/drmP.h>
-#include "amdgpu.h"
-#include "amdgpu_trace.h"
-
-static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
-{
-	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	return amdgpu_sync_get_fence(&job->ibs->sync);
-}
-
-static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
-{
-	struct amdgpu_fence *fence = NULL;
-	struct amdgpu_job *job;
-	int r;
-
-	if (!sched_job) {
-		DRM_ERROR("job is null\n");
-		return NULL;
-	}
-	job = to_amdgpu_job(sched_job);
-	trace_amdgpu_sched_run_job(job);
-	r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
-	if (r) {
-		DRM_ERROR("Error scheduling IBs (%d)\n", r);
-		goto err;
-	}
-
-	fence = job->ibs[job->num_ibs - 1].fence;
-	fence_get(&fence->base);
-
-err:
-	if (job->free_job)
-		job->free_job(job);
-
-	kfree(job);
-	return fence ? &fence->base : NULL;
-}
-
-struct amd_sched_backend_ops amdgpu_sched_ops = {
-	.dependency = amdgpu_sched_dependency,
-	.run_job = amdgpu_sched_run_job,
-};
-
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring,
-					 struct amdgpu_ib *ibs,
-					 unsigned num_ibs,
-					 int (*free_job)(struct amdgpu_job *),
-					 void *owner,
-					 struct fence **f)
-{
-	int r = 0;
-	if (amdgpu_enable_scheduler) {
-		struct amdgpu_job *job =
-			kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-		if (!job)
-			return -ENOMEM;
-		job->base.sched = &ring->sched;
-		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
-		job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
-		if (!job->base.s_fence) {
-			kfree(job);
-			return -ENOMEM;
-		}
-		*f = fence_get(&job->base.s_fence->base);
-
-		job->adev = adev;
-		job->ibs = ibs;
-		job->num_ibs = num_ibs;
-		job->owner = owner;
-		job->free_job = free_job;
-		amd_sched_entity_push_job(&job->base);
-	} else {
-		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
-		if (r)
-			return r;
-		*f = fence_get(&ibs[num_ibs - 1].fence->base);
-	}
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
deleted file mode 100644
index 1caaf201b708..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors:
- *    Christian König <deathsimple@vodafone.de>
- */
-#include <drm/drmP.h>
-#include "amdgpu.h"
-#include "amdgpu_trace.h"
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
-			    struct amdgpu_semaphore **semaphore)
-{
-	int r;
-
-	*semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL);
-	if (*semaphore == NULL) {
-		return -ENOMEM;
-	}
-	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
-			     &(*semaphore)->sa_bo, 8, 8);
-	if (r) {
-		kfree(*semaphore);
-		*semaphore = NULL;
-		return r;
-	}
-	(*semaphore)->waiters = 0;
-	(*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo);
-
-	*((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
-
-	return 0;
-}
-
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
-				  struct amdgpu_semaphore *semaphore)
-{
-	trace_amdgpu_semaphore_signale(ring->idx, semaphore);
-
-	if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) {
-		--semaphore->waiters;
-
-		/* for debugging lockup only, used by sysfs debug files */
-		ring->last_semaphore_signal_addr = semaphore->gpu_addr;
-		return true;
-	}
-	return false;
-}
-
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
-				struct amdgpu_semaphore *semaphore)
-{
-	trace_amdgpu_semaphore_wait(ring->idx, semaphore);
-
-	if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) {
-		++semaphore->waiters;
-
-		/* for debugging lockup only, used by sysfs debug files */
-		ring->last_semaphore_wait_addr = semaphore->gpu_addr;
-		return true;
-	}
-	return false;
-}
-
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
-			   struct amdgpu_semaphore **semaphore,
-			   struct fence *fence)
-{
-	if (semaphore == NULL || *semaphore == NULL) {
-		return;
-	}
-	if ((*semaphore)->waiters > 0) {
-		dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
-			" hardware lockup imminent!\n", *semaphore);
-	}
-	amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence);
-	kfree(*semaphore);
-	*semaphore = NULL;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 181ce39ef5e5..c48b4fce5e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -37,6 +37,8 @@ struct amdgpu_sync_entry {
 	struct fence		*fence;
 };
 
+static struct kmem_cache *amdgpu_sync_slab;
+
 /**
  * amdgpu_sync_create - zero init sync object
  *
@@ -46,26 +48,22 @@ struct amdgpu_sync_entry {
  */
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
-	unsigned i;
-
-	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
-		sync->semaphores[i] = NULL;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		sync->sync_to[i] = NULL;
-
 	hash_init(sync->fences);
 	sync->last_vm_update = NULL;
 }
 
+/**
+ * amdgpu_sync_same_dev - test if fence belong to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
 static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
 {
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
 
-	if (a_fence)
-		return a_fence->ring->adev == adev;
-
 	if (s_fence) {
 		struct amdgpu_ring *ring;
 
@@ -76,17 +74,31 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
 	return false;
 }
 
-static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @fence: fence get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct fence *f)
 {
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
 	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
 	if (s_fence)
-		return s_fence->owner == owner;
-	if (a_fence)
-		return a_fence->owner == owner;
-	return false;
+		return s_fence->owner;
+
+	return AMDGPU_FENCE_OWNER_UNDEFINED;
 }
 
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending which one is later.
+ */
 static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
 {
 	if (*keep && fence_is_later(*keep, fence))
@@ -107,59 +119,39 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		      struct fence *f)
 {
 	struct amdgpu_sync_entry *e;
-	struct amdgpu_fence *fence;
 
 	if (!f)
 		return 0;
 
 	if (amdgpu_sync_same_dev(adev, f) &&
-	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
 		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
-	fence = to_amdgpu_fence(f);
-	if (!fence || fence->ring->adev != adev) {
-		hash_for_each_possible(sync->fences, e, node, f->context) {
-			if (unlikely(e->fence->context != f->context))
-				continue;
-
-			amdgpu_sync_keep_later(&e->fence, f);
-			return 0;
-		}
-
-		e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
-		if (!e)
-			return -ENOMEM;
+	hash_for_each_possible(sync->fences, e, node, f->context) {
+		if (unlikely(e->fence->context != f->context))
+			continue;
 
-		hash_add(sync->fences, &e->node, f->context);
-		e->fence = fence_get(f);
+		amdgpu_sync_keep_later(&e->fence, f);
 		return 0;
 	}
 
-	amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
+	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
 
+	hash_add(sync->fences, &e->node, f->context);
+	e->fence = fence_get(f);
 	return 0;
 }
 
-static void *amdgpu_sync_get_owner(struct fence *f)
-{
-	struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
-	struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
-
-	if (s_fence)
-		return s_fence->owner;
-	else if (a_fence)
-		return a_fence->owner;
-	return AMDGPU_FENCE_OWNER_UNDEFINED;
-}
-
 /**
- * amdgpu_sync_resv - use the semaphores to sync to a reservation object
+ * amdgpu_sync_resv - sync to a reservation object
  *
  * @sync: sync object to add fences from reservation object to
  * @resv: reservation object with embedded fence
  * @shared: true if we should only sync to the exclusive fence
  *
- * Sync to the fence using the semaphore objects
+ * Sync to the fence
  */
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
@@ -224,7 +216,7 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 		f = e->fence;
 
 		hash_del(&e->node);
-		kfree(e);
+		kmem_cache_free(amdgpu_sync_slab, e);
 
 		if (!fence_is_signaled(f))
 			return f;
@@ -247,109 +239,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
 
 		hash_del(&e->node);
 		fence_put(e->fence);
-		kfree(e);
-	}
-
-	if (amdgpu_enable_semaphores)
-		return 0;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct fence *fence = sync->sync_to[i];
-		if (!fence)
-			continue;
-
-		r = fence_wait(fence, false);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-/**
- * amdgpu_sync_rings - sync ring to all registered fences
- *
- * @sync: sync object to use
- * @ring: ring that needs sync
- *
- * Ensure that all registered fences are signaled before letting
- * the ring continue. The caller must hold the ring lock.
- */
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
-		      struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned count = 0;
-	int i, r;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *other = adev->rings[i];
-		struct amdgpu_semaphore *semaphore;
-		struct amdgpu_fence *fence;
-
-		if (!sync->sync_to[i])
-			continue;
-
-		fence = to_amdgpu_fence(sync->sync_to[i]);
-
-		/* check if we really need to sync */
-		if (!amdgpu_enable_scheduler &&
-		    !amdgpu_fence_need_sync(fence, ring))
-			continue;
-
-		/* prevent GPU deadlocks */
-		if (!other->ready) {
-			dev_err(adev->dev, "Syncing to a disabled ring!");
-			return -EINVAL;
-		}
-
-		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
-			r = fence_wait(sync->sync_to[i], true);
-			if (r)
-				return r;
-			continue;
-		}
-
-		if (count >= AMDGPU_NUM_SYNCS) {
-			/* not enough room, wait manually */
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-		r = amdgpu_semaphore_create(adev, &semaphore);
-		if (r)
-			return r;
-
-		sync->semaphores[count++] = semaphore;
-
-		/* allocate enough space for sync command */
-		r = amdgpu_ring_alloc(other, 16);
-		if (r)
-			return r;
-
-		/* emit the signal semaphore */
-		if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
-			/* signaling wasn't successful wait manually */
-			amdgpu_ring_undo(other);
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		/* we assume caller has already allocated space on waiters ring */
-		if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
-			/* waiting wasn't successful wait manually */
-			amdgpu_ring_undo(other);
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		amdgpu_ring_commit(other);
-		amdgpu_fence_note_sync(fence, ring);
+		kmem_cache_free(amdgpu_sync_slab, e);
 	}
 
 	return 0;
@@ -358,15 +248,11 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
 /**
  * amdgpu_sync_free - free the sync object
  *
- * @adev: amdgpu_device pointer
  * @sync: sync object to use
- * @fence: fence to use for the free
  *
- * Free the sync object by freeing all semaphores in it.
+ * Free the sync object.
  */
-void amdgpu_sync_free(struct amdgpu_device *adev,
-		      struct amdgpu_sync *sync,
-		      struct fence *fence)
+void amdgpu_sync_free(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -375,14 +261,34 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		hash_del(&e->node);
 		fence_put(e->fence);
-		kfree(e);
+		kmem_cache_free(amdgpu_sync_slab, e);
 	}
 
-	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
-		amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
+	fence_put(sync->last_vm_update);
+}
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+	amdgpu_sync_slab = kmem_cache_create(
+		"amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
+		SLAB_HWCACHE_ALIGN, NULL);
+	if (!amdgpu_sync_slab)
+		return -ENOMEM;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		fence_put(sync->sync_to[i]);
+	return 0;
+}
 
-	fence_put(sync->last_vm_update);
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+	kmem_cache_destroy(amdgpu_sync_slab);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 4865615e9c06..05a53f4fc334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
 		amdgpu_do_test_moves(adev);
 }
 
-static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
-					     struct amdgpu_ring *ring,
-					     struct fence **fence)
-{
-	uint32_t handle = ring->idx ^ 0xdeafbeef;
-	int r;
-
-	if (ring == &adev->uvd.ring) {
-		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
-		if (r) {
-			DRM_ERROR("Failed to get dummy create msg\n");
-			return r;
-		}
-
-		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
-		if (r) {
-			DRM_ERROR("Failed to get dummy destroy msg\n");
-			return r;
-		}
-
-	} else if (ring == &adev->vce.ring[0] ||
-		   ring == &adev->vce.ring[1]) {
-		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
-		if (r) {
-			DRM_ERROR("Failed to get dummy create msg\n");
-			return r;
-		}
-
-		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
-		if (r) {
-			DRM_ERROR("Failed to get dummy destroy msg\n");
-			return r;
-		}
-	} else {
-		struct amdgpu_fence *a_fence = NULL;
-		r = amdgpu_ring_lock(ring, 64);
-		if (r) {
-			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
-			return r;
-		}
-		amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
-		amdgpu_ring_unlock_commit(ring);
-		*fence = &a_fence->base;
-	}
-	return 0;
-}
-
 void amdgpu_test_ring_sync(struct amdgpu_device *adev,
 			   struct amdgpu_ring *ringA,
 			   struct amdgpu_ring *ringB)
 {
-	struct fence *fence1 = NULL, *fence2 = NULL;
-	struct amdgpu_semaphore *semaphore = NULL;
-	int r;
-
-	r = amdgpu_semaphore_create(adev, &semaphore);
-	if (r) {
-		DRM_ERROR("Failed to create semaphore\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
-	if (r)
-		goto out_cleanup;
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
-	if (r)
-		goto out_cleanup;
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fence1)) {
-		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringB);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-
-	r = fence_wait(fence1, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence 1\n");
-		goto out_cleanup;
-	}
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fence2)) {
-		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringB);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-
-	r = fence_wait(fence2, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence 1\n");
-		goto out_cleanup;
-	}
-
-out_cleanup:
-	amdgpu_semaphore_free(adev, &semaphore, NULL);
-
-	if (fence1)
-		fence_put(fence1);
-
-	if (fence2)
-		fence_put(fence2);
-
-	if (r)
-		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
 }
 
 static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
 			    struct amdgpu_ring *ringB,
 			    struct amdgpu_ring *ringC)
 {
-	struct fence *fenceA = NULL, *fenceB = NULL;
-	struct amdgpu_semaphore *semaphore = NULL;
-	bool sigA, sigB;
-	int i, r;
-
-	r = amdgpu_semaphore_create(adev, &semaphore);
-	if (r) {
-		DRM_ERROR("Failed to create semaphore\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
-	if (r)
-		goto out_cleanup;
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-	r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
-	if (r)
-		goto out_cleanup;
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fenceA)) {
-		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-	if (fence_is_signaled(fenceB)) {
-		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringC, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringC);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringC, semaphore);
-	amdgpu_ring_unlock_commit(ringC);
-
-	for (i = 0; i < 30; ++i) {
-		mdelay(100);
-		sigA = fence_is_signaled(fenceA);
-		sigB = fence_is_signaled(fenceB);
-		if (sigA || sigB)
-			break;
-	}
-
-	if (!sigA && !sigB) {
-		DRM_ERROR("Neither fence A nor B has been signaled\n");
-		goto out_cleanup;
-	} else if (sigA && sigB) {
-		DRM_ERROR("Both fence A and B has been signaled\n");
-		goto out_cleanup;
-	}
-
-	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
-
-	r = amdgpu_ring_lock(ringC, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringC);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringC, semaphore);
-	amdgpu_ring_unlock_commit(ringC);
-
-	mdelay(1000);
-
-	r = fence_wait(fenceA, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence A\n");
-		goto out_cleanup;
-	}
-	r = fence_wait(fenceB, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence B\n");
-		goto out_cleanup;
-	}
-
-out_cleanup:
-	amdgpu_semaphore_free(adev, &semaphore, NULL);
-
-	if (fenceA)
-		fence_put(fenceA);
-
-	if (fenceB)
-		fence_put(fenceB);
-
-	if (r)
-		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
 }
 
 static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8f9834ab1bd5..26a5f4acf584 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
 
 	    TP_fast_assign(
 			   __entry->bo_list = p->bo_list;
-			   __entry->ring = p->ibs[i].ring->idx;
-			   __entry->dw = p->ibs[i].length_dw;
+			   __entry->ring = p->job->ring->idx;
+			   __entry->dw = p->job->ibs[i].length_dw;
 			   __entry->fences = amdgpu_fence_count_emitted(
-				p->ibs[i].ring);
+				p->job->ring);
 			   ),
 	    TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
 		      __entry->bo_list, __entry->ring, __entry->dw,
@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
 			   __entry->fence = &job->base.s_fence->base;
-			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
 			   __entry->fence = &job->base.s_fence->base;
-			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -100,18 +100,24 @@ TRACE_EVENT(amdgpu_sched_run_job,
 
 
 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(unsigned vmid, int ring),
-	    TP_ARGS(vmid, ring),
+	    TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
+		     uint64_t pd_addr),
+	    TP_ARGS(vm, ring, vmid, pd_addr),
 	    TP_STRUCT__entry(
-			     __field(u32, vmid)
+			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
+			     __field(u32, vmid)
+			     __field(u64, pd_addr)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->vmid = vmid;
+			   __entry->vm = vm;
 			   __entry->ring = ring;
+			   __entry->vmid = vmid;
+			   __entry->pd_addr = pd_addr;
 			   ),
-	    TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
+	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
+		      __entry->ring, __entry->vmid, __entry->pd_addr)
 );
 
 TRACE_EVENT(amdgpu_vm_bo_map,
@@ -228,8 +234,8 @@ TRACE_EVENT(amdgpu_vm_flush,
 			   __entry->ring = ring;
 			   __entry->id = id;
 			   ),
-	    TP_printk("pd_addr=%010Lx, ring=%u, id=%u",
-		      __entry->pd_addr, __entry->ring, __entry->id)
+	    TP_printk("ring=%u, id=%u, pd_addr=%010Lx",
+		      __entry->ring, __entry->id, __entry->pd_addr)
 );
 
 TRACE_EVENT(amdgpu_bo_list_set,
@@ -247,42 +253,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
 );
 
-DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem),
-
-	    TP_STRUCT__entry(
-			     __field(int, ring)
-			     __field(signed, waiters)
-			     __field(uint64_t, gpu_addr)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->ring = ring;
-			   __entry->waiters = sem->waiters;
-			   __entry->gpu_addr = sem->gpu_addr;
-			   ),
-
-	    TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
-		      __entry->waiters, __entry->gpu_addr)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem)
-);
-
 #endif
 
 /* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1cbb16e15307..ab34190859a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
 	int r;
 
 	adev->mman.mem_global_referenced = false;
@@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 		return r;
 	}
 
+	ring = adev->mman.buffer_funcs_ring;
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
+	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
+		drm_global_item_unref(&adev->mman.mem_global_ref);
+		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
+		return r;
+	}
+
 	adev->mman.mem_global_referenced = true;
+
 	return 0;
 }
 
 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
 {
 	if (adev->mman.mem_global_referenced) {
+		amd_sched_entity_fini(adev->mman.entity.sched,
+				      &adev->mman.entity);
 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
 		drm_global_item_unref(&adev->mman.mem_global_ref);
 		adev->mman.mem_global_referenced = false;
@@ -478,32 +494,32 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 /*
  * TTM backend functions.
  */
+struct amdgpu_ttm_gup_task_list {
+	struct list_head	list;
+	struct task_struct	*task;
+};
+
 struct amdgpu_ttm_tt {
-	struct ttm_dma_tt		ttm;
-	struct amdgpu_device		*adev;
-	u64				offset;
-	uint64_t			userptr;
-	struct mm_struct		*usermm;
-	uint32_t			userflags;
+	struct ttm_dma_tt	ttm;
+	struct amdgpu_device	*adev;
+	u64			offset;
+	uint64_t		userptr;
+	struct mm_struct	*usermm;
+	uint32_t		userflags;
+	spinlock_t              guptasklock;
+	struct list_head        guptasks;
+	atomic_t		mmu_invalidations;
 };
 
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned pinned = 0, nents;
-	int r;
-
 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
-	enum dma_data_direction direction = write ?
-		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
-
-	if (current->mm != gtt->usermm)
-		return -EPERM;
+	unsigned pinned = 0;
+	int r;
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/* check that we only pin down anonymous memory
+		/* check that we only use anonymous memory
 		   to prevent problems with writeback */
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
@@ -516,10 +532,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **pages = ttm->pages + pinned;
+		struct page **p = pages + pinned;
+		struct amdgpu_ttm_gup_task_list guptask;
+
+		guptask.task = current;
+		spin_lock(&gtt->guptasklock);
+		list_add(&guptask.list, &gtt->guptasks);
+		spin_unlock(&gtt->guptasklock);
+
+		r = get_user_pages(userptr, num_pages, write, 0, p, NULL);
+
+		spin_lock(&gtt->guptasklock);
+		list_del(&guptask.list);
+		spin_unlock(&gtt->guptasklock);
 
-		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
 		if (r < 0)
 			goto release_pages;
 
@@ -527,6 +553,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 	} while (pinned < ttm->num_pages);
 
+	return 0;
+
+release_pages:
+	release_pages(pages, pinned, 0);
+	return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned nents;
+	int r;
+
+	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
@@ -545,9 +590,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
 	kfree(ttm->sg);
-
-release_pages:
-	release_pages(ttm->pages, pinned, 0);
 	return r;
 }
 
@@ -770,38 +812,61 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->userptr = addr;
 	gtt->usermm = current->mm;
 	gtt->userflags = flags;
+	spin_lock_init(&gtt->guptasklock);
+	INIT_LIST_HEAD(&gtt->guptasks);
+	atomic_set(&gtt->mmu_invalidations, 0);
+
 	return 0;
 }
 
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm)
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL)
-		return false;
+		return NULL;
 
-	return !!gtt->userptr;
+	return gtt->usermm;
 }
 
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
-	if (gtt == NULL)
-		return false;
-
-	if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+	if (gtt == NULL || !gtt->userptr)
 		return false;
 
 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
+	spin_lock(&gtt->guptasklock);
+	list_for_each_entry(entry, &gtt->guptasks, list) {
+		if (entry->task == current) {
+			spin_unlock(&gtt->guptasklock);
+			return false;
+		}
+	}
+	spin_unlock(&gtt->guptasklock);
+
+	atomic_inc(&gtt->mmu_invalidations);
+
 	return true;
 }
 
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int prev_invalidated = *last_invalidated;
+
+	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
+	return prev_invalidated != *last_invalidated;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1015,9 +1080,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 		       struct fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_job *job;
+
 	uint32_t max_bytes;
 	unsigned num_loops, num_dw;
-	struct amdgpu_ib *ib;
 	unsigned i;
 	int r;
 
@@ -1029,20 +1095,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	while (num_dw & 0x7)
 		num_dw++;
 
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
-		return -ENOMEM;
-
-	r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
-	if (r) {
-		kfree(ib);
+	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+	if (r)
 		return r;
-	}
-
-	ib->length_dw = 0;
 
 	if (resv) {
-		r = amdgpu_sync_resv(adev, &ib->sync, resv,
+		r = amdgpu_sync_resv(adev, &job->sync, resv,
 				     AMDGPU_FENCE_OWNER_UNDEFINED);
 		if (r) {
 			DRM_ERROR("sync failed (%d).\n", r);
@@ -1053,31 +1111,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	for (i = 0; i < num_loops; i++) {
 		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
-		amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
-					cur_size_in_bytes);
+		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
+					dst_offset, cur_size_in_bytes);
 
 		src_offset += cur_size_in_bytes;
 		dst_offset += cur_size_in_bytes;
 		byte_count -= cur_size_in_bytes;
 	}
 
-	amdgpu_vm_pad_ib(adev, ib);
-	WARN_ON(ib->length_dw > num_dw);
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_vm_free_job,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 fence);
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+	WARN_ON(job->ibs[0].length_dw > num_dw);
+	r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;
 
-	if (!amdgpu_enable_scheduler) {
-		amdgpu_ib_free(adev, ib);
-		kfree(ib);
-	}
 	return 0;
+
 error_free:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 53f987aeeacf..c1a581044417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -91,6 +91,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 {
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
 	unsigned long bo_size;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
@@ -191,6 +193,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 	amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
 
+	ring = &adev->uvd.ring;
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up UVD run queue.\n");
+		return r;
+	}
+
 	for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
 		atomic_set(&adev->uvd.handles[i], 0);
 		adev->uvd.filp[i] = NULL;
@@ -210,6 +221,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 	if (adev->uvd.vcpu_bo == NULL)
 		return 0;
 
+	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+
 	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
 	if (!r) {
 		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
@@ -241,7 +254,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 
 			amdgpu_uvd_note_usage(adev);
 
-			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+			r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
 			if (r) {
 				DRM_ERROR("Error destroying UVD (%d)!\n", r);
 				continue;
@@ -295,7 +308,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 
 			amdgpu_uvd_note_usage(adev);
 
-			r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+			r = amdgpu_uvd_get_destroy_msg(ring, handle,
+						       false, &fence);
 			if (r) {
 				DRM_ERROR("Error destroying UVD (%d)!\n", r);
 				continue;
@@ -525,13 +539,6 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 		return -EINVAL;
 	}
 
-	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
-						MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
-		DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
-		return r;
-	}
-
 	r = amdgpu_bo_kmap(bo, &ptr);
 	if (r) {
 		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
@@ -616,7 +623,6 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo;
-	struct amdgpu_ib *ib;
 	uint32_t cmd, lo, hi;
 	uint64_t start, end;
 	uint64_t addr;
@@ -638,9 +644,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
 	start += addr;
 
-	ib = &ctx->parser->ibs[ctx->ib_idx];
-	ib->ptr[ctx->data0] = start & 0xFFFFFFFF;
-	ib->ptr[ctx->data1] = start >> 32;
+	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
+			    lower_32_bits(start));
+	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
+			    upper_32_bits(start));
 
 	cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
 	if (cmd < 0x4) {
@@ -702,7 +709,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
 			     int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
 	int i, r;
 
 	ctx->idx++;
@@ -748,7 +755,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
 static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
 				 int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
 {
-	struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+	struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
 	int r;
 
 	for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
@@ -790,7 +797,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		[0x00000003]	=	2048,
 		[0x00000004]	=	0xFFFFFFFF,
 	};
-	struct amdgpu_ib *ib = &parser->ibs[ib_idx];
+	struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
 	int r;
 
 	if (ib->length_dw % 16) {
@@ -823,22 +830,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 	return 0;
 }
 
-static int amdgpu_uvd_free_job(
-	struct amdgpu_job *job)
-{
-	amdgpu_ib_free(job->adev, job->ibs);
-	kfree(job->ibs);
-	return 0;
-}
-
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
-			       struct amdgpu_bo *bo,
-			       struct fence **fence)
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+			       bool direct, struct fence **fence)
 {
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct list_head head;
-	struct amdgpu_ib *ib = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t addr;
@@ -862,15 +861,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 	if (r)
 		goto err;
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib) {
-		r = -ENOMEM;
-		goto err;
-	}
-	r = amdgpu_ib_get(ring, NULL, 64, ib);
+
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
 	if (r)
-		goto err1;
+		goto err;
 
+	ib = &job->ibs[0];
 	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
 	ib->ptr[1] = addr;
@@ -882,12 +878,19 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
 		ib->ptr[i] = PACKET2(0);
 	ib->length_dw = 16;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_uvd_free_job,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
-	if (r)
-		goto err2;
+	if (direct) {
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = f;
+		if (r)
+			goto err_free;
+
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+		if (r)
+			goto err_free;
+	}
 
 	ttm_eu_fence_buffer_objects(&ticket, &head, f);
 
@@ -895,16 +898,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
 		*fence = fence_get(f);
 	amdgpu_bo_unref(&bo);
 	fence_put(f);
-	if (amdgpu_enable_scheduler)
-		return 0;
 
-	amdgpu_ib_free(ring->adev, ib);
-	kfree(ib);
 	return 0;
-err2:
-	amdgpu_ib_free(ring->adev, ib);
-err1:
-	kfree(ib);
+
+err_free:
+	amdgpu_job_free(job);
+
 err:
 	ttm_eu_backoff_reservation(&ticket, &head);
 	return r;
@@ -959,11 +958,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_unreserve(bo);
 
-	return amdgpu_uvd_send_msg(ring, bo, fence);
+	return amdgpu_uvd_send_msg(ring, bo, true, fence);
 }
 
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence)
+			       bool direct, struct fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_bo *bo;
@@ -1001,7 +1000,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_unreserve(bo);
 
-	return amdgpu_uvd_send_msg(ring, bo, fence);
+	return amdgpu_uvd_send_msg(ring, bo, direct, fence);
 }
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 1724c2c86151..9a3b449081a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct fence **fence);
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence);
+			       bool direct, struct fence **fence);
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a745eeeb5d82..4bec0c108cea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -74,6 +74,8 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
  */
 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 {
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned ucode_version, version_major, version_minor, binary_id;
@@ -170,6 +172,16 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 		return r;
 	}
 
+
+	ring = &adev->vce.ring[0];
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+	r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
+				  rq, amdgpu_sched_jobs);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up VCE run queue.\n");
+		return r;
+	}
+
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		atomic_set(&adev->vce.handles[i], 0);
 		adev->vce.filp[i] = NULL;
@@ -190,6 +202,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	if (adev->vce.vcpu_bo == NULL)
 		return 0;
 
+	amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+
 	amdgpu_bo_unref(&adev->vce.vcpu_bo);
 
 	amdgpu_ring_fini(&adev->vce.ring[0]);
@@ -337,7 +351,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 
 		amdgpu_vce_note_usage(adev);
 
-		r = amdgpu_vce_get_destroy_msg(ring, handle, NULL);
+		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
 		if (r)
 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
 
@@ -346,14 +360,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	}
 }
 
-static int amdgpu_vce_free_job(
-	struct amdgpu_job *job)
-{
-	amdgpu_ib_free(job->adev, job->ibs);
-	kfree(job->ibs);
-	return 0;
-}
-
 /**
  * amdgpu_vce_get_create_msg - generate a VCE create msg
  *
@@ -368,21 +374,17 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
-	struct amdgpu_ib *ib = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
 	uint64_t dummy;
 	int i, r;
 
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
-		return -ENOMEM;
-	r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
-		kfree(ib);
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	if (r)
 		return r;
-	}
+
+	ib = &job->ibs[0];
 
 	dummy = ib->gpu_addr + 1024;
 
@@ -423,20 +425,19 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = ib->length_dw; i < ib_size_dw; ++i)
 		ib->ptr[i] = 0x0;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_vce_free_job,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+	job->fence = f;
 	if (r)
 		goto err;
+
+	amdgpu_job_free(job);
 	if (fence)
 		*fence = fence_get(f);
 	fence_put(f);
-	if (amdgpu_enable_scheduler)
-		return 0;
+	return 0;
+
 err:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 	return r;
 }
 
@@ -451,26 +452,20 @@ err:
  * Close up a stream for HW test or if userspace failed to do so
  */
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence)
+			       bool direct, struct fence **fence)
 {
 	const unsigned ib_size_dw = 1024;
-	struct amdgpu_ib *ib = NULL;
+	struct amdgpu_job *job;
+	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
-	struct amdgpu_device *adev = ring->adev;
 	uint64_t dummy;
 	int i, r;
 
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
-		return -ENOMEM;
-
-	r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
-	if (r) {
-		kfree(ib);
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	if (r)
 		return r;
-	}
 
+	ib = &job->ibs[0];
 	dummy = ib->gpu_addr + 1024;
 
 	/* stitch together an VCE destroy msg */
@@ -490,20 +485,28 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 
 	for (i = ib->length_dw; i < ib_size_dw; ++i)
 		ib->ptr[i] = 0x0;
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_vce_free_job,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
-	if (r)
-		goto err;
+
+	if (direct) {
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		job->fence = f;
+		if (r)
+			goto err;
+
+		amdgpu_job_free(job);
+	} else {
+		r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+		if (r)
+			goto err;
+	}
+
 	if (fence)
 		*fence = fence_get(f);
 	fence_put(f);
-	if (amdgpu_enable_scheduler)
-		return 0;
+	return 0;
+
 err:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 	return r;
 }
 
@@ -521,7 +524,6 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 			       int lo, int hi, unsigned size, uint32_t index)
 {
 	struct amdgpu_bo_va_mapping *mapping;
-	struct amdgpu_ib *ib = &p->ibs[ib_idx];
 	struct amdgpu_bo *bo;
 	uint64_t addr;
 
@@ -550,8 +552,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 	addr += amdgpu_bo_gpu_offset(bo);
 	addr -= ((uint64_t)size) * ((uint64_t)index);
 
-	ib->ptr[lo] = addr & 0xFFFFFFFF;
-	ib->ptr[hi] = addr >> 32;
+	amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
+	amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
 
 	return 0;
 }
@@ -606,7 +608,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
  */
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 {
-	struct amdgpu_ib *ib = &p->ibs[ib_idx];
+	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
 	unsigned fb_idx = 0, bs_idx = 0;
 	int session_idx = -1;
 	bool destroyed = false;
@@ -743,30 +745,6 @@ out:
 }
 
 /**
- * amdgpu_vce_ring_emit_semaphore - emit a semaphore command
- *
- * @ring: engine to use
- * @semaphore: address of semaphore
- * @emit_wait: true=emit wait, false=emit signal
- *
- */
-bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
-				    struct amdgpu_semaphore *semaphore,
-				    bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE);
-	amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-	amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-	amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
-	if (!emit_wait)
-		amdgpu_ring_write(ring, VCE_CMD_END);
-
-	return true;
-}
-
-/**
  * amdgpu_vce_ring_emit_ib - execute indirect buffer
  *
  * @ring: engine to use
@@ -814,14 +792,14 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	unsigned i;
 	int r;
 
-	r = amdgpu_ring_lock(ring, 16);
+	r = amdgpu_ring_alloc(ring, 16);
 	if (r) {
 		DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
 			  ring->idx, r);
 		return r;
 	}
 	amdgpu_ring_write(ring, VCE_CMD_END);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
@@ -862,7 +840,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
 		goto error;
 	}
 
-	r = amdgpu_vce_get_destroy_msg(ring, 1, &fence);
+	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
 		goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index ba2da8ee5906..ef99d2370182 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct fence **fence);
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence);
+			       bool direct, struct fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
-				    struct amdgpu_semaphore *semaphore,
-				    bool emit_wait);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9599f7559b3d..b6c011b83641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -50,12 +50,15 @@
  * SI supports 16.
  */
 
+/* Special value that no flush is necessary */
+#define AMDGPU_VM_NO_FLUSH (~0ll)
+
 /**
  * amdgpu_vm_num_pde - return the number of page directory entries
  *
  * @adev: amdgpu_device pointer
  *
- * Calculate the number of page directory entries (cayman+).
+ * Calculate the number of page directory entries.
  */
 static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
 {
@@ -67,7 +70,7 @@ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
  *
  * @adev: amdgpu_device pointer
  *
- * Calculate the size of the page directory in bytes (cayman+).
+ * Calculate the size of the page directory in bytes.
  */
 static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
 {
@@ -89,11 +92,10 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct amdgpu_bo_list_entry *entry)
 {
 	entry->robj = vm->page_directory;
-	entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
-	entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
 	entry->priority = 0;
 	entry->tv.bo = &vm->page_directory->tbo;
 	entry->tv.shared = true;
+	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
@@ -154,133 +156,154 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
  * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
  *
  * Allocate an id for the vm, adding fences to the sync obj as necessary.
- *
- * Global mutex must be locked!
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		      struct amdgpu_sync *sync)
+		      struct amdgpu_sync *sync, struct fence *fence,
+		      unsigned *vm_id, uint64_t *vm_pd_addr)
 {
-	struct fence *best[AMDGPU_MAX_RINGS] = {};
-	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+	struct fence *updates = sync->last_vm_update;
+	int r;
 
-	unsigned choices[2] = {};
-	unsigned i;
+	mutex_lock(&adev->vm_manager.lock);
 
 	/* check if the id is still valid */
-	if (vm_id->id) {
-		unsigned id = vm_id->id;
+	if (id->mgr_id) {
+		struct fence *flushed = id->flushed_updates;
+		bool is_later;
 		long owner;
 
-		owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
-		if (owner == (long)vm) {
-			trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+		if (!flushed)
+			is_later = true;
+		else if (!updates)
+			is_later = false;
+		else
+			is_later = fence_is_later(updates, flushed);
+
+		owner = atomic_long_read(&id->mgr_id->owner);
+		if (!is_later && owner == (long)id &&
+		    pd_addr == id->pd_gpu_addr) {
+
+			r = amdgpu_sync_fence(ring->adev, sync,
+					      id->mgr_id->active);
+			if (r) {
+				mutex_unlock(&adev->vm_manager.lock);
+				return r;
+			}
+
+			fence_put(id->mgr_id->active);
+			id->mgr_id->active = fence_get(fence);
+
+			list_move_tail(&id->mgr_id->list,
+				       &adev->vm_manager.ids_lru);
+
+			*vm_id = id->mgr_id - adev->vm_manager.ids;
+			*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+			trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
+						*vm_pd_addr);
+
+			mutex_unlock(&adev->vm_manager.lock);
 			return 0;
 		}
 	}
 
-	/* we definately need to flush */
-	vm_id->pd_gpu_addr = ~0ll;
+	id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+				      struct amdgpu_vm_manager_id,
+				      list);
 
-	/* skip over VMID 0, since it is the system VM */
-	for (i = 1; i < adev->vm_manager.nvm; ++i) {
-		struct fence *fence = adev->vm_manager.ids[i].active;
-		struct amdgpu_ring *fring;
-
-		if (fence == NULL) {
-			/* found a free one */
-			vm_id->id = i;
-			trace_amdgpu_vm_grab_id(i, ring->idx);
-			return 0;
-		}
+	r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+	if (!r) {
+		fence_put(id->mgr_id->active);
+		id->mgr_id->active = fence_get(fence);
 
-		fring = amdgpu_ring_from_fence(fence);
-		if (best[fring->idx] == NULL ||
-		    fence_is_later(best[fring->idx], fence)) {
-			best[fring->idx] = fence;
-			choices[fring == ring ? 0 : 1] = i;
-		}
-	}
+		fence_put(id->flushed_updates);
+		id->flushed_updates = fence_get(updates);
 
-	for (i = 0; i < 2; ++i) {
-		if (choices[i]) {
-			struct fence *fence;
+		id->pd_gpu_addr = pd_addr;
 
-			fence  = adev->vm_manager.ids[choices[i]].active;
-			vm_id->id = choices[i];
+		list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
+		atomic_long_set(&id->mgr_id->owner, (long)id);
 
-			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-			return amdgpu_sync_fence(ring->adev, sync, fence);
-		}
+		*vm_id = id->mgr_id - adev->vm_manager.ids;
+		*vm_pd_addr = pd_addr;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 	}
 
-	/* should never happen */
-	BUG();
-	return -EINVAL;
+	mutex_unlock(&adev->vm_manager.lock);
+	return r;
 }
 
 /**
  * amdgpu_vm_flush - hardware flush the vm
  *
  * @ring: ring to use for flush
- * @vm: vm we want to flush
- * @updates: last vm update that we waited for
+ * @vm_id: vmid number to use
+ * @pd_addr: address of the page directory
  *
- * Flush the vm (cayman+).
- *
- * Global and local mutex must be locked!
+ * Emit a VM flush when it is necessary.
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     struct amdgpu_vm *vm,
-		     struct fence *updates)
+		     unsigned vm_id, uint64_t pd_addr,
+		     uint32_t gds_base, uint32_t gds_size,
+		     uint32_t gws_base, uint32_t gws_size,
+		     uint32_t oa_base, uint32_t oa_size)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
-	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
-	struct fence *flushed_updates = vm_id->flushed_updates;
-	bool is_later;
-
-	if (!flushed_updates)
-		is_later = true;
-	else if (!updates)
-		is_later = false;
-	else
-		is_later = fence_is_later(updates, flushed_updates);
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
+		mgr_id->gds_base != gds_base ||
+		mgr_id->gds_size != gds_size ||
+		mgr_id->gws_base != gws_base ||
+		mgr_id->gws_size != gws_size ||
+		mgr_id->oa_base != oa_base ||
+		mgr_id->oa_size != oa_size);
+
+	if (ring->funcs->emit_pipeline_sync && (
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+		amdgpu_ring_emit_pipeline_sync(ring);
+
+	if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
+		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+	}
 
-	if (pd_addr != vm_id->pd_gpu_addr || is_later) {
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		if (is_later) {
-			vm_id->flushed_updates = fence_get(updates);
-			fence_put(flushed_updates);
-		}
-		vm_id->pd_gpu_addr = pd_addr;
-		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
+	if (gds_switch_needed) {
+		mgr_id->gds_base = gds_base;
+		mgr_id->gds_size = gds_size;
+		mgr_id->gws_base = gws_base;
+		mgr_id->gws_size = gws_size;
+		mgr_id->oa_base = oa_base;
+		mgr_id->oa_size = oa_size;
+		amdgpu_ring_emit_gds_switch(ring, vm_id,
+					    gds_base, gds_size,
+					    gws_base, gws_size,
+					    oa_base, oa_size);
 	}
 }
 
 /**
- * amdgpu_vm_fence - remember fence for vm
+ * amdgpu_vm_reset_id - reset VMID to zero
  *
- * @adev: amdgpu_device pointer
- * @vm: vm we want to fence
- * @fence: fence to remember
- *
- * Fence the vm (cayman+).
- * Set the fence used to protect page table and id.
+ * @adev: amdgpu device structure
+ * @vm_id: vmid number to use
  *
- * Global and local mutex must be locked!
+ * Reset saved GDW, GWS and OA to force switch on next flush.
  */
-void amdgpu_vm_fence(struct amdgpu_device *adev,
-		     struct amdgpu_vm *vm,
-		     struct fence *fence)
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
 {
-	struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
-	unsigned vm_id = vm->ids[ring->idx].id;
-
-	fence_put(adev->vm_manager.ids[vm_id].active);
-	adev->vm_manager.ids[vm_id].active = fence_get(fence);
-	atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
+	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+
+	mgr_id->gds_base = 0;
+	mgr_id->gds_size = 0;
+	mgr_id->gws_base = 0;
+	mgr_id->gws_size = 0;
+	mgr_id->oa_base = 0;
+	mgr_id->oa_size = 0;
 }
 
 /**
@@ -289,7 +312,7 @@ void amdgpu_vm_fence(struct amdgpu_device *adev,
  * @vm: requested vm
  * @bo: requested buffer object
  *
- * Find @bo inside the requested vm (cayman+).
+ * Find @bo inside the requested vm.
  * Search inside the @bos vm list for the requested vm
  * Returns the found bo_va or NULL if none is found
  *
@@ -312,32 +335,40 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw access flags
  * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
  * @flags: hw access flags
- * @gtt_flags: GTT hw access flags
  *
  * Traces the parameters and calls the right asic functions
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
+				   struct amdgpu_gart *gtt,
+				   uint32_t gtt_flags,
 				   struct amdgpu_ib *ib,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
-				   uint32_t flags, uint32_t gtt_flags)
+				   uint32_t flags)
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
-		uint64_t src = adev->gart.table_addr + (addr >> 12) * 8;
+	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+		uint64_t src = gtt->table_addr + (addr >> 12) * 8;
 		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
 
-	} else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) {
-		amdgpu_vm_write_pte(adev, ib, pe, addr,
-				      count, incr, flags);
+	} else if (gtt) {
+		dma_addr_t *pages_addr = gtt->pages_addr;
+		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
+				    count, incr, flags);
+
+	} else if (count < 3) {
+		amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
+				    count, incr, flags);
 
 	} else {
 		amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
@@ -345,15 +376,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 	}
 }
 
-int amdgpu_vm_free_job(struct amdgpu_job *job)
-{
-	int i;
-	for (i = 0; i < job->num_ibs; i++)
-		amdgpu_ib_free(job->adev, &job->ibs[i]);
-	kfree(job->ibs);
-	return 0;
-}
-
 /**
  * amdgpu_vm_clear_bo - initially clear the page dir/table
  *
@@ -363,15 +385,18 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
  * need to reserve bo first before calling it.
  */
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+			      struct amdgpu_vm *vm,
 			      struct amdgpu_bo *bo)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
 	struct fence *fence = NULL;
-	struct amdgpu_ib *ib;
+	struct amdgpu_job *job;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	r = reservation_object_reserve_shared(bo->tbo.resv);
 	if (r)
 		return r;
@@ -383,56 +408,57 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
+	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	if (r)
 		goto error;
 
-	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
+	amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
+			       0, 0);
+	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+	WARN_ON(job->ibs[0].length_dw > 64);
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_VM, &fence);
 	if (r)
 		goto error_free;
 
-	ib->length_dw = 0;
-
-	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
-	amdgpu_vm_pad_ib(adev, ib);
-	WARN_ON(ib->length_dw > 64);
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_vm_free_job,
-						 AMDGPU_FENCE_OWNER_VM,
-						 &fence);
-	if (!r)
-		amdgpu_bo_fence(bo, fence, true);
+	amdgpu_bo_fence(bo, fence, true);
 	fence_put(fence);
-	if (amdgpu_enable_scheduler)
-		return 0;
+	return 0;
 
 error_free:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 
 error:
 	return r;
 }
 
 /**
- * amdgpu_vm_map_gart - get the physical address of a gart page
+ * amdgpu_vm_map_gart - Resolve gart mapping of addr
  *
- * @adev: amdgpu_device pointer
+ * @pages_addr: optional DMA address to use for lookup
  * @addr: the unmapped addr
  *
  * Look up the physical address of the page that the pte resolves
- * to (cayman+).
- * Returns the physical address of the page.
+ * to and return the pointer for the page table entry.
  */
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 {
 	uint64_t result;
 
-	/* page table offset */
-	result = adev->gart.pages_addr[addr >> PAGE_SHIFT];
+	if (pages_addr) {
+		/* page table offset */
+		result = pages_addr[addr >> PAGE_SHIFT];
+
+		/* in case cpu page size != gpu page size*/
+		result |= addr & (~PAGE_MASK);
 
-	/* in case cpu page size != gpu page size*/
-	result |= addr & (~PAGE_MASK);
+	} else {
+		/* No mapping required */
+		result = addr;
+	}
+
+	result &= 0xFFFFFFFFFFFFF000ULL;
 
 	return result;
 }
@@ -446,45 +472,37 @@ uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
  * @end: end of GPU address range
  *
  * Allocates new page tables if necessary
- * and updates the page directory (cayman+).
+ * and updates the page directory.
  * Returns 0 for success, error for failure.
- *
- * Global and local mutex must be locked!
  */
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
 	struct amdgpu_bo *pd = vm->page_directory;
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
 	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
+	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct fence *fence = NULL;
 
 	int r;
 
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	/* padding, etc. */
 	ndw = 64;
 
 	/* assume the worst case */
 	ndw += vm->max_pde_used * 6;
 
-	/* update too big for an IB */
-	if (ndw > 0xfffff)
-		return -ENOMEM;
-
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
-		return -ENOMEM;
-
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
-	if (r) {
-		kfree(ib);
+	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+	if (r)
 		return r;
-	}
-	ib->length_dw = 0;
+
+	ib = &job->ibs[0];
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -504,9 +522,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, ib, last_pde,
-						       last_pt, count, incr,
-						       AMDGPU_PTE_VALID, 0);
+				amdgpu_vm_update_pages(adev, NULL, 0, ib,
+						       last_pde, last_pt,
+						       count, incr,
+						       AMDGPU_PTE_VALID);
 			}
 
 			count = 1;
@@ -518,17 +537,16 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
-				       incr, AMDGPU_PTE_VALID, 0);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+				       count, incr, AMDGPU_PTE_VALID);
 
 	if (ib->length_dw != 0) {
-		amdgpu_vm_pad_ib(adev, ib);
-		amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+		amdgpu_ring_pad_ib(ring, ib);
+		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
+				 AMDGPU_FENCE_OWNER_VM);
 		WARN_ON(ib->length_dw > ndw);
-		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-							 &amdgpu_vm_free_job,
-							 AMDGPU_FENCE_OWNER_VM,
-							 &fence);
+		r = amdgpu_job_submit(job, ring, &vm->entity,
+				      AMDGPU_FENCE_OWNER_VM, &fence);
 		if (r)
 			goto error_free;
 
@@ -536,18 +554,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		fence_put(vm->page_directory_fence);
 		vm->page_directory_fence = fence_get(fence);
 		fence_put(fence);
-	}
 
-	if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
-		amdgpu_ib_free(adev, ib);
-		kfree(ib);
+	} else {
+		amdgpu_job_free(job);
 	}
 
 	return 0;
 
 error_free:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 	return r;
 }
 
@@ -555,20 +570,20 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
  * @ib: IB for the update
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
  * @addr: addr those PTEs should point to
  * @flags: hw mapping flags
- * @gtt_flags: GTT hw mapping flags
- *
- * Global and local mutex must be locked!
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+				struct amdgpu_gart *gtt,
+				uint32_t gtt_flags,
 				struct amdgpu_ib *ib,
 				uint64_t pe_start, uint64_t pe_end,
-				uint64_t addr, uint32_t flags,
-				uint32_t gtt_flags)
+				uint64_t addr, uint32_t flags)
 {
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
@@ -598,36 +613,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 
 	unsigned count;
 
+	/* Abort early if there isn't anything to do */
+	if (pe_start == pe_end)
+		return;
+
 	/* system pages are non continuously */
-	if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
+	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
+				       addr, count, AMDGPU_GPU_PAGE_SIZE,
+				       flags);
 		return;
 	}
 
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
+				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, ib, frag_start, addr, count,
-			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags,
-			       gtt_flags);
+	amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
+			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, ib, frag_end, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+		amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
+				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
 
@@ -635,122 +653,105 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
  * @dst: destination address to map to
  * @flags: mapping flags
  *
- * Update the page tables in the range @start - @end (cayman+).
- *
- * Global and local mutex must be locked!
+ * Update the page tables in the range @start - @end.
  */
-static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				 struct amdgpu_vm *vm,
-				 struct amdgpu_ib *ib,
-				 uint64_t start, uint64_t end,
-				 uint64_t dst, uint32_t flags,
-				 uint32_t gtt_flags)
+static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+				  struct amdgpu_gart *gtt,
+				  uint32_t gtt_flags,
+				  struct amdgpu_vm *vm,
+				  struct amdgpu_ib *ib,
+				  uint64_t start, uint64_t end,
+				  uint64_t dst, uint32_t flags)
 {
-	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
-	uint64_t last_pte = ~0, last_dst = ~0;
-	void *owner = AMDGPU_FENCE_OWNER_VM;
-	unsigned count = 0;
-	uint64_t addr;
+	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	/* sync to everything on unmapping */
-	if (!(flags & AMDGPU_PTE_VALID))
-		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
+	uint64_t addr;
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
 		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
 		unsigned nptes;
-		uint64_t pte;
-		int r;
-
-		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
-		r = reservation_object_reserve_shared(pt->tbo.resv);
-		if (r)
-			return r;
+		uint64_t pe_start;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pte = amdgpu_bo_gpu_offset(pt);
-		pte += (addr & mask) * 8;
+		pe_start = amdgpu_bo_gpu_offset(pt);
+		pe_start += (addr & mask) * 8;
 
-		if ((last_pte + 8 * count) != pte) {
+		if (last_pe_end != pe_start) {
 
-			if (count) {
-				amdgpu_vm_frag_ptes(adev, ib, last_pte,
-						    last_pte + 8 * count,
-						    last_dst, flags,
-						    gtt_flags);
-			}
+			amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+					    last_pe_start, last_pe_end,
+					    last_dst, flags);
 
-			count = nptes;
-			last_pte = pte;
+			last_pe_start = pe_start;
+			last_pe_end = pe_start + 8 * nptes;
 			last_dst = dst;
 		} else {
-			count += nptes;
+			last_pe_end += 8 * nptes;
 		}
 
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	if (count) {
-		amdgpu_vm_frag_ptes(adev, ib, last_pte,
-				    last_pte + 8 * count,
-				    last_dst, flags, gtt_flags);
-	}
-
-	return 0;
+	amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+			    last_pe_start, last_pe_end,
+			    last_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: flags as they are used for GTT
  * @vm: requested vm
- * @mapping: mapped range and flags to use for the update
+ * @start: start of mapped range
+ * @last: last mapped entry
+ * @flags: flags for the entries
  * @addr: addr to set the area to
- * @gtt_flags: flags as they are used for GTT
  * @fence: optional resulting fence
  *
- * Fill in the page table entries for @mapping.
+ * Fill in the page table entries between @start and @last.
  * Returns 0 for success, -EINVAL for failure.
- *
- * Object have to be reserved and mutex must be locked!
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct amdgpu_gart *gtt,
+				       uint32_t gtt_flags,
 				       struct amdgpu_vm *vm,
-				       struct amdgpu_bo_va_mapping *mapping,
-				       uint64_t addr, uint32_t gtt_flags,
+				       uint64_t start, uint64_t last,
+				       uint32_t flags, uint64_t addr,
 				       struct fence **fence)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
+	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
-	uint32_t flags = gtt_flags;
+	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
 	int r;
 
-	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
-	 * but in case of something, we filter the flags in first place
-	 */
-	if (!(mapping->flags & AMDGPU_PTE_READABLE))
-		flags &= ~AMDGPU_PTE_READABLE;
-	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
-		flags &= ~AMDGPU_PTE_WRITEABLE;
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
-	trace_amdgpu_vm_bo_update(mapping);
+	/* sync to everything on unmapping */
+	if (!(flags & AMDGPU_PTE_VALID))
+		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
 
-	nptes = mapping->it.last - mapping->it.start + 1;
+	nptes = last - start + 1;
 
 	/*
 	 * reserve space for one command every (1 << BLOCK_SIZE)
@@ -761,11 +762,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
+	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (flags & AMDGPU_PTE_SYSTEM) {
+	} else if (gtt) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -780,38 +781,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		ndw += 2 * 10;
 	}
 
-	/* update too big for an IB */
-	if (ndw > 0xfffff)
-		return -ENOMEM;
-
-	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
-	if (!ib)
-		return -ENOMEM;
-
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
-	if (r) {
-		kfree(ib);
+	r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+	if (r)
 		return r;
-	}
 
-	ib->length_dw = 0;
+	ib = &job->ibs[0];
 
-	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
-				  mapping->it.last + 1, addr + mapping->offset,
-				  flags, gtt_flags);
+	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+			     owner);
+	if (r)
+		goto error_free;
 
-	if (r) {
-		amdgpu_ib_free(adev, ib);
-		kfree(ib);
-		return r;
-	}
+	r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
+	if (r)
+		goto error_free;
+
+	amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
+			      addr, flags);
 
-	amdgpu_vm_pad_ib(adev, ib);
+	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > ndw);
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
-						 &amdgpu_vm_free_job,
-						 AMDGPU_FENCE_OWNER_VM,
-						 &f);
+	r = amdgpu_job_submit(job, ring, &vm->entity,
+			      AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
 		goto error_free;
 
@@ -821,19 +812,76 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		*fence = fence_get(f);
 	}
 	fence_put(f);
-	if (!amdgpu_enable_scheduler) {
-		amdgpu_ib_free(adev, ib);
-		kfree(ib);
-	}
 	return 0;
 
 error_free:
-	amdgpu_ib_free(adev, ib);
-	kfree(ib);
+	amdgpu_job_free(job);
 	return r;
 }
 
 /**
+ * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @vm: requested vm
+ * @mapping: mapped range and flags to use for the update
+ * @addr: addr to set the area to
+ * @gtt_flags: flags as they are used for GTT
+ * @fence: optional resulting fence
+ *
+ * Split the mapping into smaller chunks so that each update fits
+ * into a SDMA IB.
+ * Returns 0 for success, -EINVAL for failure.
+ */
+static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+				      struct amdgpu_gart *gtt,
+				      uint32_t gtt_flags,
+				      struct amdgpu_vm *vm,
+				      struct amdgpu_bo_va_mapping *mapping,
+				      uint64_t addr, struct fence **fence)
+{
+	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
+
+	uint64_t start = mapping->it.start;
+	uint32_t flags = gtt_flags;
+	int r;
+
+	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+	 * but in case of something, we filter the flags in first place
+	 */
+	if (!(mapping->flags & AMDGPU_PTE_READABLE))
+		flags &= ~AMDGPU_PTE_READABLE;
+	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+		flags &= ~AMDGPU_PTE_WRITEABLE;
+
+	trace_amdgpu_vm_bo_update(mapping);
+
+	addr += mapping->offset;
+
+	if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
+		return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+						   start, mapping->it.last,
+						   flags, addr, fence);
+
+	while (start != mapping->it.last + 1) {
+		uint64_t last;
+
+		last = min((uint64_t)mapping->it.last, start + max_size - 1);
+		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+						start, last, flags, addr,
+						fence);
+		if (r)
+			return r;
+
+		start = last + 1;
+		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
  *
  * @adev: amdgpu_device pointer
@@ -851,14 +899,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm *vm = bo_va->vm;
 	struct amdgpu_bo_va_mapping *mapping;
+	struct amdgpu_gart *gtt = NULL;
 	uint32_t flags;
 	uint64_t addr;
 	int r;
 
 	if (mem) {
 		addr = (u64)mem->start << PAGE_SHIFT;
-		if (mem->mem_type != TTM_PL_TT)
+		switch (mem->mem_type) {
+		case TTM_PL_TT:
+			gtt = &bo_va->bo->adev->gart;
+			break;
+
+		case TTM_PL_VRAM:
 			addr += adev->vm_manager.vram_base_offset;
+			break;
+
+		default:
+			break;
+		}
 	} else {
 		addr = 0;
 	}
@@ -871,8 +930,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
-						flags, &bo_va->last_pt_update);
+		r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
+					       &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -912,21 +971,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	int r;
 
-	spin_lock(&vm->freed_lock);
 	while (!list_empty(&vm->freed)) {
 		mapping = list_first_entry(&vm->freed,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
-		spin_unlock(&vm->freed_lock);
-		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
+
+		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
+					       0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;
 
-		spin_lock(&vm->freed_lock);
 	}
-	spin_unlock(&vm->freed_lock);
-
 	return 0;
 
 }
@@ -953,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 		bo_va = list_first_entry(&vm->invalidated,
 			struct amdgpu_bo_va, vm_status);
 		spin_unlock(&vm->status_lock);
-		mutex_lock(&bo_va->mutex);
+
 		r = amdgpu_vm_bo_update(adev, bo_va, NULL);
-		mutex_unlock(&bo_va->mutex);
 		if (r)
 			return r;
 
@@ -976,7 +1031,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
  * @vm: requested vm
  * @bo: amdgpu buffer object
  *
- * Add @bo into the requested vm (cayman+).
+ * Add @bo into the requested vm.
  * Add @bo to the list of bos associated with the vm
  * Returns newly added bo_va or NULL for failure
  *
@@ -999,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
-	mutex_init(&bo_va->mutex);
+
 	list_add_tail(&bo_va->bo_list, &bo->va);
 
 	return bo_va;
@@ -1051,9 +1106,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	spin_lock(&vm->it_lock);
 	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-	spin_unlock(&vm->it_lock);
 	if (it) {
 		struct amdgpu_bo_va_mapping *tmp;
 		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1077,13 +1130,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	mutex_lock(&bo_va->mutex);
 	list_add(&mapping->list, &bo_va->invalids);
-	mutex_unlock(&bo_va->mutex);
-	spin_lock(&vm->it_lock);
 	interval_tree_insert(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	/* Make sure the page tables are allocated */
 	saddr >>= amdgpu_vm_block_size;
@@ -1117,18 +1165,17 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		 */
 		pt->parent = amdgpu_bo_ref(vm->page_directory);
 
-		r = amdgpu_vm_clear_bo(adev, pt);
+		r = amdgpu_vm_clear_bo(adev, vm, pt);
 		if (r) {
 			amdgpu_bo_unref(&pt);
 			goto error_free;
 		}
 
 		entry->robj = pt;
-		entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
-		entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
 		entry->priority = 0;
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
+		entry->user_pages = NULL;
 		vm->page_tables[pt_idx].addr = 0;
 	}
 
@@ -1136,9 +1183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 error_free:
 	list_del(&mapping->list);
-	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 	kfree(mapping);
 
@@ -1167,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	mutex_lock(&bo_va->mutex);
+
 	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
@@ -1181,25 +1226,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 				break;
 		}
 
-		if (&mapping->list == &bo_va->invalids) {
-			mutex_unlock(&bo_va->mutex);
+		if (&mapping->list == &bo_va->invalids)
 			return -ENOENT;
-		}
 	}
-	mutex_unlock(&bo_va->mutex);
+
 	list_del(&mapping->list);
-	spin_lock(&vm->it_lock);
 	interval_tree_remove(&mapping->it, &vm->va);
-	spin_unlock(&vm->it_lock);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (valid) {
-		spin_lock(&vm->freed_lock);
+	if (valid)
 		list_add(&mapping->list, &vm->freed);
-		spin_unlock(&vm->freed_lock);
-	} else {
+	else
 		kfree(mapping);
-	}
 
 	return 0;
 }
@@ -1210,7 +1248,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @bo_va: requested bo_va
  *
- * Remove @bo_va->bo from the requested vm (cayman+).
+ * Remove @bo_va->bo from the requested vm.
  *
  * Object have to be reserved!
  */
@@ -1228,23 +1266,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
-		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
-		spin_unlock(&vm->it_lock);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		spin_lock(&vm->freed_lock);
 		list_add(&mapping->list, &vm->freed);
-		spin_unlock(&vm->freed_lock);
 	}
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
 		list_del(&mapping->list);
-		spin_lock(&vm->it_lock);
 		interval_tree_remove(&mapping->it, &vm->va);
-		spin_unlock(&vm->it_lock);
 		kfree(mapping);
 	}
+
 	fence_put(bo_va->last_pt_update);
-	mutex_destroy(&bo_va->mutex);
 	kfree(bo_va);
 }
 
@@ -1255,7 +1287,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  * @vm: requested vm
  * @bo: amdgpu buffer object
  *
- * Mark @bo as invalid (cayman+).
+ * Mark @bo as invalid.
  */
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			     struct amdgpu_bo *bo)
@@ -1276,17 +1308,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  *
- * Init @vm fields (cayman+).
+ * Init @vm fields.
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
 	unsigned pd_size, pd_entries;
+	unsigned ring_instance;
+	struct amdgpu_ring *ring;
+	struct amd_sched_rq *rq;
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		vm->ids[i].id = 0;
+		vm->ids[i].mgr_id = NULL;
 		vm->ids[i].flushed_updates = NULL;
 	}
 	vm->va = RB_ROOT;
@@ -1294,8 +1329,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
-	spin_lock_init(&vm->it_lock);
-	spin_lock_init(&vm->freed_lock);
+
 	pd_size = amdgpu_vm_directory_size(adev);
 	pd_entries = amdgpu_vm_num_pdes(adev);
 
@@ -1306,6 +1340,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		return -ENOMEM;
 	}
 
+	/* create scheduler entity for page table updates */
+
+	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
+	ring_instance %= adev->vm_manager.vm_pte_num_rings;
+	ring = adev->vm_manager.vm_pte_rings[ring_instance];
+	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
+	r = amd_sched_entity_init(&ring->sched, &vm->entity,
+				  rq, amdgpu_sched_jobs);
+	if (r)
+		return r;
+
 	vm->page_directory_fence = NULL;
 
 	r = amdgpu_bo_create(adev, pd_size, align, true,
@@ -1313,22 +1358,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 			     NULL, NULL, &vm->page_directory);
 	if (r)
-		return r;
+		goto error_free_sched_entity;
+
 	r = amdgpu_bo_reserve(vm->page_directory, false);
-	if (r) {
-		amdgpu_bo_unref(&vm->page_directory);
-		vm->page_directory = NULL;
-		return r;
-	}
-	r = amdgpu_vm_clear_bo(adev, vm->page_directory);
+	if (r)
+		goto error_free_page_directory;
+
+	r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
 	amdgpu_bo_unreserve(vm->page_directory);
-	if (r) {
-		amdgpu_bo_unref(&vm->page_directory);
-		vm->page_directory = NULL;
-		return r;
-	}
+	if (r)
+		goto error_free_page_directory;
 
 	return 0;
+
+error_free_page_directory:
+	amdgpu_bo_unref(&vm->page_directory);
+	vm->page_directory = NULL;
+
+error_free_sched_entity:
+	amd_sched_entity_fini(&ring->sched, &vm->entity);
+
+	return r;
 }
 
 /**
@@ -1337,7 +1387,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: requested vm
  *
- * Tear down @vm (cayman+).
+ * Tear down @vm.
  * Unbind the VM and remove all bos from the vm bo list
  */
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
@@ -1345,6 +1395,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	int i;
 
+	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
 	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
 	}
@@ -1364,14 +1416,38 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		unsigned id = vm->ids[i].id;
+		struct amdgpu_vm_id *id = &vm->ids[i];
 
-		atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
-				    (long)vm, 0);
-		fence_put(vm->ids[i].flushed_updates);
+		if (id->mgr_id)
+			atomic_long_cmpxchg(&id->mgr_id->owner,
+					    (long)id, 0);
+		fence_put(id->flushed_updates);
+	}
+}
+
+/**
+ * amdgpu_vm_manager_init - init the VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+{
+	unsigned i;
+
+	INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
+
+	/* skip over VMID 0, since it is the system VM */
+	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
+		amdgpu_vm_reset_id(adev, i);
+		list_add_tail(&adev->vm_manager.ids[i].list,
+			      &adev->vm_manager.ids_lru);
 	}
 
+	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 474ca02b0949..1f9109d3348b 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -3017,7 +3017,6 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 						      &memory_level->MinVddcPhases);
 
 	memory_level->EnabledForThrottle = 1;
-	memory_level->EnabledForActivity = 1;
 	memory_level->UpH = 0;
 	memory_level->DownH = 100;
 	memory_level->VoltageDownH = 0;
@@ -3376,7 +3375,6 @@ static int ci_populate_single_graphic_level(struct amdgpu_device *adev,
 	graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2);
 	graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm);
 	graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1);
-	graphic_level->EnabledForActivity = 1;
 
 	return 0;
 }
@@ -3407,6 +3405,7 @@ static int ci_populate_all_graphic_levels(struct amdgpu_device *adev)
 			pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
 				PPSMC_DISPLAY_WATERMARK_HIGH;
 	}
+	pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1;
 
 	pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
 	pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
@@ -3450,6 +3449,8 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
 			return ret;
 	}
 
+	pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
+
 	if ((dpm_table->mclk_table.count >= 2) &&
 	    ((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
 		pi->smc_state_table.MemoryLevel[1].MinVddc =
@@ -4381,26 +4382,6 @@ static int ci_dpm_force_performance_level(struct amdgpu_device *adev,
 				}
 			}
 		}
-		if ((!pi->pcie_dpm_key_disabled) &&
-		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
-			levels = 0;
-			tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
-			while (tmp >>= 1)
-				levels++;
-			if (levels) {
-				ret = ci_dpm_force_state_pcie(adev, level);
-				if (ret)
-					return ret;
-				for (i = 0; i < adev->usec_timeout; i++) {
-					tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) &
-					TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >>
-					TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT;
-					if (tmp == levels)
-						break;
-					udelay(1);
-				}
-			}
-		}
 	} else if (level == AMDGPU_DPM_FORCED_LEVEL_LOW) {
 		if ((!pi->sclk_dpm_key_disabled) &&
 		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -5395,30 +5376,6 @@ static int ci_dpm_enable(struct amdgpu_device *adev)
 
 	ci_update_current_ps(adev, boot_ps);
 
-	if (adev->irq.installed &&
-	    amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
-#if 0
-		PPSMC_Result result;
-#endif
-		ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN,
-						       CISLANDS_TEMP_RANGE_MAX);
-		if (ret) {
-			DRM_ERROR("ci_thermal_set_temperature_range failed\n");
-			return ret;
-		}
-		amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
-			       AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
-		amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
-			       AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
-
-#if 0
-		result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt);
-
-		if (result != PPSMC_Result_OK)
-			DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
-#endif
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 155965ed14a3..bddc9ba11495 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1059,257 +1059,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static void cik_print_gpu_status_regs(struct amdgpu_device *adev)
-{
-	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
-		RREG32(mmGRBM_STATUS));
-	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
-		RREG32(mmGRBM_STATUS2));
-	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE0));
-	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE1));
-	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE2));
-	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE3));
-	dev_info(adev->dev, "  SRBM_STATUS=0x%08X\n",
-		RREG32(mmSRBM_STATUS));
-	dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
-		RREG32(mmSRBM_STATUS2));
-	dev_info(adev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
-		RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
-	dev_info(adev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
-		 RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
-	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
-	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT2));
-	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT3));
-	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
-		 RREG32(mmCP_CPF_BUSY_STAT));
-	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_CPF_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
-	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
-	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_CPC_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
-}
-
-/**
- * cik_gpu_check_soft_reset - check which blocks are busy
- *
- * @adev: amdgpu_device pointer
- *
- * Check which blocks are busy and return the relevant reset
- * mask to be used by cik_gpu_soft_reset().
- * Returns a mask of the blocks to be reset.
- */
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev)
-{
-	u32 reset_mask = 0;
-	u32 tmp;
-
-	/* GRBM_STATUS */
-	tmp = RREG32(mmGRBM_STATUS);
-	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
-		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
-		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
-		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
-		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
-		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
-		reset_mask |= AMDGPU_RESET_GFX;
-
-	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
-		reset_mask |= AMDGPU_RESET_CP;
-
-	/* GRBM_STATUS2 */
-	tmp = RREG32(mmGRBM_STATUS2);
-	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_RLC;
-
-	/* SDMA0_STATUS_REG */
-	tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
-	if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-		reset_mask |= AMDGPU_RESET_DMA;
-
-	/* SDMA1_STATUS_REG */
-	tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-	if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-		reset_mask |= AMDGPU_RESET_DMA1;
-
-	/* SRBM_STATUS2 */
-	tmp = RREG32(mmSRBM_STATUS2);
-	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_DMA;
-
-	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_DMA1;
-
-	/* SRBM_STATUS */
-	tmp = RREG32(mmSRBM_STATUS);
-
-	if (tmp & SRBM_STATUS__IH_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_IH;
-
-	if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_SEM;
-
-	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
-		reset_mask |= AMDGPU_RESET_GRBM;
-
-	if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_VMC;
-
-	if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-		   SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
-		reset_mask |= AMDGPU_RESET_MC;
-
-	if (amdgpu_display_is_display_hung(adev))
-		reset_mask |= AMDGPU_RESET_DISPLAY;
-
-	/* Skip MC reset as it's mostly likely not hung, just busy */
-	if (reset_mask & AMDGPU_RESET_MC) {
-		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
-		reset_mask &= ~AMDGPU_RESET_MC;
-	}
-
-	return reset_mask;
-}
-
-/**
- * cik_gpu_soft_reset - soft reset GPU
- *
- * @adev: amdgpu_device pointer
- * @reset_mask: mask of which blocks to reset
- *
- * Soft reset the blocks specified in @reset_mask.
- */
-static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
-{
-	struct amdgpu_mode_mc_save save;
-	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
-	u32 tmp;
-
-	if (reset_mask == 0)
-		return;
-
-	dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
-
-	cik_print_gpu_status_regs(adev);
-	dev_info(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-		 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
-	dev_info(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-		 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
-
-	/* disable CG/PG */
-
-	/* stop the rlc */
-	gfx_v7_0_rlc_stop(adev);
-
-	/* Disable GFX parsing/prefetching */
-	WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
-	/* Disable MEC parsing/prefetching */
-	WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
-	if (reset_mask & AMDGPU_RESET_DMA) {
-		/* sdma0 */
-		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
-		tmp |= SDMA0_F32_CNTL__HALT_MASK;
-		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-	}
-	if (reset_mask & AMDGPU_RESET_DMA1) {
-		/* sdma1 */
-		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
-		tmp |= SDMA0_F32_CNTL__HALT_MASK;
-		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-	}
-
-	gmc_v7_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
-		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
-	}
-
-	if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP))
-		grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
-			GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
-
-	if (reset_mask & AMDGPU_RESET_CP) {
-		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
-
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
-	}
-
-	if (reset_mask & AMDGPU_RESET_DMA)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
-
-	if (reset_mask & AMDGPU_RESET_DMA1)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
-
-	if (reset_mask & AMDGPU_RESET_DISPLAY)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
-	if (reset_mask & AMDGPU_RESET_RLC)
-		grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
-
-	if (reset_mask & AMDGPU_RESET_SEM)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK;
-
-	if (reset_mask & AMDGPU_RESET_IH)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
-
-	if (reset_mask & AMDGPU_RESET_GRBM)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
-
-	if (reset_mask & AMDGPU_RESET_VMC)
-		srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK;
-
-	if (!(adev->flags & AMD_IS_APU)) {
-		if (reset_mask & AMDGPU_RESET_MC)
-			srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK;
-	}
-
-	if (grbm_soft_reset) {
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-		tmp |= grbm_soft_reset;
-		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-		WREG32(mmGRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-
-		udelay(50);
-
-		tmp &= ~grbm_soft_reset;
-		WREG32(mmGRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-	}
-
-	if (srbm_soft_reset) {
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-		tmp |= srbm_soft_reset;
-		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
-		WREG32(mmSRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-
-		udelay(50);
-
-		tmp &= ~srbm_soft_reset;
-		WREG32(mmSRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-	}
-
-	/* Wait a little for things to settle down */
-	udelay(50);
-
-	gmc_v7_0_mc_resume(adev, &save);
-	udelay(50);
-
-	cik_print_gpu_status_regs(adev);
-}
-
 struct kv_reset_save_regs {
 	u32 gmcon_reng_execute;
 	u32 gmcon_misc;
@@ -1405,45 +1154,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
 
 static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
-	struct amdgpu_mode_mc_save save;
 	struct kv_reset_save_regs kv_save = { 0 };
-	u32 tmp, i;
+	u32 i;
 
 	dev_info(adev->dev, "GPU pci config reset\n");
 
-	/* disable dpm? */
-
-	/* disable cg/pg */
-
-	/* Disable GFX parsing/prefetching */
-	WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
-		CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
-	/* Disable MEC parsing/prefetching */
-	WREG32(mmCP_MEC_CNTL,
-			CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
-	/* sdma0 */
-	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
-	tmp |= SDMA0_F32_CNTL__HALT_MASK;
-	WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-	/* sdma1 */
-	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
-	tmp |= SDMA0_F32_CNTL__HALT_MASK;
-	WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-	/* XXX other engines? */
-
-	/* halt the rlc, disable cp internal ints */
-	gfx_v7_0_rlc_stop(adev);
-
-	udelay(50);
-
-	/* disable mem access */
-	gmc_v7_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
-		dev_warn(adev->dev, "Wait for MC idle timed out !\n");
-	}
-
 	if (adev->flags & AMD_IS_APU)
 		kv_save_regs_for_reset(adev, &kv_save);
 
@@ -1489,26 +1204,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
  */
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
-	u32 reset_mask;
-
-	reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
+	cik_set_bios_scratch_engine_hung(adev, true);
 
-	if (reset_mask)
-		cik_set_bios_scratch_engine_hung(adev, true);
+	cik_gpu_pci_config_reset(adev);
 
-	/* try soft reset */
-	cik_gpu_soft_reset(adev, reset_mask);
-
-	reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
-
-	/* try pci config reset */
-	if (reset_mask && amdgpu_hard_reset)
-		cik_gpu_pci_config_reset(adev);
-
-	reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
-
-	if (!reset_mask)
-		cik_set_bios_scratch_engine_hung(adev, false);
+	cik_set_bios_scratch_engine_hung(adev, false);
 
 	return 0;
 }
@@ -2328,8 +2028,6 @@ static int cik_common_early_init(void *handle)
 
 	adev->asic_funcs = &cik_asic_funcs;
 
-	adev->has_uvd = true;
-
 	adev->rev_id = cik_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c55ecf0ea845..d3ac3298fba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 			   struct amdgpu_ib *ib)
 {
-	u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+	u32 extra_bits = ib->vm_id & 0xf;
 	u32 next_rptr = ring->wptr + 5;
 
 	while ((next_rptr & 7) != 4)
@@ -261,6 +261,13 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
 }
 
+static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
+
 /**
  * cik_sdma_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -295,30 +302,6 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
- * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (CIK).
- */
-static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	u64 addr = semaphore->gpu_addr;
-	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
-
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
-	amdgpu_ring_write(ring, addr & 0xfffffff8);
-	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-
-	return true;
-}
-
-/**
  * cik_sdma_gfx_stop - stop the gfx async dma engines
  *
  * @adev: amdgpu_device pointer
@@ -417,6 +400,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		cik_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
+		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+		       adev->gfx.config.gb_addr_config & 0x70);
+
 		WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
 
@@ -584,7 +570,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
-	r = amdgpu_ring_lock(ring, 5);
+	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_wb_free(adev, index);
@@ -595,7 +581,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
 	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -645,7 +631,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, 256, &ib);
+	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		goto err0;
@@ -657,9 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[3] = 1;
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -685,7 +669,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -738,7 +723,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
  * Update PTEs by writing them manually using sDMA (CIK).
  */
 static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
-				  uint64_t pe,
+				  const dma_addr_t *pages_addr, uint64_t pe,
 				  uint64_t addr, unsigned count,
 				  uint32_t incr, uint32_t flags)
 {
@@ -757,14 +742,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
 		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 		ib->ptr[ib->length_dw++] = ndw;
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-			if (flags & AMDGPU_PTE_SYSTEM) {
-				value = amdgpu_vm_map_gart(ib->ring->adev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
-			} else if (flags & AMDGPU_PTE_VALID) {
-				value = addr;
-			} else {
-				value = 0;
-			}
+			value = amdgpu_vm_map_gart(pages_addr, addr);
 			addr += incr;
 			value |= flags;
 			ib->ptr[ib->length_dw++] = value;
@@ -827,9 +805,9 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
  * @ib: indirect buffer to fill with padding
  *
  */
-static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
+static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
 	u32 pad_count;
 	int i;
 
@@ -845,6 +823,30 @@ static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 }
 
 /**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+					    SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+					    SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+					    SDMA_POLL_REG_MEM_EXTRA_M));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
+/**
  * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1097,6 +1099,8 @@ static void cik_sdma_print_status(void *handle)
 			 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
 		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
 			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
+		dev_info(adev->dev, "  SDMA%d_TILING_CONFIG=0x%08X\n",
+			 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
 		mutex_lock(&adev->srbm_mutex);
 		for (j = 0; j < 16; j++) {
 			cik_srbm_select(adev, 0, 0, 0, j);
@@ -1297,12 +1301,14 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = cik_sdma_ring_emit_ib,
 	.emit_fence = cik_sdma_ring_emit_fence,
-	.emit_semaphore = cik_sdma_ring_emit_semaphore,
+	.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
 	.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
 	.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.insert_nop = cik_sdma_ring_insert_nop,
+	.pad_ib = cik_sdma_ring_pad_ib,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1399,14 +1405,18 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 	.copy_pte = cik_sdma_vm_copy_pte,
 	.write_pte = cik_sdma_vm_write_pte,
 	.set_pte_pde = cik_sdma_vm_set_pte_pde,
-	.pad_ib = cik_sdma_vm_pad_ib,
 };
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
-		adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 7f6d457f250a..60d4493206dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -46,9 +46,6 @@
 #define BONAIRE_GB_ADDR_CONFIG_GOLDEN        0x12010001
 #define HAWAII_GB_ADDR_CONFIG_GOLDEN         0x12011003
 
-#define CIK_RB_BITMAP_WIDTH_PER_SH     2
-#define HAWAII_RB_BITMAP_WIDTH_PER_SH  4
-
 #define AMDGPU_NUM_OF_VMIDS	8
 
 #define		PIPEID(x)					((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 093599aba64b..6de2ce535e37 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1668,6 +1668,9 @@ static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1973,7 +1976,7 @@ static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1986,8 +1989,16 @@ static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
@@ -2064,8 +2075,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2079,9 +2089,9 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2670,7 +2680,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }
 
@@ -2701,13 +2710,13 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v10_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v10_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2890,7 +2899,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
 
 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;
 
 	amdgpu_crtc->max_cursor_width = 128;
@@ -2982,8 +2990,6 @@ static int dce_v10_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->mode_info.mode_config_initialized = true;
-
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
 	adev->ddev->mode_config.max_width = 16384;
@@ -3014,7 +3020,9 @@ static int dce_v10_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v10_0_afmt_init(adev);
+	r = dce_v10_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v10_0_audio_init(adev);
 	if (r)
@@ -3022,7 +3030,8 @@ static int dce_v10_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v10_0_sw_fini(void *handle)
@@ -3366,7 +3375,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 
 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);
 
 	return 0;
 }
@@ -3624,16 +3633,8 @@ dce_v10_0_ext_dpms(struct drm_encoder *encoder, int mode)
 
 }
 
-static bool dce_v10_0_ext_mode_fixup(struct drm_encoder *encoder,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static const struct drm_encoder_helper_funcs dce_v10_0_ext_helper_funcs = {
 	.dpms = dce_v10_0_ext_dpms,
-	.mode_fixup = dce_v10_0_ext_mode_fixup,
 	.prepare = dce_v10_0_ext_prepare,
 	.mode_set = dce_v10_0_ext_mode_set,
 	.commit = dce_v10_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 8e67249d4367..e9ccc6b787f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1658,6 +1658,9 @@ static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1963,7 +1966,7 @@ static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1976,8 +1979,16 @@ static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
@@ -2054,8 +2065,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2069,9 +2079,9 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2661,7 +2671,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }
 
@@ -2692,13 +2701,13 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v11_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v11_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2881,7 +2890,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
 
 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;
 
 	amdgpu_crtc->max_cursor_width = 128;
@@ -2963,7 +2971,7 @@ static int dce_v11_0_sw_init(void *handle)
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
 		if (r)
-		return r;
+			return r;
 	}
 
 	for (i = 8; i < 20; i += 2) {
@@ -2975,9 +2983,7 @@ static int dce_v11_0_sw_init(void *handle)
 	/* HPD hotplug */
 	r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
 	if (r)
-	return r;
-
-	adev->mode_info.mode_config_initialized = true;
+		return r;
 
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
@@ -2996,6 +3002,7 @@ static int dce_v11_0_sw_init(void *handle)
 	adev->ddev->mode_config.max_width = 16384;
 	adev->ddev->mode_config.max_height = 16384;
 
+
 	/* allocate crtcs */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		r = dce_v11_0_crtc_init(adev, i);
@@ -3009,7 +3016,9 @@ static int dce_v11_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v11_0_afmt_init(adev);
+	r = dce_v11_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v11_0_audio_init(adev);
 	if (r)
@@ -3017,7 +3026,8 @@ static int dce_v11_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v11_0_sw_fini(void *handle)
@@ -3361,7 +3371,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 
 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);
 
 	return 0;
 }
@@ -3619,16 +3629,8 @@ dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
 
 }
 
-static bool dce_v11_0_ext_mode_fixup(struct drm_encoder *encoder,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
 	.dpms = dce_v11_0_ext_dpms,
-	.mode_fixup = dce_v11_0_ext_mode_fixup,
 	.prepare = dce_v11_0_ext_prepare,
 	.mode_set = dce_v11_0_ext_mode_set,
 	.commit = dce_v11_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index d0e128c24813..e56b55d8c280 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1639,6 +1639,9 @@ static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
 {
 	int i;
 
+	if (!amdgpu_audio)
+		return;
+
 	if (!adev->mode_info.audio.enabled)
 		return;
 
@@ -1910,7 +1913,7 @@ static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
 		  enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
 }
 
-static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -1923,8 +1926,16 @@ static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
 		if (adev->mode_info.afmt[i]) {
 			adev->mode_info.afmt[i]->offset = dig_offsets[i];
 			adev->mode_info.afmt[i]->id = i;
+		} else {
+			int j;
+			for (j = 0; j < i; j++) {
+				kfree(adev->mode_info.afmt[j]);
+				adev->mode_info.afmt[j] = NULL;
+			}
+			return -ENOMEM;
 		}
 	}
+	return 0;
 }
 
 static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
@@ -2001,8 +2012,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		amdgpu_fb = to_amdgpu_framebuffer(fb);
 		target_fb = fb;
-	}
-	else {
+	} else {
 		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
 		target_fb = crtc->primary->fb;
 	}
@@ -2016,9 +2026,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
 	if (unlikely(r != 0))
 		return r;
 
-	if (atomic)
+	if (atomic) {
 		fb_location = amdgpu_bo_gpu_offset(rbo);
-	else {
+	} else {
 		r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
 		if (unlikely(r != 0)) {
 			amdgpu_bo_unreserve(rbo);
@@ -2582,7 +2592,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }
 
@@ -2613,13 +2622,13 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
 		dce_v8_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
 		if (amdgpu_crtc->enabled) {
 			dce_v8_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2809,7 +2818,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
 
 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;
 
 	amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
@@ -2892,8 +2900,6 @@ static int dce_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->mode_info.mode_config_initialized = true;
-
 	adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
 
 	adev->ddev->mode_config.max_width = 16384;
@@ -2924,7 +2930,9 @@ static int dce_v8_0_sw_init(void *handle)
 		return -EINVAL;
 
 	/* setup afmt */
-	dce_v8_0_afmt_init(adev);
+	r = dce_v8_0_afmt_init(adev);
+	if (r)
+		return r;
 
 	r = dce_v8_0_audio_init(adev);
 	if (r)
@@ -2932,7 +2940,8 @@ static int dce_v8_0_sw_init(void *handle)
 
 	drm_kms_helper_poll_init(adev->ddev);
 
-	return r;
+	adev->mode_info.mode_config_initialized = true;
+	return 0;
 }
 
 static int dce_v8_0_sw_fini(void *handle)
@@ -3375,7 +3384,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 
 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);
 
 	return 0;
 }
@@ -3554,16 +3563,8 @@ dce_v8_0_ext_dpms(struct drm_encoder *encoder, int mode)
 
 }
 
-static bool dce_v8_0_ext_mode_fixup(struct drm_encoder *encoder,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static const struct drm_encoder_helper_funcs dce_v8_0_ext_helper_funcs = {
 	.dpms = dce_v8_0_ext_dpms,
-	.mode_fixup = dce_v8_0_ext_mode_fixup,
 	.prepare = dce_v8_0_ext_prepare,
 	.mode_set = dce_v8_0_ext_mode_set,
 	.commit = dce_v8_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index e35340afd3db..b336c918d6a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -272,6 +272,12 @@ static int fiji_smu_upload_firmware_image(struct amdgpu_device *adev)
 	if (!adev->pm.fw)
 		return -EINVAL;
 
+	/* Skip SMC ucode loading on SR-IOV capable boards.
+	 * vbios does this for us in asic_init in that case.
+	 */
+	if (adev->virtualization.supports_sr_iov)
+		return 0;
+
 	hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
 	amdgpu_ucode_print_smc_hdr(&hdr->header);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 06602df707f8..bb8709066fd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -31,8 +31,6 @@
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
 
-#include "uvd/uvd_4_2_d.h"
-
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
@@ -1006,9 +1004,15 @@ out:
  */
 static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
 {
-	const u32 num_tile_mode_states = 32;
-	const u32 num_secondary_tile_mode_states = 16;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	const u32 num_tile_mode_states =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	const u32 num_secondary_tile_mode_states =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+	u32 reg_offset, split_equal_to_row_size;
+	uint32_t *tile, *macrotile;
+
+	tile = adev->gfx.config.tile_mode_array;
+	macrotile = adev->gfx.config.macrotile_mode_array;
 
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
@@ -1023,832 +1027,531 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
 		break;
 	}
 
+	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+		tile[reg_offset] = 0;
+	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+		macrotile[reg_offset] = 0;
+
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 7:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 12:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 15:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 17:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 19:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 20:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 21:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 22:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 23:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 24:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 25:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 26:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 30:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
-		}
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[21] =  (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			if (reg_offset != 7)
+				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
 		break;
 	case CHIP_HAWAII:
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 15:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 18:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 19:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
-				break;
-			case 20:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 21:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 22:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 23:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 24:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 25:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 26:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
-		}
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			if (reg_offset != 7)
+				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
 		break;
 	case CHIP_KABINI:
 	case CHIP_KAVERI:
 	case CHIP_MULLINS:
 	default:
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 7:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 12:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 15:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 17:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 18:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 19:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
-				break;
-			case 20:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 21:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 22:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 23:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 24:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 25:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 26:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						PIPE_CONFIG(ADDR_SURF_P2) |
-						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
-				break;
-			case 30:
-				gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
-		}
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P2));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+		tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+		tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			if (reg_offset != 7)
+				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
 		break;
 	}
 }
@@ -1893,45 +1596,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
  */
 static u32 gfx_v7_0_create_bitmask(u32 bit_width)
 {
-	u32 i, mask = 0;
-
-	for (i = 0; i < bit_width; i++) {
-		mask <<= 1;
-		mask |= 1;
-	}
-	return mask;
+	return (u32)((1ULL << bit_width) - 1);
 }
 
 /**
- * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs
+ * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
  *
  * @adev: amdgpu_device pointer
- * @max_rb_num: max RBs (render backends) for the asic
- * @se_num: number of SEs (shader engines) for the asic
- * @sh_per_se: number of SH blocks per SE for the asic
  *
- * Calculates the bitmask of disabled RBs (CIK).
- * Returns the disabled RB bitmask.
+ * Calculates the bitmask of enabled RBs (CIK).
+ * Returns the enabled RB bitmask.
  */
-static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
-				    u32 max_rb_num_per_se,
-				    u32 sh_per_se)
+static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
-	if (data & 1)
-		data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-	else
-		data = 0;
-
 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
 
+	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se);
+	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
+				       adev->gfx.config.max_sh_per_se);
 
-	return data & mask;
+	return (~data) & mask;
 }
 
 /**
@@ -1940,73 +1629,31 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @se_num: number of SEs (shader engines) for the asic
  * @sh_per_se: number of SH blocks per SE for the asic
- * @max_rb_num: max RBs (render backends) for the asic
  *
  * Configures per-SE/SH RB registers (CIK).
  */
-static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
-			      u32 se_num, u32 sh_per_se,
-			      u32 max_rb_num_per_se)
+static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 {
 	int i, j;
-	u32 data, mask;
-	u32 disabled_rbs = 0;
-	u32 enabled_rbs = 0;
+	u32 data;
+	u32 active_rbs = 0;
+	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+					adev->gfx.config.max_sh_per_se;
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		for (j = 0; j < sh_per_se; j++) {
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v7_0_select_se_sh(adev, i, j);
-			data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
-			if (adev->asic_type == CHIP_HAWAII)
-				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
-			else
-				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
+			data = gfx_v7_0_get_rb_active_bitmap(adev);
+			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+					       rb_bitmap_width_per_sh);
 		}
 	}
 	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
-	mask = 1;
-	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
-		if (!(disabled_rbs & mask))
-			enabled_rbs |= mask;
-		mask <<= 1;
-	}
-
-	adev->gfx.config.backend_enable_mask = enabled_rbs;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
-		data = 0;
-		for (j = 0; j < sh_per_se; j++) {
-			switch (enabled_rbs & 3) {
-			case 0:
-				if (j == 0)
-					data |= (RASTER_CONFIG_RB_MAP_3 <<
-						PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
-				else
-					data |= (RASTER_CONFIG_RB_MAP_0 <<
-						PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
-				break;
-			case 1:
-				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
-				break;
-			case 2:
-				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
-				break;
-			case 3:
-			default:
-				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
-				break;
-			}
-			enabled_rbs >>= 2;
-		}
-		WREG32(mmPA_SC_RASTER_CONFIG, data);
-	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
+	adev->gfx.config.backend_enable_mask = active_rbs;
+	adev->gfx.config.num_rbs = hweight32(active_rbs);
 }
 
 /**
@@ -2059,192 +1706,23 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
  */
 static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 {
-	u32 gb_addr_config;
-	u32 mc_shared_chmap, mc_arb_ramcfg;
-	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
-	u32 sh_mem_cfg;
-	u32 tmp;
+	u32 tmp, sh_mem_cfg;
 	int i;
 
-	switch (adev->asic_type) {
-	case CHIP_BONAIRE:
-		adev->gfx.config.max_shader_engines = 2;
-		adev->gfx.config.max_tile_pipes = 4;
-		adev->gfx.config.max_cu_per_sh = 7;
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_backends_per_se = 2;
-		adev->gfx.config.max_texture_channel_caches = 4;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 32;
-		adev->gfx.config.max_hw_contexts = 8;
-
-		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
-		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
-		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
-		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
-		break;
-	case CHIP_HAWAII:
-		adev->gfx.config.max_shader_engines = 4;
-		adev->gfx.config.max_tile_pipes = 16;
-		adev->gfx.config.max_cu_per_sh = 11;
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_backends_per_se = 4;
-		adev->gfx.config.max_texture_channel_caches = 16;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 32;
-		adev->gfx.config.max_hw_contexts = 8;
-
-		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
-		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
-		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
-		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
-		break;
-	case CHIP_KAVERI:
-		adev->gfx.config.max_shader_engines = 1;
-		adev->gfx.config.max_tile_pipes = 4;
-		if ((adev->pdev->device == 0x1304) ||
-		    (adev->pdev->device == 0x1305) ||
-		    (adev->pdev->device == 0x130C) ||
-		    (adev->pdev->device == 0x130F) ||
-		    (adev->pdev->device == 0x1310) ||
-		    (adev->pdev->device == 0x1311) ||
-		    (adev->pdev->device == 0x131C)) {
-			adev->gfx.config.max_cu_per_sh = 8;
-			adev->gfx.config.max_backends_per_se = 2;
-		} else if ((adev->pdev->device == 0x1309) ||
-			   (adev->pdev->device == 0x130A) ||
-			   (adev->pdev->device == 0x130D) ||
-			   (adev->pdev->device == 0x1313) ||
-			   (adev->pdev->device == 0x131D)) {
-			adev->gfx.config.max_cu_per_sh = 6;
-			adev->gfx.config.max_backends_per_se = 2;
-		} else if ((adev->pdev->device == 0x1306) ||
-			   (adev->pdev->device == 0x1307) ||
-			   (adev->pdev->device == 0x130B) ||
-			   (adev->pdev->device == 0x130E) ||
-			   (adev->pdev->device == 0x1315) ||
-			   (adev->pdev->device == 0x131B)) {
-			adev->gfx.config.max_cu_per_sh = 4;
-			adev->gfx.config.max_backends_per_se = 1;
-		} else {
-			adev->gfx.config.max_cu_per_sh = 3;
-			adev->gfx.config.max_backends_per_se = 1;
-		}
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_texture_channel_caches = 4;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 16;
-		adev->gfx.config.max_hw_contexts = 8;
-
-		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
-		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
-		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
-		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
-		break;
-	case CHIP_KABINI:
-	case CHIP_MULLINS:
-	default:
-		adev->gfx.config.max_shader_engines = 1;
-		adev->gfx.config.max_tile_pipes = 2;
-		adev->gfx.config.max_cu_per_sh = 2;
-		adev->gfx.config.max_sh_per_se = 1;
-		adev->gfx.config.max_backends_per_se = 1;
-		adev->gfx.config.max_texture_channel_caches = 2;
-		adev->gfx.config.max_gprs = 256;
-		adev->gfx.config.max_gs_threads = 16;
-		adev->gfx.config.max_hw_contexts = 8;
-
-		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
-		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
-		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
-		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
-		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
-		break;
-	}
-
 	WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
 
-	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
-	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
-	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
-
-	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
-	adev->gfx.config.mem_max_burst_length_bytes = 256;
-	if (adev->flags & AMD_IS_APU) {
-		/* Get memory bank mapping mode. */
-		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
-		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
-		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
-
-		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
-		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
-		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
-
-		/* Validate settings in case only one DIMM installed. */
-		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
-			dimm00_addr_map = 0;
-		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
-			dimm01_addr_map = 0;
-		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
-			dimm10_addr_map = 0;
-		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
-			dimm11_addr_map = 0;
-
-		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
-		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
-		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
-			adev->gfx.config.mem_row_size_in_kb = 2;
-		else
-			adev->gfx.config.mem_row_size_in_kb = 1;
-	} else {
-		tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
-		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
-		if (adev->gfx.config.mem_row_size_in_kb > 4)
-			adev->gfx.config.mem_row_size_in_kb = 4;
-	}
-	/* XXX use MC settings? */
-	adev->gfx.config.shader_engine_tile_size = 32;
-	adev->gfx.config.num_gpus = 1;
-	adev->gfx.config.multi_gpu_tile_size = 64;
-
-	/* fix up row size */
-	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
-	switch (adev->gfx.config.mem_row_size_in_kb) {
-	case 1:
-	default:
-		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
-		break;
-	case 2:
-		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
-		break;
-	case 4:
-		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
-		break;
-	}
-	adev->gfx.config.gb_addr_config = gb_addr_config;
-
-	WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
-	WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
-	WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
-	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
-	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
-	WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
-	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
-	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
+	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
 
 	gfx_v7_0_tiling_mode_table_init(adev);
 
-	gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
-			  adev->gfx.config.max_sh_per_se,
-			  adev->gfx.config.max_backends_per_se);
+	gfx_v7_0_setup_rb(adev);
 
 	/* set HW defaults for 3D engine */
 	WREG32(mmCP_MEQ_THRESHOLDS,
-			(0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
-			(0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+	       (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+	       (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	/*
@@ -2255,7 +1733,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
-	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 
+	sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 				   SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 
 	mutex_lock(&adev->srbm_mutex);
@@ -2379,7 +1857,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = amdgpu_ring_lock(ring, 3);
+	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_gfx_scratch_free(adev, scratch);
@@ -2388,7 +1866,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
@@ -2447,6 +1925,25 @@ static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 }
 
 /**
+ * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
+ *
+ * @adev: amdgpu_device pointer
+ * @ridx: amdgpu ring index
+ *
+ * Emits an hdp invalidate on the cp.
+ */
+static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) |
+				 WR_CONFIRM));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
  *
  * @adev: amdgpu_device pointer
@@ -2516,36 +2013,6 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, upper_32_bits(seq));
 }
 
-/**
- * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring
- *
- * @ring: amdgpu ring buffer object
- * @semaphore: amdgpu semaphore object
- * @emit_wait: Is this a sempahore wait?
- *
- * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
- * from running ahead of semaphore waits.
- */
-static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
-
-	amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
-	amdgpu_ring_write(ring, addr & 0xffffffff);
-	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
-
-	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
-		/* Prevent the PFP from running ahead of the semaphore wait */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-		amdgpu_ring_write(ring, 0x0);
-	}
-
-	return true;
-}
-
 /*
  * IB stuff
  */
@@ -2593,8 +2060,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw |
-		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+	control |= ib->length_dw | (ib->vm_id << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -2622,8 +2088,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw |
-			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+	control |= ib->length_dw | (ib->vm_id << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -2661,7 +2126,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, 256, &ib);
+	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		goto err1;
@@ -2671,9 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err2;
 
@@ -2700,7 +2163,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 
 err2:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -2842,7 +2306,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
 
 	gfx_v7_0_cp_gfx_enable(adev, true);
 
-	r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8);
+	r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -2911,7 +2375,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
 	amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 	amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	return 0;
 }
@@ -2989,21 +2453,14 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
 
 static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
 {
-	u32 rptr;
-
-	rptr = ring->adev->wb.wb[ring->rptr_offs];
-
-	return rptr;
+	return ring->adev->wb.wb[ring->rptr_offs];
 }
 
 static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	u32 wptr;
-
-	wptr = RREG32(mmCP_RB0_WPTR);
 
-	return wptr;
+	return RREG32(mmCP_RB0_WPTR);
 }
 
 static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
@@ -3016,21 +2473,13 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
 
 static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
 {
-	u32 rptr;
-
-	rptr = ring->adev->wb.wb[ring->rptr_offs];
-
-	return rptr;
+	return ring->adev->wb.wb[ring->rptr_offs];
 }
 
 static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 {
-	u32 wptr;
-
 	/* XXX check if swapping is necessary on BE */
-	wptr = ring->adev->wb.wb[ring->wptr_offs];
-
-	return wptr;
+	return ring->adev->wb.wb[ring->wptr_offs];
 }
 
 static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -3126,21 +2575,6 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 }
 
 /**
- * gfx_v7_0_cp_compute_start - start the compute queues
- *
- * @adev: amdgpu_device pointer
- *
- * Enable the compute queues.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev)
-{
-	gfx_v7_0_cp_compute_enable(adev, true);
-
-	return 0;
-}
-
-/**
  * gfx_v7_0_cp_compute_fini - stop the compute queues
  *
  * @adev: amdgpu_device pointer
@@ -3330,9 +2764,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 	u32 *buf;
 	struct bonaire_mqd *mqd;
 
-	r = gfx_v7_0_cp_compute_start(adev);
-	if (r)
-		return r;
+	gfx_v7_0_cp_compute_enable(adev, true);
 
 	/* fix up chicken bits */
 	tmp = RREG32(mmCP_CPF_DEBUG);
@@ -3610,6 +3042,26 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ *
+ * @ring: the ring to emmit the commands to
+ *
+ * Sync the command pipeline with the PFP. E.g. wait for everything
+ * to be completed.
+ */
+static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	if (usepfp) {
+		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
 /*
  * vm
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3641,14 +3093,6 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, 4); /* poll interval */
 
-	if (usepfp) {
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
-		amdgpu_ring_write(ring, 0);
-	}
-
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)));
@@ -4408,28 +3852,19 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 	}
 }
 
-static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev,
-					 u32 se, u32 sh)
+static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
-	u32 mask = 0, tmp, tmp1;
-	int i;
-
-	gfx_v7_0_select_se_sh(adev, se, sh);
-	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
-	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	u32 data, mask;
 
-	tmp &= 0xffff0000;
+	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
 
-	tmp |= tmp1;
-	tmp >>= 16;
+	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
-		mask <<= 1;
-		mask |= 1;
-	}
+	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
-	return (~tmp) & mask;
+	return (~data) & mask;
 }
 
 static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
@@ -4767,6 +4202,172 @@ static int gfx_v7_0_late_init(void *handle)
 	return 0;
 }
 
+static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
+{
+	u32 gb_addr_config;
+	u32 mc_shared_chmap, mc_arb_ramcfg;
+	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+	u32 tmp;
+
+	switch (adev->asic_type) {
+	case CHIP_BONAIRE:
+		adev->gfx.config.max_shader_engines = 2;
+		adev->gfx.config.max_tile_pipes = 4;
+		adev->gfx.config.max_cu_per_sh = 7;
+		adev->gfx.config.max_sh_per_se = 1;
+		adev->gfx.config.max_backends_per_se = 2;
+		adev->gfx.config.max_texture_channel_caches = 4;
+		adev->gfx.config.max_gprs = 256;
+		adev->gfx.config.max_gs_threads = 32;
+		adev->gfx.config.max_hw_contexts = 8;
+
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+		break;
+	case CHIP_HAWAII:
+		adev->gfx.config.max_shader_engines = 4;
+		adev->gfx.config.max_tile_pipes = 16;
+		adev->gfx.config.max_cu_per_sh = 11;
+		adev->gfx.config.max_sh_per_se = 1;
+		adev->gfx.config.max_backends_per_se = 4;
+		adev->gfx.config.max_texture_channel_caches = 16;
+		adev->gfx.config.max_gprs = 256;
+		adev->gfx.config.max_gs_threads = 32;
+		adev->gfx.config.max_hw_contexts = 8;
+
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
+		break;
+	case CHIP_KAVERI:
+		adev->gfx.config.max_shader_engines = 1;
+		adev->gfx.config.max_tile_pipes = 4;
+		if ((adev->pdev->device == 0x1304) ||
+		    (adev->pdev->device == 0x1305) ||
+		    (adev->pdev->device == 0x130C) ||
+		    (adev->pdev->device == 0x130F) ||
+		    (adev->pdev->device == 0x1310) ||
+		    (adev->pdev->device == 0x1311) ||
+		    (adev->pdev->device == 0x131C)) {
+			adev->gfx.config.max_cu_per_sh = 8;
+			adev->gfx.config.max_backends_per_se = 2;
+		} else if ((adev->pdev->device == 0x1309) ||
+			   (adev->pdev->device == 0x130A) ||
+			   (adev->pdev->device == 0x130D) ||
+			   (adev->pdev->device == 0x1313) ||
+			   (adev->pdev->device == 0x131D)) {
+			adev->gfx.config.max_cu_per_sh = 6;
+			adev->gfx.config.max_backends_per_se = 2;
+		} else if ((adev->pdev->device == 0x1306) ||
+			   (adev->pdev->device == 0x1307) ||
+			   (adev->pdev->device == 0x130B) ||
+			   (adev->pdev->device == 0x130E) ||
+			   (adev->pdev->device == 0x1315) ||
+			   (adev->pdev->device == 0x131B)) {
+			adev->gfx.config.max_cu_per_sh = 4;
+			adev->gfx.config.max_backends_per_se = 1;
+		} else {
+			adev->gfx.config.max_cu_per_sh = 3;
+			adev->gfx.config.max_backends_per_se = 1;
+		}
+		adev->gfx.config.max_sh_per_se = 1;
+		adev->gfx.config.max_texture_channel_caches = 4;
+		adev->gfx.config.max_gprs = 256;
+		adev->gfx.config.max_gs_threads = 16;
+		adev->gfx.config.max_hw_contexts = 8;
+
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+		break;
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.config.max_shader_engines = 1;
+		adev->gfx.config.max_tile_pipes = 2;
+		adev->gfx.config.max_cu_per_sh = 2;
+		adev->gfx.config.max_sh_per_se = 1;
+		adev->gfx.config.max_backends_per_se = 1;
+		adev->gfx.config.max_texture_channel_caches = 2;
+		adev->gfx.config.max_gprs = 256;
+		adev->gfx.config.max_gs_threads = 16;
+		adev->gfx.config.max_hw_contexts = 8;
+
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+		break;
+	}
+
+	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
+	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
+	adev->gfx.config.mem_max_burst_length_bytes = 256;
+	if (adev->flags & AMD_IS_APU) {
+		/* Get memory bank mapping mode. */
+		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
+		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
+		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+		/* Validate settings in case only one DIMM installed. */
+		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
+			dimm00_addr_map = 0;
+		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
+			dimm01_addr_map = 0;
+		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
+			dimm10_addr_map = 0;
+		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
+			dimm11_addr_map = 0;
+
+		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
+		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
+		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
+			adev->gfx.config.mem_row_size_in_kb = 2;
+		else
+			adev->gfx.config.mem_row_size_in_kb = 1;
+	} else {
+		tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
+		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+		if (adev->gfx.config.mem_row_size_in_kb > 4)
+			adev->gfx.config.mem_row_size_in_kb = 4;
+	}
+	/* XXX use MC settings? */
+	adev->gfx.config.shader_engine_tile_size = 32;
+	adev->gfx.config.num_gpus = 1;
+	adev->gfx.config.multi_gpu_tile_size = 64;
+
+	/* fix up row size */
+	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
+	switch (adev->gfx.config.mem_row_size_in_kb) {
+	case 1:
+	default:
+		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+		break;
+	case 2:
+		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+		break;
+	case 4:
+		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+		break;
+	}
+	adev->gfx.config.gb_addr_config = gb_addr_config;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
@@ -4870,6 +4471,10 @@ static int gfx_v7_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	adev->gfx.ce_ram_size = 0x8000;
+
+	gfx_v7_0_gpu_early_init(adev);
+
 	return r;
 }
 
@@ -4910,8 +4515,6 @@ static int gfx_v7_0_hw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->gfx.ce_ram_size = 0x8000;
-
 	return r;
 }
 
@@ -5028,16 +4631,6 @@ static void gfx_v7_0_print_status(void *handle)
 		 RREG32(mmHDP_ADDR_CONFIG));
 	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
 		 RREG32(mmDMIF_ADDR_CALC));
-	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
-		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
-	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
-		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
-	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
-	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
-	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
 
 	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
 		 RREG32(mmCP_MEQ_THRESHOLDS));
@@ -5580,13 +5173,15 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
-	.emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5596,13 +5191,15 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v7_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v7_0_ring_emit_fence_compute,
-	.emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+	.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v7_0_ring_test_ring,
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5672,7 +5269,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 
 
 int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
-								   struct amdgpu_cu_info *cu_info)
+			 struct amdgpu_cu_info *cu_info)
 {
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5680,16 +5277,19 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
 	if (!adev || !cu_info)
 		return -EINVAL;
 
+	memset(cu_info, 0, sizeof(*cu_info));
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j);
+			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+			for (k = 0; k < 16; k ++) {
 				if (bitmap & mask) {
 					if (counter < 2)
 						ao_bitmap |= mask;
@@ -5701,9 +5301,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
-	mutex_unlock(&adev->grbm_idx_mutex);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7086ac17abee..f0c7b3596480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -43,9 +43,6 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
-#include "uvd/uvd_5_0_d.h"
-#include "uvd/uvd_5_0_sh_mask.h"
-
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
@@ -652,7 +649,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
-	r = amdgpu_ring_lock(ring, 3);
+	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 			  ring->idx, r);
@@ -662,7 +659,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
@@ -699,7 +696,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, 256, &ib);
+	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		goto err1;
@@ -709,9 +706,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err2;
 
@@ -737,7 +732,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 err2:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err1:
 	amdgpu_gfx_scratch_free(adev, scratch);
 	return r;
@@ -1171,7 +1167,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 	/* allocate an indirect buffer to put the commands in */
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, total_size, &ib);
+	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		return r;
@@ -1266,9 +1262,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
 
 	/* shedule the ib on the ring */
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r) {
 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
 		goto fail;
@@ -1296,7 +1290,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 
 fail:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 
 	return r;
 }
@@ -2574,11 +2569,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 }
 
-static u32 gfx_v8_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
 {
 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
@@ -2599,89 +2589,49 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 
-static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
-				    u32 max_rb_num_per_se,
-				    u32 sh_per_se)
+static u32 gfx_v8_0_create_bitmask(u32 bit_width)
+{
+	return (u32)((1ULL << bit_width) - 1);
+}
+
+static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
-	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-
 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
 
+	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
 
-	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
+	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
+				       adev->gfx.config.max_sh_per_se);
 
-	return data & mask;
+	return (~data) & mask;
 }
 
-static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
-			      u32 se_num, u32 sh_per_se,
-			      u32 max_rb_num_per_se)
+static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 {
 	int i, j;
-	u32 data, mask;
-	u32 disabled_rbs = 0;
-	u32 enabled_rbs = 0;
+	u32 data;
+	u32 active_rbs = 0;
+	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+					adev->gfx.config.max_sh_per_se;
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		for (j = 0; j < sh_per_se; j++) {
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v8_0_select_se_sh(adev, i, j);
-			data = gfx_v8_0_get_rb_disabled(adev,
-					      max_rb_num_per_se, sh_per_se);
-			disabled_rbs |= data << ((i * sh_per_se + j) *
-						 RB_BITMAP_WIDTH_PER_SH);
+			data = gfx_v8_0_get_rb_active_bitmap(adev);
+			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+					       rb_bitmap_width_per_sh);
 		}
 	}
 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
-	mask = 1;
-	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
-		if (!(disabled_rbs & mask))
-			enabled_rbs |= mask;
-		mask <<= 1;
-	}
-
-	adev->gfx.config.backend_enable_mask = enabled_rbs;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
-		data = RREG32(mmPA_SC_RASTER_CONFIG);
-		for (j = 0; j < sh_per_se; j++) {
-			switch (enabled_rbs & 3) {
-			case 0:
-				if (j == 0)
-					data |= (RASTER_CONFIG_RB_MAP_3 <<
-						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
-				else
-					data |= (RASTER_CONFIG_RB_MAP_0 <<
-						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
-				break;
-			case 1:
-				data |= (RASTER_CONFIG_RB_MAP_0 <<
-					 (i * sh_per_se + j) * 2);
-				break;
-			case 2:
-				data |= (RASTER_CONFIG_RB_MAP_3 <<
-					 (i * sh_per_se + j) * 2);
-				break;
-			case 3:
-			default:
-				data |= (RASTER_CONFIG_RB_MAP_2 <<
-					 (i * sh_per_se + j) * 2);
-				break;
-			}
-			enabled_rbs >>= 2;
-		}
-		WREG32(mmPA_SC_RASTER_CONFIG, data);
-	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
+	adev->gfx.config.backend_enable_mask = active_rbs;
+	adev->gfx.config.num_rbs = hweight32(active_rbs);
 }
 
 /**
@@ -2741,19 +2691,10 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
-	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
-	       adev->gfx.config.gb_addr_config & 0x70);
-	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
-	       adev->gfx.config.gb_addr_config & 0x70);
-	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
-	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
-	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 
 	gfx_v8_0_tiling_mode_table_init(adev);
 
-	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
-				 adev->gfx.config.max_sh_per_se,
-				 adev->gfx.config.max_backends_per_se);
+	gfx_v8_0_setup_rb(adev);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -3062,7 +3003,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 
 	gfx_v8_0_cp_gfx_enable(adev, true);
 
-	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
+	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
 		return r;
@@ -3126,7 +3067,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 	amdgpu_ring_write(ring, 0x8000);
 	amdgpu_ring_write(ring, 0x8000);
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	return 0;
 }
@@ -3226,13 +3167,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	udelay(50);
 }
 
-static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
-{
-	gfx_v8_0_cp_compute_enable(adev, true);
-
-	return 0;
-}
-
 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 {
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -3802,9 +3736,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 		WREG32(mmCP_PQ_STATUS, tmp);
 	}
 
-	r = gfx_v8_0_cp_compute_start(adev);
-	if (r)
-		return r;
+	gfx_v8_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
@@ -4016,16 +3948,6 @@ static void gfx_v8_0_print_status(void *handle)
 		 RREG32(mmHDP_ADDR_CONFIG));
 	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
 		 RREG32(mmDMIF_ADDR_CALC));
-	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
-		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
-	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
-		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
-	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
-	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
-	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
-		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
 
 	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
 		 RREG32(mmCP_MEQ_THRESHOLDS));
@@ -4667,6 +4589,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0x20); /* poll interval */
 }
 
+static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+				 WRITE_DATA_DST_SEL(0) |
+				 WR_CONFIRM));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, 1);
+
+}
+
 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib)
 {
@@ -4699,8 +4633,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 	else
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw |
-		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+	control |= ib->length_dw | (ib->vm_id << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -4729,8 +4662,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 
 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
 
-	control |= ib->length_dw |
-			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+	control |= ib->length_dw | (ib->vm_id << 24);
 
 	amdgpu_ring_write(ring, header);
 	amdgpu_ring_write(ring,
@@ -4762,49 +4694,10 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
 
 }
 
-/**
- * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
- *
- * @ring: amdgpu ring buffer object
- * @semaphore: amdgpu semaphore object
- * @emit_wait: Is this a sempahore wait?
- *
- * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
- * from running ahead of semaphore waits.
- */
-static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
-
-	if (ring->adev->asic_type == CHIP_TOPAZ ||
-	    ring->adev->asic_type == CHIP_TONGA ||
-	    ring->adev->asic_type == CHIP_FIJI)
-		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
-		return false;
-	else {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
-		amdgpu_ring_write(ring, lower_32_bits(addr));
-		amdgpu_ring_write(ring, upper_32_bits(addr));
-		amdgpu_ring_write(ring, sel);
-	}
-
-	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
-		/* Prevent the PFP from running ahead of the semaphore wait */
-		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
-		amdgpu_ring_write(ring, 0x0);
-	}
-
-	return true;
-}
-
-static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
-					unsigned vm_id, uint64_t pd_addr)
+static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
+	uint32_t seq = ring->fence_drv.sync_seq;
 	uint64_t addr = ring->fence_drv.gpu_addr;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -4824,6 +4717,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
 		amdgpu_ring_write(ring, 0);
 	}
+}
+
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					unsigned vm_id, uint64_t pd_addr)
+{
+	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -5146,13 +5045,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
-	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -5162,13 +5063,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.parse_cs = NULL,
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
-	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5237,32 +5140,23 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	}
 }
 
-static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
-		u32 se, u32 sh)
+static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
-	u32 mask = 0, tmp, tmp1;
-	int i;
-
-	gfx_v8_0_select_se_sh(adev, se, sh);
-	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
-	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	u32 data, mask;
 
-	tmp &= 0xffff0000;
+	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
 
-	tmp |= tmp1;
-	tmp >>= 16;
+	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
-		mask <<= 1;
-		mask |= 1;
-	}
+	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
-	return (~tmp) & mask;
+	return (~data) & mask;
 }
 
 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
-						 struct amdgpu_cu_info *cu_info)
+			 struct amdgpu_cu_info *cu_info)
 {
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5270,16 +5164,19 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
 	if (!adev || !cu_info)
 		return -EINVAL;
 
+	memset(cu_info, 0, sizeof(*cu_info));
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j);
+			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+			for (k = 0; k < 16; k ++) {
 				if (bitmap & mask) {
 					if (counter < 2)
 						ao_bitmap |= mask;
@@ -5291,9 +5188,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
-	mutex_unlock(&adev->grbm_idx_mutex);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index b8060795b27b..82ce7d943884 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -339,7 +339,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
 	tmp = RREG32(mmHDP_MISC_CNTL);
-	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
 	WREG32(mmHDP_MISC_CNTL, tmp);
 
 	tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -694,7 +694,8 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	amdgpu_vm_manager_init(adev);
 
 	/* base offset of vram pages */
 	if (adev->flags & AMD_IS_APU) {
@@ -926,10 +927,6 @@ static int gmc_v7_0_sw_init(void *handle)
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_gem_init(adev);
-	if (r)
-		return r;
-
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;
@@ -1010,7 +1007,7 @@ static int gmc_v7_0_sw_fini(void *handle)
 		adev->vm_manager.enabled = false;
 	}
 	gmc_v7_0_gart_fini(adev);
-	amdgpu_gem_fini(adev);
+	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3efd45546241..29bd7b57dc91 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -252,6 +252,12 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
 	if (!adev->mc.fw)
 		return -EINVAL;
 
+	/* Skip MC ucode loading on SR-IOV capable boards.
+	 * vbios does this for us in asic_init in that case.
+	 */
+	if (adev->virtualization.supports_sr_iov)
+		return 0;
+
 	hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
 	amdgpu_ucode_print_mc_hdr(&hdr->header);
 
@@ -380,7 +386,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
 
 	tmp = RREG32(mmHDP_MISC_CNTL);
-	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+	tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
 	WREG32(mmHDP_MISC_CNTL, tmp);
 
 	tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -774,7 +780,8 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
 	 * amdgpu graphics/compute will use VMIDs 1-7
 	 * amdkfd will use VMIDs 8-15
 	 */
-	adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS;
+	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+	amdgpu_vm_manager_init(adev);
 
 	/* base offset of vram pages */
 	if (adev->flags & AMD_IS_APU) {
@@ -880,10 +887,6 @@ static int gmc_v8_0_sw_init(void *handle)
 	int dma_bits;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = amdgpu_gem_init(adev);
-	if (r)
-		return r;
-
 	r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
 	if (r)
 		return r;
@@ -964,7 +967,7 @@ static int gmc_v8_0_sw_fini(void *handle)
 		adev->vm_manager.enabled = false;
 	}
 	gmc_v8_0_gart_fini(adev);
-	amdgpu_gem_fini(adev);
+	amdgpu_gem_force_release(adev);
 	amdgpu_bo_fini(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 090486c18249..52ee08193295 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -279,6 +279,12 @@ static int iceland_smu_upload_firmware_image(struct amdgpu_device *adev)
 	if (!adev->pm.fw)
 		return -EINVAL;
 
+	/* Skip SMC ucode loading on SR-IOV capable boards.
+	 * vbios does this for us in asic_init in that case.
+	 */
+	if (adev->virtualization.supports_sr_iov)
+		return 0;
+
 	hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
 	amdgpu_ucode_print_smc_hdr(&hdr->header);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 2cf50180cc51..6e0a86a563f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
 #include "oss/oss_2_4_d.h"
 #include "oss/oss_2_4_sh_mask.h"
 
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 				   struct amdgpu_ib *ib)
 {
-	u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+	u32 vmid = ib->vm_id & 0xf;
 	u32 next_rptr = ring->wptr + 5;
 
 	while ((next_rptr & 7) != 2)
@@ -300,6 +300,13 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }
 
+static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
 /**
  * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -335,31 +342,6 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 }
 
 /**
- * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (VI).
- */
-static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring,
-					  struct amdgpu_semaphore *semaphore,
-					  bool emit_wait)
-{
-	u64 addr = semaphore->gpu_addr;
-	u32 sig = emit_wait ? 0 : 1;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
-			  SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
-	amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-
-	return true;
-}
-
-/**
  * sdma_v2_4_gfx_stop - stop the gfx async dma engines
  *
  * @adev: amdgpu_device pointer
@@ -459,6 +441,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
+		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+		       adev->gfx.config.gb_addr_config & 0x70);
+
 		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
 
 		/* Set ring buffer size in dwords */
@@ -636,7 +621,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
-	r = amdgpu_ring_lock(ring, 5);
+	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_wb_free(adev, index);
@@ -649,7 +634,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -699,7 +684,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, 256, &ib);
+	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		goto err0;
@@ -716,9 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -744,7 +727,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -797,7 +781,7 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
  * Update PTEs by writing them manually using sDMA (CIK).
  */
 static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
-				   uint64_t pe,
+				   const dma_addr_t *pages_addr, uint64_t pe,
 				   uint64_t addr, unsigned count,
 				   uint32_t incr, uint32_t flags)
 {
@@ -816,14 +800,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
 		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 		ib->ptr[ib->length_dw++] = ndw;
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-			if (flags & AMDGPU_PTE_SYSTEM) {
-				value = amdgpu_vm_map_gart(ib->ring->adev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
-			} else if (flags & AMDGPU_PTE_VALID) {
-				value = addr;
-			} else {
-				value = 0;
-			}
+			value = amdgpu_vm_map_gart(pages_addr, addr);
 			addr += incr;
 			value |= flags;
 			ib->ptr[ib->length_dw++] = value;
@@ -881,14 +858,14 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
 }
 
 /**
- * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw
+ * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
  *
  * @ib: indirect buffer to fill with padding
  *
  */
-static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
+static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
 	u32 pad_count;
 	int i;
 
@@ -904,6 +881,31 @@ static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
 }
 
 /**
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
  * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1111,6 +1113,8 @@ static void sdma_v2_4_print_status(void *handle)
 			 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
 		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
 			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
+		dev_info(adev->dev, "  SDMA%d_TILING_CONFIG=0x%08X\n",
+			 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
 		mutex_lock(&adev->srbm_mutex);
 		for (j = 0; j < 16; j++) {
 			vi_srbm_select(adev, 0, 0, 0, j);
@@ -1302,12 +1306,14 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = sdma_v2_4_ring_emit_ib,
 	.emit_fence = sdma_v2_4_ring_emit_fence,
-	.emit_semaphore = sdma_v2_4_ring_emit_semaphore,
+	.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
 	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
 	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
 	.test_ring = sdma_v2_4_ring_test_ring,
 	.test_ib = sdma_v2_4_ring_test_ib,
 	.insert_nop = sdma_v2_4_ring_insert_nop,
+	.pad_ib = sdma_v2_4_ring_pad_ib,
 };
 
 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1405,14 +1411,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 	.copy_pte = sdma_v2_4_vm_copy_pte,
 	.write_pte = sdma_v2_4_vm_write_pte,
 	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
-	.pad_ib = sdma_v2_4_vm_pad_ib,
 };
 
 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
-		adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ad54c46751b0..8c8ca98dd129 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 				   struct amdgpu_ib *ib)
 {
-	u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+	u32 vmid = ib->vm_id & 0xf;
 	u32 next_rptr = ring->wptr + 5;
 
 	while ((next_rptr & 7) != 2)
@@ -410,6 +410,14 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }
 
+static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+	amdgpu_ring_write(ring, mmHDP_DEBUG0);
+	amdgpu_ring_write(ring, 1);
+}
+
 /**
  * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
  *
@@ -444,32 +452,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
 }
 
-
-/**
- * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (VI).
- */
-static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
-					  struct amdgpu_semaphore *semaphore,
-					  bool emit_wait)
-{
-	u64 addr = semaphore->gpu_addr;
-	u32 sig = emit_wait ? 0 : 1;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
-			  SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
-	amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
-	amdgpu_ring_write(ring, upper_32_bits(addr));
-
-	return true;
-}
-
 /**
  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
  *
@@ -596,6 +578,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
+		WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+		       adev->gfx.config.gb_addr_config & 0x70);
+
 		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
 
 		/* Set ring buffer size in dwords */
@@ -788,7 +773,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 
-	r = amdgpu_ring_lock(ring, 5);
+	r = amdgpu_ring_alloc(ring, 5);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_wb_free(adev, index);
@@ -801,7 +786,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -851,7 +836,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	tmp = 0xCAFEDEAD;
 	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(ring, NULL, 256, &ib);
+	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
 		goto err0;
@@ -868,9 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
-						 AMDGPU_FENCE_OWNER_UNDEFINED,
-						 &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
 	if (r)
 		goto err1;
 
@@ -895,7 +878,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 err1:
 	fence_put(f);
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, &ib, NULL);
+	fence_put(f);
 err0:
 	amdgpu_wb_free(adev, index);
 	return r;
@@ -948,7 +932,7 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
  * Update PTEs by writing them manually using sDMA (CIK).
  */
 static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
-				   uint64_t pe,
+				   const dma_addr_t *pages_addr, uint64_t pe,
 				   uint64_t addr, unsigned count,
 				   uint32_t incr, uint32_t flags)
 {
@@ -967,14 +951,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
 		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 		ib->ptr[ib->length_dw++] = ndw;
 		for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-			if (flags & AMDGPU_PTE_SYSTEM) {
-				value = amdgpu_vm_map_gart(ib->ring->adev, addr);
-				value &= 0xFFFFFFFFFFFFF000ULL;
-			} else if (flags & AMDGPU_PTE_VALID) {
-				value = addr;
-			} else {
-				value = 0;
-			}
+			value = amdgpu_vm_map_gart(pages_addr, addr);
 			addr += incr;
 			value |= flags;
 			ib->ptr[ib->length_dw++] = value;
@@ -1032,14 +1009,14 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
 }
 
 /**
- * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw
+ * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw
  *
  * @ib: indirect buffer to fill with padding
  *
  */
-static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
+static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
 {
-	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
 	u32 pad_count;
 	int i;
 
@@ -1055,6 +1032,31 @@ static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
 }
 
 /**
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	/* wait for idle */
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+	amdgpu_ring_write(ring, addr & 0xfffffffc);
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	amdgpu_ring_write(ring, seq); /* reference */
+	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
  * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
  *
  * @ring: amdgpu_ring pointer
@@ -1275,6 +1277,8 @@ static void sdma_v3_0_print_status(void *handle)
 			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
 		dev_info(adev->dev, "  SDMA%d_GFX_DOORBELL=0x%08X\n",
 			 i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
+		dev_info(adev->dev, "  SDMA%d_TILING_CONFIG=0x%08X\n",
+			 i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
 		mutex_lock(&adev->srbm_mutex);
 		for (j = 0; j < 16; j++) {
 			vi_srbm_select(adev, 0, 0, 0, j);
@@ -1570,12 +1574,14 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
 	.parse_cs = NULL,
 	.emit_ib = sdma_v3_0_ring_emit_ib,
 	.emit_fence = sdma_v3_0_ring_emit_fence,
-	.emit_semaphore = sdma_v3_0_ring_emit_semaphore,
+	.emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
 	.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
 	.test_ring = sdma_v3_0_ring_test_ring,
 	.test_ib = sdma_v3_0_ring_test_ib,
 	.insert_nop = sdma_v3_0_ring_insert_nop,
+	.pad_ib = sdma_v3_0_ring_pad_ib,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1673,14 +1679,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 	.copy_pte = sdma_v3_0_vm_copy_pte,
 	.write_pte = sdma_v3_0_vm_write_pte,
 	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
-	.pad_ib = sdma_v3_0_vm_pad_ib,
 };
 
 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
-		adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 361c49a82323..083893dd68c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -272,6 +272,12 @@ static int tonga_smu_upload_firmware_image(struct amdgpu_device *adev)
 	if (!adev->pm.fw)
 		return -EINVAL;
 
+	/* Skip SMC ucode loading on SR-IOV capable boards.
+	 * vbios does this for us in asic_init in that case.
+	 */
+	if (adev->virtualization.supports_sr_iov)
+		return 0;
+
 	hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
 	amdgpu_ucode_print_smc_hdr(&hdr->header);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index fbd3767671bb..c606ccb38d8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -164,7 +164,7 @@ static int uvd_v4_2_hw_init(void *handle)
 		goto done;
 	}
 
-	r = amdgpu_ring_lock(ring, 10);
+	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
 		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
 		goto done;
@@ -189,7 +189,7 @@ static int uvd_v4_2_hw_init(void *handle)
 	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
 	amdgpu_ring_write(ring, 3);
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 done:
 	/* lower clocks again */
@@ -439,33 +439,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
- * uvd_v4_2_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v4_2_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
-	amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
-	amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
-	amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
-	return true;
-}
-
-/**
  * uvd_v4_2_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -480,7 +453,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
-	r = amdgpu_ring_lock(ring, 3);
+	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 			  ring->idx, r);
@@ -488,7 +461,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
 	}
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
@@ -549,7 +522,7 @@ static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring)
 		goto error;
 	}
 
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
 		goto error;
@@ -603,6 +576,10 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
 	addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
 	WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
 
+	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
 	uvd_v4_2_init_cg(adev);
 }
 
@@ -804,6 +781,13 @@ static void uvd_v4_2_print_status(void *handle)
 		 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
 	dev_info(adev->dev, "  UVD_CONTEXT_ID=0x%08X\n",
 		 RREG32(mmUVD_CONTEXT_ID));
+	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
+
 }
 
 static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev,
@@ -888,10 +872,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v4_2_ring_emit_ib,
 	.emit_fence = uvd_v4_2_ring_emit_fence,
-	.emit_semaphore = uvd_v4_2_ring_emit_semaphore,
 	.test_ring = uvd_v4_2_ring_test_ring,
 	.test_ib = uvd_v4_2_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 57f1c5bf3bf1..e3c852d9d79a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -160,7 +160,7 @@ static int uvd_v5_0_hw_init(void *handle)
 		goto done;
 	}
 
-	r = amdgpu_ring_lock(ring, 10);
+	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
 		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
 		goto done;
@@ -185,7 +185,7 @@ static int uvd_v5_0_hw_init(void *handle)
 	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
 	amdgpu_ring_write(ring, 3);
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 done:
 	/* lower clocks again */
@@ -279,6 +279,10 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
 	size = AMDGPU_UVD_HEAP_SIZE;
 	WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
 	WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 }
 
 /**
@@ -483,33 +487,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
- * uvd_v5_0_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v5_0_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
-	amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
-	amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
-	amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
-	return true;
-}
-
-/**
  * uvd_v5_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -524,7 +501,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
-	r = amdgpu_ring_lock(ring, 3);
+	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 			  ring->idx, r);
@@ -532,7 +509,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 	}
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
@@ -595,7 +572,7 @@ static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
 		goto error;
 	}
 
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
 		goto error;
@@ -751,6 +728,12 @@ static void uvd_v5_0_print_status(void *handle)
 		 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
 	dev_info(adev->dev, "  UVD_CONTEXT_ID=0x%08X\n",
 		 RREG32(mmUVD_CONTEXT_ID));
+	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
 }
 
 static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -829,10 +812,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v5_0_ring_emit_ib,
 	.emit_fence = uvd_v5_0_ring_emit_fence,
-	.emit_semaphore = uvd_v5_0_ring_emit_semaphore,
 	.test_ring = uvd_v5_0_ring_test_ring,
 	.test_ib = uvd_v5_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 0b365b7651ff..3375e614ac67 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -157,7 +157,7 @@ static int uvd_v6_0_hw_init(void *handle)
 		goto done;
 	}
 
-	r = amdgpu_ring_lock(ring, 10);
+	r = amdgpu_ring_alloc(ring, 10);
 	if (r) {
 		DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
 		goto done;
@@ -182,7 +182,7 @@ static int uvd_v6_0_hw_init(void *handle)
 	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
 	amdgpu_ring_write(ring, 3);
 
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 
 done:
 	if (!r)
@@ -277,6 +277,10 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
 	size = AMDGPU_UVD_HEAP_SIZE;
 	WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
 	WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
 }
 
 static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
@@ -722,33 +726,6 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
- * uvd_v6_0_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v6_0_ring_emit_semaphore(struct amdgpu_ring *ring,
-					 struct amdgpu_semaphore *semaphore,
-					 bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
-	amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
-	amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-	amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
-	amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
-	return true;
-}
-
-/**
  * uvd_v6_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -763,7 +740,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 
 	WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
-	r = amdgpu_ring_lock(ring, 3);
+	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
 			  ring->idx, r);
@@ -771,7 +748,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 	}
 	amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
@@ -827,7 +804,7 @@ static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
 		goto error;
 	}
 
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
 		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
 		goto error;
@@ -974,6 +951,12 @@ static void uvd_v6_0_print_status(void *handle)
 		 RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
 	dev_info(adev->dev, "  UVD_CONTEXT_ID=0x%08X\n",
 		 RREG32(mmUVD_CONTEXT_ID));
+	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
 }
 
 static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1065,10 +1048,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v6_0_ring_emit_ib,
 	.emit_fence = uvd_v6_0_ring_emit_fence,
-	.emit_semaphore = uvd_v6_0_ring_emit_semaphore,
 	.test_ring = uvd_v6_0_ring_test_ring,
 	.test_ib = uvd_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index a822edacfa95..c7e885bcfd41 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -642,10 +642,10 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
 	.parse_cs = amdgpu_vce_ring_parse_cs,
 	.emit_ib = amdgpu_vce_ring_emit_ib,
 	.emit_fence = amdgpu_vce_ring_emit_fence,
-	.emit_semaphore = amdgpu_vce_ring_emit_semaphore,
 	.test_ring = amdgpu_vce_ring_test_ring,
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index d662fa9f9091..ce468ee5da2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -762,10 +762,10 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
 	.parse_cs = amdgpu_vce_ring_parse_cs,
 	.emit_ib = amdgpu_vce_ring_emit_ib,
 	.emit_fence = amdgpu_vce_ring_emit_fence,
-	.emit_semaphore = amdgpu_vce_ring_emit_semaphore,
 	.test_ring = amdgpu_vce_ring_test_ring,
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
 };
 
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 0d14d108a6c4..1c120efa292c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -74,6 +74,9 @@
 #include "uvd_v6_0.h"
 #include "vce_v3_0.h"
 #include "amdgpu_powerplay.h"
+#if defined(CONFIG_DRM_AMD_ACP)
+#include "amdgpu_acp.h"
+#endif
 
 /*
  * Indirect registers accessor
@@ -571,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static void vi_print_gpu_status_regs(struct amdgpu_device *adev)
-{
-	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
-		RREG32(mmGRBM_STATUS));
-	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
-		RREG32(mmGRBM_STATUS2));
-	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE0));
-	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE1));
-	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE2));
-	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-		RREG32(mmGRBM_STATUS_SE3));
-	dev_info(adev->dev, "  SRBM_STATUS=0x%08X\n",
-		RREG32(mmSRBM_STATUS));
-	dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
-		RREG32(mmSRBM_STATUS2));
-	dev_info(adev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
-		RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
-	if (adev->sdma.num_instances > 1) {
-		dev_info(adev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
-			RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
-	}
-	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
-	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT2));
-	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
-		 RREG32(mmCP_STALLED_STAT3));
-	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
-		 RREG32(mmCP_CPF_BUSY_STAT));
-	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_CPF_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
-	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
-	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(mmCP_CPC_STALLED_STAT1));
-	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
-}
-
-/**
- * vi_gpu_check_soft_reset - check which blocks are busy
- *
- * @adev: amdgpu_device pointer
- *
- * Check which blocks are busy and return the relevant reset
- * mask to be used by vi_gpu_soft_reset().
- * Returns a mask of the blocks to be reset.
- */
-u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev)
-{
-	u32 reset_mask = 0;
-	u32 tmp;
-
-	/* GRBM_STATUS */
-	tmp = RREG32(mmGRBM_STATUS);
-	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
-		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
-		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
-		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
-		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
-		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
-		reset_mask |= AMDGPU_RESET_GFX;
-
-	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
-		reset_mask |= AMDGPU_RESET_CP;
-
-	/* GRBM_STATUS2 */
-	tmp = RREG32(mmGRBM_STATUS2);
-	if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_RLC;
-
-	if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK |
-		   GRBM_STATUS2__CPC_BUSY_MASK |
-		   GRBM_STATUS2__CPG_BUSY_MASK))
-		reset_mask |= AMDGPU_RESET_CP;
-
-	/* SRBM_STATUS2 */
-	tmp = RREG32(mmSRBM_STATUS2);
-	if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_DMA;
-
-	if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_DMA1;
-
-	/* SRBM_STATUS */
-	tmp = RREG32(mmSRBM_STATUS);
-
-	if (tmp & SRBM_STATUS__IH_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_IH;
-
-	if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_SEM;
-
-	if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
-		reset_mask |= AMDGPU_RESET_GRBM;
-
-	if (adev->asic_type != CHIP_TOPAZ) {
-		if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK |
-			   SRBM_STATUS__UVD_BUSY_MASK))
-			reset_mask |= AMDGPU_RESET_UVD;
-	}
-
-	if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
-		reset_mask |= AMDGPU_RESET_VMC;
-
-	if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-		   SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
-		reset_mask |= AMDGPU_RESET_MC;
-
-	/* SDMA0_STATUS_REG */
-	tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
-	if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-		reset_mask |= AMDGPU_RESET_DMA;
-
-	/* SDMA1_STATUS_REG */
-	if (adev->sdma.num_instances > 1) {
-		tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
-		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
-			reset_mask |= AMDGPU_RESET_DMA1;
-	}
-#if 0
-	/* VCE_STATUS */
-	if (adev->asic_type != CHIP_TOPAZ) {
-		tmp = RREG32(mmVCE_STATUS);
-		if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK)
-			reset_mask |= AMDGPU_RESET_VCE;
-		if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK)
-			reset_mask |= AMDGPU_RESET_VCE1;
-
-	}
-
-	if (adev->asic_type != CHIP_TOPAZ) {
-		if (amdgpu_display_is_display_hung(adev))
-			reset_mask |= AMDGPU_RESET_DISPLAY;
-	}
-#endif
-
-	/* Skip MC reset as it's mostly likely not hung, just busy */
-	if (reset_mask & AMDGPU_RESET_MC) {
-		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
-		reset_mask &= ~AMDGPU_RESET_MC;
-	}
-
-	return reset_mask;
-}
-
-/**
- * vi_gpu_soft_reset - soft reset GPU
- *
- * @adev: amdgpu_device pointer
- * @reset_mask: mask of which blocks to reset
- *
- * Soft reset the blocks specified in @reset_mask.
- */
-static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
-{
-	struct amdgpu_mode_mc_save save;
-	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
-	u32 tmp;
-
-	if (reset_mask == 0)
-		return;
-
-	dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
-
-	vi_print_gpu_status_regs(adev);
-	dev_info(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
-		 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
-	dev_info(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-		 RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
-
-	/* disable CG/PG */
-
-	/* stop the rlc */
-	//XXX
-	//gfx_v8_0_rlc_stop(adev);
-
-	/* Disable GFX parsing/prefetching */
-	tmp = RREG32(mmCP_ME_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
-	WREG32(mmCP_ME_CNTL, tmp);
-
-	/* Disable MEC parsing/prefetching */
-	tmp = RREG32(mmCP_MEC_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
-	WREG32(mmCP_MEC_CNTL, tmp);
-
-	if (reset_mask & AMDGPU_RESET_DMA) {
-		/* sdma0 */
-		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
-		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
-		WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-	}
-	if (reset_mask & AMDGPU_RESET_DMA1) {
-		/* sdma1 */
-		tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
-		tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
-		WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-	}
-
-	gmc_v8_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
-		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
-	}
-
-	if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) {
-		grbm_soft_reset =
-			REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
-		grbm_soft_reset =
-			REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
-	}
-
-	if (reset_mask & AMDGPU_RESET_CP) {
-		grbm_soft_reset =
-			REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
-	}
-
-	if (reset_mask & AMDGPU_RESET_DMA)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1);
-
-	if (reset_mask & AMDGPU_RESET_DMA1)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1);
-
-	if (reset_mask & AMDGPU_RESET_DISPLAY)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1);
-
-	if (reset_mask & AMDGPU_RESET_RLC)
-		grbm_soft_reset =
-			REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
-
-	if (reset_mask & AMDGPU_RESET_SEM)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
-
-	if (reset_mask & AMDGPU_RESET_IH)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1);
-
-	if (reset_mask & AMDGPU_RESET_GRBM)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
-
-	if (reset_mask & AMDGPU_RESET_VMC)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
-
-	if (reset_mask & AMDGPU_RESET_UVD)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
-	if (reset_mask & AMDGPU_RESET_VCE)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
-
-	if (reset_mask & AMDGPU_RESET_VCE)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
-
-	if (!(adev->flags & AMD_IS_APU)) {
-		if (reset_mask & AMDGPU_RESET_MC)
-		srbm_soft_reset =
-			REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
-	}
-
-	if (grbm_soft_reset) {
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-		tmp |= grbm_soft_reset;
-		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
-		WREG32(mmGRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-
-		udelay(50);
-
-		tmp &= ~grbm_soft_reset;
-		WREG32(mmGRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmGRBM_SOFT_RESET);
-	}
-
-	if (srbm_soft_reset) {
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-		tmp |= srbm_soft_reset;
-		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
-		WREG32(mmSRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-
-		udelay(50);
-
-		tmp &= ~srbm_soft_reset;
-		WREG32(mmSRBM_SOFT_RESET, tmp);
-		tmp = RREG32(mmSRBM_SOFT_RESET);
-	}
-
-	/* Wait a little for things to settle down */
-	udelay(50);
-
-	gmc_v8_0_mc_resume(adev, &save);
-	udelay(50);
-
-	vi_print_gpu_status_regs(adev);
-}
-
 static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
-	struct amdgpu_mode_mc_save save;
-	u32 tmp, i;
+	u32 i;
 
 	dev_info(adev->dev, "GPU pci config reset\n");
 
-	/* disable dpm? */
-
-	/* disable cg/pg */
-
-	/* Disable GFX parsing/prefetching */
-	tmp = RREG32(mmCP_ME_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
-	WREG32(mmCP_ME_CNTL, tmp);
-
-	/* Disable MEC parsing/prefetching */
-	tmp = RREG32(mmCP_MEC_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
-	tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
-	WREG32(mmCP_MEC_CNTL, tmp);
-
-	/* Disable GFX parsing/prefetching */
-	WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
-		CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
-	/* Disable MEC parsing/prefetching */
-	WREG32(mmCP_MEC_CNTL,
-			CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
-	/* sdma0 */
-	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
-	tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
-	WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-
-	/* sdma1 */
-	tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
-	tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
-	WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-
-	/* XXX other engines? */
-
-	/* halt the rlc, disable cp internal ints */
-	//XXX
-	//gfx_v8_0_rlc_stop(adev);
-
-	udelay(50);
-
-	/* disable mem access */
-	gmc_v8_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
-		dev_warn(adev->dev, "Wait for MC idle timed out !\n");
-	}
-
 	/* disable BM */
 	pci_clear_master(adev->pdev);
 	/* reset */
@@ -978,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
  */
 static int vi_asic_reset(struct amdgpu_device *adev)
 {
-	u32 reset_mask;
-
-	reset_mask = vi_gpu_check_soft_reset(adev);
-
-	if (reset_mask)
-		vi_set_bios_scratch_engine_hung(adev, true);
-
-	/* try soft reset */
-	vi_gpu_soft_reset(adev, reset_mask);
-
-	reset_mask = vi_gpu_check_soft_reset(adev);
+	vi_set_bios_scratch_engine_hung(adev, true);
 
-	/* try pci config reset */
-	if (reset_mask && amdgpu_hard_reset)
-		vi_gpu_pci_config_reset(adev);
+	vi_gpu_pci_config_reset(adev);
 
-	reset_mask = vi_gpu_check_soft_reset(adev);
-
-	if (!reset_mask)
-		vi_set_bios_scratch_engine_hung(adev, false);
+	vi_set_bios_scratch_engine_hung(adev, false);
 
 	return 0;
 }
@@ -1347,6 +973,15 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] =
 		.rev = 0,
 		.funcs = &vce_v3_0_ip_funcs,
 	},
+#if defined(CONFIG_DRM_AMD_ACP)
+	{
+		.type = AMD_IP_BLOCK_TYPE_ACP,
+		.major = 2,
+		.minor = 2,
+		.rev = 0,
+		.funcs = &acp_ip_funcs,
+	},
+#endif
 };
 
 int vi_set_ip_blocks(struct amdgpu_device *adev)
@@ -1436,26 +1071,22 @@ static int vi_common_early_init(void *handle)
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-		adev->has_uvd = false;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x1;
 		break;
 	case CHIP_FIJI:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x3c;
 		break;
 	case CHIP_TONGA:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
 		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x1;
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index d98aa9d82fa1..ace49976f7be 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -71,8 +71,6 @@
 #define		VMID(x)						((x) << 4)
 #define		QUEUEID(x)					((x) << 8)
 
-#define RB_BITMAP_WIDTH_PER_SH     2
-
 #define MC_SEQ_MISC0__MT__MASK	0xf0000000
 #define MC_SEQ_MISC0__MT__GDDR1  0x10000000
 #define MC_SEQ_MISC0__MT__DDR2   0x20000000
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d2b49c026cf6..07ac724e3ec9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -107,7 +107,7 @@ static int kfd_open(struct inode *inode, struct file *filep)
 	if (iminor(inode) != 0)
 		return -ENODEV;
 
-	is_32bit_user_mode = is_compat_task();
+	is_32bit_user_mode = in_compat_syscall();
 
 	if (is_32bit_user_mode == true) {
 		dev_warn(kfd_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c34c393e9aea..d5e19b5fbbfb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers(
 				union SQ_CMD_BITS *in_reg_sq_cmd,
 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 {
-	int status;
+	int status = 0;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct HsaDbgWaveMsgAMDGen2 *pMsg;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index ca8410e8683d..850a5623661f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -59,18 +59,23 @@ module_param(send_sigterm, int, 0444);
 MODULE_PARM_DESC(send_sigterm,
 	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
 
-bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
+static int amdkfd_init_completed;
+
+int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
 {
+	if (!amdkfd_init_completed)
+		return -EPROBE_DEFER;
+
 	/*
 	 * Only one interface version is supported,
 	 * no kfd/kgd version skew allowed.
 	 */
 	if (interface_version != KFD_INTERFACE_VERSION)
-		return false;
+		return -EINVAL;
 
 	*g2f = &kgd2kfd;
 
-	return true;
+	return 0;
 }
 EXPORT_SYMBOL(kgd2kfd_init);
 
@@ -111,6 +116,8 @@ static int __init kfd_module_init(void)
 
 	kfd_process_create_wq();
 
+	amdkfd_init_completed = 1;
+
 	dev_info(kfd_device, "Initialized module\n");
 
 	return 0;
@@ -125,6 +132,8 @@ err_pasid:
 
 static void __exit kfd_module_exit(void)
 {
+	amdkfd_init_completed = 0;
+
 	kfd_process_destroy_wq();
 	kfd_topology_shutdown();
 	kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index a902ae037398..ac005796b71c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -311,7 +311,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 		goto err_process_pqm_init;
 
 	/* init process apertures*/
-	process->is_32bit_user_mode = is_compat_task();
+	process->is_32bit_user_mode = in_compat_syscall();
 	if (kfd_init_apertures(process) != 0)
 		goto err_init_apretures;
 
diff --git a/drivers/gpu/drm/amd/include/amd_acpi.h b/drivers/gpu/drm/amd/include/amd_acpi.h
index 496360eb3fba..50e893325141 100644
--- a/drivers/gpu/drm/amd/include/amd_acpi.h
+++ b/drivers/gpu/drm/amd/include/amd_acpi.h
@@ -340,6 +340,8 @@ struct atcs_pref_req_output {
 #       define ATPX_FIXED_NOT_SUPPORTED                            (1 << 9)
 #       define ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED               (1 << 10)
 #       define ATPX_DGPU_REQ_POWER_FOR_DISPLAYS                    (1 << 11)
+#       define ATPX_DGPU_CAN_DRIVE_DISPLAYS                        (1 << 12)
+#       define ATPX_MS_HYBRID_GFX_SUPPORTED                        (1 << 14)
 #define ATPX_FUNCTION_POWER_CONTROL                                0x2
 /* ARG0: ATPX_FUNCTION_POWER_CONTROL
  * ARG1:
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index dbf7e6413cab..04e4090666fb 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -73,6 +73,7 @@ enum amd_ip_block_type {
 	AMD_IP_BLOCK_TYPE_SDMA,
 	AMD_IP_BLOCK_TYPE_UVD,
 	AMD_IP_BLOCK_TYPE_VCE,
+	AMD_IP_BLOCK_TYPE_ACP,
 };
 
 enum amd_clockgating_state {
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
index dc52ea0df4b4..d3ccf5a86de0 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
@@ -1379,6 +1379,7 @@
 #define mmDC_GPIO_PAD_STRENGTH_1                                                0x1978
 #define mmDC_GPIO_PAD_STRENGTH_2                                                0x1979
 #define mmPHY_AUX_CNTL                                                          0x197f
+#define mmDC_GPIO_I2CPAD_MASK                                                   0x1974
 #define mmDC_GPIO_I2CPAD_A                                                      0x1975
 #define mmDC_GPIO_I2CPAD_EN                                                     0x1976
 #define mmDC_GPIO_I2CPAD_Y                                                      0x1977
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
new file mode 100644
index 000000000000..6bea30ef3df5
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
@@ -0,0 +1,1117 @@
+/*
+ * DCE_8_0 Register documentation
+ *
+ * Copyright (C) 2016  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DCE_8_0_ENUM_H
+#define DCE_8_0_ENUM_H
+
+typedef enum SurfaceEndian {
+	ENDIAN_NONE                                      = 0x0,
+	ENDIAN_8IN16                                     = 0x1,
+	ENDIAN_8IN32                                     = 0x2,
+	ENDIAN_8IN64                                     = 0x3,
+} SurfaceEndian;
+typedef enum ArrayMode {
+	ARRAY_LINEAR_GENERAL                             = 0x0,
+	ARRAY_LINEAR_ALIGNED                             = 0x1,
+	ARRAY_1D_TILED_THIN1                             = 0x2,
+	ARRAY_1D_TILED_THICK                             = 0x3,
+	ARRAY_2D_TILED_THIN1                             = 0x4,
+	ARRAY_PRT_TILED_THIN1                            = 0x5,
+	ARRAY_PRT_2D_TILED_THIN1                         = 0x6,
+	ARRAY_2D_TILED_THICK                             = 0x7,
+	ARRAY_2D_TILED_XTHICK                            = 0x8,
+	ARRAY_PRT_TILED_THICK                            = 0x9,
+	ARRAY_PRT_2D_TILED_THICK                         = 0xa,
+	ARRAY_PRT_3D_TILED_THIN1                         = 0xb,
+	ARRAY_3D_TILED_THIN1                             = 0xc,
+	ARRAY_3D_TILED_THICK                             = 0xd,
+	ARRAY_3D_TILED_XTHICK                            = 0xe,
+	ARRAY_PRT_3D_TILED_THICK                         = 0xf,
+} ArrayMode;
+typedef enum PipeTiling {
+	CONFIG_1_PIPE                                    = 0x0,
+	CONFIG_2_PIPE                                    = 0x1,
+	CONFIG_4_PIPE                                    = 0x2,
+	CONFIG_8_PIPE                                    = 0x3,
+} PipeTiling;
+typedef enum BankTiling {
+	CONFIG_4_BANK                                    = 0x0,
+	CONFIG_8_BANK                                    = 0x1,
+} BankTiling;
+typedef enum GroupInterleave {
+	CONFIG_256B_GROUP                                = 0x0,
+	CONFIG_512B_GROUP                                = 0x1,
+} GroupInterleave;
+typedef enum RowTiling {
+	CONFIG_1KB_ROW                                   = 0x0,
+	CONFIG_2KB_ROW                                   = 0x1,
+	CONFIG_4KB_ROW                                   = 0x2,
+	CONFIG_8KB_ROW                                   = 0x3,
+	CONFIG_1KB_ROW_OPT                               = 0x4,
+	CONFIG_2KB_ROW_OPT                               = 0x5,
+	CONFIG_4KB_ROW_OPT                               = 0x6,
+	CONFIG_8KB_ROW_OPT                               = 0x7,
+} RowTiling;
+typedef enum BankSwapBytes {
+	CONFIG_128B_SWAPS                                = 0x0,
+	CONFIG_256B_SWAPS                                = 0x1,
+	CONFIG_512B_SWAPS                                = 0x2,
+	CONFIG_1KB_SWAPS                                 = 0x3,
+} BankSwapBytes;
+typedef enum SampleSplitBytes {
+	CONFIG_1KB_SPLIT                                 = 0x0,
+	CONFIG_2KB_SPLIT                                 = 0x1,
+	CONFIG_4KB_SPLIT                                 = 0x2,
+	CONFIG_8KB_SPLIT                                 = 0x3,
+} SampleSplitBytes;
+typedef enum NumPipes {
+	ADDR_CONFIG_1_PIPE                               = 0x0,
+	ADDR_CONFIG_2_PIPE                               = 0x1,
+	ADDR_CONFIG_4_PIPE                               = 0x2,
+	ADDR_CONFIG_8_PIPE                               = 0x3,
+} NumPipes;
+typedef enum PipeInterleaveSize {
+	ADDR_CONFIG_PIPE_INTERLEAVE_256B                 = 0x0,
+	ADDR_CONFIG_PIPE_INTERLEAVE_512B                 = 0x1,
+} PipeInterleaveSize;
+typedef enum BankInterleaveSize {
+	ADDR_CONFIG_BANK_INTERLEAVE_1                    = 0x0,
+	ADDR_CONFIG_BANK_INTERLEAVE_2                    = 0x1,
+	ADDR_CONFIG_BANK_INTERLEAVE_4                    = 0x2,
+	ADDR_CONFIG_BANK_INTERLEAVE_8                    = 0x3,
+} BankInterleaveSize;
+typedef enum NumShaderEngines {
+	ADDR_CONFIG_1_SHADER_ENGINE                      = 0x0,
+	ADDR_CONFIG_2_SHADER_ENGINE                      = 0x1,
+} NumShaderEngines;
+typedef enum ShaderEngineTileSize {
+	ADDR_CONFIG_SE_TILE_16                           = 0x0,
+	ADDR_CONFIG_SE_TILE_32                           = 0x1,
+} ShaderEngineTileSize;
+typedef enum NumGPUs {
+	ADDR_CONFIG_1_GPU                                = 0x0,
+	ADDR_CONFIG_2_GPU                                = 0x1,
+	ADDR_CONFIG_4_GPU                                = 0x2,
+} NumGPUs;
+typedef enum MultiGPUTileSize {
+	ADDR_CONFIG_GPU_TILE_16                          = 0x0,
+	ADDR_CONFIG_GPU_TILE_32                          = 0x1,
+	ADDR_CONFIG_GPU_TILE_64                          = 0x2,
+	ADDR_CONFIG_GPU_TILE_128                         = 0x3,
+} MultiGPUTileSize;
+typedef enum RowSize {
+	ADDR_CONFIG_1KB_ROW                              = 0x0,
+	ADDR_CONFIG_2KB_ROW                              = 0x1,
+	ADDR_CONFIG_4KB_ROW                              = 0x2,
+} RowSize;
+typedef enum NumLowerPipes {
+	ADDR_CONFIG_1_LOWER_PIPES                        = 0x0,
+	ADDR_CONFIG_2_LOWER_PIPES                        = 0x1,
+} NumLowerPipes;
+typedef enum DebugBlockId {
+	DBG_CLIENT_BLKID_RESERVED                        = 0x0,
+	DBG_CLIENT_BLKID_dbg                             = 0x1,
+	DBG_CLIENT_BLKID_uvdu_0                          = 0x2,
+	DBG_CLIENT_BLKID_uvdu_1                          = 0x3,
+	DBG_CLIENT_BLKID_uvdu_2                          = 0x4,
+	DBG_CLIENT_BLKID_uvdu_3                          = 0x5,
+	DBG_CLIENT_BLKID_uvdu_4                          = 0x6,
+	DBG_CLIENT_BLKID_uvdu_5                          = 0x7,
+	DBG_CLIENT_BLKID_uvdu_6                          = 0x8,
+	DBG_CLIENT_BLKID_uvdm_0                          = 0x9,
+	DBG_CLIENT_BLKID_uvdm_1                          = 0xa,
+	DBG_CLIENT_BLKID_uvdm_2                          = 0xb,
+	DBG_CLIENT_BLKID_uvdm_3                          = 0xc,
+	DBG_CLIENT_BLKID_vcea_0                          = 0xd,
+	DBG_CLIENT_BLKID_vcea_1                          = 0xe,
+	DBG_CLIENT_BLKID_vcea_2                          = 0xf,
+	DBG_CLIENT_BLKID_vcea_3                          = 0x10,
+	DBG_CLIENT_BLKID_vcea_4                          = 0x11,
+	DBG_CLIENT_BLKID_vcea_5                          = 0x12,
+	DBG_CLIENT_BLKID_vcea_6                          = 0x13,
+	DBG_CLIENT_BLKID_vceb_0                          = 0x14,
+	DBG_CLIENT_BLKID_vceb_1                          = 0x15,
+	DBG_CLIENT_BLKID_vceb_2                          = 0x16,
+	DBG_CLIENT_BLKID_dco                             = 0x17,
+	DBG_CLIENT_BLKID_xdma                            = 0x18,
+	DBG_CLIENT_BLKID_smu_0                           = 0x19,
+	DBG_CLIENT_BLKID_smu_1                           = 0x1a,
+	DBG_CLIENT_BLKID_smu_2                           = 0x1b,
+	DBG_CLIENT_BLKID_gck                             = 0x1c,
+	DBG_CLIENT_BLKID_tmonw0                          = 0x1d,
+	DBG_CLIENT_BLKID_tmonw1                          = 0x1e,
+	DBG_CLIENT_BLKID_grbm                            = 0x1f,
+	DBG_CLIENT_BLKID_rlc                             = 0x20,
+	DBG_CLIENT_BLKID_ds0                             = 0x21,
+	DBG_CLIENT_BLKID_cpg_0                           = 0x22,
+	DBG_CLIENT_BLKID_cpg_1                           = 0x23,
+	DBG_CLIENT_BLKID_cpc_0                           = 0x24,
+	DBG_CLIENT_BLKID_cpc_1                           = 0x25,
+	DBG_CLIENT_BLKID_cpf                             = 0x26,
+	DBG_CLIENT_BLKID_scf0                            = 0x27,
+	DBG_CLIENT_BLKID_scf1                            = 0x28,
+	DBG_CLIENT_BLKID_scf2                            = 0x29,
+	DBG_CLIENT_BLKID_scf3                            = 0x2a,
+	DBG_CLIENT_BLKID_pc0                             = 0x2b,
+	DBG_CLIENT_BLKID_pc1                             = 0x2c,
+	DBG_CLIENT_BLKID_pc2                             = 0x2d,
+	DBG_CLIENT_BLKID_pc3                             = 0x2e,
+	DBG_CLIENT_BLKID_vgt0                            = 0x2f,
+	DBG_CLIENT_BLKID_vgt1                            = 0x30,
+	DBG_CLIENT_BLKID_vgt2                            = 0x31,
+	DBG_CLIENT_BLKID_vgt3                            = 0x32,
+	DBG_CLIENT_BLKID_sx00                            = 0x33,
+	DBG_CLIENT_BLKID_sx10                            = 0x34,
+	DBG_CLIENT_BLKID_sx20                            = 0x35,
+	DBG_CLIENT_BLKID_sx30                            = 0x36,
+	DBG_CLIENT_BLKID_cb001                           = 0x37,
+	DBG_CLIENT_BLKID_cb200                           = 0x38,
+	DBG_CLIENT_BLKID_cb201                           = 0x39,
+	DBG_CLIENT_BLKID_cbr0                            = 0x3a,
+	DBG_CLIENT_BLKID_cb000                           = 0x3b,
+	DBG_CLIENT_BLKID_cb101                           = 0x3c,
+	DBG_CLIENT_BLKID_cb300                           = 0x3d,
+	DBG_CLIENT_BLKID_cb301                           = 0x3e,
+	DBG_CLIENT_BLKID_cbr1                            = 0x3f,
+	DBG_CLIENT_BLKID_cb100                           = 0x40,
+	DBG_CLIENT_BLKID_ia0                             = 0x41,
+	DBG_CLIENT_BLKID_ia1                             = 0x42,
+	DBG_CLIENT_BLKID_bci0                            = 0x43,
+	DBG_CLIENT_BLKID_bci1                            = 0x44,
+	DBG_CLIENT_BLKID_bci2                            = 0x45,
+	DBG_CLIENT_BLKID_bci3                            = 0x46,
+	DBG_CLIENT_BLKID_pa0                             = 0x47,
+	DBG_CLIENT_BLKID_pa1                             = 0x48,
+	DBG_CLIENT_BLKID_spim0                           = 0x49,
+	DBG_CLIENT_BLKID_spim1                           = 0x4a,
+	DBG_CLIENT_BLKID_spim2                           = 0x4b,
+	DBG_CLIENT_BLKID_spim3                           = 0x4c,
+	DBG_CLIENT_BLKID_sdma                            = 0x4d,
+	DBG_CLIENT_BLKID_ih                              = 0x4e,
+	DBG_CLIENT_BLKID_sem                             = 0x4f,
+	DBG_CLIENT_BLKID_srbm                            = 0x50,
+	DBG_CLIENT_BLKID_hdp                             = 0x51,
+	DBG_CLIENT_BLKID_acp_0                           = 0x52,
+	DBG_CLIENT_BLKID_acp_1                           = 0x53,
+	DBG_CLIENT_BLKID_sam                             = 0x54,
+	DBG_CLIENT_BLKID_mcc0                            = 0x55,
+	DBG_CLIENT_BLKID_mcc1                            = 0x56,
+	DBG_CLIENT_BLKID_mcc2                            = 0x57,
+	DBG_CLIENT_BLKID_mcc3                            = 0x58,
+	DBG_CLIENT_BLKID_mcd0                            = 0x59,
+	DBG_CLIENT_BLKID_mcd1                            = 0x5a,
+	DBG_CLIENT_BLKID_mcd2                            = 0x5b,
+	DBG_CLIENT_BLKID_mcd3                            = 0x5c,
+	DBG_CLIENT_BLKID_mcb                             = 0x5d,
+	DBG_CLIENT_BLKID_vmc                             = 0x5e,
+	DBG_CLIENT_BLKID_gmcon                           = 0x5f,
+	DBG_CLIENT_BLKID_gdc_0                           = 0x60,
+	DBG_CLIENT_BLKID_gdc_1                           = 0x61,
+	DBG_CLIENT_BLKID_gdc_2                           = 0x62,
+	DBG_CLIENT_BLKID_gdc_3                           = 0x63,
+	DBG_CLIENT_BLKID_gdc_4                           = 0x64,
+	DBG_CLIENT_BLKID_gdc_5                           = 0x65,
+	DBG_CLIENT_BLKID_gdc_6                           = 0x66,
+	DBG_CLIENT_BLKID_gdc_7                           = 0x67,
+	DBG_CLIENT_BLKID_gdc_8                           = 0x68,
+	DBG_CLIENT_BLKID_gdc_9                           = 0x69,
+	DBG_CLIENT_BLKID_gdc_10                          = 0x6a,
+	DBG_CLIENT_BLKID_gdc_11                          = 0x6b,
+	DBG_CLIENT_BLKID_gdc_12                          = 0x6c,
+	DBG_CLIENT_BLKID_gdc_13                          = 0x6d,
+	DBG_CLIENT_BLKID_gdc_14                          = 0x6e,
+	DBG_CLIENT_BLKID_gdc_15                          = 0x6f,
+	DBG_CLIENT_BLKID_gdc_16                          = 0x70,
+	DBG_CLIENT_BLKID_gdc_17                          = 0x71,
+	DBG_CLIENT_BLKID_gdc_18                          = 0x72,
+	DBG_CLIENT_BLKID_gdc_19                          = 0x73,
+	DBG_CLIENT_BLKID_gdc_20                          = 0x74,
+	DBG_CLIENT_BLKID_gdc_21                          = 0x75,
+	DBG_CLIENT_BLKID_gdc_22                          = 0x76,
+	DBG_CLIENT_BLKID_wd                              = 0x77,
+	DBG_CLIENT_BLKID_sdma_0                          = 0x78,
+	DBG_CLIENT_BLKID_sdma_1                          = 0x79,
+} DebugBlockId;
+typedef enum DebugBlockId_OLD {
+	DBG_BLOCK_ID_RESERVED                            = 0x0,
+	DBG_BLOCK_ID_DBG                                 = 0x1,
+	DBG_BLOCK_ID_VMC                                 = 0x2,
+	DBG_BLOCK_ID_PDMA                                = 0x3,
+	DBG_BLOCK_ID_CG                                  = 0x4,
+	DBG_BLOCK_ID_SRBM                                = 0x5,
+	DBG_BLOCK_ID_GRBM                                = 0x6,
+	DBG_BLOCK_ID_RLC                                 = 0x7,
+	DBG_BLOCK_ID_CSC                                 = 0x8,
+	DBG_BLOCK_ID_SEM                                 = 0x9,
+	DBG_BLOCK_ID_IH                                  = 0xa,
+	DBG_BLOCK_ID_SC                                  = 0xb,
+	DBG_BLOCK_ID_SQ                                  = 0xc,
+	DBG_BLOCK_ID_AVP                                 = 0xd,
+	DBG_BLOCK_ID_GMCON                               = 0xe,
+	DBG_BLOCK_ID_SMU                                 = 0xf,
+	DBG_BLOCK_ID_DMA0                                = 0x10,
+	DBG_BLOCK_ID_DMA1                                = 0x11,
+	DBG_BLOCK_ID_SPIM                                = 0x12,
+	DBG_BLOCK_ID_GDS                                 = 0x13,
+	DBG_BLOCK_ID_SPIS                                = 0x14,
+	DBG_BLOCK_ID_UNUSED0                             = 0x15,
+	DBG_BLOCK_ID_PA0                                 = 0x16,
+	DBG_BLOCK_ID_PA1                                 = 0x17,
+	DBG_BLOCK_ID_CP0                                 = 0x18,
+	DBG_BLOCK_ID_CP1                                 = 0x19,
+	DBG_BLOCK_ID_CP2                                 = 0x1a,
+	DBG_BLOCK_ID_UNUSED1                             = 0x1b,
+	DBG_BLOCK_ID_UVDU                                = 0x1c,
+	DBG_BLOCK_ID_UVDM                                = 0x1d,
+	DBG_BLOCK_ID_VCE                                 = 0x1e,
+	DBG_BLOCK_ID_UNUSED2                             = 0x1f,
+	DBG_BLOCK_ID_VGT0                                = 0x20,
+	DBG_BLOCK_ID_VGT1                                = 0x21,
+	DBG_BLOCK_ID_IA                                  = 0x22,
+	DBG_BLOCK_ID_UNUSED3                             = 0x23,
+	DBG_BLOCK_ID_SCT0                                = 0x24,
+	DBG_BLOCK_ID_SCT1                                = 0x25,
+	DBG_BLOCK_ID_SPM0                                = 0x26,
+	DBG_BLOCK_ID_SPM1                                = 0x27,
+	DBG_BLOCK_ID_TCAA                                = 0x28,
+	DBG_BLOCK_ID_TCAB                                = 0x29,
+	DBG_BLOCK_ID_TCCA                                = 0x2a,
+	DBG_BLOCK_ID_TCCB                                = 0x2b,
+	DBG_BLOCK_ID_MCC0                                = 0x2c,
+	DBG_BLOCK_ID_MCC1                                = 0x2d,
+	DBG_BLOCK_ID_MCC2                                = 0x2e,
+	DBG_BLOCK_ID_MCC3                                = 0x2f,
+	DBG_BLOCK_ID_SX0                                 = 0x30,
+	DBG_BLOCK_ID_SX1                                 = 0x31,
+	DBG_BLOCK_ID_SX2                                 = 0x32,
+	DBG_BLOCK_ID_SX3                                 = 0x33,
+	DBG_BLOCK_ID_UNUSED4                             = 0x34,
+	DBG_BLOCK_ID_UNUSED5                             = 0x35,
+	DBG_BLOCK_ID_UNUSED6                             = 0x36,
+	DBG_BLOCK_ID_UNUSED7                             = 0x37,
+	DBG_BLOCK_ID_PC0                                 = 0x38,
+	DBG_BLOCK_ID_PC1                                 = 0x39,
+	DBG_BLOCK_ID_UNUSED8                             = 0x3a,
+	DBG_BLOCK_ID_UNUSED9                             = 0x3b,
+	DBG_BLOCK_ID_UNUSED10                            = 0x3c,
+	DBG_BLOCK_ID_UNUSED11                            = 0x3d,
+	DBG_BLOCK_ID_MCB                                 = 0x3e,
+	DBG_BLOCK_ID_UNUSED12                            = 0x3f,
+	DBG_BLOCK_ID_SCB0                                = 0x40,
+	DBG_BLOCK_ID_SCB1                                = 0x41,
+	DBG_BLOCK_ID_UNUSED13                            = 0x42,
+	DBG_BLOCK_ID_UNUSED14                            = 0x43,
+	DBG_BLOCK_ID_SCF0                                = 0x44,
+	DBG_BLOCK_ID_SCF1                                = 0x45,
+	DBG_BLOCK_ID_UNUSED15                            = 0x46,
+	DBG_BLOCK_ID_UNUSED16                            = 0x47,
+	DBG_BLOCK_ID_BCI0                                = 0x48,
+	DBG_BLOCK_ID_BCI1                                = 0x49,
+	DBG_BLOCK_ID_BCI2                                = 0x4a,
+	DBG_BLOCK_ID_BCI3                                = 0x4b,
+	DBG_BLOCK_ID_UNUSED17                            = 0x4c,
+	DBG_BLOCK_ID_UNUSED18                            = 0x4d,
+	DBG_BLOCK_ID_UNUSED19                            = 0x4e,
+	DBG_BLOCK_ID_UNUSED20                            = 0x4f,
+	DBG_BLOCK_ID_CB00                                = 0x50,
+	DBG_BLOCK_ID_CB01                                = 0x51,
+	DBG_BLOCK_ID_CB02                                = 0x52,
+	DBG_BLOCK_ID_CB03                                = 0x53,
+	DBG_BLOCK_ID_CB04                                = 0x54,
+	DBG_BLOCK_ID_UNUSED21                            = 0x55,
+	DBG_BLOCK_ID_UNUSED22                            = 0x56,
+	DBG_BLOCK_ID_UNUSED23                            = 0x57,
+	DBG_BLOCK_ID_CB10                                = 0x58,
+	DBG_BLOCK_ID_CB11                                = 0x59,
+	DBG_BLOCK_ID_CB12                                = 0x5a,
+	DBG_BLOCK_ID_CB13                                = 0x5b,
+	DBG_BLOCK_ID_CB14                                = 0x5c,
+	DBG_BLOCK_ID_UNUSED24                            = 0x5d,
+	DBG_BLOCK_ID_UNUSED25                            = 0x5e,
+	DBG_BLOCK_ID_UNUSED26                            = 0x5f,
+	DBG_BLOCK_ID_TCP0                                = 0x60,
+	DBG_BLOCK_ID_TCP1                                = 0x61,
+	DBG_BLOCK_ID_TCP2                                = 0x62,
+	DBG_BLOCK_ID_TCP3                                = 0x63,
+	DBG_BLOCK_ID_TCP4                                = 0x64,
+	DBG_BLOCK_ID_TCP5                                = 0x65,
+	DBG_BLOCK_ID_TCP6                                = 0x66,
+	DBG_BLOCK_ID_TCP7                                = 0x67,
+	DBG_BLOCK_ID_TCP8                                = 0x68,
+	DBG_BLOCK_ID_TCP9                                = 0x69,
+	DBG_BLOCK_ID_TCP10                               = 0x6a,
+	DBG_BLOCK_ID_TCP11                               = 0x6b,
+	DBG_BLOCK_ID_TCP12                               = 0x6c,
+	DBG_BLOCK_ID_TCP13                               = 0x6d,
+	DBG_BLOCK_ID_TCP14                               = 0x6e,
+	DBG_BLOCK_ID_TCP15                               = 0x6f,
+	DBG_BLOCK_ID_TCP16                               = 0x70,
+	DBG_BLOCK_ID_TCP17                               = 0x71,
+	DBG_BLOCK_ID_TCP18                               = 0x72,
+	DBG_BLOCK_ID_TCP19                               = 0x73,
+	DBG_BLOCK_ID_TCP20                               = 0x74,
+	DBG_BLOCK_ID_TCP21                               = 0x75,
+	DBG_BLOCK_ID_TCP22                               = 0x76,
+	DBG_BLOCK_ID_TCP23                               = 0x77,
+	DBG_BLOCK_ID_TCP_RESERVED0                       = 0x78,
+	DBG_BLOCK_ID_TCP_RESERVED1                       = 0x79,
+	DBG_BLOCK_ID_TCP_RESERVED2                       = 0x7a,
+	DBG_BLOCK_ID_TCP_RESERVED3                       = 0x7b,
+	DBG_BLOCK_ID_TCP_RESERVED4                       = 0x7c,
+	DBG_BLOCK_ID_TCP_RESERVED5                       = 0x7d,
+	DBG_BLOCK_ID_TCP_RESERVED6                       = 0x7e,
+	DBG_BLOCK_ID_TCP_RESERVED7                       = 0x7f,
+	DBG_BLOCK_ID_DB00                                = 0x80,
+	DBG_BLOCK_ID_DB01                                = 0x81,
+	DBG_BLOCK_ID_DB02                                = 0x82,
+	DBG_BLOCK_ID_DB03                                = 0x83,
+	DBG_BLOCK_ID_DB04                                = 0x84,
+	DBG_BLOCK_ID_UNUSED27                            = 0x85,
+	DBG_BLOCK_ID_UNUSED28                            = 0x86,
+	DBG_BLOCK_ID_UNUSED29                            = 0x87,
+	DBG_BLOCK_ID_DB10                                = 0x88,
+	DBG_BLOCK_ID_DB11                                = 0x89,
+	DBG_BLOCK_ID_DB12                                = 0x8a,
+	DBG_BLOCK_ID_DB13                                = 0x8b,
+	DBG_BLOCK_ID_DB14                                = 0x8c,
+	DBG_BLOCK_ID_UNUSED30                            = 0x8d,
+	DBG_BLOCK_ID_UNUSED31                            = 0x8e,
+	DBG_BLOCK_ID_UNUSED32                            = 0x8f,
+	DBG_BLOCK_ID_TCC0                                = 0x90,
+	DBG_BLOCK_ID_TCC1                                = 0x91,
+	DBG_BLOCK_ID_TCC2                                = 0x92,
+	DBG_BLOCK_ID_TCC3                                = 0x93,
+	DBG_BLOCK_ID_TCC4                                = 0x94,
+	DBG_BLOCK_ID_TCC5                                = 0x95,
+	DBG_BLOCK_ID_TCC6                                = 0x96,
+	DBG_BLOCK_ID_TCC7                                = 0x97,
+	DBG_BLOCK_ID_SPS00                               = 0x98,
+	DBG_BLOCK_ID_SPS01                               = 0x99,
+	DBG_BLOCK_ID_SPS02                               = 0x9a,
+	DBG_BLOCK_ID_SPS10                               = 0x9b,
+	DBG_BLOCK_ID_SPS11                               = 0x9c,
+	DBG_BLOCK_ID_SPS12                               = 0x9d,
+	DBG_BLOCK_ID_UNUSED33                            = 0x9e,
+	DBG_BLOCK_ID_UNUSED34                            = 0x9f,
+	DBG_BLOCK_ID_TA00                                = 0xa0,
+	DBG_BLOCK_ID_TA01                                = 0xa1,
+	DBG_BLOCK_ID_TA02                                = 0xa2,
+	DBG_BLOCK_ID_TA03                                = 0xa3,
+	DBG_BLOCK_ID_TA04                                = 0xa4,
+	DBG_BLOCK_ID_TA05                                = 0xa5,
+	DBG_BLOCK_ID_TA06                                = 0xa6,
+	DBG_BLOCK_ID_TA07                                = 0xa7,
+	DBG_BLOCK_ID_TA08                                = 0xa8,
+	DBG_BLOCK_ID_TA09                                = 0xa9,
+	DBG_BLOCK_ID_TA0A                                = 0xaa,
+	DBG_BLOCK_ID_TA0B                                = 0xab,
+	DBG_BLOCK_ID_UNUSED35                            = 0xac,
+	DBG_BLOCK_ID_UNUSED36                            = 0xad,
+	DBG_BLOCK_ID_UNUSED37                            = 0xae,
+	DBG_BLOCK_ID_UNUSED38                            = 0xaf,
+	DBG_BLOCK_ID_TA10                                = 0xb0,
+	DBG_BLOCK_ID_TA11                                = 0xb1,
+	DBG_BLOCK_ID_TA12                                = 0xb2,
+	DBG_BLOCK_ID_TA13                                = 0xb3,
+	DBG_BLOCK_ID_TA14                                = 0xb4,
+	DBG_BLOCK_ID_TA15                                = 0xb5,
+	DBG_BLOCK_ID_TA16                                = 0xb6,
+	DBG_BLOCK_ID_TA17                                = 0xb7,
+	DBG_BLOCK_ID_TA18                                = 0xb8,
+	DBG_BLOCK_ID_TA19                                = 0xb9,
+	DBG_BLOCK_ID_TA1A                                = 0xba,
+	DBG_BLOCK_ID_TA1B                                = 0xbb,
+	DBG_BLOCK_ID_UNUSED39                            = 0xbc,
+	DBG_BLOCK_ID_UNUSED40                            = 0xbd,
+	DBG_BLOCK_ID_UNUSED41                            = 0xbe,
+	DBG_BLOCK_ID_UNUSED42                            = 0xbf,
+	DBG_BLOCK_ID_TD00                                = 0xc0,
+	DBG_BLOCK_ID_TD01                                = 0xc1,
+	DBG_BLOCK_ID_TD02                                = 0xc2,
+	DBG_BLOCK_ID_TD03                                = 0xc3,
+	DBG_BLOCK_ID_TD04                                = 0xc4,
+	DBG_BLOCK_ID_TD05                                = 0xc5,
+	DBG_BLOCK_ID_TD06                                = 0xc6,
+	DBG_BLOCK_ID_TD07                                = 0xc7,
+	DBG_BLOCK_ID_TD08                                = 0xc8,
+	DBG_BLOCK_ID_TD09                                = 0xc9,
+	DBG_BLOCK_ID_TD0A                                = 0xca,
+	DBG_BLOCK_ID_TD0B                                = 0xcb,
+	DBG_BLOCK_ID_UNUSED43                            = 0xcc,
+	DBG_BLOCK_ID_UNUSED44                            = 0xcd,
+	DBG_BLOCK_ID_UNUSED45                            = 0xce,
+	DBG_BLOCK_ID_UNUSED46                            = 0xcf,
+	DBG_BLOCK_ID_TD10                                = 0xd0,
+	DBG_BLOCK_ID_TD11                                = 0xd1,
+	DBG_BLOCK_ID_TD12                                = 0xd2,
+	DBG_BLOCK_ID_TD13                                = 0xd3,
+	DBG_BLOCK_ID_TD14                                = 0xd4,
+	DBG_BLOCK_ID_TD15                                = 0xd5,
+	DBG_BLOCK_ID_TD16                                = 0xd6,
+	DBG_BLOCK_ID_TD17                                = 0xd7,
+	DBG_BLOCK_ID_TD18                                = 0xd8,
+	DBG_BLOCK_ID_TD19                                = 0xd9,
+	DBG_BLOCK_ID_TD1A                                = 0xda,
+	DBG_BLOCK_ID_TD1B                                = 0xdb,
+	DBG_BLOCK_ID_UNUSED47                            = 0xdc,
+	DBG_BLOCK_ID_UNUSED48                            = 0xdd,
+	DBG_BLOCK_ID_UNUSED49                            = 0xde,
+	DBG_BLOCK_ID_UNUSED50                            = 0xdf,
+	DBG_BLOCK_ID_MCD0                                = 0xe0,
+	DBG_BLOCK_ID_MCD1                                = 0xe1,
+	DBG_BLOCK_ID_MCD2                                = 0xe2,
+	DBG_BLOCK_ID_MCD3                                = 0xe3,
+	DBG_BLOCK_ID_MCD4                                = 0xe4,
+	DBG_BLOCK_ID_MCD5                                = 0xe5,
+	DBG_BLOCK_ID_UNUSED51                            = 0xe6,
+	DBG_BLOCK_ID_UNUSED52                            = 0xe7,
+} DebugBlockId_OLD;
+typedef enum DebugBlockId_BY2 {
+	DBG_BLOCK_ID_RESERVED_BY2                        = 0x0,
+	DBG_BLOCK_ID_VMC_BY2                             = 0x1,
+	DBG_BLOCK_ID_CG_BY2                              = 0x2,
+	DBG_BLOCK_ID_GRBM_BY2                            = 0x3,
+	DBG_BLOCK_ID_CSC_BY2                             = 0x4,
+	DBG_BLOCK_ID_IH_BY2                              = 0x5,
+	DBG_BLOCK_ID_SQ_BY2                              = 0x6,
+	DBG_BLOCK_ID_GMCON_BY2                           = 0x7,
+	DBG_BLOCK_ID_DMA0_BY2                            = 0x8,
+	DBG_BLOCK_ID_SPIM_BY2                            = 0x9,
+	DBG_BLOCK_ID_SPIS_BY2                            = 0xa,
+	DBG_BLOCK_ID_PA0_BY2                             = 0xb,
+	DBG_BLOCK_ID_CP0_BY2                             = 0xc,
+	DBG_BLOCK_ID_CP2_BY2                             = 0xd,
+	DBG_BLOCK_ID_UVDU_BY2                            = 0xe,
+	DBG_BLOCK_ID_VCE_BY2                             = 0xf,
+	DBG_BLOCK_ID_VGT0_BY2                            = 0x10,
+	DBG_BLOCK_ID_IA_BY2                              = 0x11,
+	DBG_BLOCK_ID_SCT0_BY2                            = 0x12,
+	DBG_BLOCK_ID_SPM0_BY2                            = 0x13,
+	DBG_BLOCK_ID_TCAA_BY2                            = 0x14,
+	DBG_BLOCK_ID_TCCA_BY2                            = 0x15,
+	DBG_BLOCK_ID_MCC0_BY2                            = 0x16,
+	DBG_BLOCK_ID_MCC2_BY2                            = 0x17,
+	DBG_BLOCK_ID_SX0_BY2                             = 0x18,
+	DBG_BLOCK_ID_SX2_BY2                             = 0x19,
+	DBG_BLOCK_ID_UNUSED4_BY2                         = 0x1a,
+	DBG_BLOCK_ID_UNUSED6_BY2                         = 0x1b,
+	DBG_BLOCK_ID_PC0_BY2                             = 0x1c,
+	DBG_BLOCK_ID_UNUSED8_BY2                         = 0x1d,
+	DBG_BLOCK_ID_UNUSED10_BY2                        = 0x1e,
+	DBG_BLOCK_ID_MCB_BY2                             = 0x1f,
+	DBG_BLOCK_ID_SCB0_BY2                            = 0x20,
+	DBG_BLOCK_ID_UNUSED13_BY2                        = 0x21,
+	DBG_BLOCK_ID_SCF0_BY2                            = 0x22,
+	DBG_BLOCK_ID_UNUSED15_BY2                        = 0x23,
+	DBG_BLOCK_ID_BCI0_BY2                            = 0x24,
+	DBG_BLOCK_ID_BCI2_BY2                            = 0x25,
+	DBG_BLOCK_ID_UNUSED17_BY2                        = 0x26,
+	DBG_BLOCK_ID_UNUSED19_BY2                        = 0x27,
+	DBG_BLOCK_ID_CB00_BY2                            = 0x28,
+	DBG_BLOCK_ID_CB02_BY2                            = 0x29,
+	DBG_BLOCK_ID_CB04_BY2                            = 0x2a,
+	DBG_BLOCK_ID_UNUSED22_BY2                        = 0x2b,
+	DBG_BLOCK_ID_CB10_BY2                            = 0x2c,
+	DBG_BLOCK_ID_CB12_BY2                            = 0x2d,
+	DBG_BLOCK_ID_CB14_BY2                            = 0x2e,
+	DBG_BLOCK_ID_UNUSED25_BY2                        = 0x2f,
+	DBG_BLOCK_ID_TCP0_BY2                            = 0x30,
+	DBG_BLOCK_ID_TCP2_BY2                            = 0x31,
+	DBG_BLOCK_ID_TCP4_BY2                            = 0x32,
+	DBG_BLOCK_ID_TCP6_BY2                            = 0x33,
+	DBG_BLOCK_ID_TCP8_BY2                            = 0x34,
+	DBG_BLOCK_ID_TCP10_BY2                           = 0x35,
+	DBG_BLOCK_ID_TCP12_BY2                           = 0x36,
+	DBG_BLOCK_ID_TCP14_BY2                           = 0x37,
+	DBG_BLOCK_ID_TCP16_BY2                           = 0x38,
+	DBG_BLOCK_ID_TCP18_BY2                           = 0x39,
+	DBG_BLOCK_ID_TCP20_BY2                           = 0x3a,
+	DBG_BLOCK_ID_TCP22_BY2                           = 0x3b,
+	DBG_BLOCK_ID_TCP_RESERVED0_BY2                   = 0x3c,
+	DBG_BLOCK_ID_TCP_RESERVED2_BY2                   = 0x3d,
+	DBG_BLOCK_ID_TCP_RESERVED4_BY2                   = 0x3e,
+	DBG_BLOCK_ID_TCP_RESERVED6_BY2                   = 0x3f,
+	DBG_BLOCK_ID_DB00_BY2                            = 0x40,
+	DBG_BLOCK_ID_DB02_BY2                            = 0x41,
+	DBG_BLOCK_ID_DB04_BY2                            = 0x42,
+	DBG_BLOCK_ID_UNUSED28_BY2                        = 0x43,
+	DBG_BLOCK_ID_DB10_BY2                            = 0x44,
+	DBG_BLOCK_ID_DB12_BY2                            = 0x45,
+	DBG_BLOCK_ID_DB14_BY2                            = 0x46,
+	DBG_BLOCK_ID_UNUSED31_BY2                        = 0x47,
+	DBG_BLOCK_ID_TCC0_BY2                            = 0x48,
+	DBG_BLOCK_ID_TCC2_BY2                            = 0x49,
+	DBG_BLOCK_ID_TCC4_BY2                            = 0x4a,
+	DBG_BLOCK_ID_TCC6_BY2                            = 0x4b,
+	DBG_BLOCK_ID_SPS00_BY2                           = 0x4c,
+	DBG_BLOCK_ID_SPS02_BY2                           = 0x4d,
+	DBG_BLOCK_ID_SPS11_BY2                           = 0x4e,
+	DBG_BLOCK_ID_UNUSED33_BY2                        = 0x4f,
+	DBG_BLOCK_ID_TA00_BY2                            = 0x50,
+	DBG_BLOCK_ID_TA02_BY2                            = 0x51,
+	DBG_BLOCK_ID_TA04_BY2                            = 0x52,
+	DBG_BLOCK_ID_TA06_BY2                            = 0x53,
+	DBG_BLOCK_ID_TA08_BY2                            = 0x54,
+	DBG_BLOCK_ID_TA0A_BY2                            = 0x55,
+	DBG_BLOCK_ID_UNUSED35_BY2                        = 0x56,
+	DBG_BLOCK_ID_UNUSED37_BY2                        = 0x57,
+	DBG_BLOCK_ID_TA10_BY2                            = 0x58,
+	DBG_BLOCK_ID_TA12_BY2                            = 0x59,
+	DBG_BLOCK_ID_TA14_BY2                            = 0x5a,
+	DBG_BLOCK_ID_TA16_BY2                            = 0x5b,
+	DBG_BLOCK_ID_TA18_BY2                            = 0x5c,
+	DBG_BLOCK_ID_TA1A_BY2                            = 0x5d,
+	DBG_BLOCK_ID_UNUSED39_BY2                        = 0x5e,
+	DBG_BLOCK_ID_UNUSED41_BY2                        = 0x5f,
+	DBG_BLOCK_ID_TD00_BY2                            = 0x60,
+	DBG_BLOCK_ID_TD02_BY2                            = 0x61,
+	DBG_BLOCK_ID_TD04_BY2                            = 0x62,
+	DBG_BLOCK_ID_TD06_BY2                            = 0x63,
+	DBG_BLOCK_ID_TD08_BY2                            = 0x64,
+	DBG_BLOCK_ID_TD0A_BY2                            = 0x65,
+	DBG_BLOCK_ID_UNUSED43_BY2                        = 0x66,
+	DBG_BLOCK_ID_UNUSED45_BY2                        = 0x67,
+	DBG_BLOCK_ID_TD10_BY2                            = 0x68,
+	DBG_BLOCK_ID_TD12_BY2                            = 0x69,
+	DBG_BLOCK_ID_TD14_BY2                            = 0x6a,
+	DBG_BLOCK_ID_TD16_BY2                            = 0x6b,
+	DBG_BLOCK_ID_TD18_BY2                            = 0x6c,
+	DBG_BLOCK_ID_TD1A_BY2                            = 0x6d,
+	DBG_BLOCK_ID_UNUSED47_BY2                        = 0x6e,
+	DBG_BLOCK_ID_UNUSED49_BY2                        = 0x6f,
+	DBG_BLOCK_ID_MCD0_BY2                            = 0x70,
+	DBG_BLOCK_ID_MCD2_BY2                            = 0x71,
+	DBG_BLOCK_ID_MCD4_BY2                            = 0x72,
+	DBG_BLOCK_ID_UNUSED51_BY2                        = 0x73,
+} DebugBlockId_BY2;
+typedef enum DebugBlockId_BY4 {
+	DBG_BLOCK_ID_RESERVED_BY4                        = 0x0,
+	DBG_BLOCK_ID_CG_BY4                              = 0x1,
+	DBG_BLOCK_ID_CSC_BY4                             = 0x2,
+	DBG_BLOCK_ID_SQ_BY4                              = 0x3,
+	DBG_BLOCK_ID_DMA0_BY4                            = 0x4,
+	DBG_BLOCK_ID_SPIS_BY4                            = 0x5,
+	DBG_BLOCK_ID_CP0_BY4                             = 0x6,
+	DBG_BLOCK_ID_UVDU_BY4                            = 0x7,
+	DBG_BLOCK_ID_VGT0_BY4                            = 0x8,
+	DBG_BLOCK_ID_SCT0_BY4                            = 0x9,
+	DBG_BLOCK_ID_TCAA_BY4                            = 0xa,
+	DBG_BLOCK_ID_MCC0_BY4                            = 0xb,
+	DBG_BLOCK_ID_SX0_BY4                             = 0xc,
+	DBG_BLOCK_ID_UNUSED4_BY4                         = 0xd,
+	DBG_BLOCK_ID_PC0_BY4                             = 0xe,
+	DBG_BLOCK_ID_UNUSED10_BY4                        = 0xf,
+	DBG_BLOCK_ID_SCB0_BY4                            = 0x10,
+	DBG_BLOCK_ID_SCF0_BY4                            = 0x11,
+	DBG_BLOCK_ID_BCI0_BY4                            = 0x12,
+	DBG_BLOCK_ID_UNUSED17_BY4                        = 0x13,
+	DBG_BLOCK_ID_CB00_BY4                            = 0x14,
+	DBG_BLOCK_ID_CB04_BY4                            = 0x15,
+	DBG_BLOCK_ID_CB10_BY4                            = 0x16,
+	DBG_BLOCK_ID_CB14_BY4                            = 0x17,
+	DBG_BLOCK_ID_TCP0_BY4                            = 0x18,
+	DBG_BLOCK_ID_TCP4_BY4                            = 0x19,
+	DBG_BLOCK_ID_TCP8_BY4                            = 0x1a,
+	DBG_BLOCK_ID_TCP12_BY4                           = 0x1b,
+	DBG_BLOCK_ID_TCP16_BY4                           = 0x1c,
+	DBG_BLOCK_ID_TCP20_BY4                           = 0x1d,
+	DBG_BLOCK_ID_TCP_RESERVED0_BY4                   = 0x1e,
+	DBG_BLOCK_ID_TCP_RESERVED4_BY4                   = 0x1f,
+	DBG_BLOCK_ID_DB_BY4                              = 0x20,
+	DBG_BLOCK_ID_DB04_BY4                            = 0x21,
+	DBG_BLOCK_ID_DB10_BY4                            = 0x22,
+	DBG_BLOCK_ID_DB14_BY4                            = 0x23,
+	DBG_BLOCK_ID_TCC0_BY4                            = 0x24,
+	DBG_BLOCK_ID_TCC4_BY4                            = 0x25,
+	DBG_BLOCK_ID_SPS00_BY4                           = 0x26,
+	DBG_BLOCK_ID_SPS11_BY4                           = 0x27,
+	DBG_BLOCK_ID_TA00_BY4                            = 0x28,
+	DBG_BLOCK_ID_TA04_BY4                            = 0x29,
+	DBG_BLOCK_ID_TA08_BY4                            = 0x2a,
+	DBG_BLOCK_ID_UNUSED35_BY4                        = 0x2b,
+	DBG_BLOCK_ID_TA10_BY4                            = 0x2c,
+	DBG_BLOCK_ID_TA14_BY4                            = 0x2d,
+	DBG_BLOCK_ID_TA18_BY4                            = 0x2e,
+	DBG_BLOCK_ID_UNUSED39_BY4                        = 0x2f,
+	DBG_BLOCK_ID_TD00_BY4                            = 0x30,
+	DBG_BLOCK_ID_TD04_BY4                            = 0x31,
+	DBG_BLOCK_ID_TD08_BY4                            = 0x32,
+	DBG_BLOCK_ID_UNUSED43_BY4                        = 0x33,
+	DBG_BLOCK_ID_TD10_BY4                            = 0x34,
+	DBG_BLOCK_ID_TD14_BY4                            = 0x35,
+	DBG_BLOCK_ID_TD18_BY4                            = 0x36,
+	DBG_BLOCK_ID_UNUSED47_BY4                        = 0x37,
+	DBG_BLOCK_ID_MCD0_BY4                            = 0x38,
+	DBG_BLOCK_ID_MCD4_BY4                            = 0x39,
+} DebugBlockId_BY4;
+typedef enum DebugBlockId_BY8 {
+	DBG_BLOCK_ID_RESERVED_BY8                        = 0x0,
+	DBG_BLOCK_ID_CSC_BY8                             = 0x1,
+	DBG_BLOCK_ID_DMA0_BY8                            = 0x2,
+	DBG_BLOCK_ID_CP0_BY8                             = 0x3,
+	DBG_BLOCK_ID_VGT0_BY8                            = 0x4,
+	DBG_BLOCK_ID_TCAA_BY8                            = 0x5,
+	DBG_BLOCK_ID_SX0_BY8                             = 0x6,
+	DBG_BLOCK_ID_PC0_BY8                             = 0x7,
+	DBG_BLOCK_ID_SCB0_BY8                            = 0x8,
+	DBG_BLOCK_ID_BCI0_BY8                            = 0x9,
+	DBG_BLOCK_ID_CB00_BY8                            = 0xa,
+	DBG_BLOCK_ID_CB10_BY8                            = 0xb,
+	DBG_BLOCK_ID_TCP0_BY8                            = 0xc,
+	DBG_BLOCK_ID_TCP8_BY8                            = 0xd,
+	DBG_BLOCK_ID_TCP16_BY8                           = 0xe,
+	DBG_BLOCK_ID_TCP_RESERVED0_BY8                   = 0xf,
+	DBG_BLOCK_ID_DB00_BY8                            = 0x10,
+	DBG_BLOCK_ID_DB10_BY8                            = 0x11,
+	DBG_BLOCK_ID_TCC0_BY8                            = 0x12,
+	DBG_BLOCK_ID_SPS00_BY8                           = 0x13,
+	DBG_BLOCK_ID_TA00_BY8                            = 0x14,
+	DBG_BLOCK_ID_TA08_BY8                            = 0x15,
+	DBG_BLOCK_ID_TA10_BY8                            = 0x16,
+	DBG_BLOCK_ID_TA18_BY8                            = 0x17,
+	DBG_BLOCK_ID_TD00_BY8                            = 0x18,
+	DBG_BLOCK_ID_TD08_BY8                            = 0x19,
+	DBG_BLOCK_ID_TD10_BY8                            = 0x1a,
+	DBG_BLOCK_ID_TD18_BY8                            = 0x1b,
+	DBG_BLOCK_ID_MCD0_BY8                            = 0x1c,
+} DebugBlockId_BY8;
+typedef enum DebugBlockId_BY16 {
+	DBG_BLOCK_ID_RESERVED_BY16                       = 0x0,
+	DBG_BLOCK_ID_DMA0_BY16                           = 0x1,
+	DBG_BLOCK_ID_VGT0_BY16                           = 0x2,
+	DBG_BLOCK_ID_SX0_BY16                            = 0x3,
+	DBG_BLOCK_ID_SCB0_BY16                           = 0x4,
+	DBG_BLOCK_ID_CB00_BY16                           = 0x5,
+	DBG_BLOCK_ID_TCP0_BY16                           = 0x6,
+	DBG_BLOCK_ID_TCP16_BY16                          = 0x7,
+	DBG_BLOCK_ID_DB00_BY16                           = 0x8,
+	DBG_BLOCK_ID_TCC0_BY16                           = 0x9,
+	DBG_BLOCK_ID_TA00_BY16                           = 0xa,
+	DBG_BLOCK_ID_TA10_BY16                           = 0xb,
+	DBG_BLOCK_ID_TD00_BY16                           = 0xc,
+	DBG_BLOCK_ID_TD10_BY16                           = 0xd,
+	DBG_BLOCK_ID_MCD0_BY16                           = 0xe,
+} DebugBlockId_BY16;
+typedef enum CompareRef {
+	REF_NEVER                                        = 0x0,
+	REF_LESS                                         = 0x1,
+	REF_EQUAL                                        = 0x2,
+	REF_LEQUAL                                       = 0x3,
+	REF_GREATER                                      = 0x4,
+	REF_NOTEQUAL                                     = 0x5,
+	REF_GEQUAL                                       = 0x6,
+	REF_ALWAYS                                       = 0x7,
+} CompareRef;
+typedef enum ReadSize {
+	READ_256_BITS                                    = 0x0,
+	READ_512_BITS                                    = 0x1,
+} ReadSize;
+typedef enum DepthFormat {
+	DEPTH_INVALID                                    = 0x0,
+	DEPTH_16                                         = 0x1,
+	DEPTH_X8_24                                      = 0x2,
+	DEPTH_8_24                                       = 0x3,
+	DEPTH_X8_24_FLOAT                                = 0x4,
+	DEPTH_8_24_FLOAT                                 = 0x5,
+	DEPTH_32_FLOAT                                   = 0x6,
+	DEPTH_X24_8_32_FLOAT                             = 0x7,
+} DepthFormat;
+typedef enum ZFormat {
+	Z_INVALID                                        = 0x0,
+	Z_16                                             = 0x1,
+	Z_24                                             = 0x2,
+	Z_32_FLOAT                                       = 0x3,
+} ZFormat;
+typedef enum StencilFormat {
+	STENCIL_INVALID                                  = 0x0,
+	STENCIL_8                                        = 0x1,
+} StencilFormat;
+typedef enum CmaskMode {
+	CMASK_CLEAR_NONE                                 = 0x0,
+	CMASK_CLEAR_ONE                                  = 0x1,
+	CMASK_CLEAR_ALL                                  = 0x2,
+	CMASK_ANY_EXPANDED                               = 0x3,
+	CMASK_ALPHA0_FRAG1                               = 0x4,
+	CMASK_ALPHA0_FRAG2                               = 0x5,
+	CMASK_ALPHA0_FRAG4                               = 0x6,
+	CMASK_ALPHA0_FRAGS                               = 0x7,
+	CMASK_ALPHA1_FRAG1                               = 0x8,
+	CMASK_ALPHA1_FRAG2                               = 0x9,
+	CMASK_ALPHA1_FRAG4                               = 0xa,
+	CMASK_ALPHA1_FRAGS                               = 0xb,
+	CMASK_ALPHAX_FRAG1                               = 0xc,
+	CMASK_ALPHAX_FRAG2                               = 0xd,
+	CMASK_ALPHAX_FRAG4                               = 0xe,
+	CMASK_ALPHAX_FRAGS                               = 0xf,
+} CmaskMode;
+typedef enum QuadExportFormat {
+	EXPORT_UNUSED                                    = 0x0,
+	EXPORT_32_R                                      = 0x1,
+	EXPORT_32_GR                                     = 0x2,
+	EXPORT_32_AR                                     = 0x3,
+	EXPORT_FP16_ABGR                                 = 0x4,
+	EXPORT_UNSIGNED16_ABGR                           = 0x5,
+	EXPORT_SIGNED16_ABGR                             = 0x6,
+	EXPORT_32_ABGR                                   = 0x7,
+} QuadExportFormat;
+typedef enum QuadExportFormatOld {
+	EXPORT_4P_32BPC_ABGR                             = 0x0,
+	EXPORT_4P_16BPC_ABGR                             = 0x1,
+	EXPORT_4P_32BPC_GR                               = 0x2,
+	EXPORT_4P_32BPC_AR                               = 0x3,
+	EXPORT_2P_32BPC_ABGR                             = 0x4,
+	EXPORT_8P_32BPC_R                                = 0x5,
+} QuadExportFormatOld;
+typedef enum ColorFormat {
+	COLOR_INVALID                                    = 0x0,
+	COLOR_8                                          = 0x1,
+	COLOR_16                                         = 0x2,
+	COLOR_8_8                                        = 0x3,
+	COLOR_32                                         = 0x4,
+	COLOR_16_16                                      = 0x5,
+	COLOR_10_11_11                                   = 0x6,
+	COLOR_11_11_10                                   = 0x7,
+	COLOR_10_10_10_2                                 = 0x8,
+	COLOR_2_10_10_10                                 = 0x9,
+	COLOR_8_8_8_8                                    = 0xa,
+	COLOR_32_32                                      = 0xb,
+	COLOR_16_16_16_16                                = 0xc,
+	COLOR_RESERVED_13                                = 0xd,
+	COLOR_32_32_32_32                                = 0xe,
+	COLOR_RESERVED_15                                = 0xf,
+	COLOR_5_6_5                                      = 0x10,
+	COLOR_1_5_5_5                                    = 0x11,
+	COLOR_5_5_5_1                                    = 0x12,
+	COLOR_4_4_4_4                                    = 0x13,
+	COLOR_8_24                                       = 0x14,
+	COLOR_24_8                                       = 0x15,
+	COLOR_X24_8_32_FLOAT                             = 0x16,
+	COLOR_RESERVED_23                                = 0x17,
+} ColorFormat;
+typedef enum SurfaceFormat {
+	FMT_INVALID                                      = 0x0,
+	FMT_8                                            = 0x1,
+	FMT_16                                           = 0x2,
+	FMT_8_8                                          = 0x3,
+	FMT_32                                           = 0x4,
+	FMT_16_16                                        = 0x5,
+	FMT_10_11_11                                     = 0x6,
+	FMT_11_11_10                                     = 0x7,
+	FMT_10_10_10_2                                   = 0x8,
+	FMT_2_10_10_10                                   = 0x9,
+	FMT_8_8_8_8                                      = 0xa,
+	FMT_32_32                                        = 0xb,
+	FMT_16_16_16_16                                  = 0xc,
+	FMT_32_32_32                                     = 0xd,
+	FMT_32_32_32_32                                  = 0xe,
+	FMT_RESERVED_4                                   = 0xf,
+	FMT_5_6_5                                        = 0x10,
+	FMT_1_5_5_5                                      = 0x11,
+	FMT_5_5_5_1                                      = 0x12,
+	FMT_4_4_4_4                                      = 0x13,
+	FMT_8_24                                         = 0x14,
+	FMT_24_8                                         = 0x15,
+	FMT_X24_8_32_FLOAT                               = 0x16,
+	FMT_RESERVED_33                                  = 0x17,
+	FMT_11_11_10_FLOAT                               = 0x18,
+	FMT_16_FLOAT                                     = 0x19,
+	FMT_32_FLOAT                                     = 0x1a,
+	FMT_16_16_FLOAT                                  = 0x1b,
+	FMT_8_24_FLOAT                                   = 0x1c,
+	FMT_24_8_FLOAT                                   = 0x1d,
+	FMT_32_32_FLOAT                                  = 0x1e,
+	FMT_10_11_11_FLOAT                               = 0x1f,
+	FMT_16_16_16_16_FLOAT                            = 0x20,
+	FMT_3_3_2                                        = 0x21,
+	FMT_6_5_5                                        = 0x22,
+	FMT_32_32_32_32_FLOAT                            = 0x23,
+	FMT_RESERVED_36                                  = 0x24,
+	FMT_1                                            = 0x25,
+	FMT_1_REVERSED                                   = 0x26,
+	FMT_GB_GR                                        = 0x27,
+	FMT_BG_RG                                        = 0x28,
+	FMT_32_AS_8                                      = 0x29,
+	FMT_32_AS_8_8                                    = 0x2a,
+	FMT_5_9_9_9_SHAREDEXP                            = 0x2b,
+	FMT_8_8_8                                        = 0x2c,
+	FMT_16_16_16                                     = 0x2d,
+	FMT_16_16_16_FLOAT                               = 0x2e,
+	FMT_4_4                                          = 0x2f,
+	FMT_32_32_32_FLOAT                               = 0x30,
+	FMT_BC1                                          = 0x31,
+	FMT_BC2                                          = 0x32,
+	FMT_BC3                                          = 0x33,
+	FMT_BC4                                          = 0x34,
+	FMT_BC5                                          = 0x35,
+	FMT_BC6                                          = 0x36,
+	FMT_BC7                                          = 0x37,
+	FMT_32_AS_32_32_32_32                            = 0x38,
+	FMT_APC3                                         = 0x39,
+	FMT_APC4                                         = 0x3a,
+	FMT_APC5                                         = 0x3b,
+	FMT_APC6                                         = 0x3c,
+	FMT_APC7                                         = 0x3d,
+	FMT_CTX1                                         = 0x3e,
+	FMT_RESERVED_63                                  = 0x3f,
+} SurfaceFormat;
+typedef enum BUF_DATA_FORMAT {
+	BUF_DATA_FORMAT_INVALID                          = 0x0,
+	BUF_DATA_FORMAT_8                                = 0x1,
+	BUF_DATA_FORMAT_16                               = 0x2,
+	BUF_DATA_FORMAT_8_8                              = 0x3,
+	BUF_DATA_FORMAT_32                               = 0x4,
+	BUF_DATA_FORMAT_16_16                            = 0x5,
+	BUF_DATA_FORMAT_10_11_11                         = 0x6,
+	BUF_DATA_FORMAT_11_11_10                         = 0x7,
+	BUF_DATA_FORMAT_10_10_10_2                       = 0x8,
+	BUF_DATA_FORMAT_2_10_10_10                       = 0x9,
+	BUF_DATA_FORMAT_8_8_8_8                          = 0xa,
+	BUF_DATA_FORMAT_32_32                            = 0xb,
+	BUF_DATA_FORMAT_16_16_16_16                      = 0xc,
+	BUF_DATA_FORMAT_32_32_32                         = 0xd,
+	BUF_DATA_FORMAT_32_32_32_32                      = 0xe,
+	BUF_DATA_FORMAT_RESERVED_15                      = 0xf,
+} BUF_DATA_FORMAT;
+typedef enum IMG_DATA_FORMAT {
+	IMG_DATA_FORMAT_INVALID                          = 0x0,
+	IMG_DATA_FORMAT_8                                = 0x1,
+	IMG_DATA_FORMAT_16                               = 0x2,
+	IMG_DATA_FORMAT_8_8                              = 0x3,
+	IMG_DATA_FORMAT_32                               = 0x4,
+	IMG_DATA_FORMAT_16_16                            = 0x5,
+	IMG_DATA_FORMAT_10_11_11                         = 0x6,
+	IMG_DATA_FORMAT_11_11_10                         = 0x7,
+	IMG_DATA_FORMAT_10_10_10_2                       = 0x8,
+	IMG_DATA_FORMAT_2_10_10_10                       = 0x9,
+	IMG_DATA_FORMAT_8_8_8_8                          = 0xa,
+	IMG_DATA_FORMAT_32_32                            = 0xb,
+	IMG_DATA_FORMAT_16_16_16_16                      = 0xc,
+	IMG_DATA_FORMAT_32_32_32                         = 0xd,
+	IMG_DATA_FORMAT_32_32_32_32                      = 0xe,
+	IMG_DATA_FORMAT_RESERVED_15                      = 0xf,
+	IMG_DATA_FORMAT_5_6_5                            = 0x10,
+	IMG_DATA_FORMAT_1_5_5_5                          = 0x11,
+	IMG_DATA_FORMAT_5_5_5_1                          = 0x12,
+	IMG_DATA_FORMAT_4_4_4_4                          = 0x13,
+	IMG_DATA_FORMAT_8_24                             = 0x14,
+	IMG_DATA_FORMAT_24_8                             = 0x15,
+	IMG_DATA_FORMAT_X24_8_32                         = 0x16,
+	IMG_DATA_FORMAT_RESERVED_23                      = 0x17,
+	IMG_DATA_FORMAT_RESERVED_24                      = 0x18,
+	IMG_DATA_FORMAT_RESERVED_25                      = 0x19,
+	IMG_DATA_FORMAT_RESERVED_26                      = 0x1a,
+	IMG_DATA_FORMAT_RESERVED_27                      = 0x1b,
+	IMG_DATA_FORMAT_RESERVED_28                      = 0x1c,
+	IMG_DATA_FORMAT_RESERVED_29                      = 0x1d,
+	IMG_DATA_FORMAT_RESERVED_30                      = 0x1e,
+	IMG_DATA_FORMAT_RESERVED_31                      = 0x1f,
+	IMG_DATA_FORMAT_GB_GR                            = 0x20,
+	IMG_DATA_FORMAT_BG_RG                            = 0x21,
+	IMG_DATA_FORMAT_5_9_9_9                          = 0x22,
+	IMG_DATA_FORMAT_BC1                              = 0x23,
+	IMG_DATA_FORMAT_BC2                              = 0x24,
+	IMG_DATA_FORMAT_BC3                              = 0x25,
+	IMG_DATA_FORMAT_BC4                              = 0x26,
+	IMG_DATA_FORMAT_BC5                              = 0x27,
+	IMG_DATA_FORMAT_BC6                              = 0x28,
+	IMG_DATA_FORMAT_BC7                              = 0x29,
+	IMG_DATA_FORMAT_RESERVED_42                      = 0x2a,
+	IMG_DATA_FORMAT_RESERVED_43                      = 0x2b,
+	IMG_DATA_FORMAT_FMASK8_S2_F1                     = 0x2c,
+	IMG_DATA_FORMAT_FMASK8_S4_F1                     = 0x2d,
+	IMG_DATA_FORMAT_FMASK8_S8_F1                     = 0x2e,
+	IMG_DATA_FORMAT_FMASK8_S2_F2                     = 0x2f,
+	IMG_DATA_FORMAT_FMASK8_S4_F2                     = 0x30,
+	IMG_DATA_FORMAT_FMASK8_S4_F4                     = 0x31,
+	IMG_DATA_FORMAT_FMASK16_S16_F1                   = 0x32,
+	IMG_DATA_FORMAT_FMASK16_S8_F2                    = 0x33,
+	IMG_DATA_FORMAT_FMASK32_S16_F2                   = 0x34,
+	IMG_DATA_FORMAT_FMASK32_S8_F4                    = 0x35,
+	IMG_DATA_FORMAT_FMASK32_S8_F8                    = 0x36,
+	IMG_DATA_FORMAT_FMASK64_S16_F4                   = 0x37,
+	IMG_DATA_FORMAT_FMASK64_S16_F8                   = 0x38,
+	IMG_DATA_FORMAT_4_4                              = 0x39,
+	IMG_DATA_FORMAT_6_5_5                            = 0x3a,
+	IMG_DATA_FORMAT_1                                = 0x3b,
+	IMG_DATA_FORMAT_1_REVERSED                       = 0x3c,
+	IMG_DATA_FORMAT_32_AS_8                          = 0x3d,
+	IMG_DATA_FORMAT_32_AS_8_8                        = 0x3e,
+	IMG_DATA_FORMAT_32_AS_32_32_32_32                = 0x3f,
+} IMG_DATA_FORMAT;
+typedef enum BUF_NUM_FORMAT {
+	BUF_NUM_FORMAT_UNORM                             = 0x0,
+	BUF_NUM_FORMAT_SNORM                             = 0x1,
+	BUF_NUM_FORMAT_USCALED                           = 0x2,
+	BUF_NUM_FORMAT_SSCALED                           = 0x3,
+	BUF_NUM_FORMAT_UINT                              = 0x4,
+	BUF_NUM_FORMAT_SINT                              = 0x5,
+	BUF_NUM_FORMAT_SNORM_OGL                         = 0x6,
+	BUF_NUM_FORMAT_FLOAT                             = 0x7,
+} BUF_NUM_FORMAT;
+typedef enum IMG_NUM_FORMAT {
+	IMG_NUM_FORMAT_UNORM                             = 0x0,
+	IMG_NUM_FORMAT_SNORM                             = 0x1,
+	IMG_NUM_FORMAT_USCALED                           = 0x2,
+	IMG_NUM_FORMAT_SSCALED                           = 0x3,
+	IMG_NUM_FORMAT_UINT                              = 0x4,
+	IMG_NUM_FORMAT_SINT                              = 0x5,
+	IMG_NUM_FORMAT_SNORM_OGL                         = 0x6,
+	IMG_NUM_FORMAT_FLOAT                             = 0x7,
+	IMG_NUM_FORMAT_RESERVED_8                        = 0x8,
+	IMG_NUM_FORMAT_SRGB                              = 0x9,
+	IMG_NUM_FORMAT_UBNORM                            = 0xa,
+	IMG_NUM_FORMAT_UBNORM_OGL                        = 0xb,
+	IMG_NUM_FORMAT_UBINT                             = 0xc,
+	IMG_NUM_FORMAT_UBSCALED                          = 0xd,
+	IMG_NUM_FORMAT_RESERVED_14                       = 0xe,
+	IMG_NUM_FORMAT_RESERVED_15                       = 0xf,
+} IMG_NUM_FORMAT;
+typedef enum TileType {
+	ARRAY_COLOR_TILE                                 = 0x0,
+	ARRAY_DEPTH_TILE                                 = 0x1,
+} TileType;
+typedef enum NonDispTilingOrder {
+	ADDR_SURF_MICRO_TILING_DISPLAY                   = 0x0,
+	ADDR_SURF_MICRO_TILING_NON_DISPLAY               = 0x1,
+} NonDispTilingOrder;
+typedef enum MicroTileMode {
+	ADDR_SURF_DISPLAY_MICRO_TILING                   = 0x0,
+	ADDR_SURF_THIN_MICRO_TILING                      = 0x1,
+	ADDR_SURF_DEPTH_MICRO_TILING                     = 0x2,
+	ADDR_SURF_ROTATED_MICRO_TILING                   = 0x3,
+	ADDR_SURF_THICK_MICRO_TILING                     = 0x4,
+} MicroTileMode;
+typedef enum TileSplit {
+	ADDR_SURF_TILE_SPLIT_64B                         = 0x0,
+	ADDR_SURF_TILE_SPLIT_128B                        = 0x1,
+	ADDR_SURF_TILE_SPLIT_256B                        = 0x2,
+	ADDR_SURF_TILE_SPLIT_512B                        = 0x3,
+	ADDR_SURF_TILE_SPLIT_1KB                         = 0x4,
+	ADDR_SURF_TILE_SPLIT_2KB                         = 0x5,
+	ADDR_SURF_TILE_SPLIT_4KB                         = 0x6,
+} TileSplit;
+typedef enum SampleSplit {
+	ADDR_SURF_SAMPLE_SPLIT_1                         = 0x0,
+	ADDR_SURF_SAMPLE_SPLIT_2                         = 0x1,
+	ADDR_SURF_SAMPLE_SPLIT_4                         = 0x2,
+	ADDR_SURF_SAMPLE_SPLIT_8                         = 0x3,
+} SampleSplit;
+typedef enum PipeConfig {
+	ADDR_SURF_P2                                     = 0x0,
+	ADDR_SURF_P2_RESERVED0                           = 0x1,
+	ADDR_SURF_P2_RESERVED1                           = 0x2,
+	ADDR_SURF_P2_RESERVED2                           = 0x3,
+	ADDR_SURF_P4_8x16                                = 0x4,
+	ADDR_SURF_P4_16x16                               = 0x5,
+	ADDR_SURF_P4_16x32                               = 0x6,
+	ADDR_SURF_P4_32x32                               = 0x7,
+	ADDR_SURF_P8_16x16_8x16                          = 0x8,
+	ADDR_SURF_P8_16x32_8x16                          = 0x9,
+	ADDR_SURF_P8_32x32_8x16                          = 0xa,
+	ADDR_SURF_P8_16x32_16x16                         = 0xb,
+	ADDR_SURF_P8_32x32_16x16                         = 0xc,
+	ADDR_SURF_P8_32x32_16x32                         = 0xd,
+	ADDR_SURF_P8_32x64_32x32                         = 0xe,
+} PipeConfig;
+typedef enum NumBanks {
+	ADDR_SURF_2_BANK                                 = 0x0,
+	ADDR_SURF_4_BANK                                 = 0x1,
+	ADDR_SURF_8_BANK                                 = 0x2,
+	ADDR_SURF_16_BANK                                = 0x3,
+} NumBanks;
+typedef enum BankWidth {
+	ADDR_SURF_BANK_WIDTH_1                           = 0x0,
+	ADDR_SURF_BANK_WIDTH_2                           = 0x1,
+	ADDR_SURF_BANK_WIDTH_4                           = 0x2,
+	ADDR_SURF_BANK_WIDTH_8                           = 0x3,
+} BankWidth;
+typedef enum BankHeight {
+	ADDR_SURF_BANK_HEIGHT_1                          = 0x0,
+	ADDR_SURF_BANK_HEIGHT_2                          = 0x1,
+	ADDR_SURF_BANK_HEIGHT_4                          = 0x2,
+	ADDR_SURF_BANK_HEIGHT_8                          = 0x3,
+} BankHeight;
+typedef enum BankWidthHeight {
+	ADDR_SURF_BANK_WH_1                              = 0x0,
+	ADDR_SURF_BANK_WH_2                              = 0x1,
+	ADDR_SURF_BANK_WH_4                              = 0x2,
+	ADDR_SURF_BANK_WH_8                              = 0x3,
+} BankWidthHeight;
+typedef enum MacroTileAspect {
+	ADDR_SURF_MACRO_ASPECT_1                         = 0x0,
+	ADDR_SURF_MACRO_ASPECT_2                         = 0x1,
+	ADDR_SURF_MACRO_ASPECT_4                         = 0x2,
+	ADDR_SURF_MACRO_ASPECT_8                         = 0x3,
+} MacroTileAspect;
+typedef enum TCC_CACHE_POLICIES {
+	TCC_CACHE_POLICY_LRU                             = 0x0,
+	TCC_CACHE_POLICY_STREAM                          = 0x1,
+	TCC_CACHE_POLICY_BYPASS                          = 0x2,
+} TCC_CACHE_POLICIES;
+typedef enum PERFMON_COUNTER_MODE {
+	PERFMON_COUNTER_MODE_ACCUM                       = 0x0,
+	PERFMON_COUNTER_MODE_ACTIVE_CYCLES               = 0x1,
+	PERFMON_COUNTER_MODE_MAX                         = 0x2,
+	PERFMON_COUNTER_MODE_DIRTY                       = 0x3,
+	PERFMON_COUNTER_MODE_SAMPLE                      = 0x4,
+	PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT    = 0x5,
+	PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT     = 0x6,
+	PERFMON_COUNTER_MODE_CYCLES_GE_HI                = 0x7,
+	PERFMON_COUNTER_MODE_CYCLES_EQ_HI                = 0x8,
+	PERFMON_COUNTER_MODE_INACTIVE_CYCLES             = 0x9,
+	PERFMON_COUNTER_MODE_RESERVED                    = 0xf,
+} PERFMON_COUNTER_MODE;
+typedef enum PERFMON_SPM_MODE {
+	PERFMON_SPM_MODE_OFF                             = 0x0,
+	PERFMON_SPM_MODE_16BIT_CLAMP                     = 0x1,
+	PERFMON_SPM_MODE_16BIT_NO_CLAMP                  = 0x2,
+	PERFMON_SPM_MODE_32BIT_CLAMP                     = 0x3,
+	PERFMON_SPM_MODE_32BIT_NO_CLAMP                  = 0x4,
+	PERFMON_SPM_MODE_RESERVED_5                      = 0x5,
+	PERFMON_SPM_MODE_RESERVED_6                      = 0x6,
+	PERFMON_SPM_MODE_RESERVED_7                      = 0x7,
+	PERFMON_SPM_MODE_TEST_MODE_0                     = 0x8,
+	PERFMON_SPM_MODE_TEST_MODE_1                     = 0x9,
+	PERFMON_SPM_MODE_TEST_MODE_2                     = 0xa,
+} PERFMON_SPM_MODE;
+typedef enum SurfaceTiling {
+	ARRAY_LINEAR                                     = 0x0,
+	ARRAY_TILED                                      = 0x1,
+} SurfaceTiling;
+typedef enum SurfaceArray {
+	ARRAY_1D                                         = 0x0,
+	ARRAY_2D                                         = 0x1,
+	ARRAY_3D                                         = 0x2,
+	ARRAY_3D_SLICE                                   = 0x3,
+} SurfaceArray;
+typedef enum ColorArray {
+	ARRAY_2D_ALT_COLOR                               = 0x0,
+	ARRAY_2D_COLOR                                   = 0x1,
+	ARRAY_3D_SLICE_COLOR                             = 0x3,
+} ColorArray;
+typedef enum DepthArray {
+	ARRAY_2D_ALT_DEPTH                               = 0x0,
+	ARRAY_2D_DEPTH                                   = 0x1,
+} DepthArray;
+
+#endif /* DCE_8_0_ENUM_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
index 8a2930734477..c331c9fe7b81 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
@@ -4130,6 +4130,18 @@
 #define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe
 #define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000
 #define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK_MASK 0x1
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK__SHIFT 0x0
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS_MASK 0x2
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS__SHIFT 0x1
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK 0x4
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT 0x2
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK_MASK 0x10
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK__SHIFT 0x4
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS_MASK 0x20
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS__SHIFT 0x5
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV_MASK 0x40
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV__SHIFT 0x6
 #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1
 #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0
 #define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
index 9d4347dd6125..dfe78799100d 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
@@ -6225,6 +6225,12 @@ typedef enum TCC_CACHE_POLICIES {
 	TCC_CACHE_POLICY_STREAM                          = 0x1,
 	TCC_CACHE_POLICY_BYPASS                          = 0x2,
 } TCC_CACHE_POLICIES;
+typedef enum MTYPE {
+	MTYPE_NC_NV                                      = 0x0,
+	MTYPE_NC                                         = 0x1,
+	MTYPE_CC                                         = 0x2,
+	MTYPE_UC                                         = 0x3,
+} MTYPE;
 typedef enum PERFMON_COUNTER_MODE {
 	PERFMON_COUNTER_MODE_ACCUM                       = 0x0,
 	PERFMON_COUNTER_MODE_ACTIVE_CYCLES               = 0x1,
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
new file mode 100644
index 000000000000..d21c6b14662f
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
@@ -0,0 +1,102 @@
+/*
+ * Volcanic Islands IV SRC Register documentation
+ *
+ * Copyright (C) 2015  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _IVSRCID_VISLANDS30_H_
+#define _IVSRCID_VISLANDS30_H_
+
+
+// IV Source IDs
+
+#define VISLANDS30_IV_SRCID_D1_V_UPDATE_INT		            7	    // 0x07	
+#define VISLANDS30_IV_EXTID_D1_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D1_GRPH_PFLIP		            8	    // 0x08	
+#define VISLANDS30_IV_EXTID_D1_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_D2_V_UPDATE_INT		            9	    // 0x09	
+#define VISLANDS30_IV_EXTID_D2_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D2_GRPH_PFLIP  		            10	    // 0x0a	
+#define VISLANDS30_IV_EXTID_D2_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_D3_V_UPDATE_INT		            11	    // 0x0b	
+#define VISLANDS30_IV_EXTID_D3_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D3_GRPH_PFLIP		            12	    // 0x0c	
+#define VISLANDS30_IV_EXTID_D3_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_D4_V_UPDATE_INT		            13	    // 0x0d  	
+#define VISLANDS30_IV_EXTID_D4_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D4_GRPH_PFLIP		            14	    // 0x0e  	
+#define VISLANDS30_IV_EXTID_D4_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_D5_V_UPDATE_INT		            15	    // 0x0f	
+#define VISLANDS30_IV_EXTID_D5_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D5_GRPH_PFLIP		            16	    // 0x10  	
+#define VISLANDS30_IV_EXTID_D5_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_D6_V_UPDATE_INT		            17	    // 0x11      	
+#define VISLANDS30_IV_EXTID_D6_V_UPDATE_INT                  0
+
+#define VISLANDS30_IV_SRCID_D6_GRPH_PFLIP		            18	    // 0x12  	
+#define VISLANDS30_IV_EXTID_D6_GRPH_PFLIP                    0
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A		            42	    // 0x2a	
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_A                 0
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_B   		        42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_B                 1
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_C   		        42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_C                 2
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_D	    	        42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_D                 3
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_E		            42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_E                 4
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_F		            42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F                 5
+
+#define VISLANDS30_IV_SRCID_HPD_RX_A		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_A                         6
+
+#define VISLANDS30_IV_SRCID_HPD_RX_B		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_B                         7
+
+#define VISLANDS30_IV_SRCID_HPD_RX_C		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_C                         8
+
+#define VISLANDS30_IV_SRCID_HPD_RX_D		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_D                         9
+
+#define VISLANDS30_IV_SRCID_HPD_RX_E		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_E                         10
+
+#define VISLANDS30_IV_SRCID_HPD_RX_F		                    42	    // 0x2a		
+#define VISLANDS30_IV_EXTID_HPD_RX_F                         11
+
+#endif // _IVSRCID_VISLANDS30_H_
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 888250b33ea8..a09d9f352871 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -221,7 +221,7 @@ struct kgd2kfd_calls {
 	int (*resume)(struct kfd_dev *kfd);
 };
 
-bool kgd2kfd_init(unsigned interface_version,
+int kgd2kfd_init(unsigned interface_version,
 		const struct kgd2kfd_calls **g2f);
 
 #endif	/* KGD_KFD_INTERFACE_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/powerplay/Makefile b/drivers/gpu/drm/amd/powerplay/Makefile
index e195bf59da86..043e6ebab575 100644
--- a/drivers/gpu/drm/amd/powerplay/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/Makefile
@@ -1,17 +1,17 @@
 
 subdir-ccflags-y += -Iinclude/drm  \
-		-Idrivers/gpu/drm/amd/powerplay/inc/  \
-		-Idrivers/gpu/drm/amd/include/asic_reg  \
-		-Idrivers/gpu/drm/amd/include  \
-		-Idrivers/gpu/drm/amd/powerplay/smumgr\
-		-Idrivers/gpu/drm/amd/powerplay/hwmgr \
-		-Idrivers/gpu/drm/amd/powerplay/eventmgr
+		-I$(FULL_AMD_PATH)/powerplay/inc/  \
+		-I$(FULL_AMD_PATH)/include/asic_reg  \
+		-I$(FULL_AMD_PATH)/include  \
+		-I$(FULL_AMD_PATH)/powerplay/smumgr\
+		-I$(FULL_AMD_PATH)/powerplay/hwmgr \
+		-I$(FULL_AMD_PATH)/powerplay/eventmgr
 
 AMD_PP_PATH = ../powerplay
 
 PP_LIBS = smumgr hwmgr eventmgr
 
-AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix drivers/gpu/drm/amd/powerplay/,$(PP_LIBS)))
+AMD_POWERPLAY = $(addsuffix /Makefile,$(addprefix $(FULL_AMD_PATH)/powerplay/,$(PP_LIBS)))
 
 include $(AMD_POWERPLAY)
 
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 589599f66fcc..9d2290044708 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -29,6 +29,7 @@
 #include "pp_instance.h"
 #include "power_state.h"
 #include "eventmanager.h"
+#include "pp_debug.h"
 
 #define PP_CHECK(handle)						\
 	do {								\
@@ -436,7 +437,10 @@ enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
 	case PP_StateUILabel_Performance:
 		return POWER_STATE_TYPE_PERFORMANCE;
 	default:
-		return POWER_STATE_TYPE_DEFAULT;
+		if (state->classification.flags & PP_StateClassificationFlag_Boot)
+			return  POWER_STATE_TYPE_INTERNAL_BOOT;
+		else
+			return POWER_STATE_TYPE_DEFAULT;
 	}
 }
 
@@ -538,6 +542,112 @@ static int pp_dpm_get_temperature(void *handle)
 	return hwmgr->hwmgr_func->get_temperature(hwmgr);
 }
 
+static int pp_dpm_get_pp_num_states(void *handle,
+		struct pp_states_info *data)
+{
+	struct pp_hwmgr *hwmgr;
+	int i;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (hwmgr == NULL || hwmgr->ps == NULL)
+		return -EINVAL;
+
+	data->nums = hwmgr->num_ps;
+
+	for (i = 0; i < hwmgr->num_ps; i++) {
+		struct pp_power_state *state = (struct pp_power_state *)
+				((unsigned long)hwmgr->ps + i * hwmgr->ps_size);
+		switch (state->classification.ui_label) {
+		case PP_StateUILabel_Battery:
+			data->states[i] = POWER_STATE_TYPE_BATTERY;
+			break;
+		case PP_StateUILabel_Balanced:
+			data->states[i] = POWER_STATE_TYPE_BALANCED;
+			break;
+		case PP_StateUILabel_Performance:
+			data->states[i] = POWER_STATE_TYPE_PERFORMANCE;
+			break;
+		default:
+			if (state->classification.flags & PP_StateClassificationFlag_Boot)
+				data->states[i] = POWER_STATE_TYPE_INTERNAL_BOOT;
+			else
+				data->states[i] = POWER_STATE_TYPE_DEFAULT;
+		}
+	}
+
+	return 0;
+}
+
+static int pp_dpm_get_pp_table(void *handle, char **table)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+		hwmgr->hwmgr_func->get_pp_table == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
+}
+
+static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+		hwmgr->hwmgr_func->set_pp_table == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
+}
+
+static int pp_dpm_force_clock_level(void *handle,
+		enum pp_clock_type type, int level)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+			hwmgr->hwmgr_func->force_clock_level == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, level);
+}
+
+static int pp_dpm_print_clock_levels(void *handle,
+		enum pp_clock_type type, char *buf)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+			hwmgr->hwmgr_func->print_clock_levels == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
+}
+
 const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.get_temperature = pp_dpm_get_temperature,
 	.load_firmware = pp_dpm_load_fw,
@@ -555,6 +665,11 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.get_fan_control_mode = pp_dpm_get_fan_control_mode,
 	.set_fan_speed_percent = pp_dpm_set_fan_speed_percent,
 	.get_fan_speed_percent = pp_dpm_get_fan_speed_percent,
+	.get_pp_num_states = pp_dpm_get_pp_num_states,
+	.get_pp_table = pp_dpm_get_pp_table,
+	.set_pp_table = pp_dpm_set_pp_table,
+	.force_clock_level = pp_dpm_force_clock_level,
+	.print_clock_levels = pp_dpm_print_clock_levels,
 };
 
 static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -638,10 +753,10 @@ int amd_powerplay_fini(void *handle)
 
 /* export this function to DAL */
 
-int amd_powerplay_display_configuration_change(void *handle, const void *input)
+int amd_powerplay_display_configuration_change(void *handle,
+	const struct amd_pp_display_configuration *display_config)
 {
 	struct pp_hwmgr  *hwmgr;
-	const struct amd_pp_display_configuration *display_config = input;
 
 	PP_CHECK((struct pp_instance *)handle);
 
@@ -653,7 +768,7 @@ int amd_powerplay_display_configuration_change(void *handle, const void *input)
 }
 
 int amd_powerplay_get_display_power_level(void *handle,
-		struct amd_pp_dal_clock_info *output)
+		struct amd_pp_simple_clock_info *output)
 {
 	struct pp_hwmgr  *hwmgr;
 
@@ -666,3 +781,86 @@ int amd_powerplay_get_display_power_level(void *handle,
 
 	return phm_get_dal_power_level(hwmgr, output);
 }
+
+int amd_powerplay_get_current_clocks(void *handle,
+		struct amd_pp_clock_info *clocks)
+{
+	struct pp_hwmgr  *hwmgr;
+	struct amd_pp_simple_clock_info simple_clocks;
+	struct pp_clock_info hw_clocks;
+
+	PP_CHECK((struct pp_instance *)handle);
+
+	if (clocks == NULL)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	phm_get_dal_power_level(hwmgr, &simple_clocks);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment)) {
+		if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_PowerContainment))
+			PP_ASSERT_WITH_CODE(0, "Error in PHM_GetPowerContainmentClockInfo", return -1);
+	} else {
+		if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_Activity))
+			PP_ASSERT_WITH_CODE(0, "Error in PHM_GetClockInfo", return -1);
+	}
+
+	clocks->min_engine_clock = hw_clocks.min_eng_clk;
+	clocks->max_engine_clock = hw_clocks.max_eng_clk;
+	clocks->min_memory_clock = hw_clocks.min_mem_clk;
+	clocks->max_memory_clock = hw_clocks.max_mem_clk;
+	clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth;
+	clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth;
+
+	clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
+	clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
+
+	clocks->max_clocks_state = simple_clocks.level;
+
+	if (0 == phm_get_current_shallow_sleep_clocks(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks)) {
+		clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
+		clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
+	}
+
+	return 0;
+
+}
+
+int amd_powerplay_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
+{
+	int result = -1;
+
+	struct pp_hwmgr *hwmgr;
+
+	PP_CHECK((struct pp_instance *)handle);
+
+	if (clocks == NULL)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	result = phm_get_clock_by_type(hwmgr, type, clocks);
+
+	return result;
+}
+
+int amd_powerplay_get_display_mode_validation_clocks(void *handle,
+		struct amd_pp_simple_clock_info *clocks)
+{
+	int result = -1;
+	struct pp_hwmgr  *hwmgr;
+
+	PP_CHECK((struct pp_instance *)handle);
+
+	if (clocks == NULL)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState))
+		result = phm_get_max_high_clocks(hwmgr, clocks);
+
+	return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index cf01177ca3b5..5682490337e3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -241,6 +241,11 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_DynamicUVDState);
 
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_UVDDPM);
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_VCEDPM);
+
 	cz_hwmgr->cc6_settings.cpu_cc6_disable = false;
 	cz_hwmgr->cc6_settings.cpu_pstate_disable = false;
 	cz_hwmgr->cc6_settings.nb_pstate_switch_disable = false;
@@ -733,7 +738,6 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
 	unsigned long clock = 0;
 	unsigned long level;
 	unsigned long stable_pstate_sclk;
-	struct PP_Clocks clocks;
 	unsigned long percentage;
 
 	cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk;
@@ -744,8 +748,10 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
 	else
 		cz_hwmgr->sclk_dpm.soft_max_clk  = table->entries[table->count - 1].clk;
 
-	/*PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks);*/
-	clock = clocks.engineClock;
+	clock = hwmgr->display_config.min_core_set_clock;
+;
+	if (clock == 0)
+		printk(KERN_INFO "[ powerplay ] min_core_set_clock not set\n");
 
 	if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) {
 		cz_hwmgr->sclk_dpm.hard_min_clk = clock;
@@ -901,9 +907,9 @@ static int cz_tf_update_low_mem_pstate(struct pp_hwmgr *hwmgr,
 
 		if (pnew_state->action == FORCE_HIGH)
 			cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch);
-		else if(pnew_state->action == CANCEL_FORCE_HIGH)
-			cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch);
-		else 
+		else if (pnew_state->action == CANCEL_FORCE_HIGH)
+			cz_nbdpm_pstate_enable_disable(hwmgr, true, disable_switch);
+		else
 			cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch);
 	}
 	return 0;
@@ -1128,9 +1134,10 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				cast_const_PhwCzPowerState(&pcurrent_ps->hardware);
 
 	struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
-	struct PP_Clocks clocks;
+	struct PP_Clocks clocks = {0, 0, 0, 0};
 	bool force_high;
-	unsigned long  num_of_active_displays = 4;
+	uint32_t  num_of_active_displays = 0;
+	struct cgs_display_info info = {0};
 
 	cz_ps->evclk = hwmgr->vce_arbiter.evclk;
 	cz_ps->ecclk = hwmgr->vce_arbiter.ecclk;
@@ -1142,12 +1149,15 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
 
-	/* to do PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks); */
-	/* PECI_GetNumberOfActiveDisplays(pHwMgr->pPECI, &numOfActiveDisplays); */
+	clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
+				hwmgr->display_config.min_mem_set_clock :
+				cz_hwmgr->sys_info.nbp_memory_clock[1];
+
+	cgs_get_active_displays_info(hwmgr->device, &info);
+	num_of_active_displays = info.display_count;
+
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
 		clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
-	else
-		clocks.memoryClock = 0;
 
 	if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
 		clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
@@ -1217,6 +1227,7 @@ static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n");
 		return result;
 	}
+	hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =  CZ_MAX_HARDWARE_POWERLEVELS;
 
 	result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings));
 	if (result != 0) {
@@ -1648,10 +1659,10 @@ static void cz_hw_print_display_cfg(
 			& PWRMGT_SEPARATION_TIME_MASK)
 			<< PWRMGT_SEPARATION_TIME_SHIFT;
 
-		data|= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0)
+		data |= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0)
 			<< PWRMGT_DISABLE_CPU_CSTATES_SHIFT;
 
-		data|= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0)
+		data |= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0)
 			<< PWRMGT_DISABLE_CPU_PSTATES_SHIFT;
 
 		PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n",
@@ -1666,9 +1677,9 @@ static void cz_hw_print_display_cfg(
 }
 
 
- static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
+static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
 			bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
- {
+{
 	struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend);
 
 	if (separation_time !=
@@ -1696,20 +1707,19 @@ static void cz_hw_print_display_cfg(
 	return 0;
 }
 
- static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr,
-		struct amd_pp_dal_clock_info*info)
+static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr,
+		struct amd_pp_simple_clock_info *info)
 {
 	uint32_t i;
-	const struct phm_clock_voltage_dependency_table * table =
+	const struct phm_clock_voltage_dependency_table *table =
 			hwmgr->dyn_state.vddc_dep_on_dal_pwrl;
-	const struct phm_clock_and_voltage_limits* limits =
+	const struct phm_clock_and_voltage_limits *limits =
 			&hwmgr->dyn_state.max_clock_voltage_on_ac;
 
 	info->engine_max_clock = limits->sclk;
 	info->memory_max_clock = limits->mclk;
 
 	for (i = table->count - 1; i > 0; i--) {
-
 		if (limits->vddc >= table->entries[i].v) {
 			info->level = table->entries[i].clk;
 			return 0;
@@ -1718,6 +1728,158 @@ static void cz_hw_print_display_cfg(
 	return -EINVAL;
 }
 
+static int cz_force_clock_level(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, int level)
+{
+	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_SetSclkSoftMin,
+				(1 << level));
+		smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_SetSclkSoftMax,
+				(1 << level));
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int cz_print_clock_levels(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, char *buf)
+{
+	struct phm_clock_voltage_dependency_table *sclk_table =
+			hwmgr->dyn_state.vddc_dependency_on_sclk;
+	int i, now, size = 0;
+
+	switch (type) {
+	case PP_SCLK:
+		now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device,
+				CGS_IND_REG__SMC,
+				ixTARGET_AND_CURRENT_PROFILE_INDEX),
+				TARGET_AND_CURRENT_PROFILE_INDEX,
+				CURR_SCLK_INDEX);
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->entries[i].clk / 100,
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+	return size;
+}
+
+static int cz_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+				PHM_PerformanceLevelDesignation designation, uint32_t index,
+				PHM_PerformanceLevel *level)
+{
+	const struct cz_power_state *ps;
+	struct cz_hwmgr *data;
+	uint32_t level_index;
+	uint32_t i;
+
+	if (level == NULL || hwmgr == NULL || state == NULL)
+		return -EINVAL;
+
+	data = (struct cz_hwmgr *)(hwmgr->backend);
+	ps = cast_const_PhwCzPowerState(state);
+
+	level_index = index > ps->level - 1 ? ps->level - 1 : index;
+
+	level->coreClock  = ps->levels[level_index].engineClock;
+
+	if (designation == PHM_PerformanceLevelDesignation_PowerContainment) {
+		for (i = 1; i < ps->level; i++) {
+			if (ps->levels[i].engineClock > data->dce_slow_sclk_threshold) {
+				level->coreClock = ps->levels[i].engineClock;
+				break;
+			}
+		}
+	}
+
+	if (level_index == 0)
+		level->memory_clock = data->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1];
+	else
+		level->memory_clock = data->sys_info.nbp_memory_clock[0];
+
+	level->vddc = (cz_convert_8Bit_index_to_voltage(hwmgr, ps->levels[level_index].vddcIndex) + 2) / 4;
+	level->nonLocalMemoryFreq = 0;
+	level->nonLocalMemoryWidth = 0;
+
+	return 0;
+}
+
+static int cz_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr,
+	const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
+{
+	const struct cz_power_state *ps = cast_const_PhwCzPowerState(state);
+
+	clock_info->min_eng_clk = ps->levels[0].engineClock / (1 << (ps->levels[0].ssDividerIndex));
+	clock_info->max_eng_clk = ps->levels[ps->level - 1].engineClock / (1 << (ps->levels[ps->level - 1].ssDividerIndex));
+
+	return 0;
+}
+
+static int cz_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type,
+						struct amd_pp_clocks *clocks)
+{
+	struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend);
+	int i;
+	struct phm_clock_voltage_dependency_table *table;
+
+	clocks->count = cz_get_max_sclk_level(hwmgr);
+	switch (type) {
+	case amd_pp_disp_clock:
+		for (i = 0; i < clocks->count; i++)
+			clocks->clock[i] = data->sys_info.display_clock[i];
+		break;
+	case amd_pp_sys_clock:
+		table = hwmgr->dyn_state.vddc_dependency_on_sclk;
+		for (i = 0; i < clocks->count; i++)
+			clocks->clock[i] = table->entries[i].clk;
+		break;
+	case amd_pp_mem_clock:
+		clocks->count = CZ_NUM_NBPMEMORYCLOCK;
+		for (i = 0; i < clocks->count; i++)
+			clocks->clock[i] = data->sys_info.nbp_memory_clock[clocks->count - 1 - i];
+		break;
+	default:
+		return -1;
+	}
+
+	return 0;
+}
+
+static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
+{
+	struct phm_clock_voltage_dependency_table *table =
+					hwmgr->dyn_state.vddc_dependency_on_sclk;
+	unsigned long level;
+	const struct phm_clock_and_voltage_limits *limits =
+			&hwmgr->dyn_state.max_clock_voltage_on_ac;
+
+	if ((NULL == table) || (table->count <= 0) || (clocks == NULL))
+		return -EINVAL;
+
+	level = cz_get_max_sclk_level(hwmgr) - 1;
+
+	if (level < table->count)
+		clocks->engine_max_clock = table->entries[level].clk;
+	else
+		clocks->engine_max_clock = table->entries[table->count - 1].clk;
+
+	clocks->memory_max_clock = limits->mclk;
+
+	return 0;
+}
+
 static const struct pp_hwmgr_func cz_hwmgr_funcs = {
 	.backend_init = cz_hwmgr_backend_init,
 	.backend_fini = cz_hwmgr_backend_fini,
@@ -1736,7 +1898,13 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
 	.print_current_perforce_level = cz_print_current_perforce_level,
 	.set_cpu_power_state = cz_set_cpu_power_state,
 	.store_cc6_data = cz_store_cc6_data,
-	.get_dal_power_level= cz_get_dal_power_level,
+	.force_clock_level = cz_force_clock_level,
+	.print_clock_levels = cz_print_clock_levels,
+	.get_dal_power_level = cz_get_dal_power_level,
+	.get_performance_level = cz_get_performance_level,
+	.get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks,
+	.get_clock_by_type = cz_get_clock_by_type,
+	.get_max_high_clocks = cz_get_max_high_clocks,
 };
 
 int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 28031a7eddba..51dedf84623c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -4275,7 +4275,6 @@ static int fiji_populate_and_upload_sclk_mclk_dpm_levels(
 	if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) {
 		dpm_table->mclk_table.dpm_levels
 			[dpm_table->mclk_table.count - 1].value = mclk;
-
 		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_OD6PlusinACSupport) ||
 			phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -4886,6 +4885,10 @@ static void fiji_print_current_perforce_level(
 	activity_percent >>= 8;
 
 	seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
+
+	seq_printf(m, "uvd    %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+	seq_printf(m, "vce    %sabled\n", data->vce_power_gated ? "dis" : "en");
 }
 
 static int fiji_program_display_gap(struct pp_hwmgr *hwmgr)
@@ -5073,6 +5076,125 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
+static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	*table = (char *)&data->smc_state_table;
+
+	return sizeof(struct SMU73_Discrete_DpmTable);
+}
+
+static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	void *table = (void *)&data->smc_state_table;
+
+	memcpy(table, buf, size);
+
+	return 0;
+}
+
+static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, int level)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!data->sclk_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					(1 << level));
+		break;
+	case PP_MCLK:
+		if (!data->mclk_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					(1 << level));
+		break;
+	case PP_PCIE:
+		if (!data->pcie_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					(1 << level));
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, char *buf)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct fiji_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
+		clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
+		clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = fiji_get_current_pcie_speed(hwmgr);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+	return size;
+}
+
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
@@ -5108,6 +5230,10 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt,
 	.set_fan_control_mode = fiji_set_fan_control_mode,
 	.get_fan_control_mode = fiji_get_fan_control_mode,
+	.get_pp_table = fiji_get_pp_table,
+	.set_pp_table = fiji_set_pp_table,
+	.force_clock_level = fiji_force_clock_level,
+	.print_clock_levels = fiji_print_clock_levels,
 };
 
 int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 22e273b1c1c5..a16f7cd4c238 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -29,6 +29,7 @@
 #include "smu73_discrete.h"
 #include "ppatomctrl.h"
 #include "fiji_ppsmc.h"
+#include "pp_endian.h"
 
 #define FIJI_MAX_HARDWARE_POWERLEVELS	2
 #define FIJI_AT_DFLT	30
@@ -347,15 +348,4 @@ int fiji_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int fiji_update_acp_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int fiji_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable);
 
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
 #endif /* _FIJI_HWMGR_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
index 9deadabbc81c..72cfecc4f9f7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
@@ -34,6 +34,11 @@ static int phm_run_table(struct pp_hwmgr *hwmgr,
 	int result = 0;
 	phm_table_function *function;
 
+	if (rt_table->function_list == NULL) {
+		printk(KERN_INFO "[ powerplay ] this function not implement!\n");
+		return 0;
+	}
+
 	for (function = rt_table->function_list; NULL != *function; function++) {
 		int tmp = (*function)(hwmgr, input, output, temp_storage, result);
 
@@ -57,9 +62,9 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 	int result = 0;
 	void *temp_storage = NULL;
 
-	if (hwmgr == NULL || rt_table == NULL || rt_table->function_list == NULL) {
+	if (hwmgr == NULL || rt_table == NULL) {
 		printk(KERN_ERR "[ powerplay ] Invalid Parameter!\n");
-		return 0; /*temp return ture because some function not implement on some asic */
+		return -EINVAL;
 	}
 
 	if (0 != rt_table->storage_size) {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 0f2d5e4bc241..be31bed2538a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -26,7 +26,7 @@
 #include "power_state.h"
 #include "pp_acpi.h"
 #include "amd_acpi.h"
-#include "amd_powerplay.h"
+#include "pp_debug.h"
 
 #define PHM_FUNC_CHECK(hw) \
 	do {							\
@@ -313,13 +313,12 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 }
 
 int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
-		struct amd_pp_dal_clock_info *info)
+		struct amd_pp_simple_clock_info *info)
 {
 	PHM_FUNC_CHECK(hwmgr);
 
 	if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL)
 		return -EINVAL;
-
 	return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info);
 }
 
@@ -332,3 +331,91 @@ int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr)
 
 	return 0;
 }
+
+
+int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+				PHM_PerformanceLevelDesignation designation, uint32_t index,
+				PHM_PerformanceLevel *level)
+{
+	PHM_FUNC_CHECK(hwmgr);
+	if (hwmgr->hwmgr_func->get_performance_level == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->get_performance_level(hwmgr, state, designation, index, level);
+
+
+}
+
+
+/**
+* Gets Clock Info.
+*
+* @param    pHwMgr  the address of the powerplay hardware manager.
+* @param    pPowerState the address of the Power State structure.
+* @param    pClockInfo the address of PP_ClockInfo structure where the result will be returned.
+* @exception PP_Result_Failed if any of the paramters is NULL, otherwise the return value from the back-end.
+*/
+int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *pclock_info,
+			PHM_PerformanceLevelDesignation designation)
+{
+	int result;
+	PHM_PerformanceLevel performance_level;
+
+	PHM_FUNC_CHECK(hwmgr);
+
+	PP_ASSERT_WITH_CODE((NULL != state), "Invalid Input!", return -EINVAL);
+	PP_ASSERT_WITH_CODE((NULL != pclock_info), "Invalid Input!", return -EINVAL);
+
+	result = phm_get_performance_level(hwmgr, state, PHM_PerformanceLevelDesignation_Activity, 0, &performance_level);
+
+	PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve minimum clocks.", return result);
+
+
+	pclock_info->min_mem_clk = performance_level.memory_clock;
+	pclock_info->min_eng_clk = performance_level.coreClock;
+	pclock_info->min_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
+
+
+	result = phm_get_performance_level(hwmgr, state, designation,
+					(hwmgr->platform_descriptor.hardwareActivityPerformanceLevels - 1), &performance_level);
+
+	PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve maximum clocks.", return result);
+
+	pclock_info->max_mem_clk = performance_level.memory_clock;
+	pclock_info->max_eng_clk = performance_level.coreClock;
+	pclock_info->max_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
+
+	return 0;
+}
+
+int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_current_shallow_sleep_clocks == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->get_current_shallow_sleep_clocks(hwmgr, state, clock_info);
+
+}
+
+int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_clock_by_type == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->get_clock_by_type(hwmgr, type, clocks);
+
+}
+
+int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_max_high_clocks == NULL)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
index b7429a527828..b10df328d58c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
@@ -293,7 +293,7 @@ fInt GetScaledFraction(int X, int factor)
 	}
 
 	if (factor == 1)
-	return (ConvertToFraction(X));
+		return ConvertToFraction(X);
 
 	fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor));
 
@@ -371,7 +371,7 @@ fInt fDivide (fInt X, fInt Y)
 	fZERO = ConvertToFraction(0);
 
 	if (Equal(Y, fZERO))
-	return fZERO;
+		return fZERO;
 
 	longlongX = (int64_t)X.full;
 	longlongY = (int64_t)Y.full;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 980d3bf8ea76..0d5d8372953e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -5185,7 +5185,6 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
 	mclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
 	seq_printf(m, "\n [  mclk  ]: %u MHz\n\n [  sclk  ]: %u MHz\n", mclk/100, sclk/100);
 
-
 	offset = data->soft_regs_start + offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
 	activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
 	activity_percent += 0x80;
@@ -5193,6 +5192,9 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
 
 	seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
 
+	seq_printf(m, "uvd    %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+	seq_printf(m, "vce    %sabled\n", data->vce_power_gated ? "dis" : "en");
 }
 
 static int tonga_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
@@ -6033,6 +6035,125 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
+static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+	*table = (char *)&data->smc_state_table;
+
+	return sizeof(struct SMU72_Discrete_DpmTable);
+}
+
+static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+	void *table = (void *)&data->smc_state_table;
+
+	memcpy(table, buf, size);
+
+	return 0;
+}
+
+static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, int level)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!data->sclk_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					(1 << level));
+		break;
+	case PP_MCLK:
+		if (!data->mclk_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					(1 << level));
+		break;
+	case PP_PCIE:
+		if (!data->pcie_dpm_key_disabled)
+			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					(1 << level));
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, char *buf)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct tonga_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
+		clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
+		clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = tonga_get_current_pcie_speed(hwmgr);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+	return size;
+}
+
 static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.backend_init = &tonga_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
@@ -6070,6 +6191,10 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.check_states_equal = tonga_check_states_equal,
 	.set_fan_control_mode = tonga_set_fan_control_mode,
 	.get_fan_control_mode = tonga_get_fan_control_mode,
+	.get_pp_table = tonga_get_pp_table,
+	.set_pp_table = tonga_set_pp_table,
+	.force_clock_level = tonga_force_clock_level,
+	.print_clock_levels = tonga_print_clock_levels,
 };
 
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 49168d262ccc..f88d3bbe6671 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -28,6 +28,7 @@
 #include "ppatomctrl.h"
 #include "ppinterrupt.h"
 #include "tonga_powertune.h"
+#include "pp_endian.h"
 
 #define TONGA_MAX_HARDWARE_POWERLEVELS 2
 #define TONGA_DYNCLK_NUMBER_OF_TREND_COEFFICIENTS 15
@@ -386,17 +387,6 @@ typedef struct tonga_hwmgr tonga_hwmgr;
 
 #define TONGA_UNUSED_GPIO_PIN                        0x7F
 
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr);
 int tonga_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input);
 int tonga_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 34f4bef3691f..b156481b50e8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -512,8 +512,10 @@ static int get_cac_tdp_table(
 
 	hwmgr->dyn_state.cac_dtp_table = kzalloc(table_size, GFP_KERNEL);
 
-	if (NULL == hwmgr->dyn_state.cac_dtp_table)
+	if (NULL == hwmgr->dyn_state.cac_dtp_table) {
+		kfree(tdp_table);
 		return -ENOMEM;
+	}
 
 	memset(hwmgr->dyn_state.cac_dtp_table, 0x00, table_size);
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index e61a3e67852e..7255f7ddf93a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -29,6 +29,7 @@
 #include "amd_shared.h"
 #include "cgs_common.h"
 
+
 enum amd_pp_event {
 	AMD_PP_EVENT_INITIALIZE = 0,
 	AMD_PP_EVENT_UNINITIALIZE,
@@ -123,6 +124,7 @@ enum amd_dpm_forced_level {
 	AMD_DPM_FORCED_LEVEL_AUTO = 0,
 	AMD_DPM_FORCED_LEVEL_LOW = 1,
 	AMD_DPM_FORCED_LEVEL_HIGH = 2,
+	AMD_DPM_FORCED_LEVEL_MANUAL = 3,
 };
 
 struct amd_pp_init {
@@ -212,12 +214,55 @@ struct amd_pp_display_configuration {
 	uint32_t dce_tolerable_mclk_in_active_latency;
 };
 
-struct amd_pp_dal_clock_info {
+struct amd_pp_simple_clock_info {
 	uint32_t	engine_max_clock;
 	uint32_t	memory_max_clock;
 	uint32_t	level;
 };
 
+enum PP_DAL_POWERLEVEL {
+	PP_DAL_POWERLEVEL_INVALID = 0,
+	PP_DAL_POWERLEVEL_ULTRALOW,
+	PP_DAL_POWERLEVEL_LOW,
+	PP_DAL_POWERLEVEL_NOMINAL,
+	PP_DAL_POWERLEVEL_PERFORMANCE,
+
+	PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
+	PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
+	PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
+	PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
+	PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
+	PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
+	PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
+	PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
+};
+
+struct amd_pp_clock_info {
+	uint32_t min_engine_clock;
+	uint32_t max_engine_clock;
+	uint32_t min_memory_clock;
+	uint32_t max_memory_clock;
+	uint32_t min_bus_bandwidth;
+	uint32_t max_bus_bandwidth;
+	uint32_t max_engine_clock_in_sr;
+	uint32_t min_engine_clock_in_sr;
+	enum PP_DAL_POWERLEVEL max_clocks_state;
+};
+
+enum amd_pp_clock_type {
+	amd_pp_disp_clock = 1,
+	amd_pp_sys_clock,
+	amd_pp_mem_clock
+};
+
+#define MAX_NUM_CLOCKS 16
+
+struct amd_pp_clocks {
+	uint32_t count;
+	uint32_t clock[MAX_NUM_CLOCKS];
+};
+
+
 enum {
 	PP_GROUP_UNKNOWN = 0,
 	PP_GROUP_GFX = 1,
@@ -225,6 +270,17 @@ enum {
 	PP_GROUP_MAX
 };
 
+enum pp_clock_type {
+	PP_SCLK,
+	PP_MCLK,
+	PP_PCIE,
+};
+
+struct pp_states_info {
+	uint32_t nums;
+	uint32_t states[16];
+};
+
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
@@ -278,6 +334,11 @@ struct amd_powerplay_funcs {
 	int (*get_fan_control_mode)(void *handle);
 	int (*set_fan_speed_percent)(void *handle, uint32_t percent);
 	int (*get_fan_speed_percent)(void *handle, uint32_t *speed);
+	int (*get_pp_num_states)(void *handle, struct pp_states_info *data);
+	int (*get_pp_table)(void *handle, char **table);
+	int (*set_pp_table)(void *handle, const char *buf, size_t size);
+	int (*force_clock_level)(void *handle, enum pp_clock_type type, int level);
+	int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
 };
 
 struct amd_powerplay {
@@ -288,12 +349,23 @@ struct amd_powerplay {
 
 int amd_powerplay_init(struct amd_pp_init *pp_init,
 		       struct amd_powerplay *amd_pp);
+
 int amd_powerplay_fini(void *handle);
 
-int amd_powerplay_display_configuration_change(void *handle, const void *input);
+int amd_powerplay_display_configuration_change(void *handle,
+		const struct amd_pp_display_configuration *input);
 
 int amd_powerplay_get_display_power_level(void *handle,
-		struct amd_pp_dal_clock_info *output);
+		struct amd_pp_simple_clock_info *output);
+
+int amd_powerplay_get_current_clocks(void *handle,
+		struct amd_pp_clock_info *output);
+
+int amd_powerplay_get_clock_by_type(void *handle,
+		enum amd_pp_clock_type type,
+		struct amd_pp_clocks *clocks);
 
+int amd_powerplay_get_display_mode_validation_clocks(void *handle,
+		struct amd_pp_simple_clock_info *output);
 
 #endif /* _AMD_POWERPLAY_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 91795efe1336..040d3f7cbf49 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -31,6 +31,7 @@ struct pp_power_state;
 enum amd_dpm_forced_level;
 struct PP_TemperatureRange;
 
+
 struct phm_fan_speed_info {
 	uint32_t min_percent;
 	uint32_t max_percent;
@@ -290,6 +291,15 @@ struct PP_Clocks {
 	uint32_t engineClockInSR;
 };
 
+struct pp_clock_info {
+	uint32_t min_mem_clk;
+	uint32_t max_mem_clk;
+	uint32_t min_eng_clk;
+	uint32_t max_eng_clk;
+	uint32_t min_bus_bandwidth;
+	uint32_t max_bus_bandwidth;
+};
+
 struct phm_platform_descriptor {
 	uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES];
 	uint32_t vbiosInterruptId;
@@ -323,24 +333,6 @@ struct phm_clocks {
 	uint32_t clock[MAX_NUM_CLOCKS];
 };
 
-enum PP_DAL_POWERLEVEL {
-	PP_DAL_POWERLEVEL_INVALID = 0,
-	PP_DAL_POWERLEVEL_ULTRALOW,
-	PP_DAL_POWERLEVEL_LOW,
-	PP_DAL_POWERLEVEL_NOMINAL,
-	PP_DAL_POWERLEVEL_PERFORMANCE,
-
-	PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
-	PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
-	PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
-	PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
-	PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
-	PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
-	PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
-	PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
-};
-
-
 extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr);
 extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate);
 extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
@@ -375,11 +367,25 @@ extern int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 		const struct amd_pp_display_configuration *display_config);
 
 extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
-		struct amd_pp_dal_clock_info*info);
+		struct amd_pp_simple_clock_info *info);
 
 extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr);
 
 extern int phm_power_down_asic(struct pp_hwmgr *hwmgr);
 
+extern int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+				PHM_PerformanceLevelDesignation designation, uint32_t index,
+				PHM_PerformanceLevel *level);
+
+extern int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+			struct pp_clock_info *pclock_info,
+			PHM_PerformanceLevelDesignation designation);
+
+extern int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+
+extern int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+
+extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index aeaa3dbba525..928f5a740cba 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -325,8 +325,18 @@ struct pp_hwmgr_func {
 				bool cc6_disable, bool pstate_disable,
 				bool pstate_switch_disable);
 	int (*get_dal_power_level)(struct pp_hwmgr *hwmgr,
-				   struct amd_pp_dal_clock_info *info);
+			struct amd_pp_simple_clock_info *info);
+	int (*get_performance_level)(struct pp_hwmgr *, const struct pp_hw_power_state *,
+			PHM_PerformanceLevelDesignation, uint32_t, PHM_PerformanceLevel *);
+	int (*get_current_shallow_sleep_clocks)(struct pp_hwmgr *hwmgr,
+				const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+	int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
+	int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
+	int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
+	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, int level);
+	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
 };
 
 struct pp_table_func {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
new file mode 100644
index 000000000000..f49d1963fe85
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _PP_ENDIAN_H_
+#define _PP_ENDIAN_H_
+
+#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
+#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
+
+#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
+#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
+
+#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
+#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
+
+#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
+
+#endif /* _PP_ENDIAN_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 504f035d1843..fc9e3d1dd409 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -32,6 +32,27 @@ struct pp_instance;
 #define smu_lower_32_bits(n) ((uint32_t)(n))
 #define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16))
 
+enum AVFS_BTC_STATUS {
+	AVFS_BTC_BOOT = 0,
+	AVFS_BTC_BOOT_STARTEDSMU,
+	AVFS_LOAD_VIRUS,
+	AVFS_BTC_VIRUS_LOADED,
+	AVFS_BTC_VIRUS_FAIL,
+	AVFS_BTC_COMPLETED_PREVIOUSLY,
+	AVFS_BTC_ENABLEAVFS,
+	AVFS_BTC_STARTED,
+	AVFS_BTC_FAILED,
+	AVFS_BTC_RESTOREVFT_FAILED,
+	AVFS_BTC_SAVEVFT_FAILED,
+	AVFS_BTC_DPMTABLESETUP_FAILED,
+	AVFS_BTC_COMPLETED_UNSAVED,
+	AVFS_BTC_COMPLETED_SAVED,
+	AVFS_BTC_COMPLETED_RESTORED,
+	AVFS_BTC_DISABLED,
+	AVFS_BTC_NOTSUPPORTED,
+	AVFS_BTC_SMUMSG_ERROR
+};
+
 struct pp_smumgr_func {
 	int (*smu_init)(struct pp_smumgr *smumgr);
 	int (*smu_fini)(struct pp_smumgr *smumgr);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
index 8cd22d9c9140..b4eb483215b1 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
@@ -23,24 +23,6 @@
 #ifndef _FIJI_SMUMANAGER_H_
 #define _FIJI_SMUMANAGER_H_
 
-enum AVFS_BTC_STATUS {
-	AVFS_BTC_BOOT = 0,
-	AVFS_BTC_BOOT_STARTEDSMU,
-	AVFS_LOAD_VIRUS,
-	AVFS_BTC_VIRUS_LOADED,
-	AVFS_BTC_VIRUS_FAIL,
-	AVFS_BTC_STARTED,
-	AVFS_BTC_FAILED,
-	AVFS_BTC_RESTOREVFT_FAILED,
-	AVFS_BTC_SAVEVFT_FAILED,
-	AVFS_BTC_DPMTABLESETUP_FAILED,
-	AVFS_BTC_COMPLETED_UNSAVED,
-	AVFS_BTC_COMPLETED_SAVED,
-	AVFS_BTC_COMPLETED_RESTORED,
-	AVFS_BTC_DISABLED,
-	AVFS_BTC_NOTSUPPORTED,
-	AVFS_BTC_SMUMSG_ERROR
-};
 
 struct fiji_smu_avfs {
 	enum AVFS_BTC_STATUS AvfsBtcStatus;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 8b2becd1aa07..a5ff9458d359 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -229,6 +229,14 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
 	amd_sched_wakeup(entity->sched);
 }
 
+static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_entity *entity =
+		container_of(cb, struct amd_sched_entity, cb);
+	entity->dependency = NULL;
+	fence_put(f);
+}
+
 static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 {
 	struct amd_gpu_scheduler *sched = entity->sched;
@@ -251,7 +259,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 		}
 
 		/* Wait for fence to be scheduled */
-		entity->cb.func = amd_sched_entity_wakeup;
+		entity->cb.func = amd_sched_entity_clear_dep;
 		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
 		return true;
 	}
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 87c78eecea64..dc115aea352b 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -84,12 +84,33 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
 	return true;
 }
 
-static void amd_sched_fence_release(struct fence *f)
+/**
+ * amd_sched_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the fence memory after the RCU grace period.
+ */
+static void amd_sched_fence_free(struct rcu_head *rcu)
 {
+	struct fence *f = container_of(rcu, struct fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
+/**
+ * amd_sched_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amd_sched_fence_release(struct fence *f)
+{
+	call_rcu(&f->rcu, amd_sched_fence_free);
+}
+
 const struct fence_ops amd_sched_fence_ops = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
new file mode 100644
index 000000000000..eaed454e043c
--- /dev/null
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -0,0 +1,27 @@
+config DRM_ARM
+	bool
+	help
+	  Choose this option to select drivers for ARM's devices
+
+config DRM_HDLCD
+	tristate "ARM HDLCD"
+	depends on DRM && OF && (ARM || ARM64)
+	depends on COMMON_CLK
+	select DRM_ARM
+	select DRM_KMS_HELPER
+	select DRM_KMS_FB_HELPER
+	select DRM_KMS_CMA_HELPER
+	help
+	  Choose this option if you have an ARM High Definition Colour LCD
+	  controller.
+
+	  If M is selected the module will be called hdlcd.
+
+config DRM_HDLCD_SHOW_UNDERRUN
+	bool "Show underrun conditions"
+	depends on DRM_HDLCD
+	default n
+	help
+	  Enable this option to show in red colour the pixels that the
+	  HDLCD device did not fetch from framebuffer due to underrun
+	  conditions.
diff --git a/drivers/gpu/drm/arm/Makefile b/drivers/gpu/drm/arm/Makefile
new file mode 100644
index 000000000000..89dcb7bab93a
--- /dev/null
+++ b/drivers/gpu/drm/arm/Makefile
@@ -0,0 +1,2 @@
+hdlcd-y := hdlcd_drv.o hdlcd_crtc.o
+obj-$(CONFIG_DRM_HDLCD)	+= hdlcd.o
diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
new file mode 100644
index 000000000000..fef1b04c2aab
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ *  Implementation of a CRTC class for the HDLCD driver.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_plane_helper.h>
+#include <linux/clk.h>
+#include <linux/of_graph.h>
+#include <linux/platform_data/simplefb.h>
+#include <video/videomode.h>
+
+#include "hdlcd_drv.h"
+#include "hdlcd_regs.h"
+
+/*
+ * The HDLCD controller is a dumb RGB streamer that gets connected to
+ * a single HDMI transmitter or in the case of the ARM Models it gets
+ * emulated by the software that does the actual rendering.
+ *
+ */
+
+static const struct drm_crtc_funcs hdlcd_crtc_funcs = {
+	.destroy = drm_crtc_cleanup,
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = drm_atomic_helper_page_flip,
+	.reset = drm_atomic_helper_crtc_reset,
+	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+};
+
+static struct simplefb_format supported_formats[] = SIMPLEFB_FORMATS;
+
+/*
+ * Setup the HDLCD registers for decoding the pixels out of the framebuffer
+ */
+static int hdlcd_set_pxl_fmt(struct drm_crtc *crtc)
+{
+	unsigned int btpp;
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+	uint32_t pixel_format;
+	struct simplefb_format *format = NULL;
+	int i;
+
+	pixel_format = crtc->primary->state->fb->pixel_format;
+
+	for (i = 0; i < ARRAY_SIZE(supported_formats); i++) {
+		if (supported_formats[i].fourcc == pixel_format)
+			format = &supported_formats[i];
+	}
+
+	if (WARN_ON(!format))
+		return 0;
+
+	/* HDLCD uses 'bytes per pixel', zero means 1 byte */
+	btpp = (format->bits_per_pixel + 7) / 8;
+	hdlcd_write(hdlcd, HDLCD_REG_PIXEL_FORMAT, (btpp - 1) << 3);
+
+	/*
+	 * The format of the HDLCD_REG_<color>_SELECT register is:
+	 *   - bits[23:16] - default value for that color component
+	 *   - bits[11:8]  - number of bits to extract for each color component
+	 *   - bits[4:0]   - index of the lowest bit to extract
+	 *
+	 * The default color value is used when bits[11:8] are zero, when the
+	 * pixel is outside the visible frame area or when there is a
+	 * buffer underrun.
+	 */
+	hdlcd_write(hdlcd, HDLCD_REG_RED_SELECT, format->red.offset |
+#ifdef CONFIG_DRM_HDLCD_SHOW_UNDERRUN
+		    0x00ff0000 |	/* show underruns in red */
+#endif
+		    ((format->red.length & 0xf) << 8));
+	hdlcd_write(hdlcd, HDLCD_REG_GREEN_SELECT, format->green.offset |
+		    ((format->green.length & 0xf) << 8));
+	hdlcd_write(hdlcd, HDLCD_REG_BLUE_SELECT, format->blue.offset |
+		    ((format->blue.length & 0xf) << 8));
+
+	return 0;
+}
+
+static void hdlcd_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+	struct drm_display_mode *m = &crtc->state->adjusted_mode;
+	struct videomode vm;
+	unsigned int polarities, line_length, err;
+
+	vm.vfront_porch = m->crtc_vsync_start - m->crtc_vdisplay;
+	vm.vback_porch = m->crtc_vtotal - m->crtc_vsync_end;
+	vm.vsync_len = m->crtc_vsync_end - m->crtc_vsync_start;
+	vm.hfront_porch = m->crtc_hsync_start - m->crtc_hdisplay;
+	vm.hback_porch = m->crtc_htotal - m->crtc_hsync_end;
+	vm.hsync_len = m->crtc_hsync_end - m->crtc_hsync_start;
+
+	polarities = HDLCD_POLARITY_DATAEN | HDLCD_POLARITY_DATA;
+
+	if (m->flags & DRM_MODE_FLAG_PHSYNC)
+		polarities |= HDLCD_POLARITY_HSYNC;
+	if (m->flags & DRM_MODE_FLAG_PVSYNC)
+		polarities |= HDLCD_POLARITY_VSYNC;
+
+	line_length = crtc->primary->state->fb->pitches[0];
+
+	/* Allow max number of outstanding requests and largest burst size */
+	hdlcd_write(hdlcd, HDLCD_REG_BUS_OPTIONS,
+		    HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16);
+
+	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, line_length);
+	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_PITCH, line_length);
+	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_COUNT, m->crtc_vdisplay - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_V_DATA, m->crtc_vdisplay - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_V_BACK_PORCH, vm.vback_porch - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_V_FRONT_PORCH, vm.vfront_porch - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_V_SYNC, vm.vsync_len - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_H_BACK_PORCH, vm.hback_porch - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_H_FRONT_PORCH, vm.hfront_porch - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_H_SYNC, vm.hsync_len - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_H_DATA, m->crtc_hdisplay - 1);
+	hdlcd_write(hdlcd, HDLCD_REG_POLARITIES, polarities);
+
+	err = hdlcd_set_pxl_fmt(crtc);
+	if (err)
+		return;
+
+	clk_set_rate(hdlcd->clk, m->crtc_clock * 1000);
+}
+
+static void hdlcd_crtc_enable(struct drm_crtc *crtc)
+{
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+
+	clk_prepare_enable(hdlcd->clk);
+	hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 1);
+	drm_crtc_vblank_on(crtc);
+}
+
+static void hdlcd_crtc_disable(struct drm_crtc *crtc)
+{
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+
+	if (!crtc->primary->fb)
+		return;
+
+	clk_disable_unprepare(hdlcd->clk);
+	hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 0);
+	drm_crtc_vblank_off(crtc);
+}
+
+static int hdlcd_crtc_atomic_check(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state)
+{
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+	struct drm_display_mode *mode = &state->adjusted_mode;
+	long rate, clk_rate = mode->clock * 1000;
+
+	rate = clk_round_rate(hdlcd->clk, clk_rate);
+	if (rate != clk_rate) {
+		/* clock required by mode not supported by hardware */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void hdlcd_crtc_atomic_begin(struct drm_crtc *crtc,
+				    struct drm_crtc_state *state)
+{
+	struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
+	unsigned long flags;
+
+	if (crtc->state->event) {
+		struct drm_pending_vblank_event *event = crtc->state->event;
+
+		crtc->state->event = NULL;
+		event->pipe = drm_crtc_index(crtc);
+
+		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+		list_add_tail(&event->base.link, &hdlcd->event_list);
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+	}
+}
+
+static void hdlcd_crtc_atomic_flush(struct drm_crtc *crtc,
+				    struct drm_crtc_state *state)
+{
+}
+
+static bool hdlcd_crtc_mode_fixup(struct drm_crtc *crtc,
+			const struct drm_display_mode *mode,
+			struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static const struct drm_crtc_helper_funcs hdlcd_crtc_helper_funcs = {
+	.mode_fixup	= hdlcd_crtc_mode_fixup,
+	.mode_set	= drm_helper_crtc_mode_set,
+	.mode_set_base	= drm_helper_crtc_mode_set_base,
+	.mode_set_nofb	= hdlcd_crtc_mode_set_nofb,
+	.enable		= hdlcd_crtc_enable,
+	.disable	= hdlcd_crtc_disable,
+	.prepare	= hdlcd_crtc_disable,
+	.commit		= hdlcd_crtc_enable,
+	.atomic_check	= hdlcd_crtc_atomic_check,
+	.atomic_begin	= hdlcd_crtc_atomic_begin,
+	.atomic_flush	= hdlcd_crtc_atomic_flush,
+};
+
+static int hdlcd_plane_atomic_check(struct drm_plane *plane,
+				    struct drm_plane_state *state)
+{
+	return 0;
+}
+
+static void hdlcd_plane_atomic_update(struct drm_plane *plane,
+				      struct drm_plane_state *state)
+{
+	struct hdlcd_drm_private *hdlcd;
+	struct drm_gem_cma_object *gem;
+	dma_addr_t scanout_start;
+
+	if (!plane->state->crtc || !plane->state->fb)
+		return;
+
+	hdlcd = crtc_to_hdlcd_priv(plane->state->crtc);
+	gem = drm_fb_cma_get_gem_obj(plane->state->fb, 0);
+	scanout_start = gem->paddr;
+	hdlcd_write(hdlcd, HDLCD_REG_FB_BASE, scanout_start);
+}
+
+static const struct drm_plane_helper_funcs hdlcd_plane_helper_funcs = {
+	.prepare_fb = NULL,
+	.cleanup_fb = NULL,
+	.atomic_check = hdlcd_plane_atomic_check,
+	.atomic_update = hdlcd_plane_atomic_update,
+};
+
+static void hdlcd_plane_destroy(struct drm_plane *plane)
+{
+	drm_plane_helper_disable(plane);
+	drm_plane_cleanup(plane);
+}
+
+static const struct drm_plane_funcs hdlcd_plane_funcs = {
+	.update_plane		= drm_atomic_helper_update_plane,
+	.disable_plane		= drm_atomic_helper_disable_plane,
+	.destroy		= hdlcd_plane_destroy,
+	.reset			= drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+};
+
+static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct drm_plane *plane = NULL;
+	u32 formats[ARRAY_SIZE(supported_formats)], i;
+	int ret;
+
+	plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
+		formats[i] = supported_formats[i].fourcc;
+
+	ret = drm_universal_plane_init(drm, plane, 0xff, &hdlcd_plane_funcs,
+				       formats, ARRAY_SIZE(formats),
+				       DRM_PLANE_TYPE_PRIMARY, NULL);
+	if (ret) {
+		devm_kfree(drm->dev, plane);
+		return ERR_PTR(ret);
+	}
+
+	drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs);
+	hdlcd->plane = plane;
+
+	return plane;
+}
+
+void hdlcd_crtc_suspend(struct drm_crtc *crtc)
+{
+	hdlcd_crtc_disable(crtc);
+}
+
+void hdlcd_crtc_resume(struct drm_crtc *crtc)
+{
+	hdlcd_crtc_enable(crtc);
+}
+
+int hdlcd_setup_crtc(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct drm_plane *primary;
+	int ret;
+
+	primary = hdlcd_plane_init(drm);
+	if (IS_ERR(primary))
+		return PTR_ERR(primary);
+
+	ret = drm_crtc_init_with_planes(drm, &hdlcd->crtc, primary, NULL,
+					&hdlcd_crtc_funcs, NULL);
+	if (ret) {
+		hdlcd_plane_destroy(primary);
+		devm_kfree(drm->dev, primary);
+		return ret;
+	}
+
+	drm_crtc_helper_add(&hdlcd->crtc, &hdlcd_crtc_helper_funcs);
+	return 0;
+}
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
new file mode 100644
index 000000000000..56b829f97699
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited
+ * Author: Liviu Dudau <Liviu.Dudau@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ *  ARM HDLCD Driver
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/list.h>
+#include <linux/of_graph.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_of.h>
+
+#include "hdlcd_drv.h"
+#include "hdlcd_regs.h"
+
+static int hdlcd_load(struct drm_device *drm, unsigned long flags)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct platform_device *pdev = to_platform_device(drm->dev);
+	struct resource *res;
+	u32 version;
+	int ret;
+
+	hdlcd->clk = devm_clk_get(drm->dev, "pxlclk");
+	if (IS_ERR(hdlcd->clk))
+		return PTR_ERR(hdlcd->clk);
+
+#ifdef CONFIG_DEBUG_FS
+	atomic_set(&hdlcd->buffer_underrun_count, 0);
+	atomic_set(&hdlcd->bus_error_count, 0);
+	atomic_set(&hdlcd->vsync_count, 0);
+	atomic_set(&hdlcd->dma_end_count, 0);
+#endif
+
+	INIT_LIST_HEAD(&hdlcd->event_list);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hdlcd->mmio = devm_ioremap_resource(drm->dev, res);
+	if (IS_ERR(hdlcd->mmio)) {
+		DRM_ERROR("failed to map control registers area\n");
+		ret = PTR_ERR(hdlcd->mmio);
+		hdlcd->mmio = NULL;
+		goto fail;
+	}
+
+	version = hdlcd_read(hdlcd, HDLCD_REG_VERSION);
+	if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) {
+		DRM_ERROR("unknown product id: 0x%x\n", version);
+		ret = -EINVAL;
+		goto fail;
+	}
+	DRM_INFO("found ARM HDLCD version r%dp%d\n",
+		(version & HDLCD_VERSION_MAJOR_MASK) >> 8,
+		version & HDLCD_VERSION_MINOR_MASK);
+
+	/* Get the optional framebuffer memory resource */
+	ret = of_reserved_mem_device_init(drm->dev);
+	if (ret && ret != -ENODEV)
+		goto fail;
+
+	ret = dma_set_mask_and_coherent(drm->dev, DMA_BIT_MASK(32));
+	if (ret)
+		goto setup_fail;
+
+	ret = hdlcd_setup_crtc(drm);
+	if (ret < 0) {
+		DRM_ERROR("failed to create crtc\n");
+		goto setup_fail;
+	}
+
+	pm_runtime_enable(drm->dev);
+
+	pm_runtime_get_sync(drm->dev);
+	ret = drm_irq_install(drm, platform_get_irq(pdev, 0));
+	pm_runtime_put_sync(drm->dev);
+	if (ret < 0) {
+		DRM_ERROR("failed to install IRQ handler\n");
+		goto irq_fail;
+	}
+
+	return 0;
+
+irq_fail:
+	drm_crtc_cleanup(&hdlcd->crtc);
+setup_fail:
+	of_reserved_mem_device_release(drm->dev);
+fail:
+	devm_clk_put(drm->dev, hdlcd->clk);
+
+	return ret;
+}
+
+static void hdlcd_fb_output_poll_changed(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+
+	if (hdlcd->fbdev)
+		drm_fbdev_cma_hotplug_event(hdlcd->fbdev);
+}
+
+static int hdlcd_atomic_commit(struct drm_device *dev,
+			       struct drm_atomic_state *state, bool async)
+{
+	return drm_atomic_helper_commit(dev, state, false);
+}
+
+static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
+	.fb_create = drm_fb_cma_create,
+	.output_poll_changed = hdlcd_fb_output_poll_changed,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = hdlcd_atomic_commit,
+};
+
+static void hdlcd_setup_mode_config(struct drm_device *drm)
+{
+	drm_mode_config_init(drm);
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+	drm->mode_config.max_width = HDLCD_MAX_XRES;
+	drm->mode_config.max_height = HDLCD_MAX_YRES;
+	drm->mode_config.funcs = &hdlcd_mode_config_funcs;
+}
+
+static void hdlcd_lastclose(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+
+	drm_fbdev_cma_restore_mode(hdlcd->fbdev);
+}
+
+static irqreturn_t hdlcd_irq(int irq, void *arg)
+{
+	struct drm_device *drm = arg;
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	unsigned long irq_status;
+
+	irq_status = hdlcd_read(hdlcd, HDLCD_REG_INT_STATUS);
+
+#ifdef CONFIG_DEBUG_FS
+	if (irq_status & HDLCD_INTERRUPT_UNDERRUN)
+		atomic_inc(&hdlcd->buffer_underrun_count);
+
+	if (irq_status & HDLCD_INTERRUPT_DMA_END)
+		atomic_inc(&hdlcd->dma_end_count);
+
+	if (irq_status & HDLCD_INTERRUPT_BUS_ERROR)
+		atomic_inc(&hdlcd->bus_error_count);
+
+	if (irq_status & HDLCD_INTERRUPT_VSYNC)
+		atomic_inc(&hdlcd->vsync_count);
+
+#endif
+	if (irq_status & HDLCD_INTERRUPT_VSYNC) {
+		bool events_sent = false;
+		unsigned long flags;
+		struct drm_pending_vblank_event	*e, *t;
+
+		drm_crtc_handle_vblank(&hdlcd->crtc);
+
+		spin_lock_irqsave(&drm->event_lock, flags);
+		list_for_each_entry_safe(e, t, &hdlcd->event_list, base.link) {
+			list_del(&e->base.link);
+			drm_crtc_send_vblank_event(&hdlcd->crtc, e);
+			events_sent = true;
+		}
+		if (events_sent)
+			drm_crtc_vblank_put(&hdlcd->crtc);
+		spin_unlock_irqrestore(&drm->event_lock, flags);
+	}
+
+	/* acknowledge interrupt(s) */
+	hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, irq_status);
+
+	return IRQ_HANDLED;
+}
+
+static void hdlcd_irq_preinstall(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	/* Ensure interrupts are disabled */
+	hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, 0);
+	hdlcd_write(hdlcd, HDLCD_REG_INT_CLEAR, ~0);
+}
+
+static int hdlcd_irq_postinstall(struct drm_device *drm)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+	/* enable debug interrupts */
+	irq_mask |= HDLCD_DEBUG_INT_MASK;
+
+	hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
+#endif
+	return 0;
+}
+
+static void hdlcd_irq_uninstall(struct drm_device *drm)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	/* disable all the interrupts that we might have enabled */
+	unsigned long irq_mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+#ifdef CONFIG_DEBUG_FS
+	/* disable debug interrupts */
+	irq_mask &= ~HDLCD_DEBUG_INT_MASK;
+#endif
+
+	/* disable vsync interrupts */
+	irq_mask &= ~HDLCD_INTERRUPT_VSYNC;
+
+	hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, irq_mask);
+}
+
+static int hdlcd_enable_vblank(struct drm_device *drm, unsigned int crtc)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	unsigned int mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+	hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, mask | HDLCD_INTERRUPT_VSYNC);
+
+	return 0;
+}
+
+static void hdlcd_disable_vblank(struct drm_device *drm, unsigned int crtc)
+{
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	unsigned int mask = hdlcd_read(hdlcd, HDLCD_REG_INT_MASK);
+
+	hdlcd_write(hdlcd, HDLCD_REG_INT_MASK, mask & ~HDLCD_INTERRUPT_VSYNC);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int hdlcd_show_underrun_count(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *drm = node->minor->dev;
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+
+	seq_printf(m, "underrun : %d\n", atomic_read(&hdlcd->buffer_underrun_count));
+	seq_printf(m, "dma_end  : %d\n", atomic_read(&hdlcd->dma_end_count));
+	seq_printf(m, "bus_error: %d\n", atomic_read(&hdlcd->bus_error_count));
+	seq_printf(m, "vsync    : %d\n", atomic_read(&hdlcd->vsync_count));
+	return 0;
+}
+
+static int hdlcd_show_pxlclock(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *drm = node->minor->dev;
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	unsigned long clkrate = clk_get_rate(hdlcd->clk);
+	unsigned long mode_clock = hdlcd->crtc.mode.crtc_clock * 1000;
+
+	seq_printf(m, "hw  : %lu\n", clkrate);
+	seq_printf(m, "mode: %lu\n", mode_clock);
+	return 0;
+}
+
+static struct drm_info_list hdlcd_debugfs_list[] = {
+	{ "interrupt_count", hdlcd_show_underrun_count, 0 },
+	{ "clocks", hdlcd_show_pxlclock, 0 },
+};
+
+static int hdlcd_debugfs_init(struct drm_minor *minor)
+{
+	return drm_debugfs_create_files(hdlcd_debugfs_list,
+		ARRAY_SIZE(hdlcd_debugfs_list),	minor->debugfs_root, minor);
+}
+
+static void hdlcd_debugfs_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(hdlcd_debugfs_list,
+		ARRAY_SIZE(hdlcd_debugfs_list), minor);
+}
+#endif
+
+static const struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.open		= drm_open,
+	.release	= drm_release,
+	.unlocked_ioctl	= drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= drm_compat_ioctl,
+#endif
+	.poll		= drm_poll,
+	.read		= drm_read,
+	.llseek		= noop_llseek,
+	.mmap		= drm_gem_cma_mmap,
+};
+
+static struct drm_driver hdlcd_driver = {
+	.driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
+			   DRIVER_MODESET | DRIVER_PRIME |
+			   DRIVER_ATOMIC,
+	.lastclose = hdlcd_lastclose,
+	.irq_handler = hdlcd_irq,
+	.irq_preinstall = hdlcd_irq_preinstall,
+	.irq_postinstall = hdlcd_irq_postinstall,
+	.irq_uninstall = hdlcd_irq_uninstall,
+	.get_vblank_counter = drm_vblank_no_hw_counter,
+	.enable_vblank = hdlcd_enable_vblank,
+	.disable_vblank = hdlcd_disable_vblank,
+	.gem_free_object = drm_gem_cma_free_object,
+	.gem_vm_ops = &drm_gem_cma_vm_ops,
+	.dumb_create = drm_gem_cma_dumb_create,
+	.dumb_map_offset = drm_gem_cma_dumb_map_offset,
+	.dumb_destroy = drm_gem_dumb_destroy,
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
+	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+#ifdef CONFIG_DEBUG_FS
+	.debugfs_init = hdlcd_debugfs_init,
+	.debugfs_cleanup = hdlcd_debugfs_cleanup,
+#endif
+	.fops = &fops,
+	.name = "hdlcd",
+	.desc = "ARM HDLCD Controller DRM",
+	.date = "20151021",
+	.major = 1,
+	.minor = 0,
+};
+
+static int hdlcd_drm_bind(struct device *dev)
+{
+	struct drm_device *drm;
+	struct hdlcd_drm_private *hdlcd;
+	int ret;
+
+	hdlcd = devm_kzalloc(dev, sizeof(*hdlcd), GFP_KERNEL);
+	if (!hdlcd)
+		return -ENOMEM;
+
+	drm = drm_dev_alloc(&hdlcd_driver, dev);
+	if (!drm)
+		return -ENOMEM;
+
+	drm->dev_private = hdlcd;
+	hdlcd_setup_mode_config(drm);
+	ret = hdlcd_load(drm, 0);
+	if (ret)
+		goto err_free;
+
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		goto err_unload;
+
+	dev_set_drvdata(dev, drm);
+
+	ret = component_bind_all(dev, drm);
+	if (ret) {
+		DRM_ERROR("Failed to bind all components\n");
+		goto err_unregister;
+	}
+
+	ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
+	if (ret < 0) {
+		DRM_ERROR("failed to initialise vblank\n");
+		goto err_vblank;
+	}
+	drm->vblank_disable_allowed = true;
+
+	drm_mode_config_reset(drm);
+	drm_kms_helper_poll_init(drm);
+
+	hdlcd->fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+					  drm->mode_config.num_connector);
+
+	if (IS_ERR(hdlcd->fbdev)) {
+		ret = PTR_ERR(hdlcd->fbdev);
+		hdlcd->fbdev = NULL;
+		goto err_fbdev;
+	}
+
+	return 0;
+
+err_fbdev:
+	drm_kms_helper_poll_fini(drm);
+	drm_mode_config_cleanup(drm);
+	drm_vblank_cleanup(drm);
+err_vblank:
+	component_unbind_all(dev, drm);
+err_unregister:
+	drm_dev_unregister(drm);
+err_unload:
+	pm_runtime_get_sync(drm->dev);
+	drm_irq_uninstall(drm);
+	pm_runtime_put_sync(drm->dev);
+	pm_runtime_disable(drm->dev);
+	of_reserved_mem_device_release(drm->dev);
+	devm_clk_put(dev, hdlcd->clk);
+err_free:
+	drm_dev_unref(drm);
+
+	return ret;
+}
+
+static void hdlcd_drm_unbind(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+
+	if (hdlcd->fbdev) {
+		drm_fbdev_cma_fini(hdlcd->fbdev);
+		hdlcd->fbdev = NULL;
+	}
+	drm_kms_helper_poll_fini(drm);
+	component_unbind_all(dev, drm);
+	drm_vblank_cleanup(drm);
+	pm_runtime_get_sync(drm->dev);
+	drm_irq_uninstall(drm);
+	pm_runtime_put_sync(drm->dev);
+	pm_runtime_disable(drm->dev);
+	of_reserved_mem_device_release(drm->dev);
+	if (!IS_ERR(hdlcd->clk)) {
+		devm_clk_put(drm->dev, hdlcd->clk);
+		hdlcd->clk = NULL;
+	}
+	drm_mode_config_cleanup(drm);
+	drm_dev_unregister(drm);
+	drm_dev_unref(drm);
+	drm->dev_private = NULL;
+	dev_set_drvdata(dev, NULL);
+}
+
+static const struct component_master_ops hdlcd_master_ops = {
+	.bind		= hdlcd_drm_bind,
+	.unbind		= hdlcd_drm_unbind,
+};
+
+static int compare_dev(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static int hdlcd_probe(struct platform_device *pdev)
+{
+	struct device_node *port, *ep;
+	struct component_match *match = NULL;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
+	/* there is only one output port inside each device, find it */
+	ep = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
+	if (!ep)
+		return -ENODEV;
+
+	if (!of_device_is_available(ep)) {
+		of_node_put(ep);
+		return -ENODEV;
+	}
+
+	/* add the remote encoder port as component */
+	port = of_graph_get_remote_port_parent(ep);
+	of_node_put(ep);
+	if (!port || !of_device_is_available(port)) {
+		of_node_put(port);
+		return -EAGAIN;
+	}
+
+	component_match_add(&pdev->dev, &match, compare_dev, port);
+
+	return component_master_add_with_match(&pdev->dev, &hdlcd_master_ops,
+					       match);
+}
+
+static int hdlcd_remove(struct platform_device *pdev)
+{
+	component_master_del(&pdev->dev, &hdlcd_master_ops);
+	return 0;
+}
+
+static const struct of_device_id  hdlcd_of_match[] = {
+	{ .compatible	= "arm,hdlcd" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, hdlcd_of_match);
+
+static int __maybe_unused hdlcd_pm_suspend(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct drm_crtc *crtc;
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	drm_modeset_lock_all(drm);
+	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
+		hdlcd_crtc_suspend(crtc);
+	drm_modeset_unlock_all(drm);
+	return 0;
+}
+
+static int __maybe_unused hdlcd_pm_resume(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct drm_crtc *crtc;
+
+	if (!pm_runtime_suspended(dev))
+		return 0;
+
+	drm_modeset_lock_all(drm);
+	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
+		hdlcd_crtc_resume(crtc);
+	drm_modeset_unlock_all(drm);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(hdlcd_pm_ops, hdlcd_pm_suspend, hdlcd_pm_resume);
+
+static struct platform_driver hdlcd_platform_driver = {
+	.probe		= hdlcd_probe,
+	.remove		= hdlcd_remove,
+	.driver	= {
+		.name = "hdlcd",
+		.pm = &hdlcd_pm_ops,
+		.of_match_table	= hdlcd_of_match,
+	},
+};
+
+module_platform_driver(hdlcd_platform_driver);
+
+MODULE_AUTHOR("Liviu Dudau");
+MODULE_DESCRIPTION("ARM HDLCD DRM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h
new file mode 100644
index 000000000000..aa234784f053
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_drv.h
@@ -0,0 +1,42 @@
+/*
+ *  ARM HDLCD Controller register definition
+ */
+
+#ifndef __HDLCD_DRV_H__
+#define __HDLCD_DRV_H__
+
+struct hdlcd_drm_private {
+	void __iomem			*mmio;
+	struct clk			*clk;
+	struct drm_fbdev_cma		*fbdev;
+	struct drm_framebuffer		*fb;
+	struct list_head		event_list;
+	struct drm_crtc			crtc;
+	struct drm_plane		*plane;
+#ifdef CONFIG_DEBUG_FS
+	atomic_t buffer_underrun_count;
+	atomic_t bus_error_count;
+	atomic_t vsync_count;
+	atomic_t dma_end_count;
+#endif
+};
+
+#define crtc_to_hdlcd_priv(x)	container_of(x, struct hdlcd_drm_private, crtc)
+
+static inline void hdlcd_write(struct hdlcd_drm_private *hdlcd,
+			       unsigned int reg, u32 value)
+{
+	writel(value, hdlcd->mmio + reg);
+}
+
+static inline u32 hdlcd_read(struct hdlcd_drm_private *hdlcd, unsigned int reg)
+{
+	return readl(hdlcd->mmio + reg);
+}
+
+int hdlcd_setup_crtc(struct drm_device *dev);
+void hdlcd_set_scanout(struct hdlcd_drm_private *hdlcd);
+void hdlcd_crtc_suspend(struct drm_crtc *crtc);
+void hdlcd_crtc_resume(struct drm_crtc *crtc);
+
+#endif /* __HDLCD_DRV_H__ */
diff --git a/drivers/gpu/drm/arm/hdlcd_regs.h b/drivers/gpu/drm/arm/hdlcd_regs.h
new file mode 100644
index 000000000000..66799ebef6d3
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_regs.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013,2014 ARM Limited
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ *  ARM HDLCD Controller register definition
+ */
+
+#ifndef __HDLCD_REGS_H__
+#define __HDLCD_REGS_H__
+
+/* register offsets */
+#define HDLCD_REG_VERSION		0x0000	/* ro */
+#define HDLCD_REG_INT_RAWSTAT		0x0010	/* rw */
+#define HDLCD_REG_INT_CLEAR		0x0014	/* wo */
+#define HDLCD_REG_INT_MASK		0x0018	/* rw */
+#define HDLCD_REG_INT_STATUS		0x001c	/* ro */
+#define HDLCD_REG_FB_BASE		0x0100	/* rw */
+#define HDLCD_REG_FB_LINE_LENGTH	0x0104	/* rw */
+#define HDLCD_REG_FB_LINE_COUNT		0x0108	/* rw */
+#define HDLCD_REG_FB_LINE_PITCH		0x010c	/* rw */
+#define HDLCD_REG_BUS_OPTIONS		0x0110	/* rw */
+#define HDLCD_REG_V_SYNC		0x0200	/* rw */
+#define HDLCD_REG_V_BACK_PORCH		0x0204	/* rw */
+#define HDLCD_REG_V_DATA		0x0208	/* rw */
+#define HDLCD_REG_V_FRONT_PORCH		0x020c	/* rw */
+#define HDLCD_REG_H_SYNC		0x0210	/* rw */
+#define HDLCD_REG_H_BACK_PORCH		0x0214	/* rw */
+#define HDLCD_REG_H_DATA		0x0218	/* rw */
+#define HDLCD_REG_H_FRONT_PORCH		0x021c	/* rw */
+#define HDLCD_REG_POLARITIES		0x0220	/* rw */
+#define HDLCD_REG_COMMAND		0x0230	/* rw */
+#define HDLCD_REG_PIXEL_FORMAT		0x0240	/* rw */
+#define HDLCD_REG_RED_SELECT		0x0244	/* rw */
+#define HDLCD_REG_GREEN_SELECT		0x0248	/* rw */
+#define HDLCD_REG_BLUE_SELECT		0x024c	/* rw */
+
+/* version */
+#define HDLCD_PRODUCT_ID		0x1CDC0000
+#define HDLCD_PRODUCT_MASK		0xFFFF0000
+#define HDLCD_VERSION_MAJOR_MASK	0x0000FF00
+#define HDLCD_VERSION_MINOR_MASK	0x000000FF
+
+/* interrupts */
+#define HDLCD_INTERRUPT_DMA_END		(1 << 0)
+#define HDLCD_INTERRUPT_BUS_ERROR	(1 << 1)
+#define HDLCD_INTERRUPT_VSYNC		(1 << 2)
+#define HDLCD_INTERRUPT_UNDERRUN	(1 << 3)
+#define HDLCD_DEBUG_INT_MASK		(HDLCD_INTERRUPT_DMA_END |  \
+					HDLCD_INTERRUPT_BUS_ERROR | \
+					HDLCD_INTERRUPT_UNDERRUN)
+
+/* polarities */
+#define HDLCD_POLARITY_VSYNC		(1 << 0)
+#define HDLCD_POLARITY_HSYNC		(1 << 1)
+#define HDLCD_POLARITY_DATAEN		(1 << 2)
+#define HDLCD_POLARITY_DATA		(1 << 3)
+#define HDLCD_POLARITY_PIXELCLK		(1 << 4)
+
+/* commands */
+#define HDLCD_COMMAND_DISABLE		(0 << 0)
+#define HDLCD_COMMAND_ENABLE		(1 << 0)
+
+/* pixel format */
+#define HDLCD_PIXEL_FMT_LITTLE_ENDIAN	(0 << 31)
+#define HDLCD_PIXEL_FMT_BIG_ENDIAN	(1 << 31)
+#define HDLCD_BYTES_PER_PIXEL_MASK	(3 << 3)
+
+/* bus options */
+#define HDLCD_BUS_BURST_MASK		0x01f
+#define HDLCD_BUS_MAX_OUTSTAND		0xf00
+#define HDLCD_BUS_BURST_NONE		(0 << 0)
+#define HDLCD_BUS_BURST_1		(1 << 0)
+#define HDLCD_BUS_BURST_2		(1 << 1)
+#define HDLCD_BUS_BURST_4		(1 << 2)
+#define HDLCD_BUS_BURST_8		(1 << 3)
+#define HDLCD_BUS_BURST_16		(1 << 4)
+
+/* Max resolution supported is 4096x4096, 32bpp */
+#define HDLCD_MAX_XRES			4096
+#define HDLCD_MAX_YRES			4096
+
+#define NR_PALETTE			256
+
+#endif /* __HDLCD_REGS_H__ */
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index 3bd7e1cde99e..82043c204b76 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -188,9 +188,6 @@ static const struct file_operations armada_drm_fops = {
 
 static struct drm_driver armada_drm_driver = {
 	.load			= armada_drm_load,
-	.open			= NULL,
-	.preclose		= NULL,
-	.postclose		= NULL,
 	.lastclose		= armada_drm_lastclose,
 	.unload			= armada_drm_unload,
 	.set_busid		= drm_platform_set_busid,
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 0123458cbd83..a965e7e8ad6e 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -497,13 +497,6 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode)
 	}
 }
 
-static bool ast_crtc_mode_fixup(struct drm_crtc *crtc,
-				const struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 /* ast is different - we will force move buffers out of VRAM */
 static int ast_crtc_do_set_base(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
@@ -617,7 +610,6 @@ static void ast_crtc_commit(struct drm_crtc *crtc)
 
 static const struct drm_crtc_helper_funcs ast_crtc_helper_funcs = {
 	.dpms = ast_crtc_dpms,
-	.mode_fixup = ast_crtc_mode_fixup,
 	.mode_set = ast_crtc_mode_set,
 	.mode_set_base = ast_crtc_mode_set_base,
 	.disable = ast_crtc_disable,
@@ -710,13 +702,6 @@ static void ast_encoder_dpms(struct drm_encoder *encoder, int mode)
 
 }
 
-static bool ast_mode_fixup(struct drm_encoder *encoder,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void ast_encoder_mode_set(struct drm_encoder *encoder,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted_mode)
@@ -736,7 +721,6 @@ static void ast_encoder_commit(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs ast_enc_helper_funcs = {
 	.dpms = ast_encoder_dpms,
-	.mode_fixup = ast_mode_fixup,
 	.prepare = ast_encoder_prepare,
 	.commit = ast_encoder_commit,
 	.mode_set = ast_encoder_mode_set,
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 468a14f266a7..58c4f785cf84 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -121,13 +121,6 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 			   cfg);
 }
 
-static bool atmel_hlcdc_crtc_mode_fixup(struct drm_crtc *crtc,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void atmel_hlcdc_crtc_disable(struct drm_crtc *c)
 {
 	struct drm_device *dev = c->dev;
@@ -261,7 +254,6 @@ static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *crtc,
 }
 
 static const struct drm_crtc_helper_funcs lcdc_crtc_helper_funcs = {
-	.mode_fixup = atmel_hlcdc_crtc_mode_fixup,
 	.mode_set = drm_helper_crtc_mode_set,
 	.mode_set_nofb = atmel_hlcdc_crtc_mode_set_nofb,
 	.mode_set_base = drm_helper_crtc_mode_set_base,
@@ -280,24 +272,6 @@ static void atmel_hlcdc_crtc_destroy(struct drm_crtc *c)
 	kfree(crtc);
 }
 
-void atmel_hlcdc_crtc_cancel_page_flip(struct drm_crtc *c,
-				       struct drm_file *file)
-{
-	struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c);
-	struct drm_pending_vblank_event *event;
-	struct drm_device *dev = c->dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->event_lock, flags);
-	event = crtc->event;
-	if (event && event->base.file_priv == file) {
-		event->base.destroy(&event->base);
-		drm_vblank_put(dev, crtc->id);
-		crtc->event = NULL;
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
 static void atmel_hlcdc_crtc_finish_page_flip(struct atmel_hlcdc_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -367,4 +341,3 @@ fail:
 	atmel_hlcdc_crtc_destroy(&crtc->base);
 	return ret;
 }
-
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index a45b32ba029e..3d8d16402d07 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -619,15 +619,6 @@ static void atmel_hlcdc_dc_connector_unplug_all(struct drm_device *dev)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
-static void atmel_hlcdc_dc_preclose(struct drm_device *dev,
-				    struct drm_file *file)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		atmel_hlcdc_crtc_cancel_page_flip(crtc, file);
-}
-
 static void atmel_hlcdc_dc_lastclose(struct drm_device *dev)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
@@ -698,7 +689,6 @@ static struct drm_driver atmel_hlcdc_dc_driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_GEM |
 			   DRIVER_MODESET | DRIVER_PRIME |
 			   DRIVER_ATOMIC,
-	.preclose = atmel_hlcdc_dc_preclose,
 	.lastclose = atmel_hlcdc_dc_lastclose,
 	.irq_handler = atmel_hlcdc_dc_irq_handler,
 	.irq_preinstall = atmel_hlcdc_dc_irq_uninstall,
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
index cf6b375bc38d..fed517f297da 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
@@ -152,9 +152,6 @@ int atmel_hlcdc_plane_prepare_disc_area(struct drm_crtc_state *c_state);
 
 void atmel_hlcdc_crtc_irq(struct drm_crtc *c);
 
-void atmel_hlcdc_crtc_cancel_page_flip(struct drm_crtc *crtc,
-				       struct drm_file *file);
-
 void atmel_hlcdc_crtc_suspend(struct drm_crtc *crtc);
 void atmel_hlcdc_crtc_resume(struct drm_crtc *crtc);
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 1ffe9c329c46..d65dcaee3832 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -558,7 +558,7 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	if (!state->base.crtc || !fb)
 		return 0;
 
-	crtc_state = s->state->crtc_states[drm_crtc_index(s->crtc)];
+	crtc_state = drm_atomic_get_existing_crtc_state(s->state, s->crtc);
 	mode = &crtc_state->adjusted_mode;
 
 	state->src_x = s->src_x;
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 2849f1b95eec..96926f09e0c9 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -30,13 +30,6 @@ static void bochs_crtc_dpms(struct drm_crtc *crtc, int mode)
 	}
 }
 
-static bool bochs_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static int bochs_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 				    struct drm_framebuffer *old_fb)
 {
@@ -135,7 +128,6 @@ static const struct drm_crtc_funcs bochs_crtc_funcs = {
 
 static const struct drm_crtc_helper_funcs bochs_helper_funcs = {
 	.dpms = bochs_crtc_dpms,
-	.mode_fixup = bochs_crtc_mode_fixup,
 	.mode_set = bochs_crtc_mode_set,
 	.mode_set_base = bochs_crtc_mode_set_base,
 	.prepare = bochs_crtc_prepare,
@@ -152,13 +144,6 @@ static void bochs_crtc_init(struct drm_device *dev)
 	drm_crtc_helper_add(crtc, &bochs_helper_funcs);
 }
 
-static bool bochs_encoder_mode_fixup(struct drm_encoder *encoder,
-				     const struct drm_display_mode *mode,
-				     struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void bochs_encoder_mode_set(struct drm_encoder *encoder,
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *adjusted_mode)
@@ -179,7 +164,6 @@ static void bochs_encoder_commit(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs bochs_encoder_helper_funcs = {
 	.dpms = bochs_encoder_dpms,
-	.mode_fixup = bochs_encoder_mode_fixup,
 	.mode_set = bochs_encoder_mode_set,
 	.prepare = bochs_encoder_prepare,
 	.commit = bochs_encoder_commit,
diff --git a/drivers/gpu/drm/bridge/dw-hdmi.c b/drivers/gpu/drm/bridge/dw-hdmi.c
index b0aac4733020..9795b72472ba 100644
--- a/drivers/gpu/drm/bridge/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/dw-hdmi.c
@@ -1391,13 +1391,6 @@ static void dw_hdmi_bridge_mode_set(struct drm_bridge *bridge,
 	mutex_unlock(&hdmi->mutex);
 }
 
-static bool dw_hdmi_bridge_mode_fixup(struct drm_bridge *bridge,
-				      const struct drm_display_mode *mode,
-				      struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void dw_hdmi_bridge_disable(struct drm_bridge *bridge)
 {
 	struct dw_hdmi *hdmi = bridge->driver_private;
@@ -1546,7 +1539,6 @@ static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = {
 	.pre_enable = dw_hdmi_bridge_nop,
 	.post_disable = dw_hdmi_bridge_nop,
 	.mode_set = dw_hdmi_bridge_mode_set,
-	.mode_fixup = dw_hdmi_bridge_mode_fixup,
 };
 
 static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id)
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index 4a02854a6963..d3d8d7bfcc57 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -91,18 +91,6 @@ static void cirrus_crtc_dpms(struct drm_crtc *crtc, int mode)
 	WREG_GFX(0xe, gr0e);
 }
 
-/*
- * The core passes the desired mode to the CRTC code to see whether any
- * CRTC-specific modifications need to be made to it. We're in a position
- * to just pass that straight through, so this does nothing
- */
-static bool cirrus_crtc_mode_fixup(struct drm_crtc *crtc,
-				   const struct drm_display_mode *mode,
-				   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void cirrus_set_start_address(struct drm_crtc *crtc, unsigned offset)
 {
 	struct cirrus_device *cdev = crtc->dev->dev_private;
@@ -372,7 +360,6 @@ static const struct drm_crtc_funcs cirrus_crtc_funcs = {
 
 static const struct drm_crtc_helper_funcs cirrus_helper_funcs = {
 	.dpms = cirrus_crtc_dpms,
-	.mode_fixup = cirrus_crtc_mode_fixup,
 	.mode_set = cirrus_crtc_mode_set,
 	.mode_set_base = cirrus_crtc_mode_set_base,
 	.prepare = cirrus_crtc_prepare,
@@ -430,14 +417,6 @@ void cirrus_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 	*blue = cirrus_crtc->lut_b[regno];
 }
 
-
-static bool cirrus_encoder_mode_fixup(struct drm_encoder *encoder,
-				      const struct drm_display_mode *mode,
-				      struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void cirrus_encoder_mode_set(struct drm_encoder *encoder,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode)
@@ -466,7 +445,6 @@ static void cirrus_encoder_destroy(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs cirrus_encoder_helper_funcs = {
 	.dpms = cirrus_encoder_dpms,
-	.mode_fixup = cirrus_encoder_mode_fixup,
 	.mode_set = cirrus_encoder_mode_set,
 	.prepare = cirrus_encoder_prepare,
 	.commit = cirrus_encoder_commit,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 9a7b44616b55..8ee1db866e80 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -28,6 +28,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_mode.h>
 #include <drm/drm_plane_helper.h>
 
 /**
@@ -376,6 +377,58 @@ int drm_atomic_set_mode_prop_for_crtc(struct drm_crtc_state *state,
 EXPORT_SYMBOL(drm_atomic_set_mode_prop_for_crtc);
 
 /**
+ * drm_atomic_replace_property_blob - replace a blob property
+ * @blob: a pointer to the member blob to be replaced
+ * @new_blob: the new blob to replace with
+ * @replaced: whether the blob has been replaced
+ *
+ * RETURNS:
+ * Zero on success, error code on failure
+ */
+static void
+drm_atomic_replace_property_blob(struct drm_property_blob **blob,
+				 struct drm_property_blob *new_blob,
+				 bool *replaced)
+{
+	struct drm_property_blob *old_blob = *blob;
+
+	if (old_blob == new_blob)
+		return;
+
+	if (old_blob)
+		drm_property_unreference_blob(old_blob);
+	if (new_blob)
+		drm_property_reference_blob(new_blob);
+	*blob = new_blob;
+	*replaced = true;
+
+	return;
+}
+
+static int
+drm_atomic_replace_property_blob_from_id(struct drm_crtc *crtc,
+					 struct drm_property_blob **blob,
+					 uint64_t blob_id,
+					 ssize_t expected_size,
+					 bool *replaced)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_property_blob *new_blob = NULL;
+
+	if (blob_id != 0) {
+		new_blob = drm_property_lookup_blob(dev, blob_id);
+		if (new_blob == NULL)
+			return -EINVAL;
+		if (expected_size > 0 && expected_size != new_blob->length)
+			return -EINVAL;
+	}
+
+	drm_atomic_replace_property_blob(blob, new_blob, replaced);
+
+	return 0;
+}
+
+/**
  * drm_atomic_crtc_set_property - set property on CRTC
  * @crtc: the drm CRTC to set a property on
  * @state: the state object to update with the new property value
@@ -397,6 +450,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_mode_config *config = &dev->mode_config;
+	bool replaced = false;
 	int ret;
 
 	if (property == config->prop_active)
@@ -407,8 +461,31 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		ret = drm_atomic_set_mode_prop_for_crtc(state, mode);
 		drm_property_unreference_blob(mode);
 		return ret;
-	}
-	else if (crtc->funcs->atomic_set_property)
+	} else if (property == config->degamma_lut_property) {
+		ret = drm_atomic_replace_property_blob_from_id(crtc,
+					&state->degamma_lut,
+					val,
+					-1,
+					&replaced);
+		state->color_mgmt_changed = replaced;
+		return ret;
+	} else if (property == config->ctm_property) {
+		ret = drm_atomic_replace_property_blob_from_id(crtc,
+					&state->ctm,
+					val,
+					sizeof(struct drm_color_ctm),
+					&replaced);
+		state->color_mgmt_changed = replaced;
+		return ret;
+	} else if (property == config->gamma_lut_property) {
+		ret = drm_atomic_replace_property_blob_from_id(crtc,
+					&state->gamma_lut,
+					val,
+					-1,
+					&replaced);
+		state->color_mgmt_changed = replaced;
+		return ret;
+	} else if (crtc->funcs->atomic_set_property)
 		return crtc->funcs->atomic_set_property(crtc, state, property, val);
 	else
 		return -EINVAL;
@@ -444,6 +521,12 @@ drm_atomic_crtc_get_property(struct drm_crtc *crtc,
 		*val = state->active;
 	else if (property == config->prop_mode_id)
 		*val = (state->mode_blob) ? state->mode_blob->base.id : 0;
+	else if (property == config->degamma_lut_property)
+		*val = (state->degamma_lut) ? state->degamma_lut->base.id : 0;
+	else if (property == config->ctm_property)
+		*val = (state->ctm) ? state->ctm->base.id : 0;
+	else if (property == config->gamma_lut_property)
+		*val = (state->gamma_lut) ? state->gamma_lut->base.id : 0;
 	else if (crtc->funcs->atomic_get_property)
 		return crtc->funcs->atomic_get_property(crtc, state, property, val);
 	else
@@ -1343,44 +1426,23 @@ static struct drm_pending_vblank_event *create_vblank_event(
 		struct drm_device *dev, struct drm_file *file_priv, uint64_t user_data)
 {
 	struct drm_pending_vblank_event *e = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->event_lock, flags);
-	if (file_priv->event_space < sizeof e->event) {
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-		goto out;
-	}
-	file_priv->event_space -= sizeof e->event;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
+	int ret;
 
 	e = kzalloc(sizeof *e, GFP_KERNEL);
-	if (e == NULL) {
-		spin_lock_irqsave(&dev->event_lock, flags);
-		file_priv->event_space += sizeof e->event;
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-		goto out;
-	}
+	if (!e)
+		return NULL;
 
 	e->event.base.type = DRM_EVENT_FLIP_COMPLETE;
-	e->event.base.length = sizeof e->event;
+	e->event.base.length = sizeof(e->event);
 	e->event.user_data = user_data;
-	e->base.event = &e->event.base;
-	e->base.file_priv = file_priv;
-	e->base.destroy = (void (*) (struct drm_pending_event *)) kfree;
-
-out:
-	return e;
-}
 
-static void destroy_vblank_event(struct drm_device *dev,
-		struct drm_file *file_priv, struct drm_pending_vblank_event *e)
-{
-	unsigned long flags;
+	ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base);
+	if (ret) {
+		kfree(e);
+		return NULL;
+	}
 
-	spin_lock_irqsave(&dev->event_lock, flags);
-	file_priv->event_space += sizeof e->event;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-	kfree(e);
+	return e;
 }
 
 static int atomic_set_prop(struct drm_atomic_state *state,
@@ -1642,8 +1704,7 @@ out:
 			if (!crtc_state->event)
 				continue;
 
-			destroy_vblank_event(dev, file_priv,
-					     crtc_state->event);
+			drm_event_cancel_free(dev, &crtc_state->event->base);
 		}
 	}
 
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 4f2d3e161593..4befe25c81c7 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -67,7 +67,8 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	struct drm_crtc_state *crtc_state;
 
 	if (plane->state->crtc) {
-		crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane->state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -76,8 +77,8 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	}
 
 	if (plane_state->crtc) {
-		crtc_state =
-			state->crtc_states[drm_crtc_index(plane_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								plane_state->crtc);
 
 		if (WARN_ON(!crtc_state))
 			return;
@@ -86,110 +87,185 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state,
 	}
 }
 
-static bool
-check_pending_encoder_assignment(struct drm_atomic_state *state,
-				 struct drm_encoder *new_encoder)
+static int handle_conflicting_encoders(struct drm_atomic_state *state,
+				       bool disable_conflicting_encoders)
 {
-	struct drm_connector *connector;
 	struct drm_connector_state *conn_state;
-	int i;
+	struct drm_connector *connector;
+	struct drm_encoder *encoder;
+	unsigned encoder_mask = 0;
+	int i, ret;
 
+	/*
+	 * First loop, find all newly assigned encoders from the connectors
+	 * part of the state. If the same encoder is assigned to multiple
+	 * connectors bail out.
+	 */
 	for_each_connector_in_state(state, connector, conn_state, i) {
-		if (conn_state->best_encoder != new_encoder)
+		const struct drm_connector_helper_funcs *funcs = connector->helper_private;
+		struct drm_encoder *new_encoder;
+
+		if (!conn_state->crtc)
+			continue;
+
+		if (funcs->atomic_best_encoder)
+			new_encoder = funcs->atomic_best_encoder(connector, conn_state);
+		else
+			new_encoder = funcs->best_encoder(connector);
+
+		if (new_encoder) {
+			if (encoder_mask & (1 << drm_encoder_index(new_encoder))) {
+				DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] on [CONNECTOR:%d:%s] already assigned\n",
+					new_encoder->base.id, new_encoder->name,
+					connector->base.id, connector->name);
+
+				return -EINVAL;
+			}
+
+			encoder_mask |= 1 << drm_encoder_index(new_encoder);
+		}
+	}
+
+	if (!encoder_mask)
+		return 0;
+
+	/*
+	 * Second loop, iterate over all connectors not part of the state.
+	 *
+	 * If a conflicting encoder is found and disable_conflicting_encoders
+	 * is not set, an error is returned. Userspace can provide a solution
+	 * through the atomic ioctl.
+	 *
+	 * If the flag is set conflicting connectors are removed from the crtc
+	 * and the crtc is disabled if no encoder is left. This preserves
+	 * compatibility with the legacy set_config behavior.
+	 */
+	drm_for_each_connector(connector, state->dev) {
+		struct drm_crtc_state *crtc_state;
+
+		if (drm_atomic_get_existing_connector_state(state, connector))
+			continue;
+
+		encoder = connector->state->best_encoder;
+		if (!encoder || !(encoder_mask & (1 << drm_encoder_index(encoder))))
 			continue;
 
-		/* encoder already assigned and we're trying to re-steal it! */
-		if (connector->state->best_encoder != conn_state->best_encoder)
-			return false;
+		if (!disable_conflicting_encoders) {
+			DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s] by [CONNECTOR:%d:%s]\n",
+					 encoder->base.id, encoder->name,
+					 connector->state->crtc->base.id,
+					 connector->state->crtc->name,
+					 connector->base.id, connector->name);
+			return -EINVAL;
+		}
+
+		conn_state = drm_atomic_get_connector_state(state, connector);
+		if (IS_ERR(conn_state))
+			return PTR_ERR(conn_state);
+
+		DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], disabling [CONNECTOR:%d:%s]\n",
+				 encoder->base.id, encoder->name,
+				 conn_state->crtc->base.id, conn_state->crtc->name,
+				 connector->base.id, connector->name);
+
+		crtc_state = drm_atomic_get_existing_crtc_state(state, conn_state->crtc);
+
+		ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
+		if (ret)
+			return ret;
+
+		if (!crtc_state->connector_mask) {
+			ret = drm_atomic_set_mode_prop_for_crtc(crtc_state,
+								NULL);
+			if (ret < 0)
+				return ret;
+
+			crtc_state->active = false;
+		}
 	}
 
-	return true;
+	return 0;
 }
 
-static struct drm_crtc *
-get_current_crtc_for_encoder(struct drm_device *dev,
-			     struct drm_encoder *encoder)
+static void
+set_best_encoder(struct drm_atomic_state *state,
+		 struct drm_connector_state *conn_state,
+		 struct drm_encoder *encoder)
 {
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_connector *connector;
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
 
-	WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
+	if (conn_state->best_encoder) {
+		/* Unset the encoder_mask in the old crtc state. */
+		crtc = conn_state->connector->state->crtc;
 
-	drm_for_each_connector(connector, dev) {
-		if (connector->state->best_encoder != encoder)
-			continue;
+		/* A NULL crtc is an error here because we should have
+		 *  duplicated a NULL best_encoder when crtc was NULL.
+		 * As an exception restoring duplicated atomic state
+		 * during resume is allowed, so don't warn when
+		 * best_encoder is equal to encoder we intend to set.
+		 */
+		WARN_ON(!crtc && encoder != conn_state->best_encoder);
+		if (crtc) {
+			crtc_state = drm_atomic_get_existing_crtc_state(state, crtc);
+
+			crtc_state->encoder_mask &=
+				~(1 << drm_encoder_index(conn_state->best_encoder));
+		}
+	}
 
-		return connector->state->crtc;
+	if (encoder) {
+		crtc = conn_state->crtc;
+		WARN_ON(!crtc);
+		if (crtc) {
+			crtc_state = drm_atomic_get_existing_crtc_state(state, crtc);
+
+			crtc_state->encoder_mask |=
+				1 << drm_encoder_index(encoder);
+		}
 	}
 
-	return NULL;
+	conn_state->best_encoder = encoder;
 }
 
-static int
+static void
 steal_encoder(struct drm_atomic_state *state,
-	      struct drm_encoder *encoder,
-	      struct drm_crtc *encoder_crtc)
+	      struct drm_encoder *encoder)
 {
-	struct drm_mode_config *config = &state->dev->mode_config;
 	struct drm_crtc_state *crtc_state;
 	struct drm_connector *connector;
 	struct drm_connector_state *connector_state;
-	int ret;
-
-	/*
-	 * We can only steal an encoder coming from a connector, which means we
-	 * must already hold the connection_mutex.
-	 */
-	WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
+	int i;
 
-	DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n",
-			 encoder->base.id, encoder->name,
-			 encoder_crtc->base.id, encoder_crtc->name);
+	for_each_connector_in_state(state, connector, connector_state, i) {
+		struct drm_crtc *encoder_crtc;
 
-	crtc_state = drm_atomic_get_crtc_state(state, encoder_crtc);
-	if (IS_ERR(crtc_state))
-		return PTR_ERR(crtc_state);
+		if (connector_state->best_encoder != encoder)
+			continue;
 
-	crtc_state->connectors_changed = true;
+		encoder_crtc = connector->state->crtc;
 
-	list_for_each_entry(connector, &config->connector_list, head) {
-		if (connector->state->best_encoder != encoder)
-			continue;
+		DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n",
+				 encoder->base.id, encoder->name,
+				 encoder_crtc->base.id, encoder_crtc->name);
 
-		DRM_DEBUG_ATOMIC("Stealing encoder from [CONNECTOR:%d:%s]\n",
-				 connector->base.id,
-				 connector->name);
+		set_best_encoder(state, connector_state, NULL);
 
-		connector_state = drm_atomic_get_connector_state(state,
-								 connector);
-		if (IS_ERR(connector_state))
-			return PTR_ERR(connector_state);
+		crtc_state = drm_atomic_get_existing_crtc_state(state, encoder_crtc);
+		crtc_state->connectors_changed = true;
 
-		ret = drm_atomic_set_crtc_for_connector(connector_state, NULL);
-		if (ret)
-			return ret;
-		connector_state->best_encoder = NULL;
+		return;
 	}
-
-	return 0;
 }
 
 static int
-update_connector_routing(struct drm_atomic_state *state, int conn_idx)
+update_connector_routing(struct drm_atomic_state *state,
+			 struct drm_connector *connector,
+			 struct drm_connector_state *connector_state)
 {
 	const struct drm_connector_helper_funcs *funcs;
 	struct drm_encoder *new_encoder;
-	struct drm_crtc *encoder_crtc;
-	struct drm_connector *connector;
-	struct drm_connector_state *connector_state;
 	struct drm_crtc_state *crtc_state;
-	int idx, ret;
-
-	connector = state->connectors[conn_idx];
-	connector_state = state->connector_states[conn_idx];
-
-	if (!connector)
-		return 0;
 
 	DRM_DEBUG_ATOMIC("Updating routing for [CONNECTOR:%d:%s]\n",
 			 connector->base.id,
@@ -197,16 +273,12 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx)
 
 	if (connector->state->crtc != connector_state->crtc) {
 		if (connector->state->crtc) {
-			idx = drm_crtc_index(connector->state->crtc);
-
-			crtc_state = state->crtc_states[idx];
+			crtc_state = drm_atomic_get_existing_crtc_state(state, connector->state->crtc);
 			crtc_state->connectors_changed = true;
 		}
 
 		if (connector_state->crtc) {
-			idx = drm_crtc_index(connector_state->crtc);
-
-			crtc_state = state->crtc_states[idx];
+			crtc_state = drm_atomic_get_existing_crtc_state(state, connector_state->crtc);
 			crtc_state->connectors_changed = true;
 		}
 	}
@@ -216,7 +288,7 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx)
 				connector->base.id,
 				connector->name);
 
-		connector_state->best_encoder = NULL;
+		set_best_encoder(state, connector_state, NULL);
 
 		return 0;
 	}
@@ -245,6 +317,8 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx)
 	}
 
 	if (new_encoder == connector_state->best_encoder) {
+		set_best_encoder(state, connector_state, new_encoder);
+
 		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d:%s]\n",
 				 connector->base.id,
 				 connector->name,
@@ -256,33 +330,11 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx)
 		return 0;
 	}
 
-	if (!check_pending_encoder_assignment(state, new_encoder)) {
-		DRM_DEBUG_ATOMIC("Encoder for [CONNECTOR:%d:%s] already assigned\n",
-				 connector->base.id,
-				 connector->name);
-		return -EINVAL;
-	}
-
-	encoder_crtc = get_current_crtc_for_encoder(state->dev,
-						    new_encoder);
+	steal_encoder(state, new_encoder);
 
-	if (encoder_crtc) {
-		ret = steal_encoder(state, new_encoder, encoder_crtc);
-		if (ret) {
-			DRM_DEBUG_ATOMIC("Encoder stealing failed for [CONNECTOR:%d:%s]\n",
-					 connector->base.id,
-					 connector->name);
-			return ret;
-		}
-	}
-
-	if (WARN_ON(!connector_state->crtc))
-		return -EINVAL;
+	set_best_encoder(state, connector_state, new_encoder);
 
-	connector_state->best_encoder = new_encoder;
-	idx = drm_crtc_index(connector_state->crtc);
-
-	crtc_state = state->crtc_states[idx];
+	crtc_state = drm_atomic_get_existing_crtc_state(state, connector_state->crtc);
 	crtc_state->connectors_changed = true;
 
 	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
@@ -323,8 +375,8 @@ mode_fixup(struct drm_atomic_state *state)
 		if (!conn_state->crtc || !conn_state->best_encoder)
 			continue;
 
-		crtc_state =
-			state->crtc_states[drm_crtc_index(conn_state->crtc)];
+		crtc_state = drm_atomic_get_existing_crtc_state(state,
+								conn_state->crtc);
 
 		/*
 		 * Each encoder has at most one connector (since we always steal
@@ -445,13 +497,18 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 		}
 	}
 
+	ret = handle_conflicting_encoders(state, state->legacy_set_config);
+	if (ret)
+		return ret;
+
 	for_each_connector_in_state(state, connector, connector_state, i) {
 		/*
 		 * This only sets crtc->mode_changed for routing changes,
 		 * drivers must set crtc->mode_changed themselves when connector
 		 * properties need to be updated.
 		 */
-		ret = update_connector_routing(state, i);
+		ret = update_connector_routing(state, connector,
+					       connector_state);
 		if (ret)
 			return ret;
 	}
@@ -617,14 +674,14 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 	for_each_connector_in_state(old_state, connector, old_conn_state, i) {
 		const struct drm_encoder_helper_funcs *funcs;
 		struct drm_encoder *encoder;
-		struct drm_crtc_state *old_crtc_state;
 
 		/* Shut down everything that's in the changeset and currently
 		 * still on. So need to check the old, saved state. */
 		if (!old_conn_state->crtc)
 			continue;
 
-		old_crtc_state = old_state->crtc_states[drm_crtc_index(old_conn_state->crtc)];
+		old_crtc_state = drm_atomic_get_existing_crtc_state(old_state,
+								    old_conn_state->crtc);
 
 		if (!old_crtc_state->active ||
 		    !drm_atomic_crtc_needs_modeset(old_conn_state->crtc->state))
@@ -1719,28 +1776,18 @@ static int update_output_state(struct drm_atomic_state *state,
 	struct drm_crtc_state *crtc_state;
 	struct drm_connector *connector;
 	struct drm_connector_state *conn_state;
-	int ret, i, j;
+	int ret, i;
 
 	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
 			       state->acquire_ctx);
 	if (ret)
 		return ret;
 
-	/* First grab all affected connector/crtc states. */
-	for (i = 0; i < set->num_connectors; i++) {
-		conn_state = drm_atomic_get_connector_state(state,
-							    set->connectors[i]);
-		if (IS_ERR(conn_state))
-			return PTR_ERR(conn_state);
-	}
-
-	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		ret = drm_atomic_add_affected_connectors(state, crtc);
-		if (ret)
-			return ret;
-	}
+	/* First disable all connectors on the target crtc. */
+	ret = drm_atomic_add_affected_connectors(state, set->crtc);
+	if (ret)
+		return ret;
 
-	/* Then recompute connector->crtc links and crtc enabling state. */
 	for_each_connector_in_state(state, connector, conn_state, i) {
 		if (conn_state->crtc == set->crtc) {
 			ret = drm_atomic_set_crtc_for_connector(conn_state,
@@ -1748,16 +1795,19 @@ static int update_output_state(struct drm_atomic_state *state,
 			if (ret)
 				return ret;
 		}
+	}
 
-		for (j = 0; j < set->num_connectors; j++) {
-			if (set->connectors[j] == connector) {
-				ret = drm_atomic_set_crtc_for_connector(conn_state,
-									set->crtc);
-				if (ret)
-					return ret;
-				break;
-			}
-		}
+	/* Then set all connectors from set->connectors on the target crtc */
+	for (i = 0; i < set->num_connectors; i++) {
+		conn_state = drm_atomic_get_connector_state(state,
+							    set->connectors[i]);
+		if (IS_ERR(conn_state))
+			return PTR_ERR(conn_state);
+
+		ret = drm_atomic_set_crtc_for_connector(conn_state,
+							set->crtc);
+		if (ret)
+			return ret;
 	}
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
@@ -1800,6 +1850,7 @@ int drm_atomic_helper_set_config(struct drm_mode_set *set)
 	if (!state)
 		return -ENOMEM;
 
+	state->legacy_set_config = true;
 	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
 retry:
 	ret = __drm_atomic_helper_set_config(set, state);
@@ -2446,8 +2497,12 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_dpms);
  */
 void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc)
 {
-	if (crtc->state)
+	if (crtc->state) {
 		drm_property_unreference_blob(crtc->state->mode_blob);
+		drm_property_unreference_blob(crtc->state->degamma_lut);
+		drm_property_unreference_blob(crtc->state->ctm);
+		drm_property_unreference_blob(crtc->state->gamma_lut);
+	}
 	kfree(crtc->state);
 	crtc->state = kzalloc(sizeof(*crtc->state), GFP_KERNEL);
 
@@ -2471,10 +2526,17 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
 
 	if (state->mode_blob)
 		drm_property_reference_blob(state->mode_blob);
+	if (state->degamma_lut)
+		drm_property_reference_blob(state->degamma_lut);
+	if (state->ctm)
+		drm_property_reference_blob(state->ctm);
+	if (state->gamma_lut)
+		drm_property_reference_blob(state->gamma_lut);
 	state->mode_changed = false;
 	state->active_changed = false;
 	state->planes_changed = false;
 	state->connectors_changed = false;
+	state->color_mgmt_changed = false;
 	state->event = NULL;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_crtc_duplicate_state);
@@ -2515,6 +2577,9 @@ void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc *crtc,
 					    struct drm_crtc_state *state)
 {
 	drm_property_unreference_blob(state->mode_blob);
+	drm_property_unreference_blob(state->degamma_lut);
+	drm_property_unreference_blob(state->ctm);
+	drm_property_unreference_blob(state->gamma_lut);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_crtc_destroy_state);
 
@@ -2549,8 +2614,10 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane)
 	kfree(plane->state);
 	plane->state = kzalloc(sizeof(*plane->state), GFP_KERNEL);
 
-	if (plane->state)
+	if (plane->state) {
 		plane->state->plane = plane;
+		plane->state->rotation = BIT(DRM_ROTATE_0);
+	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_plane_reset);
 
@@ -2826,3 +2893,98 @@ void drm_atomic_helper_connector_destroy_state(struct drm_connector *connector,
 	kfree(state);
 }
 EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state);
+
+/**
+ * drm_atomic_helper_legacy_gamma_set - set the legacy gamma correction table
+ * @crtc: CRTC object
+ * @red: red correction table
+ * @green: green correction table
+ * @blue: green correction table
+ * @start:
+ * @size: size of the tables
+ *
+ * Implements support for legacy gamma correction table for drivers
+ * that support color management through the DEGAMMA_LUT/GAMMA_LUT
+ * properties.
+ */
+void drm_atomic_helper_legacy_gamma_set(struct drm_crtc *crtc,
+					u16 *red, u16 *green, u16 *blue,
+					uint32_t start, uint32_t size)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_atomic_state *state;
+	struct drm_crtc_state *crtc_state;
+	struct drm_property_blob *blob = NULL;
+	struct drm_color_lut *blob_data;
+	int i, ret = 0;
+
+	state = drm_atomic_state_alloc(crtc->dev);
+	if (!state)
+		return;
+
+	blob = drm_property_create_blob(dev,
+					sizeof(struct drm_color_lut) * size,
+					NULL);
+	if (IS_ERR(blob)) {
+		ret = PTR_ERR(blob);
+		blob = NULL;
+		goto fail;
+	}
+
+	/* Prepare GAMMA_LUT with the legacy values. */
+	blob_data = (struct drm_color_lut *) blob->data;
+	for (i = 0; i < size; i++) {
+		blob_data[i].red = red[i];
+		blob_data[i].green = green[i];
+		blob_data[i].blue = blue[i];
+	}
+
+	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
+retry:
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state)) {
+		ret = PTR_ERR(crtc_state);
+		goto fail;
+	}
+
+	/* Reset DEGAMMA_LUT and CTM properties. */
+	ret = drm_atomic_crtc_set_property(crtc, crtc_state,
+			config->degamma_lut_property, 0);
+	if (ret)
+		goto fail;
+
+	ret = drm_atomic_crtc_set_property(crtc, crtc_state,
+			config->ctm_property, 0);
+	if (ret)
+		goto fail;
+
+	ret = drm_atomic_crtc_set_property(crtc, crtc_state,
+			config->gamma_lut_property, blob->base.id);
+	if (ret)
+		goto fail;
+
+	ret = drm_atomic_commit(state);
+	if (ret)
+		goto fail;
+
+	/* Driver takes ownership of state on successful commit. */
+
+	drm_property_unreference_blob(blob);
+
+	return;
+fail:
+	if (ret == -EDEADLK)
+		goto backoff;
+
+	drm_atomic_state_free(state);
+	drm_property_unreference_blob(blob);
+
+	return;
+backoff:
+	drm_atomic_state_clear(state);
+	drm_atomic_legacy_backoff(state);
+
+	goto retry;
+}
+EXPORT_SYMBOL(drm_atomic_helper_legacy_gamma_set);
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index bd93453afa61..b3654404abd0 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -186,7 +186,8 @@ void drm_bridge_disable(struct drm_bridge *bridge)
 
 	drm_bridge_disable(bridge->next);
 
-	bridge->funcs->disable(bridge);
+	if (bridge->funcs->disable)
+		bridge->funcs->disable(bridge);
 }
 EXPORT_SYMBOL(drm_bridge_disable);
 
@@ -206,7 +207,8 @@ void drm_bridge_post_disable(struct drm_bridge *bridge)
 	if (!bridge)
 		return;
 
-	bridge->funcs->post_disable(bridge);
+	if (bridge->funcs->post_disable)
+		bridge->funcs->post_disable(bridge);
 
 	drm_bridge_post_disable(bridge->next);
 }
@@ -256,7 +258,8 @@ void drm_bridge_pre_enable(struct drm_bridge *bridge)
 
 	drm_bridge_pre_enable(bridge->next);
 
-	bridge->funcs->pre_enable(bridge);
+	if (bridge->funcs->pre_enable)
+		bridge->funcs->pre_enable(bridge);
 }
 EXPORT_SYMBOL(drm_bridge_pre_enable);
 
@@ -276,7 +279,8 @@ void drm_bridge_enable(struct drm_bridge *bridge)
 	if (!bridge)
 		return;
 
-	bridge->funcs->enable(bridge);
+	if (bridge->funcs->enable)
+		bridge->funcs->enable(bridge);
 
 	drm_bridge_enable(bridge->next);
 }
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f6191215b2cb..e08f962288d9 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -430,9 +430,7 @@ EXPORT_SYMBOL(drm_framebuffer_init);
 static void __drm_framebuffer_unregister(struct drm_device *dev,
 					 struct drm_framebuffer *fb)
 {
-	mutex_lock(&dev->mode_config.idr_mutex);
-	idr_remove(&dev->mode_config.crtc_idr, fb->base.id);
-	mutex_unlock(&dev->mode_config.idr_mutex);
+	drm_mode_object_put(dev, &fb->base);
 
 	fb->base.id = 0;
 }
@@ -1150,6 +1148,29 @@ out_unlock:
 EXPORT_SYMBOL(drm_encoder_init);
 
 /**
+ * drm_encoder_index - find the index of a registered encoder
+ * @encoder: encoder to find index for
+ *
+ * Given a registered encoder, return the index of that encoder within a DRM
+ * device's list of encoders.
+ */
+unsigned int drm_encoder_index(struct drm_encoder *encoder)
+{
+	unsigned int index = 0;
+	struct drm_encoder *tmp;
+
+	drm_for_each_encoder(tmp, encoder->dev) {
+		if (tmp == encoder)
+			return index;
+
+		index++;
+	}
+
+	BUG();
+}
+EXPORT_SYMBOL(drm_encoder_index);
+
+/**
  * drm_encoder_cleanup - cleans up an initialised encoder
  * @encoder: encoder to cleanup
  *
@@ -1531,6 +1552,41 @@ static int drm_mode_create_standard_properties(struct drm_device *dev)
 		return -ENOMEM;
 	dev->mode_config.prop_mode_id = prop;
 
+	prop = drm_property_create(dev,
+			DRM_MODE_PROP_BLOB,
+			"DEGAMMA_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.degamma_lut_property = prop;
+
+	prop = drm_property_create_range(dev,
+			DRM_MODE_PROP_IMMUTABLE,
+			"DEGAMMA_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.degamma_lut_size_property = prop;
+
+	prop = drm_property_create(dev,
+			DRM_MODE_PROP_BLOB,
+			"CTM", 0);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.ctm_property = prop;
+
+	prop = drm_property_create(dev,
+			DRM_MODE_PROP_BLOB,
+			"GAMMA_LUT", 0);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.gamma_lut_property = prop;
+
+	prop = drm_property_create_range(dev,
+			DRM_MODE_PROP_IMMUTABLE,
+			"GAMMA_LUT_SIZE", 0, UINT_MAX);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.gamma_lut_size_property = prop;
+
 	return 0;
 }
 
@@ -5254,7 +5310,6 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	struct drm_framebuffer *fb = NULL;
 	struct drm_pending_vblank_event *e = NULL;
-	unsigned long flags;
 	int ret = -EINVAL;
 
 	if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
@@ -5305,41 +5360,26 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	}
 
 	if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
-		ret = -ENOMEM;
-		spin_lock_irqsave(&dev->event_lock, flags);
-		if (file_priv->event_space < sizeof(e->event)) {
-			spin_unlock_irqrestore(&dev->event_lock, flags);
-			goto out;
-		}
-		file_priv->event_space -= sizeof(e->event);
-		spin_unlock_irqrestore(&dev->event_lock, flags);
-
-		e = kzalloc(sizeof(*e), GFP_KERNEL);
-		if (e == NULL) {
-			spin_lock_irqsave(&dev->event_lock, flags);
-			file_priv->event_space += sizeof(e->event);
-			spin_unlock_irqrestore(&dev->event_lock, flags);
+		e = kzalloc(sizeof *e, GFP_KERNEL);
+		if (!e) {
+			ret = -ENOMEM;
 			goto out;
 		}
-
 		e->event.base.type = DRM_EVENT_FLIP_COMPLETE;
 		e->event.base.length = sizeof(e->event);
 		e->event.user_data = page_flip->user_data;
-		e->base.event = &e->event.base;
-		e->base.file_priv = file_priv;
-		e->base.destroy =
-			(void (*) (struct drm_pending_event *)) kfree;
+		ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base);
+		if (ret) {
+			kfree(e);
+			goto out;
+		}
 	}
 
 	crtc->primary->old_fb = crtc->primary->fb;
 	ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags);
 	if (ret) {
-		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
-			spin_lock_irqsave(&dev->event_lock, flags);
-			file_priv->event_space += sizeof(e->event);
-			spin_unlock_irqrestore(&dev->event_lock, flags);
-			kfree(e);
-		}
+		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT)
+			drm_event_cancel_free(dev, &e->base);
 		/* Keep the old fb, don't unref it. */
 		crtc->primary->old_fb = NULL;
 	} else {
@@ -5720,6 +5760,48 @@ int drm_format_vert_chroma_subsampling(uint32_t format)
 EXPORT_SYMBOL(drm_format_vert_chroma_subsampling);
 
 /**
+ * drm_format_plane_width - width of the plane given the first plane
+ * @width: width of the first plane
+ * @format: pixel format
+ * @plane: plane index
+ *
+ * Returns:
+ * The width of @plane, given that the width of the first plane is @width.
+ */
+int drm_format_plane_width(int width, uint32_t format, int plane)
+{
+	if (plane >= drm_format_num_planes(format))
+		return 0;
+
+	if (plane == 0)
+		return width;
+
+	return width / drm_format_horz_chroma_subsampling(format);
+}
+EXPORT_SYMBOL(drm_format_plane_width);
+
+/**
+ * drm_format_plane_height - height of the plane given the first plane
+ * @height: height of the first plane
+ * @format: pixel format
+ * @plane: plane index
+ *
+ * Returns:
+ * The height of @plane, given that the height of the first plane is @height.
+ */
+int drm_format_plane_height(int height, uint32_t format, int plane)
+{
+	if (plane >= drm_format_num_planes(format))
+		return 0;
+
+	if (plane == 0)
+		return height;
+
+	return height / drm_format_vert_chroma_subsampling(format);
+}
+EXPORT_SYMBOL(drm_format_plane_height);
+
+/**
  * drm_rotation_simplify() - Try to simplify the rotation
  * @rotation: Rotation to be simplified
  * @supported_rotations: Supported rotations
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index a02a7f9a6a9d..79555d2b1b87 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -73,9 +73,6 @@
  * &drm_crtc_helper_funcs, struct &drm_encoder_helper_funcs and struct
  * &drm_connector_helper_funcs.
  */
-MODULE_AUTHOR("David Airlie, Jesse Barnes");
-MODULE_DESCRIPTION("DRM KMS helper");
-MODULE_LICENSE("GPL and additional rights");
 
 /**
  * drm_helper_move_panel_connectors_to_head() - move panels to the front in the
@@ -220,6 +217,15 @@ static void __drm_helper_disable_unused_functions(struct drm_device *dev)
  * disconnected connectors. Then it will disable all unused encoders and CRTCs
  * either by calling their disable callback if available or by calling their
  * dpms callback with DRM_MODE_DPMS_OFF.
+ *
+ * NOTE:
+ *
+ * This function is part of the legacy modeset helper library and will cause
+ * major confusion with atomic drivers. This is because atomic helpers guarantee
+ * to never call ->disable() hooks on a disabled function, or ->enable() hooks
+ * on an enabled functions. drm_helper_disable_unused_functions() on the other
+ * hand throws such guarantees into the wind and calls disable hooks
+ * unconditionally on unused functions.
  */
 void drm_helper_disable_unused_functions(struct drm_device *dev)
 {
@@ -328,16 +334,21 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 		}
 
 		encoder_funcs = encoder->helper_private;
-		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
-						      adjusted_mode))) {
-			DRM_DEBUG_KMS("Encoder fixup failed\n");
-			goto done;
+		if (encoder_funcs->mode_fixup) {
+			if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
+							      adjusted_mode))) {
+				DRM_DEBUG_KMS("Encoder fixup failed\n");
+				goto done;
+			}
 		}
 	}
 
-	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
-		DRM_DEBUG_KMS("CRTC fixup failed\n");
-		goto done;
+	if (crtc_funcs->mode_fixup) {
+		if (!(ret = crtc_funcs->mode_fixup(crtc, mode,
+						adjusted_mode))) {
+			DRM_DEBUG_KMS("CRTC fixup failed\n");
+			goto done;
+		}
 	}
 	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
@@ -578,8 +589,6 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		if (set->crtc->primary->fb == NULL) {
 			DRM_DEBUG_KMS("crtc has no fb, full mode set\n");
 			mode_changed = true;
-		} else if (set->fb == NULL) {
-			mode_changed = true;
 		} else if (set->fb->pixel_format !=
 			   set->crtc->primary->fb->pixel_format) {
 			mode_changed = true;
@@ -590,7 +599,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	if (set->x != set->crtc->x || set->y != set->crtc->y)
 		fb_changed = true;
 
-	if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) {
+	if (!drm_mode_equal(set->mode, &set->crtc->mode)) {
 		DRM_DEBUG_KMS("modes are different, full mode set\n");
 		drm_mode_debug_printmodeline(&set->crtc->mode);
 		drm_mode_debug_printmodeline(set->mode);
@@ -1066,3 +1075,36 @@ int drm_helper_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 	return drm_plane_helper_commit(plane, plane_state, old_fb);
 }
 EXPORT_SYMBOL(drm_helper_crtc_mode_set_base);
+
+/**
+ * drm_helper_crtc_enable_color_mgmt - enable color management properties
+ * @crtc: DRM CRTC
+ * @degamma_lut_size: the size of the degamma lut (before CSC)
+ * @gamma_lut_size: the size of the gamma lut (after CSC)
+ *
+ * This function lets the driver enable the color correction properties on a
+ * CRTC. This includes 3 degamma, csc and gamma properties that userspace can
+ * set and 2 size properties to inform the userspace of the lut sizes.
+ */
+void drm_helper_crtc_enable_color_mgmt(struct drm_crtc *crtc,
+				       int degamma_lut_size,
+				       int gamma_lut_size)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_mode_config *config = &dev->mode_config;
+
+	drm_object_attach_property(&crtc->base,
+				   config->degamma_lut_property, 0);
+	drm_object_attach_property(&crtc->base,
+				   config->ctm_property, 0);
+	drm_object_attach_property(&crtc->base,
+				   config->gamma_lut_property, 0);
+
+	drm_object_attach_property(&crtc->base,
+				   config->degamma_lut_size_property,
+				   degamma_lut_size);
+	drm_object_attach_property(&crtc->base,
+				   config->gamma_lut_size_property,
+				   gamma_lut_size);
+}
+EXPORT_SYMBOL(drm_helper_crtc_enable_color_mgmt);
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
new file mode 100644
index 000000000000..f73b38b33a8e
--- /dev/null
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rafael Antognolli <rafael.antognolli@intel.com>
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drmP.h>
+
+struct drm_dp_aux_dev {
+	unsigned index;
+	struct drm_dp_aux *aux;
+	struct device *dev;
+	struct kref refcount;
+	atomic_t usecount;
+};
+
+#define DRM_AUX_MINORS	256
+#define AUX_MAX_OFFSET	(1 << 20)
+static DEFINE_IDR(aux_idr);
+static DEFINE_MUTEX(aux_idr_mutex);
+static struct class *drm_dp_aux_dev_class;
+static int drm_dev_major = -1;
+
+static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_minor(unsigned index)
+{
+	struct drm_dp_aux_dev *aux_dev = NULL;
+
+	mutex_lock(&aux_idr_mutex);
+	aux_dev = idr_find(&aux_idr, index);
+	if (!kref_get_unless_zero(&aux_dev->refcount))
+		aux_dev = NULL;
+	mutex_unlock(&aux_idr_mutex);
+
+	return aux_dev;
+}
+
+static struct drm_dp_aux_dev *alloc_drm_dp_aux_dev(struct drm_dp_aux *aux)
+{
+	struct drm_dp_aux_dev *aux_dev;
+	int index;
+
+	aux_dev = kzalloc(sizeof(*aux_dev), GFP_KERNEL);
+	if (!aux_dev)
+		return ERR_PTR(-ENOMEM);
+	aux_dev->aux = aux;
+	atomic_set(&aux_dev->usecount, 1);
+	kref_init(&aux_dev->refcount);
+
+	mutex_lock(&aux_idr_mutex);
+	index = idr_alloc_cyclic(&aux_idr, aux_dev, 0, DRM_AUX_MINORS,
+				 GFP_KERNEL);
+	mutex_unlock(&aux_idr_mutex);
+	if (index < 0) {
+		kfree(aux_dev);
+		return ERR_PTR(index);
+	}
+	aux_dev->index = index;
+
+	return aux_dev;
+}
+
+static void release_drm_dp_aux_dev(struct kref *ref)
+{
+	struct drm_dp_aux_dev *aux_dev =
+		container_of(ref, struct drm_dp_aux_dev, refcount);
+
+	kfree(aux_dev);
+}
+
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	ssize_t res;
+	struct drm_dp_aux_dev *aux_dev =
+		drm_dp_aux_dev_get_by_minor(MINOR(dev->devt));
+
+	if (!aux_dev)
+		return -ENODEV;
+
+	res = sprintf(buf, "%s\n", aux_dev->aux->name);
+	kref_put(&aux_dev->refcount, release_drm_dp_aux_dev);
+
+	return res;
+}
+static DEVICE_ATTR_RO(name);
+
+static struct attribute *drm_dp_aux_attrs[] = {
+	&dev_attr_name.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(drm_dp_aux);
+
+static int auxdev_open(struct inode *inode, struct file *file)
+{
+	unsigned int minor = iminor(inode);
+	struct drm_dp_aux_dev *aux_dev;
+
+	aux_dev = drm_dp_aux_dev_get_by_minor(minor);
+	if (!aux_dev)
+		return -ENODEV;
+
+	file->private_data = aux_dev;
+	return 0;
+}
+
+static loff_t auxdev_llseek(struct file *file, loff_t offset, int whence)
+{
+	return fixed_size_llseek(file, offset, whence, AUX_MAX_OFFSET);
+}
+
+static ssize_t auxdev_read(struct file *file, char __user *buf, size_t count,
+			   loff_t *offset)
+{
+	size_t bytes_pending, num_bytes_processed = 0;
+	struct drm_dp_aux_dev *aux_dev = file->private_data;
+	ssize_t res = 0;
+
+	if (!atomic_inc_not_zero(&aux_dev->usecount))
+		return -ENODEV;
+
+	bytes_pending = min((loff_t)count, AUX_MAX_OFFSET - (*offset));
+
+	if (!access_ok(VERIFY_WRITE, buf, bytes_pending)) {
+		res = -EFAULT;
+		goto out;
+	}
+
+	while (bytes_pending > 0) {
+		uint8_t localbuf[DP_AUX_MAX_PAYLOAD_BYTES];
+		ssize_t todo = min_t(size_t, bytes_pending, sizeof(localbuf));
+
+		res = drm_dp_dpcd_read(aux_dev->aux, *offset, localbuf, todo);
+		if (res <= 0) {
+			res = num_bytes_processed ? num_bytes_processed : res;
+			goto out;
+		}
+		if (__copy_to_user(buf + num_bytes_processed, localbuf, res)) {
+			res = num_bytes_processed ?
+				num_bytes_processed : -EFAULT;
+			goto out;
+		}
+		bytes_pending -= res;
+		*offset += res;
+		num_bytes_processed += res;
+		res = num_bytes_processed;
+	}
+
+out:
+	atomic_dec(&aux_dev->usecount);
+	wake_up_atomic_t(&aux_dev->usecount);
+	return res;
+}
+
+static ssize_t auxdev_write(struct file *file, const char __user *buf,
+			    size_t count, loff_t *offset)
+{
+	size_t bytes_pending, num_bytes_processed = 0;
+	struct drm_dp_aux_dev *aux_dev = file->private_data;
+	ssize_t res = 0;
+
+	if (!atomic_inc_not_zero(&aux_dev->usecount))
+		return -ENODEV;
+
+	bytes_pending = min((loff_t)count, AUX_MAX_OFFSET - *offset);
+
+	if (!access_ok(VERIFY_READ, buf, bytes_pending)) {
+		res = -EFAULT;
+		goto out;
+	}
+
+	while (bytes_pending > 0) {
+		uint8_t localbuf[DP_AUX_MAX_PAYLOAD_BYTES];
+		ssize_t todo = min_t(size_t, bytes_pending, sizeof(localbuf));
+
+		if (__copy_from_user(localbuf,
+				     buf + num_bytes_processed, todo)) {
+			res = num_bytes_processed ?
+				num_bytes_processed : -EFAULT;
+			goto out;
+		}
+
+		res = drm_dp_dpcd_write(aux_dev->aux, *offset, localbuf, todo);
+		if (res <= 0) {
+			res = num_bytes_processed ? num_bytes_processed : res;
+			goto out;
+		}
+		bytes_pending -= res;
+		*offset += res;
+		num_bytes_processed += res;
+		res = num_bytes_processed;
+	}
+
+out:
+	atomic_dec(&aux_dev->usecount);
+	wake_up_atomic_t(&aux_dev->usecount);
+	return res;
+}
+
+static int auxdev_release(struct inode *inode, struct file *file)
+{
+	struct drm_dp_aux_dev *aux_dev = file->private_data;
+
+	kref_put(&aux_dev->refcount, release_drm_dp_aux_dev);
+	return 0;
+}
+
+static const struct file_operations auxdev_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= auxdev_llseek,
+	.read		= auxdev_read,
+	.write		= auxdev_write,
+	.open		= auxdev_open,
+	.release	= auxdev_release,
+};
+
+#define to_auxdev(d) container_of(d, struct drm_dp_aux_dev, aux)
+
+static struct drm_dp_aux_dev *drm_dp_aux_dev_get_by_aux(struct drm_dp_aux *aux)
+{
+	struct drm_dp_aux_dev *iter, *aux_dev = NULL;
+	int id;
+
+	/* don't increase kref count here because this function should only be
+	 * used by drm_dp_aux_unregister_devnode. Thus, it will always have at
+	 * least one reference - the one that drm_dp_aux_register_devnode
+	 * created
+	 */
+	mutex_lock(&aux_idr_mutex);
+	idr_for_each_entry(&aux_idr, iter, id) {
+		if (iter->aux == aux) {
+			aux_dev = iter;
+			break;
+		}
+	}
+	mutex_unlock(&aux_idr_mutex);
+	return aux_dev;
+}
+
+static int auxdev_wait_atomic_t(atomic_t *p)
+{
+	schedule();
+	return 0;
+}
+/**
+ * drm_dp_aux_unregister_devnode() - unregister a devnode for this aux channel
+ * @aux: DisplayPort AUX channel
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux)
+{
+	struct drm_dp_aux_dev *aux_dev;
+	unsigned int minor;
+
+	aux_dev = drm_dp_aux_dev_get_by_aux(aux);
+	if (!aux_dev) /* attach must have failed */
+		return;
+
+	mutex_lock(&aux_idr_mutex);
+	idr_remove(&aux_idr, aux_dev->index);
+	mutex_unlock(&aux_idr_mutex);
+
+	atomic_dec(&aux_dev->usecount);
+	wait_on_atomic_t(&aux_dev->usecount, auxdev_wait_atomic_t,
+			 TASK_UNINTERRUPTIBLE);
+
+	minor = aux_dev->index;
+	if (aux_dev->dev)
+		device_destroy(drm_dp_aux_dev_class,
+			       MKDEV(drm_dev_major, minor));
+
+	DRM_DEBUG("drm_dp_aux_dev: aux [%s] unregistering\n", aux->name);
+	kref_put(&aux_dev->refcount, release_drm_dp_aux_dev);
+}
+EXPORT_SYMBOL(drm_dp_aux_unregister_devnode);
+
+/**
+ * drm_dp_aux_register_devnode() - register a devnode for this aux channel
+ * @aux: DisplayPort AUX channel
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_aux_register_devnode(struct drm_dp_aux *aux)
+{
+	struct drm_dp_aux_dev *aux_dev;
+	int res;
+
+	aux_dev = alloc_drm_dp_aux_dev(aux);
+	if (IS_ERR(aux_dev))
+		return PTR_ERR(aux_dev);
+
+	aux_dev->dev = device_create(drm_dp_aux_dev_class, aux->dev,
+				     MKDEV(drm_dev_major, aux_dev->index), NULL,
+				     "drm_dp_aux%d", aux_dev->index);
+	if (IS_ERR(aux_dev->dev)) {
+		res = PTR_ERR(aux_dev->dev);
+		aux_dev->dev = NULL;
+		goto error;
+	}
+
+	DRM_DEBUG("drm_dp_aux_dev: aux [%s] registered as minor %d\n",
+		  aux->name, aux_dev->index);
+	return 0;
+error:
+	drm_dp_aux_unregister_devnode(aux);
+	return res;
+}
+EXPORT_SYMBOL(drm_dp_aux_register_devnode);
+
+int drm_dp_aux_dev_init(void)
+{
+	int res;
+
+	drm_dp_aux_dev_class = class_create(THIS_MODULE, "drm_dp_aux_dev");
+	if (IS_ERR(drm_dp_aux_dev_class)) {
+		res = PTR_ERR(drm_dp_aux_dev_class);
+		goto out;
+	}
+	drm_dp_aux_dev_class->dev_groups = drm_dp_aux_groups;
+
+	res = register_chrdev(0, "aux", &auxdev_fops);
+	if (res < 0)
+		goto out;
+	drm_dev_major = res;
+
+	return 0;
+out:
+	class_destroy(drm_dp_aux_dev_class);
+	return res;
+}
+EXPORT_SYMBOL(drm_dp_aux_dev_init);
+
+void drm_dp_aux_dev_exit(void)
+{
+	unregister_chrdev(drm_dev_major, "aux");
+	class_destroy(drm_dp_aux_dev_class);
+}
+EXPORT_SYMBOL(drm_dp_aux_dev_exit);
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 9535c5b60387..7d58f594cffe 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/i2c.h>
 #include <drm/drm_dp_helper.h>
+#include <drm/drm_dp_aux_dev.h>
 #include <drm/drmP.h>
 
 /**
@@ -754,6 +755,8 @@ static const struct i2c_algorithm drm_dp_i2c_algo = {
  */
 int drm_dp_aux_register(struct drm_dp_aux *aux)
 {
+	int ret;
+
 	mutex_init(&aux->hw_mutex);
 
 	aux->ddc.algo = &drm_dp_i2c_algo;
@@ -768,7 +771,17 @@ int drm_dp_aux_register(struct drm_dp_aux *aux)
 	strlcpy(aux->ddc.name, aux->name ? aux->name : dev_name(aux->dev),
 		sizeof(aux->ddc.name));
 
-	return i2c_add_adapter(&aux->ddc);
+	ret = drm_dp_aux_register_devnode(aux);
+	if (ret)
+		return ret;
+
+	ret = i2c_add_adapter(&aux->ddc);
+	if (ret) {
+		drm_dp_aux_unregister_devnode(aux);
+		return ret;
+	}
+
+	return 0;
 }
 EXPORT_SYMBOL(drm_dp_aux_register);
 
@@ -778,6 +791,7 @@ EXPORT_SYMBOL(drm_dp_aux_register);
  */
 void drm_dp_aux_unregister(struct drm_dp_aux *aux)
 {
+	drm_dp_aux_unregister_devnode(aux);
 	i2c_del_adapter(&aux->ddc);
 }
 EXPORT_SYMBOL(drm_dp_aux_unregister);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 04cb4877fabd..414d7f61aa05 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -32,6 +32,7 @@
 #include <linux/hdmi.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
+#include <linux/vga_switcheroo.h>
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_displayid.h>
@@ -1395,6 +1396,31 @@ struct edid *drm_get_edid(struct drm_connector *connector,
 EXPORT_SYMBOL(drm_get_edid);
 
 /**
+ * drm_get_edid_switcheroo - get EDID data for a vga_switcheroo output
+ * @connector: connector we're probing
+ * @adapter: I2C adapter to use for DDC
+ *
+ * Wrapper around drm_get_edid() for laptops with dual GPUs using one set of
+ * outputs. The wrapper adds the requisite vga_switcheroo calls to temporarily
+ * switch DDC to the GPU which is retrieving EDID.
+ *
+ * Return: Pointer to valid EDID or %NULL if we couldn't find any.
+ */
+struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
+				     struct i2c_adapter *adapter)
+{
+	struct pci_dev *pdev = connector->dev->pdev;
+	struct edid *edid;
+
+	vga_switcheroo_lock_ddc(pdev);
+	edid = drm_get_edid(connector, adapter);
+	vga_switcheroo_unlock_ddc(pdev);
+
+	return edid;
+}
+EXPORT_SYMBOL(drm_get_edid_switcheroo);
+
+/**
  * drm_edid_duplicate - duplicate an EDID and the extensions
  * @edid: EDID to duplicate
  *
@@ -3282,7 +3308,7 @@ void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
 	u8 *cea;
 	u8 *name;
 	u8 *db;
-	int sad_count = 0;
+	int total_sad_count = 0;
 	int mnl;
 	int dbl;
 
@@ -3296,6 +3322,7 @@ void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
 
 	name = NULL;
 	drm_for_each_detailed_block((u8 *)edid, monitor_name, &name);
+	/* max: 13 bytes EDID, 16 bytes ELD */
 	for (mnl = 0; name && mnl < 13; mnl++) {
 		if (name[mnl] == 0x0a)
 			break;
@@ -3324,11 +3351,15 @@ void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
 			dbl = cea_db_payload_len(db);
 
 			switch (cea_db_tag(db)) {
+				int sad_count;
+
 			case AUDIO_BLOCK:
 				/* Audio Data Block, contains SADs */
-				sad_count = dbl / 3;
-				if (dbl >= 1)
-					memcpy(eld + 20 + mnl, &db[1], dbl);
+				sad_count = min(dbl / 3, 15 - total_sad_count);
+				if (sad_count >= 1)
+					memcpy(eld + 20 + mnl + total_sad_count * 3,
+					       &db[1], sad_count * 3);
+				total_sad_count += sad_count;
 				break;
 			case SPEAKER_BLOCK:
 				/* Speaker Allocation Data Block */
@@ -3345,13 +3376,13 @@ void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
 			}
 		}
 	}
-	eld[5] |= sad_count << 4;
+	eld[5] |= total_sad_count << 4;
 
 	eld[DRM_ELD_BASELINE_ELD_LEN] =
 		DIV_ROUND_UP(drm_eld_calc_baseline_block_size(eld), 4);
 
 	DRM_DEBUG_KMS("ELD size %d, SAD count %d\n",
-		      drm_eld_size(eld), sad_count);
+		      drm_eld_size(eld), total_sad_count);
 }
 EXPORT_SYMBOL(drm_edid_to_eld);
 
diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c
index e8629076de32..4484785cd9ac 100644
--- a/drivers/gpu/drm/drm_encoder_slave.c
+++ b/drivers/gpu/drm/drm_encoder_slave.c
@@ -140,6 +140,9 @@ bool drm_i2c_encoder_mode_fixup(struct drm_encoder *encoder,
 		const struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
 {
+	if (!get_slave_funcs(encoder)->mode_fixup)
+		return true;
+
 	return get_slave_funcs(encoder)->mode_fixup(encoder, mode, adjusted_mode);
 }
 EXPORT_SYMBOL(drm_i2c_encoder_mode_fixup);
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c895b6fddbd8..bb88e3df9257 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -74,7 +74,8 @@ static struct drm_framebuffer_funcs drm_fb_cma_funcs = {
 };
 
 static struct drm_fb_cma *drm_fb_cma_alloc(struct drm_device *dev,
-	const const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_cma_object **obj,
+	const struct drm_mode_fb_cmd2 *mode_cmd,
+	struct drm_gem_cma_object **obj,
 	unsigned int num_planes)
 {
 	struct drm_fb_cma *fb_cma;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 1e103c4c6ee0..855108e6e1bd 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -104,21 +104,17 @@ int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
 {
 	struct drm_device *dev = fb_helper->dev;
 	struct drm_connector *connector;
-	int i;
+	int i, ret;
 
 	if (!drm_fbdev_emulation)
 		return 0;
 
 	mutex_lock(&dev->mode_config.mutex);
 	drm_for_each_connector(connector, dev) {
-		struct drm_fb_helper_connector *fb_helper_connector;
+		ret = drm_fb_helper_add_one_connector(fb_helper, connector);
 
-		fb_helper_connector = kzalloc(sizeof(struct drm_fb_helper_connector), GFP_KERNEL);
-		if (!fb_helper_connector)
+		if (ret)
 			goto fail;
-
-		fb_helper_connector->connector = connector;
-		fb_helper->connector_info[fb_helper->connector_count++] = fb_helper_connector;
 	}
 	mutex_unlock(&dev->mode_config.mutex);
 	return 0;
@@ -130,7 +126,7 @@ fail:
 	fb_helper->connector_count = 0;
 	mutex_unlock(&dev->mode_config.mutex);
 
-	return -ENOMEM;
+	return ret;
 }
 EXPORT_SYMBOL(drm_fb_helper_single_add_all_connectors);
 
@@ -1989,13 +1985,13 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
 	width = dev->mode_config.max_width;
 	height = dev->mode_config.max_height;
 
-	crtcs = kcalloc(dev->mode_config.num_connector,
+	crtcs = kcalloc(fb_helper->connector_count,
 			sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL);
-	modes = kcalloc(dev->mode_config.num_connector,
+	modes = kcalloc(fb_helper->connector_count,
 			sizeof(struct drm_display_mode *), GFP_KERNEL);
-	offsets = kcalloc(dev->mode_config.num_connector,
+	offsets = kcalloc(fb_helper->connector_count,
 			  sizeof(struct drm_fb_offset), GFP_KERNEL);
-	enabled = kcalloc(dev->mode_config.num_connector,
+	enabled = kcalloc(fb_helper->connector_count,
 			  sizeof(bool), GFP_KERNEL);
 	if (!crtcs || !modes || !enabled || !offsets) {
 		DRM_ERROR("Memory allocation failed\n");
@@ -2009,9 +2005,9 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
 	      fb_helper->funcs->initial_config(fb_helper, crtcs, modes,
 					       offsets,
 					       enabled, width, height))) {
-		memset(modes, 0, dev->mode_config.num_connector*sizeof(modes[0]));
-		memset(crtcs, 0, dev->mode_config.num_connector*sizeof(crtcs[0]));
-		memset(offsets, 0, dev->mode_config.num_connector*sizeof(offsets[0]));
+		memset(modes, 0, fb_helper->connector_count*sizeof(modes[0]));
+		memset(crtcs, 0, fb_helper->connector_count*sizeof(crtcs[0]));
+		memset(offsets, 0, fb_helper->connector_count*sizeof(offsets[0]));
 
 		if (!drm_target_cloned(fb_helper, modes, offsets,
 				       enabled, width, height) &&
@@ -2091,6 +2087,27 @@ out:
  * drm_fb_helper_fill_fix() are provided as helpers to setup simple default
  * values for the fbdev info structure.
  *
+ * HANG DEBUGGING:
+ *
+ * When you have fbcon support built-in or already loaded, this function will do
+ * a full modeset to setup the fbdev console. Due to locking misdesign in the
+ * VT/fbdev subsystem that entire modeset sequence has to be done while holding
+ * console_lock. Until console_unlock is called no dmesg lines will be sent out
+ * to consoles, not even serial console. This means when your driver crashes,
+ * you will see absolutely nothing else but a system stuck in this function,
+ * with no further output. Any kind of printk() you place within your own driver
+ * or in the drm core modeset code will also never show up.
+ *
+ * Standard debug practice is to run the fbcon setup without taking the
+ * console_lock as a hack, to be able to see backtraces and crashes on the
+ * serial line. This can be done by setting the fb.lockless_register_fb=1 kernel
+ * cmdline option.
+ *
+ * The other option is to just disable fbdev emulation since very likely the
+ * first modest from userspace will crash in the same way, and is even easier to
+ * debug. This can be done by setting the drm_kms_helper.fbdev_emulation=0
+ * kernel cmdline option.
+ *
  * RETURNS:
  * Zero if everything went ok, nonzero otherwise.
  */
@@ -2175,9 +2192,9 @@ EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
  * but the module doesn't depend on any fb console symbols.  At least
  * attempt to load fbcon to avoid leaving the system without a usable console.
  */
-#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT)
-static int __init drm_fb_helper_modinit(void)
+int __init drm_fb_helper_modinit(void)
 {
+#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT)
 	const char *name = "fbcon";
 	struct module *fbcon;
 
@@ -2187,8 +2204,7 @@ static int __init drm_fb_helper_modinit(void)
 
 	if (!fbcon)
 		request_module_nowait(name);
+#endif
 	return 0;
 }
-
-module_init(drm_fb_helper_modinit);
-#endif
+EXPORT_SYMBOL(drm_fb_helper_modinit);
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 1ea8790e5090..aeef58ed359b 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -1,4 +1,4 @@
-/**
+/*
  * \file drm_fops.c
  * File operations for DRM
  *
@@ -44,6 +44,46 @@
 /* from BKL pushdown */
 DEFINE_MUTEX(drm_global_mutex);
 
+/**
+ * DOC: file operations
+ *
+ * Drivers must define the file operations structure that forms the DRM
+ * userspace API entry point, even though most of those operations are
+ * implemented in the DRM core. The mandatory functions are drm_open(),
+ * drm_read(), drm_ioctl() and drm_compat_ioctl if CONFIG_COMPAT is enabled.
+ * Drivers which implement private ioctls that require 32/64 bit compatibility
+ * support must provided their onw .compat_ioctl() handler that processes
+ * private ioctls and calls drm_compat_ioctl() for core ioctls.
+ *
+ * In addition drm_read() and drm_poll() provide support for DRM events. DRM
+ * events are a generic and extensible means to send asynchronous events to
+ * userspace through the file descriptor. They are used to send vblank event and
+ * page flip completions by the KMS API. But drivers can also use it for their
+ * own needs, e.g. to signal completion of rendering.
+ *
+ * The memory mapping implementation will vary depending on how the driver
+ * manages memory. Legacy drivers will use the deprecated drm_legacy_mmap()
+ * function, modern drivers should use one of the provided memory-manager
+ * specific implementations. For GEM-based drivers this is drm_gem_mmap().
+ *
+ * No other file operations are supported by the DRM userspace API. Overall the
+ * following is an example #file_operations structure:
+ *
+ *     static const example_drm_fops = {
+ *             .owner = THIS_MODULE,
+ *             .open = drm_open,
+ *             .release = drm_release,
+ *             .unlocked_ioctl = drm_ioctl,
+ *     #ifdef CONFIG_COMPAT
+ *             .compat_ioctl = drm_compat_ioctl,
+ *     #endif
+ *             .poll = drm_poll,
+ *             .read = drm_read,
+ *             .llseek = no_llseek,
+ *             .mmap = drm_gem_mmap,
+ *     };
+ */
+
 static int drm_open_helper(struct file *filp, struct drm_minor *minor);
 
 static int drm_setup(struct drm_device * dev)
@@ -67,15 +107,17 @@ static int drm_setup(struct drm_device * dev)
 }
 
 /**
- * Open file.
+ * drm_open - open method for DRM file
+ * @inode: device inode
+ * @filp: file pointer.
  *
- * \param inode device inode
- * \param filp file pointer.
- * \return zero on success or a negative number on failure.
+ * This function must be used by drivers as their .open() #file_operations
+ * method. It looks up the correct DRM device and instantiates all the per-file
+ * resources for it.
+ *
+ * RETURNS:
  *
- * Searches the DRM device with the same minor number, calls open_helper(), and
- * increments the device open count. If the open count was previous at zero,
- * i.e., it's the first that the device is open, then calls setup().
+ * 0 on success or negative errno value on falure.
  */
 int drm_open(struct inode *inode, struct file *filp)
 {
@@ -112,7 +154,7 @@ err_undo:
 }
 EXPORT_SYMBOL(drm_open);
 
-/**
+/*
  * Check whether DRI will run on this CPU.
  *
  * \return non-zero if the DRI will run on this CPU, or zero otherwise.
@@ -125,7 +167,7 @@ static int drm_cpu_valid(void)
 	return 1;
 }
 
-/**
+/*
  * drm_new_set_master - Allocate a new master object and become master for the
  * associated master realm.
  *
@@ -179,7 +221,7 @@ out_err:
 	return ret;
 }
 
-/**
+/*
  * Called whenever a process opens /dev/drm.
  *
  * \param filp file pointer.
@@ -222,6 +264,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
 	INIT_LIST_HEAD(&priv->fbs);
 	mutex_init(&priv->fbs_lock);
 	INIT_LIST_HEAD(&priv->blobs);
+	INIT_LIST_HEAD(&priv->pending_event_list);
 	INIT_LIST_HEAD(&priv->event_list);
 	init_waitqueue_head(&priv->event_wait);
 	priv->event_space = 4096; /* set aside 4k for event buffer */
@@ -311,18 +354,16 @@ static void drm_events_release(struct drm_file *file_priv)
 {
 	struct drm_device *dev = file_priv->minor->dev;
 	struct drm_pending_event *e, *et;
-	struct drm_pending_vblank_event *v, *vt;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
-	/* Remove pending flips */
-	list_for_each_entry_safe(v, vt, &dev->vblank_event_list, base.link)
-		if (v->base.file_priv == file_priv) {
-			list_del(&v->base.link);
-			drm_vblank_put(dev, v->pipe);
-			v->base.destroy(&v->base);
-		}
+	/* Unlink pending events */
+	list_for_each_entry_safe(e, et, &file_priv->pending_event_list,
+				 pending_link) {
+		list_del(&e->pending_link);
+		e->file_priv = NULL;
+	}
 
 	/* Remove unconsumed events */
 	list_for_each_entry_safe(e, et, &file_priv->event_list, link) {
@@ -333,7 +374,7 @@ static void drm_events_release(struct drm_file *file_priv)
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
-/**
+/*
  * drm_legacy_dev_reinit
  *
  * Reinitializes a legacy/ums drm device in it's lastclose function.
@@ -350,7 +391,7 @@ static void drm_legacy_dev_reinit(struct drm_device *dev)
 	dev->if_version = 0;
 }
 
-/**
+/*
  * Take down the DRM device.
  *
  * \param dev DRM device structure.
@@ -387,16 +428,17 @@ int drm_lastclose(struct drm_device * dev)
 }
 
 /**
- * Release file.
+ * drm_release - release method for DRM file
+ * @inode: device inode
+ * @filp: file pointer.
  *
- * \param inode device inode
- * \param file_priv DRM file private.
- * \return zero on success or a negative number on failure.
+ * This function must be used by drivers as their .release() #file_operations
+ * method. It frees any resources associated with the open file, and if this is
+ * the last open file for the DRM device also proceeds to call drm_lastclose().
  *
- * If the hardware lock is held then free it, and take it again for the kernel
- * context since it's necessary to reclaim buffers. Unlink the file private
- * data from its list and free it. Decreases the open count and if it reaches
- * zero calls drm_lastclose().
+ * RETURNS:
+ *
+ * Always succeeds and returns 0.
  */
 int drm_release(struct inode *inode, struct file *filp)
 {
@@ -451,7 +493,7 @@ int drm_release(struct inode *inode, struct file *filp)
 	if (file_priv->is_master) {
 		struct drm_master *master = file_priv->master;
 
-		/**
+		/*
 		 * Since the master is disappearing, so is the
 		 * possibility to lock.
 		 */
@@ -508,6 +550,32 @@ int drm_release(struct inode *inode, struct file *filp)
 }
 EXPORT_SYMBOL(drm_release);
 
+/**
+ * drm_read - read method for DRM file
+ * @filp: file pointer
+ * @buffer: userspace destination pointer for the read
+ * @count: count in bytes to read
+ * @offset: offset to read
+ *
+ * This function must be used by drivers as their .read() #file_operations
+ * method iff they use DRM events for asynchronous signalling to userspace.
+ * Since events are used by the KMS API for vblank and page flip completion this
+ * means all modern display drivers must use it.
+ *
+ * @offset is ignore, DRM events are read like a pipe. Therefore drivers also
+ * must set the .llseek() #file_operation to no_llseek(). Polling support is
+ * provided by drm_poll().
+ *
+ * This function will only ever read a full event. Therefore userspace must
+ * supply a big enough buffer to fit any event to ensure forward progress. Since
+ * the maximum event space is currently 4K it's recommended to just use that for
+ * safety.
+ *
+ * RETURNS:
+ *
+ * Number of bytes read (always aligned to full events, and can be 0) or a
+ * negative error code on failure.
+ */
 ssize_t drm_read(struct file *filp, char __user *buffer,
 		 size_t count, loff_t *offset)
 {
@@ -578,6 +646,22 @@ put_back_event:
 }
 EXPORT_SYMBOL(drm_read);
 
+/**
+ * drm_poll - poll method for DRM file
+ * @filp: file pointer
+ * @wait: poll waiter table
+ *
+ * This function must be used by drivers as their .read() #file_operations
+ * method iff they use DRM events for asynchronous signalling to userspace.
+ * Since events are used by the KMS API for vblank and page flip completion this
+ * means all modern display drivers must use it.
+ *
+ * See also drm_read().
+ *
+ * RETURNS:
+ *
+ * Mask of POLL flags indicating the current status of the file.
+ */
 unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait)
 {
 	struct drm_file *file_priv = filp->private_data;
@@ -591,3 +675,164 @@ unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait)
 	return mask;
 }
 EXPORT_SYMBOL(drm_poll);
+
+/**
+ * drm_event_reserve_init_locked - init a DRM event and reserve space for it
+ * @dev: DRM device
+ * @file_priv: DRM file private data
+ * @p: tracking structure for the pending event
+ * @e: actual event data to deliver to userspace
+ *
+ * This function prepares the passed in event for eventual delivery. If the event
+ * doesn't get delivered (because the IOCTL fails later on, before queuing up
+ * anything) then the even must be cancelled and freed using
+ * drm_event_cancel_free(). Successfully initialized events should be sent out
+ * using drm_send_event() or drm_send_event_locked() to signal completion of the
+ * asynchronous event to userspace.
+ *
+ * If callers embedded @p into a larger structure it must be allocated with
+ * kmalloc and @p must be the first member element.
+ *
+ * This is the locked version of drm_event_reserve_init() for callers which
+ * already hold dev->event_lock.
+ *
+ * RETURNS:
+ *
+ * 0 on success or a negative error code on failure.
+ */
+int drm_event_reserve_init_locked(struct drm_device *dev,
+				  struct drm_file *file_priv,
+				  struct drm_pending_event *p,
+				  struct drm_event *e)
+{
+	if (file_priv->event_space < e->length)
+		return -ENOMEM;
+
+	file_priv->event_space -= e->length;
+
+	p->event = e;
+	list_add(&p->pending_link, &file_priv->pending_event_list);
+	p->file_priv = file_priv;
+
+	/* we *could* pass this in as arg, but everyone uses kfree: */
+	p->destroy = (void (*) (struct drm_pending_event *)) kfree;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_event_reserve_init_locked);
+
+/**
+ * drm_event_reserve_init - init a DRM event and reserve space for it
+ * @dev: DRM device
+ * @file_priv: DRM file private data
+ * @p: tracking structure for the pending event
+ * @e: actual event data to deliver to userspace
+ *
+ * This function prepares the passed in event for eventual delivery. If the event
+ * doesn't get delivered (because the IOCTL fails later on, before queuing up
+ * anything) then the even must be cancelled and freed using
+ * drm_event_cancel_free(). Successfully initialized events should be sent out
+ * using drm_send_event() or drm_send_event_locked() to signal completion of the
+ * asynchronous event to userspace.
+ *
+ * If callers embedded @p into a larger structure it must be allocated with
+ * kmalloc and @p must be the first member element.
+ *
+ * Callers which already hold dev->event_lock should use
+ * drm_event_reserve_init() instead.
+ *
+ * RETURNS:
+ *
+ * 0 on success or a negative error code on failure.
+ */
+int drm_event_reserve_init(struct drm_device *dev,
+			   struct drm_file *file_priv,
+			   struct drm_pending_event *p,
+			   struct drm_event *e)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	ret = drm_event_reserve_init_locked(dev, file_priv, p, e);
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_event_reserve_init);
+
+/**
+ * drm_event_cancel_free - free a DRM event and release it's space
+ * @dev: DRM device
+ * @p: tracking structure for the pending event
+ *
+ * This function frees the event @p initialized with drm_event_reserve_init()
+ * and releases any allocated space.
+ */
+void drm_event_cancel_free(struct drm_device *dev,
+			   struct drm_pending_event *p)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dev->event_lock, flags);
+	if (p->file_priv) {
+		p->file_priv->event_space += p->event->length;
+		list_del(&p->pending_link);
+	}
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+	p->destroy(p);
+}
+EXPORT_SYMBOL(drm_event_cancel_free);
+
+/**
+ * drm_send_event_locked - send DRM event to file descriptor
+ * @dev: DRM device
+ * @e: DRM event to deliver
+ *
+ * This function sends the event @e, initialized with drm_event_reserve_init(),
+ * to its associated userspace DRM file. Callers must already hold
+ * dev->event_lock, see drm_send_event() for the unlocked version.
+ *
+ * Note that the core will take care of unlinking and disarming events when the
+ * corresponding DRM file is closed. Drivers need not worry about whether the
+ * DRM file for this event still exists and can call this function upon
+ * completion of the asynchronous work unconditionally.
+ */
+void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
+{
+	assert_spin_locked(&dev->event_lock);
+
+	if (!e->file_priv) {
+		e->destroy(e);
+		return;
+	}
+
+	list_del(&e->pending_link);
+	list_add_tail(&e->link,
+		      &e->file_priv->event_list);
+	wake_up_interruptible(&e->file_priv->event_wait);
+}
+EXPORT_SYMBOL(drm_send_event_locked);
+
+/**
+ * drm_send_event - send DRM event to file descriptor
+ * @dev: DRM device
+ * @e: DRM event to deliver
+ *
+ * This function sends the event @e, initialized with drm_event_reserve_init(),
+ * to its associated userspace DRM file. This function acquires dev->event_lock,
+ * see drm_send_event_locked() for callers which already hold this lock.
+ *
+ * Note that the core will take care of unlinking and disarming events when the
+ * corresponding DRM file is closed. Drivers need not worry about whether the
+ * DRM file for this event still exists and can call this function upon
+ * completion of the asynchronous work unconditionally.
+ */
+void drm_send_event(struct drm_device *dev, struct drm_pending_event *e)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev->event_lock, irqflags);
+	drm_send_event_locked(dev, e);
+	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+}
+EXPORT_SYMBOL(drm_send_event);
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 1fe14579e8c9..881c5a6c180c 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1041,15 +1041,12 @@ static void send_vblank_event(struct drm_device *dev,
 		struct drm_pending_vblank_event *e,
 		unsigned long seq, struct timeval *now)
 {
-	assert_spin_locked(&dev->event_lock);
-
 	e->event.sequence = seq;
 	e->event.tv_sec = now->tv_sec;
 	e->event.tv_usec = now->tv_usec;
 
-	list_add_tail(&e->base.link,
-		      &e->base.file_priv->event_list);
-	wake_up_interruptible(&e->base.file_priv->event_wait);
+	drm_send_event_locked(dev, &e->base);
+
 	trace_drm_vblank_event_delivered(e->base.pid, e->pipe,
 					 e->event.sequence);
 }
@@ -1668,9 +1665,6 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe,
 	e->event.base.type = DRM_EVENT_VBLANK;
 	e->event.base.length = sizeof(e->event);
 	e->event.user_data = vblwait->request.signal;
-	e->base.event = &e->event.base;
-	e->base.file_priv = file_priv;
-	e->base.destroy = (void (*) (struct drm_pending_event *)) kfree;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
@@ -1686,12 +1680,12 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe,
 		goto err_unlock;
 	}
 
-	if (file_priv->event_space < sizeof(e->event)) {
-		ret = -EBUSY;
+	ret = drm_event_reserve_init_locked(dev, file_priv, &e->base,
+					    &e->event.base);
+
+	if (ret)
 		goto err_unlock;
-	}
 
-	file_priv->event_space -= sizeof(e->event);
 	seq = drm_vblank_count_and_time(dev, pipe, &now);
 
 	if ((vblwait->request.type & _DRM_VBLANK_NEXTONMISS) &&
diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c
new file mode 100644
index 000000000000..3187c4bb01cb
--- /dev/null
+++ b/drivers/gpu/drm/drm_kms_helper_common.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rafael Antognolli <rafael.antognolli@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_dp_aux_dev.h>
+
+MODULE_AUTHOR("David Airlie, Jesse Barnes");
+MODULE_DESCRIPTION("DRM KMS helper");
+MODULE_LICENSE("GPL and additional rights");
+
+static int __init drm_kms_helper_init(void)
+{
+	int ret;
+
+	/* Call init functions from specific kms helpers here */
+	ret = drm_fb_helper_modinit();
+	if (ret < 0)
+		goto out;
+
+	ret = drm_dp_aux_dev_init();
+	if (ret < 0)
+		goto out;
+
+out:
+	return ret;
+}
+
+static void __exit drm_kms_helper_exit(void)
+{
+	/* Call exit functions from specific kms helpers here */
+	drm_dp_aux_dev_exit();
+}
+
+module_init(drm_kms_helper_init);
+module_exit(drm_kms_helper_exit);
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 6e6a9c58d404..f5d80839a90c 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -47,7 +47,17 @@
 
 static int mipi_dsi_device_match(struct device *dev, struct device_driver *drv)
 {
-	return of_driver_match_device(dev, drv);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
+
+	/* attempt OF style match */
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
+	/* compare DSI device and driver names */
+	if (!strcmp(dsi->name, drv->name))
+		return 1;
+
+	return 0;
 }
 
 static const struct dev_pm_ops mipi_dsi_device_pm_ops = {
@@ -129,14 +139,20 @@ static int mipi_dsi_device_add(struct mipi_dsi_device *dsi)
 	return device_add(&dsi->dev);
 }
 
+#if IS_ENABLED(CONFIG_OF)
 static struct mipi_dsi_device *
 of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
 {
-	struct mipi_dsi_device *dsi;
 	struct device *dev = host->dev;
+	struct mipi_dsi_device_info info = { };
 	int ret;
 	u32 reg;
 
+	if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) {
+		dev_err(dev, "modalias failure on %s\n", node->full_name);
+		return ERR_PTR(-EINVAL);
+	}
+
 	ret = of_property_read_u32(node, "reg", &reg);
 	if (ret) {
 		dev_err(dev, "device node %s has no valid reg property: %d\n",
@@ -144,32 +160,111 @@ of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (reg > 3) {
-		dev_err(dev, "device node %s has invalid reg property: %u\n",
-			node->full_name, reg);
+	info.channel = reg;
+	info.node = of_node_get(node);
+
+	return mipi_dsi_device_register_full(host, &info);
+}
+#else
+static struct mipi_dsi_device *
+of_mipi_dsi_device_add(struct mipi_dsi_host *host, struct device_node *node)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+/**
+ * mipi_dsi_device_register_full - create a MIPI DSI device
+ * @host: DSI host to which this device is connected
+ * @info: pointer to template containing DSI device information
+ *
+ * Create a MIPI DSI device by using the device information provided by
+ * mipi_dsi_device_info template
+ *
+ * Returns:
+ * A pointer to the newly created MIPI DSI device, or, a pointer encoded
+ * with an error
+ */
+struct mipi_dsi_device *
+mipi_dsi_device_register_full(struct mipi_dsi_host *host,
+			      const struct mipi_dsi_device_info *info)
+{
+	struct mipi_dsi_device *dsi;
+	struct device *dev = host->dev;
+	int ret;
+
+	if (!info) {
+		dev_err(dev, "invalid mipi_dsi_device_info pointer\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (info->channel > 3) {
+		dev_err(dev, "invalid virtual channel: %u\n", info->channel);
 		return ERR_PTR(-EINVAL);
 	}
 
 	dsi = mipi_dsi_device_alloc(host);
 	if (IS_ERR(dsi)) {
-		dev_err(dev, "failed to allocate DSI device %s: %ld\n",
-			node->full_name, PTR_ERR(dsi));
+		dev_err(dev, "failed to allocate DSI device %ld\n",
+			PTR_ERR(dsi));
 		return dsi;
 	}
 
-	dsi->dev.of_node = of_node_get(node);
-	dsi->channel = reg;
+	dsi->dev.of_node = info->node;
+	dsi->channel = info->channel;
+	strlcpy(dsi->name, info->type, sizeof(dsi->name));
 
 	ret = mipi_dsi_device_add(dsi);
 	if (ret) {
-		dev_err(dev, "failed to add DSI device %s: %d\n",
-			node->full_name, ret);
+		dev_err(dev, "failed to add DSI device %d\n", ret);
 		kfree(dsi);
 		return ERR_PTR(ret);
 	}
 
 	return dsi;
 }
+EXPORT_SYMBOL(mipi_dsi_device_register_full);
+
+/**
+ * mipi_dsi_device_unregister - unregister MIPI DSI device
+ * @dsi: DSI peripheral device
+ */
+void mipi_dsi_device_unregister(struct mipi_dsi_device *dsi)
+{
+	device_unregister(&dsi->dev);
+}
+EXPORT_SYMBOL(mipi_dsi_device_unregister);
+
+static DEFINE_MUTEX(host_lock);
+static LIST_HEAD(host_list);
+
+/**
+ * of_find_mipi_dsi_host_by_node() - find the MIPI DSI host matching a
+ *				     device tree node
+ * @node: device tree node
+ *
+ * Returns:
+ * A pointer to the MIPI DSI host corresponding to @node or NULL if no
+ * such device exists (or has not been registered yet).
+ */
+struct mipi_dsi_host *of_find_mipi_dsi_host_by_node(struct device_node *node)
+{
+	struct mipi_dsi_host *host;
+
+	mutex_lock(&host_lock);
+
+	list_for_each_entry(host, &host_list, list) {
+		if (host->dev->of_node == node) {
+			mutex_unlock(&host_lock);
+			return host;
+		}
+	}
+
+	mutex_unlock(&host_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL(of_find_mipi_dsi_host_by_node);
 
 int mipi_dsi_host_register(struct mipi_dsi_host *host)
 {
@@ -182,6 +277,10 @@ int mipi_dsi_host_register(struct mipi_dsi_host *host)
 		of_mipi_dsi_device_add(host, node);
 	}
 
+	mutex_lock(&host_lock);
+	list_add_tail(&host->list, &host_list);
+	mutex_unlock(&host_lock);
+
 	return 0;
 }
 EXPORT_SYMBOL(mipi_dsi_host_register);
@@ -190,7 +289,7 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
 {
 	struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
 
-	device_unregister(&dsi->dev);
+	mipi_dsi_device_unregister(dsi);
 
 	return 0;
 }
@@ -198,6 +297,10 @@ static int mipi_dsi_remove_device_fn(struct device *dev, void *priv)
 void mipi_dsi_host_unregister(struct mipi_dsi_host *host)
 {
 	device_for_each_child(host->dev, NULL, mipi_dsi_remove_device_fn);
+
+	mutex_lock(&host_lock);
+	list_del_init(&host->list);
+	mutex_unlock(&host_lock);
 }
 EXPORT_SYMBOL(mipi_dsi_host_unregister);
 
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 20775c05235a..f7448a5e95a9 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1371,8 +1371,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 	}
 done:
 	if (i >= 0) {
-		printk(KERN_WARNING
-			"parse error at position %i in video mode '%s'\n",
+		pr_warn("[drm] parse error at position %i in video mode '%s'\n",
 			i, name);
 		mode->specified = false;
 		return false;
diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index 493c05c9ce4f..bc98bb94264d 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -149,3 +149,37 @@ int drm_of_component_probe(struct device *dev,
 	return component_master_add_with_match(dev, m_ops, match);
 }
 EXPORT_SYMBOL(drm_of_component_probe);
+
+/*
+ * drm_of_encoder_active_endpoint - return the active encoder endpoint
+ * @node: device tree node containing encoder input ports
+ * @encoder: drm_encoder
+ *
+ * Given an encoder device node and a drm_encoder with a connected crtc,
+ * parse the encoder endpoint connecting to the crtc port.
+ */
+int drm_of_encoder_active_endpoint(struct device_node *node,
+				   struct drm_encoder *encoder,
+				   struct of_endpoint *endpoint)
+{
+	struct device_node *ep;
+	struct drm_crtc *crtc = encoder->crtc;
+	struct device_node *port;
+	int ret;
+
+	if (!node || !crtc)
+		return -EINVAL;
+
+	for_each_endpoint_of_node(node, ep) {
+		port = of_graph_get_remote_port(ep);
+		of_node_put(port);
+		if (port == crtc->port) {
+			ret = of_graph_parse_endpoint(ep, endpoint);
+			of_node_put(ep);
+			return ret;
+		}
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 27aa7183b20b..df6cdc76a16e 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -329,7 +329,7 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
  * drm_gem_prime_export - helper library implementation of the export callback
  * @dev: drm_device to export from
  * @obj: GEM object to export
- * @flags: flags like DRM_CLOEXEC
+ * @flags: flags like DRM_CLOEXEC and DRM_RDWR
  *
  * This is the implementation of the gem_prime_export functions for GEM drivers
  * using the PRIME helpers.
@@ -628,7 +628,6 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv)
 {
 	struct drm_prime_handle *args = data;
-	uint32_t flags;
 
 	if (!drm_core_check_feature(dev, DRIVER_PRIME))
 		return -EINVAL;
@@ -637,14 +636,11 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 		return -ENOSYS;
 
 	/* check flags are valid */
-	if (args->flags & ~DRM_CLOEXEC)
+	if (args->flags & ~(DRM_CLOEXEC | DRM_RDWR))
 		return -EINVAL;
 
-	/* we only want to pass DRM_CLOEXEC which is == O_CLOEXEC */
-	flags = args->flags & DRM_CLOEXEC;
-
 	return dev->driver->prime_handle_to_fd(dev, file_priv,
-			args->handle, flags, &args->fd);
+			args->handle, args->flags, &args->fd);
 }
 
 int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 332c55ebba6d..d8d556457427 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -21,6 +21,7 @@
 
 #include "common.xml.h"
 #include "state.xml.h"
+#include "state_3d.xml.h"
 #include "cmdstream.xml.h"
 
 /*
@@ -85,10 +86,17 @@ static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
 	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
 }
 
-static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe)
+static inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
 {
-	u32 flush;
-	u32 stall;
+	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
+		       VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
+		       VIVS_GL_SEMAPHORE_TOKEN_TO(to));
+}
+
+static void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
+	struct etnaviv_cmdbuf *buffer, u8 pipe)
+{
+	u32 flush = 0;
 
 	/*
 	 * This assumes that if we're switching to 2D, we're switching
@@ -96,17 +104,13 @@ static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe)
 	 * the 2D core, we need to flush the 3D depth and color caches,
 	 * otherwise we need to flush the 2D pixel engine cache.
 	 */
-	if (pipe == ETNA_PIPE_2D)
-		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
-	else
+	if (gpu->exec_state == ETNA_PIPE_2D)
 		flush = VIVS_GL_FLUSH_CACHE_PE2D;
-
-	stall = VIVS_GL_SEMAPHORE_TOKEN_FROM(SYNC_RECIPIENT_FE) |
-		VIVS_GL_SEMAPHORE_TOKEN_TO(SYNC_RECIPIENT_PE);
+	else if (gpu->exec_state == ETNA_PIPE_3D)
+		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
 
 	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
-	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, stall);
-
+	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
 	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
 
 	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
@@ -131,6 +135,36 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 			ptr, len * 4, 0);
 }
 
+/*
+ * Safely replace the WAIT of a waitlink with a new command and argument.
+ * The GPU may be executing this WAIT while we're modifying it, so we have
+ * to write it in a specific order to avoid the GPU branching to somewhere
+ * else.  'wl_offset' is the offset to the first byte of the WAIT command.
+ */
+static void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
+	unsigned int wl_offset, u32 cmd, u32 arg)
+{
+	u32 *lw = buffer->vaddr + wl_offset;
+
+	lw[1] = arg;
+	mb();
+	lw[0] = cmd;
+	mb();
+}
+
+/*
+ * Ensure that there is space in the command buffer to contiguously write
+ * 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
+ */
+static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
+	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
+{
+	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
+		buffer->user_size = 0;
+
+	return gpu_va(gpu, buffer) + buffer->user_size;
+}
+
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
@@ -147,81 +181,79 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
+	unsigned int waitlink_offset = buffer->user_size - 16;
+	u32 link_target, flush = 0;
 
-	/* Replace the last WAIT with an END */
-	buffer->user_size -= 16;
-
-	CMD_END(buffer);
-	mb();
+	if (gpu->exec_state == ETNA_PIPE_2D)
+		flush = VIVS_GL_FLUSH_CACHE_PE2D;
+	else if (gpu->exec_state == ETNA_PIPE_3D)
+		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
+			VIVS_GL_FLUSH_CACHE_COLOR |
+			VIVS_GL_FLUSH_CACHE_TEXTURE |
+			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
+			VIVS_GL_FLUSH_CACHE_SHADER_L2;
+
+	if (flush) {
+		unsigned int dwords = 7;
+
+		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);
+
+		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
+		if (gpu->exec_state == ETNA_PIPE_3D)
+			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
+				       VIVS_TS_FLUSH_CACHE_FLUSH);
+		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+		CMD_END(buffer);
+
+		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+					    VIV_FE_LINK_HEADER_OP_LINK |
+					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
+					    link_target);
+	} else {
+		/* Replace the last link-wait with an "END" command */
+		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+					    VIV_FE_END_HEADER_OP_END, 0);
+	}
 }
 
+/* Append a command buffer to the ring buffer. */
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct etnaviv_cmdbuf *buffer = gpu->buffer;
-	u32 *lw = buffer->vaddr + buffer->user_size - 16;
-	u32 back, link_target, link_size, reserve_size, extra_size = 0;
+	unsigned int waitlink_offset = buffer->user_size - 16;
+	u32 return_target, return_dwords;
+	u32 link_target, link_dwords;
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
+	link_target = gpu_va(gpu, cmdbuf);
+	link_dwords = cmdbuf->size / 8;
+
 	/*
-	 * If we need to flush the MMU prior to submitting this buffer, we
-	 * will need to append a mmu flush load state, followed by a new
+	 * If we need maintanence prior to submitting this buffer, we will
+	 * need to append a mmu flush load state, followed by a new
 	 * link to this buffer - a total of four additional words.
 	 */
 	if (gpu->mmu->need_flush || gpu->switch_context) {
+		u32 target, extra_dwords;
+
 		/* link command */
-		extra_size += 2;
+		extra_dwords = 1;
+
 		/* flush command */
 		if (gpu->mmu->need_flush)
-			extra_size += 2;
+			extra_dwords += 1;
+
 		/* pipe switch commands */
 		if (gpu->switch_context)
-			extra_size += 8;
-	}
+			extra_dwords += 4;
 
-	reserve_size = (6 + extra_size) * 4;
-
-	/*
-	 * if we are going to completely overflow the buffer, we need to wrap.
-	 */
-	if (buffer->user_size + reserve_size > buffer->size)
-		buffer->user_size = 0;
-
-	/* save offset back into main buffer */
-	back = buffer->user_size + reserve_size - 6 * 4;
-	link_target = gpu_va(gpu, buffer) + buffer->user_size;
-	link_size = 6;
-
-	/* Skip over any extra instructions */
-	link_target += extra_size * sizeof(u32);
-
-	if (drm_debug & DRM_UT_DRIVER)
-		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			link_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr);
-
-	/* jump back from cmd to main buffer */
-	CMD_LINK(cmdbuf, link_size, link_target);
-
-	link_target = gpu_va(gpu, cmdbuf);
-	link_size = cmdbuf->size / 8;
-
-
-
-	if (drm_debug & DRM_UT_DRIVER) {
-		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
-			       cmdbuf->vaddr, cmdbuf->size, 0);
-
-		pr_info("link op: %p\n", lw);
-		pr_info("link addr: %p\n", lw + 1);
-		pr_info("addr: 0x%08x\n", link_target);
-		pr_info("back: 0x%08x\n", gpu_va(gpu, buffer) + back);
-		pr_info("event: %d\n", event);
-	}
-
-	if (gpu->mmu->need_flush || gpu->switch_context) {
-		u32 new_target = gpu_va(gpu, buffer) + buffer->user_size;
+		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
 
 		if (gpu->mmu->need_flush) {
 			/* Add the MMU flush */
@@ -236,32 +268,59 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 		}
 
 		if (gpu->switch_context) {
-			etnaviv_cmd_select_pipe(buffer, cmdbuf->exec_state);
+			etnaviv_cmd_select_pipe(gpu, buffer, cmdbuf->exec_state);
+			gpu->exec_state = cmdbuf->exec_state;
 			gpu->switch_context = false;
 		}
 
-		/* And the link to the first buffer */
-		CMD_LINK(buffer, link_size, link_target);
+		/* And the link to the submitted buffer */
+		CMD_LINK(buffer, link_dwords, link_target);
 
 		/* Update the link target to point to above instructions */
-		link_target = new_target;
-		link_size = extra_size;
+		link_target = target;
+		link_dwords = extra_dwords;
 	}
 
-	/* trigger event */
+	/*
+	 * Append a LINK to the submitted command buffer to return to
+	 * the ring buffer.  return_target is the ring target address.
+	 * We need three dwords: event, wait, link.
+	 */
+	return_dwords = 3;
+	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
+	CMD_LINK(cmdbuf, return_dwords, return_target);
+
+	/*
+	 * Append event, wait and link pointing back to the wait
+	 * command to the ring buffer.
+	 */
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
-
-	/* append WAIT/LINK to main buffer */
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + (buffer->user_size - 4));
+	CMD_LINK(buffer, 2, return_target + 8);
 
-	/* Change WAIT into a LINK command; write the address first. */
-	*(lw + 1) = link_target;
-	mb();
-	*(lw) = VIV_FE_LINK_HEADER_OP_LINK |
-		VIV_FE_LINK_HEADER_PREFETCH(link_size);
-	mb();
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
+			return_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr);
+
+	if (drm_debug & DRM_UT_DRIVER) {
+		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
+			       cmdbuf->vaddr, cmdbuf->size, 0);
+
+		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
+		pr_info("addr: 0x%08x\n", link_target);
+		pr_info("back: 0x%08x\n", return_target);
+		pr_info("event: %d\n", event);
+	}
+
+	/*
+	 * Kick off the submitted command by replacing the previous
+	 * WAIT with a link to the address in the ring buffer.
+	 */
+	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
+				    VIV_FE_LINK_HEADER_OP_LINK |
+				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
+				    link_target);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 1cd6046e76b1..115c5bc6d7c8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -75,9 +75,6 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
-int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
-	struct drm_gem_object *obj, u32 *iova);
-void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj);
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
 void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 4b519e4309b2..281c6eca20a8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -260,8 +260,32 @@ etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj,
 	return NULL;
 }
 
-int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
-	struct drm_gem_object *obj, u32 *iova)
+void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
+
+	drm_gem_object_reference(&etnaviv_obj->base);
+
+	mutex_lock(&etnaviv_obj->lock);
+	WARN_ON(mapping->use == 0);
+	mapping->use += 1;
+	mutex_unlock(&etnaviv_obj->lock);
+}
+
+void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
+
+	mutex_lock(&etnaviv_obj->lock);
+	WARN_ON(mapping->use == 0);
+	mapping->use -= 1;
+	mutex_unlock(&etnaviv_obj->lock);
+
+	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
+}
+
+struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
+	struct drm_gem_object *obj, struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct etnaviv_vram_mapping *mapping;
@@ -329,28 +353,12 @@ int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu,
 out:
 	mutex_unlock(&etnaviv_obj->lock);
 
-	if (!ret) {
-		/* Take a reference on the object */
-		drm_gem_object_reference(obj);
-		*iova = mapping->iova;
-	}
-
-	return ret;
-}
-
-void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
-{
-	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
-	struct etnaviv_vram_mapping *mapping;
-
-	mutex_lock(&etnaviv_obj->lock);
-	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
-
-	WARN_ON(mapping->use == 0);
-	mapping->use -= 1;
-	mutex_unlock(&etnaviv_obj->lock);
+	if (ret)
+		return ERR_PTR(ret);
 
-	drm_gem_object_unreference_unlocked(obj);
+	/* Take a reference on the object */
+	drm_gem_object_reference(obj);
+	return mapping;
 }
 
 void *etnaviv_gem_vmap(struct drm_gem_object *obj)
@@ -753,9 +761,9 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
 
 	down_read(&mm->mmap_sem);
 	while (pinned < npages) {
-		ret = get_user_pages(task, mm, ptr, npages - pinned,
-				     !etnaviv_obj->userptr.ro, 0,
-				     pvec + pinned, NULL);
+		ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
+					    !etnaviv_obj->userptr.ro, 0,
+					    pvec + pinned, NULL);
 		if (ret < 0)
 			break;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index ab5df8147a5f..02665d8c10ee 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -88,6 +88,12 @@ static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
 
 #define MAX_CMDS 4
 
+struct etnaviv_gem_submit_bo {
+	u32 flags;
+	struct etnaviv_gem_object *obj;
+	struct etnaviv_vram_mapping *mapping;
+};
+
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
  * associated with the cmdstream submission for synchronization (and
  * make it easier to unwind when things go wrong, etc).  This only
@@ -99,11 +105,7 @@ struct etnaviv_gem_submit {
 	struct ww_acquire_ctx ticket;
 	u32 fence;
 	unsigned int nr_bos;
-	struct {
-		u32 flags;
-		struct etnaviv_gem_object *obj;
-		u32 iova;
-	} bos[0];
+	struct etnaviv_gem_submit_bo bos[0];
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
@@ -115,4 +117,9 @@ int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj);
 struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
 void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
 
+struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
+	struct drm_gem_object *obj, struct etnaviv_gpu *gpu);
+void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping);
+void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);
+
 #endif /* __ETNAVIV_GEM_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 1aba01a999df..236ada93df53 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -187,12 +187,10 @@ static void submit_unpin_objects(struct etnaviv_gem_submit *submit)
 	int i;
 
 	for (i = 0; i < submit->nr_bos; i++) {
-		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-
 		if (submit->bos[i].flags & BO_PINNED)
-			etnaviv_gem_put_iova(submit->gpu, &etnaviv_obj->base);
+			etnaviv_gem_mapping_unreference(submit->bos[i].mapping);
 
-		submit->bos[i].iova = 0;
+		submit->bos[i].mapping = NULL;
 		submit->bos[i].flags &= ~BO_PINNED;
 	}
 }
@@ -203,22 +201,24 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-		u32 iova;
+		struct etnaviv_vram_mapping *mapping;
 
-		ret = etnaviv_gem_get_iova(submit->gpu, &etnaviv_obj->base,
-					   &iova);
-		if (ret)
+		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
+						  submit->gpu);
+		if (IS_ERR(mapping)) {
+			ret = PTR_ERR(mapping);
 			break;
+		}
 
 		submit->bos[i].flags |= BO_PINNED;
-		submit->bos[i].iova = iova;
+		submit->bos[i].mapping = mapping;
 	}
 
 	return ret;
 }
 
 static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx,
-		struct etnaviv_gem_object **obj, u32 *iova)
+	struct etnaviv_gem_submit_bo **bo)
 {
 	if (idx >= submit->nr_bos) {
 		DRM_ERROR("invalid buffer index: %u (out of %u)\n",
@@ -226,10 +226,7 @@ static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx,
 		return -EINVAL;
 	}
 
-	if (obj)
-		*obj = submit->bos[idx].obj;
-	if (iova)
-		*iova = submit->bos[idx].iova;
+	*bo = &submit->bos[idx];
 
 	return 0;
 }
@@ -245,8 +242,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 
 	for (i = 0; i < nr_relocs; i++) {
 		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
-		struct etnaviv_gem_object *bobj;
-		u32 iova, off;
+		struct etnaviv_gem_submit_bo *bo;
+		u32 off;
 
 		if (unlikely(r->flags)) {
 			DRM_ERROR("invalid reloc flags\n");
@@ -268,17 +265,16 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 			return -EINVAL;
 		}
 
-		ret = submit_bo(submit, r->reloc_idx, &bobj, &iova);
+		ret = submit_bo(submit, r->reloc_idx, &bo);
 		if (ret)
 			return ret;
 
-		if (r->reloc_offset >=
-		    bobj->base.size - sizeof(*ptr)) {
+		if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) {
 			DRM_ERROR("relocation %u outside object", i);
 			return -EINVAL;
 		}
 
-		ptr[off] = iova + r->reloc_offset;
+		ptr[off] = bo->mapping->iova + r->reloc_offset;
 
 		last_offset = off;
 	}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 3c1ce44483d9..09198d0b5814 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -628,6 +628,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	/* Now program the hardware */
 	mutex_lock(&gpu->lock);
 	etnaviv_gpu_hw_init(gpu);
+	gpu->exec_state = -1;
 	mutex_unlock(&gpu->lock);
 
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -871,17 +872,13 @@ static void recover_worker(struct work_struct *work)
 		gpu->event[i].fence = NULL;
 		gpu->event[i].used = false;
 		complete(&gpu->event_free);
-		/*
-		 * Decrement the PM count for each stuck event. This is safe
-		 * even in atomic context as we use ASYNC RPM here.
-		 */
-		pm_runtime_put_autosuspend(gpu->dev);
 	}
 	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	gpu->completed_fence = gpu->active_fence;
 
 	etnaviv_gpu_hw_init(gpu);
 	gpu->switch_context = true;
+	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -1106,7 +1103,7 @@ struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
 	size_t nr_bos)
 {
 	struct etnaviv_cmdbuf *cmdbuf;
-	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo[0]),
+	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
 				 sizeof(*cmdbuf));
 
 	cmdbuf = kzalloc(sz, GFP_KERNEL);
@@ -1150,14 +1147,23 @@ static void retire_worker(struct work_struct *work)
 		fence_put(cmdbuf->fence);
 
 		for (i = 0; i < cmdbuf->nr_bos; i++) {
-			struct etnaviv_gem_object *etnaviv_obj = cmdbuf->bo[i];
+			struct etnaviv_vram_mapping *mapping = cmdbuf->bo_map[i];
+			struct etnaviv_gem_object *etnaviv_obj = mapping->object;
 
 			atomic_dec(&etnaviv_obj->gpu_active);
 			/* drop the refcount taken in etnaviv_gpu_submit */
-			etnaviv_gem_put_iova(gpu, &etnaviv_obj->base);
+			etnaviv_gem_mapping_unreference(mapping);
 		}
 
 		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		/*
+		 * We need to balance the runtime PM count caused by
+		 * each submission.  Upon submission, we increment
+		 * the runtime PM counter, and allocate one event.
+		 * So here, we put the runtime PM count for each
+		 * completed event.
+		 */
+		pm_runtime_put_autosuspend(gpu->dev);
 	}
 
 	gpu->retired_fence = fence;
@@ -1304,11 +1310,10 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
-		u32 iova;
 
-		/* Each cmdbuf takes a refcount on the iova */
-		etnaviv_gem_get_iova(gpu, &etnaviv_obj->base, &iova);
-		cmdbuf->bo[i] = etnaviv_obj;
+		/* Each cmdbuf takes a refcount on the mapping */
+		etnaviv_gem_mapping_reference(submit->bos[i].mapping);
+		cmdbuf->bo_map[i] = submit->bos[i].mapping;
 		atomic_inc(&etnaviv_obj->gpu_active);
 
 		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
@@ -1378,15 +1383,6 @@ static irqreturn_t irq_handler(int irq, void *data)
 				gpu->completed_fence = fence->seqno;
 
 			event_free(gpu, event);
-
-			/*
-			 * We need to balance the runtime PM count caused by
-			 * each submission.  Upon submission, we increment
-			 * the runtime PM counter, and allocate one event.
-			 * So here, we put the runtime PM count for each
-			 * completed event.
-			 */
-			pm_runtime_put_autosuspend(gpu->dev);
 		}
 
 		/* Retire the buffer objects in a work */
@@ -1481,6 +1477,7 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_hw_init(gpu);
 
 	gpu->switch_context = true;
+	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
 
@@ -1569,6 +1566,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct etnaviv_gpu *gpu;
+	u32 dma_mask;
 	int err = 0;
 
 	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
@@ -1579,12 +1577,16 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 	mutex_init(&gpu->lock);
 
 	/*
-	 * Set the GPU base address to the start of physical memory.  This
-	 * ensures that if we have up to 2GB, the v1 MMU can address the
-	 * highest memory.  This is important as command buffers may be
-	 * allocated outside of this limit.
+	 * Set the GPU linear window to be at the end of the DMA window, where
+	 * the CMA area is likely to reside. This ensures that we are able to
+	 * map the command buffers while having the linear window overlap as
+	 * much RAM as possible, so we can optimize mappings for other buffers.
 	 */
-	gpu->memory_base = PHYS_OFFSET;
+	dma_mask = (u32)dma_get_required_mask(dev);
+	if (dma_mask < PHYS_OFFSET + SZ_2G)
+		gpu->memory_base = PHYS_OFFSET;
+	else
+		gpu->memory_base = dma_mask - SZ_2G + 1;
 
 	/* Map registers: */
 	gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index f233ac4c7c1c..f5321e2f25ff 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -23,6 +23,7 @@
 #include "etnaviv_drv.h"
 
 struct etnaviv_gem_submit;
+struct etnaviv_vram_mapping;
 
 struct etnaviv_chip_identity {
 	/* Chip model. */
@@ -103,6 +104,7 @@ struct etnaviv_gpu {
 
 	/* 'ring'-buffer: */
 	struct etnaviv_cmdbuf *buffer;
+	int exec_state;
 
 	/* bus base address of memory  */
 	u32 memory_base;
@@ -166,7 +168,7 @@ struct etnaviv_cmdbuf {
 	struct list_head node;
 	/* BOs attached to this command buffer */
 	unsigned int nr_bos;
-	struct etnaviv_gem_object *bo[0];
+	struct etnaviv_vram_mapping *bo_map[0];
 };
 
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 6743bc648dc8..29a723fabc17 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -193,7 +193,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 
 		/*
 		 * Unmap the blocks which need to be reaped from the MMU.
-		 * Clear the mmu pointer to prevent the get_iova finding
+		 * Clear the mmu pointer to prevent the mapping_get finding
 		 * this mapping.
 		 */
 		list_for_each_entry_safe(m, n, &list, scan_node) {
diff --git a/drivers/gpu/drm/etnaviv/state_3d.xml.h b/drivers/gpu/drm/etnaviv/state_3d.xml.h
new file mode 100644
index 000000000000..d7146fd13943
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/state_3d.xml.h
@@ -0,0 +1,9 @@
+#ifndef STATE_3D_XML
+#define STATE_3D_XML
+
+/* This is a cut-down version of the state_3d.xml.h file */
+
+#define VIVS_TS_FLUSH_CACHE					0x00001650
+#define VIVS_TS_FLUSH_CACHE_FLUSH				0x00000001
+
+#endif /* STATE_3D_XML */
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index 6496532aaa91..968b31c522b2 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -2,7 +2,6 @@
 # Makefile for the drm device driver.  This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
-ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/exynos
 exynosdrm-y := exynos_drm_drv.o exynos_drm_crtc.o exynos_drm_fbdev.o \
 		exynos_drm_fb.o exynos_drm_gem.o exynos_drm_core.o \
 		exynos_drm_plane.o
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 162ab93e99cb..5245bc5e82e9 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -429,7 +429,7 @@ static void decon_disable(struct exynos_drm_crtc *crtc)
 	set_bit(BIT_SUSPENDED, &ctx->flags);
 }
 
-void decon_te_irq_handler(struct exynos_drm_crtc *crtc)
+static void decon_te_irq_handler(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index 52bda3b42fe0..93361073af9a 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -60,7 +60,6 @@ struct decon_context {
 	wait_queue_head_t		wait_vsync_queue;
 	atomic_t			wait_vsync_event;
 
-	struct exynos_drm_panel_info panel;
 	struct drm_encoder *encoder;
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c
index 673164b331c8..cff8dc788820 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.c
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.c
@@ -977,9 +977,7 @@ static int exynos_dp_get_modes(struct drm_connector *connector)
 		return 0;
 	}
 
-	drm_display_mode_from_videomode(&dp->priv.vm, mode);
-	mode->width_mm = dp->priv.width_mm;
-	mode->height_mm = dp->priv.height_mm;
+	drm_display_mode_from_videomode(&dp->vm, mode);
 	connector->display_info.width_mm = mode->width_mm;
 	connector->display_info.height_mm = mode->height_mm;
 
@@ -1155,13 +1153,6 @@ static int exynos_dp_create_connector(struct drm_encoder *encoder)
 	return 0;
 }
 
-static bool exynos_dp_mode_fixup(struct drm_encoder *encoder,
-				 const struct drm_display_mode *mode,
-				 struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void exynos_dp_mode_set(struct drm_encoder *encoder,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted_mode)
@@ -1177,7 +1168,6 @@ static void exynos_dp_disable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs exynos_dp_encoder_helper_funcs = {
-	.mode_fixup = exynos_dp_mode_fixup,
 	.mode_set = exynos_dp_mode_set,
 	.enable = exynos_dp_enable,
 	.disable = exynos_dp_disable,
@@ -1249,8 +1239,7 @@ static int exynos_dp_dt_parse_panel(struct exynos_dp_device *dp)
 {
 	int ret;
 
-	ret = of_get_videomode(dp->dev->of_node, &dp->priv.vm,
-			OF_USE_NATIVE_MODE);
+	ret = of_get_videomode(dp->dev->of_node, &dp->vm, OF_USE_NATIVE_MODE);
 	if (ret) {
 		DRM_ERROR("failed: of_get_videomode() : %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h
index 66eec4b2d5c6..b5c2d8f47f9c 100644
--- a/drivers/gpu/drm/exynos/exynos_dp_core.h
+++ b/drivers/gpu/drm/exynos/exynos_dp_core.h
@@ -16,6 +16,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/exynos_drm.h>
+#include <video/videomode.h>
 
 #include "exynos_drm_drv.h"
 
@@ -164,8 +165,7 @@ struct exynos_dp_device {
 	struct phy		*phy;
 	int			dpms_mode;
 	int			hpd_gpio;
-
-	struct exynos_drm_panel_info priv;
+	struct videomode	vm;
 };
 
 /* exynos_dp_reg.c */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 05350ae0785b..75e570f45259 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -128,13 +128,6 @@ static int exynos_dpi_create_connector(struct drm_encoder *encoder)
 	return 0;
 }
 
-static bool exynos_dpi_mode_fixup(struct drm_encoder *encoder,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void exynos_dpi_mode_set(struct drm_encoder *encoder,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode)
@@ -162,7 +155,6 @@ static void exynos_dpi_disable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs exynos_dpi_encoder_helper_funcs = {
-	.mode_fixup = exynos_dpi_mode_fixup,
 	.mode_set = exynos_dpi_mode_set,
 	.enable = exynos_dpi_enable,
 	.disable = exynos_dpi_disable,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 68f0f36f6e7e..5344940c8a07 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -130,6 +130,8 @@ static void exynos_drm_atomic_work(struct work_struct *work)
 	exynos_atomic_commit_complete(commit);
 }
 
+static struct device *exynos_drm_get_dma_device(void);
+
 static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 {
 	struct exynos_drm_private *private;
@@ -147,6 +149,16 @@ static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 	dev_set_drvdata(dev->dev, dev);
 	dev->dev_private = (void *)private;
 
+	/* the first real CRTC device is used for all dma mapping operations */
+	private->dma_dev = exynos_drm_get_dma_device();
+	if (!private->dma_dev) {
+		DRM_ERROR("no device found for DMA mapping operations.\n");
+		ret = -ENODEV;
+		goto err_free_private;
+	}
+	DRM_INFO("Exynos DRM: using %s device for DMA mapping operations\n",
+		 dev_name(private->dma_dev));
+
 	/*
 	 * create mapping to manage iommu table and set a pointer to iommu
 	 * mapping structure to iommu_mapping of private data.
@@ -340,20 +352,6 @@ static void exynos_drm_preclose(struct drm_device *dev,
 
 static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct drm_pending_event *e, *et;
-	unsigned long flags;
-
-	if (!file->driver_priv)
-		return;
-
-	spin_lock_irqsave(&dev->event_lock, flags);
-	/* Release all events handled by page flip handler but not freed. */
-	list_for_each_entry_safe(e, et, &file->event_list, link) {
-		list_del(&e->link);
-		e->destroy(e);
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-
 	kfree(file->driver_priv);
 	file->driver_priv = NULL;
 }
@@ -372,6 +370,8 @@ static const struct vm_operations_struct exynos_drm_gem_vm_ops = {
 static const struct drm_ioctl_desc exynos_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
 			DRM_AUTH | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP, exynos_drm_gem_map_ioctl,
+			DRM_AUTH | DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_GET, exynos_drm_gem_get_ioctl,
 			DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(EXYNOS_VIDI_CONNECTION, vidi_connection_ioctl,
@@ -495,69 +495,65 @@ static const struct dev_pm_ops exynos_drm_pm_ops = {
 /* forward declaration */
 static struct platform_driver exynos_drm_platform_driver;
 
+struct exynos_drm_driver_info {
+	struct platform_driver *driver;
+	unsigned int flags;
+};
+
+#define DRM_COMPONENT_DRIVER	BIT(0)	/* supports component framework */
+#define DRM_VIRTUAL_DEVICE	BIT(1)	/* create virtual platform device */
+#define DRM_DMA_DEVICE		BIT(2)	/* can be used for dma allocations */
+
+#define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL)
+
 /*
  * Connector drivers should not be placed before associated crtc drivers,
  * because connector requires pipe number of its crtc during initialization.
  */
-static struct platform_driver *const exynos_drm_kms_drivers[] = {
-#ifdef CONFIG_DRM_EXYNOS_FIMD
-	&fimd_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS5433_DECON
-	&exynos5433_decon_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS7_DECON
-	&decon_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_MIC
-	&mic_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_DP
-	&dp_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_DSI
-	&dsi_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_MIXER
-	&mixer_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_HDMI
-	&hdmi_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_VIDI
-	&vidi_driver,
-#endif
-};
-
-static struct platform_driver *const exynos_drm_non_kms_drivers[] = {
-#ifdef CONFIG_DRM_EXYNOS_G2D
-	&g2d_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_FIMC
-	&fimc_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_ROTATOR
-	&rotator_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_GSC
-	&gsc_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_IPP
-	&ipp_driver,
-#endif
-	&exynos_drm_platform_driver,
-};
-
-static struct platform_driver *const exynos_drm_drv_with_simple_dev[] = {
-#ifdef CONFIG_DRM_EXYNOS_VIDI
-	&vidi_driver,
-#endif
-#ifdef CONFIG_DRM_EXYNOS_IPP
-	&ipp_driver,
-#endif
-	&exynos_drm_platform_driver,
+static struct exynos_drm_driver_info exynos_drm_drivers[] = {
+	{
+		DRV_PTR(fimd_driver, CONFIG_DRM_EXYNOS_FIMD),
+		DRM_COMPONENT_DRIVER | DRM_DMA_DEVICE
+	}, {
+		DRV_PTR(exynos5433_decon_driver, CONFIG_DRM_EXYNOS5433_DECON),
+		DRM_COMPONENT_DRIVER | DRM_DMA_DEVICE
+	}, {
+		DRV_PTR(decon_driver, CONFIG_DRM_EXYNOS7_DECON),
+		DRM_COMPONENT_DRIVER | DRM_DMA_DEVICE
+	}, {
+		DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER),
+		DRM_COMPONENT_DRIVER | DRM_DMA_DEVICE
+	}, {
+		DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
+		DRM_COMPONENT_DRIVER
+	}, {
+		DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP),
+		DRM_COMPONENT_DRIVER
+	}, {
+		DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI),
+		DRM_COMPONENT_DRIVER
+	}, {
+		DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI),
+		DRM_COMPONENT_DRIVER
+	}, {
+		DRV_PTR(vidi_driver, CONFIG_DRM_EXYNOS_VIDI),
+		DRM_COMPONENT_DRIVER | DRM_VIRTUAL_DEVICE
+	}, {
+		DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D),
+	}, {
+		DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC),
+	}, {
+		DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR),
+	}, {
+		DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
+	}, {
+		DRV_PTR(ipp_driver, CONFIG_DRM_EXYNOS_IPP),
+		DRM_VIRTUAL_DEVICE
+	}, {
+		&exynos_drm_platform_driver,
+		DRM_VIRTUAL_DEVICE
+	}
 };
-#define PDEV_COUNT ARRAY_SIZE(exynos_drm_drv_with_simple_dev)
 
 static int compare_dev(struct device *dev, void *data)
 {
@@ -569,11 +565,15 @@ static struct component_match *exynos_drm_match_add(struct device *dev)
 	struct component_match *match = NULL;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(exynos_drm_kms_drivers); ++i) {
-		struct device_driver *drv = &exynos_drm_kms_drivers[i]->driver;
+	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
 		struct device *p = NULL, *d;
 
-		while ((d = bus_find_device(&platform_bus_type, p, drv,
+		if (!info->driver || !(info->flags & DRM_COMPONENT_DRIVER))
+			continue;
+
+		while ((d = bus_find_device(&platform_bus_type, p,
+					    &info->driver->driver,
 					    (void *)platform_bus_type.match))) {
 			put_device(p);
 			component_match_add(dev, &match, compare_dev, d);
@@ -630,91 +630,102 @@ static struct platform_driver exynos_drm_platform_driver = {
 	},
 };
 
-static struct platform_device *exynos_drm_pdevs[PDEV_COUNT];
+static struct device *exynos_drm_get_dma_device(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
+		struct device *dev;
+
+		if (!info->driver || !(info->flags & DRM_DMA_DEVICE))
+			continue;
+
+		while ((dev = bus_find_device(&platform_bus_type, NULL,
+					    &info->driver->driver,
+					    (void *)platform_bus_type.match))) {
+			put_device(dev);
+			return dev;
+		}
+	}
+	return NULL;
+}
 
 static void exynos_drm_unregister_devices(void)
 {
-	int i = PDEV_COUNT;
+	int i;
+
+	for (i = ARRAY_SIZE(exynos_drm_drivers) - 1; i >= 0; --i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
+		struct device *dev;
+
+		if (!info->driver || !(info->flags & DRM_VIRTUAL_DEVICE))
+			continue;
 
-	while (--i >= 0) {
-		platform_device_unregister(exynos_drm_pdevs[i]);
-		exynos_drm_pdevs[i] = NULL;
+		while ((dev = bus_find_device(&platform_bus_type, NULL,
+					    &info->driver->driver,
+					    (void *)platform_bus_type.match))) {
+			put_device(dev);
+			platform_device_unregister(to_platform_device(dev));
+		}
 	}
 }
 
 static int exynos_drm_register_devices(void)
 {
+	struct platform_device *pdev;
 	int i;
 
-	for (i = 0; i < PDEV_COUNT; ++i) {
-		struct platform_driver *d = exynos_drm_drv_with_simple_dev[i];
-		struct platform_device *pdev =
-			platform_device_register_simple(d->driver.name, -1,
-							NULL, 0);
+	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
 
-		if (!IS_ERR(pdev)) {
-			exynos_drm_pdevs[i] = pdev;
+		if (!info->driver || !(info->flags & DRM_VIRTUAL_DEVICE))
 			continue;
-		}
-		while (--i >= 0) {
-			platform_device_unregister(exynos_drm_pdevs[i]);
-			exynos_drm_pdevs[i] = NULL;
-		}
 
-		return PTR_ERR(pdev);
+		pdev = platform_device_register_simple(
+					info->driver->driver.name, -1, NULL, 0);
+		if (IS_ERR(pdev))
+			goto fail;
 	}
 
 	return 0;
+fail:
+	exynos_drm_unregister_devices();
+	return PTR_ERR(pdev);
 }
 
-static void exynos_drm_unregister_drivers(struct platform_driver * const *drv,
-					  int count)
+static void exynos_drm_unregister_drivers(void)
 {
-	while (--count >= 0)
-		platform_driver_unregister(drv[count]);
-}
+	int i;
 
-static int exynos_drm_register_drivers(struct platform_driver * const *drv,
-				       int count)
-{
-	int i, ret;
+	for (i = ARRAY_SIZE(exynos_drm_drivers) - 1; i >= 0; --i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
 
-	for (i = 0; i < count; ++i) {
-		ret = platform_driver_register(drv[i]);
-		if (!ret)
+		if (!info->driver)
 			continue;
 
-		while (--i >= 0)
-			platform_driver_unregister(drv[i]);
-
-		return ret;
+		platform_driver_unregister(info->driver);
 	}
-
-	return 0;
 }
 
-static inline int exynos_drm_register_kms_drivers(void)
+static int exynos_drm_register_drivers(void)
 {
-	return exynos_drm_register_drivers(exynos_drm_kms_drivers,
-					ARRAY_SIZE(exynos_drm_kms_drivers));
-}
+	int i, ret;
 
-static inline int exynos_drm_register_non_kms_drivers(void)
-{
-	return exynos_drm_register_drivers(exynos_drm_non_kms_drivers,
-					ARRAY_SIZE(exynos_drm_non_kms_drivers));
-}
+	for (i = 0; i < ARRAY_SIZE(exynos_drm_drivers); ++i) {
+		struct exynos_drm_driver_info *info = &exynos_drm_drivers[i];
 
-static inline void exynos_drm_unregister_kms_drivers(void)
-{
-	exynos_drm_unregister_drivers(exynos_drm_kms_drivers,
-					ARRAY_SIZE(exynos_drm_kms_drivers));
-}
+		if (!info->driver)
+			continue;
 
-static inline void exynos_drm_unregister_non_kms_drivers(void)
-{
-	exynos_drm_unregister_drivers(exynos_drm_non_kms_drivers,
-					ARRAY_SIZE(exynos_drm_non_kms_drivers));
+		ret = platform_driver_register(info->driver);
+		if (ret)
+			goto fail;
+	}
+	return 0;
+fail:
+	exynos_drm_unregister_drivers();
+	return ret;
 }
 
 static int exynos_drm_init(void)
@@ -725,19 +736,12 @@ static int exynos_drm_init(void)
 	if (ret)
 		return ret;
 
-	ret = exynos_drm_register_kms_drivers();
+	ret = exynos_drm_register_drivers();
 	if (ret)
 		goto err_unregister_pdevs;
 
-	ret = exynos_drm_register_non_kms_drivers();
-	if (ret)
-		goto err_unregister_kms_drivers;
-
 	return 0;
 
-err_unregister_kms_drivers:
-	exynos_drm_unregister_kms_drivers();
-
 err_unregister_pdevs:
 	exynos_drm_unregister_devices();
 
@@ -746,8 +750,7 @@ err_unregister_pdevs:
 
 static void exynos_drm_exit(void)
 {
-	exynos_drm_unregister_non_kms_drivers();
-	exynos_drm_unregister_kms_drivers();
+	exynos_drm_unregister_drivers();
 	exynos_drm_unregister_devices();
 }
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 17b5ded72ff1..502f750bad2a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -219,8 +219,10 @@ struct exynos_drm_private {
 	struct drm_crtc *crtc[MAX_CRTC];
 	struct drm_property *plane_zpos_property;
 
+	struct device *dma_dev;
 	unsigned long da_start;
 	unsigned long da_space_size;
+	void *mapping;
 
 	unsigned int pipe;
 
@@ -230,6 +232,13 @@ struct exynos_drm_private {
 	wait_queue_head_t	wait;
 };
 
+static inline struct device *to_dma_dev(struct drm_device *dev)
+{
+	struct exynos_drm_private *priv = dev->dev_private;
+
+	return priv->dma_dev;
+}
+
 /*
  * Exynos drm sub driver structure.
  *
@@ -297,7 +306,6 @@ extern struct platform_driver dp_driver;
 extern struct platform_driver dsi_driver;
 extern struct platform_driver mixer_driver;
 extern struct platform_driver hdmi_driver;
-extern struct platform_driver exynos_drm_common_hdmi_driver;
 extern struct platform_driver vidi_driver;
 extern struct platform_driver g2d_driver;
 extern struct platform_driver fimc_driver;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 26e81d191f56..63c84a106c0b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
 */
 
+#include <asm/unaligned.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_mipi_dsi.h>
@@ -209,12 +211,6 @@
 
 #define OLD_SCLK_MIPI_CLK_NAME "pll_clk"
 
-#define REG_ADDR(dsi, reg_idx)		((dsi)->reg_base + \
-					dsi->driver_data->reg_ofs[(reg_idx)])
-#define DSI_WRITE(dsi, reg_idx, val)	writel((val), \
-					REG_ADDR((dsi), (reg_idx)))
-#define DSI_READ(dsi, reg_idx)		readl(REG_ADDR((dsi), (reg_idx)))
-
 static char *clk_names[5] = { "bus_clk", "sclk_mipi",
 	"phyclk_mipidphy0_bitclkdiv8", "phyclk_mipidphy0_rxclkesc0",
 	"sclk_rgb_vclk_to_dsim0" };
@@ -228,12 +224,8 @@ struct exynos_dsi_transfer {
 	struct list_head list;
 	struct completion completed;
 	int result;
-	u8 data_id;
-	u8 data[2];
+	struct mipi_dsi_packet packet;
 	u16 flags;
-
-	const u8 *tx_payload;
-	u16 tx_len;
 	u16 tx_done;
 
 	u8 *rx_payload;
@@ -247,7 +239,7 @@ struct exynos_dsi_transfer {
 #define DSIM_STATE_VIDOUT_AVAILABLE	BIT(3)
 
 struct exynos_dsi_driver_data {
-	unsigned int *reg_ofs;
+	const unsigned int *reg_ofs;
 	unsigned int plltmr_reg;
 	unsigned int has_freqband:1;
 	unsigned int has_clklane_stop:1;
@@ -255,7 +247,7 @@ struct exynos_dsi_driver_data {
 	unsigned int max_freq;
 	unsigned int wait_for_reset;
 	unsigned int num_bits_resol;
-	unsigned int *reg_values;
+	const unsigned int *reg_values;
 };
 
 struct exynos_dsi {
@@ -324,7 +316,20 @@ enum reg_idx {
 	DSIM_PHYTIMING2_REG,
 	NUM_REGS
 };
-static unsigned int exynos_reg_ofs[] = {
+
+static inline void exynos_dsi_write(struct exynos_dsi *dsi, enum reg_idx idx,
+				    u32 val)
+{
+
+	writel(val, dsi->reg_base + dsi->driver_data->reg_ofs[idx]);
+}
+
+static inline u32 exynos_dsi_read(struct exynos_dsi *dsi, enum reg_idx idx)
+{
+	return readl(dsi->reg_base + dsi->driver_data->reg_ofs[idx]);
+}
+
+static const unsigned int exynos_reg_ofs[] = {
 	[DSIM_STATUS_REG] =  0x00,
 	[DSIM_SWRST_REG] =  0x04,
 	[DSIM_CLKCTRL_REG] =  0x08,
@@ -348,7 +353,7 @@ static unsigned int exynos_reg_ofs[] = {
 	[DSIM_PHYTIMING2_REG] =  0x6c,
 };
 
-static unsigned int exynos5433_reg_ofs[] = {
+static const unsigned int exynos5433_reg_ofs[] = {
 	[DSIM_STATUS_REG] = 0x04,
 	[DSIM_SWRST_REG] = 0x0C,
 	[DSIM_CLKCTRL_REG] = 0x10,
@@ -390,7 +395,7 @@ enum reg_value_idx {
 	PHYTIMING_HS_TRAIL
 };
 
-static unsigned int reg_values[] = {
+static const unsigned int reg_values[] = {
 	[RESET_TYPE] = DSIM_SWRST,
 	[PLL_TIMER] = 500,
 	[STOP_STATE_CNT] = 0xf,
@@ -408,7 +413,25 @@ static unsigned int reg_values[] = {
 	[PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0b),
 };
 
-static unsigned int exynos5433_reg_values[] = {
+static const unsigned int exynos5422_reg_values[] = {
+	[RESET_TYPE] = DSIM_SWRST,
+	[PLL_TIMER] = 500,
+	[STOP_STATE_CNT] = 0xf,
+	[PHYCTRL_ULPS_EXIT] = DSIM_PHYCTRL_ULPS_EXIT(0xaf),
+	[PHYCTRL_VREG_LP] = 0,
+	[PHYCTRL_SLEW_UP] = 0,
+	[PHYTIMING_LPX] = DSIM_PHYTIMING_LPX(0x08),
+	[PHYTIMING_HS_EXIT] = DSIM_PHYTIMING_HS_EXIT(0x0d),
+	[PHYTIMING_CLK_PREPARE] = DSIM_PHYTIMING1_CLK_PREPARE(0x09),
+	[PHYTIMING_CLK_ZERO] = DSIM_PHYTIMING1_CLK_ZERO(0x30),
+	[PHYTIMING_CLK_POST] = DSIM_PHYTIMING1_CLK_POST(0x0e),
+	[PHYTIMING_CLK_TRAIL] = DSIM_PHYTIMING1_CLK_TRAIL(0x0a),
+	[PHYTIMING_HS_PREPARE] = DSIM_PHYTIMING2_HS_PREPARE(0x0c),
+	[PHYTIMING_HS_ZERO] = DSIM_PHYTIMING2_HS_ZERO(0x11),
+	[PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0d),
+};
+
+static const unsigned int exynos5433_reg_values[] = {
 	[RESET_TYPE] = DSIM_FUNCRST,
 	[PLL_TIMER] = 22200,
 	[STOP_STATE_CNT] = 0xa,
@@ -426,7 +449,7 @@ static unsigned int exynos5433_reg_values[] = {
 	[PHYTIMING_HS_TRAIL] = DSIM_PHYTIMING2_HS_TRAIL(0x0c),
 };
 
-static struct exynos_dsi_driver_data exynos3_dsi_driver_data = {
+static const struct exynos_dsi_driver_data exynos3_dsi_driver_data = {
 	.reg_ofs = exynos_reg_ofs,
 	.plltmr_reg = 0x50,
 	.has_freqband = 1,
@@ -438,7 +461,7 @@ static struct exynos_dsi_driver_data exynos3_dsi_driver_data = {
 	.reg_values = reg_values,
 };
 
-static struct exynos_dsi_driver_data exynos4_dsi_driver_data = {
+static const struct exynos_dsi_driver_data exynos4_dsi_driver_data = {
 	.reg_ofs = exynos_reg_ofs,
 	.plltmr_reg = 0x50,
 	.has_freqband = 1,
@@ -450,7 +473,7 @@ static struct exynos_dsi_driver_data exynos4_dsi_driver_data = {
 	.reg_values = reg_values,
 };
 
-static struct exynos_dsi_driver_data exynos4415_dsi_driver_data = {
+static const struct exynos_dsi_driver_data exynos4415_dsi_driver_data = {
 	.reg_ofs = exynos_reg_ofs,
 	.plltmr_reg = 0x58,
 	.has_clklane_stop = 1,
@@ -461,7 +484,7 @@ static struct exynos_dsi_driver_data exynos4415_dsi_driver_data = {
 	.reg_values = reg_values,
 };
 
-static struct exynos_dsi_driver_data exynos5_dsi_driver_data = {
+static const struct exynos_dsi_driver_data exynos5_dsi_driver_data = {
 	.reg_ofs = exynos_reg_ofs,
 	.plltmr_reg = 0x58,
 	.num_clks = 2,
@@ -471,7 +494,7 @@ static struct exynos_dsi_driver_data exynos5_dsi_driver_data = {
 	.reg_values = reg_values,
 };
 
-static struct exynos_dsi_driver_data exynos5433_dsi_driver_data = {
+static const struct exynos_dsi_driver_data exynos5433_dsi_driver_data = {
 	.reg_ofs = exynos5433_reg_ofs,
 	.plltmr_reg = 0xa0,
 	.has_clklane_stop = 1,
@@ -482,7 +505,18 @@ static struct exynos_dsi_driver_data exynos5433_dsi_driver_data = {
 	.reg_values = exynos5433_reg_values,
 };
 
-static struct of_device_id exynos_dsi_of_match[] = {
+static const struct exynos_dsi_driver_data exynos5422_dsi_driver_data = {
+	.reg_ofs = exynos5433_reg_ofs,
+	.plltmr_reg = 0xa0,
+	.has_clklane_stop = 1,
+	.num_clks = 2,
+	.max_freq = 1500,
+	.wait_for_reset = 1,
+	.num_bits_resol = 12,
+	.reg_values = exynos5422_reg_values,
+};
+
+static const struct of_device_id exynos_dsi_of_match[] = {
 	{ .compatible = "samsung,exynos3250-mipi-dsi",
 	  .data = &exynos3_dsi_driver_data },
 	{ .compatible = "samsung,exynos4210-mipi-dsi",
@@ -491,6 +525,8 @@ static struct of_device_id exynos_dsi_of_match[] = {
 	  .data = &exynos4415_dsi_driver_data },
 	{ .compatible = "samsung,exynos5410-mipi-dsi",
 	  .data = &exynos5_dsi_driver_data },
+	{ .compatible = "samsung,exynos5422-mipi-dsi",
+	  .data = &exynos5422_dsi_driver_data },
 	{ .compatible = "samsung,exynos5433-mipi-dsi",
 	  .data = &exynos5433_dsi_driver_data },
 	{ }
@@ -515,10 +551,10 @@ static void exynos_dsi_wait_for_reset(struct exynos_dsi *dsi)
 
 static void exynos_dsi_reset(struct exynos_dsi *dsi)
 {
-	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
+	u32 reset_val = dsi->driver_data->reg_values[RESET_TYPE];
 
 	reinit_completion(&dsi->completed);
-	DSI_WRITE(dsi, DSIM_SWRST_REG, driver_data->reg_values[RESET_TYPE]);
+	exynos_dsi_write(dsi, DSIM_SWRST_REG, reset_val);
 }
 
 #ifndef MHZ
@@ -621,7 +657,7 @@ static unsigned long exynos_dsi_set_pll(struct exynos_dsi *dsi,
 		reg |= DSIM_FREQ_BAND(band);
 	}
 
-	DSI_WRITE(dsi, DSIM_PLLCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PLLCTRL_REG, reg);
 
 	timeout = 1000;
 	do {
@@ -629,7 +665,7 @@ static unsigned long exynos_dsi_set_pll(struct exynos_dsi *dsi,
 			dev_err(dsi->dev, "PLL failed to stabilize\n");
 			return 0;
 		}
-		reg = DSI_READ(dsi, DSIM_STATUS_REG);
+		reg = exynos_dsi_read(dsi, DSIM_STATUS_REG);
 	} while ((reg & DSIM_PLL_STABLE) == 0);
 
 	return fout;
@@ -659,7 +695,7 @@ static int exynos_dsi_enable_clock(struct exynos_dsi *dsi)
 	dev_dbg(dsi->dev, "hs_clk = %lu, byte_clk = %lu, esc_clk = %lu\n",
 		hs_clk, byte_clk, esc_clk);
 
-	reg = DSI_READ(dsi, DSIM_CLKCTRL_REG);
+	reg = exynos_dsi_read(dsi, DSIM_CLKCTRL_REG);
 	reg &= ~(DSIM_ESC_PRESCALER_MASK | DSIM_LANE_ESC_CLK_EN_CLK
 			| DSIM_LANE_ESC_CLK_EN_DATA_MASK | DSIM_PLL_BYPASS
 			| DSIM_BYTE_CLK_SRC_MASK);
@@ -669,7 +705,7 @@ static int exynos_dsi_enable_clock(struct exynos_dsi *dsi)
 			| DSIM_LANE_ESC_CLK_EN_DATA(BIT(dsi->lanes) - 1)
 			| DSIM_BYTE_CLK_SRC(0)
 			| DSIM_TX_REQUEST_HSCLK;
-	DSI_WRITE(dsi, DSIM_CLKCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_CLKCTRL_REG, reg);
 
 	return 0;
 }
@@ -677,7 +713,7 @@ static int exynos_dsi_enable_clock(struct exynos_dsi *dsi)
 static void exynos_dsi_set_phy_ctrl(struct exynos_dsi *dsi)
 {
 	struct exynos_dsi_driver_data *driver_data = dsi->driver_data;
-	unsigned int *reg_values = driver_data->reg_values;
+	const unsigned int *reg_values = driver_data->reg_values;
 	u32 reg;
 
 	if (driver_data->has_freqband)
@@ -686,7 +722,7 @@ static void exynos_dsi_set_phy_ctrl(struct exynos_dsi *dsi)
 	/* B D-PHY: D-PHY Master & Slave Analog Block control */
 	reg = reg_values[PHYCTRL_ULPS_EXIT] | reg_values[PHYCTRL_VREG_LP] |
 		reg_values[PHYCTRL_SLEW_UP];
-	DSI_WRITE(dsi, DSIM_PHYCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PHYCTRL_REG, reg);
 
 	/*
 	 * T LPX: Transmitted length of any Low-Power state period
@@ -694,7 +730,7 @@ static void exynos_dsi_set_phy_ctrl(struct exynos_dsi *dsi)
 	 *	burst
 	 */
 	reg = reg_values[PHYTIMING_LPX] | reg_values[PHYTIMING_HS_EXIT];
-	DSI_WRITE(dsi, DSIM_PHYTIMING_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PHYTIMING_REG, reg);
 
 	/*
 	 * T CLK-PREPARE: Time that the transmitter drives the Clock Lane LP-00
@@ -714,7 +750,7 @@ static void exynos_dsi_set_phy_ctrl(struct exynos_dsi *dsi)
 		reg_values[PHYTIMING_CLK_POST] |
 		reg_values[PHYTIMING_CLK_TRAIL];
 
-	DSI_WRITE(dsi, DSIM_PHYTIMING1_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PHYTIMING1_REG, reg);
 
 	/*
 	 * T HS-PREPARE: Time that the transmitter drives the Data Lane LP-00
@@ -727,29 +763,29 @@ static void exynos_dsi_set_phy_ctrl(struct exynos_dsi *dsi)
 	 */
 	reg = reg_values[PHYTIMING_HS_PREPARE] | reg_values[PHYTIMING_HS_ZERO] |
 		reg_values[PHYTIMING_HS_TRAIL];
-	DSI_WRITE(dsi, DSIM_PHYTIMING2_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PHYTIMING2_REG, reg);
 }
 
 static void exynos_dsi_disable_clock(struct exynos_dsi *dsi)
 {
 	u32 reg;
 
-	reg = DSI_READ(dsi, DSIM_CLKCTRL_REG);
+	reg = exynos_dsi_read(dsi, DSIM_CLKCTRL_REG);
 	reg &= ~(DSIM_LANE_ESC_CLK_EN_CLK | DSIM_LANE_ESC_CLK_EN_DATA_MASK
 			| DSIM_ESC_CLKEN | DSIM_BYTE_CLKEN);
-	DSI_WRITE(dsi, DSIM_CLKCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_CLKCTRL_REG, reg);
 
-	reg = DSI_READ(dsi, DSIM_PLLCTRL_REG);
+	reg = exynos_dsi_read(dsi, DSIM_PLLCTRL_REG);
 	reg &= ~DSIM_PLL_EN;
-	DSI_WRITE(dsi, DSIM_PLLCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PLLCTRL_REG, reg);
 }
 
 static void exynos_dsi_enable_lane(struct exynos_dsi *dsi, u32 lane)
 {
-	u32 reg = DSI_READ(dsi, DSIM_CONFIG_REG);
+	u32 reg = exynos_dsi_read(dsi, DSIM_CONFIG_REG);
 	reg |= (DSIM_NUM_OF_DATA_LANE(dsi->lanes - 1) | DSIM_LANE_EN_CLK |
 			DSIM_LANE_EN(lane));
-	DSI_WRITE(dsi, DSIM_CONFIG_REG, reg);
+	exynos_dsi_write(dsi, DSIM_CONFIG_REG, reg);
 }
 
 static int exynos_dsi_init_link(struct exynos_dsi *dsi)
@@ -760,14 +796,14 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
 	u32 lanes_mask;
 
 	/* Initialize FIFO pointers */
-	reg = DSI_READ(dsi, DSIM_FIFOCTRL_REG);
+	reg = exynos_dsi_read(dsi, DSIM_FIFOCTRL_REG);
 	reg &= ~0x1f;
-	DSI_WRITE(dsi, DSIM_FIFOCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_FIFOCTRL_REG, reg);
 
 	usleep_range(9000, 11000);
 
 	reg |= 0x1f;
-	DSI_WRITE(dsi, DSIM_FIFOCTRL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_FIFOCTRL_REG, reg);
 	usleep_range(9000, 11000);
 
 	/* DSI configuration */
@@ -836,7 +872,7 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
 			dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) {
 		reg |= DSIM_CLKLANE_STOP;
 	}
-	DSI_WRITE(dsi, DSIM_CONFIG_REG, reg);
+	exynos_dsi_write(dsi, DSIM_CONFIG_REG, reg);
 
 	lanes_mask = BIT(dsi->lanes) - 1;
 	exynos_dsi_enable_lane(dsi, lanes_mask);
@@ -849,19 +885,19 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
 			return -EFAULT;
 		}
 
-		reg = DSI_READ(dsi, DSIM_STATUS_REG);
+		reg = exynos_dsi_read(dsi, DSIM_STATUS_REG);
 		if ((reg & DSIM_STOP_STATE_DAT(lanes_mask))
 		    != DSIM_STOP_STATE_DAT(lanes_mask))
 			continue;
 	} while (!(reg & (DSIM_STOP_STATE_CLK | DSIM_TX_READY_HS_CLK)));
 
-	reg = DSI_READ(dsi, DSIM_ESCMODE_REG);
+	reg = exynos_dsi_read(dsi, DSIM_ESCMODE_REG);
 	reg &= ~DSIM_STOP_STATE_CNT_MASK;
 	reg |= DSIM_STOP_STATE_CNT(driver_data->reg_values[STOP_STATE_CNT]);
-	DSI_WRITE(dsi, DSIM_ESCMODE_REG, reg);
+	exynos_dsi_write(dsi, DSIM_ESCMODE_REG, reg);
 
 	reg = DSIM_BTA_TIMEOUT(0xff) | DSIM_LPDR_TIMEOUT(0xffff);
-	DSI_WRITE(dsi, DSIM_TIMEOUT_REG, reg);
+	exynos_dsi_write(dsi, DSIM_TIMEOUT_REG, reg);
 
 	return 0;
 }
@@ -876,20 +912,20 @@ static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi)
 		reg = DSIM_CMD_ALLOW(0xf)
 			| DSIM_STABLE_VFP(vm->vfront_porch)
 			| DSIM_MAIN_VBP(vm->vback_porch);
-		DSI_WRITE(dsi, DSIM_MVPORCH_REG, reg);
+		exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg);
 
 		reg = DSIM_MAIN_HFP(vm->hfront_porch)
 			| DSIM_MAIN_HBP(vm->hback_porch);
-		DSI_WRITE(dsi, DSIM_MHPORCH_REG, reg);
+		exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg);
 
 		reg = DSIM_MAIN_VSA(vm->vsync_len)
 			| DSIM_MAIN_HSA(vm->hsync_len);
-		DSI_WRITE(dsi, DSIM_MSYNC_REG, reg);
+		exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg);
 	}
 	reg =  DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) |
 		DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol);
 
-	DSI_WRITE(dsi, DSIM_MDRESOL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg);
 
 	dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive);
 }
@@ -898,12 +934,12 @@ static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable)
 {
 	u32 reg;
 
-	reg = DSI_READ(dsi, DSIM_MDRESOL_REG);
+	reg = exynos_dsi_read(dsi, DSIM_MDRESOL_REG);
 	if (enable)
 		reg |= DSIM_MAIN_STAND_BY;
 	else
 		reg &= ~DSIM_MAIN_STAND_BY;
-	DSI_WRITE(dsi, DSIM_MDRESOL_REG, reg);
+	exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg);
 }
 
 static int exynos_dsi_wait_for_hdr_fifo(struct exynos_dsi *dsi)
@@ -911,7 +947,7 @@ static int exynos_dsi_wait_for_hdr_fifo(struct exynos_dsi *dsi)
 	int timeout = 2000;
 
 	do {
-		u32 reg = DSI_READ(dsi, DSIM_FIFOCTRL_REG);
+		u32 reg = exynos_dsi_read(dsi, DSIM_FIFOCTRL_REG);
 
 		if (!(reg & DSIM_SFR_HEADER_FULL))
 			return 0;
@@ -925,34 +961,35 @@ static int exynos_dsi_wait_for_hdr_fifo(struct exynos_dsi *dsi)
 
 static void exynos_dsi_set_cmd_lpm(struct exynos_dsi *dsi, bool lpm)
 {
-	u32 v = DSI_READ(dsi, DSIM_ESCMODE_REG);
+	u32 v = exynos_dsi_read(dsi, DSIM_ESCMODE_REG);
 
 	if (lpm)
 		v |= DSIM_CMD_LPDT_LP;
 	else
 		v &= ~DSIM_CMD_LPDT_LP;
 
-	DSI_WRITE(dsi, DSIM_ESCMODE_REG, v);
+	exynos_dsi_write(dsi, DSIM_ESCMODE_REG, v);
 }
 
 static void exynos_dsi_force_bta(struct exynos_dsi *dsi)
 {
-	u32 v = DSI_READ(dsi, DSIM_ESCMODE_REG);
+	u32 v = exynos_dsi_read(dsi, DSIM_ESCMODE_REG);
 	v |= DSIM_FORCE_BTA;
-	DSI_WRITE(dsi, DSIM_ESCMODE_REG, v);
+	exynos_dsi_write(dsi, DSIM_ESCMODE_REG, v);
 }
 
 static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 					struct exynos_dsi_transfer *xfer)
 {
 	struct device *dev = dsi->dev;
-	const u8 *payload = xfer->tx_payload + xfer->tx_done;
-	u16 length = xfer->tx_len - xfer->tx_done;
+	struct mipi_dsi_packet *pkt = &xfer->packet;
+	const u8 *payload = pkt->payload + xfer->tx_done;
+	u16 length = pkt->payload_length - xfer->tx_done;
 	bool first = !xfer->tx_done;
 	u32 reg;
 
 	dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n",
-		xfer, xfer->tx_len, xfer->tx_done, xfer->rx_len, xfer->rx_done);
+		xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done);
 
 	if (length > DSI_TX_FIFO_SIZE)
 		length = DSI_TX_FIFO_SIZE;
@@ -961,9 +998,8 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 
 	/* Send payload */
 	while (length >= 4) {
-		reg = (payload[3] << 24) | (payload[2] << 16)
-					| (payload[1] << 8) | payload[0];
-		DSI_WRITE(dsi, DSIM_PAYLOAD_REG, reg);
+		reg = get_unaligned_le32(payload);
+		exynos_dsi_write(dsi, DSIM_PAYLOAD_REG, reg);
 		payload += 4;
 		length -= 4;
 	}
@@ -978,10 +1014,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 		/* Fall through */
 	case 1:
 		reg |= payload[0];
-		DSI_WRITE(dsi, DSIM_PAYLOAD_REG, reg);
-		break;
-	case 0:
-		/* Do nothing */
+		exynos_dsi_write(dsi, DSIM_PAYLOAD_REG, reg);
 		break;
 	}
 
@@ -989,7 +1022,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 	if (!first)
 		return;
 
-	reg = (xfer->data[1] << 16) | (xfer->data[0] << 8) | xfer->data_id;
+	reg = get_unaligned_le32(pkt->header);
 	if (exynos_dsi_wait_for_hdr_fifo(dsi)) {
 		dev_err(dev, "waiting for header FIFO timed out\n");
 		return;
@@ -1001,7 +1034,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi,
 		dsi->state ^= DSIM_STATE_CMD_LPM;
 	}
 
-	DSI_WRITE(dsi, DSIM_PKTHDR_REG, reg);
+	exynos_dsi_write(dsi, DSIM_PKTHDR_REG, reg);
 
 	if (xfer->flags & MIPI_DSI_MSG_REQ_ACK)
 		exynos_dsi_force_bta(dsi);
@@ -1017,7 +1050,7 @@ static void exynos_dsi_read_from_fifo(struct exynos_dsi *dsi,
 	u32 reg;
 
 	if (first) {
-		reg = DSI_READ(dsi, DSIM_RXFIFO_REG);
+		reg = exynos_dsi_read(dsi, DSIM_RXFIFO_REG);
 
 		switch (reg & 0x3f) {
 		case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_2BYTE:
@@ -1056,7 +1089,7 @@ static void exynos_dsi_read_from_fifo(struct exynos_dsi *dsi,
 
 	/* Receive payload */
 	while (length >= 4) {
-		reg = DSI_READ(dsi, DSIM_RXFIFO_REG);
+		reg = exynos_dsi_read(dsi, DSIM_RXFIFO_REG);
 		payload[0] = (reg >>  0) & 0xff;
 		payload[1] = (reg >>  8) & 0xff;
 		payload[2] = (reg >> 16) & 0xff;
@@ -1066,7 +1099,7 @@ static void exynos_dsi_read_from_fifo(struct exynos_dsi *dsi,
 	}
 
 	if (length) {
-		reg = DSI_READ(dsi, DSIM_RXFIFO_REG);
+		reg = exynos_dsi_read(dsi, DSIM_RXFIFO_REG);
 		switch (length) {
 		case 3:
 			payload[2] = (reg >> 16) & 0xff;
@@ -1085,7 +1118,7 @@ static void exynos_dsi_read_from_fifo(struct exynos_dsi *dsi,
 clear_fifo:
 	length = DSI_RX_FIFO_SIZE / 4;
 	do {
-		reg = DSI_READ(dsi, DSIM_RXFIFO_REG);
+		reg = exynos_dsi_read(dsi, DSIM_RXFIFO_REG);
 		if (reg == DSI_RX_FIFO_EMPTY)
 			break;
 	} while (--length);
@@ -1110,13 +1143,14 @@ again:
 
 	spin_unlock_irqrestore(&dsi->transfer_lock, flags);
 
-	if (xfer->tx_len && xfer->tx_done == xfer->tx_len)
+	if (xfer->packet.payload_length &&
+	    xfer->tx_done == xfer->packet.payload_length)
 		/* waiting for RX */
 		return;
 
 	exynos_dsi_send_to_fifo(dsi, xfer);
 
-	if (xfer->tx_len || xfer->rx_len)
+	if (xfer->packet.payload_length || xfer->rx_len)
 		return;
 
 	xfer->result = 0;
@@ -1152,10 +1186,11 @@ static bool exynos_dsi_transfer_finish(struct exynos_dsi *dsi)
 	spin_unlock_irqrestore(&dsi->transfer_lock, flags);
 
 	dev_dbg(dsi->dev,
-		"> xfer %p, tx_len %u, tx_done %u, rx_len %u, rx_done %u\n",
-		xfer, xfer->tx_len, xfer->tx_done, xfer->rx_len, xfer->rx_done);
+		"> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n",
+		xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len,
+		xfer->rx_done);
 
-	if (xfer->tx_done != xfer->tx_len)
+	if (xfer->tx_done != xfer->packet.payload_length)
 		return true;
 
 	if (xfer->rx_done != xfer->rx_len)
@@ -1226,9 +1261,10 @@ static int exynos_dsi_transfer(struct exynos_dsi *dsi,
 	wait_for_completion_timeout(&xfer->completed,
 				    msecs_to_jiffies(DSI_XFER_TIMEOUT_MS));
 	if (xfer->result == -ETIMEDOUT) {
+		struct mipi_dsi_packet *pkt = &xfer->packet;
 		exynos_dsi_remove_transfer(dsi, xfer);
-		dev_err(dsi->dev, "xfer timed out: %*ph %*ph\n", 2, xfer->data,
-			xfer->tx_len, xfer->tx_payload);
+		dev_err(dsi->dev, "xfer timed out: %*ph %*ph\n", 4, pkt->header,
+			(int)pkt->payload_length, pkt->payload);
 		return -ETIMEDOUT;
 	}
 
@@ -1241,20 +1277,20 @@ static irqreturn_t exynos_dsi_irq(int irq, void *dev_id)
 	struct exynos_dsi *dsi = dev_id;
 	u32 status;
 
-	status = DSI_READ(dsi, DSIM_INTSRC_REG);
+	status = exynos_dsi_read(dsi, DSIM_INTSRC_REG);
 	if (!status) {
 		static unsigned long int j;
 		if (printk_timed_ratelimit(&j, 500))
 			dev_warn(dsi->dev, "spurious interrupt\n");
 		return IRQ_HANDLED;
 	}
-	DSI_WRITE(dsi, DSIM_INTSRC_REG, status);
+	exynos_dsi_write(dsi, DSIM_INTSRC_REG, status);
 
 	if (status & DSIM_INT_SW_RST_RELEASE) {
 		u32 mask = ~(DSIM_INT_RX_DONE | DSIM_INT_SFR_FIFO_EMPTY |
 			DSIM_INT_SFR_HDR_FIFO_EMPTY | DSIM_INT_FRAME_DONE |
 			DSIM_INT_RX_ECC_ERR | DSIM_INT_SW_RST_RELEASE);
-		DSI_WRITE(dsi, DSIM_INTMSK_REG, mask);
+		exynos_dsi_write(dsi, DSIM_INTMSK_REG, mask);
 		complete(&dsi->completed);
 		return IRQ_HANDLED;
 	}
@@ -1401,12 +1437,6 @@ static int exynos_dsi_host_detach(struct mipi_dsi_host *host,
 	return 0;
 }
 
-/* distinguish between short and long DSI packet types */
-static bool exynos_dsi_is_short_dsi_type(u8 type)
-{
-	return (type & 0x0f) <= 8;
-}
-
 static ssize_t exynos_dsi_host_transfer(struct mipi_dsi_host *host,
 				        const struct mipi_dsi_msg *msg)
 {
@@ -1424,25 +1454,9 @@ static ssize_t exynos_dsi_host_transfer(struct mipi_dsi_host *host,
 		dsi->state |= DSIM_STATE_INITIALIZED;
 	}
 
-	if (msg->tx_len == 0)
-		return -EINVAL;
-
-	xfer.data_id = msg->type | (msg->channel << 6);
-
-	if (exynos_dsi_is_short_dsi_type(msg->type)) {
-		const char *tx_buf = msg->tx_buf;
-
-		if (msg->tx_len > 2)
-			return -EINVAL;
-		xfer.tx_len = 0;
-		xfer.data[0] = tx_buf[0];
-		xfer.data[1] = (msg->tx_len == 2) ? tx_buf[1] : 0;
-	} else {
-		xfer.tx_len = msg->tx_len;
-		xfer.data[0] = msg->tx_len & 0xff;
-		xfer.data[1] = msg->tx_len >> 8;
-		xfer.tx_payload = msg->tx_buf;
-	}
+	ret = mipi_dsi_create_packet(&xfer.packet, msg);
+	if (ret < 0)
+		return ret;
 
 	xfer.rx_len = msg->rx_len;
 	xfer.rx_payload = msg->rx_buf;
@@ -1597,13 +1611,6 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
 	return 0;
 }
 
-static bool exynos_dsi_mode_fixup(struct drm_encoder *encoder,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void exynos_dsi_mode_set(struct drm_encoder *encoder,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode)
@@ -1623,7 +1630,6 @@ static void exynos_dsi_mode_set(struct drm_encoder *encoder,
 }
 
 static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
-	.mode_fixup = exynos_dsi_mode_fixup,
 	.mode_set = exynos_dsi_mode_set,
 	.enable = exynos_dsi_enable,
 	.disable = exynos_dsi_disable,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 8baabd813ff5..4ae860c44f1d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -50,7 +50,7 @@ static int exynos_drm_fb_mmap(struct fb_info *info,
 	if (vm_size > exynos_gem->size)
 		return -EINVAL;
 
-	ret = dma_mmap_attrs(helper->dev->dev, vma, exynos_gem->cookie,
+	ret = dma_mmap_attrs(to_dma_dev(helper->dev), vma, exynos_gem->cookie,
 			     exynos_gem->dma_addr, exynos_gem->size,
 			     &exynos_gem->dma_attrs);
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 8a4f4a0211d0..0525c56145db 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -163,7 +163,6 @@ struct fimc_context {
 	u32		clk_frequency;
 	struct regmap	*sysreg;
 	struct fimc_scaler	sc;
-	struct exynos_drm_ipp_pol	pol;
 	int	id;
 	int	irq;
 	bool	suspended;
@@ -260,32 +259,6 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
 	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
 
-static void fimc_set_polarity(struct fimc_context *ctx,
-		struct exynos_drm_ipp_pol *pol)
-{
-	u32 cfg;
-
-	DRM_DEBUG_KMS("inv_pclk[%d]inv_vsync[%d]\n",
-		pol->inv_pclk, pol->inv_vsync);
-	DRM_DEBUG_KMS("inv_href[%d]inv_hsync[%d]\n",
-		pol->inv_href, pol->inv_hsync);
-
-	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
-	cfg &= ~(EXYNOS_CIGCTRL_INVPOLPCLK | EXYNOS_CIGCTRL_INVPOLVSYNC |
-		 EXYNOS_CIGCTRL_INVPOLHREF | EXYNOS_CIGCTRL_INVPOLHSYNC);
-
-	if (pol->inv_pclk)
-		cfg |= EXYNOS_CIGCTRL_INVPOLPCLK;
-	if (pol->inv_vsync)
-		cfg |= EXYNOS_CIGCTRL_INVPOLVSYNC;
-	if (pol->inv_href)
-		cfg |= EXYNOS_CIGCTRL_INVPOLHREF;
-	if (pol->inv_hsync)
-		cfg |= EXYNOS_CIGCTRL_INVPOLHSYNC;
-
-	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
-}
-
 static void fimc_handle_jpeg(struct fimc_context *ctx, bool enable)
 {
 	u32 cfg;
@@ -1467,7 +1440,6 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
 	/* If set ture, we can save jpeg about screen */
 	fimc_handle_jpeg(ctx, false);
 	fimc_set_scaler(ctx, &ctx->sc);
-	fimc_set_polarity(ctx, &ctx->pol);
 
 	switch (cmd) {
 	case IPP_CMD_M2M:
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 70194d0e4fe4..51d484ae9f49 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -94,12 +94,14 @@ struct fimd_driver_data {
 	unsigned int lcdblk_offset;
 	unsigned int lcdblk_vt_shift;
 	unsigned int lcdblk_bypass_shift;
+	unsigned int lcdblk_mic_bypass_shift;
 
 	unsigned int has_shadowcon:1;
 	unsigned int has_clksel:1;
 	unsigned int has_limited_fmt:1;
 	unsigned int has_vidoutcon:1;
 	unsigned int has_vtsel:1;
+	unsigned int has_mic_bypass:1;
 };
 
 static struct fimd_driver_data s3c64xx_fimd_driver_data = {
@@ -145,6 +147,18 @@ static struct fimd_driver_data exynos5_fimd_driver_data = {
 	.has_vtsel = 1,
 };
 
+static struct fimd_driver_data exynos5420_fimd_driver_data = {
+	.timing_base = 0x20000,
+	.lcdblk_offset = 0x214,
+	.lcdblk_vt_shift = 24,
+	.lcdblk_bypass_shift = 15,
+	.lcdblk_mic_bypass_shift = 11,
+	.has_shadowcon = 1,
+	.has_vidoutcon = 1,
+	.has_vtsel = 1,
+	.has_mic_bypass = 1,
+};
+
 struct fimd_context {
 	struct device			*dev;
 	struct drm_device		*drm_dev;
@@ -168,7 +182,6 @@ struct fimd_context {
 	atomic_t			win_updated;
 	atomic_t			triggering;
 
-	struct exynos_drm_panel_info panel;
 	struct fimd_driver_data *driver_data;
 	struct drm_encoder *encoder;
 };
@@ -184,6 +197,8 @@ static const struct of_device_id fimd_driver_dt_match[] = {
 	  .data = &exynos4415_fimd_driver_data },
 	{ .compatible = "samsung,exynos5250-fimd",
 	  .data = &exynos5_fimd_driver_data },
+	{ .compatible = "samsung,exynos5420-fimd",
+	  .data = &exynos5420_fimd_driver_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, fimd_driver_dt_match);
@@ -380,7 +395,7 @@ static u32 fimd_calc_clkdiv(struct fimd_context *ctx,
 	}
 
 	/* Find the clock divider value that gets us closest to ideal_clk */
-	clkdiv = DIV_ROUND_UP(clk_get_rate(ctx->lcd_clk), ideal_clk);
+	clkdiv = DIV_ROUND_CLOSEST(clk_get_rate(ctx->lcd_clk), ideal_clk);
 
 	return (clkdiv < 0x100) ? clkdiv : 0xff;
 }
@@ -461,6 +476,18 @@ static void fimd_commit(struct exynos_drm_crtc *crtc)
 		return;
 	}
 
+	/* TODO: When MIC is enabled for display path, the lcdblk_mic_bypass
+	 * bit should be cleared.
+	 */
+	if (driver_data->has_mic_bypass && ctx->sysreg &&
+	    regmap_update_bits(ctx->sysreg,
+				driver_data->lcdblk_offset,
+				0x1 << driver_data->lcdblk_mic_bypass_shift,
+				0x1 << driver_data->lcdblk_mic_bypass_shift)) {
+		DRM_ERROR("Failed to update sysreg for bypass mic.\n");
+		return;
+	}
+
 	/* setup horizontal and vertical display size. */
 	val = VIDTCON2_LINEVAL(mode->vdisplay - 1) |
 	       VIDTCON2_HOZVAL(mode->hdisplay - 1) |
@@ -861,7 +888,8 @@ static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable)
 	 * clock. On these SoCs the bootloader may enable it but any
 	 * power domain off/on will reset it to disable state.
 	 */
-	if (ctx->driver_data != &exynos5_fimd_driver_data)
+	if (ctx->driver_data != &exynos5_fimd_driver_data ||
+	    ctx->driver_data != &exynos5420_fimd_driver_data)
 		return;
 
 	val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 8dfe6e113a88..193d3602dffb 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -259,7 +259,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d)
 	init_dma_attrs(&g2d->cmdlist_dma_attrs);
 	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &g2d->cmdlist_dma_attrs);
 
-	g2d->cmdlist_pool_virt = dma_alloc_attrs(subdrv->drm_dev->dev,
+	g2d->cmdlist_pool_virt = dma_alloc_attrs(to_dma_dev(subdrv->drm_dev),
 						G2D_CMDLIST_POOL_SIZE,
 						&g2d->cmdlist_pool, GFP_KERNEL,
 						&g2d->cmdlist_dma_attrs);
@@ -293,7 +293,7 @@ static int g2d_init_cmdlist(struct g2d_data *g2d)
 	return 0;
 
 err:
-	dma_free_attrs(subdrv->drm_dev->dev, G2D_CMDLIST_POOL_SIZE,
+	dma_free_attrs(to_dma_dev(subdrv->drm_dev), G2D_CMDLIST_POOL_SIZE,
 			g2d->cmdlist_pool_virt,
 			g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs);
 	return ret;
@@ -306,7 +306,8 @@ static void g2d_fini_cmdlist(struct g2d_data *g2d)
 	kfree(g2d->cmdlist_node);
 
 	if (g2d->cmdlist_pool_virt && g2d->cmdlist_pool) {
-		dma_free_attrs(subdrv->drm_dev->dev, G2D_CMDLIST_POOL_SIZE,
+		dma_free_attrs(to_dma_dev(subdrv->drm_dev),
+				G2D_CMDLIST_POOL_SIZE,
 				g2d->cmdlist_pool_virt,
 				g2d->cmdlist_pool, &g2d->cmdlist_dma_attrs);
 	}
@@ -880,7 +881,6 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
 	struct g2d_runqueue_node *runqueue_node = g2d->runqueue_node;
 	struct drm_exynos_pending_g2d_event *e;
 	struct timeval now;
-	unsigned long flags;
 
 	if (list_empty(&runqueue_node->event_list))
 		return;
@@ -893,10 +893,7 @@ static void g2d_finish_event(struct g2d_data *g2d, u32 cmdlist_no)
 	e->event.tv_usec = now.tv_usec;
 	e->event.cmdlist_no = cmdlist_no;
 
-	spin_lock_irqsave(&drm_dev->event_lock, flags);
-	list_move_tail(&e->base.link, &e->base.file_priv->event_list);
-	wake_up_interruptible(&e->base.file_priv->event_wait);
-	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+	drm_send_event(drm_dev, &e->base);
 }
 
 static irqreturn_t g2d_irq_handler(int irq, void *dev_id)
@@ -1072,7 +1069,6 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
 	struct drm_exynos_pending_g2d_event *e;
 	struct g2d_cmdlist_node *node;
 	struct g2d_cmdlist *cmdlist;
-	unsigned long flags;
 	int size;
 	int ret;
 
@@ -1094,21 +1090,8 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
 	node->event = NULL;
 
 	if (req->event_type != G2D_EVENT_NOT) {
-		spin_lock_irqsave(&drm_dev->event_lock, flags);
-		if (file->event_space < sizeof(e->event)) {
-			spin_unlock_irqrestore(&drm_dev->event_lock, flags);
-			ret = -ENOMEM;
-			goto err;
-		}
-		file->event_space -= sizeof(e->event);
-		spin_unlock_irqrestore(&drm_dev->event_lock, flags);
-
 		e = kzalloc(sizeof(*node->event), GFP_KERNEL);
 		if (!e) {
-			spin_lock_irqsave(&drm_dev->event_lock, flags);
-			file->event_space += sizeof(e->event);
-			spin_unlock_irqrestore(&drm_dev->event_lock, flags);
-
 			ret = -ENOMEM;
 			goto err;
 		}
@@ -1116,9 +1099,12 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
 		e->event.base.type = DRM_EXYNOS_G2D_EVENT;
 		e->event.base.length = sizeof(e->event);
 		e->event.user_data = req->user_data;
-		e->base.event = &e->event.base;
-		e->base.file_priv = file;
-		e->base.destroy = (void (*) (struct drm_pending_event *)) kfree;
+
+		ret = drm_event_reserve_init(drm_dev, file, &e->base, &e->event.base);
+		if (ret) {
+			kfree(e);
+			goto err;
+		}
 
 		node->event = e;
 	}
@@ -1220,12 +1206,8 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data,
 err_unmap:
 	g2d_unmap_cmdlist_gem(g2d, node, file);
 err_free_event:
-	if (node->event) {
-		spin_lock_irqsave(&drm_dev->event_lock, flags);
-		file->event_space += sizeof(e->event);
-		spin_unlock_irqrestore(&drm_dev->event_lock, flags);
-		kfree(node->event);
-	}
+	if (node->event)
+		drm_event_cancel_free(drm_dev, &node->event->base);
 err:
 	g2d_put_cmdlist(g2d, node);
 	return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 26b5e4bd55b6..2914d62d0d80 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -65,7 +65,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem)
 		return -ENOMEM;
 	}
 
-	exynos_gem->cookie = dma_alloc_attrs(dev->dev, exynos_gem->size,
+	exynos_gem->cookie = dma_alloc_attrs(to_dma_dev(dev), exynos_gem->size,
 					     &exynos_gem->dma_addr, GFP_KERNEL,
 					     &exynos_gem->dma_attrs);
 	if (!exynos_gem->cookie) {
@@ -73,7 +73,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem)
 		goto err_free;
 	}
 
-	ret = dma_get_sgtable_attrs(dev->dev, &sgt, exynos_gem->cookie,
+	ret = dma_get_sgtable_attrs(to_dma_dev(dev), &sgt, exynos_gem->cookie,
 				    exynos_gem->dma_addr, exynos_gem->size,
 				    &exynos_gem->dma_attrs);
 	if (ret < 0) {
@@ -98,7 +98,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem)
 err_sgt_free:
 	sg_free_table(&sgt);
 err_dma_free:
-	dma_free_attrs(dev->dev, exynos_gem->size, exynos_gem->cookie,
+	dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie,
 		       exynos_gem->dma_addr, &exynos_gem->dma_attrs);
 err_free:
 	drm_free_large(exynos_gem->pages);
@@ -118,7 +118,7 @@ static void exynos_drm_free_buf(struct exynos_drm_gem *exynos_gem)
 	DRM_DEBUG_KMS("dma_addr(0x%lx), size(0x%lx)\n",
 			(unsigned long)exynos_gem->dma_addr, exynos_gem->size);
 
-	dma_free_attrs(dev->dev, exynos_gem->size, exynos_gem->cookie,
+	dma_free_attrs(to_dma_dev(dev), exynos_gem->size, exynos_gem->cookie,
 			(dma_addr_t)exynos_gem->dma_addr,
 			&exynos_gem->dma_attrs);
 
@@ -280,6 +280,15 @@ int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int exynos_drm_gem_map_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_map *args = data;
+
+	return exynos_drm_gem_dumb_map_offset(file_priv, dev, args->handle,
+					      &args->offset);
+}
+
 dma_addr_t *exynos_drm_gem_get_dma_addr(struct drm_device *dev,
 					unsigned int gem_handle,
 					struct drm_file *filp)
@@ -335,7 +344,7 @@ static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem,
 	if (vm_size > exynos_gem->size)
 		return -EINVAL;
 
-	ret = dma_mmap_attrs(drm_dev->dev, vma, exynos_gem->cookie,
+	ret = dma_mmap_attrs(to_dma_dev(drm_dev), vma, exynos_gem->cookie,
 			     exynos_gem->dma_addr, exynos_gem->size,
 			     &exynos_gem->dma_attrs);
 	if (ret < 0) {
@@ -381,7 +390,7 @@ int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev,
 
 	mutex_lock(&drm_dev->struct_mutex);
 
-	nents = dma_map_sg(drm_dev->dev, sgt->sgl, sgt->nents, dir);
+	nents = dma_map_sg(to_dma_dev(drm_dev), sgt->sgl, sgt->nents, dir);
 	if (!nents) {
 		DRM_ERROR("failed to map sgl with dma.\n");
 		mutex_unlock(&drm_dev->struct_mutex);
@@ -396,7 +405,7 @@ void exynos_gem_unmap_sgt_from_dma(struct drm_device *drm_dev,
 				struct sg_table *sgt,
 				enum dma_data_direction dir)
 {
-	dma_unmap_sg(drm_dev->dev, sgt->sgl, sgt->nents, dir);
+	dma_unmap_sg(to_dma_dev(drm_dev), sgt->sgl, sgt->nents, dir);
 }
 
 void exynos_drm_gem_free_object(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 9ca5047959ec..00223052b87b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -71,6 +71,10 @@ struct exynos_drm_gem *exynos_drm_gem_create(struct drm_device *dev,
 int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 
+/* get fake-offset of gem object that can be used with mmap. */
+int exynos_drm_gem_map_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+
 /*
  * get dma address from gem handle and this function could be used for
  * other drivers such as 2d/3d acceleration drivers.
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.c b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
index d73b9ad35b7a..7ca09ee19656 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
@@ -9,7 +9,7 @@
  * option) any later version.
  */
 
-#include <drmP.h>
+#include <drm/drmP.h>
 #include <drm/exynos_drm.h>
 
 #include <linux/dma-mapping.h>
@@ -30,7 +30,6 @@ int drm_create_iommu_mapping(struct drm_device *drm_dev)
 {
 	struct dma_iommu_mapping *mapping = NULL;
 	struct exynos_drm_private *priv = drm_dev->dev_private;
-	struct device *dev = drm_dev->dev;
 
 	if (!priv->da_start)
 		priv->da_start = EXYNOS_DEV_ADDR_START;
@@ -43,18 +42,9 @@ int drm_create_iommu_mapping(struct drm_device *drm_dev)
 	if (IS_ERR(mapping))
 		return PTR_ERR(mapping);
 
-	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms),
-					GFP_KERNEL);
-	if (!dev->dma_parms)
-		goto error;
-
-	dma_set_max_seg_size(dev, 0xffffffffu);
-	dev->archdata.mapping = mapping;
+	priv->mapping = mapping;
 
 	return 0;
-error:
-	arm_iommu_release_mapping(mapping);
-	return -ENOMEM;
 }
 
 /*
@@ -67,9 +57,9 @@ error:
  */
 void drm_release_iommu_mapping(struct drm_device *drm_dev)
 {
-	struct device *dev = drm_dev->dev;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	arm_iommu_release_mapping(dev->archdata.mapping);
+	arm_iommu_release_mapping(priv->mapping);
 }
 
 /*
@@ -84,10 +74,10 @@ void drm_release_iommu_mapping(struct drm_device *drm_dev)
 int drm_iommu_attach_device(struct drm_device *drm_dev,
 				struct device *subdrv_dev)
 {
-	struct device *dev = drm_dev->dev;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
 	int ret;
 
-	if (!dev->archdata.mapping)
+	if (!priv->mapping)
 		return 0;
 
 	subdrv_dev->dma_parms = devm_kzalloc(subdrv_dev,
@@ -101,23 +91,12 @@ int drm_iommu_attach_device(struct drm_device *drm_dev,
 	if (subdrv_dev->archdata.mapping)
 		arm_iommu_detach_device(subdrv_dev);
 
-	ret = arm_iommu_attach_device(subdrv_dev, dev->archdata.mapping);
+	ret = arm_iommu_attach_device(subdrv_dev, priv->mapping);
 	if (ret < 0) {
 		DRM_DEBUG_KMS("failed iommu attach.\n");
 		return ret;
 	}
 
-	/*
-	 * Set dma_ops to drm_device just one time.
-	 *
-	 * The dma mapping api needs device object and the api is used
-	 * to allocate physial memory and map it with iommu table.
-	 * If iommu attach succeeded, the sub driver would have dma_ops
-	 * for iommu and also all sub drivers have same dma_ops.
-	 */
-	if (get_dma_ops(dev) == get_dma_ops(NULL))
-		set_dma_ops(dev, get_dma_ops(subdrv_dev));
-
 	return 0;
 }
 
@@ -133,8 +112,8 @@ int drm_iommu_attach_device(struct drm_device *drm_dev,
 void drm_iommu_detach_device(struct drm_device *drm_dev,
 				struct device *subdrv_dev)
 {
-	struct device *dev = drm_dev->dev;
-	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
+	struct dma_iommu_mapping *mapping = priv->mapping;
 
 	if (!mapping || !mapping->domain)
 		return;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index dc1b5441f491..5ffebe02ee4d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -29,9 +29,9 @@ void drm_iommu_detach_device(struct drm_device *dev_dev,
 
 static inline bool is_drm_iommu_supported(struct drm_device *drm_dev)
 {
-	struct device *dev = drm_dev->dev;
+	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	return dev->archdata.mapping ? true : false;
+	return priv->mapping ? true : false;
 }
 
 #else
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 95eeb9116f10..9c84ee76f18a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -618,27 +618,18 @@ static void ipp_clean_mem_nodes(struct drm_device *drm_dev,
 	mutex_unlock(&c_node->mem_lock);
 }
 
-static void ipp_free_event(struct drm_pending_event *event)
-{
-	kfree(event);
-}
-
 static int ipp_get_event(struct drm_device *drm_dev,
 		struct drm_exynos_ipp_cmd_node *c_node,
 		struct drm_exynos_ipp_queue_buf *qbuf)
 {
 	struct drm_exynos_ipp_send_event *e;
-	unsigned long flags;
+	int ret;
 
 	DRM_DEBUG_KMS("ops_id[%d]buf_id[%d]\n", qbuf->ops_id, qbuf->buf_id);
 
 	e = kzalloc(sizeof(*e), GFP_KERNEL);
-	if (!e) {
-		spin_lock_irqsave(&drm_dev->event_lock, flags);
-		c_node->filp->event_space += sizeof(e->event);
-		spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+	if (!e)
 		return -ENOMEM;
-	}
 
 	/* make event */
 	e->event.base.type = DRM_EXYNOS_IPP_EVENT;
@@ -646,9 +637,13 @@ static int ipp_get_event(struct drm_device *drm_dev,
 	e->event.user_data = qbuf->user_data;
 	e->event.prop_id = qbuf->prop_id;
 	e->event.buf_id[EXYNOS_DRM_OPS_DST] = qbuf->buf_id;
-	e->base.event = &e->event.base;
-	e->base.file_priv = c_node->filp;
-	e->base.destroy = ipp_free_event;
+
+	ret = drm_event_reserve_init(drm_dev, c_node->filp, &e->base, &e->event.base);
+	if (ret) {
+		kfree(e);
+		return ret;
+	}
+
 	mutex_lock(&c_node->event_lock);
 	list_add_tail(&e->base.link, &c_node->event_list);
 	mutex_unlock(&c_node->event_lock);
@@ -1412,7 +1407,6 @@ static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
 	struct drm_exynos_ipp_send_event *e;
 	struct list_head *head;
 	struct timeval now;
-	unsigned long flags;
 	u32 tbuf_id[EXYNOS_DRM_OPS_MAX] = {0, };
 	int ret, i;
 
@@ -1525,10 +1519,7 @@ static int ipp_send_event(struct exynos_drm_ippdrv *ippdrv,
 	for_each_ipp_ops(i)
 		e->event.buf_id[i] = tbuf_id[i];
 
-	spin_lock_irqsave(&drm_dev->event_lock, flags);
-	list_move_tail(&e->base.link, &e->base.file_priv->event_list);
-	wake_up_interruptible(&e->base.file_priv->event_wait);
-	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+	drm_send_event(drm_dev, &e->base);
 	mutex_unlock(&c_node->event_lock);
 
 	DRM_DEBUG_KMS("done cmd[%d]prop_id[%d]buf_id[%d]\n",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index ce59f4443394..f18fbe43f55f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -20,7 +20,6 @@
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
 #include "regs-rotator.h"
-#include "exynos_drm.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_ipp.h"
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index b605bd7395ec..608b0afa337f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -412,13 +412,6 @@ static int vidi_create_connector(struct drm_encoder *encoder)
 	return 0;
 }
 
-static bool exynos_vidi_mode_fixup(struct drm_encoder *encoder,
-				 const struct drm_display_mode *mode,
-				 struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void exynos_vidi_mode_set(struct drm_encoder *encoder,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted_mode)
@@ -434,7 +427,6 @@ static void exynos_vidi_disable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs exynos_vidi_encoder_helper_funcs = {
-	.mode_fixup = exynos_vidi_mode_fixup,
 	.mode_set = exynos_vidi_mode_set,
 	.enable = exynos_vidi_enable,
 	.disable = exynos_vidi_disable,
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 21a29dbce18c..e148d728e28c 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -867,10 +867,8 @@ static void hdmi_reg_infoframe(struct hdmi_context *hdata,
 {
 	u32 hdr_sum;
 	u8 chksum;
-	u32 mod;
 	u8 ar;
 
-	mod = hdmi_reg_read(hdata, HDMI_MODE_SEL);
 	if (hdata->dvi_mode) {
 		hdmi_reg_writeb(hdata, HDMI_VSI_CON,
 				HDMI_VSI_CON_DO_NOT_TRANSMIT);
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index d8ab8f0af10c..4ed7798533f9 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -42,41 +42,24 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
-	int ret;
 
-	ret = regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-				 DCU_MODE_DCU_MODE_MASK,
-				 DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-	if (ret)
-		dev_err(fsl_dev->dev, "Disable CRTC failed\n");
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		dev_err(fsl_dev->dev, "Enable CRTC failed\n");
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 }
 
 static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
-	int ret;
-
-	ret = regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-				 DCU_MODE_DCU_MODE_MASK,
-				 DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
-	if (ret)
-		dev_err(fsl_dev->dev, "Enable CRTC failed\n");
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		dev_err(fsl_dev->dev, "Enable CRTC failed\n");
-}
 
-static bool fsl_dcu_drm_crtc_mode_fixup(struct drm_crtc *crtc,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode)
-{
-	return true;
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 }
 
 static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
@@ -84,9 +67,8 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 	struct drm_display_mode *mode = &crtc->state->mode;
-	unsigned int hbp, hfp, hsw, vbp, vfp, vsw, div, index;
+	unsigned int hbp, hfp, hsw, vbp, vfp, vsw, div, index, pol = 0;
 	unsigned long dcuclk;
-	int ret;
 
 	index = drm_crtc_index(crtc);
 	dcuclk = clk_get_rate(fsl_dev->clk);
@@ -100,51 +82,36 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	vfp = mode->vsync_start - mode->vdisplay;
 	vsw = mode->vsync_end - mode->vsync_start;
 
-	ret = regmap_write(fsl_dev->regmap, DCU_HSYN_PARA,
-			   DCU_HSYN_PARA_BP(hbp) |
-			   DCU_HSYN_PARA_PW(hsw) |
-			   DCU_HSYN_PARA_FP(hfp));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_VSYN_PARA,
-			   DCU_VSYN_PARA_BP(vbp) |
-			   DCU_VSYN_PARA_PW(vsw) |
-			   DCU_VSYN_PARA_FP(vfp));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_DISP_SIZE,
-			   DCU_DISP_SIZE_DELTA_Y(mode->vdisplay) |
-			   DCU_DISP_SIZE_DELTA_X(mode->hdisplay));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_DIV_RATIO, div);
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_SYN_POL,
-			   DCU_SYN_POL_INV_VS_LOW | DCU_SYN_POL_INV_HS_LOW);
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_BGND, DCU_BGND_R(0) |
-			   DCU_BGND_G(0) | DCU_BGND_B(0));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_DCU_MODE,
-			   DCU_MODE_BLEND_ITER(1) | DCU_MODE_RASTER_EN);
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_THRESHOLD,
-			   DCU_THRESHOLD_LS_BF_VS(BF_VS_VAL) |
-			   DCU_THRESHOLD_OUT_BUF_HIGH(BUF_MAX_VAL) |
-			   DCU_THRESHOLD_OUT_BUF_LOW(BUF_MIN_VAL));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		goto set_failed;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		pol |= DCU_SYN_POL_INV_HS_LOW;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		pol |= DCU_SYN_POL_INV_VS_LOW;
+
+	regmap_write(fsl_dev->regmap, DCU_HSYN_PARA,
+		     DCU_HSYN_PARA_BP(hbp) |
+		     DCU_HSYN_PARA_PW(hsw) |
+		     DCU_HSYN_PARA_FP(hfp));
+	regmap_write(fsl_dev->regmap, DCU_VSYN_PARA,
+		     DCU_VSYN_PARA_BP(vbp) |
+		     DCU_VSYN_PARA_PW(vsw) |
+		     DCU_VSYN_PARA_FP(vfp));
+	regmap_write(fsl_dev->regmap, DCU_DISP_SIZE,
+		     DCU_DISP_SIZE_DELTA_Y(mode->vdisplay) |
+		     DCU_DISP_SIZE_DELTA_X(mode->hdisplay));
+	regmap_write(fsl_dev->regmap, DCU_DIV_RATIO, div);
+	regmap_write(fsl_dev->regmap, DCU_SYN_POL, pol);
+	regmap_write(fsl_dev->regmap, DCU_BGND, DCU_BGND_R(0) |
+		     DCU_BGND_G(0) | DCU_BGND_B(0));
+	regmap_write(fsl_dev->regmap, DCU_DCU_MODE,
+		     DCU_MODE_BLEND_ITER(1) | DCU_MODE_RASTER_EN);
+	regmap_write(fsl_dev->regmap, DCU_THRESHOLD,
+		     DCU_THRESHOLD_LS_BF_VS(BF_VS_VAL) |
+		     DCU_THRESHOLD_OUT_BUF_HIGH(BUF_MAX_VAL) |
+		     DCU_THRESHOLD_OUT_BUF_LOW(BUF_MIN_VAL));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 	return;
-set_failed:
-	dev_err(dev->dev, "set DCU register failed\n");
 }
 
 static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = {
@@ -153,7 +120,6 @@ static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = {
 	.atomic_flush = fsl_dcu_drm_crtc_atomic_flush,
 	.disable = fsl_dcu_drm_disable_crtc,
 	.enable = fsl_dcu_drm_crtc_enable,
-	.mode_fixup = fsl_dcu_drm_crtc_mode_fixup,
 	.mode_set_nofb = fsl_dcu_drm_crtc_mode_set_nofb,
 };
 
@@ -174,10 +140,15 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 	int ret;
 
 	primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm);
+	if (!primary)
+		return -ENOMEM;
+
 	ret = drm_crtc_init_with_planes(fsl_dev->drm, crtc, primary, NULL,
 					&fsl_dcu_drm_crtc_funcs, NULL);
-	if (ret < 0)
+	if (ret) {
+		primary->funcs->destroy(primary);
 		return ret;
+	}
 
 	drm_crtc_helper_add(crtc, &fsl_dcu_drm_crtc_helper_funcs);
 
@@ -185,26 +156,15 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 		reg_num = LS1021A_LAYER_REG_NUM;
 	else
 		reg_num = VF610_LAYER_REG_NUM;
-	for (i = 0; i <= fsl_dev->soc->total_layer; i++) {
-		for (j = 0; j < reg_num; j++) {
-			ret = regmap_write(fsl_dev->regmap,
-					   DCU_CTRLDESCLN(i, j), 0);
-			if (ret)
-				goto init_failed;
-		}
+	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
+		for (j = 1; j <= reg_num; j++)
+			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
 	}
-	ret = regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-				 DCU_MODE_DCU_MODE_MASK,
-				 DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-	if (ret)
-		goto init_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		goto init_failed;
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 
 	return 0;
-init_failed:
-	dev_err(fsl_dev->dev, "init DCU register failed\n");
-	return ret;
 }
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index fca97d3fc846..e8d9337a66d8 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -28,37 +28,36 @@
 #include "fsl_dcu_drm_crtc.h"
 #include "fsl_dcu_drm_drv.h"
 
+static bool fsl_dcu_drm_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	if (reg == DCU_INT_STATUS || reg == DCU_UPDATE_MODE)
+		return true;
+
+	return false;
+}
+
 static const struct regmap_config fsl_dcu_regmap_config = {
 	.reg_bits = 32,
 	.reg_stride = 4,
 	.val_bits = 32,
 	.cache_type = REGCACHE_RBTREE,
+
+	.volatile_reg = fsl_dcu_drm_is_volatile_reg,
 };
 
 static int fsl_dcu_drm_irq_init(struct drm_device *dev)
 {
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
-	unsigned int value;
 	int ret;
 
 	ret = drm_irq_install(dev, fsl_dev->irq);
 	if (ret < 0)
 		dev_err(dev->dev, "failed to install IRQ handler\n");
 
-	ret = regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_STATUS failed\n");
-	ret = regmap_read(fsl_dev->regmap, DCU_INT_MASK, &value);
-	if (ret)
-		dev_err(dev->dev, "read DCU_INT_MASK failed\n");
-	value &= DCU_INT_MASK_VBLANK;
-	ret = regmap_write(fsl_dev->regmap, DCU_INT_MASK, value);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_MASK failed\n");
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		dev_err(dev->dev, "set DCU_UPDATE_MODE failed\n");
+	regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0);
+	regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0);
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 
 	return ret;
 }
@@ -112,10 +111,6 @@ static int fsl_dcu_unload(struct drm_device *dev)
 	return 0;
 }
 
-static void fsl_dcu_drm_preclose(struct drm_device *dev, struct drm_file *file)
-{
-}
-
 static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -124,18 +119,17 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg)
 	int ret;
 
 	ret = regmap_read(fsl_dev->regmap, DCU_INT_STATUS, &int_status);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_STATUS failed\n");
+	if (ret) {
+		dev_err(dev->dev, "read DCU_INT_STATUS failed\n");
+		return IRQ_NONE;
+	}
+
 	if (int_status & DCU_INT_STATUS_VBLANK)
 		drm_handle_vblank(dev, 0);
 
-	ret = regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0xffffffff);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_STATUS failed\n");
-	ret = regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-			   DCU_UPDATE_MODE_READREG);
-	if (ret)
-		dev_err(dev->dev, "set DCU_UPDATE_MODE failed\n");
+	regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status);
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
 
 	return IRQ_HANDLED;
 }
@@ -144,15 +138,11 @@ static int fsl_dcu_drm_enable_vblank(struct drm_device *dev, unsigned int pipe)
 {
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 	unsigned int value;
-	int ret;
 
-	ret = regmap_read(fsl_dev->regmap, DCU_INT_MASK, &value);
-	if (ret)
-		dev_err(dev->dev, "read DCU_INT_MASK failed\n");
+	regmap_read(fsl_dev->regmap, DCU_INT_MASK, &value);
 	value &= ~DCU_INT_MASK_VBLANK;
-	ret = regmap_write(fsl_dev->regmap, DCU_INT_MASK, value);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_MASK failed\n");
+	regmap_write(fsl_dev->regmap, DCU_INT_MASK, value);
+
 	return 0;
 }
 
@@ -161,15 +151,10 @@ static void fsl_dcu_drm_disable_vblank(struct drm_device *dev,
 {
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 	unsigned int value;
-	int ret;
 
-	ret = regmap_read(fsl_dev->regmap, DCU_INT_MASK, &value);
-	if (ret)
-		dev_err(dev->dev, "read DCU_INT_MASK failed\n");
+	regmap_read(fsl_dev->regmap, DCU_INT_MASK, &value);
 	value |= DCU_INT_MASK_VBLANK;
-	ret = regmap_write(fsl_dev->regmap, DCU_INT_MASK, value);
-	if (ret)
-		dev_err(dev->dev, "set DCU_INT_MASK failed\n");
+	regmap_write(fsl_dev->regmap, DCU_INT_MASK, value);
 }
 
 static const struct file_operations fsl_dcu_drm_fops = {
@@ -191,7 +176,6 @@ static struct drm_driver fsl_dcu_drm_driver = {
 				| DRIVER_PRIME | DRIVER_ATOMIC,
 	.load			= fsl_dcu_load,
 	.unload			= fsl_dcu_unload,
-	.preclose		= fsl_dcu_drm_preclose,
 	.irq_handler		= fsl_dcu_drm_irq,
 	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= fsl_dcu_drm_enable_vblank,
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
index 579b9e44e764..6413ac9e4769 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
@@ -133,7 +133,9 @@
 #define DCU_LAYER_RLE_EN		BIT(15)
 #define DCU_LAYER_LUOFFS(x)		((x) << 4)
 #define DCU_LAYER_BB_ON			BIT(2)
-#define DCU_LAYER_AB(x)			(x)
+#define DCU_LAYER_AB_NONE		0
+#define DCU_LAYER_AB_CHROMA_KEYING	1
+#define DCU_LAYER_AB_WHOLE_FRAME	2
 
 #define DCU_LAYER_CKMAX_R(x)		((x) << 16)
 #define DCU_LAYER_CKMAX_G(x)		((x) << 8)
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
index 0ef5959710e7..c564ec612b59 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
@@ -25,6 +25,8 @@ static const struct drm_mode_config_funcs fsl_dcu_drm_mode_config_funcs = {
 
 int fsl_dcu_drm_modeset_init(struct fsl_dcu_drm_device *fsl_dev)
 {
+	int ret;
+
 	drm_mode_config_init(fsl_dev->drm);
 
 	fsl_dev->drm->mode_config.min_width = 0;
@@ -33,11 +35,25 @@ int fsl_dcu_drm_modeset_init(struct fsl_dcu_drm_device *fsl_dev)
 	fsl_dev->drm->mode_config.max_height = 2047;
 	fsl_dev->drm->mode_config.funcs = &fsl_dcu_drm_mode_config_funcs;
 
-	drm_kms_helper_poll_init(fsl_dev->drm);
-	fsl_dcu_drm_crtc_create(fsl_dev);
-	fsl_dcu_drm_encoder_create(fsl_dev, &fsl_dev->crtc);
-	fsl_dcu_drm_connector_create(fsl_dev, &fsl_dev->encoder);
+	ret = fsl_dcu_drm_crtc_create(fsl_dev);
+	if (ret)
+		return ret;
+
+	ret = fsl_dcu_drm_encoder_create(fsl_dev, &fsl_dev->crtc);
+	if (ret)
+		goto fail_encoder;
+
+	ret = fsl_dcu_drm_connector_create(fsl_dev, &fsl_dev->encoder);
+	if (ret)
+		goto fail_connector;
+
 	drm_mode_config_reset(fsl_dev->drm);
+	drm_kms_helper_poll_init(fsl_dev->drm);
 
 	return 0;
+fail_encoder:
+	fsl_dev->crtc.funcs->destroy(&fsl_dev->crtc);
+fail_connector:
+	fsl_dev->encoder.funcs->destroy(&fsl_dev->encoder);
+	return ret;
 }
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
index 4b13cf919575..274558b3b32b 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
@@ -41,11 +41,17 @@ static int fsl_dcu_drm_plane_atomic_check(struct drm_plane *plane,
 {
 	struct drm_framebuffer *fb = state->fb;
 
+	if (!state->fb || !state->crtc)
+		return 0;
+
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_RGB565:
 	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_ARGB8888:
-	case DRM_FORMAT_BGRA4444:
+	case DRM_FORMAT_XRGB4444:
+	case DRM_FORMAT_ARGB4444:
+	case DRM_FORMAT_XRGB1555:
 	case DRM_FORMAT_ARGB1555:
 	case DRM_FORMAT_YUV422:
 		return 0;
@@ -59,19 +65,15 @@ static void fsl_dcu_drm_plane_atomic_disable(struct drm_plane *plane,
 {
 	struct fsl_dcu_drm_device *fsl_dev = plane->dev->dev_private;
 	unsigned int value;
-	int index, ret;
+	int index;
 
 	index = fsl_dcu_drm_plane_index(plane);
 	if (index < 0)
 		return;
 
-	ret = regmap_read(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4), &value);
-	if (ret)
-		dev_err(fsl_dev->dev, "read DCU_INT_MASK failed\n");
+	regmap_read(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4), &value);
 	value &= ~DCU_LAYER_EN;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4), value);
-	if (ret)
-		dev_err(fsl_dev->dev, "set DCU register failed\n");
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4), value);
 }
 
 static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane,
@@ -82,8 +84,8 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state *state = plane->state;
 	struct drm_framebuffer *fb = plane->state->fb;
 	struct drm_gem_cma_object *gem;
-	unsigned int alpha, bpp;
-	int index, ret;
+	unsigned int alpha = DCU_LAYER_AB_NONE, bpp;
+	int index;
 
 	if (!fb)
 		return;
@@ -97,96 +99,74 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane,
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_RGB565:
 		bpp = FSL_DCU_RGB565;
-		alpha = 0xff;
 		break;
 	case DRM_FORMAT_RGB888:
 		bpp = FSL_DCU_RGB888;
-		alpha = 0xff;
 		break;
 	case DRM_FORMAT_ARGB8888:
+		alpha = DCU_LAYER_AB_WHOLE_FRAME;
+		/* fall-through */
+	case DRM_FORMAT_XRGB8888:
 		bpp = FSL_DCU_ARGB8888;
-		alpha = 0xff;
 		break;
-	case DRM_FORMAT_BGRA4444:
+	case DRM_FORMAT_ARGB4444:
+		alpha = DCU_LAYER_AB_WHOLE_FRAME;
+		/* fall-through */
+	case DRM_FORMAT_XRGB4444:
 		bpp = FSL_DCU_ARGB4444;
-		alpha = 0xff;
 		break;
 	case DRM_FORMAT_ARGB1555:
+		alpha = DCU_LAYER_AB_WHOLE_FRAME;
+		/* fall-through */
+	case DRM_FORMAT_XRGB1555:
 		bpp = FSL_DCU_ARGB1555;
-		alpha = 0xff;
 		break;
 	case DRM_FORMAT_YUV422:
 		bpp = FSL_DCU_YUV422;
-		alpha = 0xff;
 		break;
 	default:
 		return;
 	}
 
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 1),
-			   DCU_LAYER_HEIGHT(state->crtc_h) |
-			   DCU_LAYER_WIDTH(state->crtc_w));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 2),
-			   DCU_LAYER_POSY(state->crtc_y) |
-			   DCU_LAYER_POSX(state->crtc_x));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap,
-			   DCU_CTRLDESCLN(index, 3), gem->paddr);
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4),
-			   DCU_LAYER_EN |
-			   DCU_LAYER_TRANS(alpha) |
-			   DCU_LAYER_BPP(bpp) |
-			   DCU_LAYER_AB(0));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 5),
-			   DCU_LAYER_CKMAX_R(0xFF) |
-			   DCU_LAYER_CKMAX_G(0xFF) |
-			   DCU_LAYER_CKMAX_B(0xFF));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 6),
-			   DCU_LAYER_CKMIN_R(0) |
-			   DCU_LAYER_CKMIN_G(0) |
-			   DCU_LAYER_CKMIN_B(0));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 7), 0);
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 8),
-			   DCU_LAYER_FG_FCOLOR(0));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 9),
-			   DCU_LAYER_BG_BCOLOR(0));
-	if (ret)
-		goto set_failed;
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 1),
+		     DCU_LAYER_HEIGHT(state->crtc_h) |
+		     DCU_LAYER_WIDTH(state->crtc_w));
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 2),
+		     DCU_LAYER_POSY(state->crtc_y) |
+		     DCU_LAYER_POSX(state->crtc_x));
+	regmap_write(fsl_dev->regmap,
+		     DCU_CTRLDESCLN(index, 3), gem->paddr);
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 4),
+		     DCU_LAYER_EN |
+		     DCU_LAYER_TRANS(0xff) |
+		     DCU_LAYER_BPP(bpp) |
+		     alpha);
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 5),
+		     DCU_LAYER_CKMAX_R(0xFF) |
+		     DCU_LAYER_CKMAX_G(0xFF) |
+		     DCU_LAYER_CKMAX_B(0xFF));
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 6),
+		     DCU_LAYER_CKMIN_R(0) |
+		     DCU_LAYER_CKMIN_G(0) |
+		     DCU_LAYER_CKMIN_B(0));
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 7), 0);
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 8),
+		     DCU_LAYER_FG_FCOLOR(0));
+	regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 9),
+		     DCU_LAYER_BG_BCOLOR(0));
+
 	if (!strcmp(fsl_dev->soc->name, "ls1021a")) {
-		ret = regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 10),
-				   DCU_LAYER_POST_SKIP(0) |
-				   DCU_LAYER_PRE_SKIP(0));
-		if (ret)
-			goto set_failed;
+		regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(index, 10),
+			     DCU_LAYER_POST_SKIP(0) |
+			     DCU_LAYER_PRE_SKIP(0));
 	}
-	ret = regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-				 DCU_MODE_DCU_MODE_MASK,
-				 DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
-	if (ret)
-		goto set_failed;
-	ret = regmap_write(fsl_dev->regmap,
-			   DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG);
-	if (ret)
-		goto set_failed;
-	return;
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
+	regmap_write(fsl_dev->regmap,
+		     DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG);
 
-set_failed:
-	dev_err(fsl_dev->dev, "set DCU register failed\n");
+	return;
 }
 
 static void
@@ -213,6 +193,7 @@ static const struct drm_plane_helper_funcs fsl_dcu_drm_plane_helper_funcs = {
 static void fsl_dcu_drm_plane_destroy(struct drm_plane *plane)
 {
 	drm_plane_cleanup(plane);
+	kfree(plane);
 }
 
 static const struct drm_plane_funcs fsl_dcu_drm_plane_funcs = {
@@ -227,8 +208,11 @@ static const struct drm_plane_funcs fsl_dcu_drm_plane_funcs = {
 static const u32 fsl_dcu_drm_plane_formats[] = {
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_RGB888,
+	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB4444,
 	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_XRGB1555,
 	DRM_FORMAT_ARGB1555,
 	DRM_FORMAT_YUV422,
 };
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index d0717a85c7ec..b837e7a92196 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -217,7 +217,6 @@ static int cdv_intel_crt_set_property(struct drm_connector *connector,
 
 static const struct drm_encoder_helper_funcs cdv_intel_crt_helper_funcs = {
 	.dpms = cdv_intel_crt_dpms,
-	.mode_fixup = gma_encoder_mode_fixup,
 	.prepare = gma_encoder_prepare,
 	.commit = gma_encoder_commit,
 	.mode_set = cdv_intel_crt_mode_set,
diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c
index 6126546295e9..17db4b4749d5 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_display.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_display.c
@@ -116,7 +116,7 @@ static const struct gma_limit_t cdv_intel_limits[] = {
 	 .p1 = {.min = 1, .max = 10},
 	 .p2 = {.dot_limit = 225000, .p2_slow = 10, .p2_fast = 10},
 	 .find_pll = cdv_intel_find_dp_pll,
-	 }	
+	}
 };
 
 #define _wait_for(COND, MS, W) ({ \
@@ -245,7 +245,7 @@ cdv_dpll_set_clock_cdv(struct drm_device *dev, struct drm_crtc *crtc,
 	/* We don't know what the other fields of these regs are, so
 	 * leave them in place.
 	 */
-	/* 
+	/*
 	 * The BIT 14:13 of 0x8010/0x8030 is used to select the ref clk
 	 * for the pipe A/B. Display spec 1.06 has wrong definition.
 	 * Correct definition is like below:
@@ -256,7 +256,7 @@ cdv_dpll_set_clock_cdv(struct drm_device *dev, struct drm_crtc *crtc,
 	 *
 	 * if DPLLA sets 01 and DPLLB sets 02, both use clk from DPLLA
 	 *
-	 */  
+	 */
 	ret = cdv_sb_read(dev, ref_sfr, &ref_value);
 	if (ret)
 		return ret;
@@ -646,7 +646,7 @@ static int cdv_intel_crtc_mode_set(struct drm_crtc *crtc,
 		 * for DP/eDP. When using SSC clock, the ref clk is 100MHz.Otherwise
 		 * it will be 27MHz. From the VBIOS code it seems that the pipe A choose
 		 * 27MHz for DP/eDP while the Pipe B chooses the 100MHz.
-		 */ 
+		 */
 		if (pipe == 0)
 			refclk = 27000;
 		else
@@ -659,7 +659,7 @@ static int cdv_intel_crtc_mode_set(struct drm_crtc *crtc,
 	}
 
 	drm_mode_debug_printmodeline(adjusted_mode);
-	
+
 	limit = gma_crtc->clock_funcs->limit(crtc, refclk);
 
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk,
@@ -721,7 +721,7 @@ static int cdv_intel_crtc_mode_set(struct drm_crtc *crtc,
 			pipeconf |= PIPE_6BPC;
 	} else
 		pipeconf |= PIPE_8BPC;
-			
+
 	/* Set up the display plane register */
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
@@ -974,7 +974,6 @@ struct drm_display_mode *cdv_intel_crtc_mode_get(struct drm_device *dev,
 
 const struct drm_crtc_helper_funcs cdv_intel_helper_funcs = {
 	.dpms = gma_crtc_dpms,
-	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = cdv_intel_crtc_mode_set,
 	.mode_set_base = gma_pipe_set_base,
 	.prepare = gma_crtc_prepare,
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index ddf2d7700759..28f9d90988ff 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -255,7 +255,6 @@ static void cdv_hdmi_destroy(struct drm_connector *connector)
 
 static const struct drm_encoder_helper_funcs cdv_hdmi_helper_funcs = {
 	.dpms = cdv_hdmi_dpms,
-	.mode_fixup = gma_encoder_mode_fixup,
 	.prepare = gma_encoder_prepare,
 	.mode_set = cdv_hdmi_mode_set,
 	.commit = gma_encoder_commit,
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index cb95765050cc..033d894d030e 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -674,29 +674,17 @@ static const struct drm_mode_config_funcs psb_mode_funcs = {
 	.output_poll_changed = psbfb_output_poll_changed,
 };
 
-static int psb_create_backlight_property(struct drm_device *dev)
-{
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct drm_property *backlight;
-
-	if (dev_priv->backlight_property)
-		return 0;
-
-	backlight = drm_property_create_range(dev, 0, "backlight", 0, 100);
-
-	dev_priv->backlight_property = backlight;
-
-	return 0;
-}
-
 static void psb_setup_outputs(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct drm_connector *connector;
 
 	drm_mode_create_scaling_mode_property(dev);
-	psb_create_backlight_property(dev);
 
+	/* It is ok for this to fail - we just don't get backlight control */
+	if (!dev_priv->backlight_property)
+		dev_priv->backlight_property = drm_property_create_range(dev, 0,
+							"backlight", 0, 100);
 	dev_priv->ops->output_init(dev);
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index ff17af4cfc64..5bf765de2517 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -478,20 +478,6 @@ int gma_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	return 0;
 }
 
-bool gma_encoder_mode_fixup(struct drm_encoder *encoder,
-			    const struct drm_display_mode *mode,
-			    struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-bool gma_crtc_mode_fixup(struct drm_crtc *crtc,
-			 const struct drm_display_mode *mode,
-			 struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 void gma_crtc_prepare(struct drm_crtc *crtc)
 {
 	const struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
diff --git a/drivers/gpu/drm/gma500/gma_display.h b/drivers/gpu/drm/gma500/gma_display.h
index ed569d8a6af3..b2491c65f053 100644
--- a/drivers/gpu/drm/gma500/gma_display.h
+++ b/drivers/gpu/drm/gma500/gma_display.h
@@ -75,9 +75,6 @@ extern void gma_crtc_load_lut(struct drm_crtc *crtc);
 extern void gma_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
 			       u16 *blue, u32 start, u32 size);
 extern void gma_crtc_dpms(struct drm_crtc *crtc, int mode);
-extern bool gma_crtc_mode_fixup(struct drm_crtc *crtc,
-				const struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode);
 extern void gma_crtc_prepare(struct drm_crtc *crtc);
 extern void gma_crtc_commit(struct drm_crtc *crtc);
 extern void gma_crtc_disable(struct drm_crtc *crtc);
@@ -90,9 +87,6 @@ extern void gma_crtc_restore(struct drm_crtc *crtc);
 extern void gma_encoder_prepare(struct drm_encoder *encoder);
 extern void gma_encoder_commit(struct drm_encoder *encoder);
 extern void gma_encoder_destroy(struct drm_encoder *encoder);
-extern bool gma_encoder_mode_fixup(struct drm_encoder *encoder,
-				   const struct drm_display_mode *mode,
-				   struct drm_display_mode *adjusted_mode);
 
 /* Common clock related functions */
 extern const struct gma_limit_t *gma_limit(struct drm_crtc *crtc, int refclk);
diff --git a/drivers/gpu/drm/gma500/intel_gmbus.c b/drivers/gpu/drm/gma500/intel_gmbus.c
index 566d330aaeea..e7e22187c539 100644
--- a/drivers/gpu/drm/gma500/intel_gmbus.c
+++ b/drivers/gpu/drm/gma500/intel_gmbus.c
@@ -436,7 +436,7 @@ int gma_intel_setup_gmbus(struct drm_device *dev)
 	return 0;
 
 err:
-	while (--i) {
+	while (i--) {
 		struct intel_gmbus *bus = &dev_priv->gmbus[i];
 		i2c_del_adapter(&bus->adapter);
 	}
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index d758f4cc6805..907cb51795c3 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -382,16 +382,6 @@ static int mdfld_dsi_connector_mode_valid(struct drm_connector *connector,
 	return MODE_OK;
 }
 
-static void mdfld_dsi_connector_dpms(struct drm_connector *connector, int mode)
-{
-	if (mode == connector->dpms)
-		return;
-
-	/*first, execute dpms*/
-
-	drm_helper_connector_dpms(connector, mode);
-}
-
 static struct drm_encoder *mdfld_dsi_connector_best_encoder(
 				struct drm_connector *connector)
 {
@@ -404,7 +394,7 @@ static struct drm_encoder *mdfld_dsi_connector_best_encoder(
 
 /*DSI connector funcs*/
 static const struct drm_connector_funcs mdfld_dsi_connector_funcs = {
-	.dpms = /*drm_helper_connector_dpms*/mdfld_dsi_connector_dpms,
+	.dpms = drm_helper_connector_dpms,
 	.detect = mdfld_dsi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = mdfld_dsi_connector_set_property,
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index acd38344b302..92e3f93ee682 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -1026,10 +1026,8 @@ mrst_crtc_mode_set_exit:
 
 const struct drm_crtc_helper_funcs mdfld_helper_funcs = {
 	.dpms = mdfld_crtc_dpms,
-	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = mdfld_crtc_mode_set,
 	.mode_set_base = mdfld__intel_pipe_set_base,
 	.prepare = gma_crtc_prepare,
 	.commit = gma_crtc_commit,
 };
-
diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c
index 1048f0c7c6ce..da9fd34b9550 100644
--- a/drivers/gpu/drm/gma500/oaktrail_crtc.c
+++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c
@@ -657,7 +657,6 @@ pipe_set_base_exit:
 
 const struct drm_crtc_helper_funcs oaktrail_helper_funcs = {
 	.dpms = oaktrail_crtc_dpms,
-	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = oaktrail_crtc_mode_set,
 	.mode_set_base = oaktrail_pipe_set_base,
 	.prepare = gma_crtc_prepare,
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 2d18499d6060..8b2eb32ee988 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -601,7 +601,6 @@ static void oaktrail_hdmi_destroy(struct drm_connector *connector)
 
 static const struct drm_encoder_helper_funcs oaktrail_hdmi_helper_funcs = {
 	.dpms = oaktrail_hdmi_dpms,
-	.mode_fixup = gma_encoder_mode_fixup,
 	.prepare = gma_encoder_prepare,
 	.mode_set = oaktrail_hdmi_mode_set,
 	.commit = gma_encoder_commit,
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 92e7e5795398..4e1c6850520e 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -442,14 +442,6 @@ static long psb_unlocked_ioctl(struct file *filp, unsigned int cmd,
 	/* FIXME: do we need to wrap the other side of this */
 }
 
-/*
- * When a client dies:
- *    - Check for and clean up flipped page state
- */
-static void psb_driver_preclose(struct drm_device *dev, struct drm_file *priv)
-{
-}
-
 static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	return drm_get_pci_dev(pdev, ent, &driver);
@@ -495,7 +487,6 @@ static struct drm_driver driver = {
 	.load = psb_driver_load,
 	.unload = psb_driver_unload,
 	.lastclose = psb_driver_lastclose,
-	.preclose = psb_driver_preclose,
 	.set_busid = drm_pci_set_busid,
 
 	.num_ioctls = ARRAY_SIZE(psb_ioctls),
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index dcdbc37e55e1..398015be87e4 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -430,7 +430,6 @@ struct drm_display_mode *psb_intel_crtc_mode_get(struct drm_device *dev,
 
 const struct drm_crtc_helper_funcs psb_intel_helper_funcs = {
 	.dpms = gma_crtc_dpms,
-	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = psb_intel_crtc_mode_set,
 	.mode_set_base = gma_pipe_set_base,
 	.prepare = gma_crtc_prepare,
diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c
index 90db5f4dcce5..0594c45f7164 100644
--- a/drivers/gpu/drm/i2c/ch7006_drv.c
+++ b/drivers/gpu/drm/i2c/ch7006_drv.c
@@ -253,6 +253,8 @@ static int ch7006_encoder_create_resources(struct drm_encoder *encoder,
 	drm_mode_create_tv_properties(dev, NUM_TV_NORMS, ch7006_tv_norm_names);
 
 	priv->scale_property = drm_property_create_range(dev, 0, "scale", 0, 2);
+	if (!priv->scale_property)
+		return -ENOMEM;
 
 	drm_object_attach_property(&connector->base, conf->tv_select_subconnector_property,
 				      priv->select_subconnector);
diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c
index c400428f6c8c..db0b03fb0ff1 100644
--- a/drivers/gpu/drm/i2c/sil164_drv.c
+++ b/drivers/gpu/drm/i2c/sil164_drv.c
@@ -252,14 +252,6 @@ sil164_encoder_restore(struct drm_encoder *encoder)
 				     priv->saved_slave_state);
 }
 
-static bool
-sil164_encoder_mode_fixup(struct drm_encoder *encoder,
-			  const struct drm_display_mode *mode,
-			  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static int
 sil164_encoder_mode_valid(struct drm_encoder *encoder,
 			  struct drm_display_mode *mode)
@@ -347,7 +339,6 @@ static const struct drm_encoder_slave_funcs sil164_encoder_funcs = {
 	.dpms = sil164_encoder_dpms,
 	.save = sil164_encoder_save,
 	.restore = sil164_encoder_restore,
-	.mode_fixup = sil164_encoder_mode_fixup,
 	.mode_valid = sil164_encoder_mode_valid,
 	.mode_set = sil164_encoder_mode_set,
 	.detect = sil164_encoder_detect,
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index f8ee740c0e26..f4315bc8d471 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -856,14 +856,6 @@ static void tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
 	priv->dpms = mode;
 }
 
-static bool
-tda998x_encoder_mode_fixup(struct drm_encoder *encoder,
-			  const struct drm_display_mode *mode,
-			  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static int tda998x_connector_mode_valid(struct drm_connector *connector,
 					struct drm_display_mode *mode)
 {
@@ -1343,7 +1335,6 @@ static void tda998x_encoder_commit(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs tda998x_encoder_helper_funcs = {
 	.dpms = tda998x_encoder_dpms,
-	.mode_fixup = tda998x_encoder_mode_fixup,
 	.prepare = tda998x_encoder_prepare,
 	.commit = tda998x_encoder_commit,
 	.mode_set = tda998x_encoder_mode_set,
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 051eab33e4c7..20a5d0455e19 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -2,9 +2,7 @@ config DRM_I915
 	tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
 	depends on DRM
 	depends on X86 && PCI
-	depends on (AGP || AGP=n)
 	select INTEL_GTT
-	select AGP_INTEL if AGP
 	select INTERVAL_TREE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
@@ -47,3 +45,14 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 	  option changes the default for that module option.
 
 	  If in doubt, say "N".
+
+config DRM_I915_USERPTR
+	bool "Always enable userptr support"
+	depends on DRM_I915
+	select MMU_NOTIFIER
+	default y
+	help
+	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
+	  selected to enabled full userptr support.
+
+	  If in doubt, say "Y".
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index cf39ed3133d6..a0f1bd711b53 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -117,9 +117,8 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
 	u64 size = 0;
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
-		    drm_mm_node_allocated(&vma->node))
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (vma->is_ggtt && drm_mm_node_allocated(&vma->node))
 			size += vma->node.size;
 	}
 
@@ -155,7 +154,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (vma->pin_count > 0)
 			pin_count++;
 	}
@@ -164,14 +163,13 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (display)");
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
-			   i915_is_ggtt(vma->vm) ? "g" : "pp",
+			   vma->is_ggtt ? "g" : "pp",
 			   vma->node.start, vma->node.size);
-		if (i915_is_ggtt(vma->vm))
-			seq_printf(m, ", type: %u)", vma->ggtt_view.type);
-		else
-			seq_puts(m, ")");
+		if (vma->is_ggtt)
+			seq_printf(m, ", type: %u", vma->ggtt_view.type);
+		seq_puts(m, ")");
 	}
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
@@ -230,7 +228,7 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	}
 
 	total_obj_size = total_gtt_size = count = 0;
-	list_for_each_entry(vma, head, mm_list) {
+	list_for_each_entry(vma, head, vm_link) {
 		seq_printf(m, "   ");
 		describe_obj(m, vma->obj);
 		seq_printf(m, "\n");
@@ -342,13 +340,13 @@ static int per_file_stats(int id, void *ptr, void *data)
 		stats->shared += obj->base.size;
 
 	if (USES_FULL_PPGTT(obj->base.dev)) {
-		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
 			struct i915_hw_ppgtt *ppgtt;
 
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			if (i915_is_ggtt(vma->vm)) {
+			if (vma->is_ggtt) {
 				stats->global += obj->base.size;
 				continue;
 			}
@@ -454,12 +452,12 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_vmas(&vm->active_list, mm_list);
+	count_vmas(&vm->active_list, vm_link);
 	seq_printf(m, "  %u [%u] active objects, %llu [%llu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_vmas(&vm->inactive_list, mm_list);
+	count_vmas(&vm->inactive_list, vm_link);
 	seq_printf(m, "  %u [%u] inactive objects, %llu [%llu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -1336,7 +1334,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	struct intel_engine_cs *ring;
 	u64 acthd[I915_NUM_RINGS];
 	u32 seqno[I915_NUM_RINGS];
-	int i;
+	u32 instdone[I915_NUM_INSTDONE_REG];
+	int i, j;
 
 	if (!i915.enable_hangcheck) {
 		seq_printf(m, "Hangcheck disabled\n");
@@ -1350,6 +1349,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		acthd[i] = intel_ring_get_active_head(ring);
 	}
 
+	i915_get_extra_instdone(dev, instdone);
+
 	intel_runtime_pm_put(dev_priv);
 
 	if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) {
@@ -1370,6 +1371,21 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 			   (long long)ring->hangcheck.max_acthd);
 		seq_printf(m, "\tscore = %d\n", ring->hangcheck.score);
 		seq_printf(m, "\taction = %d\n", ring->hangcheck.action);
+
+		if (ring->id == RCS) {
+			seq_puts(m, "\tinstdone read =");
+
+			for (j = 0; j < I915_NUM_INSTDONE_REG; j++)
+				seq_printf(m, " 0x%08x", instdone[j]);
+
+			seq_puts(m, "\n\tinstdone accu =");
+
+			for (j = 0; j < I915_NUM_INSTDONE_REG; j++)
+				seq_printf(m, " 0x%08x",
+					   ring->hangcheck.instdone[j]);
+
+			seq_puts(m, "\n");
+		}
 	}
 
 	return 0;
@@ -1947,11 +1963,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 
 		seq_puts(m, "HW context ");
 		describe_ctx(m, ctx);
-		for_each_ring(ring, dev_priv, i) {
-			if (ring->default_context == ctx)
-				seq_printf(m, "(default context %s) ",
-					   ring->name);
-		}
+		if (ctx == dev_priv->kernel_context)
+			seq_printf(m, "(kernel context) ");
 
 		if (i915.enable_execlists) {
 			seq_putc(m, '\n');
@@ -1981,12 +1994,13 @@ static int i915_context_status(struct seq_file *m, void *unused)
 }
 
 static void i915_dump_lrc_obj(struct seq_file *m,
-			      struct intel_engine_cs *ring,
-			      struct drm_i915_gem_object *ctx_obj)
+			      struct intel_context *ctx,
+			      struct intel_engine_cs *ring)
 {
 	struct page *page;
 	uint32_t *reg_state;
 	int j;
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
 	unsigned long ggtt_offset = 0;
 
 	if (ctx_obj == NULL) {
@@ -1996,7 +2010,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 	}
 
 	seq_printf(m, "CONTEXT: %s %u\n", ring->name,
-		   intel_execlists_ctx_id(ctx_obj));
+		   intel_execlists_ctx_id(ctx, ring));
 
 	if (!i915_gem_obj_ggtt_bound(ctx_obj))
 		seq_puts(m, "\tNot bound in GGTT\n");
@@ -2042,13 +2056,10 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 	if (ret)
 		return ret;
 
-	list_for_each_entry(ctx, &dev_priv->context_list, link) {
-		for_each_ring(ring, dev_priv, i) {
-			if (ring->default_context != ctx)
-				i915_dump_lrc_obj(m, ring,
-						  ctx->engine[i].state);
-		}
-	}
+	list_for_each_entry(ctx, &dev_priv->context_list, link)
+		if (ctx != dev_priv->kernel_context)
+			for_each_ring(ring, dev_priv, i)
+				i915_dump_lrc_obj(m, ctx, ring);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -2097,13 +2108,13 @@ static int i915_execlists(struct seq_file *m, void *data)
 		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
 
 		read_pointer = ring->next_context_status_buffer;
-		write_pointer = status_pointer & 0x07;
+		write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 		if (read_pointer > write_pointer)
-			write_pointer += 6;
+			write_pointer += GEN8_CSB_ENTRIES;
 		seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n",
 			   read_pointer, write_pointer);
 
-		for (i = 0; i < 6; i++) {
+		for (i = 0; i < GEN8_CSB_ENTRIES; i++) {
 			status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, i));
 			ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, i));
 
@@ -2120,11 +2131,8 @@ static int i915_execlists(struct seq_file *m, void *data)
 
 		seq_printf(m, "\t%d requests in queue\n", count);
 		if (head_req) {
-			struct drm_i915_gem_object *ctx_obj;
-
-			ctx_obj = head_req->ctx->engine[ring_id].state;
 			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(ctx_obj));
+				   intel_execlists_ctx_id(head_req->ctx, ring));
 			seq_printf(m, "\tHead request tail: %u\n",
 				   head_req->tail);
 		}
@@ -2458,9 +2466,9 @@ static void i915_guc_client_info(struct seq_file *m,
 
 	for_each_ring(ring, dev_priv, i) {
 		seq_printf(m, "\tSubmissions: %llu %s\n",
-				client->submissions[i],
+				client->submissions[ring->guc_id],
 				ring->name);
-		tot += client->submissions[i];
+		tot += client->submissions[ring->guc_id];
 	}
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }
@@ -2497,10 +2505,10 @@ static int i915_guc_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "\nGuC submissions:\n");
 	for_each_ring(ring, dev_priv, i) {
-		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x %9d\n",
-			ring->name, guc.submissions[i],
-			guc.last_seqno[i], guc.last_seqno[i]);
-		total += guc.submissions[i];
+		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
+			ring->name, guc.submissions[ring->guc_id],
+			guc.last_seqno[ring->guc_id]);
+		total += guc.submissions[ring->guc_id];
 	}
 	seq_printf(m, "\t%s: %llu\n", "Total", total);
 
@@ -2578,6 +2586,10 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 				enabled = true;
 		}
 	}
+
+	seq_printf(m, "Main link in standby mode: %s\n",
+		   yesno(dev_priv->psr.link_standby));
+
 	seq_printf(m, "HW Enabled & Active bit: %s", yesno(enabled));
 
 	if (!HAS_DDI(dev))
@@ -3216,9 +3228,11 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
 {
 	int i;
 	int ret;
+	struct intel_engine_cs *ring;
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_workarounds *workarounds = &dev_priv->workarounds;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
@@ -3226,15 +3240,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
 
 	intel_runtime_pm_get(dev_priv);
 
-	seq_printf(m, "Workarounds applied: %d\n", dev_priv->workarounds.count);
-	for (i = 0; i < dev_priv->workarounds.count; ++i) {
+	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
+	for_each_ring(ring, dev_priv, i)
+		seq_printf(m, "HW whitelist count for %s: %d\n",
+			   ring->name, workarounds->hw_whitelist_count[i]);
+	for (i = 0; i < workarounds->count; ++i) {
 		i915_reg_t addr;
 		u32 mask, value, read;
 		bool ok;
 
-		addr = dev_priv->workarounds.reg[i].addr;
-		mask = dev_priv->workarounds.reg[i].mask;
-		value = dev_priv->workarounds.reg[i].value;
+		addr = workarounds->reg[i].addr;
+		mask = workarounds->reg[i].mask;
+		value = workarounds->reg[i].value;
 		read = I915_READ(addr);
 		ok = (value & mask) == (read & mask);
 		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d70d96fe553b..1c6d227aae7c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -391,20 +391,13 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	if (ret)
 		goto cleanup_vga_client;
 
-	/* Initialise stolen first so that we may reserve preallocated
-	 * objects for the BIOS to KMS transition.
-	 */
-	ret = i915_gem_init_stolen(dev);
-	if (ret)
-		goto cleanup_vga_switcheroo;
-
 	intel_power_domains_init_hw(dev_priv, false);
 
 	intel_csr_ucode_init(dev_priv);
 
 	ret = intel_irq_install(dev_priv);
 	if (ret)
-		goto cleanup_gem_stolen;
+		goto cleanup_csr;
 
 	intel_setup_gmbus(dev);
 
@@ -458,9 +451,8 @@ cleanup_irq:
 	intel_guc_ucode_fini(dev);
 	drm_irq_uninstall(dev);
 	intel_teardown_gmbus(dev);
-cleanup_gem_stolen:
-	i915_gem_cleanup_stolen(dev);
-cleanup_vga_switcheroo:
+cleanup_csr:
+	intel_csr_ucode_fini(dev_priv);
 	vga_switcheroo_unregister_client(dev->pdev);
 cleanup_vga_client:
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
@@ -816,7 +808,41 @@ static void intel_device_info_runtime_init(struct drm_device *dev)
 		     !(sfuse_strap & SFUSE_STRAP_FUSE_LOCK))) {
 			DRM_INFO("Display fused off, disabling\n");
 			info->num_pipes = 0;
+		} else if (fuse_strap & IVB_PIPE_C_DISABLE) {
+			DRM_INFO("PipeC fused off\n");
+			info->num_pipes -= 1;
 		}
+	} else if (info->num_pipes > 0 && INTEL_INFO(dev)->gen == 9) {
+		u32 dfsm = I915_READ(SKL_DFSM);
+		u8 disabled_mask = 0;
+		bool invalid;
+		int num_bits;
+
+		if (dfsm & SKL_DFSM_PIPE_A_DISABLE)
+			disabled_mask |= BIT(PIPE_A);
+		if (dfsm & SKL_DFSM_PIPE_B_DISABLE)
+			disabled_mask |= BIT(PIPE_B);
+		if (dfsm & SKL_DFSM_PIPE_C_DISABLE)
+			disabled_mask |= BIT(PIPE_C);
+
+		num_bits = hweight8(disabled_mask);
+
+		switch (disabled_mask) {
+		case BIT(PIPE_A):
+		case BIT(PIPE_B):
+		case BIT(PIPE_A) | BIT(PIPE_B):
+		case BIT(PIPE_A) | BIT(PIPE_C):
+			invalid = true;
+			break;
+		default:
+			invalid = false;
+		}
+
+		if (num_bits > info->num_pipes || invalid)
+			DRM_ERROR("invalid pipe fuse configuration: 0x%x\n",
+				  disabled_mask);
+		else
+			info->num_pipes -= num_bits;
 	}
 
 	/* Initialize slice/subslice/EU info */
@@ -855,6 +881,94 @@ static void intel_init_dpio(struct drm_i915_private *dev_priv)
 	}
 }
 
+static int i915_workqueues_init(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * The i915 workqueue is primarily used for batched retirement of
+	 * requests (and thus managing bo) once the task has been completed
+	 * by the GPU. i915_gem_retire_requests() is called directly when we
+	 * need high-priority retirement, such as waiting for an explicit
+	 * bo.
+	 *
+	 * It is also used for periodic low-priority events, such as
+	 * idle-timers and recording error state.
+	 *
+	 * All tasks on the workqueue are expected to acquire the dev mutex
+	 * so there is no point in running more than one instance of the
+	 * workqueue at any time.  Use an ordered one.
+	 */
+	dev_priv->wq = alloc_ordered_workqueue("i915", 0);
+	if (dev_priv->wq == NULL)
+		goto out_err;
+
+	dev_priv->hotplug.dp_wq = alloc_ordered_workqueue("i915-dp", 0);
+	if (dev_priv->hotplug.dp_wq == NULL)
+		goto out_free_wq;
+
+	dev_priv->gpu_error.hangcheck_wq =
+		alloc_ordered_workqueue("i915-hangcheck", 0);
+	if (dev_priv->gpu_error.hangcheck_wq == NULL)
+		goto out_free_dp_wq;
+
+	return 0;
+
+out_free_dp_wq:
+	destroy_workqueue(dev_priv->hotplug.dp_wq);
+out_free_wq:
+	destroy_workqueue(dev_priv->wq);
+out_err:
+	DRM_ERROR("Failed to allocate workqueues.\n");
+
+	return -ENOMEM;
+}
+
+static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
+{
+	destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
+	destroy_workqueue(dev_priv->hotplug.dp_wq);
+	destroy_workqueue(dev_priv->wq);
+}
+
+static int i915_mmio_setup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	int mmio_bar;
+	int mmio_size;
+
+	mmio_bar = IS_GEN2(dev) ? 1 : 0;
+	/*
+	 * Before gen4, the registers and the GTT are behind different BARs.
+	 * However, from gen4 onwards, the registers and the GTT are shared
+	 * in the same BAR, so we want to restrict this ioremap from
+	 * clobbering the GTT which we want ioremap_wc instead. Fortunately,
+	 * the register BAR remains the same size for all the earlier
+	 * generations up to Ironlake.
+	 */
+	if (INTEL_INFO(dev)->gen < 5)
+		mmio_size = 512 * 1024;
+	else
+		mmio_size = 2 * 1024 * 1024;
+	dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size);
+	if (dev_priv->regs == NULL) {
+		DRM_ERROR("failed to map registers\n");
+
+		return -EIO;
+	}
+
+	/* Try to make sure MCHBAR is enabled before poking at it */
+	intel_setup_mchbar(dev);
+
+	return 0;
+}
+
+static void i915_mmio_cleanup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	intel_teardown_mchbar(dev);
+	pci_iounmap(dev->pdev, dev_priv->regs);
+}
+
 /**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
@@ -870,7 +984,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_i915_private *dev_priv;
 	struct intel_device_info *info, *device_info;
-	int ret = 0, mmio_bar, mmio_size;
+	int ret = 0;
 	uint32_t aperture_size;
 
 	info = (struct intel_device_info *) flags;
@@ -897,6 +1011,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	mutex_init(&dev_priv->modeset_restore_lock);
 	mutex_init(&dev_priv->av_mutex);
 
+	ret = i915_workqueues_init(dev_priv);
+	if (ret < 0)
+		goto out_free_priv;
+
 	intel_pm_setup(dev);
 
 	intel_runtime_pm_get(dev_priv);
@@ -915,28 +1033,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	if (i915_get_bridge_dev(dev)) {
 		ret = -EIO;
-		goto free_priv;
+		goto out_runtime_pm_put;
 	}
 
-	mmio_bar = IS_GEN2(dev) ? 1 : 0;
-	/* Before gen4, the registers and the GTT are behind different BARs.
-	 * However, from gen4 onwards, the registers and the GTT are shared
-	 * in the same BAR, so we want to restrict this ioremap from
-	 * clobbering the GTT which we want ioremap_wc instead. Fortunately,
-	 * the register BAR remains the same size for all the earlier
-	 * generations up to Ironlake.
-	 */
-	if (info->gen < 5)
-		mmio_size = 512*1024;
-	else
-		mmio_size = 2*1024*1024;
-
-	dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, mmio_size);
-	if (!dev_priv->regs) {
-		DRM_ERROR("failed to map registers\n");
-		ret = -EIO;
+	ret = i915_mmio_setup(dev);
+	if (ret < 0)
 		goto put_bridge;
-	}
 
 	/* This must be called before any calls to HAS_PCH_* */
 	intel_detect_pch(dev);
@@ -945,7 +1047,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
-		goto out_freecsr;
+		goto out_uncore_fini;
 
 	/* WARNING: Apparently we must kick fbdev drivers before vgacon,
 	 * otherwise the vga fbdev driver falls over. */
@@ -991,49 +1093,13 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev_priv->gtt.mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base,
 					      aperture_size);
 
-	/* The i915 workqueue is primarily used for batched retirement of
-	 * requests (and thus managing bo) once the task has been completed
-	 * by the GPU. i915_gem_retire_requests() is called directly when we
-	 * need high-priority retirement, such as waiting for an explicit
-	 * bo.
-	 *
-	 * It is also used for periodic low-priority events, such as
-	 * idle-timers and recording error state.
-	 *
-	 * All tasks on the workqueue are expected to acquire the dev mutex
-	 * so there is no point in running more than one instance of the
-	 * workqueue at any time.  Use an ordered one.
-	 */
-	dev_priv->wq = alloc_ordered_workqueue("i915", 0);
-	if (dev_priv->wq == NULL) {
-		DRM_ERROR("Failed to create our workqueue.\n");
-		ret = -ENOMEM;
-		goto out_mtrrfree;
-	}
-
-	dev_priv->hotplug.dp_wq = alloc_ordered_workqueue("i915-dp", 0);
-	if (dev_priv->hotplug.dp_wq == NULL) {
-		DRM_ERROR("Failed to create our dp workqueue.\n");
-		ret = -ENOMEM;
-		goto out_freewq;
-	}
-
-	dev_priv->gpu_error.hangcheck_wq =
-		alloc_ordered_workqueue("i915-hangcheck", 0);
-	if (dev_priv->gpu_error.hangcheck_wq == NULL) {
-		DRM_ERROR("Failed to create our hangcheck workqueue.\n");
-		ret = -ENOMEM;
-		goto out_freedpwq;
-	}
-
 	intel_irq_init(dev_priv);
 	intel_uncore_sanitize(dev);
 
-	/* Try to make sure MCHBAR is enabled before poking at it */
-	intel_setup_mchbar(dev);
 	intel_opregion_setup(dev);
 
-	i915_gem_load(dev);
+	i915_gem_load_init(dev);
+	i915_gem_shrinker_init(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
 	 * integrated graphics even though the support isn't actually there
@@ -1046,8 +1112,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	 * be lost or delayed, but we use them anyways to avoid
 	 * stuck interrupts on some machines.
 	 */
-	if (!IS_I945G(dev) && !IS_I945GM(dev))
-		pci_enable_msi(dev->pdev);
+	if (!IS_I945G(dev) && !IS_I945GM(dev)) {
+		if (pci_enable_msi(dev->pdev) < 0)
+			DRM_DEBUG_DRIVER("can't enable MSI");
+	}
 
 	intel_device_info_runtime_init(dev);
 
@@ -1097,38 +1165,29 @@ out_power_well:
 	intel_power_domains_fini(dev_priv);
 	drm_vblank_cleanup(dev);
 out_gem_unload:
-	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
-	unregister_shrinker(&dev_priv->mm.shrinker);
+	i915_gem_shrinker_cleanup(dev_priv);
 
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
 
 	intel_teardown_mchbar(dev);
 	pm_qos_remove_request(&dev_priv->pm_qos);
-	destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
-out_freedpwq:
-	destroy_workqueue(dev_priv->hotplug.dp_wq);
-out_freewq:
-	destroy_workqueue(dev_priv->wq);
-out_mtrrfree:
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	io_mapping_free(dev_priv->gtt.mappable);
 out_gtt:
 	i915_global_gtt_cleanup(dev);
-out_freecsr:
-	intel_csr_ucode_fini(dev_priv);
+out_uncore_fini:
 	intel_uncore_fini(dev);
-	pci_iounmap(dev->pdev, dev_priv->regs);
+	i915_mmio_cleanup(dev);
 put_bridge:
 	pci_dev_put(dev_priv->bridge_dev);
-free_priv:
-	kmem_cache_destroy(dev_priv->requests);
-	kmem_cache_destroy(dev_priv->vmas);
-	kmem_cache_destroy(dev_priv->objects);
-
+	i915_gem_load_cleanup(dev);
+out_runtime_pm_put:
 	intel_runtime_pm_put(dev_priv);
-
+	i915_workqueues_cleanup(dev_priv);
+out_free_priv:
 	kfree(dev_priv);
+
 	return ret;
 }
 
@@ -1153,8 +1212,7 @@ int i915_driver_unload(struct drm_device *dev)
 
 	i915_teardown_sysfs(dev);
 
-	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
-	unregister_shrinker(&dev_priv->mm.shrinker);
+	i915_gem_shrinker_cleanup(dev_priv);
 
 	io_mapping_free(dev_priv->gtt.mappable);
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
@@ -1182,6 +1240,8 @@ int i915_driver_unload(struct drm_device *dev)
 	vga_switcheroo_unregister_client(dev->pdev);
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
 
+	intel_csr_ucode_fini(dev_priv);
+
 	/* Free error state after interrupts are fully disabled. */
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	i915_destroy_error_state(dev);
@@ -1200,27 +1260,17 @@ int i915_driver_unload(struct drm_device *dev)
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	intel_fbc_cleanup_cfb(dev_priv);
-	i915_gem_cleanup_stolen(dev);
 
-	intel_csr_ucode_fini(dev_priv);
-
-	intel_teardown_mchbar(dev);
-
-	destroy_workqueue(dev_priv->hotplug.dp_wq);
-	destroy_workqueue(dev_priv->wq);
-	destroy_workqueue(dev_priv->gpu_error.hangcheck_wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
 	i915_global_gtt_cleanup(dev);
 
 	intel_uncore_fini(dev);
-	if (dev_priv->regs != NULL)
-		pci_iounmap(dev->pdev, dev_priv->regs);
+	i915_mmio_cleanup(dev);
 
-	kmem_cache_destroy(dev_priv->requests);
-	kmem_cache_destroy(dev_priv->vmas);
-	kmem_cache_destroy(dev_priv->objects);
+	i915_gem_load_cleanup(dev);
 	pci_dev_put(dev_priv->bridge_dev);
+	i915_workqueues_cleanup(dev_priv);
 	kfree(dev_priv);
 
 	return 0;
@@ -1261,8 +1311,6 @@ void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
 	i915_gem_context_close(dev, file);
 	i915_gem_release(dev, file);
 	mutex_unlock(&dev->struct_mutex);
-
-	intel_modeset_preclose(dev, file);
 }
 
 void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f357058c74d9..20e82008b8b6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -35,9 +35,12 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+#include <linux/apple-gmux.h>
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
+#include <linux/vgaarb.h>
+#include <linux/vga_switcheroo.h>
 #include <drm/drm_crtc_helper.h>
 
 static struct drm_driver driver;
@@ -600,13 +603,7 @@ static int i915_drm_suspend(struct drm_device *dev)
 
 	intel_suspend_gt_powersave(dev);
 
-	/*
-	 * Disable CRTCs directly since we want to preserve sw state
-	 * for _thaw. Also, power gate the CRTC power wells.
-	 */
-	drm_modeset_lock_all(dev);
 	intel_display_suspend(dev);
-	drm_modeset_unlock_all(dev);
 
 	intel_dp_mst_suspend(dev);
 
@@ -761,9 +758,7 @@ static int i915_drm_resume(struct drm_device *dev)
 		dev_priv->display.hpd_irq_setup(dev);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	drm_modeset_lock_all(dev);
 	intel_display_resume(dev);
-	drm_modeset_unlock_all(dev);
 
 	intel_dp_mst_resume(dev);
 
@@ -969,6 +964,15 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (PCI_FUNC(pdev->devfn))
 		return -ENODEV;
 
+	/*
+	 * apple-gmux is needed on dual GPU MacBook Pro
+	 * to probe the panel if we're the inactive GPU.
+	 */
+	if (IS_ENABLED(CONFIG_VGA_ARB) && IS_ENABLED(CONFIG_VGA_SWITCHEROO) &&
+	    apple_gmux_present() && pdev != vga_default_device() &&
+	    !vga_switcheroo_handler_flags())
+		return -EPROBE_DEFER;
+
 	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
@@ -1079,7 +1083,6 @@ static int bxt_resume_prepare(struct drm_i915_private *dev_priv)
 	 */
 	broxton_init_cdclk(dev);
 	broxton_ddi_phy_init(dev);
-	intel_prepare_ddi(dev);
 
 	return 0;
 }
@@ -1338,8 +1341,8 @@ static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
 		return 0;
 
 	DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n",
-			wait_for_on ? "on" : "off",
-			I915_READ(VLV_GTLC_PW_STATUS));
+		      onoff(wait_for_on),
+		      I915_READ(VLV_GTLC_PW_STATUS));
 
 	/*
 	 * RC6 transitioning can be delayed up to 2 msec (see
@@ -1348,7 +1351,7 @@ static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
 	err = wait_for(COND, 3);
 	if (err)
 		DRM_ERROR("timeout waiting for GT wells to go %s\n",
-			  wait_for_on ? "on" : "off");
+			  onoff(wait_for_on));
 
 	return err;
 #undef COND
@@ -1359,7 +1362,7 @@ static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
 	if (!(I915_READ(VLV_GTLC_PW_STATUS) & VLV_GTLC_ALLOWWAKEERR))
 		return;
 
-	DRM_ERROR("GT register access while GT waking disabled\n");
+	DRM_DEBUG_DRIVER("GT register access while GT waking disabled\n");
 	I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR);
 }
 
@@ -1503,6 +1506,10 @@ static int intel_runtime_suspend(struct device *device)
 
 	enable_rpm_wakeref_asserts(dev_priv);
 	WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count));
+
+	if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
+		DRM_ERROR("Unclaimed access detected prior to suspending\n");
+
 	dev_priv->pm.suspended = true;
 
 	/*
@@ -1551,6 +1558,8 @@ static int intel_runtime_resume(struct device *device)
 
 	intel_opregion_notify_adapter(dev, PCI_D0);
 	dev_priv->pm.suspended = false;
+	if (intel_uncore_unclaimed_mmio(dev_priv))
+		DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
 
 	intel_guc_resume(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0847b915545..10480939159c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -34,6 +34,7 @@
 #include <uapi/drm/drm_fourcc.h>
 
 #include <drm/drmP.h>
+#include "i915_params.h"
 #include "i915_reg.h"
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
@@ -58,7 +59,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20151218"
+#define DRIVER_DATE		"20160229"
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
@@ -69,11 +70,11 @@
 		BUILD_BUG_ON(__i915_warn_cond); \
 	WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
 #else
-#define WARN_ON(x) WARN((x), "WARN_ON(%s)", #x )
+#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
 #endif
 
 #undef WARN_ON_ONCE
-#define WARN_ON_ONCE(x) WARN_ONCE((x), "WARN_ON_ONCE(%s)", #x )
+#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
 
 #define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
 			     (long) (x), __func__);
@@ -87,31 +88,25 @@
  */
 #define I915_STATE_WARN(condition, format...) ({			\
 	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on)) {					\
-		if (i915.verbose_state_checks)				\
-			WARN(1, format);				\
-		else 							\
+	if (unlikely(__ret_warn_on))					\
+		if (!WARN(i915.verbose_state_checks, format))		\
 			DRM_ERROR(format);				\
-	}								\
 	unlikely(__ret_warn_on);					\
 })
 
-#define I915_STATE_WARN_ON(condition) ({				\
-	int __ret_warn_on = !!(condition);				\
-	if (unlikely(__ret_warn_on)) {					\
-		if (i915.verbose_state_checks)				\
-			WARN(1, "WARN_ON(" #condition ")\n");		\
-		else 							\
-			DRM_ERROR("WARN_ON(" #condition ")\n");		\
-	}								\
-	unlikely(__ret_warn_on);					\
-})
+#define I915_STATE_WARN_ON(x)						\
+	I915_STATE_WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
 
 static inline const char *yesno(bool v)
 {
 	return v ? "yes" : "no";
 }
 
+static inline const char *onoff(bool v)
+{
+	return v ? "on" : "off";
+}
+
 enum pipe {
 	INVALID_PIPE = -1,
 	PIPE_A = 0,
@@ -266,6 +261,9 @@ struct i915_hotplug {
 
 #define for_each_pipe(__dev_priv, __p) \
 	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++)
+#define for_each_pipe_masked(__dev_priv, __p, __mask) \
+	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
+		for_each_if ((__mask) & (1 << (__p)))
 #define for_each_plane(__dev_priv, __pipe, __p)				\
 	for ((__p) = 0;							\
 	     (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1;	\
@@ -339,7 +337,7 @@ struct drm_i915_file_private {
 		unsigned boosts;
 	} rps;
 
-	struct intel_engine_cs *bsd_ring;
+	unsigned int bsd_ring;
 };
 
 enum intel_dpll_id {
@@ -633,6 +631,7 @@ struct drm_i915_display_funcs {
 			  struct dpll *best_clock);
 	int (*compute_pipe_wm)(struct intel_crtc *crtc,
 			       struct drm_atomic_state *state);
+	void (*program_watermarks)(struct intel_crtc_state *cstate);
 	void (*update_wm)(struct drm_crtc *crtc);
 	int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
 	void (*modeset_commit_cdclk)(struct drm_atomic_state *state);
@@ -657,9 +656,6 @@ struct drm_i915_display_funcs {
 			  struct drm_i915_gem_object *obj,
 			  struct drm_i915_gem_request *req,
 			  uint32_t flags);
-	void (*update_primary_plane)(struct drm_crtc *crtc,
-				     struct drm_framebuffer *fb,
-				     int x, int y);
 	void (*hpd_irq_setup)(struct drm_device *dev);
 	/* clock updates for mode set */
 	/* cursor updates */
@@ -726,6 +722,8 @@ struct intel_uncore {
 		i915_reg_t reg_post;
 		u32 val_reset;
 	} fw_domain[FW_DOMAIN_ID_COUNT];
+
+	int unclaimed_mmio_check;
 };
 
 /* Iterate over initialised fw domains */
@@ -890,6 +888,9 @@ struct intel_context {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
 		int pin_count;
+		struct i915_vma *lrc_vma;
+		u64 lrc_desc;
+		uint32_t *lrc_reg_state;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
@@ -903,16 +904,15 @@ enum fb_op_origin {
 	ORIGIN_DIRTYFB,
 };
 
-struct i915_fbc {
+struct intel_fbc {
 	/* This is always the inner lock when overlapping with struct_mutex and
 	 * it's the outer lock when overlapping with stolen_lock. */
 	struct mutex lock;
 	unsigned threshold;
-	unsigned int fb_id;
 	unsigned int possible_framebuffer_bits;
 	unsigned int busy_bits;
+	unsigned int visible_pipes_mask;
 	struct intel_crtc *crtc;
-	int y;
 
 	struct drm_mm_node compressed_fb;
 	struct drm_mm_node *compressed_llb;
@@ -922,18 +922,52 @@ struct i915_fbc {
 	bool enabled;
 	bool active;
 
+	struct intel_fbc_state_cache {
+		struct {
+			unsigned int mode_flags;
+			uint32_t hsw_bdw_pixel_rate;
+		} crtc;
+
+		struct {
+			unsigned int rotation;
+			int src_w;
+			int src_h;
+			bool visible;
+		} plane;
+
+		struct {
+			u64 ilk_ggtt_offset;
+			uint32_t pixel_format;
+			unsigned int stride;
+			int fence_reg;
+			unsigned int tiling_mode;
+		} fb;
+	} state_cache;
+
+	struct intel_fbc_reg_params {
+		struct {
+			enum pipe pipe;
+			enum plane plane;
+			unsigned int fence_y_offset;
+		} crtc;
+
+		struct {
+			u64 ggtt_offset;
+			uint32_t pixel_format;
+			unsigned int stride;
+			int fence_reg;
+		} fb;
+
+		int cfb_size;
+	} params;
+
 	struct intel_fbc_work {
 		bool scheduled;
+		u32 scheduled_vblank;
 		struct work_struct work;
-		struct drm_framebuffer *fb;
-		unsigned long enable_jiffies;
 	} work;
 
 	const char *no_fbc_reason;
-
-	bool (*is_active)(struct drm_i915_private *dev_priv);
-	void (*activate)(struct intel_crtc *crtc);
-	void (*deactivate)(struct drm_i915_private *dev_priv);
 };
 
 /**
@@ -973,6 +1007,7 @@ struct i915_psr {
 	unsigned busy_frontbuffer_bits;
 	bool psr2_support;
 	bool aux_frame_sync;
+	bool link_standby;
 };
 
 enum intel_pch {
@@ -1302,7 +1337,7 @@ struct i915_gem_mm {
 	bool busy;
 
 	/* the indicator for dispatch video commands on two BSD rings */
-	int bsd_ring_dispatch_index;
+	unsigned int bsd_ring_dispatch_index;
 
 	/** Bit 6 swizzling required for X tiling */
 	uint32_t bit_6_swizzle_x;
@@ -1488,7 +1523,7 @@ struct intel_vbt_data {
 		u8 seq_version;
 		u32 size;
 		u8 *data;
-		u8 *sequence[MIPI_SEQ_MAX];
+		const u8 *sequence[MIPI_SEQ_MAX];
 	} dsi;
 
 	int crt_ddc_pin;
@@ -1660,11 +1695,18 @@ struct i915_wa_reg {
 	u32 mask;
 };
 
-#define I915_MAX_WA_REGS 16
+/*
+ * RING_MAX_NONPRIV_SLOTS is per-engine but at this point we are only
+ * allowing it for RCS as we don't foresee any requirement of having
+ * a whitelist for other engines. When it is really required for
+ * other engines then the limit need to be increased.
+ */
+#define I915_MAX_WA_REGS (16 + RING_MAX_NONPRIV_SLOTS)
 
 struct i915_workarounds {
 	struct i915_wa_reg reg[I915_MAX_WA_REGS];
 	u32 count;
+	u32 hw_whitelist_count[I915_NUM_RINGS];
 };
 
 struct i915_virtual_gpu {
@@ -1761,7 +1803,7 @@ struct drm_i915_private {
 	u32 pipestat_irq_mask[I915_MAX_PIPES];
 
 	struct i915_hotplug hotplug;
-	struct i915_fbc fbc;
+	struct intel_fbc fbc;
 	struct i915_drrs drrs;
 	struct intel_opregion opregion;
 	struct intel_vbt_data vbt;
@@ -1785,7 +1827,7 @@ struct drm_i915_private {
 
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_boot_cdclk;
-	unsigned int cdclk_freq, max_cdclk_freq;
+	unsigned int cdclk_freq, max_cdclk_freq, atomic_cdclk_freq;
 	unsigned int max_dotclk_freq;
 	unsigned int hpll_freq;
 	unsigned int czclk_freq;
@@ -1810,6 +1852,7 @@ struct drm_i915_private {
 
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
+	struct drm_atomic_state *modeset_restore_state;
 
 	struct list_head vm_list; /* Global list of all address spaces */
 	struct i915_gtt gtt; /* VM representing the global address space */
@@ -1830,8 +1873,13 @@ struct drm_i915_private {
 	struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
 #endif
 
+	/* dpll and cdclk state is protected by connection_mutex */
 	int num_shared_dpll;
 	struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
+
+	unsigned int active_crtcs;
+	unsigned int min_pixclk[I915_MAX_PIPES];
+
 	int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
 
 	struct i915_workarounds workarounds;
@@ -1946,6 +1994,8 @@ struct drm_i915_private {
 		void (*stop_ring)(struct intel_engine_cs *ring);
 	} gt;
 
+	struct intel_context *kernel_context;
+
 	bool edp_low_vswing;
 
 	/* perform PHY state sanity checks? */
@@ -2270,9 +2320,9 @@ struct drm_i915_gem_request {
 
 };
 
-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-			   struct intel_context *ctx,
-			   struct drm_i915_gem_request **req_out);
+struct drm_i915_gem_request * __must_check
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+		       struct intel_context *ctx);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 void i915_gem_request_free(struct kref *req_ref);
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
@@ -2581,6 +2631,11 @@ struct drm_i915_cmd_table {
 
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
+/* WaRsDisableCoarsePowerGating:skl,bxt */
+#define NEEDS_WaRsDisableCoarsePowerGating(dev) (IS_BXT_REVID(dev, 0, BXT_REVID_A1) || \
+						 ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && \
+						  IS_SKL_REVID(dev, 0, SKL_REVID_F0)))
 /*
  * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
  * even when in MSI mode. This results in spurious interrupt warnings if the
@@ -2670,44 +2725,7 @@ extern int i915_max_ioctl;
 extern int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state);
 extern int i915_resume_switcheroo(struct drm_device *dev);
 
-/* i915_params.c */
-struct i915_params {
-	int modeset;
-	int panel_ignore_lid;
-	int semaphores;
-	int lvds_channel_mode;
-	int panel_use_ssc;
-	int vbt_sdvo_panel_type;
-	int enable_rc6;
-	int enable_dc;
-	int enable_fbc;
-	int enable_ppgtt;
-	int enable_execlists;
-	int enable_psr;
-	unsigned int preliminary_hw_support;
-	int disable_power_well;
-	int enable_ips;
-	int invert_brightness;
-	int enable_cmd_parser;
-	/* leave bools at the end to not create holes */
-	bool enable_hangcheck;
-	bool fastboot;
-	bool prefault_disable;
-	bool load_detect_test;
-	bool reset;
-	bool disable_display;
-	bool disable_vtd_wa;
-	bool enable_guc_submission;
-	int guc_log_level;
-	int use_mmio_flip;
-	int mmio_debug;
-	bool verbose_state_checks;
-	bool nuclear_pageflip;
-	int edp_vswing;
-};
-extern struct i915_params i915 __read_mostly;
-
-				/* i915_dma.c */
+/* i915_dma.c */
 extern int i915_driver_load(struct drm_device *, unsigned long flags);
 extern int i915_driver_unload(struct drm_device *);
 extern int i915_driver_open(struct drm_device *dev, struct drm_file *file);
@@ -2750,7 +2768,8 @@ extern void intel_uncore_sanitize(struct drm_device *dev);
 extern void intel_uncore_early_sanitize(struct drm_device *dev,
 					bool restore_forcewake);
 extern void intel_uncore_init(struct drm_device *dev);
-extern void intel_uncore_check_errors(struct drm_device *dev);
+extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
+extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
 extern void intel_uncore_fini(struct drm_device *dev);
 extern void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore);
 const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
@@ -2872,7 +2891,8 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
-void i915_gem_load(struct drm_device *dev);
+void i915_gem_load_init(struct drm_device *dev);
+void i915_gem_load_cleanup(struct drm_device *dev);
 void *i915_gem_object_alloc(struct drm_device *dev);
 void i915_gem_object_free(struct drm_i915_gem_object *obj);
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
@@ -3136,18 +3156,11 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj);
 /* Some GGTT VM helpers */
 #define i915_obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
-static inline bool i915_is_ggtt(struct i915_address_space *vm)
-{
-	struct i915_address_space *ggtt =
-		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
-	return vm == ggtt;
-}
 
 static inline struct i915_hw_ppgtt *
 i915_vm_to_ppgtt(struct i915_address_space *vm)
 {
 	WARN_ON(i915_is_ggtt(vm));
-
 	return container_of(vm, struct i915_hw_ppgtt, base);
 }
 
@@ -3285,6 +3298,7 @@ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
 #define I915_SHRINK_ACTIVE 0x8
 unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 void i915_gem_shrinker_init(struct drm_i915_private *dev_priv);
+void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv);
 
 
 /* i915_gem_tiling.c */
@@ -3455,16 +3469,14 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val
 u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
 void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
 u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
+u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
 u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
 u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
 u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
 void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
 void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
 u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bb44bad15403..3d31d3ac589e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -138,10 +138,10 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
+	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
 		if (vma->pin_count)
 			pinned += vma->node.size;
-	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
+	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
 		if (vma->pin_count)
 			pinned += vma->node.size;
 	mutex_unlock(&dev->struct_mutex);
@@ -272,7 +272,7 @@ drop_pages(struct drm_i915_gem_object *obj)
 	int ret;
 
 	drm_gem_object_reference(&obj->base);
-	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
 		if (i915_vma_unbind(vma))
 			break;
 
@@ -489,7 +489,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	*needs_clflush = 0;
 
-	if (!obj->base.filp)
+	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
 		return -EINVAL;
 
 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
@@ -1251,7 +1251,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
-	s64 before, now;
+	s64 before = 0; /* Only to silence a compiler warning. */
 	int ret;
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
@@ -1271,14 +1271,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			return -ETIME;
 
 		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
+
+		/*
+		 * Record current time in case interrupted by signal, or wedged.
+		 */
+		before = ktime_get_raw_ns();
 	}
 
 	if (INTEL_INFO(dev_priv)->gen >= 6)
 		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
 
-	/* Record current time in case interrupted by signal, or wedged */
 	trace_i915_gem_request_wait_begin(req);
-	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
 	ret = __i915_spin_request(req, state);
@@ -1343,11 +1346,10 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	finish_wait(&ring->irq_queue, &wait);
 
 out:
-	now = ktime_get_raw_ns();
 	trace_i915_gem_request_wait_end(req);
 
 	if (timeout) {
-		s64 tres = *timeout - (now - before);
+		s64 tres = *timeout - (ktime_get_raw_ns() - before);
 
 		*timeout = tres < 0 ? 0 : tres;
 
@@ -2414,7 +2416,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
 	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
 
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
+	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
 static void
@@ -2452,9 +2454,9 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
 	list_move_tail(&obj->global_list,
 		       &to_i915(obj->base.dev)->mm.bound_list);
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (!list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!list_empty(&vma->vm_link))
+			list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	}
 
 	i915_gem_request_assign(&obj->last_fenced_req, NULL);
@@ -2677,10 +2679,8 @@ void i915_gem_request_free(struct kref *req_ref)
 		i915_gem_request_remove_from_client(req);
 
 	if (ctx) {
-		if (i915.enable_execlists) {
-			if (ctx != req->ring->default_context)
-				intel_lr_context_unpin(req);
-		}
+		if (i915.enable_execlists && ctx != req->i915->kernel_context)
+			intel_lr_context_unpin(ctx, req->ring);
 
 		i915_gem_context_unreference(ctx);
 	}
@@ -2688,9 +2688,10 @@ void i915_gem_request_free(struct kref *req_ref)
 	kmem_cache_free(req->i915->requests, req);
 }
 
-int i915_gem_request_alloc(struct intel_engine_cs *ring,
-			   struct intel_context *ctx,
-			   struct drm_i915_gem_request **req_out)
+static inline int
+__i915_gem_request_alloc(struct intel_engine_cs *ring,
+			 struct intel_context *ctx,
+			 struct drm_i915_gem_request **req_out)
 {
 	struct drm_i915_private *dev_priv = to_i915(ring->dev);
 	struct drm_i915_gem_request *req;
@@ -2753,6 +2754,31 @@ err:
 	return ret;
 }
 
+/**
+ * i915_gem_request_alloc - allocate a request structure
+ *
+ * @engine: engine that we wish to issue the request on.
+ * @ctx: context that the request will be associated with.
+ *       This can be NULL if the request is not directly related to
+ *       any specific user context, in which case this function will
+ *       choose an appropriate context to use.
+ *
+ * Returns a pointer to the allocated request if successful,
+ * or an error code if not.
+ */
+struct drm_i915_gem_request *
+i915_gem_request_alloc(struct intel_engine_cs *engine,
+		       struct intel_context *ctx)
+{
+	struct drm_i915_gem_request *req;
+	int err;
+
+	if (ctx == NULL)
+		ctx = to_i915(engine->dev)->kernel_context;
+	err = __i915_gem_request_alloc(engine, ctx, &req);
+	return err ? ERR_PTR(err) : req;
+}
+
 void i915_gem_request_cancel(struct drm_i915_gem_request *req)
 {
 	intel_ring_reserved_space_cancel(req->ringbuf);
@@ -2944,11 +2970,9 @@ i915_gem_retire_requests(struct drm_device *dev)
 		i915_gem_retire_requests_ring(ring);
 		idle &= list_empty(&ring->request_list);
 		if (i915.enable_execlists) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&ring->execlist_lock, flags);
+			spin_lock_irq(&ring->execlist_lock);
 			idle &= list_empty(&ring->execlist_queue);
-			spin_unlock_irqrestore(&ring->execlist_lock, flags);
+			spin_unlock_irq(&ring->execlist_lock);
 
 			intel_execlists_retire_requests(ring);
 		}
@@ -3170,9 +3194,13 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			return 0;
 
 		if (*to_req == NULL) {
-			ret = i915_gem_request_alloc(to, to->default_context, to_req);
-			if (ret)
-				return ret;
+			struct drm_i915_gem_request *req;
+
+			req = i915_gem_request_alloc(to, NULL);
+			if (IS_ERR(req))
+				return PTR_ERR(req);
+
+			*to_req = req;
 		}
 
 		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
@@ -3289,7 +3317,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	if (list_empty(&vma->vma_link))
+	if (list_empty(&vma->obj_link))
 		return 0;
 
 	if (!drm_mm_node_allocated(&vma->node)) {
@@ -3308,8 +3336,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 			return ret;
 	}
 
-	if (i915_is_ggtt(vma->vm) &&
-	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
+	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 		i915_gem_object_finish_gtt(obj);
 
 		/* release the fence reg _after_ flushing */
@@ -3323,8 +3350,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 	vma->vm->unbind_vma(vma);
 	vma->bound = 0;
 
-	list_del_init(&vma->mm_list);
-	if (i915_is_ggtt(vma->vm)) {
+	list_del_init(&vma->vm_link);
+	if (vma->is_ggtt) {
 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
 			obj->map_and_fenceable = false;
 		} else if (vma->ggtt_view.pages) {
@@ -3372,9 +3399,9 @@ int i915_gpu_idle(struct drm_device *dev)
 		if (!i915.enable_execlists) {
 			struct drm_i915_gem_request *req;
 
-			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-			if (ret)
-				return ret;
+			req = i915_gem_request_alloc(ring, NULL);
+			if (IS_ERR(req))
+				return PTR_ERR(req);
 
 			ret = i915_switch_context(req);
 			if (ret) {
@@ -3581,7 +3608,7 @@ search_free:
 		goto err_remove_node;
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&vma->mm_list, &vm->inactive_list);
+	list_add_tail(&vma->vm_link, &vm->inactive_list);
 
 	return vma;
 
@@ -3746,7 +3773,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	/* And bump the LRU for this access */
 	vma = i915_gem_obj_to_ggtt(obj);
 	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
-		list_move_tail(&vma->mm_list,
+		list_move_tail(&vma->vm_link,
 			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
 
 	return 0;
@@ -3781,7 +3808,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	 * catch the issue of the CS prefetch crossing page boundaries and
 	 * reading an invalid PTE on older architectures.
 	 */
-	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
@@ -3844,7 +3871,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 			 */
 		}
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
@@ -3854,7 +3881,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
+	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		vma->node.color = cache_level;
 	obj->cache_level = cache_level;
 
@@ -4328,10 +4355,20 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto unref;
 
-	BUILD_BUG_ON(I915_NUM_RINGS > 16);
-	args->busy = obj->active << 16;
-	if (obj->last_write_req)
-		args->busy |= obj->last_write_req->ring->id;
+	args->busy = 0;
+	if (obj->active) {
+		int i;
+
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			struct drm_i915_gem_request *req;
+
+			req = obj->last_read_req[i];
+			if (req)
+				args->busy |= 1 << (16 + req->ring->exec_id);
+		}
+		if (obj->last_write_req)
+			args->busy |= obj->last_write_req->ring->exec_id;
+	}
 
 unref:
 	drm_gem_object_unreference(&obj->base);
@@ -4518,7 +4555,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	trace_i915_gem_object_destroy(obj);
 
-	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
 		int ret;
 
 		vma->pin_count = 0;
@@ -4575,7 +4612,7 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 				     struct i915_address_space *vm)
 {
 	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
 		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
 		    vma->vm == vm)
 			return vma;
@@ -4592,7 +4629,7 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 	if (WARN_ONCE(!view, "no view specified"))
 		return ERR_PTR(-EINVAL);
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
+	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		if (vma->vm == ggtt &&
 		    i915_ggtt_view_equal(&vma->ggtt_view, view))
 			return vma;
@@ -4601,19 +4638,16 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
 
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
-	struct i915_address_space *vm = NULL;
 	WARN_ON(vma->node.allocated);
 
 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
 	if (!list_empty(&vma->exec_list))
 		return;
 
-	vm = vma->vm;
-
-	if (!i915_is_ggtt(vm))
-		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+	if (!vma->is_ggtt)
+		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
 
-	list_del(&vma->vma_link);
+	list_del(&vma->obj_link);
 
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
@@ -4833,7 +4867,7 @@ i915_gem_init_hw(struct drm_device *dev)
 	 */
 	init_unused_rings(dev);
 
-	BUG_ON(!dev_priv->ring[RCS].default_context);
+	BUG_ON(!dev_priv->kernel_context);
 
 	ret = i915_ppgtt_init_hw(dev);
 	if (ret) {
@@ -4870,10 +4904,9 @@ i915_gem_init_hw(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i) {
 		struct drm_i915_gem_request *req;
 
-		WARN_ON(!ring->default_context);
-
-		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-		if (ret) {
+		req = i915_gem_request_alloc(ring, NULL);
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
 			i915_gem_cleanup_ringbuffer(dev);
 			goto out;
 		}
@@ -4996,7 +5029,7 @@ init_ring_lists(struct intel_engine_cs *ring)
 }
 
 void
-i915_gem_load(struct drm_device *dev)
+i915_gem_load_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
@@ -5062,11 +5095,18 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->mm.interruptible = true;
 
-	i915_gem_shrinker_init(dev_priv);
-
 	mutex_init(&dev_priv->fb_tracking.lock);
 }
 
+void i915_gem_load_cleanup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	kmem_cache_destroy(dev_priv->requests);
+	kmem_cache_destroy(dev_priv->vmas);
+	kmem_cache_destroy(dev_priv->objects);
+}
+
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -5113,6 +5153,8 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
 	spin_lock_init(&file_priv->mm.lock);
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
+	file_priv->bsd_ring = -1;
+
 	ret = i915_gem_context_open(dev, file);
 	if (ret)
 		kfree(file_priv);
@@ -5155,8 +5197,8 @@ u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
 
 	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 
-	list_for_each_entry(vma, &o->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
+	list_for_each_entry(vma, &o->vma_list, obj_link) {
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm)
@@ -5174,7 +5216,7 @@ u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
 	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &o->vma_list, vma_link)
+	list_for_each_entry(vma, &o->vma_list, obj_link)
 		if (vma->vm == ggtt &&
 		    i915_ggtt_view_equal(&vma->ggtt_view, view))
 			return vma->node.start;
@@ -5188,8 +5230,8 @@ bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
 {
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &o->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
+	list_for_each_entry(vma, &o->vma_list, obj_link) {
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
@@ -5205,7 +5247,7 @@ bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
 	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &o->vma_list, vma_link)
+	list_for_each_entry(vma, &o->vma_list, obj_link)
 		if (vma->vm == ggtt &&
 		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
 		    drm_mm_node_allocated(&vma->node))
@@ -5218,7 +5260,7 @@ bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
 {
 	struct i915_vma *vma;
 
-	list_for_each_entry(vma, &o->vma_list, vma_link)
+	list_for_each_entry(vma, &o->vma_list, obj_link)
 		if (drm_mm_node_allocated(&vma->node))
 			return true;
 
@@ -5235,8 +5277,8 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 
 	BUG_ON(list_empty(&o->vma_list));
 
-	list_for_each_entry(vma, &o->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
+	list_for_each_entry(vma, &o->vma_list, obj_link) {
+		if (vma->is_ggtt &&
 		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
 			continue;
 		if (vma->vm == vm)
@@ -5248,7 +5290,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
+	list_for_each_entry(vma, &obj->vma_list, obj_link)
 		if (vma->pin_count > 0)
 			return true;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index c25083c78ba7..5dd84e148bba 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -142,7 +142,7 @@ static void i915_gem_context_clean(struct intel_context *ctx)
 		return;
 
 	list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
-				 mm_list) {
+				 vm_link) {
 		if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
 			break;
 	}
@@ -321,6 +321,18 @@ err_destroy:
 	return ERR_PTR(ret);
 }
 
+static void i915_gem_context_unpin(struct intel_context *ctx,
+				   struct intel_engine_cs *engine)
+{
+	if (i915.enable_execlists) {
+		intel_lr_context_unpin(ctx, engine);
+	} else {
+		if (engine->id == RCS && ctx->legacy_hw_ctx.rcs_state)
+			i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
+		i915_gem_context_unreference(ctx);
+	}
+}
+
 void i915_gem_context_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -329,40 +341,31 @@ void i915_gem_context_reset(struct drm_device *dev)
 	if (i915.enable_execlists) {
 		struct intel_context *ctx;
 
-		list_for_each_entry(ctx, &dev_priv->context_list, link) {
+		list_for_each_entry(ctx, &dev_priv->context_list, link)
 			intel_lr_context_reset(dev, ctx);
-		}
-
-		return;
 	}
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct intel_engine_cs *ring = &dev_priv->ring[i];
-		struct intel_context *lctx = ring->last_context;
-
-		if (lctx) {
-			if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
-				i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
 
-			i915_gem_context_unreference(lctx);
+		if (ring->last_context) {
+			i915_gem_context_unpin(ring->last_context, ring);
 			ring->last_context = NULL;
 		}
-
-		/* Force the GPU state to be reinitialised on enabling */
-		if (ring->default_context)
-			ring->default_context->legacy_hw_ctx.initialized = false;
 	}
+
+	/* Force the GPU state to be reinitialised on enabling */
+	dev_priv->kernel_context->legacy_hw_ctx.initialized = false;
 }
 
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_context *ctx;
-	int i;
 
 	/* Init should only be called once per module load. Eventually the
 	 * restriction on the context_disabled check can be loosened. */
-	if (WARN_ON(dev_priv->ring[RCS].default_context))
+	if (WARN_ON(dev_priv->kernel_context))
 		return 0;
 
 	if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) {
@@ -392,12 +395,7 @@ int i915_gem_context_init(struct drm_device *dev)
 		return PTR_ERR(ctx);
 	}
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct intel_engine_cs *ring = &dev_priv->ring[i];
-
-		/* NB: RCS will hold a ref for all rings */
-		ring->default_context = ctx;
-	}
+	dev_priv->kernel_context = ctx;
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
 			i915.enable_execlists ? "LR" :
@@ -408,7 +406,7 @@ int i915_gem_context_init(struct drm_device *dev)
 void i915_gem_context_fini(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_context *dctx = dev_priv->ring[RCS].default_context;
+	struct intel_context *dctx = dev_priv->kernel_context;
 	int i;
 
 	if (dctx->legacy_hw_ctx.rcs_state) {
@@ -424,28 +422,21 @@ void i915_gem_context_fini(struct drm_device *dev)
 		 * to offset the do_switch part, so that i915_gem_context_unreference()
 		 * can then free the base object correctly. */
 		WARN_ON(!dev_priv->ring[RCS].last_context);
-		if (dev_priv->ring[RCS].last_context == dctx) {
-			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
-			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
-			i915_gem_context_unreference(dctx);
-			dev_priv->ring[RCS].last_context = NULL;
-		}
 
 		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 	}
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = I915_NUM_RINGS; --i >= 0;) {
 		struct intel_engine_cs *ring = &dev_priv->ring[i];
 
-		if (ring->last_context)
-			i915_gem_context_unreference(ring->last_context);
-
-		ring->default_context = NULL;
-		ring->last_context = NULL;
+		if (ring->last_context) {
+			i915_gem_context_unpin(ring->last_context, ring);
+			ring->last_context = NULL;
+		}
 	}
 
 	i915_gem_context_unreference(dctx);
+	dev_priv->kernel_context = NULL;
 }
 
 int i915_gem_context_enable(struct drm_i915_gem_request *req)
@@ -864,6 +855,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (!contexts_enabled(dev))
 		return -ENODEV;
 
+	if (args->pad != 0)
+		return -EINVAL;
+
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
@@ -887,6 +881,9 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	struct intel_context *ctx;
 	int ret;
 
+	if (args->pad != 0)
+		return -EINVAL;
+
 	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
 		return -ENOENT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index e9c2bfd85b52..0506016e18e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -193,10 +193,26 @@ static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_n
 
 static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 {
-	return -EINVAL;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	int ret;
+
+	if (obj->base.size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (!obj->base.filp)
+		return -ENODEV;
+
+	ret = obj->base.filp->f_op->mmap(obj->base.filp, vma);
+	if (ret)
+		return ret;
+
+	fput(vma->vm_file);
+	vma->vm_file = get_file(obj->base.filp);
+
+	return 0;
 }
 
-static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
@@ -212,6 +228,22 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size
 	return ret;
 }
 
+static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	struct drm_device *dev = obj->base.dev;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
 static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.map_dma_buf = i915_gem_map_dma_buf,
 	.unmap_dma_buf = i915_gem_unmap_dma_buf,
@@ -224,6 +256,7 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.vmap = i915_gem_dmabuf_vmap,
 	.vunmap = i915_gem_dmabuf_vunmap,
 	.begin_cpu_access = i915_gem_begin_cpu_access,
+	.end_cpu_access = i915_gem_end_cpu_access,
 };
 
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 07c6e4d320c9..ea1f8d1bd228 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -116,7 +116,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
 
 search_again:
 	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(vma, &vm->inactive_list, mm_list) {
+	list_for_each_entry(vma, &vm->inactive_list, vm_link) {
 		if (mark_free(vma, &unwind_list))
 			goto found;
 	}
@@ -125,7 +125,7 @@ search_again:
 		goto none;
 
 	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(vma, &vm->active_list, mm_list) {
+	list_for_each_entry(vma, &vm->active_list, vm_link) {
 		if (mark_free(vma, &unwind_list))
 			goto found;
 	}
@@ -270,7 +270,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 		WARN_ON(!list_empty(&vm->active_list));
 	}
 
-	list_for_each_entry_safe(vma, next, &vm->inactive_list, mm_list)
+	list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
 		if (vma->pin_count == 0)
 			WARN_ON(i915_vma_unbind(vma));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dccb517361b3..1328bc5021b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -193,13 +193,10 @@ static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 		return eb->lut[handle];
 	} else {
 		struct hlist_head *head;
-		struct hlist_node *node;
+		struct i915_vma *vma;
 
 		head = &eb->buckets[handle & eb->and];
-		hlist_for_each(node, head) {
-			struct i915_vma *vma;
-
-			vma = hlist_entry(node, struct i915_vma, exec_node);
+		hlist_for_each_entry(vma, head, exec_node) {
 			if (vma->exec_handle == handle)
 				return vma;
 		}
@@ -671,7 +668,7 @@ need_reloc_mappable(struct i915_vma *vma)
 	if (entry->relocation_count == 0)
 		return false;
 
-	if (!i915_is_ggtt(vma->vm))
+	if (!vma->is_ggtt)
 		return false;
 
 	/* See also use_cpu_reloc() */
@@ -690,8 +687,7 @@ eb_vma_misplaced(struct i915_vma *vma)
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
-	       !i915_is_ggtt(vma->vm));
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
@@ -1309,6 +1305,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 	exec_start = params->batch_obj_vm_offset +
 		     params->args_batch_start_offset;
 
+	if (exec_len == 0)
+		exec_len = params->batch_obj->base.size;
+
 	ret = ring->dispatch_execbuffer(params->request,
 					exec_start, exec_len,
 					params->dispatch_flags);
@@ -1325,33 +1324,23 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
 
 /**
  * Find one BSD ring to dispatch the corresponding BSD command.
- * The Ring ID is returned.
+ * The ring index is returned.
  */
-static int gen8_dispatch_bsd_ring(struct drm_device *dev,
-				  struct drm_file *file)
+static unsigned int
+gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	/* Check whether the file_priv is using one ring */
-	if (file_priv->bsd_ring)
-		return file_priv->bsd_ring->id;
-	else {
-		/* If no, use the ping-pong mechanism to select one ring */
-		int ring_id;
-
-		mutex_lock(&dev->struct_mutex);
-		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
-			ring_id = VCS;
-			dev_priv->mm.bsd_ring_dispatch_index = 1;
-		} else {
-			ring_id = VCS2;
-			dev_priv->mm.bsd_ring_dispatch_index = 0;
-		}
-		file_priv->bsd_ring = &dev_priv->ring[ring_id];
-		mutex_unlock(&dev->struct_mutex);
-		return ring_id;
+	/* Check whether the file_priv has already selected one ring. */
+	if ((int)file_priv->bsd_ring < 0) {
+		/* If not, use the ping-pong mechanism to select one. */
+		mutex_lock(&dev_priv->dev->struct_mutex);
+		file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index;
+		dev_priv->mm.bsd_ring_dispatch_index ^= 1;
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 	}
+
+	return file_priv->bsd_ring;
 }
 
 static struct drm_i915_gem_object *
@@ -1374,6 +1363,64 @@ eb_get_batch(struct eb_vmas *eb)
 	return vma->obj;
 }
 
+#define I915_USER_RINGS (4)
+
+static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
+	[I915_EXEC_DEFAULT]	= RCS,
+	[I915_EXEC_RENDER]	= RCS,
+	[I915_EXEC_BLT]		= BCS,
+	[I915_EXEC_BSD]		= VCS,
+	[I915_EXEC_VEBOX]	= VECS
+};
+
+static int
+eb_select_ring(struct drm_i915_private *dev_priv,
+	       struct drm_file *file,
+	       struct drm_i915_gem_execbuffer2 *args,
+	       struct intel_engine_cs **ring)
+{
+	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
+
+	if (user_ring_id > I915_USER_RINGS) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
+		return -EINVAL;
+	}
+
+	if ((user_ring_id != I915_EXEC_BSD) &&
+	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
+		DRM_DEBUG("execbuf with non bsd ring but with invalid "
+			  "bsd dispatch flags: %d\n", (int)(args->flags));
+		return -EINVAL;
+	}
+
+	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
+		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
+
+		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
+			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
+		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
+			   bsd_idx <= I915_EXEC_BSD_RING2) {
+			bsd_idx >>= I915_EXEC_BSD_SHIFT;
+			bsd_idx--;
+		} else {
+			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
+				  bsd_idx);
+			return -EINVAL;
+		}
+
+		*ring = &dev_priv->ring[_VCS(bsd_idx)];
+	} else {
+		*ring = &dev_priv->ring[user_ring_map[user_ring_id]];
+	}
+
+	if (!intel_ring_initialized(*ring)) {
+		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1381,6 +1428,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_request *req = NULL;
 	struct eb_vmas *eb;
 	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
@@ -1411,51 +1459,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		dispatch_flags |= I915_DISPATCH_PINNED;
 
-	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
-		DRM_DEBUG("execbuf with unknown ring: %d\n",
-			  (int)(args->flags & I915_EXEC_RING_MASK));
-		return -EINVAL;
-	}
-
-	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
-	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
-		DRM_DEBUG("execbuf with non bsd ring but with invalid "
-			"bsd dispatch flags: %d\n", (int)(args->flags));
-		return -EINVAL;
-	} 
-
-	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
-		ring = &dev_priv->ring[RCS];
-	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
-		if (HAS_BSD2(dev)) {
-			int ring_id;
-
-			switch (args->flags & I915_EXEC_BSD_MASK) {
-			case I915_EXEC_BSD_DEFAULT:
-				ring_id = gen8_dispatch_bsd_ring(dev, file);
-				ring = &dev_priv->ring[ring_id];
-				break;
-			case I915_EXEC_BSD_RING1:
-				ring = &dev_priv->ring[VCS];
-				break;
-			case I915_EXEC_BSD_RING2:
-				ring = &dev_priv->ring[VCS2];
-				break;
-			default:
-				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
-					  (int)(args->flags & I915_EXEC_BSD_MASK));
-				return -EINVAL;
-			}
-		} else
-			ring = &dev_priv->ring[VCS];
-	} else
-		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
-
-	if (!intel_ring_initialized(ring)) {
-		DRM_DEBUG("execbuf with invalid ring: %d\n",
-			  (int)(args->flags & I915_EXEC_RING_MASK));
-		return -EINVAL;
-	}
+	ret = eb_select_ring(dev_priv, file, args, &ring);
+	if (ret)
+		return ret;
 
 	if (args->buffer_count < 1) {
 		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
@@ -1602,11 +1608,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
 
 	/* Allocate a request for this batch buffer nice and early. */
-	ret = i915_gem_request_alloc(ring, ctx, &params->request);
-	if (ret)
+	req = i915_gem_request_alloc(ring, ctx);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
 		goto err_batch_unpin;
+	}
 
-	ret = i915_gem_request_add_to_client(params->request, file);
+	ret = i915_gem_request_add_to_client(req, file);
 	if (ret)
 		goto err_batch_unpin;
 
@@ -1622,6 +1630,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->dispatch_flags          = dispatch_flags;
 	params->batch_obj               = batch_obj;
 	params->ctx                     = ctx;
+	params->request                 = req;
 
 	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
 
@@ -1645,8 +1654,8 @@ err:
 	 * must be freed again. If it was submitted then it is being tracked
 	 * on the active request list and no clean up is required here.
 	 */
-	if (ret && params->request)
-		i915_gem_request_cancel(params->request);
+	if (ret && !IS_ERR_OR_NULL(req))
+		i915_gem_request_cancel(req);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56f4f2e58d53..49e4f26b79d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -96,9 +96,11 @@
 static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma);
 
-const struct i915_ggtt_view i915_ggtt_view_normal;
+const struct i915_ggtt_view i915_ggtt_view_normal = {
+	.type = I915_GGTT_VIEW_NORMAL,
+};
 const struct i915_ggtt_view i915_ggtt_view_rotated = {
-        .type = I915_GGTT_VIEW_ROTATED
+	.type = I915_GGTT_VIEW_ROTATED,
 };
 
 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
@@ -2130,6 +2132,25 @@ static void i915_address_space_init(struct i915_address_space *vm,
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
 }
 
+static void gtt_write_workarounds(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* This function is for gtt related workarounds. This function is
+	 * called on driver load and after a GPU reset, so you can place
+	 * workarounds here even if they get overwritten by GPU reset.
+	 */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
+	if (IS_BROADWELL(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+	else if (IS_CHERRYVIEW(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+	else if (IS_SKYLAKE(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+	else if (IS_BROXTON(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+}
+
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2146,6 +2167,8 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 
 int i915_ppgtt_init_hw(struct drm_device *dev)
 {
+	gtt_write_workarounds(dev);
+
 	/* In the case of execlists, PPGTT is enabled by the context descriptor
 	 * and the PDPs are contained within the context itself.  We don't
 	 * need to do anything here. */
@@ -2735,7 +2758,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
 		}
 		vma->bound |= GLOBAL_BIND;
 		__i915_vma_set_map_and_fenceable(vma);
-		list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
+		list_add_tail(&vma->vm_link, &ggtt_vm->inactive_list);
 	}
 
 	/* Clear any non-preallocated blocks */
@@ -2807,6 +2830,8 @@ void i915_global_gtt_cleanup(struct drm_device *dev)
 		ppgtt->base.cleanup(&ppgtt->base);
 	}
 
+	i915_gem_cleanup_stolen(dev);
+
 	if (drm_mm_initialized(&vm->mm)) {
 		if (intel_vgpu_active(dev))
 			intel_vgt_deballoon();
@@ -3173,12 +3198,21 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	}
 
 	gtt->base.dev = dev;
+	gtt->base.is_ggtt = true;
 
 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
 			     &gtt->mappable_base, &gtt->mappable_end);
 	if (ret)
 		return ret;
 
+	/*
+	 * Initialise stolen early so that we may reserve preallocated
+	 * objects for the BIOS to KMS transition.
+	 */
+	ret = i915_gem_init_stolen(dev);
+	if (ret)
+		goto out_gtt_cleanup;
+
 	/* GMADR is the PCI mmio aperture into the global GTT. */
 	DRM_INFO("Memory usable by graphics device = %lluM\n",
 		 gtt->base.total >> 20);
@@ -3198,6 +3232,11 @@ int i915_gem_gtt_init(struct drm_device *dev)
 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
 
 	return 0;
+
+out_gtt_cleanup:
+	gtt->base.cleanup(&dev_priv->gtt.base);
+
+	return ret;
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
@@ -3220,7 +3259,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	vm = &dev_priv->gtt.base;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		flush = false;
-		list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		list_for_each_entry(vma, &obj->vma_list, obj_link) {
 			if (vma->vm != vm)
 				continue;
 
@@ -3276,19 +3315,20 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&vma->vma_link);
-	INIT_LIST_HEAD(&vma->mm_list);
+	INIT_LIST_HEAD(&vma->vm_link);
+	INIT_LIST_HEAD(&vma->obj_link);
 	INIT_LIST_HEAD(&vma->exec_list);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->is_ggtt = i915_is_ggtt(vm);
 
 	if (i915_is_ggtt(vm))
 		vma->ggtt_view = *ggtt_view;
-
-	list_add_tail(&vma->vma_link, &obj->vma_list);
-	if (!i915_is_ggtt(vm))
+	else
 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
 
+	list_add_tail(&vma->obj_link, &obj->vma_list);
+
 	return vma;
 }
 
@@ -3329,8 +3369,9 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
 }
 
 static struct scatterlist *
-rotate_pages(dma_addr_t *in, unsigned int offset,
+rotate_pages(const dma_addr_t *in, unsigned int offset,
 	     unsigned int width, unsigned int height,
+	     unsigned int stride,
 	     struct sg_table *st, struct scatterlist *sg)
 {
 	unsigned int column, row;
@@ -3342,7 +3383,7 @@ rotate_pages(dma_addr_t *in, unsigned int offset,
 	}
 
 	for (column = 0; column < width; column++) {
-		src_idx = width * (height - 1) + column;
+		src_idx = stride * (height - 1) + column;
 		for (row = 0; row < height; row++) {
 			st->nents++;
 			/* We don't need the pages, but need to initialize
@@ -3353,7 +3394,7 @@ rotate_pages(dma_addr_t *in, unsigned int offset,
 			sg_dma_address(sg) = in[offset + src_idx];
 			sg_dma_len(sg) = PAGE_SIZE;
 			sg = sg_next(sg);
-			src_idx -= width;
+			src_idx -= stride;
 		}
 	}
 
@@ -3361,10 +3402,9 @@ rotate_pages(dma_addr_t *in, unsigned int offset,
 }
 
 static struct sg_table *
-intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
+intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
-	struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info;
 	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3406,6 +3446,7 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
 	/* Rotate the pages. */
 	sg = rotate_pages(page_addr_list, 0,
 		     rot_info->width_pages, rot_info->height_pages,
+		     rot_info->width_pages,
 		     st, NULL);
 
 	/* Append the UV plane if NV12. */
@@ -3421,6 +3462,7 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
 		rotate_pages(page_addr_list, uv_start_page,
 			     rot_info->width_pages_uv,
 			     rot_info->height_pages_uv,
+			     rot_info->width_pages_uv,
 			     st, sg);
 	}
 
@@ -3502,7 +3544,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 		vma->ggtt_view.pages = vma->obj->pages;
 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
 		vma->ggtt_view.pages =
-			intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
+			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
 		vma->ggtt_view.pages =
 			intel_partial_pages(&vma->ggtt_view, vma->obj);
@@ -3558,13 +3600,9 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma->vm,
-				    vma->node.start,
-				    vma->node.size,
-				    VM_TO_TRACE_NAME(vma->vm));
-
 		/* XXX: i915_vma_pin() will fix this +- hack */
 		vma->pin_count++;
+		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
@@ -3596,7 +3634,7 @@ i915_ggtt_view_size(struct drm_i915_gem_object *obj,
 	if (view->type == I915_GGTT_VIEW_NORMAL) {
 		return obj->base.size;
 	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
-		return view->params.rotation_info.size;
+		return view->params.rotated.size;
 	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
 		return view->params.partial.size << PAGE_SHIFT;
 	} else {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b448ad832dcf..8774f1ba46e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -44,7 +44,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
 
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
-
 /* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
@@ -156,7 +155,7 @@ struct i915_ggtt_view {
 			u64 offset;
 			unsigned int size;
 		} partial;
-		struct intel_rotation_info rotation_info;
+		struct intel_rotation_info rotated;
 	} params;
 
 	struct sg_table *pages;
@@ -184,6 +183,7 @@ struct i915_vma {
 #define GLOBAL_BIND	(1<<0)
 #define LOCAL_BIND	(1<<1)
 	unsigned int bound : 4;
+	bool is_ggtt : 1;
 
 	/**
 	 * Support different GGTT views into the same object.
@@ -195,9 +195,9 @@ struct i915_vma {
 	struct i915_ggtt_view ggtt_view;
 
 	/** This object's place on the active/inactive lists */
-	struct list_head mm_list;
+	struct list_head vm_link;
 
-	struct list_head vma_link; /* Link in the object's VMA list */
+	struct list_head obj_link; /* Link in the object's VMA list */
 
 	/** This vma's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
@@ -276,6 +276,8 @@ struct i915_address_space {
 	u64 start;		/* Start offset always 0 for dri2 */
 	u64 total;		/* size addr space maps (ex. 2GB for ggtt) */
 
+	bool is_ggtt;
+
 	struct i915_page_scratch *scratch_page;
 	struct i915_page_table *scratch_pt;
 	struct i915_page_directory *scratch_pd;
@@ -331,6 +333,8 @@ struct i915_address_space {
 			u32 flags);
 };
 
+#define i915_is_ggtt(V) ((V)->is_ggtt)
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -343,6 +347,8 @@ struct i915_gtt {
 
 	size_t stolen_size;		/* Total size of stolen memory */
 	size_t stolen_usable_size;	/* Total size minus BIOS reserved */
+	size_t stolen_reserved_base;
+	size_t stolen_reserved_size;
 	u64 mappable_end;		/* End offset that we can CPU map */
 	struct io_mapping *mappable;	/* Mapping to our CPU mappable region */
 	phys_addr_t mappable_base;	/* PA of our GMADR */
@@ -417,7 +423,7 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
 static inline uint32_t i915_pte_count(uint64_t addr, size_t length,
 				      uint32_t pde_shift)
 {
-	const uint64_t mask = ~((1 << pde_shift) - 1);
+	const uint64_t mask = ~((1ULL << pde_shift) - 1);
 	uint64_t end;
 
 	WARN_ON(length == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f7df54a8ee2b..d3c473ffb90a 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -47,6 +47,46 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 #endif
 }
 
+static int num_vma_bound(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	int count = 0;
+
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (drm_mm_node_allocated(&vma->node))
+			count++;
+		if (vma->pin_count)
+			count++;
+	}
+
+	return count;
+}
+
+static bool swap_available(void)
+{
+	return get_nr_swap_pages() > 0;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+	/* Only report true if by unbinding the object and putting its pages
+	 * we can actually make forward progress towards freeing physical
+	 * pages.
+	 *
+	 * If the pages are pinned for any other reason than being bound
+	 * to the GPU, simply unbinding from the GPU is not going to succeed
+	 * in releasing our pin count on the pages themselves.
+	 */
+	if (obj->pages_pin_count != num_vma_bound(obj))
+		return false;
+
+	/* We can only return physical pages to the system if we can either
+	 * discard the contents (because the user has marked them as being
+	 * purgeable) or if we can move their contents out to swap.
+	 */
+	return swap_available() || obj->madv == I915_MADV_DONTNEED;
+}
+
 /**
  * i915_gem_shrink - Shrink buffer object caches
  * @dev_priv: i915 device
@@ -129,11 +169,14 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
 				continue;
 
+			if (!can_release_pages(obj))
+				continue;
+
 			drm_gem_object_reference(&obj->base);
 
 			/* For the unbound phase, this should be a no-op! */
 			list_for_each_entry_safe(vma, v,
-						 &obj->vma_list, vma_link)
+						 &obj->vma_list, obj_link)
 				if (i915_vma_unbind(vma))
 					break;
 
@@ -188,21 +231,6 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	return true;
 }
 
-static int num_vma_bound(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-	int count = 0;
-
-	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (drm_mm_node_allocated(&vma->node))
-			count++;
-		if (vma->pin_count)
-			count++;
-	}
-
-	return count;
-}
-
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -222,7 +250,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if (!obj->active && obj->pages_pin_count == num_vma_bound(obj))
+		if (!obj->active && can_release_pages(obj))
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
@@ -339,8 +367,20 @@ void i915_gem_shrinker_init(struct drm_i915_private *dev_priv)
 	dev_priv->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
 	dev_priv->mm.shrinker.count_objects = i915_gem_shrinker_count;
 	dev_priv->mm.shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&dev_priv->mm.shrinker);
+	WARN_ON(register_shrinker(&dev_priv->mm.shrinker));
 
 	dev_priv->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
-	register_oom_notifier(&dev_priv->mm.oom_notifier);
+	WARN_ON(register_oom_notifier(&dev_priv->mm.oom_notifier));
+}
+
+/**
+ * i915_gem_shrinker_cleanup - Clean up i915 shrinker
+ * @dev_priv: i915 device
+ *
+ * This function unregisters the i915 shrinker and OOM handler.
+ */
+void i915_gem_shrinker_cleanup(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(unregister_oom_notifier(&dev_priv->mm.oom_notifier));
+	unregister_shrinker(&dev_priv->mm.shrinker);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 3476877fc0d6..2e6e9fb6f80d 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -458,6 +458,9 @@ int i915_gem_init_stolen(struct drm_device *dev)
 		return 0;
 	}
 
+	dev_priv->gtt.stolen_reserved_base = reserved_base;
+	dev_priv->gtt.stolen_reserved_size = reserved_size;
+
 	/* It is possible for the reserved area to end before the end of stolen
 	 * memory, so just consider the start. */
 	reserved_total = stolen_top - reserved_base;
@@ -569,6 +572,9 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	if (obj->pages == NULL)
 		goto cleanup;
 
+	obj->get_page.sg = obj->pages->sgl;
+	obj->get_page.last = 0;
+
 	i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
 
@@ -632,6 +638,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
 
+	lockdep_assert_held(&dev->struct_mutex);
+
 	DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%x, gtt_offset=%x, size=%x\n",
 			stolen_offset, gtt_offset, size);
 
@@ -689,7 +697,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 
 		vma->bound |= GLOBAL_BIND;
 		__i915_vma_set_map_and_fenceable(vma);
-		list_add_tail(&vma->mm_list, &ggtt->inactive_list);
+		list_add_tail(&vma->vm_link, &ggtt->inactive_list);
 	}
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 59e45b3a6937..6be40f3ba2c7 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -49,21 +49,18 @@ struct i915_mmu_notifier {
 	struct hlist_node node;
 	struct mmu_notifier mn;
 	struct rb_root objects;
-	struct list_head linear;
-	bool has_linear;
 };
 
 struct i915_mmu_object {
 	struct i915_mmu_notifier *mn;
+	struct drm_i915_gem_object *obj;
 	struct interval_tree_node it;
 	struct list_head link;
-	struct drm_i915_gem_object *obj;
 	struct work_struct work;
-	bool active;
-	bool is_linear;
+	bool attached;
 };
 
-static void __cancel_userptr__worker(struct work_struct *work)
+static void cancel_userptr(struct work_struct *work)
 {
 	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
 	struct drm_i915_gem_object *obj = mo->obj;
@@ -81,7 +78,7 @@ static void __cancel_userptr__worker(struct work_struct *work)
 		was_interruptible = dev_priv->mm.interruptible;
 		dev_priv->mm.interruptible = false;
 
-		list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
+		list_for_each_entry_safe(vma, tmp, &obj->vma_list, obj_link) {
 			int ret = i915_vma_unbind(vma);
 			WARN_ON(ret && ret != -EIO);
 		}
@@ -94,24 +91,22 @@ static void __cancel_userptr__worker(struct work_struct *work)
 	mutex_unlock(&dev->struct_mutex);
 }
 
-static unsigned long cancel_userptr(struct i915_mmu_object *mo)
+static void add_object(struct i915_mmu_object *mo)
 {
-	unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
-
-	/* The mmu_object is released late when destroying the
-	 * GEM object so it is entirely possible to gain a
-	 * reference on an object in the process of being freed
-	 * since our serialisation is via the spinlock and not
-	 * the struct_mutex - and consequently use it after it
-	 * is freed and then double free it.
-	 */
-	if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
-		schedule_work(&mo->work);
-		/* only schedule one work packet to avoid the refleak */
-		mo->active = false;
-	}
+	if (mo->attached)
+		return;
+
+	interval_tree_insert(&mo->it, &mo->mn->objects);
+	mo->attached = true;
+}
 
-	return end;
+static void del_object(struct i915_mmu_object *mo)
+{
+	if (!mo->attached)
+		return;
+
+	interval_tree_remove(&mo->it, &mo->mn->objects);
+	mo->attached = false;
 }
 
 static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
@@ -122,28 +117,36 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct i915_mmu_object *mo;
+	struct interval_tree_node *it;
+	LIST_HEAD(cancelled);
+
+	if (RB_EMPTY_ROOT(&mn->objects))
+		return;
 
 	/* interval ranges are inclusive, but invalidate range is exclusive */
 	end--;
 
 	spin_lock(&mn->lock);
-	if (mn->has_linear) {
-		list_for_each_entry(mo, &mn->linear, link) {
-			if (mo->it.last < start || mo->it.start > end)
-				continue;
-
-			cancel_userptr(mo);
-		}
-	} else {
-		struct interval_tree_node *it;
+	it = interval_tree_iter_first(&mn->objects, start, end);
+	while (it) {
+		/* The mmu_object is released late when destroying the
+		 * GEM object so it is entirely possible to gain a
+		 * reference on an object in the process of being freed
+		 * since our serialisation is via the spinlock and not
+		 * the struct_mutex - and consequently use it after it
+		 * is freed and then double free it. To prevent that
+		 * use-after-free we only acquire a reference on the
+		 * object if it is not in the process of being destroyed.
+		 */
+		mo = container_of(it, struct i915_mmu_object, it);
+		if (kref_get_unless_zero(&mo->obj->base.refcount))
+			schedule_work(&mo->work);
 
-		it = interval_tree_iter_first(&mn->objects, start, end);
-		while (it) {
-			mo = container_of(it, struct i915_mmu_object, it);
-			start = cancel_userptr(mo);
-			it = interval_tree_iter_next(it, start, end);
-		}
+		list_add(&mo->link, &cancelled);
+		it = interval_tree_iter_next(it, start, end);
 	}
+	list_for_each_entry(mo, &cancelled, link)
+		del_object(mo);
 	spin_unlock(&mn->lock);
 }
 
@@ -164,8 +167,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 	spin_lock_init(&mn->lock);
 	mn->mn.ops = &i915_gem_userptr_notifier;
 	mn->objects = RB_ROOT;
-	INIT_LIST_HEAD(&mn->linear);
-	mn->has_linear = false;
 
 	 /* Protected by mmap_sem (write-lock) */
 	ret = __mmu_notifier_register(&mn->mn, mm);
@@ -177,85 +178,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 	return mn;
 }
 
-static int
-i915_mmu_notifier_add(struct drm_device *dev,
-		      struct i915_mmu_notifier *mn,
-		      struct i915_mmu_object *mo)
-{
-	struct interval_tree_node *it;
-	int ret = 0;
-
-	/* By this point we have already done a lot of expensive setup that
-	 * we do not want to repeat just because the caller (e.g. X) has a
-	 * signal pending (and partly because of that expensive setup, X
-	 * using an interrupt timer is likely to get stuck in an EINTR loop).
-	 */
-	mutex_lock(&dev->struct_mutex);
-
-	/* Make sure we drop the final active reference (and thereby
-	 * remove the objects from the interval tree) before we do
-	 * the check for overlapping objects.
-	 */
-	i915_gem_retire_requests(dev);
-
-	spin_lock(&mn->lock);
-	it = interval_tree_iter_first(&mn->objects,
-				      mo->it.start, mo->it.last);
-	if (it) {
-		struct drm_i915_gem_object *obj;
-
-		/* We only need to check the first object in the range as it
-		 * either has cancelled gup work queued and we need to
-		 * return back to the user to give time for the gup-workers
-		 * to flush their object references upon which the object will
-		 * be removed from the interval-tree, or the the range is
-		 * still in use by another client and the overlap is invalid.
-		 *
-		 * If we do have an overlap, we cannot use the interval tree
-		 * for fast range invalidation.
-		 */
-
-		obj = container_of(it, struct i915_mmu_object, it)->obj;
-		if (!obj->userptr.workers)
-			mn->has_linear = mo->is_linear = true;
-		else
-			ret = -EAGAIN;
-	} else
-		interval_tree_insert(&mo->it, &mn->objects);
-
-	if (ret == 0)
-		list_add(&mo->link, &mn->linear);
-
-	spin_unlock(&mn->lock);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-
-static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mn)
-{
-	struct i915_mmu_object *mo;
-
-	list_for_each_entry(mo, &mn->linear, link)
-		if (mo->is_linear)
-			return true;
-
-	return false;
-}
-
-static void
-i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
-		      struct i915_mmu_object *mo)
-{
-	spin_lock(&mn->lock);
-	list_del(&mo->link);
-	if (mo->is_linear)
-		mn->has_linear = i915_mmu_notifier_has_linear(mn);
-	else
-		interval_tree_remove(&mo->it, &mn->objects);
-	spin_unlock(&mn->lock);
-}
-
 static void
 i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 {
@@ -265,7 +187,9 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
 	if (mo == NULL)
 		return;
 
-	i915_mmu_notifier_del(mo->mn, mo);
+	spin_lock(&mo->mn->lock);
+	del_object(mo);
+	spin_unlock(&mo->mn->lock);
 	kfree(mo);
 
 	obj->userptr.mmu_object = NULL;
@@ -299,7 +223,6 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 {
 	struct i915_mmu_notifier *mn;
 	struct i915_mmu_object *mo;
-	int ret;
 
 	if (flags & I915_USERPTR_UNSYNCHRONIZED)
 		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
@@ -316,16 +239,10 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 		return -ENOMEM;
 
 	mo->mn = mn;
-	mo->it.start = obj->userptr.ptr;
-	mo->it.last = mo->it.start + obj->base.size - 1;
 	mo->obj = obj;
-	INIT_WORK(&mo->work, __cancel_userptr__worker);
-
-	ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
-	if (ret) {
-		kfree(mo);
-		return ret;
-	}
+	mo->it.start = obj->userptr.ptr;
+	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
+	INIT_WORK(&mo->work, cancel_userptr);
 
 	obj->userptr.mmu_object = mo;
 	return 0;
@@ -552,8 +469,10 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
 	/* In order to serialise get_pages with an outstanding
 	 * cancel_userptr, we must drop the struct_mutex and try again.
 	 */
-	if (!value || !work_pending(&obj->userptr.mmu_object->work))
-		obj->userptr.mmu_object->active = value;
+	if (!value)
+		del_object(obj->userptr.mmu_object);
+	else if (!work_pending(&obj->userptr.mmu_object->work))
+		add_object(obj->userptr.mmu_object);
 	else
 		ret = -EAGAIN;
 	spin_unlock(&obj->userptr.mmu_object->mn->lock);
@@ -584,11 +503,11 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 		down_read(&mm->mmap_sem);
 		while (pinned < npages) {
-			ret = get_user_pages(work->task, mm,
-					     obj->userptr.ptr + pinned * PAGE_SIZE,
-					     npages - pinned,
-					     !obj->userptr.read_only, 0,
-					     pvec + pinned, NULL);
+			ret = get_user_pages_remote(work->task, mm,
+					obj->userptr.ptr + pinned * PAGE_SIZE,
+					npages - pinned,
+					!obj->userptr.read_only, 0,
+					pvec + pinned, NULL);
 			if (ret < 0)
 				break;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 06ca4082735b..831895b8cb75 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -365,6 +365,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	err_printf(m, "Reset count: %u\n", error->reset_count);
 	err_printf(m, "Suspend count: %u\n", error->suspend_count);
 	err_printf(m, "PCI ID: 0x%04x\n", dev->pdev->device);
+	err_printf(m, "PCI Revision: 0x%02x\n", dev->pdev->revision);
+	err_printf(m, "PCI Subsystem: %04x:%04x\n",
+		   dev->pdev->subsystem_vendor,
+		   dev->pdev->subsystem_device);
 	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
 
 	if (HAS_CSR(dev)) {
@@ -732,7 +736,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 	struct i915_vma *vma;
 	int i = 0;
 
-	list_for_each_entry(vma, head, mm_list) {
+	list_for_each_entry(vma, head, vm_link) {
 		capture_bo(err++, vma);
 		if (++i == count)
 			break;
@@ -755,7 +759,7 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 		if (err == last)
 			break;
 
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
+		list_for_each_entry(vma, &obj->vma_list, obj_link)
 			if (vma->vm == vm && vma->pin_count > 0)
 				capture_bo(err++, vma);
 	}
@@ -1050,7 +1054,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			if (request)
 				rbuf = request->ctx->engine[ring->id].ringbuf;
 			else
-				rbuf = ring->default_context->engine[ring->id].ringbuf;
+				rbuf = dev_priv->kernel_context->engine[ring->id].ringbuf;
 		} else
 			rbuf = ring->buffer;
 
@@ -1123,12 +1127,12 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 	int i;
 
 	i = 0;
-	list_for_each_entry(vma, &vm->active_list, mm_list)
+	list_for_each_entry(vma, &vm->active_list, vm_link)
 		i++;
 	error->active_bo_count[ndx] = i;
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		list_for_each_entry(vma, &obj->vma_list, vma_link)
+		list_for_each_entry(vma, &obj->vma_list, obj_link)
 			if (vma->vm == vm && vma->pin_count > 0)
 				i++;
 	}
diff --git a/drivers/gpu/drm/i915/i915_guc_reg.h b/drivers/gpu/drm/i915/i915_guc_reg.h
index 685c7991e24f..e4ba5822289b 100644
--- a/drivers/gpu/drm/i915/i915_guc_reg.h
+++ b/drivers/gpu/drm/i915/i915_guc_reg.h
@@ -40,6 +40,7 @@
 #define   GS_MIA_CORE_STATE		  (1 << GS_MIA_SHIFT)
 
 #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT		16
 
 #define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
 #define   UOS_RSA_SCRATCH_MAX_COUNT	  64
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 05aa7e61cbe0..d7543efc8a5e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -158,10 +158,8 @@ static int host2guc_sample_forcewake(struct intel_guc *guc,
 
 	data[0] = HOST2GUC_ACTION_SAMPLE_FORCEWAKE;
 	/* WaRsDisableCoarsePowerGating:skl,bxt */
-	if (!intel_enable_rc6(dev_priv->dev) ||
-	    IS_BXT_REVID(dev, 0, BXT_REVID_A1) ||
-	    (IS_SKL_GT3(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)) ||
-	    (IS_SKL_GT4(dev) && IS_SKL_REVID(dev, 0, SKL_REVID_E0)))
+	if (!intel_enable_rc6(dev) ||
+	    NEEDS_WaRsDisableCoarsePowerGating(dev))
 		data[1] = 0;
 	else
 		/* bit 0 and 1 are for Render and Media domain separately */
@@ -246,6 +244,9 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
 			db_exc.cookie = 1;
 	}
 
+	/* Finally, update the cached copy of the GuC's WQ head */
+	gc->wq_head = desc->head;
+
 	kunmap_atomic(base);
 	return ret;
 }
@@ -375,6 +376,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 static void guc_init_ctx_desc(struct intel_guc *guc,
 			      struct i915_guc_client *client)
 {
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_engine_cs *ring;
 	struct intel_context *ctx = client->owner;
 	struct guc_context_desc desc;
 	struct sg_table *sg;
@@ -387,10 +390,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 	desc.priority = client->priority;
 	desc.db_id = client->doorbell_id;
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
-		struct guc_execlist_context *lrc = &desc.lrc[i];
-		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
-		struct intel_engine_cs *ring;
+	for_each_ring(ring, dev_priv, i) {
+		struct guc_execlist_context *lrc = &desc.lrc[ring->guc_id];
 		struct drm_i915_gem_object *obj;
 		uint64_t ctx_desc;
 
@@ -405,7 +406,6 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		if (!obj)
 			break;	/* XXX: continue? */
 
-		ring = ringbuf->ring;
 		ctx_desc = intel_lr_context_descriptor(ctx, ring);
 		lrc->context_desc = (u32)ctx_desc;
 
@@ -413,16 +413,16 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 		lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
 				LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-				(ring->id << GUC_ELC_ENGINE_OFFSET);
+				(ring->guc_id << GUC_ELC_ENGINE_OFFSET);
 
-		obj = ringbuf->obj;
+		obj = ctx->engine[i].ringbuf->obj;
 
 		lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
 		lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << ring->id);
+		desc.engines_used |= (1 << ring->guc_id);
 	}
 
 	WARN_ON(desc.engines_used == 0);
@@ -471,28 +471,30 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 			     sizeof(desc) * client->ctx_index);
 }
 
-/* Get valid workqueue item and return it back to offset */
-static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
+int i915_guc_wq_check_space(struct i915_guc_client *gc)
 {
 	struct guc_process_desc *desc;
 	void *base;
 	u32 size = sizeof(struct guc_wq_item);
 	int ret = -ETIMEDOUT, timeout_counter = 200;
 
+	if (!gc)
+		return 0;
+
+	/* Quickly return if wq space is available since last time we cache the
+	 * head position. */
+	if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size)
+		return 0;
+
 	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 	desc = base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
-			*offset = gc->wq_tail;
+		gc->wq_head = desc->head;
 
-			/* advance the tail for next workqueue item */
-			gc->wq_tail += size;
-			gc->wq_tail &= gc->wq_size - 1;
-
-			/* this will break the loop */
-			timeout_counter = 0;
+		if (CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size) >= size) {
 			ret = 0;
+			break;
 		}
 
 		if (timeout_counter)
@@ -507,15 +509,18 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 static int guc_add_workqueue_item(struct i915_guc_client *gc,
 				  struct drm_i915_gem_request *rq)
 {
-	enum intel_ring_id ring_id = rq->ring->id;
 	struct guc_wq_item *wqi;
 	void *base;
-	u32 tail, wq_len, wq_off = 0;
-	int ret;
+	u32 tail, wq_len, wq_off, space;
+
+	space = CIRC_SPACE(gc->wq_tail, gc->wq_head, gc->wq_size);
+	if (WARN_ON(space < sizeof(struct guc_wq_item)))
+		return -ENOSPC; /* shouldn't happen */
 
-	ret = guc_get_workqueue_space(gc, &wq_off);
-	if (ret)
-		return ret;
+	/* postincrement WQ tail for next time */
+	wq_off = gc->wq_tail;
+	gc->wq_tail += sizeof(struct guc_wq_item);
+	gc->wq_tail &= gc->wq_size - 1;
 
 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
 	 * should not have the case where structure wqi is across page, neither
@@ -537,7 +542,7 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 	wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1;
 	wqi->header = WQ_TYPE_INORDER |
 			(wq_len << WQ_LEN_SHIFT) |
-			(ring_id << WQ_TARGET_SHIFT) |
+			(rq->ring->guc_id << WQ_TARGET_SHIFT) |
 			WQ_NO_WCFLUSH_WAIT;
 
 	/* The GuC wants only the low-order word of the context descriptor */
@@ -553,29 +558,6 @@ static int guc_add_workqueue_item(struct i915_guc_client *gc,
 	return 0;
 }
 
-#define CTX_RING_BUFFER_START		0x08
-
-/* Update the ringbuffer pointer in a saved context image */
-static void lr_context_update(struct drm_i915_gem_request *rq)
-{
-	enum intel_ring_id ring_id = rq->ring->id;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring_id].state;
-	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
-
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
-
-	kunmap_atomic(reg_state);
-}
-
 /**
  * i915_guc_submit() - Submit commands through GuC
  * @client:	the guc client where commands will go through
@@ -587,18 +569,14 @@ int i915_guc_submit(struct i915_guc_client *client,
 		    struct drm_i915_gem_request *rq)
 {
 	struct intel_guc *guc = client->guc;
-	enum intel_ring_id ring_id = rq->ring->id;
+	unsigned int engine_id = rq->ring->guc_id;
 	int q_ret, b_ret;
 
-	/* Need this because of the deferred pin ctx and ring */
-	/* Shall we move this right after ring is pinned? */
-	lr_context_update(rq);
-
 	q_ret = guc_add_workqueue_item(client, rq);
 	if (q_ret == 0)
 		b_ret = guc_ring_doorbell(client);
 
-	client->submissions[ring_id] += 1;
+	client->submissions[engine_id] += 1;
 	if (q_ret) {
 		client->q_fail += 1;
 		client->retcode = q_ret;
@@ -608,8 +586,8 @@ int i915_guc_submit(struct i915_guc_client *client,
 	} else {
 		client->retcode = 0;
 	}
-	guc->submissions[ring_id] += 1;
-	guc->last_seqno[ring_id] = rq->seqno;
+	guc->submissions[engine_id] += 1;
+	guc->last_seqno[engine_id] = rq->seqno;
 
 	return q_ret;
 }
@@ -832,6 +810,96 @@ static void guc_create_log(struct intel_guc *guc)
 	guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
 }
 
+static void init_guc_policies(struct guc_policies *policies)
+{
+	struct guc_policy *policy;
+	u32 p, i;
+
+	policies->dpc_promote_time = 500000;
+	policies->max_num_work_items = POLICY_MAX_NUM_WI;
+
+	for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) {
+		for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) {
+			policy = &policies->policy[p][i];
+
+			policy->execution_quantum = 1000000;
+			policy->preemption_time = 500000;
+			policy->fault_time = 250000;
+			policy->policy_flags = 0;
+		}
+	}
+
+	policies->is_valid = 1;
+}
+
+static void guc_create_ads(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct drm_i915_gem_object *obj;
+	struct guc_ads *ads;
+	struct guc_policies *policies;
+	struct guc_mmio_reg_state *reg_state;
+	struct intel_engine_cs *ring;
+	struct page *page;
+	u32 size, i;
+
+	/* The ads obj includes the struct itself and buffers passed to GuC */
+	size = sizeof(struct guc_ads) + sizeof(struct guc_policies) +
+			sizeof(struct guc_mmio_reg_state) +
+			GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
+
+	obj = guc->ads_obj;
+	if (!obj) {
+		obj = gem_allocate_guc_obj(dev_priv->dev, PAGE_ALIGN(size));
+		if (!obj)
+			return;
+
+		guc->ads_obj = obj;
+	}
+
+	page = i915_gem_object_get_page(obj, 0);
+	ads = kmap(page);
+
+	/*
+	 * The GuC requires a "Golden Context" when it reinitialises
+	 * engines after a reset. Here we use the Render ring default
+	 * context, which must already exist and be pinned in the GGTT,
+	 * so its address won't change after we've told the GuC where
+	 * to find it.
+	 */
+	ring = &dev_priv->ring[RCS];
+	ads->golden_context_lrca = ring->status_page.gfx_addr;
+
+	for_each_ring(ring, dev_priv, i)
+		ads->eng_state_size[ring->guc_id] = intel_lr_context_size(ring);
+
+	/* GuC scheduling policies */
+	policies = (void *)ads + sizeof(struct guc_ads);
+	init_guc_policies(policies);
+
+	ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
+			sizeof(struct guc_ads);
+
+	/* MMIO reg state */
+	reg_state = (void *)policies + sizeof(struct guc_policies);
+
+	for_each_ring(ring, dev_priv, i) {
+		reg_state->mmio_white_list[ring->guc_id].mmio_start =
+			ring->mmio_base + GUC_MMIO_WHITE_LIST_START;
+
+		/* Nothing to be saved or restored for now. */
+		reg_state->mmio_white_list[ring->guc_id].count = 0;
+	}
+
+	ads->reg_state_addr = ads->scheduler_policies +
+			sizeof(struct guc_policies);
+
+	ads->reg_state_buffer = ads->reg_state_addr +
+			sizeof(struct guc_mmio_reg_state);
+
+	kunmap(page);
+}
+
 /*
  * Set up the memory resources to be shared with the GuC.  At this point,
  * we require just one object that can be mapped through the GGTT.
@@ -858,6 +926,8 @@ int i915_guc_submission_init(struct drm_device *dev)
 
 	guc_create_log(guc);
 
+	guc_create_ads(guc);
+
 	return 0;
 }
 
@@ -865,7 +935,7 @@ int i915_guc_submission_enable(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
-	struct intel_context *ctx = dev_priv->ring[RCS].default_context;
+	struct intel_context *ctx = dev_priv->kernel_context;
 	struct i915_guc_client *client;
 
 	/* client for execbuf submission */
@@ -896,6 +966,9 @@ void i915_guc_submission_fini(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc *guc = &dev_priv->guc;
 
+	gem_release_guc_obj(dev_priv->guc.ads_obj);
+	guc->ads_obj = NULL;
+
 	gem_release_guc_obj(dev_priv->guc.log_obj);
 	guc->log_obj = NULL;
 
@@ -919,7 +992,7 @@ int intel_guc_suspend(struct drm_device *dev)
 	if (!i915.enable_guc_submission)
 		return 0;
 
-	ctx = dev_priv->ring[RCS].default_context;
+	ctx = dev_priv->kernel_context;
 
 	data[0] = HOST2GUC_ACTION_ENTER_S_STATE;
 	/* any value greater than GUC_POWER_D0 */
@@ -945,7 +1018,7 @@ int intel_guc_resume(struct drm_device *dev)
 	if (!i915.enable_guc_submission)
 		return 0;
 
-	ctx = dev_priv->ring[RCS].default_context;
+	ctx = dev_priv->kernel_context;
 
 	data[0] = HOST2GUC_ACTION_EXIT_S_STATE;
 	data[1] = GUC_POWER_D0;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index fa8afa7860ae..d1a46ef5ab3f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1651,6 +1651,12 @@ static void valleyview_pipestat_irq_handler(struct drm_device *dev, u32 iir)
 	int pipe;
 
 	spin_lock(&dev_priv->irq_lock);
+
+	if (!dev_priv->display_irqs_enabled) {
+		spin_unlock(&dev_priv->irq_lock);
+		return;
+	}
+
 	for_each_pipe(dev_priv, pipe) {
 		i915_reg_t reg;
 		u32 mask, iir_bit = 0;
@@ -2188,10 +2194,6 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	disable_rpm_wakeref_asserts(dev_priv);
 
-	/* We get interrupts on unclaimed registers, so check for this before we
-	 * do any I915_{READ,WRITE}. */
-	intel_uncore_check_errors(dev);
-
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
 	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
@@ -2268,43 +2270,20 @@ static void bxt_hpd_irq_handler(struct drm_device *dev, u32 hotplug_trigger,
 	intel_hpd_irq_handler(dev, pin_mask, long_mask);
 }
 
-static irqreturn_t gen8_irq_handler(int irq, void *arg)
+static irqreturn_t
+gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 {
-	struct drm_device *dev = arg;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 master_ctl;
+	struct drm_device *dev = dev_priv->dev;
 	irqreturn_t ret = IRQ_NONE;
-	uint32_t tmp = 0;
+	u32 iir;
 	enum pipe pipe;
-	u32 aux_mask = GEN8_AUX_CHANNEL_A;
-
-	if (!intel_irqs_enabled(dev_priv))
-		return IRQ_NONE;
-
-	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
-
-	if (INTEL_INFO(dev_priv)->gen >= 9)
-		aux_mask |=  GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C |
-			GEN9_AUX_CHANNEL_D;
-
-	master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
-	master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
-	if (!master_ctl)
-		goto out;
-
-	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
-
-	/* Find, clear, then process each source of interrupt */
-
-	ret = gen8_gt_irq_handler(dev_priv, master_ctl);
 
 	if (master_ctl & GEN8_DE_MISC_IRQ) {
-		tmp = I915_READ(GEN8_DE_MISC_IIR);
-		if (tmp) {
-			I915_WRITE(GEN8_DE_MISC_IIR, tmp);
+		iir = I915_READ(GEN8_DE_MISC_IIR);
+		if (iir) {
+			I915_WRITE(GEN8_DE_MISC_IIR, iir);
 			ret = IRQ_HANDLED;
-			if (tmp & GEN8_DE_MISC_GSE)
+			if (iir & GEN8_DE_MISC_GSE)
 				intel_opregion_asle_intr(dev);
 			else
 				DRM_ERROR("Unexpected DE Misc interrupt\n");
@@ -2314,33 +2293,40 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	}
 
 	if (master_ctl & GEN8_DE_PORT_IRQ) {
-		tmp = I915_READ(GEN8_DE_PORT_IIR);
-		if (tmp) {
+		iir = I915_READ(GEN8_DE_PORT_IIR);
+		if (iir) {
+			u32 tmp_mask;
 			bool found = false;
-			u32 hotplug_trigger = 0;
-
-			if (IS_BROXTON(dev_priv))
-				hotplug_trigger = tmp & BXT_DE_PORT_HOTPLUG_MASK;
-			else if (IS_BROADWELL(dev_priv))
-				hotplug_trigger = tmp & GEN8_PORT_DP_A_HOTPLUG;
 
-			I915_WRITE(GEN8_DE_PORT_IIR, tmp);
+			I915_WRITE(GEN8_DE_PORT_IIR, iir);
 			ret = IRQ_HANDLED;
 
-			if (tmp & aux_mask) {
+			tmp_mask = GEN8_AUX_CHANNEL_A;
+			if (INTEL_INFO(dev_priv)->gen >= 9)
+				tmp_mask |= GEN9_AUX_CHANNEL_B |
+					    GEN9_AUX_CHANNEL_C |
+					    GEN9_AUX_CHANNEL_D;
+
+			if (iir & tmp_mask) {
 				dp_aux_irq_handler(dev);
 				found = true;
 			}
 
-			if (hotplug_trigger) {
-				if (IS_BROXTON(dev))
-					bxt_hpd_irq_handler(dev, hotplug_trigger, hpd_bxt);
-				else
-					ilk_hpd_irq_handler(dev, hotplug_trigger, hpd_bdw);
-				found = true;
+			if (IS_BROXTON(dev_priv)) {
+				tmp_mask = iir & BXT_DE_PORT_HOTPLUG_MASK;
+				if (tmp_mask) {
+					bxt_hpd_irq_handler(dev, tmp_mask, hpd_bxt);
+					found = true;
+				}
+			} else if (IS_BROADWELL(dev_priv)) {
+				tmp_mask = iir & GEN8_PORT_DP_A_HOTPLUG;
+				if (tmp_mask) {
+					ilk_hpd_irq_handler(dev, tmp_mask, hpd_bdw);
+					found = true;
+				}
 			}
 
-			if (IS_BROXTON(dev) && (tmp & BXT_DE_PORT_GMBUS)) {
+			if (IS_BROXTON(dev) && (iir & BXT_DE_PORT_GMBUS)) {
 				gmbus_irq_handler(dev);
 				found = true;
 			}
@@ -2353,49 +2339,51 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 	}
 
 	for_each_pipe(dev_priv, pipe) {
-		uint32_t pipe_iir, flip_done = 0, fault_errors = 0;
+		u32 flip_done, fault_errors;
 
 		if (!(master_ctl & GEN8_DE_PIPE_IRQ(pipe)))
 			continue;
 
-		pipe_iir = I915_READ(GEN8_DE_PIPE_IIR(pipe));
-		if (pipe_iir) {
-			ret = IRQ_HANDLED;
-			I915_WRITE(GEN8_DE_PIPE_IIR(pipe), pipe_iir);
+		iir = I915_READ(GEN8_DE_PIPE_IIR(pipe));
+		if (!iir) {
+			DRM_ERROR("The master control interrupt lied (DE PIPE)!\n");
+			continue;
+		}
 
-			if (pipe_iir & GEN8_PIPE_VBLANK &&
-			    intel_pipe_handle_vblank(dev, pipe))
-				intel_check_page_flip(dev, pipe);
+		ret = IRQ_HANDLED;
+		I915_WRITE(GEN8_DE_PIPE_IIR(pipe), iir);
 
-			if (INTEL_INFO(dev_priv)->gen >= 9)
-				flip_done = pipe_iir & GEN9_PIPE_PLANE1_FLIP_DONE;
-			else
-				flip_done = pipe_iir & GEN8_PIPE_PRIMARY_FLIP_DONE;
+		if (iir & GEN8_PIPE_VBLANK &&
+		    intel_pipe_handle_vblank(dev, pipe))
+			intel_check_page_flip(dev, pipe);
 
-			if (flip_done) {
-				intel_prepare_page_flip(dev, pipe);
-				intel_finish_page_flip_plane(dev, pipe);
-			}
+		flip_done = iir;
+		if (INTEL_INFO(dev_priv)->gen >= 9)
+			flip_done &= GEN9_PIPE_PLANE1_FLIP_DONE;
+		else
+			flip_done &= GEN8_PIPE_PRIMARY_FLIP_DONE;
 
-			if (pipe_iir & GEN8_PIPE_CDCLK_CRC_DONE)
-				hsw_pipe_crc_irq_handler(dev, pipe);
+		if (flip_done) {
+			intel_prepare_page_flip(dev, pipe);
+			intel_finish_page_flip_plane(dev, pipe);
+		}
 
-			if (pipe_iir & GEN8_PIPE_FIFO_UNDERRUN)
-				intel_cpu_fifo_underrun_irq_handler(dev_priv,
-								    pipe);
+		if (iir & GEN8_PIPE_CDCLK_CRC_DONE)
+			hsw_pipe_crc_irq_handler(dev, pipe);
 
+		if (iir & GEN8_PIPE_FIFO_UNDERRUN)
+			intel_cpu_fifo_underrun_irq_handler(dev_priv, pipe);
 
-			if (INTEL_INFO(dev_priv)->gen >= 9)
-				fault_errors = pipe_iir & GEN9_DE_PIPE_IRQ_FAULT_ERRORS;
-			else
-				fault_errors = pipe_iir & GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
+		fault_errors = iir;
+		if (INTEL_INFO(dev_priv)->gen >= 9)
+			fault_errors &= GEN9_DE_PIPE_IRQ_FAULT_ERRORS;
+		else
+			fault_errors &= GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
 
-			if (fault_errors)
-				DRM_ERROR("Fault errors on pipe %c\n: 0x%08x",
-					  pipe_name(pipe),
-					  pipe_iir & GEN8_DE_PIPE_IRQ_FAULT_ERRORS);
-		} else
-			DRM_ERROR("The master control interrupt lied (DE PIPE)!\n");
+		if (fault_errors)
+			DRM_ERROR("Fault errors on pipe %c\n: 0x%08x",
+				  pipe_name(pipe),
+				  fault_errors);
 	}
 
 	if (HAS_PCH_SPLIT(dev) && !HAS_PCH_NOP(dev) &&
@@ -2405,15 +2393,15 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 		 * scheme also closed the SDE interrupt handling race we've seen
 		 * on older pch-split platforms. But this needs testing.
 		 */
-		u32 pch_iir = I915_READ(SDEIIR);
-		if (pch_iir) {
-			I915_WRITE(SDEIIR, pch_iir);
+		iir = I915_READ(SDEIIR);
+		if (iir) {
+			I915_WRITE(SDEIIR, iir);
 			ret = IRQ_HANDLED;
 
 			if (HAS_PCH_SPT(dev_priv))
-				spt_irq_handler(dev, pch_iir);
+				spt_irq_handler(dev, iir);
 			else
-				cpt_irq_handler(dev, pch_iir);
+				cpt_irq_handler(dev, iir);
 		} else {
 			/*
 			 * Like on previous PCH there seems to be something
@@ -2423,10 +2411,36 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 		}
 	}
 
+	return ret;
+}
+
+static irqreturn_t gen8_irq_handler(int irq, void *arg)
+{
+	struct drm_device *dev = arg;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 master_ctl;
+	irqreturn_t ret;
+
+	if (!intel_irqs_enabled(dev_priv))
+		return IRQ_NONE;
+
+	master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
+	master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
+	if (!master_ctl)
+		return IRQ_NONE;
+
+	I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
+
+	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
+	disable_rpm_wakeref_asserts(dev_priv);
+
+	/* Find, clear, then process each source of interrupt */
+	ret = gen8_gt_irq_handler(dev_priv, master_ctl);
+	ret |= gen8_de_irq_handler(dev_priv, master_ctl);
+
 	I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
 	POSTING_READ_FW(GEN8_MASTER_IRQ);
 
-out:
 	enable_rpm_wakeref_asserts(dev_priv);
 
 	return ret;
@@ -2949,14 +2963,44 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 		ring->hangcheck.deadlock = 0;
 }
 
-static enum intel_ring_hangcheck_action
-ring_stuck(struct intel_engine_cs *ring, u64 acthd)
+static bool subunits_stuck(struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 tmp;
+	u32 instdone[I915_NUM_INSTDONE_REG];
+	bool stuck;
+	int i;
+
+	if (ring->id != RCS)
+		return true;
+
+	i915_get_extra_instdone(ring->dev, instdone);
 
+	/* There might be unstable subunit states even when
+	 * actual head is not moving. Filter out the unstable ones by
+	 * accumulating the undone -> done transitions and only
+	 * consider those as progress.
+	 */
+	stuck = true;
+	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
+		const u32 tmp = instdone[i] | ring->hangcheck.instdone[i];
+
+		if (tmp != ring->hangcheck.instdone[i])
+			stuck = false;
+
+		ring->hangcheck.instdone[i] |= tmp;
+	}
+
+	return stuck;
+}
+
+static enum intel_ring_hangcheck_action
+head_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
 	if (acthd != ring->hangcheck.acthd) {
+
+		/* Clear subunit states on head movement */
+		memset(ring->hangcheck.instdone, 0,
+		       sizeof(ring->hangcheck.instdone));
+
 		if (acthd > ring->hangcheck.max_acthd) {
 			ring->hangcheck.max_acthd = acthd;
 			return HANGCHECK_ACTIVE;
@@ -2965,6 +3009,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
 		return HANGCHECK_ACTIVE_LOOP;
 	}
 
+	if (!subunits_stuck(ring))
+		return HANGCHECK_ACTIVE;
+
+	return HANGCHECK_HUNG;
+}
+
+static enum intel_ring_hangcheck_action
+ring_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum intel_ring_hangcheck_action ha;
+	u32 tmp;
+
+	ha = head_stuck(ring, acthd);
+	if (ha != HANGCHECK_HUNG)
+		return ha;
+
 	if (IS_GEN2(dev))
 		return HANGCHECK_HUNG;
 
@@ -3032,6 +3094,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	 */
 	DISABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 
+	/* As enabling the GPU requires fairly extensive mmio access,
+	 * periodically arm the mmio checker to see if we are triggering
+	 * any invalid access.
+	 */
+	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
 	for_each_ring(ring, dev_priv, i) {
 		u64 acthd;
 		u32 seqno;
@@ -3106,7 +3174,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 			if (ring->hangcheck.score > 0)
 				ring->hangcheck.score--;
 
+			/* Clear head and subunit states on seqno movement */
 			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
+
+			memset(ring->hangcheck.instdone, 0,
+			       sizeof(ring->hangcheck.instdone));
 		}
 
 		ring->hangcheck.seqno = seqno;
@@ -3277,21 +3349,28 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
 				     unsigned int pipe_mask)
 {
 	uint32_t extra_ier = GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN;
+	enum pipe pipe;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	for_each_pipe_masked(dev_priv, pipe, pipe_mask)
+		GEN8_IRQ_INIT_NDX(DE_PIPE, pipe,
+				  dev_priv->de_irq_mask[pipe],
+				  ~dev_priv->de_irq_mask[pipe] | extra_ier);
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
+				     unsigned int pipe_mask)
+{
+	enum pipe pipe;
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	if (pipe_mask & 1 << PIPE_A)
-		GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_A,
-				  dev_priv->de_irq_mask[PIPE_A],
-				  ~dev_priv->de_irq_mask[PIPE_A] | extra_ier);
-	if (pipe_mask & 1 << PIPE_B)
-		GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_B,
-				  dev_priv->de_irq_mask[PIPE_B],
-				  ~dev_priv->de_irq_mask[PIPE_B] | extra_ier);
-	if (pipe_mask & 1 << PIPE_C)
-		GEN8_IRQ_INIT_NDX(DE_PIPE, PIPE_C,
-				  dev_priv->de_irq_mask[PIPE_C],
-				  ~dev_priv->de_irq_mask[PIPE_C] | extra_ier);
+	for_each_pipe_masked(dev_priv, pipe, pipe_mask)
+		GEN8_IRQ_RESET_NDX(DE_PIPE, pipe);
 	spin_unlock_irq(&dev_priv->irq_lock);
+
+	/* make sure we're done processing display irqs */
+	synchronize_irq(dev_priv->dev->irq);
 }
 
 static void cherryview_irq_preinstall(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 835d6099c769..278c9c40c2e0 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -22,6 +22,7 @@
  * IN THE SOFTWARE.
  */
 
+#include "i915_params.h"
 #include "i915_drv.h"
 
 struct i915_params i915 __read_mostly = {
@@ -37,7 +38,7 @@ struct i915_params i915 __read_mostly = {
 	.enable_execlists = -1,
 	.enable_hangcheck = true,
 	.enable_ppgtt = -1,
-	.enable_psr = 0,
+	.enable_psr = -1,
 	.preliminary_hw_support = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT),
 	.disable_power_well = -1,
 	.enable_ips = 1,
@@ -48,7 +49,6 @@ struct i915_params i915 __read_mostly = {
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
-	.disable_vtd_wa = 0,
 	.use_mmio_flip = 0,
 	.mmio_debug = 0,
 	.verbose_state_checks = 1,
@@ -91,7 +91,7 @@ MODULE_PARM_DESC(enable_fbc,
 	"Enable frame buffer compression for power savings "
 	"(default: -1 (use per-chip default))");
 
-module_param_named_unsafe(lvds_channel_mode, i915.lvds_channel_mode, int, 0600);
+module_param_named_unsafe(lvds_channel_mode, i915.lvds_channel_mode, int, 0400);
 MODULE_PARM_DESC(lvds_channel_mode,
 	 "Specify LVDS channel mode "
 	 "(0=probe BIOS [default], 1=single-channel, 2=dual-channel)");
@@ -101,7 +101,7 @@ MODULE_PARM_DESC(lvds_use_ssc,
 	"Use Spread Spectrum Clock with panels [LVDS/eDP] "
 	"(default: auto from VBT)");
 
-module_param_named_unsafe(vbt_sdvo_panel_type, i915.vbt_sdvo_panel_type, int, 0600);
+module_param_named_unsafe(vbt_sdvo_panel_type, i915.vbt_sdvo_panel_type, int, 0400);
 MODULE_PARM_DESC(vbt_sdvo_panel_type,
 	"Override/Ignore selection of SDVO panel mode in the VBT "
 	"(-2=ignore, -1=auto [default], index in VBT BIOS table)");
@@ -126,9 +126,11 @@ MODULE_PARM_DESC(enable_execlists,
 	"(-1=auto [default], 0=disabled, 1=enabled)");
 
 module_param_named_unsafe(enable_psr, i915.enable_psr, int, 0600);
-MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)");
+MODULE_PARM_DESC(enable_psr, "Enable PSR "
+		 "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) "
+		 "Default: -1 (use per-chip default)");
 
-module_param_named_unsafe(preliminary_hw_support, i915.preliminary_hw_support, int, 0600);
+module_param_named_unsafe(preliminary_hw_support, i915.preliminary_hw_support, int, 0400);
 MODULE_PARM_DESC(preliminary_hw_support,
 	"Enable preliminary hardware support.");
 
@@ -162,12 +164,9 @@ MODULE_PARM_DESC(invert_brightness,
 	"to dri-devel@lists.freedesktop.org, if your machine needs it. "
 	"It will then be included in an upcoming module version.");
 
-module_param_named(disable_display, i915.disable_display, bool, 0600);
+module_param_named(disable_display, i915.disable_display, bool, 0400);
 MODULE_PARM_DESC(disable_display, "Disable display (default: false)");
 
-module_param_named_unsafe(disable_vtd_wa, i915.disable_vtd_wa, bool, 0600);
-MODULE_PARM_DESC(disable_vtd_wa, "Disable all VT-d workarounds (default: false)");
-
 module_param_named_unsafe(enable_cmd_parser, i915.enable_cmd_parser, int, 0600);
 MODULE_PARM_DESC(enable_cmd_parser,
 		 "Enable command parsing (1=enabled [default], 0=disabled)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
new file mode 100644
index 000000000000..bd5026b15d3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _I915_PARAMS_H_
+#define _I915_PARAMS_H_
+
+#include <linux/cache.h> /* for __read_mostly */
+
+struct i915_params {
+	int modeset;
+	int panel_ignore_lid;
+	int semaphores;
+	int lvds_channel_mode;
+	int panel_use_ssc;
+	int vbt_sdvo_panel_type;
+	int enable_rc6;
+	int enable_dc;
+	int enable_fbc;
+	int enable_ppgtt;
+	int enable_execlists;
+	int enable_psr;
+	unsigned int preliminary_hw_support;
+	int disable_power_well;
+	int enable_ips;
+	int invert_brightness;
+	int enable_cmd_parser;
+	int guc_log_level;
+	int use_mmio_flip;
+	int mmio_debug;
+	int edp_vswing;
+	/* leave bools at the end to not create holes */
+	bool enable_hangcheck;
+	bool fastboot;
+	bool prefault_disable;
+	bool load_detect_test;
+	bool reset;
+	bool disable_display;
+	bool enable_guc_submission;
+	bool verbose_state_checks;
+	bool nuclear_pageflip;
+};
+
+extern struct i915_params i915 __read_mostly;
+
+#endif
+
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 4897728713f6..f76cbf3e5d1e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -610,16 +610,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   IOSF_BYTE_ENABLES_SHIFT		4
 #define   IOSF_BAR_SHIFT			1
 #define   IOSF_SB_BUSY				(1<<0)
-#define   IOSF_PORT_BUNIT			0x3
-#define   IOSF_PORT_PUNIT			0x4
+#define   IOSF_PORT_BUNIT			0x03
+#define   IOSF_PORT_PUNIT			0x04
 #define   IOSF_PORT_NC				0x11
 #define   IOSF_PORT_DPIO			0x12
-#define   IOSF_PORT_DPIO_2			0x1a
 #define   IOSF_PORT_GPIO_NC			0x13
 #define   IOSF_PORT_CCK				0x14
-#define   IOSF_PORT_CCU				0xA9
-#define   IOSF_PORT_GPS_CORE			0x48
-#define   IOSF_PORT_FLISDSI			0x1B
+#define   IOSF_PORT_DPIO_2			0x1a
+#define   IOSF_PORT_FLISDSI			0x1b
+#define   IOSF_PORT_GPIO_SC			0x48
+#define   IOSF_PORT_GPIO_SUS			0xa8
+#define   IOSF_PORT_CCU				0xa9
 #define VLV_IOSF_DATA				_MMIO(VLV_DISPLAY_BASE + 0x2104)
 #define VLV_IOSF_ADDR				_MMIO(VLV_DISPLAY_BASE + 0x2108)
 
@@ -1635,6 +1636,9 @@ enum skl_disp_power_wells {
 #define   RING_WAIT		(1<<11) /* gen3+, PRBx_CTL */
 #define   RING_WAIT_SEMAPHORE	(1<<10) /* gen6+ */
 
+#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base)+0x4D0) + (i)*4)
+#define   RING_MAX_NONPRIV_SLOTS  12
+
 #define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
 
 #if 0
@@ -1711,6 +1715,11 @@ enum skl_disp_power_wells {
 #define FPGA_DBG		_MMIO(0x42300)
 #define   FPGA_DBG_RM_NOCLAIM	(1<<31)
 
+#define CLAIM_ER		_MMIO(VLV_DISPLAY_BASE + 0x2028)
+#define   CLAIM_ER_CLR		(1 << 31)
+#define   CLAIM_ER_OVERFLOW	(1 << 16)
+#define   CLAIM_ER_CTR_MASK	0xffff
+
 #define DERRMR		_MMIO(0x44050)
 /* Note that HBLANK events are reserved on bdw+ */
 #define   DERRMR_PIPEA_SCANLINE		(1<<0)
@@ -5941,6 +5950,7 @@ enum skl_disp_power_wells {
 #define  ILK_INTERNAL_GRAPHICS_DISABLE	(1 << 31)
 #define  ILK_INTERNAL_DISPLAY_DISABLE	(1 << 30)
 #define  ILK_DISPLAY_DEBUG_DISABLE	(1 << 29)
+#define  IVB_PIPE_C_DISABLE		(1 << 28)
 #define  ILK_HDCP_DISABLE		(1 << 25)
 #define  ILK_eDP_A_DISABLE		(1 << 24)
 #define  HSW_CDCLK_LIMIT		(1 << 24)
@@ -5987,10 +5997,19 @@ enum skl_disp_power_wells {
 #define SKL_DFSM_CDCLK_LIMIT_540	(1 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_450	(2 << 23)
 #define SKL_DFSM_CDCLK_LIMIT_337_5	(3 << 23)
+#define SKL_DFSM_PIPE_A_DISABLE		(1 << 30)
+#define SKL_DFSM_PIPE_B_DISABLE		(1 << 21)
+#define SKL_DFSM_PIPE_C_DISABLE		(1 << 28)
+
+#define GEN7_FF_SLICE_CS_CHICKEN1	_MMIO(0x20e0)
+#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL	(1<<14)
 
 #define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
 #define  GEN9_TSG_BARRIER_ACK_DISABLE		(1<<8)
 
+#define GEN9_CS_DEBUG_MODE1		_MMIO(0x20ec)
+#define GEN8_CS_CHICKEN1		_MMIO(0x2580)
+
 /* GEN7 chicken */
 #define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	((1<<10) | (1<<26))
@@ -6036,6 +6055,8 @@ enum skl_disp_power_wells {
 #define  HDC_FORCE_NON_COHERENT			(1<<4)
 #define  HDC_BARRIER_PERFORMANCE_DISABLE	(1<<10)
 
+#define GEN8_HDC_CHICKEN1			_MMIO(0x7304)
+
 /* GEN9 chicken */
 #define SLICE_ECO_CHICKEN0			_MMIO(0x7308)
 #define   PIXEL_MASK_CAMMING_DISABLE		(1 << 14)
@@ -6766,6 +6787,16 @@ enum skl_disp_power_wells {
 
 #define  VLV_PMWGICZ				_MMIO(0x1300a4)
 
+#define  RC6_LOCATION				_MMIO(0xD40)
+#define	   RC6_CTX_IN_DRAM			(1 << 0)
+#define  RC6_CTX_BASE				_MMIO(0xD48)
+#define    RC6_CTX_BASE_MASK			0xFFFFFFF0
+#define  PWRCTX_MAXCNT_RCSUNIT			_MMIO(0x2054)
+#define  PWRCTX_MAXCNT_VCSUNIT0			_MMIO(0x12054)
+#define  PWRCTX_MAXCNT_BCSUNIT			_MMIO(0x22054)
+#define  PWRCTX_MAXCNT_VECSUNIT			_MMIO(0x1A054)
+#define  PWRCTX_MAXCNT_VCSUNIT1			_MMIO(0x1C054)
+#define    IDLE_TIME_MASK			0xFFFFF
 #define  FORCEWAKE				_MMIO(0xA18C)
 #define  FORCEWAKE_VLV				_MMIO(0x1300b0)
 #define  FORCEWAKE_ACK_VLV			_MMIO(0x1300b4)
@@ -6904,6 +6935,7 @@ enum skl_disp_power_wells {
 #define GEN6_RPDEUC				_MMIO(0xA084)
 #define GEN6_RPDEUCSW				_MMIO(0xA088)
 #define GEN6_RC_STATE				_MMIO(0xA094)
+#define   RC6_STATE				(1 << 18)
 #define GEN6_RC1_WAKE_RATE_LIMIT		_MMIO(0xA098)
 #define GEN6_RC6_WAKE_RATE_LIMIT		_MMIO(0xA09C)
 #define GEN6_RC6pp_WAKE_RATE_LIMIT		_MMIO(0xA0A0)
@@ -7536,6 +7568,7 @@ enum skl_disp_power_wells {
 #define  DC_STATE_EN_UPTO_DC5_DC6_MASK   0x3
 
 #define  DC_STATE_DEBUG                  _MMIO(0x45520)
+#define  DC_STATE_DEBUG_MASK_CORES	(1<<0)
 #define  DC_STATE_DEBUG_MASK_MEMORY_UP	(1<<1)
 
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
@@ -8155,4 +8188,11 @@ enum skl_disp_power_wells {
 #define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
 #define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
 
+/* gamt regs */
+#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for LRA1/2 */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*    "        " */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*    "        " */
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index a8af594fbd00..34e061a9ef06 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -92,7 +92,7 @@ static void i915_restore_display(struct drm_device *dev)
 	}
 
 	/* only restore FBC info on the platform that supports FBC*/
-	intel_fbc_disable(dev_priv);
+	intel_fbc_global_disable(dev_priv);
 
 	/* restore FBC interval */
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev))
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 37e3f0ddf8e0..c6188dddb341 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -164,7 +164,7 @@ i915_l3_read(struct file *filp, struct kobject *kobj,
 	     struct bin_attribute *attr, char *buf,
 	     loff_t offset, size_t count)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct drm_minor *dminor = dev_to_drm_minor(dev);
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
@@ -200,7 +200,7 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	      struct bin_attribute *attr, char *buf,
 	      loff_t offset, size_t count)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
+	struct device *dev = kobj_to_dev(kobj);
 	struct drm_minor *dminor = dev_to_drm_minor(dev);
 	struct drm_device *drm_dev = dminor->dev;
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
@@ -521,7 +521,7 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 				loff_t off, size_t count)
 {
 
-	struct device *kdev = container_of(kobj, struct device, kobj);
+	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_minor *minor = dev_to_drm_minor(kdev);
 	struct drm_device *dev = minor->dev;
 	struct i915_error_state_file_priv error_priv;
@@ -556,7 +556,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj,
 				 struct bin_attribute *attr, char *buf,
 				 loff_t off, size_t count)
 {
-	struct device *kdev = container_of(kobj, struct device, kobj);
+	struct device *kdev = kobj_to_dev(kobj);
 	struct drm_minor *minor = dev_to_drm_minor(kdev);
 	struct drm_device *dev = minor->dev;
 	int ret;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 52b2d409945d..fa09e5581137 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -175,35 +175,24 @@ TRACE_EVENT(i915_vma_unbind,
 		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
-#define VM_TO_TRACE_NAME(vm) \
-	(i915_is_ggtt(vm) ? "G" : \
-		      "P")
-
-DECLARE_EVENT_CLASS(i915_va,
-	TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
-	TP_ARGS(vm, start, length, name),
+TRACE_EVENT(i915_va_alloc,
+	TP_PROTO(struct i915_vma *vma),
+	TP_ARGS(vma),
 
 	TP_STRUCT__entry(
 		__field(struct i915_address_space *, vm)
 		__field(u64, start)
 		__field(u64, end)
-		__string(name, name)
 	),
 
 	TP_fast_assign(
-		__entry->vm = vm;
-		__entry->start = start;
-		__entry->end = start + length - 1;
-		__assign_str(name, name);
+		__entry->vm = vma->vm;
+		__entry->start = vma->node.start;
+		__entry->end = vma->node.start + vma->node.size - 1;
 	),
 
-	TP_printk("vm=%p (%s), 0x%llx-0x%llx",
-		  __entry->vm, __get_str(name),  __entry->start, __entry->end)
-);
-
-DEFINE_EVENT(i915_va, i915_va_alloc,
-	     TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
-	     TP_ARGS(vm, start, length, name)
+	TP_printk("vm=%p (%c), 0x%llx-0x%llx",
+		  __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P',  __entry->start, __entry->end)
 );
 
 DECLARE_EVENT_CLASS(i915_px_entry,
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index d0b1c9afa35e..8e579a8505ac 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -97,6 +97,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
 	crtc_state->disable_lp_wm = false;
 	crtc_state->disable_cxsr = false;
 	crtc_state->wm_changed = false;
+	crtc_state->fb_changed = false;
 
 	return &crtc_state->base;
 }
@@ -308,5 +309,5 @@ void intel_atomic_state_clear(struct drm_atomic_state *s)
 {
 	struct intel_atomic_state *state = to_intel_atomic_state(s);
 	drm_atomic_state_default_clear(&state->base);
-	state->dpll_set = false;
+	state->dpll_set = state->modeset = false;
 }
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index c6bb0fc1edfb..e0b851a0004a 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -152,9 +152,9 @@ static int intel_plane_atomic_check(struct drm_plane *plane,
 	intel_state->clip.x1 = 0;
 	intel_state->clip.y1 = 0;
 	intel_state->clip.x2 =
-		crtc_state->base.active ? crtc_state->pipe_src_w : 0;
+		crtc_state->base.enable ? crtc_state->pipe_src_w : 0;
 	intel_state->clip.y2 =
-		crtc_state->base.active ? crtc_state->pipe_src_h : 0;
+		crtc_state->base.enable ? crtc_state->pipe_src_h : 0;
 
 	if (state->fb && intel_rotation_90_or_270(state->rotation)) {
 		if (!(state->fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
@@ -194,8 +194,16 @@ static void intel_plane_atomic_update(struct drm_plane *plane,
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_plane_state *intel_state =
 		to_intel_plane_state(plane->state);
+	struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc;
+	struct drm_crtc_state *crtc_state =
+		drm_atomic_get_existing_crtc_state(old_state->state, crtc);
 
-	intel_plane->commit_plane(plane, intel_state);
+	if (intel_state->visible)
+		intel_plane->update_plane(plane,
+					  to_intel_crtc_state(crtc_state),
+					  intel_state);
+	else
+		intel_plane->disable_plane(plane, crtc);
 }
 
 const struct drm_plane_helper_funcs intel_plane_helper_funcs = {
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index eba3e0f87181..bf62a19c8f69 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -31,11 +31,49 @@
 #include "i915_drv.h"
 #include "intel_bios.h"
 
+/**
+ * DOC: Video BIOS Table (VBT)
+ *
+ * The Video BIOS Table, or VBT, provides platform and board specific
+ * configuration information to the driver that is not discoverable or available
+ * through other means. The configuration is mostly related to display
+ * hardware. The VBT is available via the ACPI OpRegion or, on older systems, in
+ * the PCI ROM.
+ *
+ * The VBT consists of a VBT Header (defined as &struct vbt_header), a BDB
+ * Header (&struct bdb_header), and a number of BIOS Data Blocks (BDB) that
+ * contain the actual configuration information. The VBT Header, and thus the
+ * VBT, begins with "$VBT" signature. The VBT Header contains the offset of the
+ * BDB Header. The data blocks are concatenated after the BDB Header. The data
+ * blocks have a 1-byte Block ID, 2-byte Block Size, and Block Size bytes of
+ * data. (Block 53, the MIPI Sequence Block is an exception.)
+ *
+ * The driver parses the VBT during load. The relevant information is stored in
+ * driver private data for ease of use, and the actual VBT is not read after
+ * that.
+ */
+
 #define	SLAVE_ADDR1	0x70
 #define	SLAVE_ADDR2	0x72
 
 static int panel_type;
 
+/* Get BDB block size given a pointer to Block ID. */
+static u32 _get_blocksize(const u8 *block_base)
+{
+	/* The MIPI Sequence Block v3+ has a separate size field. */
+	if (*block_base == BDB_MIPI_SEQUENCE && *(block_base + 3) >= 3)
+		return *((const u32 *)(block_base + 4));
+	else
+		return *((const u16 *)(block_base + 1));
+}
+
+/* Get BDB block size give a pointer to data after Block ID and Block Size. */
+static u32 get_blocksize(const void *block_data)
+{
+	return _get_blocksize(block_data - 3);
+}
+
 static const void *
 find_section(const void *_bdb, int section_id)
 {
@@ -52,14 +90,8 @@ find_section(const void *_bdb, int section_id)
 	/* walk the sections looking for section_id */
 	while (index + 3 < total) {
 		current_id = *(base + index);
-		index++;
-
-		current_size = *((const u16 *)(base + index));
-		index += 2;
-
-		/* The MIPI Sequence Block v3+ has a separate size field. */
-		if (current_id == BDB_MIPI_SEQUENCE && *(base + index) >= 3)
-			current_size = *((const u32 *)(base + index + 1));
+		current_size = _get_blocksize(base + index);
+		index += 3;
 
 		if (index + current_size > total)
 			return NULL;
@@ -73,16 +105,6 @@ find_section(const void *_bdb, int section_id)
 	return NULL;
 }
 
-static u16
-get_blocksize(const void *p)
-{
-	u16 *block_ptr, block_size;
-
-	block_ptr = (u16 *)((char *)p - 2);
-	block_size = *block_ptr;
-	return block_size;
-}
-
 static void
 fill_detail_timing_data(struct drm_display_mode *panel_fixed_mode,
 			const struct lvds_dvo_timing *dvo_timing)
@@ -675,84 +697,13 @@ parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 	dev_priv->vbt.psr.tp2_tp3_wakeup_time = psr_table->tp2_tp3_wakeup_time;
 }
 
-static u8 *goto_next_sequence(u8 *data, int *size)
-{
-	u16 len;
-	int tmp = *size;
-
-	if (--tmp < 0)
-		return NULL;
-
-	/* goto first element */
-	data++;
-	while (1) {
-		switch (*data) {
-		case MIPI_SEQ_ELEM_SEND_PKT:
-			/*
-			 * skip by this element payload size
-			 * skip elem id, command flag and data type
-			 */
-			tmp -= 5;
-			if (tmp < 0)
-				return NULL;
-
-			data += 3;
-			len = *((u16 *)data);
-
-			tmp -= len;
-			if (tmp < 0)
-				return NULL;
-
-			/* skip by len */
-			data = data + 2 + len;
-			break;
-		case MIPI_SEQ_ELEM_DELAY:
-			/* skip by elem id, and delay is 4 bytes */
-			tmp -= 5;
-			if (tmp < 0)
-				return NULL;
-
-			data += 5;
-			break;
-		case MIPI_SEQ_ELEM_GPIO:
-			tmp -= 3;
-			if (tmp < 0)
-				return NULL;
-
-			data += 3;
-			break;
-		default:
-			DRM_ERROR("Unknown element\n");
-			return NULL;
-		}
-
-		/* end of sequence ? */
-		if (*data == 0)
-			break;
-	}
-
-	/* goto next sequence or end of block byte */
-	if (--tmp < 0)
-		return NULL;
-
-	data++;
-
-	/* update amount of data left for the sequence block to be parsed */
-	*size = tmp;
-	return data;
-}
-
 static void
-parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
+parse_mipi_config(struct drm_i915_private *dev_priv,
+		  const struct bdb_header *bdb)
 {
 	const struct bdb_mipi_config *start;
-	const struct bdb_mipi_sequence *sequence;
 	const struct mipi_config *config;
 	const struct mipi_pps_data *pps;
-	u8 *data;
-	const u8 *seq_data;
-	int i, panel_id, seq_size;
-	u16 block_size;
 
 	/* parse MIPI blocks only if LFP type is MIPI */
 	if (!dev_priv->vbt.has_mipi)
@@ -798,104 +749,233 @@ parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
 
 	/* We have mandatory mipi config blocks. Initialize as generic panel */
 	dev_priv->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID;
+}
 
-	/* Check if we have sequence block as well */
-	sequence = find_section(bdb, BDB_MIPI_SEQUENCE);
-	if (!sequence) {
-		DRM_DEBUG_KMS("No MIPI Sequence found, parsing complete\n");
-		return;
+/* Find the sequence block and size for the given panel. */
+static const u8 *
+find_panel_sequence_block(const struct bdb_mipi_sequence *sequence,
+			  u16 panel_id, u32 *seq_size)
+{
+	u32 total = get_blocksize(sequence);
+	const u8 *data = &sequence->data[0];
+	u8 current_id;
+	u32 current_size;
+	int header_size = sequence->version >= 3 ? 5 : 3;
+	int index = 0;
+	int i;
+
+	/* skip new block size */
+	if (sequence->version >= 3)
+		data += 4;
+
+	for (i = 0; i < MAX_MIPI_CONFIGURATIONS && index < total; i++) {
+		if (index + header_size > total) {
+			DRM_ERROR("Invalid sequence block (header)\n");
+			return NULL;
+		}
+
+		current_id = *(data + index);
+		if (sequence->version >= 3)
+			current_size = *((const u32 *)(data + index + 1));
+		else
+			current_size = *((const u16 *)(data + index + 1));
+
+		index += header_size;
+
+		if (index + current_size > total) {
+			DRM_ERROR("Invalid sequence block\n");
+			return NULL;
+		}
+
+		if (current_id == panel_id) {
+			*seq_size = current_size;
+			return data + index;
+		}
+
+		index += current_size;
 	}
 
-	/* Fail gracefully for forward incompatible sequence block. */
-	if (sequence->version >= 3) {
-		DRM_ERROR("Unable to parse MIPI Sequence Block v3+\n");
-		return;
+	DRM_ERROR("Sequence block detected but no valid configuration\n");
+
+	return NULL;
+}
+
+static int goto_next_sequence(const u8 *data, int index, int total)
+{
+	u16 len;
+
+	/* Skip Sequence Byte. */
+	for (index = index + 1; index < total; index += len) {
+		u8 operation_byte = *(data + index);
+		index++;
+
+		switch (operation_byte) {
+		case MIPI_SEQ_ELEM_END:
+			return index;
+		case MIPI_SEQ_ELEM_SEND_PKT:
+			if (index + 4 > total)
+				return 0;
+
+			len = *((const u16 *)(data + index + 2)) + 4;
+			break;
+		case MIPI_SEQ_ELEM_DELAY:
+			len = 4;
+			break;
+		case MIPI_SEQ_ELEM_GPIO:
+			len = 2;
+			break;
+		case MIPI_SEQ_ELEM_I2C:
+			if (index + 7 > total)
+				return 0;
+			len = *(data + index + 6) + 7;
+			break;
+		default:
+			DRM_ERROR("Unknown operation byte\n");
+			return 0;
+		}
 	}
 
-	DRM_DEBUG_DRIVER("Found MIPI sequence block\n");
+	return 0;
+}
 
-	block_size = get_blocksize(sequence);
+static int goto_next_sequence_v3(const u8 *data, int index, int total)
+{
+	int seq_end;
+	u16 len;
+	u32 size_of_sequence;
 
 	/*
-	 * parse the sequence block for individual sequences
+	 * Could skip sequence based on Size of Sequence alone, but also do some
+	 * checking on the structure.
 	 */
-	dev_priv->vbt.dsi.seq_version = sequence->version;
+	if (total < 5) {
+		DRM_ERROR("Too small sequence size\n");
+		return 0;
+	}
 
-	seq_data = &sequence->data[0];
+	/* Skip Sequence Byte. */
+	index++;
 
 	/*
-	 * sequence block is variable length and hence we need to parse and
-	 * get the sequence data for specific panel id
+	 * Size of Sequence. Excludes the Sequence Byte and the size itself,
+	 * includes MIPI_SEQ_ELEM_END byte, excludes the final MIPI_SEQ_END
+	 * byte.
 	 */
-	for (i = 0; i < MAX_MIPI_CONFIGURATIONS; i++) {
-		panel_id = *seq_data;
-		seq_size = *((u16 *) (seq_data + 1));
-		if (panel_id == panel_type)
-			break;
+	size_of_sequence = *((const uint32_t *)(data + index));
+	index += 4;
 
-		/* skip the sequence including seq header of 3 bytes */
-		seq_data = seq_data + 3 + seq_size;
-		if ((seq_data - &sequence->data[0]) > block_size) {
-			DRM_ERROR("Sequence start is beyond sequence block size, corrupted sequence block\n");
-			return;
+	seq_end = index + size_of_sequence;
+	if (seq_end > total) {
+		DRM_ERROR("Invalid sequence size\n");
+		return 0;
+	}
+
+	for (; index < total; index += len) {
+		u8 operation_byte = *(data + index);
+		index++;
+
+		if (operation_byte == MIPI_SEQ_ELEM_END) {
+			if (index != seq_end) {
+				DRM_ERROR("Invalid element structure\n");
+				return 0;
+			}
+			return index;
+		}
+
+		len = *(data + index);
+		index++;
+
+		/*
+		 * FIXME: Would be nice to check elements like for v1/v2 in
+		 * goto_next_sequence() above.
+		 */
+		switch (operation_byte) {
+		case MIPI_SEQ_ELEM_SEND_PKT:
+		case MIPI_SEQ_ELEM_DELAY:
+		case MIPI_SEQ_ELEM_GPIO:
+		case MIPI_SEQ_ELEM_I2C:
+		case MIPI_SEQ_ELEM_SPI:
+		case MIPI_SEQ_ELEM_PMIC:
+			break;
+		default:
+			DRM_ERROR("Unknown operation byte %u\n",
+				  operation_byte);
+			break;
 		}
 	}
 
-	if (i == MAX_MIPI_CONFIGURATIONS) {
-		DRM_ERROR("Sequence block detected but no valid configuration\n");
+	return 0;
+}
+
+static void
+parse_mipi_sequence(struct drm_i915_private *dev_priv,
+		    const struct bdb_header *bdb)
+{
+	const struct bdb_mipi_sequence *sequence;
+	const u8 *seq_data;
+	u32 seq_size;
+	u8 *data;
+	int index = 0;
+
+	/* Only our generic panel driver uses the sequence block. */
+	if (dev_priv->vbt.dsi.panel_id != MIPI_DSI_GENERIC_PANEL_ID)
+		return;
+
+	sequence = find_section(bdb, BDB_MIPI_SEQUENCE);
+	if (!sequence) {
+		DRM_DEBUG_KMS("No MIPI Sequence found, parsing complete\n");
 		return;
 	}
 
-	/* check if found sequence is completely within the sequence block
-	 * just being paranoid */
-	if (seq_size > block_size) {
-		DRM_ERROR("Corrupted sequence/size, bailing out\n");
+	/* Fail gracefully for forward incompatible sequence block. */
+	if (sequence->version >= 4) {
+		DRM_ERROR("Unable to parse MIPI Sequence Block v%u\n",
+			  sequence->version);
 		return;
 	}
 
-	/* skip the panel id(1 byte) and seq size(2 bytes) */
-	dev_priv->vbt.dsi.data = kmemdup(seq_data + 3, seq_size, GFP_KERNEL);
-	if (!dev_priv->vbt.dsi.data)
+	DRM_DEBUG_DRIVER("Found MIPI sequence block v%u\n", sequence->version);
+
+	seq_data = find_panel_sequence_block(sequence, panel_type, &seq_size);
+	if (!seq_data)
 		return;
 
-	/*
-	 * loop into the sequence data and split into multiple sequneces
-	 * There are only 5 types of sequences as of now
-	 */
-	data = dev_priv->vbt.dsi.data;
-	dev_priv->vbt.dsi.size = seq_size;
+	data = kmemdup(seq_data, seq_size, GFP_KERNEL);
+	if (!data)
+		return;
 
-	/* two consecutive 0x00 indicate end of all sequences */
-	while (1) {
-		int seq_id = *data;
-		if (MIPI_SEQ_MAX > seq_id && seq_id > MIPI_SEQ_UNDEFINED) {
-			dev_priv->vbt.dsi.sequence[seq_id] = data;
-			DRM_DEBUG_DRIVER("Found mipi sequence - %d\n", seq_id);
-		} else {
-			DRM_ERROR("undefined sequence\n");
+	/* Parse the sequences, store pointers to each sequence. */
+	for (;;) {
+		u8 seq_id = *(data + index);
+		if (seq_id == MIPI_SEQ_END)
+			break;
+
+		if (seq_id >= MIPI_SEQ_MAX) {
+			DRM_ERROR("Unknown sequence %u\n", seq_id);
 			goto err;
 		}
 
-		/* partial parsing to skip elements */
-		data = goto_next_sequence(data, &seq_size);
+		dev_priv->vbt.dsi.sequence[seq_id] = data + index;
 
-		if (data == NULL) {
-			DRM_ERROR("Sequence elements going beyond block itself. Sequence block parsing failed\n");
+		if (sequence->version >= 3)
+			index = goto_next_sequence_v3(data, index, seq_size);
+		else
+			index = goto_next_sequence(data, index, seq_size);
+		if (!index) {
+			DRM_ERROR("Invalid sequence %u\n", seq_id);
 			goto err;
 		}
-
-		if (*data == 0)
-			break; /* end of sequence reached */
 	}
 
-	DRM_DEBUG_DRIVER("MIPI related vbt parsing complete\n");
+	dev_priv->vbt.dsi.data = data;
+	dev_priv->vbt.dsi.size = seq_size;
+	dev_priv->vbt.dsi.seq_version = sequence->version;
+
+	DRM_DEBUG_DRIVER("MIPI related VBT parsing complete\n");
 	return;
-err:
-	kfree(dev_priv->vbt.dsi.data);
-	dev_priv->vbt.dsi.data = NULL;
 
-	/* error during parsing so set all pointers to null
-	 * because of partial parsing */
+err:
+	kfree(data);
 	memset(dev_priv->vbt.dsi.sequence, 0, sizeof(dev_priv->vbt.dsi.sequence));
 }
 
@@ -1088,7 +1168,12 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 		DRM_DEBUG_KMS("No general definition block is found, no devices defined.\n");
 		return;
 	}
-	if (bdb->version < 195) {
+	if (bdb->version < 106) {
+		expected_size = 22;
+	} else if (bdb->version < 109) {
+		expected_size = 27;
+	} else if (bdb->version < 195) {
+		BUILD_BUG_ON(sizeof(struct old_child_dev_config) != 33);
 		expected_size = sizeof(struct old_child_dev_config);
 	} else if (bdb->version == 195) {
 		expected_size = 37;
@@ -1101,18 +1186,18 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 				 bdb->version, expected_size);
 	}
 
-	/* The legacy sized child device config is the minimum we need. */
-	if (p_defs->child_dev_size < sizeof(struct old_child_dev_config)) {
-		DRM_ERROR("Child device config size %u is too small.\n",
-			  p_defs->child_dev_size);
-		return;
-	}
-
 	/* Flag an error for unexpected size, but continue anyway. */
 	if (p_defs->child_dev_size != expected_size)
 		DRM_ERROR("Unexpected child device config size %u (expected %u for VBT version %u)\n",
 			  p_defs->child_dev_size, expected_size, bdb->version);
 
+	/* The legacy sized child device config is the minimum we need. */
+	if (p_defs->child_dev_size < sizeof(struct old_child_dev_config)) {
+		DRM_DEBUG_KMS("Child device config size %u is too small.\n",
+			      p_defs->child_dev_size);
+		return;
+	}
+
 	/* get the block size of general definitions */
 	block_size = get_blocksize(p_defs);
 	/* get the number of child device */
@@ -1285,7 +1370,7 @@ static const struct vbt_header *find_vbt(void __iomem *bios, size_t size)
 
 /**
  * intel_bios_init - find VBT and initialize settings from the BIOS
- * @dev: DRM device
+ * @dev_priv: i915 device instance
  *
  * Loads the Video BIOS and checks that the VBT exists.  Sets scratch registers
  * to appropriate values.
@@ -1337,7 +1422,8 @@ intel_bios_init(struct drm_i915_private *dev_priv)
 	parse_driver_features(dev_priv, bdb);
 	parse_edp(dev_priv, bdb);
 	parse_psr(dev_priv, bdb);
-	parse_mipi(dev_priv, bdb);
+	parse_mipi_config(dev_priv, bdb);
+	parse_mipi_sequence(dev_priv, bdb);
 	parse_ddi_ports(dev_priv, bdb);
 
 	if (bios)
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 54eac1003a1e..350d4e0f75a4 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -25,25 +25,43 @@
  *
  */
 
-#ifndef _I830_BIOS_H_
-#define _I830_BIOS_H_
-
+#ifndef _INTEL_BIOS_H_
+#define _INTEL_BIOS_H_
+
+/**
+ * struct vbt_header - VBT Header structure
+ * @signature:		VBT signature, always starts with "$VBT"
+ * @version:		Version of this structure
+ * @header_size:	Size of this structure
+ * @vbt_size:		Size of VBT (VBT Header, BDB Header and data blocks)
+ * @vbt_checksum:	Checksum
+ * @reserved0:		Reserved
+ * @bdb_offset:		Offset of &struct bdb_header from beginning of VBT
+ * @aim_offset:		Offsets of add-in data blocks from beginning of VBT
+ */
 struct vbt_header {
-	u8 signature[20];		/**< Always starts with 'VBT$' */
-	u16 version;			/**< decimal */
-	u16 header_size;		/**< in bytes */
-	u16 vbt_size;			/**< in bytes */
+	u8 signature[20];
+	u16 version;
+	u16 header_size;
+	u16 vbt_size;
 	u8 vbt_checksum;
 	u8 reserved0;
-	u32 bdb_offset;			/**< from beginning of VBT */
-	u32 aim_offset[4];		/**< from beginning of VBT */
+	u32 bdb_offset;
+	u32 aim_offset[4];
 } __packed;
 
+/**
+ * struct bdb_header - BDB Header structure
+ * @signature:		BDB signature "BIOS_DATA_BLOCK"
+ * @version:		Version of the data block definitions
+ * @header_size:	Size of this structure
+ * @bdb_size:		Size of BDB (BDB Header and data blocks)
+ */
 struct bdb_header {
-	u8 signature[16];		/**< Always 'BIOS_DATA_BLOCK' */
-	u16 version;			/**< decimal */
-	u16 header_size;		/**< in bytes */
-	u16 bdb_size;			/**< in bytes */
+	u8 signature[16];
+	u16 version;
+	u16 header_size;
+	u16 bdb_size;
 } __packed;
 
 /* strictly speaking, this is a "skip" block, but it has interesting info */
@@ -936,21 +954,29 @@ struct bdb_mipi_sequence {
 
 /* MIPI Sequnece Block definitions */
 enum mipi_seq {
-	MIPI_SEQ_UNDEFINED = 0,
+	MIPI_SEQ_END = 0,
 	MIPI_SEQ_ASSERT_RESET,
 	MIPI_SEQ_INIT_OTP,
 	MIPI_SEQ_DISPLAY_ON,
 	MIPI_SEQ_DISPLAY_OFF,
 	MIPI_SEQ_DEASSERT_RESET,
+	MIPI_SEQ_BACKLIGHT_ON,		/* sequence block v2+ */
+	MIPI_SEQ_BACKLIGHT_OFF,		/* sequence block v2+ */
+	MIPI_SEQ_TEAR_ON,		/* sequence block v2+ */
+	MIPI_SEQ_TEAR_OFF,		/* sequence block v3+ */
+	MIPI_SEQ_POWER_ON,		/* sequence block v3+ */
+	MIPI_SEQ_POWER_OFF,		/* sequence block v3+ */
 	MIPI_SEQ_MAX
 };
 
 enum mipi_seq_element {
-	MIPI_SEQ_ELEM_UNDEFINED = 0,
+	MIPI_SEQ_ELEM_END = 0,
 	MIPI_SEQ_ELEM_SEND_PKT,
 	MIPI_SEQ_ELEM_DELAY,
 	MIPI_SEQ_ELEM_GPIO,
-	MIPI_SEQ_ELEM_STATUS,
+	MIPI_SEQ_ELEM_I2C,		/* sequence block v2+ */
+	MIPI_SEQ_ELEM_SPI,		/* sequence block v3+ */
+	MIPI_SEQ_ELEM_PMIC,		/* sequence block v3+ */
 	MIPI_SEQ_ELEM_MAX
 };
 
@@ -965,4 +991,4 @@ enum mipi_gpio_pin_index {
 	MIPI_GPIO_MAX
 };
 
-#endif /* _I830_BIOS_H_ */
+#endif /* _INTEL_BIOS_H_ */
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index a7b4a524fadd..505fc5cf26f8 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -213,9 +213,7 @@ static void pch_post_disable_crt(struct intel_encoder *encoder)
 
 static void intel_enable_crt(struct intel_encoder *encoder)
 {
-	struct intel_crt *crt = intel_encoder_to_crt(encoder);
-
-	intel_crt_set_dpms(encoder, crt->connector->base.dpms);
+	intel_crt_set_dpms(encoder, DRM_MODE_DPMS_ON);
 }
 
 static enum drm_mode_status
@@ -223,6 +221,7 @@ intel_crt_mode_valid(struct drm_connector *connector,
 		     struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
+	int max_dotclk = to_i915(dev)->max_dotclk_freq;
 
 	int max_clock = 0;
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -238,6 +237,9 @@ intel_crt_mode_valid(struct drm_connector *connector,
 	if (mode->clock > max_clock)
 		return MODE_CLOCK_HIGH;
 
+	if (mode->clock > max_dotclk)
+		return MODE_CLOCK_HIGH;
+
 	/* The FDI receiver on LPT only supports 8bpc and only has 2 lanes. */
 	if (HAS_PCH_LPT(dev) &&
 	    (ironlake_get_lanes_required(mode->clock, 270000, 24) > 2))
@@ -476,11 +478,10 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
-intel_crt_load_detect(struct intel_crt *crt)
+intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe)
 {
 	struct drm_device *dev = crt->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t pipe = to_intel_crtc(crt->base.base.crtc)->pipe;
 	uint32_t save_bclrpat;
 	uint32_t save_vtotal;
 	uint32_t vtotal, vactive;
@@ -649,7 +650,8 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
 		else if (INTEL_INFO(dev)->gen < 4)
-			status = intel_crt_load_detect(crt);
+			status = intel_crt_load_detect(crt,
+				to_intel_crtc(connector->state->crtc)->pipe);
 		else
 			status = connector_status_unknown;
 		intel_release_load_detect_pipe(connector, &tmp, &ctx);
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 647d85e77c2f..902054efb902 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -44,6 +44,8 @@
 #define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
 #define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
 
+#define FIRMWARE_URL  "https://01.org/linuxgraphics/intel-linux-graphics-firmwares"
+
 MODULE_FIRMWARE(I915_CSR_SKL);
 MODULE_FIRMWARE(I915_CSR_BXT);
 
@@ -177,7 +179,8 @@ static const struct stepping_info kbl_stepping_info[] = {
 static const struct stepping_info skl_stepping_info[] = {
 	{'A', '0'}, {'B', '0'}, {'C', '0'},
 	{'D', '0'}, {'E', '0'}, {'F', '0'},
-	{'G', '0'}, {'H', '0'}, {'I', '0'}
+	{'G', '0'}, {'H', '0'}, {'I', '0'},
+	{'J', '0'}, {'K', '0'}
 };
 
 static const struct stepping_info bxt_stepping_info[] = {
@@ -217,19 +220,19 @@ static const struct stepping_info *intel_get_stepping_info(struct drm_device *de
  * Everytime display comes back from low power state this function is called to
  * copy the firmware from internal memory to registers.
  */
-void intel_csr_load_program(struct drm_i915_private *dev_priv)
+bool intel_csr_load_program(struct drm_i915_private *dev_priv)
 {
 	u32 *payload = dev_priv->csr.dmc_payload;
 	uint32_t i, fw_size;
 
 	if (!IS_GEN9(dev_priv)) {
 		DRM_ERROR("No CSR support available for this platform\n");
-		return;
+		return false;
 	}
 
 	if (!dev_priv->csr.dmc_payload) {
 		DRM_ERROR("Tried to program CSR with empty payload\n");
-		return;
+		return false;
 	}
 
 	fw_size = dev_priv->csr.dmc_fw_size;
@@ -242,6 +245,8 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
 	}
 
 	dev_priv->csr.dc_state = 0;
+
+	return true;
 }
 
 static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
@@ -280,10 +285,11 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
 
 	csr->version = css_header->version;
 
-	if (IS_SKYLAKE(dev) && csr->version < SKL_CSR_VERSION_REQUIRED) {
+	if ((IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) &&
+	    csr->version < SKL_CSR_VERSION_REQUIRED) {
 		DRM_INFO("Refusing to load old Skylake DMC firmware v%u.%u,"
 			 " please upgrade to v%u.%u or later"
-			 " [https://01.org/linuxgraphics/intel-linux-graphics-firmwares].\n",
+			   " [" FIRMWARE_URL "].\n",
 			 CSR_VERSION_MAJOR(csr->version),
 			 CSR_VERSION_MINOR(csr->version),
 			 CSR_VERSION_MAJOR(SKL_CSR_VERSION_REQUIRED),
@@ -401,7 +407,10 @@ out:
 			 CSR_VERSION_MAJOR(csr->version),
 			 CSR_VERSION_MINOR(csr->version));
 	} else {
-		DRM_ERROR("Failed to load DMC firmware, disabling rpm\n");
+		dev_notice(dev_priv->dev->dev,
+			   "Failed to load DMC firmware"
+			   " [" FIRMWARE_URL "],"
+			   " disabling runtime power management.\n");
 	}
 
 	release_firmware(fw);
@@ -423,7 +432,7 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 	if (!HAS_CSR(dev_priv))
 		return;
 
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		csr->fw_path = I915_CSR_SKL;
 	else if (IS_BROXTON(dev_priv))
 		csr->fw_path = I915_CSR_BXT;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 084d5586585d..62de9f4bce09 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -133,38 +133,38 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = {
 	{ 0x00002016, 0x000000A0, 0x0 },
 	{ 0x00005012, 0x0000009B, 0x0 },
 	{ 0x00007011, 0x00000088, 0x0 },
-	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80009010, 0x000000C0, 0x1 },
 	{ 0x00002016, 0x0000009B, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80007011, 0x000000C0, 0x1 },
 	{ 0x00002016, 0x000000DF, 0x0 },
-	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80005012, 0x000000C0, 0x1 },
 };
 
 /* Skylake U */
 static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
 	{ 0x0000201B, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80007011, 0x000000CD, 0x0 },
+	{ 0x80009010, 0x000000C0, 0x1 },
 	{ 0x0000201B, 0x0000009D, 0x0 },
-	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
-	{ 0x80007011, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80005012, 0x000000C0, 0x1 },
+	{ 0x80007011, 0x000000C0, 0x1 },
 	{ 0x00002016, 0x00000088, 0x0 },
-	{ 0x80005012, 0x000000C0, 0x1 },	/* Uses I_boost level 0x1 */
+	{ 0x80005012, 0x000000C0, 0x1 },
 };
 
 /* Skylake Y */
 static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
 	{ 0x00000018, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x00007011, 0x00000087, 0x0 },
-	{ 0x80009010, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80007011, 0x000000CD, 0x0 },
+	{ 0x80009010, 0x000000C0, 0x3 },
 	{ 0x00000018, 0x0000009D, 0x0 },
-	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
-	{ 0x80007011, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80005012, 0x000000C0, 0x3 },
+	{ 0x80007011, 0x000000C0, 0x3 },
 	{ 0x00000018, 0x00000088, 0x0 },
-	{ 0x80005012, 0x000000C0, 0x3 },	/* Uses I_boost level 0x3 */
+	{ 0x80005012, 0x000000C0, 0x3 },
 };
 
 /*
@@ -226,26 +226,26 @@ static const struct ddi_buf_trans skl_ddi_translations_hdmi[] = {
 	{ 0x00000018, 0x000000A1, 0x0 },
 	{ 0x00000018, 0x00000098, 0x0 },
 	{ 0x00004013, 0x00000088, 0x0 },
-	{ 0x00006012, 0x00000087, 0x0 },
+	{ 0x80006012, 0x000000CD, 0x1 },
 	{ 0x00000018, 0x000000DF, 0x0 },
-	{ 0x00003015, 0x00000087, 0x0 },	/* Default */
-	{ 0x00003015, 0x000000C7, 0x0 },
-	{ 0x00000018, 0x000000C7, 0x0 },
+	{ 0x80003015, 0x000000CD, 0x1 },	/* Default */
+	{ 0x80003015, 0x000000C0, 0x1 },
+	{ 0x80000018, 0x000000C0, 0x1 },
 };
 
 /* Skylake Y */
 static const struct ddi_buf_trans skl_y_ddi_translations_hdmi[] = {
 	{ 0x00000018, 0x000000A1, 0x0 },
 	{ 0x00005012, 0x000000DF, 0x0 },
-	{ 0x00007011, 0x00000084, 0x0 },
+	{ 0x80007011, 0x000000CB, 0x3 },
 	{ 0x00000018, 0x000000A4, 0x0 },
 	{ 0x00000018, 0x0000009D, 0x0 },
 	{ 0x00004013, 0x00000080, 0x0 },
-	{ 0x00006013, 0x000000C7, 0x0 },
+	{ 0x80006013, 0x000000C0, 0x3 },
 	{ 0x00000018, 0x0000008A, 0x0 },
-	{ 0x00003015, 0x000000C7, 0x0 },	/* Default */
-	{ 0x80003015, 0x000000C7, 0x7 },	/* Uses I_boost level 0x7 */
-	{ 0x00000018, 0x000000C7, 0x0 },
+	{ 0x80003015, 0x000000C0, 0x3 },	/* Default */
+	{ 0x80003015, 0x000000C0, 0x3 },
+	{ 0x80000018, 0x000000C0, 0x3 },
 };
 
 struct bxt_ddi_buf_trans {
@@ -301,8 +301,8 @@ static const struct bxt_ddi_buf_trans bxt_ddi_translations_hdmi[] = {
 	{ 154, 0x9A, 1, 128, true },	/* 9:	1200		0   */
 };
 
-static void bxt_ddi_vswing_sequence(struct drm_device *dev, u32 level,
-				    enum port port, int type);
+static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
+				    u32 level, enum port port, int type);
 
 static void ddi_get_encoder_port(struct intel_encoder *intel_encoder,
 				 struct intel_digital_port **dig_port,
@@ -342,81 +342,50 @@ enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder)
 	return port;
 }
 
-static bool
-intel_dig_port_supports_hdmi(const struct intel_digital_port *intel_dig_port)
-{
-	return i915_mmio_reg_valid(intel_dig_port->hdmi.hdmi_reg);
-}
-
-static const struct ddi_buf_trans *skl_get_buf_trans_dp(struct drm_device *dev,
-							int *n_entries)
+static const struct ddi_buf_trans *
+skl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries)
 {
-	const struct ddi_buf_trans *ddi_translations;
-
-	if (IS_SKL_ULX(dev) || IS_KBL_ULX(dev)) {
-		ddi_translations = skl_y_ddi_translations_dp;
+	if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv)) {
 		*n_entries = ARRAY_SIZE(skl_y_ddi_translations_dp);
-	} else if (IS_SKL_ULT(dev) || IS_KBL_ULT(dev)) {
-		ddi_translations = skl_u_ddi_translations_dp;
+		return skl_y_ddi_translations_dp;
+	} else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv)) {
 		*n_entries = ARRAY_SIZE(skl_u_ddi_translations_dp);
+		return skl_u_ddi_translations_dp;
 	} else {
-		ddi_translations = skl_ddi_translations_dp;
 		*n_entries = ARRAY_SIZE(skl_ddi_translations_dp);
+		return skl_ddi_translations_dp;
 	}
-
-	return ddi_translations;
 }
 
-static const struct ddi_buf_trans *skl_get_buf_trans_edp(struct drm_device *dev,
-							 int *n_entries)
+static const struct ddi_buf_trans *
+skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const struct ddi_buf_trans *ddi_translations;
-
-	if (IS_SKL_ULX(dev) || IS_KBL_ULX(dev)) {
-		if (dev_priv->edp_low_vswing) {
-			ddi_translations = skl_y_ddi_translations_edp;
+	if (dev_priv->edp_low_vswing) {
+		if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv)) {
 			*n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp);
-		} else {
-			ddi_translations = skl_y_ddi_translations_dp;
-			*n_entries = ARRAY_SIZE(skl_y_ddi_translations_dp);
-		}
-	} else if (IS_SKL_ULT(dev) || IS_KBL_ULT(dev)) {
-		if (dev_priv->edp_low_vswing) {
-			ddi_translations = skl_u_ddi_translations_edp;
+			return skl_y_ddi_translations_edp;
+		} else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv)) {
 			*n_entries = ARRAY_SIZE(skl_u_ddi_translations_edp);
+			return skl_u_ddi_translations_edp;
 		} else {
-			ddi_translations = skl_u_ddi_translations_dp;
-			*n_entries = ARRAY_SIZE(skl_u_ddi_translations_dp);
-		}
-	} else {
-		if (dev_priv->edp_low_vswing) {
-			ddi_translations = skl_ddi_translations_edp;
 			*n_entries = ARRAY_SIZE(skl_ddi_translations_edp);
-		} else {
-			ddi_translations = skl_ddi_translations_dp;
-			*n_entries = ARRAY_SIZE(skl_ddi_translations_dp);
+			return skl_ddi_translations_edp;
 		}
 	}
 
-	return ddi_translations;
+	return skl_get_buf_trans_dp(dev_priv, n_entries);
 }
 
 static const struct ddi_buf_trans *
-skl_get_buf_trans_hdmi(struct drm_device *dev,
-		       int *n_entries)
+skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
 {
-	const struct ddi_buf_trans *ddi_translations;
-
-	if (IS_SKL_ULX(dev) || IS_KBL_ULX(dev)) {
-		ddi_translations = skl_y_ddi_translations_hdmi;
+	if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv)) {
 		*n_entries = ARRAY_SIZE(skl_y_ddi_translations_hdmi);
+		return skl_y_ddi_translations_hdmi;
 	} else {
-		ddi_translations = skl_ddi_translations_hdmi;
 		*n_entries = ARRAY_SIZE(skl_ddi_translations_hdmi);
+		return skl_ddi_translations_hdmi;
 	}
-
-	return ddi_translations;
 }
 
 /*
@@ -426,42 +395,52 @@ skl_get_buf_trans_hdmi(struct drm_device *dev,
  * in either FDI or DP modes only, as HDMI connections will work with both
  * of those
  */
-static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
-				      bool supports_hdmi)
+void intel_prepare_ddi_buffer(struct intel_encoder *encoder)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	u32 iboost_bit = 0;
 	int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
 	    size;
-	int hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+	int hdmi_level;
+	enum port port;
 	const struct ddi_buf_trans *ddi_translations_fdi;
 	const struct ddi_buf_trans *ddi_translations_dp;
 	const struct ddi_buf_trans *ddi_translations_edp;
 	const struct ddi_buf_trans *ddi_translations_hdmi;
 	const struct ddi_buf_trans *ddi_translations;
 
-	if (IS_BROXTON(dev)) {
-		if (!supports_hdmi)
+	port = intel_ddi_get_encoder_port(encoder);
+	hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+
+	if (IS_BROXTON(dev_priv)) {
+		if (encoder->type != INTEL_OUTPUT_HDMI)
 			return;
 
 		/* Vswing programming for HDMI */
-		bxt_ddi_vswing_sequence(dev, hdmi_level, port,
+		bxt_ddi_vswing_sequence(dev_priv, hdmi_level, port,
 					INTEL_OUTPUT_HDMI);
 		return;
-	} else if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev)) {
+	}
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		ddi_translations_fdi = NULL;
 		ddi_translations_dp =
-				skl_get_buf_trans_dp(dev, &n_dp_entries);
+				skl_get_buf_trans_dp(dev_priv, &n_dp_entries);
 		ddi_translations_edp =
-				skl_get_buf_trans_edp(dev, &n_edp_entries);
+				skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
 		ddi_translations_hdmi =
-				skl_get_buf_trans_hdmi(dev, &n_hdmi_entries);
+				skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
 		hdmi_default_entry = 8;
 		/* If we're boosting the current, set bit 31 of trans1 */
 		if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
 		    dev_priv->vbt.ddi_port_info[port].dp_boost_level)
 			iboost_bit = 1<<31;
-	} else if (IS_BROADWELL(dev)) {
+
+		if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP &&
+			    port != PORT_A && port != PORT_E &&
+			    n_edp_entries > 9))
+			n_edp_entries = 9;
+	} else if (IS_BROADWELL(dev_priv)) {
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
 		ddi_translations_edp = bdw_ddi_translations_edp;
@@ -470,7 +449,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 		n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
 		hdmi_default_entry = 7;
-	} else if (IS_HASWELL(dev)) {
+	} else if (IS_HASWELL(dev_priv)) {
 		ddi_translations_fdi = hsw_ddi_translations_fdi;
 		ddi_translations_dp = hsw_ddi_translations_dp;
 		ddi_translations_edp = hsw_ddi_translations_dp;
@@ -490,30 +469,18 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 		hdmi_default_entry = 7;
 	}
 
-	switch (port) {
-	case PORT_A:
+	switch (encoder->type) {
+	case INTEL_OUTPUT_EDP:
 		ddi_translations = ddi_translations_edp;
 		size = n_edp_entries;
 		break;
-	case PORT_B:
-	case PORT_C:
+	case INTEL_OUTPUT_DISPLAYPORT:
+	case INTEL_OUTPUT_HDMI:
 		ddi_translations = ddi_translations_dp;
 		size = n_dp_entries;
 		break;
-	case PORT_D:
-		if (intel_dp_is_edp(dev, PORT_D)) {
-			ddi_translations = ddi_translations_edp;
-			size = n_edp_entries;
-		} else {
-			ddi_translations = ddi_translations_dp;
-			size = n_dp_entries;
-		}
-		break;
-	case PORT_E:
-		if (ddi_translations_fdi)
-			ddi_translations = ddi_translations_fdi;
-		else
-			ddi_translations = ddi_translations_dp;
+	case INTEL_OUTPUT_ANALOG:
+		ddi_translations = ddi_translations_fdi;
 		size = n_dp_entries;
 		break;
 	default:
@@ -527,7 +494,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 			   ddi_translations[i].trans2);
 	}
 
-	if (!supports_hdmi)
+	if (encoder->type != INTEL_OUTPUT_HDMI)
 		return;
 
 	/* Choose a good default if VBT is badly populated */
@@ -542,37 +509,6 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 		   ddi_translations_hdmi[hdmi_level].trans2);
 }
 
-/* Program DDI buffers translations for DP. By default, program ports A-D in DP
- * mode and port E for FDI.
- */
-void intel_prepare_ddi(struct drm_device *dev)
-{
-	struct intel_encoder *intel_encoder;
-	bool visited[I915_MAX_PORTS] = { 0, };
-
-	if (!HAS_DDI(dev))
-		return;
-
-	for_each_intel_encoder(dev, intel_encoder) {
-		struct intel_digital_port *intel_dig_port;
-		enum port port;
-		bool supports_hdmi;
-
-		if (intel_encoder->type == INTEL_OUTPUT_DSI)
-			continue;
-
-		ddi_get_encoder_port(intel_encoder, &intel_dig_port, &port);
-		if (visited[port])
-			continue;
-
-		supports_hdmi = intel_dig_port &&
-				intel_dig_port_supports_hdmi(intel_dig_port);
-
-		intel_prepare_ddi_buffers(dev, port, supports_hdmi);
-		visited[port] = true;
-	}
-}
-
 static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv,
 				    enum port port)
 {
@@ -601,8 +537,14 @@ void hsw_fdi_link_train(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_encoder *encoder;
 	u32 temp, i, rx_ctl_val;
 
+	for_each_encoder_on_crtc(dev, crtc, encoder) {
+		WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG);
+		intel_prepare_ddi_buffer(encoder);
+	}
+
 	/* Set the FDI_RX_MISC pwrdn lanes and the 2 workarounds listed at the
 	 * mode set "sequence for CRT port" document:
 	 * - TP1 to TP2 time with the default value
@@ -1604,8 +1546,10 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc,
 		}
 
 		cfgcr1 = cfgcr2 = 0;
-	} else /* eDP */
+	} else if (intel_encoder->type == INTEL_OUTPUT_EDP) {
 		return true;
+	} else
+		return false;
 
 	memset(&crtc_state->dpll_hw_state, 0,
 	       sizeof(crtc_state->dpll_hw_state));
@@ -2109,10 +2053,9 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc)
 			   TRANS_CLK_SEL_DISABLED);
 }
 
-static void skl_ddi_set_iboost(struct drm_device *dev, u32 level,
-			       enum port port, int type)
+static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+			       u32 level, enum port port, int type)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	const struct ddi_buf_trans *ddi_translations;
 	uint8_t iboost;
 	uint8_t dp_iboost, hdmi_iboost;
@@ -2127,21 +2070,26 @@ static void skl_ddi_set_iboost(struct drm_device *dev, u32 level,
 		if (dp_iboost) {
 			iboost = dp_iboost;
 		} else {
-			ddi_translations = skl_get_buf_trans_dp(dev, &n_entries);
+			ddi_translations = skl_get_buf_trans_dp(dev_priv, &n_entries);
 			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_EDP) {
 		if (dp_iboost) {
 			iboost = dp_iboost;
 		} else {
-			ddi_translations = skl_get_buf_trans_edp(dev, &n_entries);
+			ddi_translations = skl_get_buf_trans_edp(dev_priv, &n_entries);
+
+			if (WARN_ON(port != PORT_A &&
+				    port != PORT_E && n_entries > 9))
+				n_entries = 9;
+
 			iboost = ddi_translations[level].i_boost;
 		}
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		if (hdmi_iboost) {
 			iboost = hdmi_iboost;
 		} else {
-			ddi_translations = skl_get_buf_trans_hdmi(dev, &n_entries);
+			ddi_translations = skl_get_buf_trans_hdmi(dev_priv, &n_entries);
 			iboost = ddi_translations[level].i_boost;
 		}
 	} else {
@@ -2166,10 +2114,9 @@ static void skl_ddi_set_iboost(struct drm_device *dev, u32 level,
 	I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
 }
 
-static void bxt_ddi_vswing_sequence(struct drm_device *dev, u32 level,
-				    enum port port, int type)
+static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
+				    u32 level, enum port port, int type)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	const struct bxt_ddi_buf_trans *ddi_translations;
 	u32 n_entries, i;
 	uint32_t val;
@@ -2284,7 +2231,7 @@ static uint32_t translate_signal_level(int signal_levels)
 uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dport->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev);
 	struct intel_encoder *encoder = &dport->base;
 	uint8_t train_set = intel_dp->train_set[0];
 	int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK |
@@ -2294,10 +2241,10 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
 
 	level = translate_signal_level(signal_levels);
 
-	if (IS_SKYLAKE(dev) || IS_KABYLAKE(dev))
-		skl_ddi_set_iboost(dev, level, port, encoder->type);
-	else if (IS_BROXTON(dev))
-		bxt_ddi_vswing_sequence(dev, level, port, encoder->type);
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+		skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
+	else if (IS_BROXTON(dev_priv))
+		bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
 
 	return DDI_BUF_TRANS_SELECT(level);
 }
@@ -2349,12 +2296,12 @@ void intel_ddi_clk_select(struct intel_encoder *encoder,
 static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
-	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
 	enum port port = intel_ddi_get_encoder_port(intel_encoder);
 	int type = intel_encoder->type;
-	int hdmi_level;
+
+	intel_prepare_ddi_buffer(intel_encoder);
 
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2372,17 +2319,11 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder)
 
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 		intel_dp_start_link_train(intel_dp);
-		if (port != PORT_A || INTEL_INFO(dev)->gen >= 9)
+		if (port != PORT_A || INTEL_INFO(dev_priv)->gen >= 9)
 			intel_dp_stop_link_train(intel_dp);
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 
-		if (IS_BROXTON(dev)) {
-			hdmi_level = dev_priv->vbt.
-				ddi_port_info[port].hdmi_level_shift;
-			bxt_ddi_vswing_sequence(dev, hdmi_level, port,
-					INTEL_OUTPUT_HDMI);
-		}
 		intel_hdmi->set_infoframes(encoder,
 					   crtc->config->has_hdmi_sink,
 					   &crtc->config->base.adjusted_mode);
@@ -3329,6 +3270,33 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	struct intel_encoder *intel_encoder;
 	struct drm_encoder *encoder;
 	bool init_hdmi, init_dp;
+	int max_lanes;
+
+	if (I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) {
+		switch (port) {
+		case PORT_A:
+			max_lanes = 4;
+			break;
+		case PORT_E:
+			max_lanes = 0;
+			break;
+		default:
+			max_lanes = 4;
+			break;
+		}
+	} else {
+		switch (port) {
+		case PORT_A:
+			max_lanes = 2;
+			break;
+		case PORT_E:
+			max_lanes = 2;
+			break;
+		default:
+			max_lanes = 4;
+			break;
+		}
+	}
 
 	init_hdmi = (dev_priv->vbt.ddi_port_info[port].supports_dvi ||
 		     dev_priv->vbt.ddi_port_info[port].supports_hdmi);
@@ -3373,9 +3341,12 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 		if (!(intel_dig_port->saved_port_bits & DDI_A_4_LANES)) {
 			DRM_DEBUG_KMS("BXT BIOS forgot to set DDI_A_4_LANES for port A; fixing\n");
 			intel_dig_port->saved_port_bits |= DDI_A_4_LANES;
+			max_lanes = 4;
 		}
 	}
 
+	intel_dig_port->max_lanes = max_lanes;
+
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
 	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	intel_encoder->cloneable = 0;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 46947fffd599..6e0d8283daa6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -85,8 +85,6 @@ static const uint32_t intel_cursor_formats[] = {
 	DRM_FORMAT_ARGB8888,
 };
 
-static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
-
 static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 				struct intel_crtc_state *pipe_config);
 static void ironlake_pch_clock_get(struct intel_crtc *crtc,
@@ -1152,11 +1150,6 @@ static void intel_wait_for_pipe_off(struct intel_crtc *crtc)
 	}
 }
 
-static const char *state_string(bool enabled)
-{
-	return enabled ? "on" : "off";
-}
-
 /* Only for pre-ILK configs */
 void assert_pll(struct drm_i915_private *dev_priv,
 		enum pipe pipe, bool state)
@@ -1168,7 +1161,7 @@ void assert_pll(struct drm_i915_private *dev_priv,
 	cur_state = !!(val & DPLL_VCO_ENABLE);
 	I915_STATE_WARN(cur_state != state,
 	     "PLL state assertion failure (expected %s, current %s)\n",
-	     state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 
 /* XXX: the dsi pll is shared between MIPI DSI ports */
@@ -1184,7 +1177,7 @@ static void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
 	cur_state = val & DSI_PLL_VCO_EN;
 	I915_STATE_WARN(cur_state != state,
 	     "DSI PLL state assertion failure (expected %s, current %s)\n",
-	     state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 #define assert_dsi_pll_enabled(d) assert_dsi_pll(d, true)
 #define assert_dsi_pll_disabled(d) assert_dsi_pll(d, false)
@@ -1208,14 +1201,13 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv,
 	bool cur_state;
 	struct intel_dpll_hw_state hw_state;
 
-	if (WARN (!pll,
-		  "asserting DPLL %s with no DPLL\n", state_string(state)))
+	if (WARN(!pll, "asserting DPLL %s with no DPLL\n", onoff(state)))
 		return;
 
 	cur_state = pll->get_hw_state(dev_priv, pll, &hw_state);
 	I915_STATE_WARN(cur_state != state,
 	     "%s assertion failure (expected %s, current %s)\n",
-	     pll->name, state_string(state), state_string(cur_state));
+			pll->name, onoff(state), onoff(cur_state));
 }
 
 static void assert_fdi_tx(struct drm_i915_private *dev_priv,
@@ -1235,7 +1227,7 @@ static void assert_fdi_tx(struct drm_i915_private *dev_priv,
 	}
 	I915_STATE_WARN(cur_state != state,
 	     "FDI TX state assertion failure (expected %s, current %s)\n",
-	     state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 #define assert_fdi_tx_enabled(d, p) assert_fdi_tx(d, p, true)
 #define assert_fdi_tx_disabled(d, p) assert_fdi_tx(d, p, false)
@@ -1250,7 +1242,7 @@ static void assert_fdi_rx(struct drm_i915_private *dev_priv,
 	cur_state = !!(val & FDI_RX_ENABLE);
 	I915_STATE_WARN(cur_state != state,
 	     "FDI RX state assertion failure (expected %s, current %s)\n",
-	     state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 #define assert_fdi_rx_enabled(d, p) assert_fdi_rx(d, p, true)
 #define assert_fdi_rx_disabled(d, p) assert_fdi_rx(d, p, false)
@@ -1282,7 +1274,7 @@ void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
 	cur_state = !!(val & FDI_RX_PLL_ENABLE);
 	I915_STATE_WARN(cur_state != state,
 	     "FDI RX PLL assertion failure (expected %s, current %s)\n",
-	     state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 
 void assert_panel_unlocked(struct drm_i915_private *dev_priv,
@@ -1340,7 +1332,7 @@ static void assert_cursor(struct drm_i915_private *dev_priv,
 
 	I915_STATE_WARN(cur_state != state,
 	     "cursor on pipe %c assertion failure (expected %s, current %s)\n",
-	     pipe_name(pipe), state_string(state), state_string(cur_state));
+			pipe_name(pipe), onoff(state), onoff(cur_state));
 }
 #define assert_cursor_enabled(d, p) assert_cursor(d, p, true)
 #define assert_cursor_disabled(d, p) assert_cursor(d, p, false)
@@ -1370,7 +1362,7 @@ void assert_pipe(struct drm_i915_private *dev_priv,
 
 	I915_STATE_WARN(cur_state != state,
 	     "pipe %c assertion failure (expected %s, current %s)\n",
-	     pipe_name(pipe), state_string(state), state_string(cur_state));
+			pipe_name(pipe), onoff(state), onoff(cur_state));
 }
 
 static void assert_plane(struct drm_i915_private *dev_priv,
@@ -1383,7 +1375,7 @@ static void assert_plane(struct drm_i915_private *dev_priv,
 	cur_state = !!(val & DISPLAY_PLANE_ENABLE);
 	I915_STATE_WARN(cur_state != state,
 	     "plane %c assertion failure (expected %s, current %s)\n",
-	     plane_name(plane), state_string(state), state_string(cur_state));
+			plane_name(plane), onoff(state), onoff(cur_state));
 }
 
 #define assert_plane_enabled(d, p) assert_plane(d, p, true)
@@ -2156,6 +2148,17 @@ static void intel_enable_pipe(struct intel_crtc *crtc)
 
 	I915_WRITE(reg, val | PIPECONF_ENABLE);
 	POSTING_READ(reg);
+
+	/*
+	 * Until the pipe starts DSL will read as 0, which would cause
+	 * an apparent vblank timestamp jump, which messes up also the
+	 * frame count when it's derived from the timestamps. So let's
+	 * wait for the pipe to start properly before we call
+	 * drm_crtc_vblank_on()
+	 */
+	if (dev->max_vblank_count == 0 &&
+	    wait_for(intel_get_crtc_scanline(crtc) != crtc->scanline_offset, 50))
+		DRM_ERROR("pipe %c didn't start\n", pipe_name(pipe));
 }
 
 /**
@@ -2217,67 +2220,75 @@ static bool need_vtd_wa(struct drm_device *dev)
 	return false;
 }
 
-unsigned int
-intel_tile_height(struct drm_device *dev, uint32_t pixel_format,
-		  uint64_t fb_format_modifier, unsigned int plane)
+static unsigned int intel_tile_size(const struct drm_i915_private *dev_priv)
 {
-	unsigned int tile_height;
-	uint32_t pixel_bytes;
+	return IS_GEN2(dev_priv) ? 2048 : 4096;
+}
 
-	switch (fb_format_modifier) {
+static unsigned int intel_tile_width(const struct drm_i915_private *dev_priv,
+				     uint64_t fb_modifier, unsigned int cpp)
+{
+	switch (fb_modifier) {
 	case DRM_FORMAT_MOD_NONE:
-		tile_height = 1;
-		break;
+		return cpp;
 	case I915_FORMAT_MOD_X_TILED:
-		tile_height = IS_GEN2(dev) ? 16 : 8;
-		break;
+		if (IS_GEN2(dev_priv))
+			return 128;
+		else
+			return 512;
 	case I915_FORMAT_MOD_Y_TILED:
-		tile_height = 32;
-		break;
+		if (IS_GEN2(dev_priv) || HAS_128_BYTE_Y_TILING(dev_priv))
+			return 128;
+		else
+			return 512;
 	case I915_FORMAT_MOD_Yf_TILED:
-		pixel_bytes = drm_format_plane_cpp(pixel_format, plane);
-		switch (pixel_bytes) {
-		default:
+		switch (cpp) {
 		case 1:
-			tile_height = 64;
-			break;
+			return 64;
 		case 2:
 		case 4:
-			tile_height = 32;
-			break;
+			return 128;
 		case 8:
-			tile_height = 16;
-			break;
 		case 16:
-			WARN_ONCE(1,
-				  "128-bit pixels are not supported for display!");
-			tile_height = 16;
-			break;
+			return 256;
+		default:
+			MISSING_CASE(cpp);
+			return cpp;
 		}
 		break;
 	default:
-		MISSING_CASE(fb_format_modifier);
-		tile_height = 1;
-		break;
+		MISSING_CASE(fb_modifier);
+		return cpp;
 	}
+}
 
-	return tile_height;
+unsigned int intel_tile_height(const struct drm_i915_private *dev_priv,
+			       uint64_t fb_modifier, unsigned int cpp)
+{
+	if (fb_modifier == DRM_FORMAT_MOD_NONE)
+		return 1;
+	else
+		return intel_tile_size(dev_priv) /
+			intel_tile_width(dev_priv, fb_modifier, cpp);
 }
 
 unsigned int
 intel_fb_align_height(struct drm_device *dev, unsigned int height,
-		      uint32_t pixel_format, uint64_t fb_format_modifier)
+		      uint32_t pixel_format, uint64_t fb_modifier)
 {
-	return ALIGN(height, intel_tile_height(dev, pixel_format,
-					       fb_format_modifier, 0));
+	unsigned int cpp = drm_format_plane_cpp(pixel_format, 0);
+	unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp);
+
+	return ALIGN(height, tile_height);
 }
 
 static void
 intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb,
 			const struct drm_plane_state *plane_state)
 {
-	struct intel_rotation_info *info = &view->params.rotation_info;
-	unsigned int tile_height, tile_pitch;
+	struct drm_i915_private *dev_priv = to_i915(fb->dev);
+	struct intel_rotation_info *info = &view->params.rotated;
+	unsigned int tile_size, tile_width, tile_height, cpp;
 
 	*view = i915_ggtt_view_normal;
 
@@ -2295,26 +2306,28 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, struct drm_framebuffer *fb,
 	info->uv_offset = fb->offsets[1];
 	info->fb_modifier = fb->modifier[0];
 
-	tile_height = intel_tile_height(fb->dev, fb->pixel_format,
-					fb->modifier[0], 0);
-	tile_pitch = PAGE_SIZE / tile_height;
-	info->width_pages = DIV_ROUND_UP(fb->pitches[0], tile_pitch);
+	tile_size = intel_tile_size(dev_priv);
+
+	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	tile_width = intel_tile_width(dev_priv, fb->modifier[0], cpp);
+	tile_height = tile_size / tile_width;
+
+	info->width_pages = DIV_ROUND_UP(fb->pitches[0], tile_width);
 	info->height_pages = DIV_ROUND_UP(fb->height, tile_height);
-	info->size = info->width_pages * info->height_pages * PAGE_SIZE;
+	info->size = info->width_pages * info->height_pages * tile_size;
 
 	if (info->pixel_format == DRM_FORMAT_NV12) {
-		tile_height = intel_tile_height(fb->dev, fb->pixel_format,
-						fb->modifier[0], 1);
-		tile_pitch = PAGE_SIZE / tile_height;
-		info->width_pages_uv = DIV_ROUND_UP(fb->pitches[0], tile_pitch);
-		info->height_pages_uv = DIV_ROUND_UP(fb->height / 2,
-						     tile_height);
-		info->size_uv = info->width_pages_uv * info->height_pages_uv *
-				PAGE_SIZE;
+		cpp = drm_format_plane_cpp(fb->pixel_format, 1);
+		tile_width = intel_tile_width(dev_priv, fb->modifier[1], cpp);
+		tile_height = tile_size / tile_width;
+
+		info->width_pages_uv = DIV_ROUND_UP(fb->pitches[1], tile_width);
+		info->height_pages_uv = DIV_ROUND_UP(fb->height / 2, tile_height);
+		info->size_uv = info->width_pages_uv * info->height_pages_uv * tile_size;
 	}
 }
 
-static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
+static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv)
 {
 	if (INTEL_INFO(dev_priv)->gen >= 9)
 		return 256 * 1024;
@@ -2327,6 +2340,25 @@ static unsigned int intel_linear_alignment(struct drm_i915_private *dev_priv)
 		return 0;
 }
 
+static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv,
+					 uint64_t fb_modifier)
+{
+	switch (fb_modifier) {
+	case DRM_FORMAT_MOD_NONE:
+		return intel_linear_alignment(dev_priv);
+	case I915_FORMAT_MOD_X_TILED:
+		if (INTEL_INFO(dev_priv)->gen >= 9)
+			return 256 * 1024;
+		return 0;
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_Yf_TILED:
+		return 1 * 1024 * 1024;
+	default:
+		MISSING_CASE(fb_modifier);
+		return 0;
+	}
+}
+
 int
 intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 			   struct drm_framebuffer *fb,
@@ -2341,29 +2373,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane,
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	switch (fb->modifier[0]) {
-	case DRM_FORMAT_MOD_NONE:
-		alignment = intel_linear_alignment(dev_priv);
-		break;
-	case I915_FORMAT_MOD_X_TILED:
-		if (INTEL_INFO(dev)->gen >= 9)
-			alignment = 256 * 1024;
-		else {
-			/* pin() will align the object as required by fence */
-			alignment = 0;
-		}
-		break;
-	case I915_FORMAT_MOD_Y_TILED:
-	case I915_FORMAT_MOD_Yf_TILED:
-		if (WARN_ONCE(INTEL_INFO(dev)->gen < 9,
-			  "Y tiling bo slipped through, driver bug!\n"))
-			return -EINVAL;
-		alignment = 1 * 1024 * 1024;
-		break;
-	default:
-		MISSING_CASE(fb->modifier[0]);
-		return -EINVAL;
-	}
+	alignment = intel_surf_alignment(dev_priv, fb->modifier[0]);
 
 	intel_fill_fb_ggtt_view(&view, fb, plane_state);
 
@@ -2441,22 +2451,27 @@ static void intel_unpin_fb_obj(struct drm_framebuffer *fb,
 
 /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel
  * is assumed to be a power-of-two. */
-unsigned long intel_gen4_compute_page_offset(struct drm_i915_private *dev_priv,
-					     int *x, int *y,
-					     unsigned int tiling_mode,
-					     unsigned int cpp,
-					     unsigned int pitch)
-{
-	if (tiling_mode != I915_TILING_NONE) {
+u32 intel_compute_tile_offset(struct drm_i915_private *dev_priv,
+			      int *x, int *y,
+			      uint64_t fb_modifier,
+			      unsigned int cpp,
+			      unsigned int pitch)
+{
+	if (fb_modifier != DRM_FORMAT_MOD_NONE) {
+		unsigned int tile_size, tile_width, tile_height;
 		unsigned int tile_rows, tiles;
 
-		tile_rows = *y / 8;
-		*y %= 8;
+		tile_size = intel_tile_size(dev_priv);
+		tile_width = intel_tile_width(dev_priv, fb_modifier, cpp);
+		tile_height = tile_size / tile_width;
+
+		tile_rows = *y / tile_height;
+		*y %= tile_height;
 
-		tiles = *x / (512/cpp);
-		*x %= 512/cpp;
+		tiles = *x / (tile_width/cpp);
+		*x %= tile_width/cpp;
 
-		return tile_rows * pitch * 8 + tiles * 4096;
+		return tile_rows * pitch * tile_height + tiles * tile_size;
 	} else {
 		unsigned int alignment = intel_linear_alignment(dev_priv) - 1;
 		unsigned int offset;
@@ -2539,12 +2554,16 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 	if (size_aligned * 2 > dev_priv->gtt.stolen_usable_size)
 		return false;
 
+	mutex_lock(&dev->struct_mutex);
+
 	obj = i915_gem_object_create_stolen_for_preallocated(dev,
 							     base_aligned,
 							     base_aligned,
 							     size_aligned);
-	if (!obj)
+	if (!obj) {
+		mutex_unlock(&dev->struct_mutex);
 		return false;
+	}
 
 	obj->tiling_mode = plane_config->tiling;
 	if (obj->tiling_mode == I915_TILING_X)
@@ -2557,12 +2576,12 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
 	mode_cmd.modifier[0] = fb->modifier[0];
 	mode_cmd.flags = DRM_MODE_FB_MODIFIERS;
 
-	mutex_lock(&dev->struct_mutex);
 	if (intel_framebuffer_init(dev, to_intel_framebuffer(fb),
 				   &mode_cmd, obj)) {
 		DRM_DEBUG_KMS("intel fb init failed\n");
 		goto out_unref_obj;
 	}
+
 	mutex_unlock(&dev->struct_mutex);
 
 	DRM_DEBUG_KMS("initial plane fb obj %p\n", obj);
@@ -2601,6 +2620,8 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	struct drm_plane_state *plane_state = primary->state;
 	struct drm_crtc_state *crtc_state = intel_crtc->base.state;
 	struct intel_plane *intel_plane = to_intel_plane(primary);
+	struct intel_plane_state *intel_state =
+		to_intel_plane_state(plane_state);
 	struct drm_framebuffer *fb;
 
 	if (!plane_config->fb)
@@ -2662,6 +2683,15 @@ valid_fb:
 	plane_state->crtc_w = fb->width;
 	plane_state->crtc_h = fb->height;
 
+	intel_state->src.x1 = plane_state->src_x;
+	intel_state->src.y1 = plane_state->src_y;
+	intel_state->src.x2 = plane_state->src_x + plane_state->src_w;
+	intel_state->src.y2 = plane_state->src_y + plane_state->src_h;
+	intel_state->dst.x1 = plane_state->crtc_x;
+	intel_state->dst.y1 = plane_state->crtc_y;
+	intel_state->dst.x2 = plane_state->crtc_x + plane_state->crtc_w;
+	intel_state->dst.y2 = plane_state->crtc_y + plane_state->crtc_h;
+
 	obj = intel_fb_obj(fb);
 	if (obj->tiling_mode != I915_TILING_NONE)
 		dev_priv->preserve_bios_swizzle = true;
@@ -2673,37 +2703,22 @@ valid_fb:
 	obj->frontbuffer_bits |= to_intel_plane(primary)->frontbuffer_bit;
 }
 
-static void i9xx_update_primary_plane(struct drm_crtc *crtc,
-				      struct drm_framebuffer *fb,
-				      int x, int y)
+static void i9xx_update_primary_plane(struct drm_plane *primary,
+				      const struct intel_crtc_state *crtc_state,
+				      const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = crtc->dev;
+	struct drm_device *dev = primary->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_plane *primary = crtc->primary;
-	bool visible = to_intel_plane_state(primary->state)->visible;
-	struct drm_i915_gem_object *obj;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int plane = intel_crtc->plane;
-	unsigned long linear_offset;
+	u32 linear_offset;
 	u32 dspcntr;
 	i915_reg_t reg = DSPCNTR(plane);
-	int pixel_size;
-
-	if (!visible || !fb) {
-		I915_WRITE(reg, 0);
-		if (INTEL_INFO(dev)->gen >= 4)
-			I915_WRITE(DSPSURF(plane), 0);
-		else
-			I915_WRITE(DSPADDR(plane), 0);
-		POSTING_READ(reg);
-		return;
-	}
-
-	obj = intel_fb_obj(fb);
-	if (WARN_ON(obj == NULL))
-		return;
-
-	pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
+	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	int x = plane_state->src.x1 >> 16;
+	int y = plane_state->src.y1 >> 16;
 
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
@@ -2717,13 +2732,13 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 		 * which should always be the user's requested size.
 		 */
 		I915_WRITE(DSPSIZE(plane),
-			   ((intel_crtc->config->pipe_src_h - 1) << 16) |
-			   (intel_crtc->config->pipe_src_w - 1));
+			   ((crtc_state->pipe_src_h - 1) << 16) |
+			   (crtc_state->pipe_src_w - 1));
 		I915_WRITE(DSPPOS(plane), 0);
 	} else if (IS_CHERRYVIEW(dev) && plane == PLANE_B) {
 		I915_WRITE(PRIMSIZE(plane),
-			   ((intel_crtc->config->pipe_src_h - 1) << 16) |
-			   (intel_crtc->config->pipe_src_w - 1));
+			   ((crtc_state->pipe_src_h - 1) << 16) |
+			   (crtc_state->pipe_src_w - 1));
 		I915_WRITE(PRIMPOS(plane), 0);
 		I915_WRITE(PRIMCNSTALPHA(plane), 0);
 	}
@@ -2761,30 +2776,29 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 	if (IS_G4X(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
-	linear_offset = y * fb->pitches[0] + x * pixel_size;
+	linear_offset = y * fb->pitches[0] + x * cpp;
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		intel_crtc->dspaddr_offset =
-			intel_gen4_compute_page_offset(dev_priv,
-						       &x, &y, obj->tiling_mode,
-						       pixel_size,
-						       fb->pitches[0]);
+			intel_compute_tile_offset(dev_priv, &x, &y,
+						  fb->modifier[0], cpp,
+						  fb->pitches[0]);
 		linear_offset -= intel_crtc->dspaddr_offset;
 	} else {
 		intel_crtc->dspaddr_offset = linear_offset;
 	}
 
-	if (crtc->primary->state->rotation == BIT(DRM_ROTATE_180)) {
+	if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 		dspcntr |= DISPPLANE_ROTATE_180;
 
-		x += (intel_crtc->config->pipe_src_w - 1);
-		y += (intel_crtc->config->pipe_src_h - 1);
+		x += (crtc_state->pipe_src_w - 1);
+		y += (crtc_state->pipe_src_h - 1);
 
 		/* Finding the last pixel of the last line of the display
 		data and adding to linear_offset*/
 		linear_offset +=
-			(intel_crtc->config->pipe_src_h - 1) * fb->pitches[0] +
-			(intel_crtc->config->pipe_src_w - 1) * pixel_size;
+			(crtc_state->pipe_src_h - 1) * fb->pitches[0] +
+			(crtc_state->pipe_src_w - 1) * cpp;
 	}
 
 	intel_crtc->adjusted_x = x;
@@ -2803,37 +2817,40 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 	POSTING_READ(reg);
 }
 
-static void ironlake_update_primary_plane(struct drm_crtc *crtc,
-					  struct drm_framebuffer *fb,
-					  int x, int y)
+static void i9xx_disable_primary_plane(struct drm_plane *primary,
+				       struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_plane *primary = crtc->primary;
-	bool visible = to_intel_plane_state(primary->state)->visible;
-	struct drm_i915_gem_object *obj;
 	int plane = intel_crtc->plane;
-	unsigned long linear_offset;
-	u32 dspcntr;
-	i915_reg_t reg = DSPCNTR(plane);
-	int pixel_size;
 
-	if (!visible || !fb) {
-		I915_WRITE(reg, 0);
+	I915_WRITE(DSPCNTR(plane), 0);
+	if (INTEL_INFO(dev_priv)->gen >= 4)
 		I915_WRITE(DSPSURF(plane), 0);
-		POSTING_READ(reg);
-		return;
-	}
-
-	obj = intel_fb_obj(fb);
-	if (WARN_ON(obj == NULL))
-		return;
+	else
+		I915_WRITE(DSPADDR(plane), 0);
+	POSTING_READ(DSPCNTR(plane));
+}
 
-	pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
+static void ironlake_update_primary_plane(struct drm_plane *primary,
+					  const struct intel_crtc_state *crtc_state,
+					  const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = primary->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	int plane = intel_crtc->plane;
+	u32 linear_offset;
+	u32 dspcntr;
+	i915_reg_t reg = DSPCNTR(plane);
+	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	int x = plane_state->src.x1 >> 16;
+	int y = plane_state->src.y1 >> 16;
 
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
-
 	dspcntr |= DISPLAY_PLANE_ENABLE;
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
@@ -2868,25 +2885,24 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
 	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
-	linear_offset = y * fb->pitches[0] + x * pixel_size;
+	linear_offset = y * fb->pitches[0] + x * cpp;
 	intel_crtc->dspaddr_offset =
-		intel_gen4_compute_page_offset(dev_priv,
-					       &x, &y, obj->tiling_mode,
-					       pixel_size,
-					       fb->pitches[0]);
+		intel_compute_tile_offset(dev_priv, &x, &y,
+					  fb->modifier[0], cpp,
+					  fb->pitches[0]);
 	linear_offset -= intel_crtc->dspaddr_offset;
-	if (crtc->primary->state->rotation == BIT(DRM_ROTATE_180)) {
+	if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 		dspcntr |= DISPPLANE_ROTATE_180;
 
 		if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
-			x += (intel_crtc->config->pipe_src_w - 1);
-			y += (intel_crtc->config->pipe_src_h - 1);
+			x += (crtc_state->pipe_src_w - 1);
+			y += (crtc_state->pipe_src_h - 1);
 
 			/* Finding the last pixel of the last line of the display
 			data and adding to linear_offset*/
 			linear_offset +=
-				(intel_crtc->config->pipe_src_h - 1) * fb->pitches[0] +
-				(intel_crtc->config->pipe_src_w - 1) * pixel_size;
+				(crtc_state->pipe_src_h - 1) * fb->pitches[0] +
+				(crtc_state->pipe_src_w - 1) * cpp;
 		}
 	}
 
@@ -2907,37 +2923,15 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
 	POSTING_READ(reg);
 }
 
-u32 intel_fb_stride_alignment(struct drm_device *dev, uint64_t fb_modifier,
-			      uint32_t pixel_format)
+u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
+			      uint64_t fb_modifier, uint32_t pixel_format)
 {
-	u32 bits_per_pixel = drm_format_plane_cpp(pixel_format, 0) * 8;
-
-	/*
-	 * The stride is either expressed as a multiple of 64 bytes
-	 * chunks for linear buffers or in number of tiles for tiled
-	 * buffers.
-	 */
-	switch (fb_modifier) {
-	case DRM_FORMAT_MOD_NONE:
-		return 64;
-	case I915_FORMAT_MOD_X_TILED:
-		if (INTEL_INFO(dev)->gen == 2)
-			return 128;
-		return 512;
-	case I915_FORMAT_MOD_Y_TILED:
-		/* No need to check for old gens and Y tiling since this is
-		 * about the display engine and those will be blocked before
-		 * we get here.
-		 */
-		return 128;
-	case I915_FORMAT_MOD_Yf_TILED:
-		if (bits_per_pixel == 8)
-			return 64;
-		else
-			return 128;
-	default:
-		MISSING_CASE(fb_modifier);
+	if (fb_modifier == DRM_FORMAT_MOD_NONE) {
 		return 64;
+	} else {
+		int cpp = drm_format_plane_cpp(pixel_format, 0);
+
+		return intel_tile_width(dev_priv, fb_modifier, cpp);
 	}
 }
 
@@ -2960,7 +2954,7 @@ u32 intel_plane_obj_offset(struct intel_plane *intel_plane,
 	offset = vma->node.start;
 
 	if (plane == 1) {
-		offset += vma->ggtt_view.params.rotation_info.uv_start_page *
+		offset += vma->ggtt_view.params.rotated.uv_start_page *
 			  PAGE_SIZE;
 	}
 
@@ -3077,36 +3071,30 @@ u32 skl_plane_ctl_rotation(unsigned int rotation)
 	return 0;
 }
 
-static void skylake_update_primary_plane(struct drm_crtc *crtc,
-					 struct drm_framebuffer *fb,
-					 int x, int y)
+static void skylake_update_primary_plane(struct drm_plane *plane,
+					 const struct intel_crtc_state *crtc_state,
+					 const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = crtc->dev;
+	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_plane *plane = crtc->primary;
-	bool visible = to_intel_plane_state(plane->state)->visible;
-	struct drm_i915_gem_object *obj;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_crtc->pipe;
 	u32 plane_ctl, stride_div, stride;
 	u32 tile_height, plane_offset, plane_size;
-	unsigned int rotation;
+	unsigned int rotation = plane_state->base.rotation;
 	int x_offset, y_offset;
 	u32 surf_addr;
-	struct intel_crtc_state *crtc_state = intel_crtc->config;
-	struct intel_plane_state *plane_state;
-	int src_x = 0, src_y = 0, src_w = 0, src_h = 0;
-	int dst_x = 0, dst_y = 0, dst_w = 0, dst_h = 0;
-	int scaler_id = -1;
-
-	plane_state = to_intel_plane_state(plane->state);
-
-	if (!visible || !fb) {
-		I915_WRITE(PLANE_CTL(pipe, 0), 0);
-		I915_WRITE(PLANE_SURF(pipe, 0), 0);
-		POSTING_READ(PLANE_CTL(pipe, 0));
-		return;
-	}
+	int scaler_id = plane_state->scaler_id;
+	int src_x = plane_state->src.x1 >> 16;
+	int src_y = plane_state->src.y1 >> 16;
+	int src_w = drm_rect_width(&plane_state->src) >> 16;
+	int src_h = drm_rect_height(&plane_state->src) >> 16;
+	int dst_x = plane_state->dst.x1;
+	int dst_y = plane_state->dst.y1;
+	int dst_w = drm_rect_width(&plane_state->dst);
+	int dst_h = drm_rect_height(&plane_state->dst);
 
 	plane_ctl = PLANE_CTL_ENABLE |
 		    PLANE_CTL_PIPE_GAMMA_ENABLE |
@@ -3115,41 +3103,27 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
 	plane_ctl |= skl_plane_ctl_format(fb->pixel_format);
 	plane_ctl |= skl_plane_ctl_tiling(fb->modifier[0]);
 	plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE;
-
-	rotation = plane->state->rotation;
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	obj = intel_fb_obj(fb);
-	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
+	stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 					       fb->pixel_format);
 	surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0);
 
 	WARN_ON(drm_rect_width(&plane_state->src) == 0);
 
-	scaler_id = plane_state->scaler_id;
-	src_x = plane_state->src.x1 >> 16;
-	src_y = plane_state->src.y1 >> 16;
-	src_w = drm_rect_width(&plane_state->src) >> 16;
-	src_h = drm_rect_height(&plane_state->src) >> 16;
-	dst_x = plane_state->dst.x1;
-	dst_y = plane_state->dst.y1;
-	dst_w = drm_rect_width(&plane_state->dst);
-	dst_h = drm_rect_height(&plane_state->dst);
-
-	WARN_ON(x != src_x || y != src_y);
-
 	if (intel_rotation_90_or_270(rotation)) {
+		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+
 		/* stride = Surface height in tiles */
-		tile_height = intel_tile_height(dev, fb->pixel_format,
-						fb->modifier[0], 0);
+		tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
 		stride = DIV_ROUND_UP(fb->height, tile_height);
-		x_offset = stride * tile_height - y - src_h;
-		y_offset = x;
+		x_offset = stride * tile_height - src_y - src_h;
+		y_offset = src_x;
 		plane_size = (src_w - 1) << 16 | (src_h - 1);
 	} else {
 		stride = fb->pitches[0] / stride_div;
-		x_offset = x;
-		y_offset = y;
+		x_offset = src_x;
+		y_offset = src_y;
 		plane_size = (src_h - 1) << 16 | (src_w - 1);
 	}
 	plane_offset = y_offset << 16 | x_offset;
@@ -3182,20 +3156,27 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
 	POSTING_READ(PLANE_SURF(pipe, 0));
 }
 
-/* Assume fb object is pinned & idle & fenced and just update base pointers */
-static int
-intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			   int x, int y, enum mode_set_atomic state)
+static void skylake_disable_primary_plane(struct drm_plane *primary,
+					  struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = to_intel_crtc(crtc)->pipe;
 
-	if (dev_priv->fbc.deactivate)
-		dev_priv->fbc.deactivate(dev_priv);
+	I915_WRITE(PLANE_CTL(pipe, 0), 0);
+	I915_WRITE(PLANE_SURF(pipe, 0), 0);
+	POSTING_READ(PLANE_SURF(pipe, 0));
+}
 
-	dev_priv->display.update_primary_plane(crtc, fb, x, y);
+/* Assume fb object is pinned & idle & fenced and just update base pointers */
+static int
+intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+			   int x, int y, enum mode_set_atomic state)
+{
+	/* Support for kgdboc is disabled, this needs a major rework. */
+	DRM_ERROR("legacy panic handler not supported any more.\n");
 
-	return 0;
+	return -ENODEV;
 }
 
 static void intel_complete_page_flips(struct drm_device *dev)
@@ -3222,8 +3203,10 @@ static void intel_update_primary_planes(struct drm_device *dev)
 		drm_modeset_lock_crtc(crtc, &plane->base);
 		plane_state = to_intel_plane_state(plane->base.state);
 
-		if (crtc->state->active && plane_state->base.fb)
-			plane->commit_plane(&plane->base, plane_state);
+		if (plane_state->visible)
+			plane->update_plane(&plane->base,
+					    to_intel_crtc_state(crtc->state),
+					    plane_state);
 
 		drm_modeset_unlock_crtc(crtc);
 	}
@@ -4455,7 +4438,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
 		      intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX);
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id, DRM_ROTATE_0,
+		&state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
 		state->pipe_src_w, state->pipe_src_h,
 		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }
@@ -4809,9 +4792,6 @@ static void intel_post_plane_update(struct intel_crtc *crtc)
 		to_intel_crtc_state(crtc->base.state);
 	struct drm_device *dev = crtc->base.dev;
 
-	if (atomic->wait_vblank)
-		intel_wait_for_vblank(dev, crtc->pipe);
-
 	intel_frontbuffer_flip(dev, atomic->fb_bits);
 
 	crtc->wm.cxsr_allowed = true;
@@ -4820,7 +4800,7 @@ static void intel_post_plane_update(struct intel_crtc *crtc)
 		intel_update_watermarks(&crtc->base);
 
 	if (atomic->update_fbc)
-		intel_fbc_update(crtc);
+		intel_fbc_post_update(crtc);
 
 	if (atomic->post_enable_primary)
 		intel_post_enable_primary(&crtc->base);
@@ -4828,26 +4808,39 @@ static void intel_post_plane_update(struct intel_crtc *crtc)
 	memset(atomic, 0, sizeof(*atomic));
 }
 
-static void intel_pre_plane_update(struct intel_crtc *crtc)
+static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state)
 {
+	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc_atomic_commit *atomic = &crtc->atomic;
 	struct intel_crtc_state *pipe_config =
 		to_intel_crtc_state(crtc->base.state);
+	struct drm_atomic_state *old_state = old_crtc_state->base.state;
+	struct drm_plane *primary = crtc->base.primary;
+	struct drm_plane_state *old_pri_state =
+		drm_atomic_get_existing_plane_state(old_state, primary);
+	bool modeset = needs_modeset(&pipe_config->base);
 
-	if (atomic->disable_fbc)
-		intel_fbc_deactivate(crtc);
+	if (atomic->update_fbc)
+		intel_fbc_pre_update(crtc);
 
-	if (crtc->atomic.disable_ips)
-		hsw_disable_ips(crtc);
+	if (old_pri_state) {
+		struct intel_plane_state *primary_state =
+			to_intel_plane_state(primary->state);
+		struct intel_plane_state *old_primary_state =
+			to_intel_plane_state(old_pri_state);
 
-	if (atomic->pre_disable_primary)
-		intel_pre_disable_primary(&crtc->base);
+		if (old_primary_state->visible &&
+		    (modeset || !primary_state->visible))
+			intel_pre_disable_primary(&crtc->base);
+	}
 
 	if (pipe_config->disable_cxsr) {
 		crtc->wm.cxsr_allowed = false;
-		intel_set_memory_cxsr(dev_priv, false);
+
+		if (old_crtc_state->base.active)
+			intel_set_memory_cxsr(dev_priv, false);
 	}
 
 	if (!needs_modeset(&pipe_config->base) && pipe_config->wm_changed)
@@ -4948,8 +4941,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 	if (intel_crtc->config->has_pch_encoder)
 		intel_wait_for_vblank(dev, pipe);
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
-
-	intel_fbc_enable(intel_crtc);
 }
 
 /* IPS only exists on ULT machines and is tied to pipe A. */
@@ -5062,8 +5053,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc)
 		intel_wait_for_vblank(dev, hsw_workaround_pipe);
 		intel_wait_for_vblank(dev, hsw_workaround_pipe);
 	}
-
-	intel_fbc_enable(intel_crtc);
 }
 
 static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force)
@@ -5144,8 +5133,6 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 	}
 
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
-
-	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void haswell_crtc_disable(struct drm_crtc *crtc)
@@ -5196,8 +5183,6 @@ static void haswell_crtc_disable(struct drm_crtc *crtc)
 		intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A,
 						      true);
 	}
-
-	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@ -5320,31 +5305,37 @@ intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder)
 	}
 }
 
-static unsigned long get_crtc_power_domains(struct drm_crtc *crtc)
+static unsigned long get_crtc_power_domains(struct drm_crtc *crtc,
+					    struct intel_crtc_state *crtc_state)
 {
 	struct drm_device *dev = crtc->dev;
-	struct intel_encoder *intel_encoder;
+	struct drm_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
 	unsigned long mask;
-	enum transcoder transcoder = intel_crtc->config->cpu_transcoder;
+	enum transcoder transcoder = crtc_state->cpu_transcoder;
 
-	if (!crtc->state->active)
+	if (!crtc_state->base.active)
 		return 0;
 
 	mask = BIT(POWER_DOMAIN_PIPE(pipe));
 	mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder));
-	if (intel_crtc->config->pch_pfit.enabled ||
-	    intel_crtc->config->pch_pfit.force_thru)
+	if (crtc_state->pch_pfit.enabled ||
+	    crtc_state->pch_pfit.force_thru)
 		mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe));
 
-	for_each_encoder_on_crtc(dev, crtc, intel_encoder)
+	drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) {
+		struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+
 		mask |= BIT(intel_display_port_power_domain(intel_encoder));
+	}
 
 	return mask;
 }
 
-static unsigned long modeset_get_crtc_power_domains(struct drm_crtc *crtc)
+static unsigned long
+modeset_get_crtc_power_domains(struct drm_crtc *crtc,
+			       struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -5352,7 +5343,8 @@ static unsigned long modeset_get_crtc_power_domains(struct drm_crtc *crtc)
 	unsigned long domains, new_domains, old_domains;
 
 	old_domains = intel_crtc->enabled_power_domains;
-	intel_crtc->enabled_power_domains = new_domains = get_crtc_power_domains(crtc);
+	intel_crtc->enabled_power_domains = new_domains =
+		get_crtc_power_domains(crtc, crtc_state);
 
 	domains = new_domains & ~old_domains;
 
@@ -5371,34 +5363,6 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv,
 		intel_display_power_put(dev_priv, domain);
 }
 
-static void modeset_update_crtc_power_domains(struct drm_atomic_state *state)
-{
-	struct drm_device *dev = state->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long put_domains[I915_MAX_PIPES] = {};
-	struct drm_crtc_state *crtc_state;
-	struct drm_crtc *crtc;
-	int i;
-
-	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		if (needs_modeset(crtc->state))
-			put_domains[to_intel_crtc(crtc)->pipe] =
-				modeset_get_crtc_power_domains(crtc);
-	}
-
-	if (dev_priv->display.modeset_commit_cdclk) {
-		unsigned int cdclk = to_intel_atomic_state(state)->cdclk;
-
-		if (cdclk != dev_priv->cdclk_freq &&
-		    !WARN_ON(!state->allow_modeset))
-			dev_priv->display.modeset_commit_cdclk(state);
-	}
-
-	for (i = 0; i < I915_MAX_PIPES; i++)
-		if (put_domains[i])
-			modeset_put_power_domains(dev_priv, put_domains[i]);
-}
-
 static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 {
 	int max_cdclk_freq = dev_priv->max_cdclk_freq;
@@ -6061,27 +6025,32 @@ static int broxton_calc_cdclk(struct drm_i915_private *dev_priv,
 		return 144000;
 }
 
-/* Compute the max pixel clock for new configuration. Uses atomic state if
- * that's non-NULL, look at current state otherwise. */
+/* Compute the max pixel clock for new configuration. */
 static int intel_mode_max_pixclk(struct drm_device *dev,
 				 struct drm_atomic_state *state)
 {
-	struct intel_crtc *intel_crtc;
-	struct intel_crtc_state *crtc_state;
-	int max_pixclk = 0;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	unsigned max_pixclk = 0, i;
+	enum pipe pipe;
 
-	for_each_intel_crtc(dev, intel_crtc) {
-		crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
+	memcpy(intel_state->min_pixclk, dev_priv->min_pixclk,
+	       sizeof(intel_state->min_pixclk));
 
-		if (!crtc_state->base.enable)
-			continue;
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		int pixclk = 0;
+
+		if (crtc_state->enable)
+			pixclk = crtc_state->adjusted_mode.crtc_clock;
 
-		max_pixclk = max(max_pixclk,
-				 crtc_state->base.adjusted_mode.crtc_clock);
+		intel_state->min_pixclk[i] = pixclk;
 	}
 
+	for_each_pipe(dev_priv, pipe)
+		max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk);
+
 	return max_pixclk;
 }
 
@@ -6090,13 +6059,18 @@ static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state)
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int max_pixclk = intel_mode_max_pixclk(dev, state);
+	struct intel_atomic_state *intel_state =
+		to_intel_atomic_state(state);
 
 	if (max_pixclk < 0)
 		return max_pixclk;
 
-	to_intel_atomic_state(state)->cdclk =
+	intel_state->cdclk = intel_state->dev_cdclk =
 		valleyview_calc_cdclk(dev_priv, max_pixclk);
 
+	if (!intel_state->active_crtcs)
+		intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0);
+
 	return 0;
 }
 
@@ -6105,13 +6079,18 @@ static int broxton_modeset_calc_cdclk(struct drm_atomic_state *state)
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int max_pixclk = intel_mode_max_pixclk(dev, state);
+	struct intel_atomic_state *intel_state =
+		to_intel_atomic_state(state);
 
 	if (max_pixclk < 0)
 		return max_pixclk;
 
-	to_intel_atomic_state(state)->cdclk =
+	intel_state->cdclk = intel_state->dev_cdclk =
 		broxton_calc_cdclk(dev_priv, max_pixclk);
 
+	if (!intel_state->active_crtcs)
+		intel_state->dev_cdclk = broxton_calc_cdclk(dev_priv, 0);
+
 	return 0;
 }
 
@@ -6154,8 +6133,10 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv)
 static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
-	unsigned int req_cdclk = to_intel_atomic_state(old_state)->cdclk;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_atomic_state *old_intel_state =
+		to_intel_atomic_state(old_state);
+	unsigned req_cdclk = old_intel_state->dev_cdclk;
 
 	/*
 	 * FIXME: We can end up here with all power domains off, yet
@@ -6290,8 +6271,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc)
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		encoder->enable(encoder);
-
-	intel_fbc_enable(intel_crtc);
 }
 
 static void i9xx_pfit_disable(struct intel_crtc *crtc)
@@ -6354,8 +6333,6 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc)
 
 	if (!IS_GEN2(dev))
 		intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
-
-	intel_fbc_disable_crtc(intel_crtc);
 }
 
 static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
@@ -6379,6 +6356,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 
 	dev_priv->display.crtc_disable(crtc);
 	intel_crtc->active = false;
+	intel_fbc_disable(intel_crtc);
 	intel_update_watermarks(crtc);
 	intel_disable_shared_dpll(intel_crtc);
 
@@ -6386,6 +6364,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 	for_each_power_domain(domain, domains)
 		intel_display_power_put(dev_priv, domain);
 	intel_crtc->enabled_power_domains = 0;
+
+	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
+	dev_priv->min_pixclk[intel_crtc->pipe] = 0;
 }
 
 /*
@@ -6394,55 +6375,16 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
  */
 int intel_display_suspend(struct drm_device *dev)
 {
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_atomic_state *state;
-	struct drm_crtc *crtc;
-	unsigned crtc_mask = 0;
-	int ret = 0;
-
-	if (WARN_ON(!ctx))
-		return 0;
-
-	lockdep_assert_held(&ctx->ww_ctx);
-	state = drm_atomic_state_alloc(dev);
-	if (WARN_ON(!state))
-		return -ENOMEM;
-
-	state->acquire_ctx = ctx;
-	state->allow_modeset = true;
-
-	for_each_crtc(dev, crtc) {
-		struct drm_crtc_state *crtc_state =
-			drm_atomic_get_crtc_state(state, crtc);
-
-		ret = PTR_ERR_OR_ZERO(crtc_state);
-		if (ret)
-			goto free;
-
-		if (!crtc_state->active)
-			continue;
-
-		crtc_state->active = false;
-		crtc_mask |= 1 << drm_crtc_index(crtc);
-	}
-
-	if (crtc_mask) {
-		ret = drm_atomic_commit(state);
-
-		if (!ret) {
-			for_each_crtc(dev, crtc)
-				if (crtc_mask & (1 << drm_crtc_index(crtc)))
-					crtc->state->active = true;
-
-			return ret;
-		}
-	}
+	int ret;
 
-free:
+	state = drm_atomic_helper_suspend(dev);
+	ret = PTR_ERR_OR_ZERO(state);
 	if (ret)
 		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
-	drm_atomic_state_free(state);
+	else
+		dev_priv->modeset_restore_state = state;
 	return ret;
 }
 
@@ -7596,26 +7538,34 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
  * in cases where we need the PLL enabled even when @pipe is not going to
  * be enabled.
  */
-void vlv_force_pll_on(struct drm_device *dev, enum pipe pipe,
-		      const struct dpll *dpll)
+int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe,
+		     const struct dpll *dpll)
 {
 	struct intel_crtc *crtc =
 		to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
-	struct intel_crtc_state pipe_config = {
-		.base.crtc = &crtc->base,
-		.pixel_multiplier = 1,
-		.dpll = *dpll,
-	};
+	struct intel_crtc_state *pipe_config;
+
+	pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL);
+	if (!pipe_config)
+		return -ENOMEM;
+
+	pipe_config->base.crtc = &crtc->base;
+	pipe_config->pixel_multiplier = 1;
+	pipe_config->dpll = *dpll;
 
 	if (IS_CHERRYVIEW(dev)) {
-		chv_compute_dpll(crtc, &pipe_config);
-		chv_prepare_pll(crtc, &pipe_config);
-		chv_enable_pll(crtc, &pipe_config);
+		chv_compute_dpll(crtc, pipe_config);
+		chv_prepare_pll(crtc, pipe_config);
+		chv_enable_pll(crtc, pipe_config);
 	} else {
-		vlv_compute_dpll(crtc, &pipe_config);
-		vlv_prepare_pll(crtc, &pipe_config);
-		vlv_enable_pll(crtc, &pipe_config);
+		vlv_compute_dpll(crtc, pipe_config);
+		vlv_prepare_pll(crtc, pipe_config);
+		vlv_enable_pll(crtc, pipe_config);
 	}
+
+	kfree(pipe_config);
+
+	return 0;
 }
 
 /**
@@ -9258,7 +9208,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 	fb->width = ((val >> 0) & 0x1fff) + 1;
 
 	val = I915_READ(PLANE_STRIDE(pipe, 0));
-	stride_mult = intel_fb_stride_alignment(dev, fb->modifier[0],
+	stride_mult = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 						fb->pixel_format);
 	fb->pitches[0] = (val & 0x3ff) * stride_mult;
 
@@ -9682,14 +9632,14 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv)
 		val |= PCH_LP_PARTITION_LEVEL_DISABLE;
 		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
 	}
-
-	intel_prepare_ddi(dev);
 }
 
 static void broxton_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
-	unsigned int req_cdclk = to_intel_atomic_state(old_state)->cdclk;
+	struct intel_atomic_state *old_intel_state =
+		to_intel_atomic_state(old_state);
+	unsigned int req_cdclk = old_intel_state->dev_cdclk;
 
 	broxton_set_cdclk(dev, req_cdclk);
 }
@@ -9697,29 +9647,38 @@ static void broxton_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 /* compute the max rate for new configuration */
 static int ilk_max_pixel_rate(struct drm_atomic_state *state)
 {
-	struct intel_crtc *intel_crtc;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = state->dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
 	struct intel_crtc_state *crtc_state;
-	int max_pixel_rate = 0;
+	unsigned max_pixel_rate = 0, i;
+	enum pipe pipe;
 
-	for_each_intel_crtc(state->dev, intel_crtc) {
-		int pixel_rate;
+	memcpy(intel_state->min_pixclk, dev_priv->min_pixclk,
+	       sizeof(intel_state->min_pixclk));
 
-		crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		int pixel_rate;
 
-		if (!crtc_state->base.enable)
+		crtc_state = to_intel_crtc_state(cstate);
+		if (!crtc_state->base.enable) {
+			intel_state->min_pixclk[i] = 0;
 			continue;
+		}
 
 		pixel_rate = ilk_pipe_pixel_rate(crtc_state);
 
 		/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
-		if (IS_BROADWELL(state->dev) && crtc_state->ips_enabled)
+		if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
 			pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
 
-		max_pixel_rate = max(max_pixel_rate, pixel_rate);
+		intel_state->min_pixclk[i] = pixel_rate;
 	}
 
+	for_each_pipe(dev_priv, pipe)
+		max_pixel_rate = max(intel_state->min_pixclk[pipe], max_pixel_rate);
+
 	return max_pixel_rate;
 }
 
@@ -9803,6 +9762,7 @@ static void broadwell_set_cdclk(struct drm_device *dev, int cdclk)
 static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->dev);
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	int max_pixclk = ilk_max_pixel_rate(state);
 	int cdclk;
 
@@ -9825,7 +9785,9 @@ static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state)
 		return -EINVAL;
 	}
 
-	to_intel_atomic_state(state)->cdclk = cdclk;
+	intel_state->cdclk = intel_state->dev_cdclk = cdclk;
+	if (!intel_state->active_crtcs)
+		intel_state->dev_cdclk = 337500;
 
 	return 0;
 }
@@ -9833,7 +9795,9 @@ static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state)
 static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
-	unsigned int req_cdclk = to_intel_atomic_state(old_state)->cdclk;
+	struct intel_atomic_state *old_intel_state =
+		to_intel_atomic_state(old_state);
+	unsigned req_cdclk = old_intel_state->dev_cdclk;
 
 	broadwell_set_cdclk(dev, req_cdclk);
 }
@@ -9841,8 +9805,13 @@ static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state)
 static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 				      struct intel_crtc_state *crtc_state)
 {
-	if (!intel_ddi_pll_select(crtc, crtc_state))
-		return -EINVAL;
+	struct intel_encoder *intel_encoder =
+		intel_ddi_get_crtc_new_encoder(crtc_state);
+
+	if (intel_encoder->type != INTEL_OUTPUT_DSI) {
+		if (!intel_ddi_pll_select(crtc, crtc_state))
+			return -EINVAL;
+	}
 
 	crtc->lowfreq_avail = false;
 
@@ -10058,16 +10027,17 @@ out:
 	return ret;
 }
 
-static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
+static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
+			       const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t cntl = 0, size = 0;
 
-	if (on) {
-		unsigned int width = intel_crtc->base.cursor->state->crtc_w;
-		unsigned int height = intel_crtc->base.cursor->state->crtc_h;
+	if (plane_state && plane_state->visible) {
+		unsigned int width = plane_state->base.crtc_w;
+		unsigned int height = plane_state->base.crtc_h;
 		unsigned int stride = roundup_pow_of_two(width) * 4;
 
 		switch (stride) {
@@ -10120,7 +10090,8 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
 	}
 }
 
-static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
+static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
+			       const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -10128,9 +10099,9 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
 	int pipe = intel_crtc->pipe;
 	uint32_t cntl = 0;
 
-	if (on) {
+	if (plane_state && plane_state->visible) {
 		cntl = MCURSOR_GAMMA_ENABLE;
-		switch (intel_crtc->base.cursor->state->crtc_w) {
+		switch (plane_state->base.crtc_w) {
 			case 64:
 				cntl |= CURSOR_MODE_64_ARGB_AX;
 				break;
@@ -10141,17 +10112,17 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
 				cntl |= CURSOR_MODE_256_ARGB_AX;
 				break;
 			default:
-				MISSING_CASE(intel_crtc->base.cursor->state->crtc_w);
+				MISSING_CASE(plane_state->base.crtc_w);
 				return;
 		}
 		cntl |= pipe << 28; /* Connect to correct pipe */
 
 		if (HAS_DDI(dev))
 			cntl |= CURSOR_PIPE_CSC_ENABLE;
-	}
 
-	if (crtc->cursor->state->rotation == BIT(DRM_ROTATE_180))
-		cntl |= CURSOR_ROTATE_180;
+		if (plane_state->base.rotation == BIT(DRM_ROTATE_180))
+			cntl |= CURSOR_ROTATE_180;
+	}
 
 	if (intel_crtc->cursor_cntl != cntl) {
 		I915_WRITE(CURCNTR(pipe), cntl);
@@ -10168,56 +10139,45 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
 
 /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */
 static void intel_crtc_update_cursor(struct drm_crtc *crtc,
-				     bool on)
+				     const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	struct drm_plane_state *cursor_state = crtc->cursor->state;
-	int x = cursor_state->crtc_x;
-	int y = cursor_state->crtc_y;
-	u32 base = 0, pos = 0;
-
-	base = intel_crtc->cursor_addr;
+	u32 base = intel_crtc->cursor_addr;
+	u32 pos = 0;
 
-	if (x >= intel_crtc->config->pipe_src_w)
-		on = false;
+	if (plane_state) {
+		int x = plane_state->base.crtc_x;
+		int y = plane_state->base.crtc_y;
 
-	if (y >= intel_crtc->config->pipe_src_h)
-		on = false;
-
-	if (x < 0) {
-		if (x + cursor_state->crtc_w <= 0)
-			on = false;
-
-		pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
-		x = -x;
-	}
-	pos |= x << CURSOR_X_SHIFT;
+		if (x < 0) {
+			pos |= CURSOR_POS_SIGN << CURSOR_X_SHIFT;
+			x = -x;
+		}
+		pos |= x << CURSOR_X_SHIFT;
 
-	if (y < 0) {
-		if (y + cursor_state->crtc_h <= 0)
-			on = false;
+		if (y < 0) {
+			pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
+			y = -y;
+		}
+		pos |= y << CURSOR_Y_SHIFT;
 
-		pos |= CURSOR_POS_SIGN << CURSOR_Y_SHIFT;
-		y = -y;
+		/* ILK+ do this automagically */
+		if (HAS_GMCH_DISPLAY(dev) &&
+		    plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
+			base += (plane_state->base.crtc_h *
+				 plane_state->base.crtc_w - 1) * 4;
+		}
 	}
-	pos |= y << CURSOR_Y_SHIFT;
 
 	I915_WRITE(CURPOS(pipe), pos);
 
-	/* ILK+ do this automagically */
-	if (HAS_GMCH_DISPLAY(dev) &&
-	    crtc->cursor->state->rotation == BIT(DRM_ROTATE_180)) {
-		base += (cursor_state->crtc_h *
-			 cursor_state->crtc_w - 1) * 4;
-	}
-
 	if (IS_845G(dev) || IS_I865G(dev))
-		i845_update_cursor(crtc, base, on);
+		i845_update_cursor(crtc, base, plane_state);
 	else
-		i9xx_update_cursor(crtc, base, on);
+		i9xx_update_cursor(crtc, base, plane_state);
 }
 
 static bool cursor_size_ok(struct drm_device *dev,
@@ -10385,6 +10345,7 @@ mode_fits_in_fbdev(struct drm_device *dev,
 	if (obj->base.size < mode->vdisplay * fb->pitches[0])
 		return NULL;
 
+	drm_framebuffer_reference(fb);
 	return fb;
 #else
 	return NULL;
@@ -10440,7 +10401,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 	struct drm_device *dev = encoder->dev;
 	struct drm_framebuffer *fb;
 	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_atomic_state *state = NULL;
+	struct drm_atomic_state *state = NULL, *restore_state = NULL;
 	struct drm_connector_state *connector_state;
 	struct intel_crtc_state *crtc_state;
 	int ret, i = -1;
@@ -10449,6 +10410,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 		      connector->base.id, connector->name,
 		      encoder->base.id, encoder->name);
 
+	old->restore_state = NULL;
+
 retry:
 	ret = drm_modeset_lock(&config->connection_mutex, ctx);
 	if (ret)
@@ -10465,24 +10428,15 @@ retry:
 	 */
 
 	/* See if we already have a CRTC for this connector */
-	if (encoder->crtc) {
-		crtc = encoder->crtc;
+	if (connector->state->crtc) {
+		crtc = connector->state->crtc;
 
 		ret = drm_modeset_lock(&crtc->mutex, ctx);
 		if (ret)
 			goto fail;
-		ret = drm_modeset_lock(&crtc->primary->mutex, ctx);
-		if (ret)
-			goto fail;
-
-		old->dpms_mode = connector->dpms;
-		old->load_detect_temp = false;
 
 		/* Make sure the crtc and connector are running */
-		if (connector->dpms != DRM_MODE_DPMS_ON)
-			connector->funcs->dpms(connector, DRM_MODE_DPMS_ON);
-
-		return true;
+		goto found;
 	}
 
 	/* Find an unused one (if possible) */
@@ -10490,8 +10444,15 @@ retry:
 		i++;
 		if (!(encoder->possible_crtcs & (1 << i)))
 			continue;
-		if (possible_crtc->state->enable)
+
+		ret = drm_modeset_lock(&possible_crtc->mutex, ctx);
+		if (ret)
+			goto fail;
+
+		if (possible_crtc->state->enable) {
+			drm_modeset_unlock(&possible_crtc->mutex);
 			continue;
+		}
 
 		crtc = possible_crtc;
 		break;
@@ -10505,23 +10466,22 @@ retry:
 		goto fail;
 	}
 
-	ret = drm_modeset_lock(&crtc->mutex, ctx);
-	if (ret)
-		goto fail;
+found:
+	intel_crtc = to_intel_crtc(crtc);
+
 	ret = drm_modeset_lock(&crtc->primary->mutex, ctx);
 	if (ret)
 		goto fail;
 
-	intel_crtc = to_intel_crtc(crtc);
-	old->dpms_mode = connector->dpms;
-	old->load_detect_temp = true;
-	old->release_fb = NULL;
-
 	state = drm_atomic_state_alloc(dev);
-	if (!state)
-		return false;
+	restore_state = drm_atomic_state_alloc(dev);
+	if (!state || !restore_state) {
+		ret = -ENOMEM;
+		goto fail;
+	}
 
 	state->acquire_ctx = ctx;
+	restore_state->acquire_ctx = ctx;
 
 	connector_state = drm_atomic_get_connector_state(state, connector);
 	if (IS_ERR(connector_state)) {
@@ -10529,8 +10489,9 @@ retry:
 		goto fail;
 	}
 
-	connector_state->crtc = crtc;
-	connector_state->best_encoder = &intel_encoder->base;
+	ret = drm_atomic_set_crtc_for_connector(connector_state, crtc);
+	if (ret)
+		goto fail;
 
 	crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
 	if (IS_ERR(crtc_state)) {
@@ -10554,7 +10515,6 @@ retry:
 	if (fb == NULL) {
 		DRM_DEBUG_KMS("creating tmp fb for load-detection\n");
 		fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32);
-		old->release_fb = fb;
 	} else
 		DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
 	if (IS_ERR(fb)) {
@@ -10566,15 +10526,29 @@ retry:
 	if (ret)
 		goto fail;
 
-	drm_mode_copy(&crtc_state->base.mode, mode);
+	drm_framebuffer_unreference(fb);
+
+	ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
+	if (ret)
+		goto fail;
+
+	ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector));
+	if (!ret)
+		ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc));
+	if (!ret)
+		ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(restore_state, crtc->primary));
+	if (ret) {
+		DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret);
+		goto fail;
+	}
 
-	if (drm_atomic_commit(state)) {
+	ret = drm_atomic_commit(state);
+	if (ret) {
 		DRM_DEBUG_KMS("failed to set mode on load-detect pipe\n");
-		if (old->release_fb)
-			old->release_fb->funcs->destroy(old->release_fb);
 		goto fail;
 	}
-	crtc->primary->crtc = crtc;
+
+	old->restore_state = restore_state;
 
 	/* let the connector get through one full cycle before testing */
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
@@ -10582,7 +10556,8 @@ retry:
 
 fail:
 	drm_atomic_state_free(state);
-	state = NULL;
+	drm_atomic_state_free(restore_state);
+	restore_state = state = NULL;
 
 	if (ret == -EDEADLK) {
 		drm_modeset_backoff(ctx);
@@ -10596,66 +10571,24 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct intel_load_detect_pipe *old,
 				    struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_device *dev = connector->dev;
 	struct intel_encoder *intel_encoder =
 		intel_attached_encoder(connector);
 	struct drm_encoder *encoder = &intel_encoder->base;
-	struct drm_crtc *crtc = encoder->crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_atomic_state *state;
-	struct drm_connector_state *connector_state;
-	struct intel_crtc_state *crtc_state;
+	struct drm_atomic_state *state = old->restore_state;
 	int ret;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 		      connector->base.id, connector->name,
 		      encoder->base.id, encoder->name);
 
-	if (old->load_detect_temp) {
-		state = drm_atomic_state_alloc(dev);
-		if (!state)
-			goto fail;
-
-		state->acquire_ctx = ctx;
-
-		connector_state = drm_atomic_get_connector_state(state, connector);
-		if (IS_ERR(connector_state))
-			goto fail;
-
-		crtc_state = intel_atomic_get_crtc_state(state, intel_crtc);
-		if (IS_ERR(crtc_state))
-			goto fail;
-
-		connector_state->best_encoder = NULL;
-		connector_state->crtc = NULL;
-
-		crtc_state->base.enable = crtc_state->base.active = false;
-
-		ret = intel_modeset_setup_plane_state(state, crtc, NULL, NULL,
-						      0, 0);
-		if (ret)
-			goto fail;
-
-		ret = drm_atomic_commit(state);
-		if (ret)
-			goto fail;
-
-		if (old->release_fb) {
-			drm_framebuffer_unregister_private(old->release_fb);
-			drm_framebuffer_unreference(old->release_fb);
-		}
-
+	if (!state)
 		return;
-	}
 
-	/* Switch crtc and encoder back off if necessary */
-	if (old->dpms_mode != DRM_MODE_DPMS_ON)
-		connector->funcs->dpms(connector, old->dpms_mode);
-
-	return;
-fail:
-	DRM_DEBUG_KMS("Couldn't release load detect pipe.\n");
-	drm_atomic_state_free(state);
+	ret = drm_atomic_commit(state);
+	if (ret) {
+		DRM_DEBUG_KMS("Couldn't release load detect pipe: %i\n", ret);
+		drm_atomic_state_free(state);
+	}
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -10810,7 +10743,7 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder;
 	struct drm_display_mode *mode;
-	struct intel_crtc_state pipe_config;
+	struct intel_crtc_state *pipe_config;
 	int htot = I915_READ(HTOTAL(cpu_transcoder));
 	int hsync = I915_READ(HSYNC(cpu_transcoder));
 	int vtot = I915_READ(VTOTAL(cpu_transcoder));
@@ -10821,6 +10754,12 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
 	if (!mode)
 		return NULL;
 
+	pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL);
+	if (!pipe_config) {
+		kfree(mode);
+		return NULL;
+	}
+
 	/*
 	 * Construct a pipe_config sufficient for getting the clock info
 	 * back out of crtc_clock_get.
@@ -10828,14 +10767,14 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
 	 * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need
 	 * to use a real value here instead.
 	 */
-	pipe_config.cpu_transcoder = (enum transcoder) pipe;
-	pipe_config.pixel_multiplier = 1;
-	pipe_config.dpll_hw_state.dpll = I915_READ(DPLL(pipe));
-	pipe_config.dpll_hw_state.fp0 = I915_READ(FP0(pipe));
-	pipe_config.dpll_hw_state.fp1 = I915_READ(FP1(pipe));
-	i9xx_crtc_clock_get(intel_crtc, &pipe_config);
-
-	mode->clock = pipe_config.port_clock / pipe_config.pixel_multiplier;
+	pipe_config->cpu_transcoder = (enum transcoder) pipe;
+	pipe_config->pixel_multiplier = 1;
+	pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(pipe));
+	pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(pipe));
+	pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(pipe));
+	i9xx_crtc_clock_get(intel_crtc, pipe_config);
+
+	mode->clock = pipe_config->port_clock / pipe_config->pixel_multiplier;
 	mode->hdisplay = (htot & 0xffff) + 1;
 	mode->htotal = ((htot & 0xffff0000) >> 16) + 1;
 	mode->hsync_start = (hsync & 0xffff) + 1;
@@ -10847,6 +10786,8 @@ struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev,
 
 	drm_mode_set_name(mode);
 
+	kfree(pipe_config);
+
 	return mode;
 }
 
@@ -10917,6 +10858,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	mutex_unlock(&dev->struct_mutex);
 
 	intel_frontbuffer_flip_complete(dev, to_intel_plane(primary)->frontbuffer_bit);
+	intel_fbc_post_update(crtc);
 	drm_framebuffer_unreference(work->old_fb);
 
 	BUG_ON(atomic_read(&crtc->unpin_work_count) == 0);
@@ -10998,6 +10940,12 @@ static bool page_flip_finished(struct intel_crtc *crtc)
 		return true;
 
 	/*
+	 * BDW signals flip done immediately if the plane
+	 * is disabled, even if the plane enable is already
+	 * armed to occur at the next vblank :(
+	 */
+
+	/*
 	 * A DSPSURFLIVE check isn't enough in case the mmio and CS flips
 	 * used the same base address. In that case the mmio flip might
 	 * have completed, but the CS hasn't even executed the flip yet.
@@ -11351,13 +11299,12 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
 	 */
 	if (intel_rotation_90_or_270(rotation)) {
 		/* stride = Surface height in tiles */
-		tile_height = intel_tile_height(dev, fb->pixel_format,
-						fb->modifier[0], 0);
+		tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0);
 		stride = DIV_ROUND_UP(fb->height, tile_height);
 	} else {
 		stride = fb->pitches[0] /
-				intel_fb_stride_alignment(dev, fb->modifier[0],
-							  fb->pixel_format);
+			intel_fb_stride_alignment(dev_priv, fb->modifier[0],
+						  fb->pixel_format);
 	}
 
 	/*
@@ -11633,6 +11580,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	crtc->primary->fb = fb;
 	update_state_fb(crtc->primary);
+	intel_fbc_pre_update(intel_crtc);
 
 	work->pending_flip_obj = obj;
 
@@ -11692,9 +11640,11 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 					obj->last_write_req);
 	} else {
 		if (!request) {
-			ret = i915_gem_request_alloc(ring, ring->default_context, &request);
-			if (ret)
+			request = i915_gem_request_alloc(ring, NULL);
+			if (IS_ERR(request)) {
+				ret = PTR_ERR(request);
 				goto cleanup_unpin;
+			}
 		}
 
 		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request,
@@ -11715,7 +11665,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			  to_intel_plane(primary)->frontbuffer_bit);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_fbc_deactivate(intel_crtc);
 	intel_frontbuffer_flip_prepare(dev,
 				       to_intel_plane(primary)->frontbuffer_bit);
 
@@ -11726,7 +11675,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 cleanup_unpin:
 	intel_unpin_fb_obj(fb, crtc->primary->state);
 cleanup_pending:
-	if (request)
+	if (!IS_ERR_OR_NULL(request))
 		i915_gem_request_cancel(request);
 	atomic_dec(&intel_crtc->unpin_work_count);
 	mutex_unlock(&dev->struct_mutex);
@@ -11837,11 +11786,9 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_plane *plane = plane_state->plane;
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane_state *old_plane_state =
 		to_intel_plane_state(plane->state);
 	int idx = intel_crtc->base.base.id, ret;
-	int i = drm_plane_index(plane);
 	bool mode_changed = needs_modeset(crtc_state);
 	bool was_crtc_enabled = crtc->state->active;
 	bool is_crtc_enabled = crtc_state->active;
@@ -11863,12 +11810,20 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 	if (!was_crtc_enabled && WARN_ON(was_visible))
 		was_visible = false;
 
-	if (!is_crtc_enabled && WARN_ON(visible))
-		visible = false;
+	/*
+	 * Visibility is calculated as if the crtc was on, but
+	 * after scaler setup everything depends on it being off
+	 * when the crtc isn't active.
+	 */
+	if (!is_crtc_enabled)
+		to_intel_plane_state(plane_state)->visible = visible = false;
 
 	if (!was_visible && !visible)
 		return 0;
 
+	if (fb != old_plane_state->base.fb)
+		pipe_config->fb_changed = true;
+
 	turn_off = was_visible && (!visible || mode_changed);
 	turn_on = visible && (!was_visible || mode_changed);
 
@@ -11883,11 +11838,8 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 		pipe_config->wm_changed = true;
 
 		/* must disable cxsr around plane enable/disable */
-		if (plane->type != DRM_PLANE_TYPE_CURSOR) {
-			if (is_crtc_enabled)
-				intel_crtc->atomic.wait_vblank = true;
+		if (plane->type != DRM_PLANE_TYPE_CURSOR)
 			pipe_config->disable_cxsr = true;
-		}
 	} else if (intel_wm_need_update(plane, plane_state)) {
 		pipe_config->wm_changed = true;
 	}
@@ -11898,49 +11850,9 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 
 	switch (plane->type) {
 	case DRM_PLANE_TYPE_PRIMARY:
-		intel_crtc->atomic.pre_disable_primary = turn_off;
 		intel_crtc->atomic.post_enable_primary = turn_on;
+		intel_crtc->atomic.update_fbc = true;
 
-		if (turn_off) {
-			/*
-			 * FIXME: Actually if we will still have any other
-			 * plane enabled on the pipe we could let IPS enabled
-			 * still, but for now lets consider that when we make
-			 * primary invisible by setting DSPCNTR to 0 on
-			 * update_primary_plane function IPS needs to be
-			 * disable.
-			 */
-			intel_crtc->atomic.disable_ips = true;
-
-			intel_crtc->atomic.disable_fbc = true;
-		}
-
-		/*
-		 * FBC does not work on some platforms for rotated
-		 * planes, so disable it when rotation is not 0 and
-		 * update it when rotation is set back to 0.
-		 *
-		 * FIXME: This is redundant with the fbc update done in
-		 * the primary plane enable function except that that
-		 * one is done too late. We eventually need to unify
-		 * this.
-		 */
-
-		if (visible &&
-		    INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) &&
-		    dev_priv->fbc.crtc == intel_crtc &&
-		    plane_state->rotation != BIT(DRM_ROTATE_0))
-			intel_crtc->atomic.disable_fbc = true;
-
-		/*
-		 * BDW signals flip done immediately if the plane
-		 * is disabled, even if the plane enable is already
-		 * armed to occur at the next vblank :(
-		 */
-		if (turn_on && IS_BROADWELL(dev))
-			intel_crtc->atomic.wait_vblank = true;
-
-		intel_crtc->atomic.update_fbc |= visible || mode_changed;
 		break;
 	case DRM_PLANE_TYPE_CURSOR:
 		break;
@@ -11953,13 +11865,8 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 		 */
 		if (IS_IVYBRIDGE(dev) &&
 		    needs_scaling(to_intel_plane_state(plane_state)) &&
-		    !needs_scaling(old_plane_state)) {
-			to_intel_crtc_state(crtc_state)->disable_lp_wm = true;
-		} else if (turn_off && !mode_changed) {
-			intel_crtc->atomic.wait_vblank = true;
-			intel_crtc->atomic.update_sprite_watermarks |=
-				1 << i;
-		}
+		    !needs_scaling(old_plane_state))
+			pipe_config->disable_lp_wm = true;
 
 		break;
 	}
@@ -12561,19 +12468,22 @@ intel_compare_m_n(unsigned int m, unsigned int n,
 
 	BUILD_BUG_ON(DATA_LINK_M_N_MASK > INT_MAX);
 
-	if (m > m2) {
-		while (m > m2) {
+	if (n > n2) {
+		while (n > n2) {
 			m2 <<= 1;
 			n2 <<= 1;
 		}
-	} else if (m < m2) {
-		while (m < m2) {
+	} else if (n < n2) {
+		while (n < n2) {
 			m <<= 1;
 			n <<= 1;
 		}
 	}
 
-	return m == m2 && n == n2;
+	if (n != n2)
+		return false;
+
+	return intel_fuzzy_clock_check(m, m2);
 }
 
 static bool
@@ -13124,8 +13034,6 @@ static void intel_modeset_clear_plls(struct drm_atomic_state *state)
 	struct drm_device *dev = state->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_shared_dpll_config *shared_dpll = NULL;
-	struct intel_crtc *intel_crtc;
-	struct intel_crtc_state *intel_crtc_state;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
 	int i;
@@ -13134,21 +13042,21 @@ static void intel_modeset_clear_plls(struct drm_atomic_state *state)
 		return;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		int dpll;
-
-		intel_crtc = to_intel_crtc(crtc);
-		intel_crtc_state = to_intel_crtc_state(crtc_state);
-		dpll = intel_crtc_state->shared_dpll;
+		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+		int old_dpll = to_intel_crtc_state(crtc->state)->shared_dpll;
 
-		if (!needs_modeset(crtc_state) || dpll == DPLL_ID_PRIVATE)
+		if (!needs_modeset(crtc_state))
 			continue;
 
-		intel_crtc_state->shared_dpll = DPLL_ID_PRIVATE;
+		to_intel_crtc_state(crtc_state)->shared_dpll = DPLL_ID_PRIVATE;
+
+		if (old_dpll == DPLL_ID_PRIVATE)
+			continue;
 
 		if (!shared_dpll)
 			shared_dpll = intel_atomic_get_shared_dpll_state(state);
 
-		shared_dpll[dpll].crtc_mask &= ~(1 << intel_crtc->pipe);
+		shared_dpll[old_dpll].crtc_mask &= ~(1 << intel_crtc->pipe);
 	}
 }
 
@@ -13248,15 +13156,27 @@ static int intel_modeset_all_pipes(struct drm_atomic_state *state)
 
 static int intel_modeset_checks(struct drm_atomic_state *state)
 {
-	struct drm_device *dev = state->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = state->dev->dev_private;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int ret = 0, i;
 
 	if (!check_digital_port_conflicts(state)) {
 		DRM_DEBUG_KMS("rejecting conflicting digital port configuration\n");
 		return -EINVAL;
 	}
 
+	intel_state->modeset = true;
+	intel_state->active_crtcs = dev_priv->active_crtcs;
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc_state->active)
+			intel_state->active_crtcs |= 1 << i;
+		else
+			intel_state->active_crtcs &= ~(1 << i);
+	}
+
 	/*
 	 * See if the config requires any additional preparation, e.g.
 	 * to adjust global state with pipes off.  We need to do this
@@ -13265,22 +13185,22 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
 	 * adjusted_mode bits in the crtc directly.
 	 */
 	if (dev_priv->display.modeset_calc_cdclk) {
-		unsigned int cdclk;
-
 		ret = dev_priv->display.modeset_calc_cdclk(state);
 
-		cdclk = to_intel_atomic_state(state)->cdclk;
-		if (!ret && cdclk != dev_priv->cdclk_freq)
+		if (!ret && intel_state->dev_cdclk != dev_priv->cdclk_freq)
 			ret = intel_modeset_all_pipes(state);
 
 		if (ret < 0)
 			return ret;
+
+		DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n",
+			      intel_state->cdclk, intel_state->dev_cdclk);
 	} else
-		to_intel_atomic_state(state)->cdclk = dev_priv->cdclk_freq;
+		to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq;
 
 	intel_modeset_clear_plls(state);
 
-	if (IS_HASWELL(dev))
+	if (IS_HASWELL(dev_priv))
 		return haswell_mode_set_planes_workaround(state);
 
 	return 0;
@@ -13333,6 +13253,7 @@ static void calc_watermark_data(struct drm_atomic_state *state)
 static int intel_atomic_check(struct drm_device *dev,
 			      struct drm_atomic_state *state)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *crtc_state;
@@ -13375,7 +13296,7 @@ static int intel_atomic_check(struct drm_device *dev,
 			return ret;
 
 		if (i915.fastboot &&
-		    intel_pipe_config_compare(state->dev,
+		    intel_pipe_config_compare(dev,
 					to_intel_crtc_state(crtc->state),
 					pipe_config, true)) {
 			crtc_state->mode_changed = false;
@@ -13401,12 +13322,13 @@ static int intel_atomic_check(struct drm_device *dev,
 		if (ret)
 			return ret;
 	} else
-		intel_state->cdclk = to_i915(state->dev)->cdclk_freq;
+		intel_state->cdclk = dev_priv->cdclk_freq;
 
-	ret = drm_atomic_helper_check_planes(state->dev, state);
+	ret = drm_atomic_helper_check_planes(dev, state);
 	if (ret)
 		return ret;
 
+	intel_fbc_choose_crtc(dev_priv, state);
 	calc_watermark_data(state);
 
 	return 0;
@@ -13478,6 +13400,71 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 	return ret;
 }
 
+static void intel_atomic_wait_for_vblanks(struct drm_device *dev,
+					  struct drm_i915_private *dev_priv,
+					  unsigned crtc_mask)
+{
+	unsigned last_vblank_count[I915_MAX_PIPES];
+	enum pipe pipe;
+	int ret;
+
+	if (!crtc_mask)
+		return;
+
+	for_each_pipe(dev_priv, pipe) {
+		struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+
+		if (!((1 << pipe) & crtc_mask))
+			continue;
+
+		ret = drm_crtc_vblank_get(crtc);
+		if (WARN_ON(ret != 0)) {
+			crtc_mask &= ~(1 << pipe);
+			continue;
+		}
+
+		last_vblank_count[pipe] = drm_crtc_vblank_count(crtc);
+	}
+
+	for_each_pipe(dev_priv, pipe) {
+		struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+		long lret;
+
+		if (!((1 << pipe) & crtc_mask))
+			continue;
+
+		lret = wait_event_timeout(dev->vblank[pipe].queue,
+				last_vblank_count[pipe] !=
+					drm_crtc_vblank_count(crtc),
+				msecs_to_jiffies(50));
+
+		WARN_ON(!lret);
+
+		drm_crtc_vblank_put(crtc);
+	}
+}
+
+static bool needs_vblank_wait(struct intel_crtc_state *crtc_state)
+{
+	/* fb updated, need to unpin old fb */
+	if (crtc_state->fb_changed)
+		return true;
+
+	/* wm changes, need vblank before final wm's */
+	if (crtc_state->wm_changed)
+		return true;
+
+	/*
+	 * cxsr is re-enabled after vblank.
+	 * This is already handled by crtc_state->wm_changed,
+	 * but added for clarity.
+	 */
+	if (crtc_state->disable_cxsr)
+		return true;
+
+	return false;
+}
+
 /**
  * intel_atomic_commit - commit validated state object
  * @dev: DRM device
@@ -13498,12 +13485,14 @@ static int intel_atomic_commit(struct drm_device *dev,
 			       struct drm_atomic_state *state,
 			       bool async)
 {
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc_state *crtc_state;
 	struct drm_crtc *crtc;
-	int ret = 0;
-	int i;
-	bool any_ms = false;
+	int ret = 0, i;
+	bool hw_check = intel_state->modeset;
+	unsigned long put_domains[I915_MAX_PIPES] = {};
+	unsigned crtc_vblank_mask = 0;
 
 	ret = intel_atomic_prepare_commit(dev, state, async);
 	if (ret) {
@@ -13514,19 +13503,37 @@ static int intel_atomic_commit(struct drm_device *dev,
 	drm_atomic_helper_swap_state(dev, state);
 	dev_priv->wm.config = to_intel_atomic_state(state)->wm_config;
 
+	if (intel_state->modeset) {
+		memcpy(dev_priv->min_pixclk, intel_state->min_pixclk,
+		       sizeof(intel_state->min_pixclk));
+		dev_priv->active_crtcs = intel_state->active_crtcs;
+		dev_priv->atomic_cdclk_freq = intel_state->cdclk;
+
+		intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+	}
+
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
+		if (needs_modeset(crtc->state) ||
+		    to_intel_crtc_state(crtc->state)->update_pipe) {
+			hw_check = true;
+
+			put_domains[to_intel_crtc(crtc)->pipe] =
+				modeset_get_crtc_power_domains(crtc,
+					to_intel_crtc_state(crtc->state));
+		}
+
 		if (!needs_modeset(crtc->state))
 			continue;
 
-		any_ms = true;
-		intel_pre_plane_update(intel_crtc);
+		intel_pre_plane_update(to_intel_crtc_state(crtc_state));
 
 		if (crtc_state->active) {
 			intel_crtc_disable_planes(crtc, crtc_state->plane_mask);
 			dev_priv->display.crtc_disable(crtc);
 			intel_crtc->active = false;
+			intel_fbc_disable(intel_crtc);
 			intel_disable_shared_dpll(intel_crtc);
 
 			/*
@@ -13545,65 +13552,80 @@ static int intel_atomic_commit(struct drm_device *dev,
 	 * update the the output configuration. */
 	intel_modeset_update_crtc_state(state);
 
-	if (any_ms) {
+	if (intel_state->modeset) {
 		intel_shared_dpll_commit(state);
 
 		drm_atomic_helper_update_legacy_modeset_state(state->dev, state);
-		modeset_update_crtc_power_domains(state);
+
+		if (dev_priv->display.modeset_commit_cdclk &&
+		    intel_state->dev_cdclk != dev_priv->cdclk_freq)
+			dev_priv->display.modeset_commit_cdclk(state);
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 		bool modeset = needs_modeset(crtc->state);
-		bool update_pipe = !modeset &&
-			to_intel_crtc_state(crtc->state)->update_pipe;
-		unsigned long put_domains = 0;
-
-		if (modeset)
-			intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET);
+		struct intel_crtc_state *pipe_config =
+			to_intel_crtc_state(crtc->state);
+		bool update_pipe = !modeset && pipe_config->update_pipe;
 
 		if (modeset && crtc->state->active) {
 			update_scanline_offset(to_intel_crtc(crtc));
 			dev_priv->display.crtc_enable(crtc);
 		}
 
-		if (update_pipe) {
-			put_domains = modeset_get_crtc_power_domains(crtc);
-
-			/* make sure intel_modeset_check_state runs */
-			any_ms = true;
-		}
-
 		if (!modeset)
-			intel_pre_plane_update(intel_crtc);
+			intel_pre_plane_update(to_intel_crtc_state(crtc_state));
+
+		if (crtc->state->active && intel_crtc->atomic.update_fbc)
+			intel_fbc_enable(intel_crtc);
 
 		if (crtc->state->active &&
 		    (crtc->state->planes_changed || update_pipe))
 			drm_atomic_helper_commit_planes_on_crtc(crtc_state);
 
-		if (put_domains)
-			modeset_put_power_domains(dev_priv, put_domains);
-
-		intel_post_plane_update(intel_crtc);
-
-		if (modeset)
-			intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
+		if (pipe_config->base.active && needs_vblank_wait(pipe_config))
+			crtc_vblank_mask |= 1 << i;
 	}
 
 	/* FIXME: add subpixel order */
 
-	drm_atomic_helper_wait_for_vblanks(dev, state);
+	if (!state->legacy_cursor_update)
+		intel_atomic_wait_for_vblanks(dev, dev_priv, crtc_vblank_mask);
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		intel_post_plane_update(to_intel_crtc(crtc));
+
+		if (put_domains[i])
+			modeset_put_power_domains(dev_priv, put_domains[i]);
+	}
+
+	if (intel_state->modeset)
+		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET);
 
 	mutex_lock(&dev->struct_mutex);
 	drm_atomic_helper_cleanup_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
 
-	if (any_ms)
+	if (hw_check)
 		intel_modeset_check_state(dev, state);
 
 	drm_atomic_state_free(state);
 
+	/* As one of the primary mmio accessors, KMS has a high likelihood
+	 * of triggering bugs in unclaimed access. After we finish
+	 * modesetting, see if an error has been flagged, and if so
+	 * enable debugging for the next modeset - and hope we catch
+	 * the culprit.
+	 *
+	 * XXX note that we assume display power is on at this point.
+	 * This might hold true now but we need to add pm helper to check
+	 * unclaimed only when the hardware is on, as atomic commits
+	 * can happen also when the device is completely off.
+	 */
+	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
+
 	return 0;
 }
 
@@ -13894,7 +13916,7 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state
 	struct drm_i915_private *dev_priv;
 	int crtc_clock, cdclk;
 
-	if (!intel_crtc || !crtc_state)
+	if (!intel_crtc || !crtc_state->base.enable)
 		return DRM_PLANE_HELPER_NO_SCALING;
 
 	dev = intel_crtc->base.dev;
@@ -13943,32 +13965,6 @@ intel_check_primary_plane(struct drm_plane *plane,
 					     &state->visible);
 }
 
-static void
-intel_commit_primary_plane(struct drm_plane *plane,
-			   struct intel_plane_state *state)
-{
-	struct drm_crtc *crtc = state->base.crtc;
-	struct drm_framebuffer *fb = state->base.fb;
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	crtc = crtc ? crtc : plane->crtc;
-
-	dev_priv->display.update_primary_plane(crtc, fb,
-					       state->src.x1 >> 16,
-					       state->src.y1 >> 16);
-}
-
-static void
-intel_disable_primary_plane(struct drm_plane *plane,
-			    struct drm_crtc *crtc)
-{
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	dev_priv->display.update_primary_plane(crtc, NULL, 0, 0);
-}
-
 static void intel_begin_crtc_commit(struct drm_crtc *crtc,
 				    struct drm_crtc_state *old_crtc_state)
 {
@@ -14053,20 +14049,33 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
 	primary->plane = pipe;
 	primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
 	primary->check_plane = intel_check_primary_plane;
-	primary->commit_plane = intel_commit_primary_plane;
-	primary->disable_plane = intel_disable_primary_plane;
 	if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
 		primary->plane = !pipe;
 
 	if (INTEL_INFO(dev)->gen >= 9) {
 		intel_primary_formats = skl_primary_formats;
 		num_formats = ARRAY_SIZE(skl_primary_formats);
+
+		primary->update_plane = skylake_update_primary_plane;
+		primary->disable_plane = skylake_disable_primary_plane;
+	} else if (HAS_PCH_SPLIT(dev)) {
+		intel_primary_formats = i965_primary_formats;
+		num_formats = ARRAY_SIZE(i965_primary_formats);
+
+		primary->update_plane = ironlake_update_primary_plane;
+		primary->disable_plane = i9xx_disable_primary_plane;
 	} else if (INTEL_INFO(dev)->gen >= 4) {
 		intel_primary_formats = i965_primary_formats;
 		num_formats = ARRAY_SIZE(i965_primary_formats);
+
+		primary->update_plane = i9xx_update_primary_plane;
+		primary->disable_plane = i9xx_disable_primary_plane;
 	} else {
 		intel_primary_formats = i8xx_primary_formats;
 		num_formats = ARRAY_SIZE(i8xx_primary_formats);
+
+		primary->update_plane = i9xx_update_primary_plane;
+		primary->disable_plane = i9xx_disable_primary_plane;
 	}
 
 	drm_universal_plane_init(dev, &primary->base, 0,
@@ -14165,22 +14174,23 @@ static void
 intel_disable_cursor_plane(struct drm_plane *plane,
 			   struct drm_crtc *crtc)
 {
-	intel_crtc_update_cursor(crtc, false);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	intel_crtc->cursor_addr = 0;
+	intel_crtc_update_cursor(crtc, NULL);
 }
 
 static void
-intel_commit_cursor_plane(struct drm_plane *plane,
-			  struct intel_plane_state *state)
+intel_update_cursor_plane(struct drm_plane *plane,
+			  const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *state)
 {
-	struct drm_crtc *crtc = state->base.crtc;
+	struct drm_crtc *crtc = crtc_state->base.crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_device *dev = plane->dev;
-	struct intel_crtc *intel_crtc;
 	struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb);
 	uint32_t addr;
 
-	crtc = crtc ? crtc : plane->crtc;
-	intel_crtc = to_intel_crtc(crtc);
-
 	if (!obj)
 		addr = 0;
 	else if (!INTEL_INFO(dev)->cursor_needs_physical)
@@ -14189,9 +14199,7 @@ intel_commit_cursor_plane(struct drm_plane *plane,
 		addr = obj->phys_handle->busaddr;
 
 	intel_crtc->cursor_addr = addr;
-
-	if (crtc->state->active)
-		intel_crtc_update_cursor(crtc, state->visible);
+	intel_crtc_update_cursor(crtc, state);
 }
 
 static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
@@ -14217,7 +14225,7 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
 	cursor->plane = pipe;
 	cursor->frontbuffer_bit = INTEL_FRONTBUFFER_CURSOR(pipe);
 	cursor->check_plane = intel_check_cursor_plane;
-	cursor->commit_plane = intel_commit_cursor_plane;
+	cursor->update_plane = intel_update_cursor_plane;
 	cursor->disable_plane = intel_disable_cursor_plane;
 
 	drm_universal_plane_init(dev, &cursor->base, 0,
@@ -14664,10 +14672,12 @@ u32 intel_fb_pitch_limit(struct drm_device *dev, uint64_t fb_modifier,
 	u32 gen = INTEL_INFO(dev)->gen;
 
 	if (gen >= 9) {
+		int cpp = drm_format_plane_cpp(pixel_format, 0);
+
 		/* "The stride in bytes must not exceed the of the size of 8K
 		 *  pixels and 32K bytes."
 		 */
-		 return min(8192*drm_format_plane_cpp(pixel_format, 0), 32768);
+		return min(8192 * cpp, 32768);
 	} else if (gen >= 5 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
 		return 32*1024;
 	} else if (gen >= 4) {
@@ -14691,6 +14701,7 @@ static int intel_framebuffer_init(struct drm_device *dev,
 				  struct drm_mode_fb_cmd2 *mode_cmd,
 				  struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	unsigned int aligned_height;
 	int ret;
 	u32 pitch_limit, stride_alignment;
@@ -14732,7 +14743,8 @@ static int intel_framebuffer_init(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	stride_alignment = intel_fb_stride_alignment(dev, mode_cmd->modifier[0],
+	stride_alignment = intel_fb_stride_alignment(dev_priv,
+						     mode_cmd->modifier[0],
 						     mode_cmd->pixel_format);
 	if (mode_cmd->pitches[0] & (stride_alignment - 1)) {
 		DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n",
@@ -14824,7 +14836,6 @@ static int intel_framebuffer_init(struct drm_device *dev,
 
 	drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd);
 	intel_fb->obj = obj;
-	intel_fb->obj->framebuffer_references++;
 
 	ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
 	if (ret) {
@@ -14832,6 +14843,8 @@ static int intel_framebuffer_init(struct drm_device *dev,
 		return ret;
 	}
 
+	intel_fb->obj->framebuffer_references++;
+
 	return 0;
 }
 
@@ -14895,8 +14908,6 @@ static void intel_init_display(struct drm_device *dev)
 			haswell_crtc_compute_clock;
 		dev_priv->display.crtc_enable = haswell_crtc_enable;
 		dev_priv->display.crtc_disable = haswell_crtc_disable;
-		dev_priv->display.update_primary_plane =
-			skylake_update_primary_plane;
 	} else if (HAS_DDI(dev)) {
 		dev_priv->display.get_pipe_config = haswell_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -14905,8 +14916,6 @@ static void intel_init_display(struct drm_device *dev)
 			haswell_crtc_compute_clock;
 		dev_priv->display.crtc_enable = haswell_crtc_enable;
 		dev_priv->display.crtc_disable = haswell_crtc_disable;
-		dev_priv->display.update_primary_plane =
-			ironlake_update_primary_plane;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -14915,8 +14924,6 @@ static void intel_init_display(struct drm_device *dev)
 			ironlake_crtc_compute_clock;
 		dev_priv->display.crtc_enable = ironlake_crtc_enable;
 		dev_priv->display.crtc_disable = ironlake_crtc_disable;
-		dev_priv->display.update_primary_plane =
-			ironlake_update_primary_plane;
 	} else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -14924,8 +14931,6 @@ static void intel_init_display(struct drm_device *dev)
 		dev_priv->display.crtc_compute_clock = i9xx_crtc_compute_clock;
 		dev_priv->display.crtc_enable = valleyview_crtc_enable;
 		dev_priv->display.crtc_disable = i9xx_crtc_disable;
-		dev_priv->display.update_primary_plane =
-			i9xx_update_primary_plane;
 	} else {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
 		dev_priv->display.get_initial_plane_config =
@@ -14933,8 +14938,6 @@ static void intel_init_display(struct drm_device *dev)
 		dev_priv->display.crtc_compute_clock = i9xx_crtc_compute_clock;
 		dev_priv->display.crtc_enable = i9xx_crtc_enable;
 		dev_priv->display.crtc_disable = i9xx_crtc_disable;
-		dev_priv->display.update_primary_plane =
-			i9xx_update_primary_plane;
 	}
 
 	/* Returns the core display clock speed */
@@ -15240,12 +15243,89 @@ static void i915_disable_vga(struct drm_device *dev)
 
 void intel_modeset_init_hw(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	intel_update_cdclk(dev);
-	intel_prepare_ddi(dev);
+
+	dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq;
+
 	intel_init_clock_gating(dev);
 	intel_enable_gt_powersave(dev);
 }
 
+/*
+ * Calculate what we think the watermarks should be for the state we've read
+ * out of the hardware and then immediately program those watermarks so that
+ * we ensure the hardware settings match our internal state.
+ *
+ * We can calculate what we think WM's should be by creating a duplicate of the
+ * current state (which was constructed during hardware readout) and running it
+ * through the atomic check code to calculate new watermark values in the
+ * state object.
+ */
+static void sanitize_watermarks(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_atomic_state *state;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *cstate;
+	struct drm_modeset_acquire_ctx ctx;
+	int ret;
+	int i;
+
+	/* Only supported on platforms that use atomic watermark design */
+	if (!dev_priv->display.program_watermarks)
+		return;
+
+	/*
+	 * We need to hold connection_mutex before calling duplicate_state so
+	 * that the connector loop is protected.
+	 */
+	drm_modeset_acquire_init(&ctx, 0);
+retry:
+	ret = drm_modeset_lock_all_ctx(dev, &ctx);
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	} else if (WARN_ON(ret)) {
+		goto fail;
+	}
+
+	state = drm_atomic_helper_duplicate_state(dev, &ctx);
+	if (WARN_ON(IS_ERR(state)))
+		goto fail;
+
+	ret = intel_atomic_check(dev, state);
+	if (ret) {
+		/*
+		 * If we fail here, it means that the hardware appears to be
+		 * programmed in a way that shouldn't be possible, given our
+		 * understanding of watermark requirements.  This might mean a
+		 * mistake in the hardware readout code or a mistake in the
+		 * watermark calculations for a given platform.  Raise a WARN
+		 * so that this is noticeable.
+		 *
+		 * If this actually happens, we'll have to just leave the
+		 * BIOS-programmed watermarks untouched and hope for the best.
+		 */
+		WARN(true, "Could not determine valid watermarks for inherited state\n");
+		goto fail;
+	}
+
+	/* Write calculated watermark values back */
+	to_i915(dev)->wm.config = to_intel_atomic_state(state)->wm_config;
+	for_each_crtc_in_state(state, crtc, cstate, i) {
+		struct intel_crtc_state *cs = to_intel_crtc_state(cstate);
+
+		dev_priv->display.program_watermarks(cs);
+	}
+
+	drm_atomic_state_free(state);
+fail:
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+}
+
 void intel_modeset_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -15366,6 +15446,13 @@ void intel_modeset_init(struct drm_device *dev)
 		 */
 		intel_find_initial_plane_obj(crtc, &plane_config);
 	}
+
+	/*
+	 * Make sure hardware watermarks really match the state we read out.
+	 * Note that we need to do this after reconstructing the BIOS fb's
+	 * since the watermark calculation done here will use pstate->fb.
+	 */
+	sanitize_watermarks(dev);
 }
 
 static void intel_enable_pipe_a(struct drm_device *dev)
@@ -15422,6 +15509,17 @@ static bool intel_crtc_has_encoders(struct intel_crtc *crtc)
 	return false;
 }
 
+static bool intel_encoder_has_connectors(struct intel_encoder *encoder)
+{
+	struct drm_device *dev = encoder->base.dev;
+	struct intel_connector *connector;
+
+	for_each_connector_on_encoder(dev, &encoder->base, connector)
+		return true;
+
+	return false;
+}
+
 static void intel_sanitize_crtc(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -15496,6 +15594,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		crtc->base.state->active = crtc->active;
 		crtc->base.enabled = crtc->active;
 		crtc->base.state->connector_mask = 0;
+		crtc->base.state->encoder_mask = 0;
 
 		/* Because we only establish the connector -> encoder ->
 		 * crtc links if something is active, this means the
@@ -15531,7 +15630,6 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 {
 	struct intel_connector *connector;
 	struct drm_device *dev = encoder->base.dev;
-	bool active = false;
 
 	/* We need to check both for a crtc link (meaning that the
 	 * encoder is active and trying to read from a pipe) and the
@@ -15539,15 +15637,7 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 	bool has_active_crtc = encoder->base.crtc &&
 		to_intel_crtc(encoder->base.crtc)->active;
 
-	for_each_intel_connector(dev, connector) {
-		if (connector->base.encoder != &encoder->base)
-			continue;
-
-		active = true;
-		break;
-	}
-
-	if (active && !has_active_crtc) {
+	if (intel_encoder_has_connectors(encoder) && !has_active_crtc) {
 		DRM_DEBUG_KMS("[ENCODER:%d:%s] has active connectors but no active pipe!\n",
 			      encoder->base.base.id,
 			      encoder->base.name);
@@ -15640,16 +15730,40 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	struct intel_connector *connector;
 	int i;
 
+	dev_priv->active_crtcs = 0;
+
 	for_each_intel_crtc(dev, crtc) {
-		__drm_atomic_helper_crtc_destroy_state(&crtc->base, crtc->base.state);
-		memset(crtc->config, 0, sizeof(*crtc->config));
-		crtc->config->base.crtc = &crtc->base;
+		struct intel_crtc_state *crtc_state = crtc->config;
+		int pixclk = 0;
 
-		crtc->active = dev_priv->display.get_pipe_config(crtc,
-								 crtc->config);
+		__drm_atomic_helper_crtc_destroy_state(&crtc->base, &crtc_state->base);
+		memset(crtc_state, 0, sizeof(*crtc_state));
+		crtc_state->base.crtc = &crtc->base;
 
-		crtc->base.state->active = crtc->active;
-		crtc->base.enabled = crtc->active;
+		crtc_state->base.active = crtc_state->base.enable =
+			dev_priv->display.get_pipe_config(crtc, crtc_state);
+
+		crtc->base.enabled = crtc_state->base.enable;
+		crtc->active = crtc_state->base.active;
+
+		if (crtc_state->base.active) {
+			dev_priv->active_crtcs |= 1 << crtc->pipe;
+
+			if (IS_BROADWELL(dev_priv)) {
+				pixclk = ilk_pipe_pixel_rate(crtc_state);
+
+				/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+				if (crtc_state->ips_enabled)
+					pixclk = DIV_ROUND_UP(pixclk * 100, 95);
+			} else if (IS_VALLEYVIEW(dev_priv) ||
+				   IS_CHERRYVIEW(dev_priv) ||
+				   IS_BROXTON(dev_priv))
+				pixclk = crtc_state->base.adjusted_mode.crtc_clock;
+			else
+				WARN_ON(dev_priv->display.modeset_calc_cdclk);
+		}
+
+		dev_priv->min_pixclk[crtc->pipe] = pixclk;
 
 		readout_plane_state(crtc);
 
@@ -15713,6 +15827,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 				 */
 				encoder->base.crtc->state->connector_mask |=
 					1 << drm_connector_index(&connector->base);
+				encoder->base.crtc->state->encoder_mask |=
+					1 << drm_encoder_index(&encoder->base);
 			}
 
 		} else {
@@ -15809,63 +15925,76 @@ intel_modeset_setup_hw_state(struct drm_device *dev)
 	for_each_intel_crtc(dev, crtc) {
 		unsigned long put_domains;
 
-		put_domains = modeset_get_crtc_power_domains(&crtc->base);
+		put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config);
 		if (WARN_ON(put_domains))
 			modeset_put_power_domains(dev_priv, put_domains);
 	}
 	intel_display_set_init_power(dev_priv, false);
+
+	intel_fbc_init_pipe_state(dev_priv);
 }
 
 void intel_display_resume(struct drm_device *dev)
 {
-	struct drm_atomic_state *state = drm_atomic_state_alloc(dev);
-	struct intel_connector *conn;
-	struct intel_plane *plane;
-	struct drm_crtc *crtc;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+	struct drm_modeset_acquire_ctx ctx;
 	int ret;
+	bool setup = false;
 
-	if (!state)
-		return;
-
-	state->acquire_ctx = dev->mode_config.acquire_ctx;
+	dev_priv->modeset_restore_state = NULL;
 
-	/* preserve complete old state, including dpll */
-	intel_atomic_get_shared_dpll_state(state);
+	/*
+	 * This is a cludge because with real atomic modeset mode_config.mutex
+	 * won't be taken. Unfortunately some probed state like
+	 * audio_codec_enable is still protected by mode_config.mutex, so lock
+	 * it here for now.
+	 */
+	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_acquire_init(&ctx, 0);
 
-	for_each_crtc(dev, crtc) {
-		struct drm_crtc_state *crtc_state =
-			drm_atomic_get_crtc_state(state, crtc);
+retry:
+	ret = drm_modeset_lock_all_ctx(dev, &ctx);
 
-		ret = PTR_ERR_OR_ZERO(crtc_state);
-		if (ret)
-			goto err;
+	if (ret == 0 && !setup) {
+		setup = true;
 
-		/* force a restore */
-		crtc_state->mode_changed = true;
+		intel_modeset_setup_hw_state(dev);
+		i915_redisable_vga(dev);
 	}
 
-	for_each_intel_plane(dev, plane) {
-		ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(state, &plane->base));
-		if (ret)
-			goto err;
-	}
+	if (ret == 0 && state) {
+		struct drm_crtc_state *crtc_state;
+		struct drm_crtc *crtc;
+		int i;
 
-	for_each_intel_connector(dev, conn) {
-		ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(state, &conn->base));
-		if (ret)
-			goto err;
+		state->acquire_ctx = &ctx;
+
+		for_each_crtc_in_state(state, crtc, crtc_state, i) {
+			/*
+			 * Force recalculation even if we restore
+			 * current state. With fast modeset this may not result
+			 * in a modeset when the state is compatible.
+			 */
+			crtc_state->mode_changed = true;
+		}
+
+		ret = drm_atomic_commit(state);
 	}
 
-	intel_modeset_setup_hw_state(dev);
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
 
-	i915_redisable_vga(dev);
-	ret = drm_atomic_commit(state);
-	if (!ret)
-		return;
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+	mutex_unlock(&dev->mode_config.mutex);
 
-err:
-	DRM_ERROR("Restoring old state failed with %i\n", ret);
-	drm_atomic_state_free(state);
+	if (ret) {
+		DRM_ERROR("Restoring old state failed with %i\n", ret);
+		drm_atomic_state_free(state);
+	}
 }
 
 void intel_modeset_gem_init(struct drm_device *dev)
@@ -15874,9 +16003,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
 	intel_init_gt_powersave(dev);
-	mutex_unlock(&dev->struct_mutex);
 
 	intel_modeset_init_hw(dev);
 
@@ -15943,7 +16070,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_unregister_dsm_handler();
 
-	intel_fbc_disable(dev_priv);
+	intel_fbc_global_disable(dev_priv);
 
 	/* flush any delayed tasks or pending work */
 	flush_scheduled_work();
@@ -15956,9 +16083,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_cleanup_overlay(dev);
 
-	mutex_lock(&dev->struct_mutex);
 	intel_cleanup_gt_powersave(dev);
-	mutex_unlock(&dev->struct_mutex);
 
 	intel_teardown_gmbus(dev);
 }
@@ -16153,7 +16278,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 	for_each_pipe(dev_priv, i) {
 		err_printf(m, "Pipe [%d]:\n", i);
 		err_printf(m, "  Power: %s\n",
-			   error->pipe[i].power_domain_on ? "on" : "off");
+			   onoff(error->pipe[i].power_domain_on));
 		err_printf(m, "  SRC: %08x\n", error->pipe[i].source);
 		err_printf(m, "  STAT: %08x\n", error->pipe[i].stat);
 
@@ -16181,7 +16306,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "CPU transcoder: %c\n",
 			   transcoder_name(error->transcoder[i].cpu_transcoder));
 		err_printf(m, "  Power: %s\n",
-			   error->transcoder[i].power_domain_on ? "on" : "off");
+			   onoff(error->transcoder[i].power_domain_on));
 		err_printf(m, "  CONF: %08x\n", error->transcoder[i].conf);
 		err_printf(m, "  HTOTAL: %08x\n", error->transcoder[i].htotal);
 		err_printf(m, "  HBLANK: %08x\n", error->transcoder[i].hblank);
@@ -16191,24 +16316,3 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
 	}
 }
-
-void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct intel_crtc *crtc;
-
-	for_each_intel_crtc(dev, crtc) {
-		struct intel_unpin_work *work;
-
-		spin_lock_irq(&dev->event_lock);
-
-		work = crtc->unpin_work;
-
-		if (work && work->event &&
-		    work->event->base.file_priv == file) {
-			kfree(work->event);
-			work->event = NULL;
-		}
-
-		spin_unlock_irq(&dev->event_lock);
-	}
-}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index cdc2c15873dc..f069a82deb57 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -157,14 +157,9 @@ intel_dp_max_link_bw(struct intel_dp  *intel_dp)
 static u8 intel_dp_max_lane_count(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = intel_dig_port->base.base.dev;
 	u8 source_max, sink_max;
 
-	source_max = 4;
-	if (HAS_DDI(dev) && intel_dig_port->port == PORT_A &&
-	    (intel_dig_port->saved_port_bits & DDI_A_4_LANES) == 0)
-		source_max = 2;
-
+	source_max = intel_dig_port->max_lanes;
 	sink_max = drm_dp_max_lane_count(intel_dp->dpcd);
 
 	return min(source_max, sink_max);
@@ -208,6 +203,7 @@ intel_dp_mode_valid(struct drm_connector *connector,
 	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 	int target_clock = mode->clock;
 	int max_rate, mode_rate, max_lanes, max_link_clock;
+	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
 	if (is_edp(intel_dp) && fixed_mode) {
 		if (mode->hdisplay > fixed_mode->hdisplay)
@@ -225,7 +221,7 @@ intel_dp_mode_valid(struct drm_connector *connector,
 	max_rate = intel_dp_max_data_rate(max_link_clock, max_lanes);
 	mode_rate = intel_dp_link_required(target_clock, 18);
 
-	if (mode_rate > max_rate)
+	if (mode_rate > max_rate || target_clock > max_dotclk)
 		return MODE_CLOCK_HIGH;
 
 	if (mode->clock < 10000)
@@ -340,8 +336,12 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp)
 		release_cl_override = IS_CHERRYVIEW(dev) &&
 			!chv_phy_powergate_ch(dev_priv, phy, ch, true);
 
-		vlv_force_pll_on(dev, pipe, IS_CHERRYVIEW(dev) ?
-				 &chv_dpll[0].dpll : &vlv_dpll[0].dpll);
+		if (vlv_force_pll_on(dev, pipe, IS_CHERRYVIEW(dev) ?
+				     &chv_dpll[0].dpll : &vlv_dpll[0].dpll)) {
+			DRM_ERROR("Failed to force on pll for pipe %c!\n",
+				  pipe_name(pipe));
+			return;
+		}
 	}
 
 	/*
@@ -980,7 +980,10 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 		if (WARN_ON(txsize > 20))
 			return -E2BIG;
 
-		memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size);
+		if (msg->buffer)
+			memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size);
+		else
+			WARN_ON(msg->size);
 
 		ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize);
 		if (ret > 0) {
@@ -1189,7 +1192,6 @@ intel_dp_aux_fini(struct intel_dp *intel_dp)
 static int
 intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 {
-	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	enum port port = intel_dig_port->port;
 	int ret;
@@ -1200,7 +1202,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 	if (!intel_dp->aux.name)
 		return -ENOMEM;
 
-	intel_dp->aux.dev = dev->dev;
+	intel_dp->aux.dev = connector->base.kdev;
 	intel_dp->aux.transfer = intel_dp_aux_transfer;
 
 	DRM_DEBUG_KMS("registering %s bus for %s\n",
@@ -1215,16 +1217,6 @@ intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 		return ret;
 	}
 
-	ret = sysfs_create_link(&connector->base.kdev->kobj,
-				&intel_dp->aux.ddc.dev.kobj,
-				intel_dp->aux.ddc.dev.kobj.name);
-	if (ret < 0) {
-		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n",
-			  intel_dp->aux.name, ret);
-		intel_dp_aux_fini(intel_dp);
-		return ret;
-	}
-
 	return 0;
 }
 
@@ -1233,9 +1225,7 @@ intel_dp_connector_unregister(struct intel_connector *intel_connector)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(&intel_connector->base);
 
-	if (!intel_connector->mst_port)
-		sysfs_remove_link(&intel_connector->base.kdev->kobj,
-				  intel_dp->aux.ddc.dev.kobj.name);
+	intel_dp_aux_fini(intel_dp);
 	intel_connector_unregister(intel_connector);
 }
 
@@ -1812,12 +1802,21 @@ static void wait_panel_off(struct intel_dp *intel_dp)
 
 static void wait_panel_power_cycle(struct intel_dp *intel_dp)
 {
+	ktime_t panel_power_on_time;
+	s64 panel_power_off_duration;
+
 	DRM_DEBUG_KMS("Wait for panel power cycle\n");
 
+	/* take the difference of currrent time and panel power off time
+	 * and then make panel wait for t11_t12 if needed. */
+	panel_power_on_time = ktime_get_boottime();
+	panel_power_off_duration = ktime_ms_delta(panel_power_on_time, intel_dp->panel_power_off_time);
+
 	/* When we disable the VDD override bit last we have to do the manual
 	 * wait. */
-	wait_remaining_ms_from_jiffies(intel_dp->last_power_cycle,
-				       intel_dp->panel_power_cycle_delay);
+	if (panel_power_off_duration < (s64)intel_dp->panel_power_cycle_delay)
+		wait_remaining_ms_from_jiffies(jiffies,
+				       intel_dp->panel_power_cycle_delay - panel_power_off_duration);
 
 	wait_panel_status(intel_dp, IDLE_CYCLE_MASK, IDLE_CYCLE_VALUE);
 }
@@ -1969,7 +1968,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
 	I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg));
 
 	if ((pp & POWER_TARGET_ON) == 0)
-		intel_dp->last_power_cycle = jiffies;
+		intel_dp->panel_power_off_time = ktime_get_boottime();
 
 	power_domain = intel_display_port_aux_power_domain(intel_encoder);
 	intel_display_power_put(dev_priv, power_domain);
@@ -2118,7 +2117,7 @@ static void edp_panel_off(struct intel_dp *intel_dp)
 	I915_WRITE(pp_ctrl_reg, pp);
 	POSTING_READ(pp_ctrl_reg);
 
-	intel_dp->last_power_cycle = jiffies;
+	intel_dp->panel_power_off_time = ktime_get_boottime();
 	wait_panel_off(intel_dp);
 
 	/* We got a reference when we enabled the VDD. */
@@ -2243,11 +2242,6 @@ static void intel_edp_backlight_power(struct intel_connector *connector,
 		_intel_edp_backlight_off(intel_dp);
 }
 
-static const char *state_string(bool enabled)
-{
-	return enabled ? "on" : "off";
-}
-
 static void assert_dp_port(struct intel_dp *intel_dp, bool state)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
@@ -2257,7 +2251,7 @@ static void assert_dp_port(struct intel_dp *intel_dp, bool state)
 	I915_STATE_WARN(cur_state != state,
 			"DP port %c state assertion failure (expected %s, current %s)\n",
 			port_name(dig_port->port),
-			state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 #define assert_dp_port_disabled(d) assert_dp_port((d), false)
 
@@ -2267,7 +2261,7 @@ static void assert_edp_pll(struct drm_i915_private *dev_priv, bool state)
 
 	I915_STATE_WARN(cur_state != state,
 			"eDP PLL state assertion failure (expected %s, current %s)\n",
-			state_string(state), state_string(cur_state));
+			onoff(state), onoff(cur_state));
 }
 #define assert_edp_pll_enabled(d) assert_edp_pll((d), true)
 #define assert_edp_pll_disabled(d) assert_edp_pll((d), false)
@@ -4024,7 +4018,7 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp)
 	} while (--attempts && count);
 
 	if (attempts == 0) {
-		DRM_ERROR("TIMEOUT: Sink CRC counter is not zeroed\n");
+		DRM_DEBUG_KMS("TIMEOUT: Sink CRC counter is not zeroed after calculation is stopped\n");
 		ret = -ETIMEDOUT;
 	}
 
@@ -4564,7 +4558,7 @@ bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
 {
 	if (HAS_PCH_IBX(dev_priv))
 		return ibx_digital_port_connected(dev_priv, port);
-	if (HAS_PCH_SPLIT(dev_priv))
+	else if (HAS_PCH_SPLIT(dev_priv))
 		return cpt_digital_port_connected(dev_priv, port);
 	else if (IS_BROXTON(dev_priv))
 		return bxt_digital_port_connected(dev_priv, port);
@@ -4884,7 +4878,6 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder);
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 
-	intel_dp_aux_fini(intel_dp);
 	intel_dp_mst_encoder_cleanup(intel_dig_port);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
@@ -5132,7 +5125,7 @@ intel_dp_add_properties(struct intel_dp *intel_dp, struct drm_connector *connect
 
 static void intel_dp_init_panel_power_timestamps(struct intel_dp *intel_dp)
 {
-	intel_dp->last_power_cycle = jiffies;
+	intel_dp->panel_power_off_time = ktime_get_boottime();
 	intel_dp->last_power_on = jiffies;
 	intel_dp->last_backlight_off = jiffies;
 }
@@ -5849,6 +5842,11 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	enum port port = intel_dig_port->port;
 	int type, ret;
 
+	if (WARN(intel_dig_port->max_lanes < 1,
+		 "Not enough lanes (%d) for DP on port %c\n",
+		 intel_dig_port->max_lanes, port_name(port)))
+		return false;
+
 	intel_dp->pps_pipe = INVALID_PIPE;
 
 	/* intel_dp vfuncs */
@@ -6046,6 +6044,7 @@ intel_dp_init(struct drm_device *dev,
 
 	intel_dig_port->port = port;
 	intel_dig_port->dp.output_reg = output_reg;
+	intel_dig_port->max_lanes = 4;
 
 	intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
 	if (IS_CHERRYVIEW(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index fa0dabf578dc..a2bd698fe2f7 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -184,7 +184,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder)
 	intel_mst->port = found->port;
 
 	if (intel_dp->active_mst_links == 0) {
-		intel_ddi_clk_select(encoder, intel_crtc->config);
+		intel_prepare_ddi_buffer(&intel_dig_port->base);
+
+		intel_ddi_clk_select(&intel_dig_port->base, intel_crtc->config);
 
 		intel_dp_set_link_params(intel_dp, intel_crtc->config);
 
@@ -369,6 +371,8 @@ static enum drm_mode_status
 intel_dp_mst_mode_valid(struct drm_connector *connector,
 			struct drm_display_mode *mode)
 {
+	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+
 	/* TODO - validate mode against available PBN for link */
 	if (mode->clock < 10000)
 		return MODE_CLOCK_LOW;
@@ -376,6 +380,9 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return MODE_H_ILLEGAL;
 
+	if (mode->clock > max_dotclk)
+		return MODE_CLOCK_HIGH;
+
 	return MODE_OK;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index df7f3cb66056..4c027d69fac9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -246,7 +246,18 @@ struct intel_atomic_state {
 	struct drm_atomic_state base;
 
 	unsigned int cdclk;
-	bool dpll_set;
+
+	/*
+	 * Calculated device cdclk, can be different from cdclk
+	 * only when all crtc's are DPMS off.
+	 */
+	unsigned int dev_cdclk;
+
+	bool dpll_set, modeset;
+
+	unsigned int active_crtcs;
+	unsigned int min_pixclk[I915_MAX_PIPES];
+
 	struct intel_shared_dpll_config shared_dpll[I915_NUM_PLLS];
 	struct intel_wm_config wm_config;
 };
@@ -368,6 +379,7 @@ struct intel_crtc_state {
 	bool update_pipe; /* can a fast modeset be performed? */
 	bool disable_cxsr;
 	bool wm_changed; /* watermarks are updated */
+	bool fb_changed; /* fb on any of the planes is changed */
 
 	/* Pipe source size (ie. panel fitter input size)
 	 * All planes will be positioned inside this space,
@@ -481,6 +493,8 @@ struct intel_crtc_state {
 
 	bool ips_enabled;
 
+	bool enable_fbc;
+
 	bool double_wide;
 
 	bool dp_encoder_is_mst;
@@ -531,16 +545,13 @@ struct intel_mmio_flip {
  */
 struct intel_crtc_atomic_commit {
 	/* Sleepable operations to perform before commit */
-	bool disable_fbc;
-	bool disable_ips;
-	bool pre_disable_primary;
 
 	/* Sleepable operations to perform after commit */
 	unsigned fb_bits;
-	bool wait_vblank;
-	bool update_fbc;
 	bool post_enable_primary;
-	unsigned update_sprite_watermarks;
+
+	/* Sleepable operations to perform before and after commit */
+	bool update_fbc;
 };
 
 struct intel_crtc {
@@ -564,7 +575,7 @@ struct intel_crtc {
 	/* Display surface base address adjustement for pageflips. Note that on
 	 * gen4+ this only adjusts up to a tile, offsets within a tile are
 	 * handled in the hw itself (with the TILEOFF register). */
-	unsigned long dspaddr_offset;
+	u32 dspaddr_offset;
 	int adjusted_x;
 	int adjusted_y;
 
@@ -647,23 +658,17 @@ struct intel_plane {
 	/*
 	 * NOTE: Do not place new plane state fields here (e.g., when adding
 	 * new plane properties).  New runtime state should now be placed in
-	 * the intel_plane_state structure and accessed via drm_plane->state.
+	 * the intel_plane_state structure and accessed via plane_state.
 	 */
 
 	void (*update_plane)(struct drm_plane *plane,
-			     struct drm_crtc *crtc,
-			     struct drm_framebuffer *fb,
-			     int crtc_x, int crtc_y,
-			     unsigned int crtc_w, unsigned int crtc_h,
-			     uint32_t x, uint32_t y,
-			     uint32_t src_w, uint32_t src_h);
+			     const struct intel_crtc_state *crtc_state,
+			     const struct intel_plane_state *plane_state);
 	void (*disable_plane)(struct drm_plane *plane,
 			      struct drm_crtc *crtc);
 	int (*check_plane)(struct drm_plane *plane,
 			   struct intel_crtc_state *crtc_state,
 			   struct intel_plane_state *state);
-	void (*commit_plane)(struct drm_plane *plane,
-			     struct intel_plane_state *state);
 };
 
 struct intel_watermark_params {
@@ -765,9 +770,9 @@ struct intel_dp {
 	int backlight_off_delay;
 	struct delayed_work panel_vdd_work;
 	bool want_panel_vdd;
-	unsigned long last_power_cycle;
 	unsigned long last_power_on;
 	unsigned long last_backlight_off;
+	ktime_t panel_power_off_time;
 
 	struct notifier_block edp_notifier;
 
@@ -817,6 +822,7 @@ struct intel_digital_port {
 	struct intel_hdmi hdmi;
 	enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool);
 	bool release_cl2_override;
+	uint8_t max_lanes;
 	/* for communication with audio component; protected by av_mutex */
 	const struct drm_connector *audio_connector;
 };
@@ -903,9 +909,7 @@ struct intel_unpin_work {
 };
 
 struct intel_load_detect_pipe {
-	struct drm_framebuffer *release_fb;
-	bool load_detect_temp;
-	int dpms_mode;
+	struct drm_atomic_state *restore_state;
 };
 
 static inline struct intel_encoder *
@@ -988,6 +992,8 @@ static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
 int intel_get_crtc_scanline(struct intel_crtc *crtc);
 void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
 				     unsigned int pipe_mask);
+void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
+				     unsigned int pipe_mask);
 
 /* intel_crt.c */
 void intel_crt_init(struct drm_device *dev);
@@ -996,7 +1002,7 @@ void intel_crt_init(struct drm_device *dev);
 /* intel_ddi.c */
 void intel_ddi_clk_select(struct intel_encoder *encoder,
 			  const struct intel_crtc_state *pipe_config);
-void intel_prepare_ddi(struct drm_device *dev);
+void intel_prepare_ddi_buffer(struct intel_encoder *encoder);
 void hsw_fdi_link_train(struct drm_crtc *crtc);
 void intel_ddi_init(struct drm_device *dev, enum port port);
 enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder);
@@ -1041,8 +1047,8 @@ unsigned int intel_fb_align_height(struct drm_device *dev,
 				   uint64_t fb_format_modifier);
 void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
 			enum fb_op_origin origin);
-u32 intel_fb_stride_alignment(struct drm_device *dev, uint64_t fb_modifier,
-			      uint32_t pixel_format);
+u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
+			      uint64_t fb_modifier, uint32_t pixel_format);
 
 /* intel_audio.c */
 void intel_init_audio(struct drm_device *dev);
@@ -1126,9 +1132,8 @@ int intel_plane_atomic_set_property(struct drm_plane *plane,
 int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 				    struct drm_plane_state *plane_state);
 
-unsigned int
-intel_tile_height(struct drm_device *dev, uint32_t pixel_format,
-		  uint64_t fb_format_modifier, unsigned int plane);
+unsigned int intel_tile_height(const struct drm_i915_private *dev_priv,
+			       uint64_t fb_modifier, unsigned int cpp);
 
 static inline bool
 intel_rotation_90_or_270(unsigned int rotation)
@@ -1149,8 +1154,8 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv,
 struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc,
 						struct intel_crtc_state *state);
 
-void vlv_force_pll_on(struct drm_device *dev, enum pipe pipe,
-		      const struct dpll *dpll);
+int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe,
+		     const struct dpll *dpll);
 void vlv_force_pll_off(struct drm_device *dev, enum pipe pipe);
 
 /* modesetting asserts */
@@ -1167,11 +1172,11 @@ void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
 void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
 #define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
 #define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
-unsigned long intel_gen4_compute_page_offset(struct drm_i915_private *dev_priv,
-					     int *x, int *y,
-					     unsigned int tiling_mode,
-					     unsigned int bpp,
-					     unsigned int pitch);
+u32 intel_compute_tile_offset(struct drm_i915_private *dev_priv,
+			      int *x, int *y,
+			      uint64_t fb_modifier,
+			      unsigned int cpp,
+			      unsigned int pitch);
 void intel_prepare_reset(struct drm_device *dev);
 void intel_finish_reset(struct drm_device *dev);
 void hsw_enable_pc8(struct drm_i915_private *dev_priv);
@@ -1207,7 +1212,6 @@ enum intel_display_power_domain
 intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder);
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_state *pipe_config);
-void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
 
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
@@ -1222,7 +1226,7 @@ u32 skl_plane_ctl_rotation(unsigned int rotation);
 
 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
-void intel_csr_load_program(struct drm_i915_private *);
+bool intel_csr_load_program(struct drm_i915_private *);
 void intel_csr_ucode_fini(struct drm_i915_private *);
 
 /* intel_dp.c */
@@ -1323,13 +1327,16 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev)
 #endif
 
 /* intel_fbc.c */
+void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
+			   struct drm_atomic_state *state);
 bool intel_fbc_is_active(struct drm_i915_private *dev_priv);
-void intel_fbc_deactivate(struct intel_crtc *crtc);
-void intel_fbc_update(struct intel_crtc *crtc);
+void intel_fbc_pre_update(struct intel_crtc *crtc);
+void intel_fbc_post_update(struct intel_crtc *crtc);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
+void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv);
 void intel_fbc_enable(struct intel_crtc *crtc);
-void intel_fbc_disable(struct drm_i915_private *dev_priv);
-void intel_fbc_disable_crtc(struct intel_crtc *crtc);
+void intel_fbc_disable(struct intel_crtc *crtc);
+void intel_fbc_global_disable(struct drm_i915_private *dev_priv);
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 			  unsigned int frontbuffer_bits,
 			  enum fb_op_origin origin);
@@ -1558,6 +1565,7 @@ void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */);
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
+int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6);
 
 /* intel_sdvo.c */
 bool intel_sdvo_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 0193c62a53ef..01b8e9f4c272 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -478,8 +478,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	intel_dsi_prepare(encoder);
 	intel_enable_dsi_pll(encoder);
+	intel_dsi_prepare(encoder);
 
 	/* Panel Enable over CRC PMIC */
 	if (intel_dsi->gpio_panel)
@@ -634,7 +634,6 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
-	u32 val;
 
 	DRM_DEBUG_KMS("\n");
 
@@ -642,9 +641,13 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder)
 
 	intel_dsi_clear_device_ready(encoder);
 
-	val = I915_READ(DSPCLK_GATE_D);
-	val &= ~DPOUNIT_CLOCK_GATE_DISABLE;
-	I915_WRITE(DSPCLK_GATE_D, val);
+	if (!IS_BROXTON(dev_priv)) {
+		u32 val;
+
+		val = I915_READ(DSPCLK_GATE_D);
+		val &= ~DPOUNIT_CLOCK_GATE_DISABLE;
+		I915_WRITE(DSPCLK_GATE_D, val);
+	}
 
 	drm_panel_unprepare(intel_dsi->panel);
 
@@ -709,7 +712,7 @@ out:
 static void intel_dsi_get_config(struct intel_encoder *encoder,
 				 struct intel_crtc_state *pipe_config)
 {
-	u32 pclk = 0;
+	u32 pclk;
 	DRM_DEBUG_KMS("\n");
 
 	pipe_config->has_dsi_encoder = true;
@@ -720,12 +723,7 @@ static void intel_dsi_get_config(struct intel_encoder *encoder,
 	 */
 	pipe_config->dpll_hw_state.dpll_md = 0;
 
-	if (IS_BROXTON(encoder->base.dev))
-		pclk = bxt_get_dsi_pclk(encoder, pipe_config->pipe_bpp);
-	else if (IS_VALLEYVIEW(encoder->base.dev) ||
-		 IS_CHERRYVIEW(encoder->base.dev))
-		pclk = vlv_get_dsi_pclk(encoder, pipe_config->pipe_bpp);
-
+	pclk = intel_dsi_get_pclk(encoder, pipe_config->pipe_bpp);
 	if (!pclk)
 		return;
 
@@ -787,10 +785,9 @@ static void set_dsi_timings(struct drm_encoder *encoder,
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	enum port port;
-	unsigned int bpp = intel_crtc->config->pipe_bpp;
+	unsigned int bpp = dsi_pixel_format_bpp(intel_dsi->pixel_format);
 	unsigned int lane_count = intel_dsi->lane_count;
 
 	u16 hactive, hfp, hsync, hbp, vfp, vsync, vbp;
@@ -861,7 +858,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder)
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder);
 	const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
 	enum port port;
-	unsigned int bpp = intel_crtc->config->pipe_bpp;
+	unsigned int bpp = dsi_pixel_format_bpp(intel_dsi->pixel_format);
 	u32 val, tmp;
 	u16 mode_hdisplay;
 
diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
index 02551ff228c2..92f39227b361 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/intel_dsi.h
@@ -34,6 +34,8 @@
 #define DSI_DUAL_LINK_FRONT_BACK	1
 #define DSI_DUAL_LINK_PIXEL_ALT		2
 
+int dsi_pixel_format_bpp(int pixel_format);
+
 struct intel_dsi_host;
 
 struct intel_dsi {
@@ -126,8 +128,7 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder)
 
 extern void intel_enable_dsi_pll(struct intel_encoder *encoder);
 extern void intel_disable_dsi_pll(struct intel_encoder *encoder);
-extern u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp);
-extern u32 bxt_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp);
+extern u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp);
 extern void intel_dsi_reset_clocks(struct intel_encoder *encoder,
 							enum port port);
 
diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
index e8113ad65477..7f145b4fec6a 100644
--- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
@@ -234,28 +234,33 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
 	if (!gtable[gpio].init) {
 		/* program the function */
 		/* FIXME: remove constant below */
-		vlv_gpio_nc_write(dev_priv, function, 0x2000CC00);
+		vlv_iosf_sb_write(dev_priv, IOSF_PORT_GPIO_NC, function,
+				  0x2000CC00);
 		gtable[gpio].init = 1;
 	}
 
 	val = 0x4 | action;
 
 	/* pull up/down */
-	vlv_gpio_nc_write(dev_priv, pad, val);
+	vlv_iosf_sb_write(dev_priv, IOSF_PORT_GPIO_NC, pad, val);
 	mutex_unlock(&dev_priv->sb_lock);
 
 out:
 	return data;
 }
 
+static const u8 *mipi_exec_i2c_skip(struct intel_dsi *intel_dsi, const u8 *data)
+{
+	return data + *(data + 6) + 7;
+}
+
 typedef const u8 * (*fn_mipi_elem_exec)(struct intel_dsi *intel_dsi,
 					const u8 *data);
 static const fn_mipi_elem_exec exec_elem[] = {
-	NULL, /* reserved */
-	mipi_exec_send_packet,
-	mipi_exec_delay,
-	mipi_exec_gpio,
-	NULL, /* status read; later */
+	[MIPI_SEQ_ELEM_SEND_PKT] = mipi_exec_send_packet,
+	[MIPI_SEQ_ELEM_DELAY] = mipi_exec_delay,
+	[MIPI_SEQ_ELEM_GPIO] = mipi_exec_gpio,
+	[MIPI_SEQ_ELEM_I2C] = mipi_exec_i2c_skip,
 };
 
 /*
@@ -265,107 +270,114 @@ static const fn_mipi_elem_exec exec_elem[] = {
  */
 
 static const char * const seq_name[] = {
-	"UNDEFINED",
-	"MIPI_SEQ_ASSERT_RESET",
-	"MIPI_SEQ_INIT_OTP",
-	"MIPI_SEQ_DISPLAY_ON",
-	"MIPI_SEQ_DISPLAY_OFF",
-	"MIPI_SEQ_DEASSERT_RESET"
+	[MIPI_SEQ_ASSERT_RESET] = "MIPI_SEQ_ASSERT_RESET",
+	[MIPI_SEQ_INIT_OTP] = "MIPI_SEQ_INIT_OTP",
+	[MIPI_SEQ_DISPLAY_ON] = "MIPI_SEQ_DISPLAY_ON",
+	[MIPI_SEQ_DISPLAY_OFF]  = "MIPI_SEQ_DISPLAY_OFF",
+	[MIPI_SEQ_DEASSERT_RESET] = "MIPI_SEQ_DEASSERT_RESET",
+	[MIPI_SEQ_BACKLIGHT_ON] = "MIPI_SEQ_BACKLIGHT_ON",
+	[MIPI_SEQ_BACKLIGHT_OFF] = "MIPI_SEQ_BACKLIGHT_OFF",
+	[MIPI_SEQ_TEAR_ON] = "MIPI_SEQ_TEAR_ON",
+	[MIPI_SEQ_TEAR_OFF] = "MIPI_SEQ_TEAR_OFF",
+	[MIPI_SEQ_POWER_ON] = "MIPI_SEQ_POWER_ON",
+	[MIPI_SEQ_POWER_OFF] = "MIPI_SEQ_POWER_OFF",
 };
 
-static void generic_exec_sequence(struct intel_dsi *intel_dsi, const u8 *data)
+static const char *sequence_name(enum mipi_seq seq_id)
+{
+	if (seq_id < ARRAY_SIZE(seq_name) && seq_name[seq_id])
+		return seq_name[seq_id];
+	else
+		return "(unknown)";
+}
+
+static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id)
 {
+	struct vbt_panel *vbt_panel = to_vbt_panel(panel);
+	struct intel_dsi *intel_dsi = vbt_panel->intel_dsi;
+	struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev);
+	const u8 *data;
 	fn_mipi_elem_exec mipi_elem_exec;
-	int index;
 
-	if (!data)
+	if (WARN_ON(seq_id >= ARRAY_SIZE(dev_priv->vbt.dsi.sequence)))
 		return;
 
-	DRM_DEBUG_DRIVER("Starting MIPI sequence - %s\n", seq_name[*data]);
+	data = dev_priv->vbt.dsi.sequence[seq_id];
+	if (!data) {
+		DRM_DEBUG_KMS("MIPI sequence %d - %s not available\n",
+			      seq_id, sequence_name(seq_id));
+		return;
+	}
 
-	/* go to the first element of the sequence */
-	data++;
+	WARN_ON(*data != seq_id);
 
-	/* parse each byte till we reach end of sequence byte - 0x00 */
-	while (1) {
-		index = *data;
-		mipi_elem_exec = exec_elem[index];
-		if (!mipi_elem_exec) {
-			DRM_ERROR("Unsupported MIPI element, skipping sequence execution\n");
-			return;
-		}
+	DRM_DEBUG_KMS("Starting MIPI sequence %d - %s\n",
+		      seq_id, sequence_name(seq_id));
 
-		/* goto element payload */
-		data++;
+	/* Skip Sequence Byte. */
+	data++;
 
-		/* execute the element specific rotines */
-		data = mipi_elem_exec(intel_dsi, data);
+	/* Skip Size of Sequence. */
+	if (dev_priv->vbt.dsi.seq_version >= 3)
+		data += 4;
 
-		/*
-		 * After processing the element, data should point to
-		 * next element or end of sequence
-		 * check if have we reached end of sequence
-		 */
-		if (*data == 0x00)
+	while (1) {
+		u8 operation_byte = *data++;
+		u8 operation_size = 0;
+
+		if (operation_byte == MIPI_SEQ_ELEM_END)
 			break;
+
+		if (operation_byte < ARRAY_SIZE(exec_elem))
+			mipi_elem_exec = exec_elem[operation_byte];
+		else
+			mipi_elem_exec = NULL;
+
+		/* Size of Operation. */
+		if (dev_priv->vbt.dsi.seq_version >= 3)
+			operation_size = *data++;
+
+		if (mipi_elem_exec) {
+			data = mipi_elem_exec(intel_dsi, data);
+		} else if (operation_size) {
+			/* We have size, skip. */
+			DRM_DEBUG_KMS("Unsupported MIPI operation byte %u\n",
+				      operation_byte);
+			data += operation_size;
+		} else {
+			/* No size, can't skip without parsing. */
+			DRM_ERROR("Unsupported MIPI operation byte %u\n",
+				  operation_byte);
+			return;
+		}
 	}
 }
 
 static int vbt_panel_prepare(struct drm_panel *panel)
 {
-	struct vbt_panel *vbt_panel = to_vbt_panel(panel);
-	struct intel_dsi *intel_dsi = vbt_panel->intel_dsi;
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const u8 *sequence;
-
-	sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET];
-	generic_exec_sequence(intel_dsi, sequence);
-
-	sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP];
-	generic_exec_sequence(intel_dsi, sequence);
+	generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET);
+	generic_exec_sequence(panel, MIPI_SEQ_INIT_OTP);
 
 	return 0;
 }
 
 static int vbt_panel_unprepare(struct drm_panel *panel)
 {
-	struct vbt_panel *vbt_panel = to_vbt_panel(panel);
-	struct intel_dsi *intel_dsi = vbt_panel->intel_dsi;
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const u8 *sequence;
-
-	sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET];
-	generic_exec_sequence(intel_dsi, sequence);
+	generic_exec_sequence(panel, MIPI_SEQ_DEASSERT_RESET);
 
 	return 0;
 }
 
 static int vbt_panel_enable(struct drm_panel *panel)
 {
-	struct vbt_panel *vbt_panel = to_vbt_panel(panel);
-	struct intel_dsi *intel_dsi = vbt_panel->intel_dsi;
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const u8 *sequence;
-
-	sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_ON];
-	generic_exec_sequence(intel_dsi, sequence);
+	generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_ON);
 
 	return 0;
 }
 
 static int vbt_panel_disable(struct drm_panel *panel)
 {
-	struct vbt_panel *vbt_panel = to_vbt_panel(panel);
-	struct intel_dsi *intel_dsi = vbt_panel->intel_dsi;
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const u8 *sequence;
-
-	sequence = dev_priv->vbt.dsi.sequence[MIPI_SEQ_DISPLAY_OFF];
-	generic_exec_sequence(intel_dsi, sequence);
+	generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_OFF);
 
 	return 0;
 }
@@ -428,10 +440,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	intel_dsi->dual_link = mipi_config->dual_link;
 	intel_dsi->pixel_overlap = mipi_config->pixel_overlap;
 
-	if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB666)
-		bits_per_pixel = 18;
-	else if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB565)
-		bits_per_pixel = 16;
+	bits_per_pixel = dsi_pixel_format_bpp(intel_dsi->pixel_format);
 
 	intel_dsi->operation_mode = mipi_config->is_cmd_mode;
 	intel_dsi->video_mode_format = mipi_config->video_transfer_mode;
@@ -685,6 +694,8 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id)
 
 	/* This is cheating a bit with the cleanup. */
 	vbt_panel = devm_kzalloc(dev->dev, sizeof(*vbt_panel), GFP_KERNEL);
+	if (!vbt_panel)
+		return NULL;
 
 	vbt_panel->intel_dsi = intel_dsi;
 	drm_panel_init(&vbt_panel->panel);
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index fbd2b51810ca..70883c54cb0a 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -30,15 +30,7 @@
 #include "i915_drv.h"
 #include "intel_dsi.h"
 
-#define DSI_HSS_PACKET_SIZE		4
-#define DSI_HSE_PACKET_SIZE		4
-#define DSI_HSA_PACKET_EXTRA_SIZE	6
-#define DSI_HBP_PACKET_EXTRA_SIZE	6
-#define DSI_HACTIVE_PACKET_EXTRA_SIZE	6
-#define DSI_HFP_PACKET_EXTRA_SIZE	6
-#define DSI_EOTP_PACKET_SIZE		4
-
-static int dsi_pixel_format_bpp(int pixel_format)
+int dsi_pixel_format_bpp(int pixel_format)
 {
 	int bpp;
 
@@ -71,77 +63,6 @@ static const u32 lfsr_converts[] = {
 	71, 35, 273, 136, 324, 418, 465, 488, 500, 506		/* 91 - 100 */
 };
 
-#ifdef DSI_CLK_FROM_RR
-
-static u32 dsi_rr_formula(const struct drm_display_mode *mode,
-			  int pixel_format, int video_mode_format,
-			  int lane_count, bool eotp)
-{
-	u32 bpp;
-	u32 hactive, vactive, hfp, hsync, hbp, vfp, vsync, vbp;
-	u32 hsync_bytes, hbp_bytes, hactive_bytes, hfp_bytes;
-	u32 bytes_per_line, bytes_per_frame;
-	u32 num_frames;
-	u32 bytes_per_x_frames, bytes_per_x_frames_x_lanes;
-	u32 dsi_bit_clock_hz;
-	u32 dsi_clk;
-
-	bpp = dsi_pixel_format_bpp(pixel_format);
-
-	hactive = mode->hdisplay;
-	vactive = mode->vdisplay;
-	hfp = mode->hsync_start - mode->hdisplay;
-	hsync = mode->hsync_end - mode->hsync_start;
-	hbp = mode->htotal - mode->hsync_end;
-
-	vfp = mode->vsync_start - mode->vdisplay;
-	vsync = mode->vsync_end - mode->vsync_start;
-	vbp = mode->vtotal - mode->vsync_end;
-
-	hsync_bytes = DIV_ROUND_UP(hsync * bpp, 8);
-	hbp_bytes = DIV_ROUND_UP(hbp * bpp, 8);
-	hactive_bytes = DIV_ROUND_UP(hactive * bpp, 8);
-	hfp_bytes = DIV_ROUND_UP(hfp * bpp, 8);
-
-	bytes_per_line = DSI_HSS_PACKET_SIZE + hsync_bytes +
-		DSI_HSA_PACKET_EXTRA_SIZE + DSI_HSE_PACKET_SIZE +
-		hbp_bytes + DSI_HBP_PACKET_EXTRA_SIZE +
-		hactive_bytes + DSI_HACTIVE_PACKET_EXTRA_SIZE +
-		hfp_bytes + DSI_HFP_PACKET_EXTRA_SIZE;
-
-	/*
-	 * XXX: Need to accurately calculate LP to HS transition timeout and add
-	 * it to bytes_per_line/bytes_per_frame.
-	 */
-
-	if (eotp && video_mode_format == VIDEO_MODE_BURST)
-		bytes_per_line += DSI_EOTP_PACKET_SIZE;
-
-	bytes_per_frame = vsync * bytes_per_line + vbp * bytes_per_line +
-		vactive * bytes_per_line + vfp * bytes_per_line;
-
-	if (eotp &&
-	    (video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ||
-	     video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS))
-		bytes_per_frame += DSI_EOTP_PACKET_SIZE;
-
-	num_frames = drm_mode_vrefresh(mode);
-	bytes_per_x_frames = num_frames * bytes_per_frame;
-
-	bytes_per_x_frames_x_lanes = bytes_per_x_frames / lane_count;
-
-	/* the dsi clock is divided by 2 in the hardware to get dsi ddr clock */
-	dsi_bit_clock_hz = bytes_per_x_frames_x_lanes * 8;
-	dsi_clk = dsi_bit_clock_hz / 1000;
-
-	if (eotp && video_mode_format == VIDEO_MODE_BURST)
-		dsi_clk *= 2;
-
-	return dsi_clk;
-}
-
-#else
-
 /* Get DSI clock from pixel clock */
 static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
 {
@@ -155,8 +76,6 @@ static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
 	return dsi_clk_khz;
 }
 
-#endif
-
 static int dsi_calc_mnp(struct drm_i915_private *dev_priv,
 			struct dsi_mnp *dsi_mnp, int target_dsi_clk)
 {
@@ -322,7 +241,7 @@ static void assert_bpp_mismatch(int pixel_format, int pipe_bpp)
 	     bpp, pipe_bpp);
 }
 
-u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
+static u32 vlv_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
@@ -384,7 +303,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 	return pclk;
 }
 
-u32 bxt_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
+static u32 bxt_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp)
 {
 	u32 pclk;
 	u32 dsi_clk;
@@ -419,6 +338,14 @@ u32 bxt_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
 	return pclk;
 }
 
+u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp)
+{
+	if (IS_BROXTON(encoder->base.dev))
+		return bxt_dsi_get_pclk(encoder, pipe_bpp);
+	else
+		return vlv_dsi_get_pclk(encoder, pipe_bpp);
+}
+
 static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port)
 {
 	u32 temp;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index a1988a486b92..0f0492f4a357 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -43,7 +43,7 @@
 
 static inline bool fbc_supported(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->fbc.activate != NULL;
+	return HAS_FBC(dev_priv);
 }
 
 static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv)
@@ -56,6 +56,11 @@ static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv)
 	return INTEL_INFO(dev_priv)->gen < 4;
 }
 
+static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv)
+{
+	return INTEL_INFO(dev_priv)->gen <= 3;
+}
+
 /*
  * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the
  * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's
@@ -74,19 +79,17 @@ static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc)
  * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value
  * we wrote to PIPESRC.
  */
-static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc,
+static void intel_fbc_get_plane_source_size(struct intel_fbc_state_cache *cache,
 					    int *width, int *height)
 {
-	struct intel_plane_state *plane_state =
-			to_intel_plane_state(crtc->base.primary->state);
 	int w, h;
 
-	if (intel_rotation_90_or_270(plane_state->base.rotation)) {
-		w = drm_rect_height(&plane_state->src) >> 16;
-		h = drm_rect_width(&plane_state->src) >> 16;
+	if (intel_rotation_90_or_270(cache->plane.rotation)) {
+		w = cache->plane.src_h;
+		h = cache->plane.src_w;
 	} else {
-		w = drm_rect_width(&plane_state->src) >> 16;
-		h = drm_rect_height(&plane_state->src) >> 16;
+		w = cache->plane.src_w;
+		h = cache->plane.src_h;
 	}
 
 	if (width)
@@ -95,26 +98,23 @@ static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc,
 		*height = h;
 }
 
-static int intel_fbc_calculate_cfb_size(struct intel_crtc *crtc,
-					struct drm_framebuffer *fb)
+static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
+					struct intel_fbc_state_cache *cache)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	int lines;
 
-	intel_fbc_get_plane_source_size(crtc, NULL, &lines);
+	intel_fbc_get_plane_source_size(cache, NULL, &lines);
 	if (INTEL_INFO(dev_priv)->gen >= 7)
 		lines = min(lines, 2048);
 
 	/* Hardware needs the full buffer stride, not just the active area. */
-	return lines * fb->pitches[0];
+	return lines * cache->fb.stride;
 }
 
 static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 fbc_ctl;
 
-	dev_priv->fbc.active = false;
-
 	/* Disable compression */
 	fbc_ctl = I915_READ(FBC_CONTROL);
 	if ((fbc_ctl & FBC_CTL_EN) == 0)
@@ -130,21 +130,17 @@ static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void i8xx_fbc_activate(struct intel_crtc *crtc)
+static void i8xx_fbc_activate(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
 	int cfb_pitch;
 	int i;
 	u32 fbc_ctl;
 
-	dev_priv->fbc.active = true;
-
 	/* Note: fbc.threshold == 1 for i8xx */
-	cfb_pitch = intel_fbc_calculate_cfb_size(crtc, fb) / FBC_LL_SIZE;
-	if (fb->pitches[0] < cfb_pitch)
-		cfb_pitch = fb->pitches[0];
+	cfb_pitch = params->cfb_size / FBC_LL_SIZE;
+	if (params->fb.stride < cfb_pitch)
+		cfb_pitch = params->fb.stride;
 
 	/* FBC_CTL wants 32B or 64B units */
 	if (IS_GEN2(dev_priv))
@@ -161,9 +157,9 @@ static void i8xx_fbc_activate(struct intel_crtc *crtc)
 
 		/* Set it up... */
 		fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
-		fbc_ctl2 |= FBC_CTL_PLANE(crtc->plane);
+		fbc_ctl2 |= FBC_CTL_PLANE(params->crtc.plane);
 		I915_WRITE(FBC_CONTROL2, fbc_ctl2);
-		I915_WRITE(FBC_FENCE_OFF, get_crtc_fence_y_offset(crtc));
+		I915_WRITE(FBC_FENCE_OFF, params->crtc.fence_y_offset);
 	}
 
 	/* enable it... */
@@ -173,7 +169,7 @@ static void i8xx_fbc_activate(struct intel_crtc *crtc)
 	if (IS_I945GM(dev_priv))
 		fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
 	fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
-	fbc_ctl |= obj->fence_reg;
+	fbc_ctl |= params->fb.fence_reg;
 	I915_WRITE(FBC_CONTROL, fbc_ctl);
 }
 
@@ -182,23 +178,19 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
 	return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
 }
 
-static void g4x_fbc_activate(struct intel_crtc *crtc)
+static void g4x_fbc_activate(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.active = true;
-
-	dpfc_ctl = DPFC_CTL_PLANE(crtc->plane) | DPFC_SR_EN;
-	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane) | DPFC_SR_EN;
+	if (drm_format_plane_cpp(params->fb.pixel_format, 0) == 2)
 		dpfc_ctl |= DPFC_CTL_LIMIT_2X;
 	else
 		dpfc_ctl |= DPFC_CTL_LIMIT_1X;
-	dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
+	dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg;
 
-	I915_WRITE(DPFC_FENCE_YOFF, get_crtc_fence_y_offset(crtc));
+	I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
 
 	/* enable it... */
 	I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
@@ -208,8 +200,6 @@ static void g4x_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.active = false;
-
 	/* Disable compression */
 	dpfc_ctl = I915_READ(DPFC_CONTROL);
 	if (dpfc_ctl & DPFC_CTL_EN) {
@@ -230,19 +220,14 @@ static void intel_fbc_recompress(struct drm_i915_private *dev_priv)
 	POSTING_READ(MSG_FBC_REND_STATE);
 }
 
-static void ilk_fbc_activate(struct intel_crtc *crtc)
+static void ilk_fbc_activate(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
-	unsigned int y_offset;
 
-	dev_priv->fbc.active = true;
-
-	dpfc_ctl = DPFC_CTL_PLANE(crtc->plane);
-	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+	dpfc_ctl = DPFC_CTL_PLANE(params->crtc.plane);
+	if (drm_format_plane_cpp(params->fb.pixel_format, 0) == 2)
 		threshold++;
 
 	switch (threshold) {
@@ -259,18 +244,17 @@ static void ilk_fbc_activate(struct intel_crtc *crtc)
 	}
 	dpfc_ctl |= DPFC_CTL_FENCE_EN;
 	if (IS_GEN5(dev_priv))
-		dpfc_ctl |= obj->fence_reg;
+		dpfc_ctl |= params->fb.fence_reg;
 
-	y_offset = get_crtc_fence_y_offset(crtc);
-	I915_WRITE(ILK_DPFC_FENCE_YOFF, y_offset);
-	I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
+	I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset);
+	I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID);
 	/* enable it... */
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	if (IS_GEN6(dev_priv)) {
 		I915_WRITE(SNB_DPFC_CTL_SA,
-			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
-		I915_WRITE(DPFC_CPU_FENCE_OFFSET, y_offset);
+			   SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
+		I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
 	}
 
 	intel_fbc_recompress(dev_priv);
@@ -280,8 +264,6 @@ static void ilk_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
 
-	dev_priv->fbc.active = false;
-
 	/* Disable compression */
 	dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
 	if (dpfc_ctl & DPFC_CTL_EN) {
@@ -295,21 +277,17 @@ static bool ilk_fbc_is_active(struct drm_i915_private *dev_priv)
 	return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
 }
 
-static void gen7_fbc_activate(struct intel_crtc *crtc)
+static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->fb;
-	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
 	u32 dpfc_ctl;
 	int threshold = dev_priv->fbc.threshold;
 
-	dev_priv->fbc.active = true;
-
 	dpfc_ctl = 0;
 	if (IS_IVYBRIDGE(dev_priv))
-		dpfc_ctl |= IVB_DPFC_CTL_PLANE(crtc->plane);
+		dpfc_ctl |= IVB_DPFC_CTL_PLANE(params->crtc.plane);
 
-	if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
+	if (drm_format_plane_cpp(params->fb.pixel_format, 0) == 2)
 		threshold++;
 
 	switch (threshold) {
@@ -337,20 +315,60 @@ static void gen7_fbc_activate(struct intel_crtc *crtc)
 			   ILK_FBCQ_DIS);
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		/* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
-		I915_WRITE(CHICKEN_PIPESL_1(crtc->pipe),
-			   I915_READ(CHICKEN_PIPESL_1(crtc->pipe)) |
+		I915_WRITE(CHICKEN_PIPESL_1(params->crtc.pipe),
+			   I915_READ(CHICKEN_PIPESL_1(params->crtc.pipe)) |
 			   HSW_FBCQ_DIS);
 	}
 
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	I915_WRITE(SNB_DPFC_CTL_SA,
-		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
-	I915_WRITE(DPFC_CPU_FENCE_OFFSET, get_crtc_fence_y_offset(crtc));
+		   SNB_CPU_FENCE_ENABLE | params->fb.fence_reg);
+	I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset);
 
 	intel_fbc_recompress(dev_priv);
 }
 
+static bool intel_fbc_hw_is_active(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_INFO(dev_priv)->gen >= 5)
+		return ilk_fbc_is_active(dev_priv);
+	else if (IS_GM45(dev_priv))
+		return g4x_fbc_is_active(dev_priv);
+	else
+		return i8xx_fbc_is_active(dev_priv);
+}
+
+static void intel_fbc_hw_activate(struct drm_i915_private *dev_priv)
+{
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
+	fbc->active = true;
+
+	if (INTEL_INFO(dev_priv)->gen >= 7)
+		gen7_fbc_activate(dev_priv);
+	else if (INTEL_INFO(dev_priv)->gen >= 5)
+		ilk_fbc_activate(dev_priv);
+	else if (IS_GM45(dev_priv))
+		g4x_fbc_activate(dev_priv);
+	else
+		i8xx_fbc_activate(dev_priv);
+}
+
+static void intel_fbc_hw_deactivate(struct drm_i915_private *dev_priv)
+{
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
+	fbc->active = false;
+
+	if (INTEL_INFO(dev_priv)->gen >= 5)
+		ilk_fbc_deactivate(dev_priv);
+	else if (IS_GM45(dev_priv))
+		g4x_fbc_deactivate(dev_priv);
+	else
+		i8xx_fbc_deactivate(dev_priv);
+}
+
 /**
  * intel_fbc_is_active - Is FBC active?
  * @dev_priv: i915 device instance
@@ -364,24 +382,24 @@ bool intel_fbc_is_active(struct drm_i915_private *dev_priv)
 	return dev_priv->fbc.active;
 }
 
-static void intel_fbc_activate(const struct drm_framebuffer *fb)
-{
-	struct drm_i915_private *dev_priv = fb->dev->dev_private;
-	struct intel_crtc *crtc = dev_priv->fbc.crtc;
-
-	dev_priv->fbc.activate(crtc);
-
-	dev_priv->fbc.fb_id = fb->base.id;
-	dev_priv->fbc.y = crtc->base.y;
-}
-
 static void intel_fbc_work_fn(struct work_struct *__work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(__work, struct drm_i915_private, fbc.work.work);
-	struct intel_fbc_work *work = &dev_priv->fbc.work;
-	struct intel_crtc *crtc = dev_priv->fbc.crtc;
-	int delay_ms = 50;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_work *work = &fbc->work;
+	struct intel_crtc *crtc = fbc->crtc;
+	struct drm_vblank_crtc *vblank = &dev_priv->dev->vblank[crtc->pipe];
+
+	if (drm_crtc_vblank_get(&crtc->base)) {
+		DRM_ERROR("vblank not available for FBC on pipe %c\n",
+			  pipe_name(crtc->pipe));
+
+		mutex_lock(&fbc->lock);
+		work->scheduled = false;
+		mutex_unlock(&fbc->lock);
+		return;
+	}
 
 retry:
 	/* Delay the actual enabling to let pageflipping cease and the
@@ -390,142 +408,97 @@ retry:
 	 * vblank to pass after disabling the FBC before we attempt
 	 * to modify the control registers.
 	 *
-	 * A more complicated solution would involve tracking vblanks
-	 * following the termination of the page-flipping sequence
-	 * and indeed performing the enable as a co-routine and not
-	 * waiting synchronously upon the vblank.
-	 *
 	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
+	 *
+	 * It is also worth mentioning that since work->scheduled_vblank can be
+	 * updated multiple times by the other threads, hitting the timeout is
+	 * not an error condition. We'll just end up hitting the "goto retry"
+	 * case below.
 	 */
-	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_ms);
+	wait_event_timeout(vblank->queue,
+		drm_crtc_vblank_count(&crtc->base) != work->scheduled_vblank,
+		msecs_to_jiffies(50));
 
-	mutex_lock(&dev_priv->fbc.lock);
+	mutex_lock(&fbc->lock);
 
 	/* Were we cancelled? */
 	if (!work->scheduled)
 		goto out;
 
 	/* Were we delayed again while this function was sleeping? */
-	if (time_after(work->enable_jiffies + msecs_to_jiffies(delay_ms),
-		       jiffies)) {
-		mutex_unlock(&dev_priv->fbc.lock);
+	if (drm_crtc_vblank_count(&crtc->base) == work->scheduled_vblank) {
+		mutex_unlock(&fbc->lock);
 		goto retry;
 	}
 
-	if (crtc->base.primary->fb == work->fb)
-		intel_fbc_activate(work->fb);
+	intel_fbc_hw_activate(dev_priv);
 
 	work->scheduled = false;
 
 out:
-	mutex_unlock(&dev_priv->fbc.lock);
-}
-
-static void intel_fbc_cancel_work(struct drm_i915_private *dev_priv)
-{
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
-	dev_priv->fbc.work.scheduled = false;
+	mutex_unlock(&fbc->lock);
+	drm_crtc_vblank_put(&crtc->base);
 }
 
 static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct intel_fbc_work *work = &dev_priv->fbc.work;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_work *work = &fbc->work;
+
+	WARN_ON(!mutex_is_locked(&fbc->lock));
 
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
+	if (drm_crtc_vblank_get(&crtc->base)) {
+		DRM_ERROR("vblank not available for FBC on pipe %c\n",
+			  pipe_name(crtc->pipe));
+		return;
+	}
 
-	/* It is useless to call intel_fbc_cancel_work() in this function since
-	 * we're not releasing fbc.lock, so it won't have an opportunity to grab
-	 * it to discover that it was cancelled. So we just update the expected
-	 * jiffy count. */
-	work->fb = crtc->base.primary->fb;
+	/* It is useless to call intel_fbc_cancel_work() or cancel_work() in
+	 * this function since we're not releasing fbc.lock, so it won't have an
+	 * opportunity to grab it to discover that it was cancelled. So we just
+	 * update the expected jiffy count. */
 	work->scheduled = true;
-	work->enable_jiffies = jiffies;
+	work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base);
+	drm_crtc_vblank_put(&crtc->base);
 
 	schedule_work(&work->work);
 }
 
-static void __intel_fbc_deactivate(struct drm_i915_private *dev_priv)
+static void intel_fbc_deactivate(struct drm_i915_private *dev_priv)
 {
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
-
-	intel_fbc_cancel_work(dev_priv);
-
-	if (dev_priv->fbc.active)
-		dev_priv->fbc.deactivate(dev_priv);
-}
-
-/*
- * intel_fbc_deactivate - deactivate FBC if it's associated with crtc
- * @crtc: the CRTC
- *
- * This function deactivates FBC if it's associated with the provided CRTC.
- */
-void intel_fbc_deactivate(struct intel_crtc *crtc)
-{
-	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-
-	if (!fbc_supported(dev_priv))
-		return;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 
-	mutex_lock(&dev_priv->fbc.lock);
-	if (dev_priv->fbc.crtc == crtc)
-		__intel_fbc_deactivate(dev_priv);
-	mutex_unlock(&dev_priv->fbc.lock);
-}
+	WARN_ON(!mutex_is_locked(&fbc->lock));
 
-static void set_no_fbc_reason(struct drm_i915_private *dev_priv,
-			      const char *reason)
-{
-	if (dev_priv->fbc.no_fbc_reason == reason)
-		return;
+	/* Calling cancel_work() here won't help due to the fact that the work
+	 * function grabs fbc->lock. Just set scheduled to false so the work
+	 * function can know it was cancelled. */
+	fbc->work.scheduled = false;
 
-	dev_priv->fbc.no_fbc_reason = reason;
-	DRM_DEBUG_KMS("Disabling FBC: %s\n", reason);
+	if (fbc->active)
+		intel_fbc_hw_deactivate(dev_priv);
 }
 
-static bool crtc_can_fbc(struct intel_crtc *crtc)
+static bool multiple_pipes_ok(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct drm_plane *primary = crtc->base.primary;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	enum pipe pipe = crtc->pipe;
 
-	if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A)
-		return false;
-
-	if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A)
-		return false;
-
-	return true;
-}
-
-static bool crtc_is_valid(struct intel_crtc *crtc)
-{
-	if (!intel_crtc_active(&crtc->base))
-		return false;
-
-	if (!to_intel_plane_state(crtc->base.primary->state)->visible)
-		return false;
-
-	return true;
-}
-
-static bool multiple_pipes_ok(struct drm_i915_private *dev_priv)
-{
-	enum pipe pipe;
-	int n_pipes = 0;
-	struct drm_crtc *crtc;
-
-	if (INTEL_INFO(dev_priv)->gen > 4)
+	/* Don't even bother tracking anything we don't need. */
+	if (!no_fbc_on_multiple_pipes(dev_priv))
 		return true;
 
-	for_each_pipe(dev_priv, pipe) {
-		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	WARN_ON(!drm_modeset_is_locked(&primary->mutex));
 
-		if (intel_crtc_active(crtc) &&
-		    to_intel_plane_state(crtc->primary->state)->visible)
-			n_pipes++;
-	}
+	if (to_intel_plane_state(primary->state)->visible)
+		fbc->visible_pipes_mask |= (1 << pipe);
+	else
+		fbc->visible_pipes_mask &= ~(1 << pipe);
 
-	return (n_pipes < 2);
+	return (fbc->visible_pipes_mask & ~(1 << pipe)) != 0;
 }
 
 static int find_compression_threshold(struct drm_i915_private *dev_priv,
@@ -581,16 +554,16 @@ again:
 static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb = crtc->base.primary->state->fb;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 	struct drm_mm_node *uninitialized_var(compressed_llb);
 	int size, fb_cpp, ret;
 
-	WARN_ON(drm_mm_node_allocated(&dev_priv->fbc.compressed_fb));
+	WARN_ON(drm_mm_node_allocated(&fbc->compressed_fb));
 
-	size = intel_fbc_calculate_cfb_size(crtc, fb);
-	fb_cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	size = intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache);
+	fb_cpp = drm_format_plane_cpp(fbc->state_cache.fb.pixel_format, 0);
 
-	ret = find_compression_threshold(dev_priv, &dev_priv->fbc.compressed_fb,
+	ret = find_compression_threshold(dev_priv, &fbc->compressed_fb,
 					 size, fb_cpp);
 	if (!ret)
 		goto err_llb;
@@ -599,12 +572,12 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
 
 	}
 
-	dev_priv->fbc.threshold = ret;
+	fbc->threshold = ret;
 
 	if (INTEL_INFO(dev_priv)->gen >= 5)
-		I915_WRITE(ILK_DPFC_CB_BASE, dev_priv->fbc.compressed_fb.start);
+		I915_WRITE(ILK_DPFC_CB_BASE, fbc->compressed_fb.start);
 	else if (IS_GM45(dev_priv)) {
-		I915_WRITE(DPFC_CB_BASE, dev_priv->fbc.compressed_fb.start);
+		I915_WRITE(DPFC_CB_BASE, fbc->compressed_fb.start);
 	} else {
 		compressed_llb = kzalloc(sizeof(*compressed_llb), GFP_KERNEL);
 		if (!compressed_llb)
@@ -615,23 +588,22 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc)
 		if (ret)
 			goto err_fb;
 
-		dev_priv->fbc.compressed_llb = compressed_llb;
+		fbc->compressed_llb = compressed_llb;
 
 		I915_WRITE(FBC_CFB_BASE,
-			   dev_priv->mm.stolen_base + dev_priv->fbc.compressed_fb.start);
+			   dev_priv->mm.stolen_base + fbc->compressed_fb.start);
 		I915_WRITE(FBC_LL_BASE,
 			   dev_priv->mm.stolen_base + compressed_llb->start);
 	}
 
 	DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n",
-		      dev_priv->fbc.compressed_fb.size,
-		      dev_priv->fbc.threshold);
+		      fbc->compressed_fb.size, fbc->threshold);
 
 	return 0;
 
 err_fb:
 	kfree(compressed_llb);
-	i915_gem_stolen_remove_node(dev_priv, &dev_priv->fbc.compressed_fb);
+	i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb);
 err_llb:
 	pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size);
 	return -ENOSPC;
@@ -639,25 +611,27 @@ err_llb:
 
 static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
 {
-	if (drm_mm_node_allocated(&dev_priv->fbc.compressed_fb))
-		i915_gem_stolen_remove_node(dev_priv,
-					    &dev_priv->fbc.compressed_fb);
-
-	if (dev_priv->fbc.compressed_llb) {
-		i915_gem_stolen_remove_node(dev_priv,
-					    dev_priv->fbc.compressed_llb);
-		kfree(dev_priv->fbc.compressed_llb);
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
+	if (drm_mm_node_allocated(&fbc->compressed_fb))
+		i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb);
+
+	if (fbc->compressed_llb) {
+		i915_gem_stolen_remove_node(dev_priv, fbc->compressed_llb);
+		kfree(fbc->compressed_llb);
 	}
 }
 
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv)
 {
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
 	if (!fbc_supported(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
+	mutex_lock(&fbc->lock);
 	__intel_fbc_cleanup_cfb(dev_priv);
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_unlock(&fbc->lock);
 }
 
 static bool stride_is_valid(struct drm_i915_private *dev_priv,
@@ -681,19 +655,17 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv,
 	return true;
 }
 
-static bool pixel_format_is_valid(struct drm_framebuffer *fb)
+static bool pixel_format_is_valid(struct drm_i915_private *dev_priv,
+				  uint32_t pixel_format)
 {
-	struct drm_device *dev = fb->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	switch (fb->pixel_format) {
+	switch (pixel_format) {
 	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_XBGR8888:
 		return true;
 	case DRM_FORMAT_XRGB1555:
 	case DRM_FORMAT_RGB565:
 		/* 16bpp not supported on gen2 */
-		if (IS_GEN2(dev))
+		if (IS_GEN2(dev_priv))
 			return false;
 		/* WaFbcOnly1to1Ratio:ctg */
 		if (IS_G4X(dev_priv))
@@ -713,6 +685,7 @@ static bool pixel_format_is_valid(struct drm_framebuffer *fb)
 static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 	unsigned int effective_w, effective_h, max_w, max_h;
 
 	if (INTEL_INFO(dev_priv)->gen >= 8 || IS_HASWELL(dev_priv)) {
@@ -726,87 +699,105 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
 		max_h = 1536;
 	}
 
-	intel_fbc_get_plane_source_size(crtc, &effective_w, &effective_h);
+	intel_fbc_get_plane_source_size(&fbc->state_cache, &effective_w,
+					&effective_h);
 	effective_w += crtc->adjusted_x;
 	effective_h += crtc->adjusted_y;
 
 	return effective_w <= max_w && effective_h <= max_h;
 }
 
-/**
- * __intel_fbc_update - activate/deactivate FBC as needed, unlocked
- * @crtc: the CRTC that triggered the update
- *
- * This function completely reevaluates the status of FBC, then activates,
- * deactivates or maintains it on the same state.
- */
-static void __intel_fbc_update(struct intel_crtc *crtc)
+static void intel_fbc_update_state_cache(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
-	struct drm_framebuffer *fb;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+	struct intel_crtc_state *crtc_state =
+		to_intel_crtc_state(crtc->base.state);
+	struct intel_plane_state *plane_state =
+		to_intel_plane_state(crtc->base.primary->state);
+	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj;
-	const struct drm_display_mode *adjusted_mode;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
+	WARN_ON(!drm_modeset_is_locked(&crtc->base.primary->mutex));
 
-	if (!multiple_pipes_ok(dev_priv)) {
-		set_no_fbc_reason(dev_priv, "more than one pipe active");
-		goto out_disable;
-	}
+	cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags;
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		cache->crtc.hsw_bdw_pixel_rate =
+			ilk_pipe_pixel_rate(crtc_state);
 
-	if (!dev_priv->fbc.enabled || dev_priv->fbc.crtc != crtc)
-		return;
+	cache->plane.rotation = plane_state->base.rotation;
+	cache->plane.src_w = drm_rect_width(&plane_state->src) >> 16;
+	cache->plane.src_h = drm_rect_height(&plane_state->src) >> 16;
+	cache->plane.visible = plane_state->visible;
 
-	if (!crtc_is_valid(crtc)) {
-		set_no_fbc_reason(dev_priv, "no output");
-		goto out_disable;
-	}
+	if (!cache->plane.visible)
+		return;
 
-	fb = crtc->base.primary->fb;
 	obj = intel_fb_obj(fb);
-	adjusted_mode = &crtc->config->base.adjusted_mode;
 
-	if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
-	    (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
-		set_no_fbc_reason(dev_priv, "incompatible mode");
-		goto out_disable;
+	/* FIXME: We lack the proper locking here, so only run this on the
+	 * platforms that need. */
+	if (INTEL_INFO(dev_priv)->gen >= 5 && INTEL_INFO(dev_priv)->gen < 7)
+		cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj);
+	cache->fb.pixel_format = fb->pixel_format;
+	cache->fb.stride = fb->pitches[0];
+	cache->fb.fence_reg = obj->fence_reg;
+	cache->fb.tiling_mode = obj->tiling_mode;
+}
+
+static bool intel_fbc_can_activate(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+
+	if (!cache->plane.visible) {
+		fbc->no_fbc_reason = "primary plane not visible";
+		return false;
+	}
+
+	if ((cache->crtc.mode_flags & DRM_MODE_FLAG_INTERLACE) ||
+	    (cache->crtc.mode_flags & DRM_MODE_FLAG_DBLSCAN)) {
+		fbc->no_fbc_reason = "incompatible mode";
+		return false;
 	}
 
 	if (!intel_fbc_hw_tracking_covers_screen(crtc)) {
-		set_no_fbc_reason(dev_priv, "mode too large for compression");
-		goto out_disable;
+		fbc->no_fbc_reason = "mode too large for compression";
+		return false;
 	}
 
 	/* The use of a CPU fence is mandatory in order to detect writes
 	 * by the CPU to the scanout and trigger updates to the FBC.
 	 */
-	if (obj->tiling_mode != I915_TILING_X ||
-	    obj->fence_reg == I915_FENCE_REG_NONE) {
-		set_no_fbc_reason(dev_priv, "framebuffer not tiled or fenced");
-		goto out_disable;
+	if (cache->fb.tiling_mode != I915_TILING_X ||
+	    cache->fb.fence_reg == I915_FENCE_REG_NONE) {
+		fbc->no_fbc_reason = "framebuffer not tiled or fenced";
+		return false;
 	}
 	if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) &&
-	    crtc->base.primary->state->rotation != BIT(DRM_ROTATE_0)) {
-		set_no_fbc_reason(dev_priv, "rotation unsupported");
-		goto out_disable;
+	    cache->plane.rotation != BIT(DRM_ROTATE_0)) {
+		fbc->no_fbc_reason = "rotation unsupported";
+		return false;
 	}
 
-	if (!stride_is_valid(dev_priv, fb->pitches[0])) {
-		set_no_fbc_reason(dev_priv, "framebuffer stride not supported");
-		goto out_disable;
+	if (!stride_is_valid(dev_priv, cache->fb.stride)) {
+		fbc->no_fbc_reason = "framebuffer stride not supported";
+		return false;
 	}
 
-	if (!pixel_format_is_valid(fb)) {
-		set_no_fbc_reason(dev_priv, "pixel format is invalid");
-		goto out_disable;
+	if (!pixel_format_is_valid(dev_priv, cache->fb.pixel_format)) {
+		fbc->no_fbc_reason = "pixel format is invalid";
+		return false;
 	}
 
 	/* WaFbcExceedCdClockThreshold:hsw,bdw */
 	if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) &&
-	    ilk_pipe_pixel_rate(crtc->config) >=
-	    dev_priv->cdclk_freq * 95 / 100) {
-		set_no_fbc_reason(dev_priv, "pixel rate is too big");
-		goto out_disable;
+	    cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) {
+		fbc->no_fbc_reason = "pixel rate is too big";
+		return false;
 	}
 
 	/* It is possible for the required CFB size change without a
@@ -819,189 +810,322 @@ static void __intel_fbc_update(struct intel_crtc *crtc)
 	 * we didn't get any invalidate/deactivate calls, but this would require
 	 * a lot of tracking just for a specific case. If we conclude it's an
 	 * important case, we can implement it later. */
-	if (intel_fbc_calculate_cfb_size(crtc, fb) >
-	    dev_priv->fbc.compressed_fb.size * dev_priv->fbc.threshold) {
-		set_no_fbc_reason(dev_priv, "CFB requirements changed");
-		goto out_disable;
+	if (intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
+	    fbc->compressed_fb.size * fbc->threshold) {
+		fbc->no_fbc_reason = "CFB requirements changed";
+		return false;
 	}
 
+	return true;
+}
+
+static bool intel_fbc_can_choose(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	bool enable_by_default = IS_HASWELL(dev_priv) ||
+				 IS_BROADWELL(dev_priv);
+
+	if (intel_vgpu_active(dev_priv->dev)) {
+		fbc->no_fbc_reason = "VGPU is active";
+		return false;
+	}
+
+	if (i915.enable_fbc < 0 && !enable_by_default) {
+		fbc->no_fbc_reason = "disabled per chip default";
+		return false;
+	}
+
+	if (!i915.enable_fbc) {
+		fbc->no_fbc_reason = "disabled per module param";
+		return false;
+	}
+
+	if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) {
+		fbc->no_fbc_reason = "no enabled pipes can have FBC";
+		return false;
+	}
+
+	if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) {
+		fbc->no_fbc_reason = "no enabled planes can have FBC";
+		return false;
+	}
+
+	return true;
+}
+
+static void intel_fbc_get_reg_params(struct intel_crtc *crtc,
+				     struct intel_fbc_reg_params *params)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_state_cache *cache = &fbc->state_cache;
+
+	/* Since all our fields are integer types, use memset here so the
+	 * comparison function can rely on memcmp because the padding will be
+	 * zero. */
+	memset(params, 0, sizeof(*params));
+
+	params->crtc.pipe = crtc->pipe;
+	params->crtc.plane = crtc->plane;
+	params->crtc.fence_y_offset = get_crtc_fence_y_offset(crtc);
+
+	params->fb.pixel_format = cache->fb.pixel_format;
+	params->fb.stride = cache->fb.stride;
+	params->fb.fence_reg = cache->fb.fence_reg;
+
+	params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
+
+	params->fb.ggtt_offset = cache->fb.ilk_ggtt_offset;
+}
+
+static bool intel_fbc_reg_params_equal(struct intel_fbc_reg_params *params1,
+				       struct intel_fbc_reg_params *params2)
+{
+	/* We can use this since intel_fbc_get_reg_params() does a memset. */
+	return memcmp(params1, params2, sizeof(*params1)) == 0;
+}
+
+void intel_fbc_pre_update(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
+	if (!fbc_supported(dev_priv))
+		return;
+
+	mutex_lock(&fbc->lock);
+
+	if (!multiple_pipes_ok(crtc)) {
+		fbc->no_fbc_reason = "more than one pipe active";
+		goto deactivate;
+	}
+
+	if (!fbc->enabled || fbc->crtc != crtc)
+		goto unlock;
+
+	intel_fbc_update_state_cache(crtc);
+
+deactivate:
+	intel_fbc_deactivate(dev_priv);
+unlock:
+	mutex_unlock(&fbc->lock);
+}
+
+static void __intel_fbc_post_update(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_fbc_reg_params old_params;
+
+	WARN_ON(!mutex_is_locked(&fbc->lock));
+
+	if (!fbc->enabled || fbc->crtc != crtc)
+		return;
+
+	if (!intel_fbc_can_activate(crtc)) {
+		WARN_ON(fbc->active);
+		return;
+	}
+
+	old_params = fbc->params;
+	intel_fbc_get_reg_params(crtc, &fbc->params);
+
 	/* If the scanout has not changed, don't modify the FBC settings.
 	 * Note that we make the fundamental assumption that the fb->obj
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
 	 * without first being decoupled from the scanout and FBC disabled.
 	 */
-	if (dev_priv->fbc.crtc == crtc &&
-	    dev_priv->fbc.fb_id == fb->base.id &&
-	    dev_priv->fbc.y == crtc->base.y &&
-	    dev_priv->fbc.active)
+	if (fbc->active &&
+	    intel_fbc_reg_params_equal(&old_params, &fbc->params))
 		return;
 
-	if (intel_fbc_is_active(dev_priv)) {
-		/* We update FBC along two paths, after changing fb/crtc
-		 * configuration (modeswitching) and after page-flipping
-		 * finishes. For the latter, we know that not only did
-		 * we disable the FBC at the start of the page-flip
-		 * sequence, but also more than one vblank has passed.
-		 *
-		 * For the former case of modeswitching, it is possible
-		 * to switch between two FBC valid configurations
-		 * instantaneously so we do need to disable the FBC
-		 * before we can modify its control registers. We also
-		 * have to wait for the next vblank for that to take
-		 * effect. However, since we delay enabling FBC we can
-		 * assume that a vblank has passed since disabling and
-		 * that we can safely alter the registers in the deferred
-		 * callback.
-		 *
-		 * In the scenario that we go from a valid to invalid
-		 * and then back to valid FBC configuration we have
-		 * no strict enforcement that a vblank occurred since
-		 * disabling the FBC. However, along all current pipe
-		 * disabling paths we do need to wait for a vblank at
-		 * some point. And we wait before enabling FBC anyway.
-		 */
-		DRM_DEBUG_KMS("deactivating FBC for update\n");
-		__intel_fbc_deactivate(dev_priv);
-	}
-
+	intel_fbc_deactivate(dev_priv);
 	intel_fbc_schedule_activation(crtc);
-	dev_priv->fbc.no_fbc_reason = "FBC enabled (not necessarily active)";
-	return;
-
-out_disable:
-	/* Multiple disables should be harmless */
-	if (intel_fbc_is_active(dev_priv)) {
-		DRM_DEBUG_KMS("unsupported config, deactivating FBC\n");
-		__intel_fbc_deactivate(dev_priv);
-	}
+	fbc->no_fbc_reason = "FBC enabled (active or scheduled)";
 }
 
-/*
- * intel_fbc_update - activate/deactivate FBC as needed
- * @crtc: the CRTC that triggered the update
- *
- * This function reevaluates the overall state and activates or deactivates FBC.
- */
-void intel_fbc_update(struct intel_crtc *crtc)
+void intel_fbc_post_update(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 
 	if (!fbc_supported(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
-	__intel_fbc_update(crtc);
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_lock(&fbc->lock);
+	__intel_fbc_post_update(crtc);
+	mutex_unlock(&fbc->lock);
+}
+
+static unsigned int intel_fbc_get_frontbuffer_bit(struct intel_fbc *fbc)
+{
+	if (fbc->enabled)
+		return to_intel_plane(fbc->crtc->base.primary)->frontbuffer_bit;
+	else
+		return fbc->possible_framebuffer_bits;
 }
 
 void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 			  unsigned int frontbuffer_bits,
 			  enum fb_op_origin origin)
 {
-	unsigned int fbc_bits;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 
 	if (!fbc_supported(dev_priv))
 		return;
 
-	if (origin == ORIGIN_GTT)
+	if (origin == ORIGIN_GTT || origin == ORIGIN_FLIP)
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
+	mutex_lock(&fbc->lock);
 
-	if (dev_priv->fbc.enabled)
-		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
-	else
-		fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
-
-	dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
+	fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits;
 
-	if (dev_priv->fbc.busy_bits)
-		__intel_fbc_deactivate(dev_priv);
+	if (fbc->enabled && fbc->busy_bits)
+		intel_fbc_deactivate(dev_priv);
 
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_unlock(&fbc->lock);
 }
 
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
 		     unsigned int frontbuffer_bits, enum fb_op_origin origin)
 {
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
 	if (!fbc_supported(dev_priv))
 		return;
 
-	if (origin == ORIGIN_GTT)
+	if (origin == ORIGIN_GTT || origin == ORIGIN_FLIP)
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
+	mutex_lock(&fbc->lock);
 
-	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
+	fbc->busy_bits &= ~frontbuffer_bits;
 
-	if (!dev_priv->fbc.busy_bits && dev_priv->fbc.enabled) {
-		if (origin != ORIGIN_FLIP && dev_priv->fbc.active) {
+	if (!fbc->busy_bits && fbc->enabled &&
+	    (frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) {
+		if (fbc->active)
 			intel_fbc_recompress(dev_priv);
-		} else {
-			__intel_fbc_deactivate(dev_priv);
-			__intel_fbc_update(dev_priv->fbc.crtc);
+		else
+			__intel_fbc_post_update(fbc->crtc);
+	}
+
+	mutex_unlock(&fbc->lock);
+}
+
+/**
+ * intel_fbc_choose_crtc - select a CRTC to enable FBC on
+ * @dev_priv: i915 device instance
+ * @state: the atomic state structure
+ *
+ * This function looks at the proposed state for CRTCs and planes, then chooses
+ * which pipe is going to have FBC by setting intel_crtc_state->enable_fbc to
+ * true.
+ *
+ * Later, intel_fbc_enable is going to look for state->enable_fbc and then maybe
+ * enable FBC for the chosen CRTC. If it does, it will set dev_priv->fbc.crtc.
+ */
+void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv,
+			   struct drm_atomic_state *state)
+{
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane *plane;
+	struct drm_plane_state *plane_state;
+	bool fbc_crtc_present = false;
+	int i, j;
+
+	mutex_lock(&fbc->lock);
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (fbc->crtc == to_intel_crtc(crtc)) {
+			fbc_crtc_present = true;
+			break;
+		}
+	}
+	/* This atomic commit doesn't involve the CRTC currently tied to FBC. */
+	if (!fbc_crtc_present && fbc->crtc != NULL)
+		goto out;
+
+	/* Simply choose the first CRTC that is compatible and has a visible
+	 * plane. We could go for fancier schemes such as checking the plane
+	 * size, but this would just affect the few platforms that don't tie FBC
+	 * to pipe or plane A. */
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct intel_plane_state *intel_plane_state =
+			to_intel_plane_state(plane_state);
+
+		if (!intel_plane_state->visible)
+			continue;
+
+		for_each_crtc_in_state(state, crtc, crtc_state, j) {
+			struct intel_crtc_state *intel_crtc_state =
+				to_intel_crtc_state(crtc_state);
+
+			if (plane_state->crtc != crtc)
+				continue;
+
+			if (!intel_fbc_can_choose(to_intel_crtc(crtc)))
+				break;
+
+			intel_crtc_state->enable_fbc = true;
+			goto out;
 		}
 	}
 
-	mutex_unlock(&dev_priv->fbc.lock);
+out:
+	mutex_unlock(&fbc->lock);
 }
 
 /**
  * intel_fbc_enable: tries to enable FBC on the CRTC
  * @crtc: the CRTC
  *
- * This function checks if it's possible to enable FBC on the following CRTC,
- * then enables it. Notice that it doesn't activate FBC.
+ * This function checks if the given CRTC was chosen for FBC, then enables it if
+ * possible. Notice that it doesn't activate FBC. It is valid to call
+ * intel_fbc_enable multiple times for the same pipe without an
+ * intel_fbc_disable in the middle, as long as it is deactivated.
  */
 void intel_fbc_enable(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 
 	if (!fbc_supported(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
-
-	if (dev_priv->fbc.enabled) {
-		WARN_ON(dev_priv->fbc.crtc == crtc);
-		goto out;
-	}
-
-	WARN_ON(dev_priv->fbc.active);
-	WARN_ON(dev_priv->fbc.crtc != NULL);
+	mutex_lock(&fbc->lock);
 
-	if (intel_vgpu_active(dev_priv->dev)) {
-		set_no_fbc_reason(dev_priv, "VGPU is active");
-		goto out;
-	}
-
-	if (i915.enable_fbc < 0) {
-		set_no_fbc_reason(dev_priv, "disabled per chip default");
+	if (fbc->enabled) {
+		WARN_ON(fbc->crtc == NULL);
+		if (fbc->crtc == crtc) {
+			WARN_ON(!crtc->config->enable_fbc);
+			WARN_ON(fbc->active);
+		}
 		goto out;
 	}
 
-	if (!i915.enable_fbc) {
-		set_no_fbc_reason(dev_priv, "disabled per module param");
+	if (!crtc->config->enable_fbc)
 		goto out;
-	}
 
-	if (!crtc_can_fbc(crtc)) {
-		set_no_fbc_reason(dev_priv, "no enabled pipes can have FBC");
-		goto out;
-	}
+	WARN_ON(fbc->active);
+	WARN_ON(fbc->crtc != NULL);
 
+	intel_fbc_update_state_cache(crtc);
 	if (intel_fbc_alloc_cfb(crtc)) {
-		set_no_fbc_reason(dev_priv, "not enough stolen memory");
+		fbc->no_fbc_reason = "not enough stolen memory";
 		goto out;
 	}
 
 	DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe));
-	dev_priv->fbc.no_fbc_reason = "FBC enabled but not active yet\n";
+	fbc->no_fbc_reason = "FBC enabled but not active yet\n";
 
-	dev_priv->fbc.enabled = true;
-	dev_priv->fbc.crtc = crtc;
+	fbc->enabled = true;
+	fbc->crtc = crtc;
 out:
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_unlock(&fbc->lock);
 }
 
 /**
@@ -1013,58 +1137,88 @@ out:
  */
 static void __intel_fbc_disable(struct drm_i915_private *dev_priv)
 {
-	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	struct intel_fbc *fbc = &dev_priv->fbc;
+	struct intel_crtc *crtc = fbc->crtc;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock));
-	WARN_ON(!dev_priv->fbc.enabled);
-	WARN_ON(dev_priv->fbc.active);
-	assert_pipe_disabled(dev_priv, crtc->pipe);
+	WARN_ON(!mutex_is_locked(&fbc->lock));
+	WARN_ON(!fbc->enabled);
+	WARN_ON(fbc->active);
+	WARN_ON(crtc->active);
 
 	DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe));
 
 	__intel_fbc_cleanup_cfb(dev_priv);
 
-	dev_priv->fbc.enabled = false;
-	dev_priv->fbc.crtc = NULL;
+	fbc->enabled = false;
+	fbc->crtc = NULL;
 }
 
 /**
- * intel_fbc_disable_crtc - disable FBC if it's associated with crtc
+ * intel_fbc_disable - disable FBC if it's associated with crtc
  * @crtc: the CRTC
  *
  * This function disables FBC if it's associated with the provided CRTC.
  */
-void intel_fbc_disable_crtc(struct intel_crtc *crtc)
+void intel_fbc_disable(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
+	struct intel_fbc *fbc = &dev_priv->fbc;
 
 	if (!fbc_supported(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
-	if (dev_priv->fbc.crtc == crtc) {
-		WARN_ON(!dev_priv->fbc.enabled);
-		WARN_ON(dev_priv->fbc.active);
+	mutex_lock(&fbc->lock);
+	if (fbc->crtc == crtc) {
+		WARN_ON(!fbc->enabled);
+		WARN_ON(fbc->active);
 		__intel_fbc_disable(dev_priv);
 	}
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_unlock(&fbc->lock);
+
+	cancel_work_sync(&fbc->work.work);
 }
 
 /**
- * intel_fbc_disable - globally disable FBC
+ * intel_fbc_global_disable - globally disable FBC
  * @dev_priv: i915 device instance
  *
  * This function disables FBC regardless of which CRTC is associated with it.
  */
-void intel_fbc_disable(struct drm_i915_private *dev_priv)
+void intel_fbc_global_disable(struct drm_i915_private *dev_priv)
 {
+	struct intel_fbc *fbc = &dev_priv->fbc;
+
 	if (!fbc_supported(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->fbc.lock);
-	if (dev_priv->fbc.enabled)
+	mutex_lock(&fbc->lock);
+	if (fbc->enabled)
 		__intel_fbc_disable(dev_priv);
-	mutex_unlock(&dev_priv->fbc.lock);
+	mutex_unlock(&fbc->lock);
+
+	cancel_work_sync(&fbc->work.work);
+}
+
+/**
+ * intel_fbc_init_pipe_state - initialize FBC's CRTC visibility tracking
+ * @dev_priv: i915 device instance
+ *
+ * The FBC code needs to track CRTC visibility since the older platforms can't
+ * have FBC enabled while multiple pipes are used. This function does the
+ * initial setup at driver load to make sure FBC is matching the real hardware.
+ */
+void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc;
+
+	/* Don't even bother tracking anything if we don't need. */
+	if (!no_fbc_on_multiple_pipes(dev_priv))
+		return;
+
+	for_each_intel_crtc(dev_priv->dev, crtc)
+		if (intel_crtc_active(&crtc->base) &&
+		    to_intel_plane_state(crtc->base.primary->state)->visible)
+			dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe);
 }
 
 /**
@@ -1075,51 +1229,35 @@ void intel_fbc_disable(struct drm_i915_private *dev_priv)
  */
 void intel_fbc_init(struct drm_i915_private *dev_priv)
 {
+	struct intel_fbc *fbc = &dev_priv->fbc;
 	enum pipe pipe;
 
-	INIT_WORK(&dev_priv->fbc.work.work, intel_fbc_work_fn);
-	mutex_init(&dev_priv->fbc.lock);
-	dev_priv->fbc.enabled = false;
-	dev_priv->fbc.active = false;
-	dev_priv->fbc.work.scheduled = false;
+	INIT_WORK(&fbc->work.work, intel_fbc_work_fn);
+	mutex_init(&fbc->lock);
+	fbc->enabled = false;
+	fbc->active = false;
+	fbc->work.scheduled = false;
 
 	if (!HAS_FBC(dev_priv)) {
-		dev_priv->fbc.no_fbc_reason = "unsupported by this chipset";
+		fbc->no_fbc_reason = "unsupported by this chipset";
 		return;
 	}
 
 	for_each_pipe(dev_priv, pipe) {
-		dev_priv->fbc.possible_framebuffer_bits |=
+		fbc->possible_framebuffer_bits |=
 				INTEL_FRONTBUFFER_PRIMARY(pipe);
 
 		if (fbc_on_pipe_a_only(dev_priv))
 			break;
 	}
 
-	if (INTEL_INFO(dev_priv)->gen >= 7) {
-		dev_priv->fbc.is_active = ilk_fbc_is_active;
-		dev_priv->fbc.activate = gen7_fbc_activate;
-		dev_priv->fbc.deactivate = ilk_fbc_deactivate;
-	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
-		dev_priv->fbc.is_active = ilk_fbc_is_active;
-		dev_priv->fbc.activate = ilk_fbc_activate;
-		dev_priv->fbc.deactivate = ilk_fbc_deactivate;
-	} else if (IS_GM45(dev_priv)) {
-		dev_priv->fbc.is_active = g4x_fbc_is_active;
-		dev_priv->fbc.activate = g4x_fbc_activate;
-		dev_priv->fbc.deactivate = g4x_fbc_deactivate;
-	} else {
-		dev_priv->fbc.is_active = i8xx_fbc_is_active;
-		dev_priv->fbc.activate = i8xx_fbc_activate;
-		dev_priv->fbc.deactivate = i8xx_fbc_deactivate;
-
-		/* This value was pulled out of someone's hat */
+	/* This value was pulled out of someone's hat */
+	if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_GM45(dev_priv))
 		I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
-	}
 
 	/* We still don't have any sort of hardware state readout for FBC, so
 	 * deactivate it in case the BIOS activated it to make sure software
 	 * matches the hardware state. */
-	if (dev_priv->fbc.is_active(dev_priv))
-		dev_priv->fbc.deactivate(dev_priv);
+	if (intel_fbc_hw_is_active(dev_priv))
+		intel_fbc_hw_deactivate(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index bea75cafc623..97a91e631915 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -119,7 +119,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 {
 	struct intel_fbdev *ifbdev =
 		container_of(helper, struct intel_fbdev, helper);
-	struct drm_framebuffer *fb = NULL;
+	struct drm_framebuffer *fb;
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_mode_fb_cmd2 mode_cmd = {};
@@ -171,8 +171,6 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 
 out:
 	mutex_unlock(&dev->struct_mutex);
-	if (!IS_ERR_OR_NULL(fb))
-		drm_framebuffer_unreference(fb);
 	return ret;
 }
 
@@ -408,8 +406,8 @@ retry:
 			continue;
 		}
 
-		encoder = connector->encoder;
-		if (!encoder || WARN_ON(!encoder->crtc)) {
+		encoder = connector->state->best_encoder;
+		if (!encoder || WARN_ON(!connector->state->crtc)) {
 			if (connector->force > DRM_FORCE_OFF)
 				goto bail;
 
@@ -422,7 +420,7 @@ retry:
 
 		num_connectors_enabled++;
 
-		new_crtc = intel_fb_helper_crtc(fb_helper, encoder->crtc);
+		new_crtc = intel_fb_helper_crtc(fb_helper, connector->state->crtc);
 
 		/*
 		 * Make sure we're not trying to drive multiple connectors
@@ -468,17 +466,22 @@ retry:
 			 * usually contains. But since our current
 			 * code puts a mode derived from the post-pfit timings
 			 * into crtc->mode this works out correctly.
+			 *
+			 * This is crtc->mode and not crtc->state->mode for the
+			 * fastboot check to work correctly. crtc_state->mode has
+			 * I915_MODE_FLAG_INHERITED, which we clear to force check
+			 * state.
 			 */
 			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
 				      connector->name);
-			modes[i] = &encoder->crtc->mode;
+			modes[i] = &connector->state->crtc->mode;
 		}
 		crtcs[i] = new_crtc;
 
 		DRM_DEBUG_KMS("connector %s on pipe %c [CRTC:%d]: %dx%d%s\n",
 			      connector->name,
-			      pipe_name(to_intel_crtc(encoder->crtc)->pipe),
-			      encoder->crtc->base.id,
+			      pipe_name(to_intel_crtc(connector->state->crtc)->pipe),
+			      connector->state->crtc->base.id,
 			      modes[i]->hdisplay, modes[i]->vdisplay,
 			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" :"");
 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 822952235dcf..73002e901ff2 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -43,9 +43,10 @@ struct i915_guc_client {
 	uint32_t wq_offset;
 	uint32_t wq_size;
 	uint32_t wq_tail;
+	uint32_t wq_head;
 
 	/* GuC submission statistics & status */
-	uint64_t submissions[I915_NUM_RINGS];
+	uint64_t submissions[GUC_MAX_ENGINES_NUM];
 	uint32_t q_fail;
 	uint32_t b_fail;
 	int retcode;
@@ -88,6 +89,8 @@ struct intel_guc {
 	uint32_t log_flags;
 	struct drm_i915_gem_object *log_obj;
 
+	struct drm_i915_gem_object *ads_obj;
+
 	struct drm_i915_gem_object *ctx_pool_obj;
 	struct ida ctx_ids;
 
@@ -103,8 +106,8 @@ struct intel_guc {
 	uint32_t action_fail;		/* Total number of failures	*/
 	int32_t action_err;		/* Last error code		*/
 
-	uint64_t submissions[I915_NUM_RINGS];
-	uint32_t last_seqno[I915_NUM_RINGS];
+	uint64_t submissions[GUC_MAX_ENGINES_NUM];
+	uint32_t last_seqno[GUC_MAX_ENGINES_NUM];
 };
 
 /* intel_guc_loader.c */
@@ -122,5 +125,6 @@ int i915_guc_submit(struct i915_guc_client *client,
 		    struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_device *dev);
 void i915_guc_submission_fini(struct drm_device *dev);
+int i915_guc_wq_check_space(struct i915_guc_client *client);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 40b2ea572e16..2de57ffe5e18 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -39,10 +39,18 @@
 #define GUC_CTX_PRIORITY_HIGH		1
 #define GUC_CTX_PRIORITY_KMD_NORMAL	2
 #define GUC_CTX_PRIORITY_NORMAL		3
+#define GUC_CTX_PRIORITY_NUM		4
 
 #define GUC_MAX_GPU_CONTEXTS		1024
 #define	GUC_INVALID_CTX_ID		GUC_MAX_GPU_CONTEXTS
 
+#define GUC_RENDER_ENGINE		0
+#define GUC_VIDEO_ENGINE		1
+#define GUC_BLITTER_ENGINE		2
+#define GUC_VIDEOENHANCE_ENGINE		3
+#define GUC_VIDEO_ENGINE2		4
+#define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
+
 /* Work queue item header definitions */
 #define WQ_STATUS_ACTIVE		1
 #define WQ_STATUS_SUSPENDED		2
@@ -81,11 +89,14 @@
 #define GUC_CTL_CTXINFO			0
 #define   GUC_CTL_CTXNUM_IN16_SHIFT	0
 #define   GUC_CTL_BASE_ADDR_SHIFT	12
+
 #define GUC_CTL_ARAT_HIGH		1
 #define GUC_CTL_ARAT_LOW		2
+
 #define GUC_CTL_DEVICE_INFO		3
 #define   GUC_CTL_GTTYPE_SHIFT		0
 #define   GUC_CTL_COREFAMILY_SHIFT	7
+
 #define GUC_CTL_LOG_PARAMS		4
 #define   GUC_LOG_VALID			(1 << 0)
 #define   GUC_LOG_NOTIFY_ON_HALF_FULL	(1 << 1)
@@ -97,9 +108,12 @@
 #define   GUC_LOG_ISR_PAGES		3
 #define   GUC_LOG_ISR_SHIFT		9
 #define   GUC_LOG_BUF_ADDR_SHIFT	12
+
 #define GUC_CTL_PAGE_FAULT_CONTROL	5
+
 #define GUC_CTL_WA			6
 #define   GUC_CTL_WA_UK_BY_DRIVER	(1 << 3)
+
 #define GUC_CTL_FEATURE			7
 #define   GUC_CTL_VCS2_ENABLED		(1 << 0)
 #define   GUC_CTL_KERNEL_SUBMISSIONS	(1 << 1)
@@ -109,6 +123,7 @@
 #define   GUC_CTL_PREEMPTION_LOG	(1 << 5)
 #define   GUC_CTL_ENABLE_SLPC		(1 << 7)
 #define   GUC_CTL_RESET_ON_PREMPT_FAILURE	(1 << 8)
+
 #define GUC_CTL_DEBUG			8
 #define   GUC_LOG_VERBOSITY_SHIFT	0
 #define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
@@ -118,9 +133,19 @@
 /* Verbosity range-check limits, without the shift */
 #define	  GUC_LOG_VERBOSITY_MIN		0
 #define	  GUC_LOG_VERBOSITY_MAX		3
+#define	  GUC_LOG_VERBOSITY_MASK	0x0000000f
+#define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
+#define   GUC_LOG_DISABLED		(1 << 6)
+#define   GUC_PROFILE_ENABLED		(1 << 7)
+#define   GUC_WQ_TRACK_ENABLED		(1 << 8)
+#define   GUC_ADS_ENABLED		(1 << 9)
+#define   GUC_DEBUG_RESERVED		(1 << 10)
+#define   GUC_ADS_ADDR_SHIFT		11
+#define   GUC_ADS_ADDR_MASK		0xfffff800
+
 #define GUC_CTL_RSRVD			9
 
-#define GUC_CTL_MAX_DWORDS		(GUC_CTL_RSRVD + 1)
+#define GUC_CTL_MAX_DWORDS		(SOFT_SCRATCH_COUNT - 2) /* [1..14] */
 
 /**
  * DOC: GuC Firmware Layout
@@ -267,7 +292,7 @@ struct guc_context_desc {
 	u64 db_trigger_phy;
 	u16 db_id;
 
-	struct guc_execlist_context lrc[I915_NUM_RINGS];
+	struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
 
 	u8 attribute;
 
@@ -299,6 +324,99 @@ struct guc_context_desc {
 #define GUC_POWER_D2		3
 #define GUC_POWER_D3		4
 
+/* Scheduling policy settings */
+
+/* Reset engine upon preempt failure */
+#define POLICY_RESET_ENGINE		(1<<0)
+/* Preempt to idle on quantum expiry */
+#define POLICY_PREEMPT_TO_IDLE		(1<<1)
+
+#define POLICY_MAX_NUM_WI		15
+
+struct guc_policy {
+	/* Time for one workload to execute. (in micro seconds) */
+	u32 execution_quantum;
+	u32 reserved1;
+
+	/* Time to wait for a preemption request to completed before issuing a
+	 * reset. (in micro seconds). */
+	u32 preemption_time;
+
+	/* How much time to allow to run after the first fault is observed.
+	 * Then preempt afterwards. (in micro seconds) */
+	u32 fault_time;
+
+	u32 policy_flags;
+	u32 reserved[2];
+} __packed;
+
+struct guc_policies {
+	struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
+
+	/* In micro seconds. How much time to allow before DPC processing is
+	 * called back via interrupt (to prevent DPC queue drain starving).
+	 * Typically 1000s of micro seconds (example only, not granularity). */
+	u32 dpc_promote_time;
+
+	/* Must be set to take these new values. */
+	u32 is_valid;
+
+	/* Max number of WIs to process per call. A large value may keep CS
+	 * idle. */
+	u32 max_num_work_items;
+
+	u32 reserved[19];
+} __packed;
+
+/* GuC MMIO reg state struct */
+
+#define GUC_REGSET_FLAGS_NONE		0x0
+#define GUC_REGSET_POWERCYCLE		0x1
+#define GUC_REGSET_MASKED		0x2
+#define GUC_REGSET_ENGINERESET		0x4
+#define GUC_REGSET_SAVE_DEFAULT_VALUE	0x8
+#define GUC_REGSET_SAVE_CURRENT_VALUE	0x10
+
+#define GUC_REGSET_MAX_REGISTERS	25
+#define GUC_MMIO_WHITE_LIST_START	0x24d0
+#define GUC_MMIO_WHITE_LIST_MAX		12
+#define GUC_S3_SAVE_SPACE_PAGES		10
+
+struct guc_mmio_regset {
+	struct __packed {
+		u32 offset;
+		u32 value;
+		u32 flags;
+	} registers[GUC_REGSET_MAX_REGISTERS];
+
+	u32 values_valid;
+	u32 number_of_registers;
+} __packed;
+
+struct guc_mmio_reg_state {
+	struct guc_mmio_regset global_reg;
+	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM];
+
+	/* MMIO registers that are set as non privileged */
+	struct __packed {
+		u32 mmio_start;
+		u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
+		u32 count;
+	} mmio_white_list[GUC_MAX_ENGINES_NUM];
+} __packed;
+
+/* GuC Additional Data Struct */
+
+struct guc_ads {
+	u32 reg_state_addr;
+	u32 reg_state_buffer;
+	u32 golden_context_lrca;
+	u32 scheduler_policies;
+	u32 reserved0[3];
+	u32 eng_state_size[GUC_MAX_ENGINES_NUM];
+	u32 reserved2[4];
+} __packed;
+
 /* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */
 enum host2guc_action {
 	HOST2GUC_ACTION_DEFAULT = 0x0,
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 550921f2ef7d..82a3c03fbc0e 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -165,6 +165,13 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
 			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
 	}
 
+	if (guc->ads_obj) {
+		u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
+				>> PAGE_SHIFT;
+		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
+		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
+	}
+
 	/* If GuC submission is enabled, set up additional parameters here */
 	if (i915.enable_guc_submission) {
 		u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
@@ -192,7 +199,7 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
  * the value matches either of two values representing completion
  * of the GuC boot process.
  *
- * This is used for polling the GuC status in a wait_for_atomic()
+ * This is used for polling the GuC status in a wait_for()
  * loop below.
  */
 static inline bool guc_ucode_response(struct drm_i915_private *dev_priv,
@@ -252,14 +259,14 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv)
 	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
 
 	/*
-	 * Spin-wait for the DMA to complete & the GuC to start up.
+	 * Wait for the DMA to complete & the GuC to start up.
 	 * NB: Docs recommend not using the interrupt for completion.
 	 * Measurements indicate this should take no more than 20ms, so a
 	 * timeout here indicates that the GuC has failed and is unusable.
 	 * (Higher levels of the driver will attempt to fall back to
 	 * execlist mode if this happens.)
 	 */
-	ret = wait_for_atomic(guc_ucode_response(dev_priv, &status), 100);
+	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
 
 	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
 			I915_READ(DMA_CTRL), status);
@@ -438,6 +445,7 @@ fail:
 
 	direct_interrupts_to_host(dev_priv);
 	i915_guc_submission_disable(dev);
+	i915_guc_submission_fini(dev);
 
 	return err;
 }
@@ -554,10 +562,12 @@ fail:
 	DRM_ERROR("Failed to fetch GuC firmware from %s (error %d)\n",
 		  guc_fw->guc_fw_path, err);
 
+	mutex_lock(&dev->struct_mutex);
 	obj = guc_fw->guc_fw_obj;
 	if (obj)
 		drm_gem_object_unreference(&obj->base);
 	guc_fw->guc_fw_obj = NULL;
+	mutex_unlock(&dev->struct_mutex);
 
 	release_firmware(fw);		/* OK even if fw is NULL */
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_FAIL;
@@ -624,10 +634,11 @@ void intel_guc_ucode_fini(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
 
+	mutex_lock(&dev->struct_mutex);
 	direct_interrupts_to_host(dev_priv);
+	i915_guc_submission_disable(dev);
 	i915_guc_submission_fini(dev);
 
-	mutex_lock(&dev->struct_mutex);
 	if (guc_fw->guc_fw_obj)
 		drm_gem_object_unreference(&guc_fw->guc_fw_obj->base);
 	guc_fw->guc_fw_obj = NULL;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 616108c4bc3e..a0d8daed2470 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1210,11 +1210,19 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
 	enum drm_mode_status status;
 	int clock;
+	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
 	clock = mode->clock;
+
+	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
+		clock *= 2;
+
+	if (clock > max_dotclk)
+		return MODE_CLOCK_HIGH;
+
 	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
 		clock *= 2;
 
@@ -2041,6 +2049,11 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
 	enum port port = intel_dig_port->port;
 	uint8_t alternate_ddc_pin;
 
+	if (WARN(intel_dig_port->max_lanes < 4,
+		 "Not enough lanes (%d) for HDMI on port %c\n",
+		 intel_dig_port->max_lanes, port_name(port)))
+		return;
+
 	drm_connector_init(dev, connector, &intel_hdmi_connector_funcs,
 			   DRM_MODE_CONNECTOR_HDMIA);
 	drm_connector_helper_add(connector, &intel_hdmi_connector_helper_funcs);
@@ -2224,6 +2237,7 @@ void intel_hdmi_init(struct drm_device *dev,
 	intel_dig_port->port = port;
 	intel_dig_port->hdmi.hdmi_reg = hdmi_reg;
 	intel_dig_port->dp.output_reg = INVALID_MMIO_REG;
+	intel_dig_port->max_lanes = 4;
 
 	intel_hdmi_init_connector(intel_dig_port, intel_connector);
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f1fa756c5d5d..6a978ce80244 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -223,9 +223,11 @@ enum {
 	FAULT_AND_CONTINUE /* Unsupported */
 };
 #define GEN8_CTX_ID_SHIFT 32
-#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
+#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
+#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x26
 
-static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
+static int intel_lr_context_pin(struct intel_context *ctx,
+				struct intel_engine_cs *engine);
 static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 		struct drm_i915_gem_object *default_ctx_obj);
 
@@ -263,65 +265,92 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
 	return 0;
 }
 
+static void
+logical_ring_init_platform_invariants(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	ring->disable_lite_restore_wa = (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
+					IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
+					(ring->id == VCS || ring->id == VCS2);
+
+	ring->ctx_desc_template = GEN8_CTX_VALID;
+	ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) <<
+				   GEN8_CTX_ADDRESSING_MODE_SHIFT;
+	if (IS_GEN8(dev))
+		ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
+	ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers */
+	/* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */
+
+	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
+	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
+	if (ring->disable_lite_restore_wa)
+		ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
+}
+
 /**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx_obj: Logical Ring Context backing object.
+ * intel_lr_context_descriptor_update() - calculate & cache the descriptor
+ * 					  descriptor for a pinned context
  *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
+ * @ctx: Context to work on
+ * @ring: Engine the descriptor will be used with
  *
- * Return: 20-bits globally unique context ID.
+ * The context descriptor encodes various attributes of a context,
+ * including its GTT address and some flags. Because it's fairly
+ * expensive to calculate, we'll just do it once and cache the result,
+ * which remains valid until the context is unpinned.
+ *
+ * This is what a descriptor looks like, from LSB to MSB:
+ *    bits 0-11:    flags, GEN8_CTX_* (cached in ctx_desc_template)
+ *    bits 12-31:    LRCA, GTT address of (the HWSP of) this context
+ *    bits 32-51:    ctx ID, a globally unique tag (the LRCA again!)
+ *    bits 52-63:    reserved, may encode the engine ID (for GuC)
  */
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+static void
+intel_lr_context_descriptor_update(struct intel_context *ctx,
+				   struct intel_engine_cs *ring)
 {
-	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
+	uint64_t lrca, desc;
 
-	/* LRCA is required to be 4K aligned so the more significant 20 bits
-	 * are globally unique */
-	return lrca >> 12;
-}
+	lrca = ctx->engine[ring->id].lrc_vma->node.start +
+	       LRC_PPHWSP_PN * PAGE_SIZE;
 
-static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
-{
-	struct drm_device *dev = ring->dev;
+	desc = ring->ctx_desc_template;			   /* bits  0-11 */
+	desc |= lrca;					   /* bits 12-31 */
+	desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
 
-	return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
-		IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
-	       (ring->id == VCS || ring->id == VCS2);
+	ctx->engine[ring->id].lrc_desc = desc;
 }
 
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-	uint64_t desc;
-	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-			LRC_PPHWSP_PN * PAGE_SIZE;
-
-	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
-
-	desc = GEN8_CTX_VALID;
-	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
-	if (IS_GEN8(ctx_obj->base.dev))
-		desc |= GEN8_CTX_L3LLC_COHERENT;
-	desc |= GEN8_CTX_PRIVILEGE;
-	desc |= lrca;
-	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
-
-	/* TODO: WaDisableLiteRestore when we start using semaphore
-	 * signalling between Command Streamers */
-	/* desc |= GEN8_CTX_FORCE_RESTORE; */
-
-	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
-	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
-	if (disable_lite_restore_wa(ring))
-		desc |= GEN8_CTX_FORCE_RESTORE;
+	return ctx->engine[ring->id].lrc_desc;
+}
 
-	return desc;
+/**
+ * intel_execlists_ctx_id() - get the Execlists Context ID
+ * @ctx: Context to get the ID for
+ * @ring: Engine to get the ID for
+ *
+ * Do not confuse with ctx->id! Unfortunately we have a name overload
+ * here: the old context ID we pass to userspace as a handler so that
+ * they can refer to a context, and the new context ID we pass to the
+ * ELSP so that the GPU can inform us of the context status via
+ * interrupts.
+ *
+ * The context ID is a portion of the context descriptor, so we can
+ * just extract the required part from the cached descriptor.
+ *
+ * Return: 20-bits globally unique context ID.
+ */
+u32 intel_execlists_ctx_id(struct intel_context *ctx,
+			   struct intel_engine_cs *ring)
+{
+	return intel_lr_context_descriptor(ctx, ring) >> GEN8_CTX_ID_SHIFT;
 }
 
 static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
@@ -363,20 +392,9 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_engine_cs *ring = rq->ring;
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
-	struct page *page;
-	uint32_t *reg_state;
-
-	BUG_ON(!ctx_obj);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
-
-	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
-	reg_state = kmap_atomic(page);
+	uint32_t *reg_state = rq->ctx->engine[ring->id].lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
 
 	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
 		/* True 32b PPGTT with dynamic page allocation: update PDP
@@ -390,8 +408,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
 		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
 	}
 
-	kunmap_atomic(reg_state);
-
 	return 0;
 }
 
@@ -431,9 +447,8 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
 			/* Same ctx: ignore first request, as second request
 			 * will update tail past first request's workload */
 			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			list_add_tail(&req0->execlist_link,
-				&ring->execlist_retired_req_list);
+			list_move_tail(&req0->execlist_link,
+				       &ring->execlist_retired_req_list);
 			req0 = cursor;
 		} else {
 			req1 = cursor;
@@ -478,16 +493,13 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 					    execlist_link);
 
 	if (head_req != NULL) {
-		struct drm_i915_gem_object *ctx_obj =
-				head_req->ctx->engine[ring->id].state;
-		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
+		if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) {
 			WARN(head_req->elsp_submitted == 0,
 			     "Never submitted head request\n");
 
 			if (--head_req->elsp_submitted <= 0) {
-				list_del(&head_req->execlist_link);
-				list_add_tail(&head_req->execlist_link,
-					&ring->execlist_retired_req_list);
+				list_move_tail(&head_req->execlist_link,
+					       &ring->execlist_retired_req_list);
 				return true;
 			}
 		}
@@ -496,6 +508,19 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
 	return false;
 }
 
+static void get_context_status(struct intel_engine_cs *ring,
+			       u8 read_pointer,
+			       u32 *status, u32 *context_id)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	if (WARN_ON(read_pointer >= GEN8_CSB_ENTRIES))
+		return;
+
+	*status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
+	*context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
+}
+
 /**
  * intel_lrc_irq_handler() - handle Context Switch interrupts
  * @ring: Engine Command Streamer to handle.
@@ -516,16 +541,16 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
 
 	read_pointer = ring->next_context_status_buffer;
-	write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
+	write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
 	if (read_pointer > write_pointer)
 		write_pointer += GEN8_CSB_ENTRIES;
 
 	spin_lock(&ring->execlist_lock);
 
 	while (read_pointer < write_pointer) {
-		read_pointer++;
-		status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
-		status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
+
+		get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
+				   &status, &status_id);
 
 		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
 			continue;
@@ -538,14 +563,14 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 				WARN(1, "Preemption without Lite Restore\n");
 		}
 
-		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
-		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
+		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+		    (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
 			if (execlists_check_remove_request(ring, status_id))
 				submit_contexts++;
 		}
 	}
 
-	if (disable_lite_restore_wa(ring)) {
+	if (ring->disable_lite_restore_wa) {
 		/* Prevent a ctx to preempt itself */
 		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
 		    (submit_contexts != 0))
@@ -556,13 +581,16 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
 
 	spin_unlock(&ring->execlist_lock);
 
-	WARN(submit_contexts > 2, "More than two context complete events?\n");
+	if (unlikely(submit_contexts > 2))
+		DRM_ERROR("More than two context complete events?\n");
+
 	ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
 
+	/* Update the read pointer to the old write pointer. Manual ringbuffer
+	 * management ftw </sarcasm> */
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-		   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
-				 ((u32)ring->next_context_status_buffer &
-				  GEN8_CSB_PTR_MASK) << 8));
+		   _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
+				 ring->next_context_status_buffer << 8));
 }
 
 static int execlists_context_queue(struct drm_i915_gem_request *request)
@@ -571,8 +599,8 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
 	struct drm_i915_gem_request *cursor;
 	int num_elements = 0;
 
-	if (request->ctx != ring->default_context)
-		intel_lr_context_pin(request);
+	if (request->ctx != request->i915->kernel_context)
+		intel_lr_context_pin(request->ctx, ring);
 
 	i915_gem_request_reference(request);
 
@@ -592,9 +620,8 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
 		if (request->ctx == tail_req->ctx) {
 			WARN(tail_req->elsp_submitted != 0,
 				"More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			list_add_tail(&tail_req->execlist_link,
-				&ring->execlist_retired_req_list);
+			list_move_tail(&tail_req->execlist_link,
+				       &ring->execlist_retired_req_list);
 		}
 	}
 
@@ -660,17 +687,27 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
 
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
 {
-	int ret;
+	int ret = 0;
 
 	request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
 
-	if (request->ctx != request->ring->default_context) {
-		ret = intel_lr_context_pin(request);
+	if (i915.enable_guc_submission) {
+		/*
+		 * Check that the GuC has space for the request before
+		 * going any further, as the i915_add_request() call
+		 * later on mustn't fail ...
+		 */
+		struct intel_guc *guc = &request->i915->guc;
+
+		ret = i915_guc_wq_check_space(guc->execbuf_client);
 		if (ret)
 			return ret;
 	}
 
-	return 0;
+	if (request->ctx != request->i915->kernel_context)
+		ret = intel_lr_context_pin(request->ctx, request->ring);
+
+	return ret;
 }
 
 static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
@@ -724,23 +761,46 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
  * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
  * point, the tail *inside* the context is updated and the ELSP written to.
  */
-static void
+static int
 intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
 {
-	struct intel_engine_cs *ring = request->ring;
+	struct intel_ringbuffer *ringbuf = request->ringbuf;
 	struct drm_i915_private *dev_priv = request->i915;
+	struct intel_engine_cs *engine = request->ring;
+
+	intel_logical_ring_advance(ringbuf);
+	request->tail = ringbuf->tail;
 
-	intel_logical_ring_advance(request->ringbuf);
+	/*
+	 * Here we add two extra NOOPs as padding to avoid
+	 * lite restore of a context with HEAD==TAIL.
+	 *
+	 * Caller must reserve WA_TAIL_DWORDS for us!
+	 */
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_advance(ringbuf);
 
-	request->tail = request->ringbuf->tail;
+	if (intel_ring_stopped(engine))
+		return 0;
 
-	if (intel_ring_stopped(ring))
-		return;
+	if (engine->last_context != request->ctx) {
+		if (engine->last_context)
+			intel_lr_context_unpin(engine->last_context, engine);
+		if (request->ctx != request->i915->kernel_context) {
+			intel_lr_context_pin(request->ctx, engine);
+			engine->last_context = request->ctx;
+		} else {
+			engine->last_context = NULL;
+		}
+	}
 
 	if (dev_priv->guc.execbuf_client)
 		i915_guc_submit(dev_priv->guc.execbuf_client, request);
 	else
 		execlists_context_queue(request);
+
+	return 0;
 }
 
 static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
@@ -967,8 +1027,9 @@ void intel_execlists_retire_requests(struct intel_engine_cs *ring)
 		struct drm_i915_gem_object *ctx_obj =
 				ctx->engine[ring->id].state;
 
-		if (ctx_obj && (ctx != ring->default_context))
-			intel_lr_context_unpin(req);
+		if (ctx_obj && (ctx != req->i915->kernel_context))
+			intel_lr_context_unpin(ctx, ring);
+
 		list_del(&req->execlist_link);
 		i915_gem_request_unreference(req);
 	}
@@ -1012,24 +1073,39 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
-		struct drm_i915_gem_object *ctx_obj,
-		struct intel_ringbuffer *ringbuf)
+static int intel_lr_context_do_pin(struct intel_context *ctx,
+				   struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	struct page *lrc_state_page;
+	uint32_t *lrc_reg_state;
+	int ret;
 
 	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
 			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
 	if (ret)
 		return ret;
 
+	lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
+	if (WARN_ON(!lrc_state_page)) {
+		ret = -ENODEV;
+		goto unpin_ctx_obj;
+	}
+
 	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
 	if (ret)
 		goto unpin_ctx_obj;
 
+	ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
+	intel_lr_context_descriptor_update(ctx, ring);
+	lrc_reg_state = kmap(lrc_state_page);
+	lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
+	ctx->engine[ring->id].lrc_reg_state = lrc_reg_state;
 	ctx_obj->dirty = true;
 
 	/* Invalidate GuC TLB. */
@@ -1044,37 +1120,40 @@ unpin_ctx_obj:
 	return ret;
 }
 
-static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
+static int intel_lr_context_pin(struct intel_context *ctx,
+				struct intel_engine_cs *engine)
 {
 	int ret = 0;
-	struct intel_engine_cs *ring = rq->ring;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
 
-	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
-		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
+	if (ctx->engine[engine->id].pin_count++ == 0) {
+		ret = intel_lr_context_do_pin(ctx, engine);
 		if (ret)
 			goto reset_pin_count;
+
+		i915_gem_context_reference(ctx);
 	}
 	return ret;
 
 reset_pin_count:
-	rq->ctx->engine[ring->id].pin_count = 0;
+	ctx->engine[engine->id].pin_count = 0;
 	return ret;
 }
 
-void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
+void intel_lr_context_unpin(struct intel_context *ctx,
+			    struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *ring = rq->ring;
-	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-	struct intel_ringbuffer *ringbuf = rq->ringbuf;
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
 
-	if (ctx_obj) {
-		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-			intel_unpin_ringbuffer_obj(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-		}
+	WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
+	if (--ctx->engine[engine->id].pin_count == 0) {
+		kunmap(kmap_to_page(ctx->engine[engine->id].lrc_reg_state));
+		intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+		ctx->engine[engine->id].lrc_vma = NULL;
+		ctx->engine[engine->id].lrc_desc = 0;
+		ctx->engine[engine->id].lrc_reg_state = NULL;
+
+		i915_gem_context_unreference(ctx);
 	}
 }
 
@@ -1087,7 +1166,7 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct i915_workarounds *w = &dev_priv->workarounds;
 
-	if (WARN_ON_ONCE(w->count == 0))
+	if (w->count == 0)
 		return 0;
 
 	ring->gpu_caches_dirty = true;
@@ -1474,7 +1553,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
 	u8 next_context_status_buffer_hw;
 
 	lrc_setup_hardware_status_page(ring,
-				ring->default_context->engine[ring->id].state);
+				dev_priv->kernel_context->engine[ring->id].state);
 
 	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
 	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
@@ -1493,9 +1572,11 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring)
 	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
 	 * BDW  | CSB regs not reset       | CSB regs reset       |
 	 * CHT  | CSB regs not reset       | CSB regs not reset   |
+	 * SKL  |         ?                |         ?            |
+	 * BXT  |         ?                |         ?            |
 	 */
-	next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring))
-						   & GEN8_CSB_PTR_MASK);
+	next_context_status_buffer_hw =
+		GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(ring)));
 
 	/*
 	 * When the CSB registers are reset (also after power-up / gpu reset),
@@ -1698,7 +1779,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
 	struct intel_engine_cs *ring = ringbuf->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	bool vf_flush_wa;
+	bool vf_flush_wa = false;
 	u32 flags = 0;
 	int ret;
 
@@ -1720,14 +1801,14 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_QW_WRITE;
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-	}
 
-	/*
-	 * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
-	 * control.
-	 */
-	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
-		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		/*
+		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
+		 * pipe control.
+		 */
+		if (IS_GEN9(ring->dev))
+			vf_flush_wa = true;
+	}
 
 	ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
 	if (ret)
@@ -1791,44 +1872,65 @@ static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
+/*
+ * Reserve space for 2 NOOPs at the end of each request to be
+ * used as a workaround for not being allowed to do lite
+ * restore with HEAD==TAIL (WaIdleLiteRestore).
+ */
+#define WA_TAIL_DWORDS 2
+
+static inline u32 hws_seqno_address(struct intel_engine_cs *engine)
+{
+	return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
+}
+
 static int gen8_emit_request(struct drm_i915_gem_request *request)
 {
 	struct intel_ringbuffer *ringbuf = request->ringbuf;
-	struct intel_engine_cs *ring = ringbuf->ring;
-	u32 cmd;
 	int ret;
 
-	/*
-	 * Reserve space for 2 NOOPs at the end of each request to be
-	 * used as a workaround for not being allowed to do lite
-	 * restore with HEAD==TAIL (WaIdleLiteRestore).
-	 */
-	ret = intel_logical_ring_begin(request, 8);
+	ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
 	if (ret)
 		return ret;
 
-	cmd = MI_STORE_DWORD_IMM_GEN4;
-	cmd |= MI_GLOBAL_GTT;
+	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	intel_logical_ring_emit(ringbuf, cmd);
 	intel_logical_ring_emit(ringbuf,
-				(ring->status_page.gfx_addr +
-				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
+				(MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
+	intel_logical_ring_emit(ringbuf,
+				hws_seqno_address(request->ring) |
+				MI_FLUSH_DW_USE_GTT);
 	intel_logical_ring_emit(ringbuf, 0);
 	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance_and_submit(request);
+	return intel_logical_ring_advance_and_submit(request);
+}
 
-	/*
-	 * Here we add two extra NOOPs as padding to avoid
-	 * lite restore of a context with HEAD==TAIL.
-	 */
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance(ringbuf);
+static int gen8_emit_request_render(struct drm_i915_gem_request *request)
+{
+	struct intel_ringbuffer *ringbuf = request->ringbuf;
+	int ret;
 
-	return 0;
+	ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
+	if (ret)
+		return ret;
+
+	/* w/a for post sync ops following a GPGPU operation we
+	 * need a prior CS_STALL, which is emitted by the flush
+	 * following the batch.
+	 */
+	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5));
+	intel_logical_ring_emit(ringbuf,
+				(PIPE_CONTROL_GLOBAL_GTT_IVB |
+				 PIPE_CONTROL_CS_STALL |
+				 PIPE_CONTROL_QW_WRITE));
+	intel_logical_ring_emit(ringbuf, hws_seqno_address(request->ring));
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
+	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+	return intel_logical_ring_advance_and_submit(request);
 }
 
 static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
@@ -1911,12 +2013,44 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 		ring->status_page.obj = NULL;
 	}
 
+	ring->disable_lite_restore_wa = false;
+	ring->ctx_desc_template = 0;
+
 	lrc_destroy_wa_ctx_obj(ring);
 	ring->dev = NULL;
 }
 
-static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
+static void
+logical_ring_default_vfuncs(struct drm_device *dev,
+			    struct intel_engine_cs *ring)
 {
+	/* Default vfuncs which can be overriden by each engine. */
+	ring->init_hw = gen8_init_common_ring;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
+		ring->get_seqno = bxt_a_get_seqno;
+		ring->set_seqno = bxt_a_set_seqno;
+	} else {
+		ring->get_seqno = gen8_get_seqno;
+		ring->set_seqno = gen8_set_seqno;
+	}
+}
+
+static inline void
+logical_ring_default_irqs(struct intel_engine_cs *ring, unsigned shift)
+{
+	ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
+	ring->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
+}
+
+static int
+logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
+{
+	struct intel_context *dctx = to_i915(dev)->kernel_context;
 	int ret;
 
 	/* Intentionally left blank. */
@@ -1933,19 +2067,18 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
 	spin_lock_init(&ring->execlist_lock);
 
+	logical_ring_init_platform_invariants(ring);
+
 	ret = i915_cmd_parser_init_ring(ring);
 	if (ret)
 		goto error;
 
-	ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
+	ret = intel_lr_context_deferred_alloc(dctx, ring);
 	if (ret)
 		goto error;
 
 	/* As this is the default context, always pin it */
-	ret = intel_lr_context_do_pin(
-			ring,
-			ring->default_context->engine[ring->id].state,
-			ring->default_context->engine[ring->id].ringbuf);
+	ret = intel_lr_context_do_pin(dctx, ring);
 	if (ret) {
 		DRM_ERROR(
 			"Failed to pin and map ringbuffer %s: %d\n",
@@ -1968,32 +2101,25 @@ static int logical_render_ring_init(struct drm_device *dev)
 
 	ring->name = "render ring";
 	ring->id = RCS;
+	ring->exec_id = I915_EXEC_RENDER;
+	ring->guc_id = GUC_RENDER_ENGINE;
 	ring->mmio_base = RENDER_RING_BASE;
-	ring->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
-	ring->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
+
+	logical_ring_default_irqs(ring, GEN8_RCS_IRQ_SHIFT);
 	if (HAS_L3_DPF(dev))
 		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
+	logical_ring_default_vfuncs(dev, ring);
+
+	/* Override some for render ring. */
 	if (INTEL_INFO(dev)->gen >= 9)
 		ring->init_hw = gen9_init_render_ring;
 	else
 		ring->init_hw = gen8_init_render_ring;
 	ring->init_context = gen8_init_rcs_context;
 	ring->cleanup = intel_fini_pipe_control;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
-	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
-	}
-	ring->emit_request = gen8_emit_request;
 	ring->emit_flush = gen8_emit_flush_render;
-	ring->irq_get = gen8_logical_ring_get_irq;
-	ring->irq_put = gen8_logical_ring_put_irq;
-	ring->emit_bb_start = gen8_emit_bb_start;
+	ring->emit_request = gen8_emit_request_render;
 
 	ring->dev = dev;
 
@@ -2027,25 +2153,12 @@ static int logical_bsd_ring_init(struct drm_device *dev)
 
 	ring->name = "bsd ring";
 	ring->id = VCS;
+	ring->exec_id = I915_EXEC_BSD;
+	ring->guc_id = GUC_VIDEO_ENGINE;
 	ring->mmio_base = GEN6_BSD_RING_BASE;
-	ring->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
-	ring->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
 
-	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
-	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
-	}
-	ring->emit_request = gen8_emit_request;
-	ring->emit_flush = gen8_emit_flush;
-	ring->irq_get = gen8_logical_ring_get_irq;
-	ring->irq_put = gen8_logical_ring_put_irq;
-	ring->emit_bb_start = gen8_emit_bb_start;
+	logical_ring_default_irqs(ring, GEN8_VCS1_IRQ_SHIFT);
+	logical_ring_default_vfuncs(dev, ring);
 
 	return logical_ring_init(dev, ring);
 }
@@ -2055,22 +2168,14 @@ static int logical_bsd2_ring_init(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
 
-	ring->name = "bds2 ring";
+	ring->name = "bsd2 ring";
 	ring->id = VCS2;
+	ring->exec_id = I915_EXEC_BSD;
+	ring->guc_id = GUC_VIDEO_ENGINE2;
 	ring->mmio_base = GEN8_BSD2_RING_BASE;
-	ring->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
-	ring->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
 
-	ring->init_hw = gen8_init_common_ring;
-	ring->get_seqno = gen8_get_seqno;
-	ring->set_seqno = gen8_set_seqno;
-	ring->emit_request = gen8_emit_request;
-	ring->emit_flush = gen8_emit_flush;
-	ring->irq_get = gen8_logical_ring_get_irq;
-	ring->irq_put = gen8_logical_ring_put_irq;
-	ring->emit_bb_start = gen8_emit_bb_start;
+	logical_ring_default_irqs(ring, GEN8_VCS2_IRQ_SHIFT);
+	logical_ring_default_vfuncs(dev, ring);
 
 	return logical_ring_init(dev, ring);
 }
@@ -2082,25 +2187,12 @@ static int logical_blt_ring_init(struct drm_device *dev)
 
 	ring->name = "blitter ring";
 	ring->id = BCS;
+	ring->exec_id = I915_EXEC_BLT;
+	ring->guc_id = GUC_BLITTER_ENGINE;
 	ring->mmio_base = BLT_RING_BASE;
-	ring->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
-	ring->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
 
-	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
-	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
-	}
-	ring->emit_request = gen8_emit_request;
-	ring->emit_flush = gen8_emit_flush;
-	ring->irq_get = gen8_logical_ring_get_irq;
-	ring->irq_put = gen8_logical_ring_put_irq;
-	ring->emit_bb_start = gen8_emit_bb_start;
+	logical_ring_default_irqs(ring, GEN8_BCS_IRQ_SHIFT);
+	logical_ring_default_vfuncs(dev, ring);
 
 	return logical_ring_init(dev, ring);
 }
@@ -2112,25 +2204,12 @@ static int logical_vebox_ring_init(struct drm_device *dev)
 
 	ring->name = "video enhancement ring";
 	ring->id = VECS;
+	ring->exec_id = I915_EXEC_VEBOX;
+	ring->guc_id = GUC_VIDEOENHANCE_ENGINE;
 	ring->mmio_base = VEBOX_RING_BASE;
-	ring->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
-	ring->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
 
-	ring->init_hw = gen8_init_common_ring;
-	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-		ring->get_seqno = bxt_a_get_seqno;
-		ring->set_seqno = bxt_a_set_seqno;
-	} else {
-		ring->get_seqno = gen8_get_seqno;
-		ring->set_seqno = gen8_set_seqno;
-	}
-	ring->emit_request = gen8_emit_request;
-	ring->emit_flush = gen8_emit_flush;
-	ring->irq_get = gen8_logical_ring_get_irq;
-	ring->irq_put = gen8_logical_ring_put_irq;
-	ring->emit_bb_start = gen8_emit_bb_start;
+	logical_ring_default_irqs(ring, GEN8_VECS_IRQ_SHIFT);
+	logical_ring_default_vfuncs(dev, ring);
 
 	return logical_ring_init(dev, ring);
 }
@@ -2235,6 +2314,27 @@ make_rpcs(struct drm_device *dev)
 	return rpcs;
 }
 
+static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *ring)
+{
+	u32 indirect_ctx_offset;
+
+	switch (INTEL_INFO(ring->dev)->gen) {
+	default:
+		MISSING_CASE(INTEL_INFO(ring->dev)->gen);
+		/* fall through */
+	case 9:
+		indirect_ctx_offset =
+			GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	case 8:
+		indirect_ctx_offset =
+			GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+		break;
+	}
+
+	return indirect_ctx_offset;
+}
+
 static int
 populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
 		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
@@ -2278,7 +2378,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(ring),
 		       _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
 					  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-					  CTX_CTRL_RS_CTX_ENABLE));
+					  (HAS_RESOURCE_STREAMER(dev) ?
+					    CTX_CTRL_RS_CTX_ENABLE : 0)));
 	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
 	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
 	/* Ring buffer start address is not known until the buffer is pinned.
@@ -2307,7 +2408,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
 
 			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
-				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+				intel_lr_indirect_ctx_offset(ring) << 6;
 
 			reg_state[CTX_BB_PER_CTX_PTR+1] =
 				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
@@ -2368,26 +2469,39 @@ void intel_lr_context_free(struct intel_context *ctx)
 {
 	int i;
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = I915_NUM_RINGS; --i >= 0; ) {
+		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
-		if (ctx_obj) {
-			struct intel_ringbuffer *ringbuf =
-					ctx->engine[i].ringbuf;
-			struct intel_engine_cs *ring = ringbuf->ring;
+		if (!ctx_obj)
+			continue;
 
-			if (ctx == ring->default_context) {
-				intel_unpin_ringbuffer_obj(ringbuf);
-				i915_gem_object_ggtt_unpin(ctx_obj);
-			}
-			WARN_ON(ctx->engine[ring->id].pin_count);
-			intel_ringbuffer_free(ringbuf);
-			drm_gem_object_unreference(&ctx_obj->base);
+		if (ctx == ctx->i915->kernel_context) {
+			intel_unpin_ringbuffer_obj(ringbuf);
+			i915_gem_object_ggtt_unpin(ctx_obj);
 		}
+
+		WARN_ON(ctx->engine[i].pin_count);
+		intel_ringbuffer_free(ringbuf);
+		drm_gem_object_unreference(&ctx_obj->base);
 	}
 }
 
-static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
+/**
+ * intel_lr_context_size() - return the size of the context for an engine
+ * @ring: which engine to find the context size for
+ *
+ * Each engine may require a different amount of space for a context image,
+ * so when allocating (or copying) an image, this function can be used to
+ * find the right size for the specific engine.
+ *
+ * Return: size (in bytes) of an engine-specific context image
+ *
+ * Note: this size includes the HWSP, which is part of the context image
+ * in LRC mode, but does not include the "shared data page" used with
+ * GuC submission. The caller should account for this if using the GuC.
+ */
+uint32_t intel_lr_context_size(struct intel_engine_cs *ring)
 {
 	int ret = 0;
 
@@ -2444,7 +2558,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
  */
 
 int intel_lr_context_deferred_alloc(struct intel_context *ctx,
-				     struct intel_engine_cs *ring)
+				    struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;
@@ -2455,7 +2569,7 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
 	WARN_ON(ctx->engine[ring->id].state);
 
-	context_size = round_up(get_lr_context_size(ring), 4096);
+	context_size = round_up(intel_lr_context_size(ring), 4096);
 
 	/* One extra page as the sharing data between driver and GuC */
 	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
@@ -2481,14 +2595,13 @@ int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 	ctx->engine[ring->id].ringbuf = ringbuf;
 	ctx->engine[ring->id].state = ctx_obj;
 
-	if (ctx != ring->default_context && ring->init_context) {
+	if (ctx != ctx->i915->kernel_context && ring->init_context) {
 		struct drm_i915_gem_request *req;
 
-		ret = i915_gem_request_alloc(ring,
-			ctx, &req);
-		if (ret) {
-			DRM_ERROR("ring create req: %d\n",
-				ret);
+		req = i915_gem_request_alloc(ring, ctx);
+		if (IS_ERR(req)) {
+			ret = PTR_ERR(req);
+			DRM_ERROR("ring create req: %d\n", ret);
 			goto error_ringbuf;
 		}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 0b821b91723a..e6cda3e225d0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -25,8 +25,6 @@
 #define _INTEL_LRC_H_
 
 #define GEN8_LR_CONTEXT_ALIGN 4096
-#define GEN8_CSB_ENTRIES 6
-#define GEN8_CSB_PTR_MASK 0x07
 
 /* Execlists regs */
 #define RING_ELSP(ring)				_MMIO((ring)->mmio_base + 0x230)
@@ -40,6 +38,22 @@
 #define RING_CONTEXT_STATUS_BUF_HI(ring, i)	_MMIO((ring)->mmio_base + 0x370 + (i) * 8 + 4)
 #define RING_CONTEXT_STATUS_PTR(ring)		_MMIO((ring)->mmio_base + 0x3a0)
 
+/* The docs specify that the write pointer wraps around after 5h, "After status
+ * is written out to the last available status QW at offset 5h, this pointer
+ * wraps to 0."
+ *
+ * Therefore, one must infer than even though there are 3 bits available, 6 and
+ * 7 appear to be * reserved.
+ */
+#define GEN8_CSB_ENTRIES 6
+#define GEN8_CSB_PTR_MASK 0x7
+#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8)
+#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0)
+#define GEN8_CSB_WRITE_PTR(csb_status) \
+	(((csb_status) & GEN8_CSB_WRITE_PTR_MASK) >> 0)
+#define GEN8_CSB_READ_PTR(csb_status) \
+	(((csb_status) & GEN8_CSB_READ_PTR_MASK) >> 8)
+
 /* Logical Rings */
 int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request);
 int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request);
@@ -84,21 +98,25 @@ static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer *ringbuf,
 #define LRC_STATE_PN	(LRC_PPHWSP_PN + 1)
 
 void intel_lr_context_free(struct intel_context *ctx);
+uint32_t intel_lr_context_size(struct intel_engine_cs *ring);
 int intel_lr_context_deferred_alloc(struct intel_context *ctx,
 				    struct intel_engine_cs *ring);
-void intel_lr_context_unpin(struct drm_i915_gem_request *req);
+void intel_lr_context_unpin(struct intel_context *ctx,
+			    struct intel_engine_cs *engine);
 void intel_lr_context_reset(struct drm_device *dev,
 			struct intel_context *ctx);
 uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
 
+u32 intel_execlists_ctx_id(struct intel_context *ctx,
+			   struct intel_engine_cs *ring);
+
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
 struct i915_execbuffer_params;
 int intel_execlists_submission(struct i915_execbuffer_params *params,
 			       struct drm_i915_gem_execbuffer2 *args,
 			       struct list_head *vmas);
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
 void intel_lrc_irq_handler(struct intel_engine_cs *ring);
 void intel_execlists_retire_requests(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index bc04d8d29acb..30a8403a8f4f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -31,6 +31,7 @@
 #include <linux/dmi.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
+#include <linux/vga_switcheroo.h>
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
@@ -1088,7 +1089,12 @@ void intel_lvds_init(struct drm_device *dev)
 	 * preferred mode is the right one.
 	 */
 	mutex_lock(&dev->mode_config.mutex);
-	edid = drm_get_edid(connector, intel_gmbus_get_adapter(dev_priv, pin));
+	if (vga_switcheroo_handler_flags() & VGA_SWITCHEROO_CAN_SWITCH_DDC)
+		edid = drm_get_edid_switcheroo(connector,
+				    intel_gmbus_get_adapter(dev_priv, pin));
+	else
+		edid = drm_get_edid(connector,
+				    intel_gmbus_get_adapter(dev_priv, pin));
 	if (edid) {
 		if (drm_add_edid_modes(connector, edid)) {
 			drm_mode_connector_update_edid_property(connector,
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 76f1980a7541..9168413fe204 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -240,9 +240,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	WARN_ON(overlay->active);
 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
 
-	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-	if (ret)
-		return ret;
+	req = i915_gem_request_alloc(ring, NULL);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
 	ret = intel_ring_begin(req, 4);
 	if (ret) {
@@ -283,9 +283,9 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-	if (ret)
-		return ret;
+	req = i915_gem_request_alloc(ring, NULL);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
 	ret = intel_ring_begin(req, 2);
 	if (ret) {
@@ -349,9 +349,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	 * of the hw. Do it in both cases */
 	flip_addr |= OFC_UPDATE;
 
-	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-	if (ret)
-		return ret;
+	req = i915_gem_request_alloc(ring, NULL);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
 	ret = intel_ring_begin(req, 6);
 	if (ret) {
@@ -423,9 +423,9 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		/* synchronous slowpath */
 		struct drm_i915_gem_request *req;
 
-		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-		if (ret)
-			return ret;
+		req = i915_gem_request_alloc(ring, NULL);
+		if (IS_ERR(req))
+			return PTR_ERR(req);
 
 		ret = intel_ring_begin(req, 2);
 		if (ret) {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b28c29f20e75..347d4df49a9b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -32,6 +32,8 @@
 #include <linux/module.h>
 
 /**
+ * DOC: RC6
+ *
  * RC6 is a special power stage which allows the GPU to enter an very
  * low-voltage mode when idle, using down to 0V while at this stage.  This
  * stage is entered automatically when the GPU is idle when RC6 support is
@@ -546,7 +548,7 @@ static const struct intel_watermark_params i845_wm_info = {
  * intel_calculate_wm - calculate watermark level
  * @clock_in_khz: pixel clock
  * @wm: chip FIFO params
- * @pixel_size: display pixel size
+ * @cpp: bytes per pixel
  * @latency_ns: memory latency for the platform
  *
  * Calculate the watermark level (the level at which the display plane will
@@ -562,8 +564,7 @@ static const struct intel_watermark_params i845_wm_info = {
  */
 static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 					const struct intel_watermark_params *wm,
-					int fifo_size,
-					int pixel_size,
+					int fifo_size, int cpp,
 					unsigned long latency_ns)
 {
 	long entries_required, wm_size;
@@ -574,7 +575,7 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 	 * clocks go from a few thousand to several hundred thousand.
 	 * latency is usually a few thousand
 	 */
-	entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
+	entries_required = ((clock_in_khz / 1000) * cpp * latency_ns) /
 		1000;
 	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
 
@@ -638,13 +639,13 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)
 	crtc = single_enabled_crtc(dev);
 	if (crtc) {
 		const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
-		int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
+		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 		int clock = adjusted_mode->crtc_clock;
 
 		/* Display SR */
 		wm = intel_calculate_wm(clock, &pineview_display_wm,
 					pineview_display_wm.fifo_size,
-					pixel_size, latency->display_sr);
+					cpp, latency->display_sr);
 		reg = I915_READ(DSPFW1);
 		reg &= ~DSPFW_SR_MASK;
 		reg |= FW_WM(wm, SR);
@@ -654,7 +655,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)
 		/* cursor SR */
 		wm = intel_calculate_wm(clock, &pineview_cursor_wm,
 					pineview_display_wm.fifo_size,
-					pixel_size, latency->cursor_sr);
+					cpp, latency->cursor_sr);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_CURSOR_SR_MASK;
 		reg |= FW_WM(wm, CURSOR_SR);
@@ -663,7 +664,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)
 		/* Display HPLL off SR */
 		wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
 					pineview_display_hplloff_wm.fifo_size,
-					pixel_size, latency->display_hpll_disable);
+					cpp, latency->display_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_SR_MASK;
 		reg |= FW_WM(wm, HPLL_SR);
@@ -672,7 +673,7 @@ static void pineview_update_wm(struct drm_crtc *unused_crtc)
 		/* cursor HPLL off SR */
 		wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
 					pineview_display_hplloff_wm.fifo_size,
-					pixel_size, latency->cursor_hpll_disable);
+					cpp, latency->cursor_hpll_disable);
 		reg = I915_READ(DSPFW3);
 		reg &= ~DSPFW_HPLL_CURSOR_MASK;
 		reg |= FW_WM(wm, HPLL_CURSOR);
@@ -696,7 +697,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,
 {
 	struct drm_crtc *crtc;
 	const struct drm_display_mode *adjusted_mode;
-	int htotal, hdisplay, clock, pixel_size;
+	int htotal, hdisplay, clock, cpp;
 	int line_time_us, line_count;
 	int entries, tlb_miss;
 
@@ -711,10 +712,10 @@ static bool g4x_compute_wm0(struct drm_device *dev,
 	clock = adjusted_mode->crtc_clock;
 	htotal = adjusted_mode->crtc_htotal;
 	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
-	pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
+	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 
 	/* Use the small buffer method to calculate plane watermark */
-	entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
+	entries = ((clock * cpp / 1000) * display_latency_ns) / 1000;
 	tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
 	if (tlb_miss > 0)
 		entries += tlb_miss;
@@ -726,7 +727,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,
 	/* Use the large buffer method to calculate cursor watermark */
 	line_time_us = max(htotal * 1000 / clock, 1);
 	line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
-	entries = line_count * crtc->cursor->state->crtc_w * pixel_size;
+	entries = line_count * crtc->cursor->state->crtc_w * cpp;
 	tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
 	if (tlb_miss > 0)
 		entries += tlb_miss;
@@ -782,7 +783,7 @@ static bool g4x_compute_srwm(struct drm_device *dev,
 {
 	struct drm_crtc *crtc;
 	const struct drm_display_mode *adjusted_mode;
-	int hdisplay, htotal, pixel_size, clock;
+	int hdisplay, htotal, cpp, clock;
 	unsigned long line_time_us;
 	int line_count, line_size;
 	int small, large;
@@ -798,21 +799,21 @@ static bool g4x_compute_srwm(struct drm_device *dev,
 	clock = adjusted_mode->crtc_clock;
 	htotal = adjusted_mode->crtc_htotal;
 	hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
-	pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
+	cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 
 	line_time_us = max(htotal * 1000 / clock, 1);
 	line_count = (latency_ns / line_time_us + 1000) / 1000;
-	line_size = hdisplay * pixel_size;
+	line_size = hdisplay * cpp;
 
 	/* Use the minimum of the small and large buffer method for primary */
-	small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
+	small = ((clock * cpp / 1000) * latency_ns) / 1000;
 	large = line_count * line_size;
 
 	entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
 	*display_wm = entries + display->guard_size;
 
 	/* calculate the self-refresh watermark for display cursor */
-	entries = line_count * pixel_size * crtc->cursor->state->crtc_w;
+	entries = line_count * cpp * crtc->cursor->state->crtc_w;
 	entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
 	*cursor_wm = entries + cursor->guard_size;
 
@@ -904,13 +905,13 @@ enum vlv_wm_level {
 static unsigned int vlv_wm_method2(unsigned int pixel_rate,
 				   unsigned int pipe_htotal,
 				   unsigned int horiz_pixels,
-				   unsigned int bytes_per_pixel,
+				   unsigned int cpp,
 				   unsigned int latency)
 {
 	unsigned int ret;
 
 	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
-	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
+	ret = (ret + 1) * horiz_pixels * cpp;
 	ret = DIV_ROUND_UP(ret, 64);
 
 	return ret;
@@ -939,7 +940,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
 				     int level)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	int clock, htotal, pixel_size, width, wm;
+	int clock, htotal, cpp, width, wm;
 
 	if (dev_priv->wm.pri_latency[level] == 0)
 		return USHRT_MAX;
@@ -947,7 +948,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
 	if (!state->visible)
 		return 0;
 
-	pixel_size = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
+	cpp = drm_format_plane_cpp(state->base.fb->pixel_format, 0);
 	clock = crtc->config->base.adjusted_mode.crtc_clock;
 	htotal = crtc->config->base.adjusted_mode.crtc_htotal;
 	width = crtc->config->pipe_src_w;
@@ -963,7 +964,7 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
 		 */
 		wm = 63;
 	} else {
-		wm = vlv_wm_method2(clock, htotal, width, pixel_size,
+		wm = vlv_wm_method2(clock, htotal, width, cpp,
 				    dev_priv->wm.pri_latency[level] * 10);
 	}
 
@@ -1437,7 +1438,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
 		int clock = adjusted_mode->crtc_clock;
 		int htotal = adjusted_mode->crtc_htotal;
 		int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
-		int pixel_size = crtc->primary->state->fb->bits_per_pixel / 8;
+		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 		unsigned long line_time_us;
 		int entries;
 
@@ -1445,7 +1446,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
 
 		/* Use ns/us then divide to preserve precision */
 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-			pixel_size * hdisplay;
+			cpp * hdisplay;
 		entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
 		srwm = I965_FIFO_SIZE - entries;
 		if (srwm < 0)
@@ -1455,7 +1456,7 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
 			      entries, srwm);
 
 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-			pixel_size * crtc->cursor->state->crtc_w;
+			cpp * crtc->cursor->state->crtc_w;
 		entries = DIV_ROUND_UP(entries,
 					  i965_cursor_wm_info.cacheline_size);
 		cursor_sr = i965_cursor_wm_info.fifo_size -
@@ -1516,7 +1517,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
 	crtc = intel_get_crtc_for_plane(dev, 0);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode;
-		int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
+		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 		if (IS_GEN2(dev))
 			cpp = 4;
 
@@ -1538,7 +1539,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
 	crtc = intel_get_crtc_for_plane(dev, 1);
 	if (intel_crtc_active(crtc)) {
 		const struct drm_display_mode *adjusted_mode;
-		int cpp = crtc->primary->state->fb->bits_per_pixel / 8;
+		int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0);
 		if (IS_GEN2(dev))
 			cpp = 4;
 
@@ -1584,7 +1585,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
 		int clock = adjusted_mode->crtc_clock;
 		int htotal = adjusted_mode->crtc_htotal;
 		int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
-		int pixel_size = enabled->primary->state->fb->bits_per_pixel / 8;
+		int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0);
 		unsigned long line_time_us;
 		int entries;
 
@@ -1592,7 +1593,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc)
 
 		/* Use ns/us then divide to preserve precision */
 		entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
-			pixel_size * hdisplay;
+			cpp * hdisplay;
 		entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
 		DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
 		srwm = wm_info->fifo_size - entries;
@@ -1672,6 +1673,9 @@ uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 		if (pipe_h < pfit_h)
 			pipe_h = pfit_h;
 
+		if (WARN_ON(!pfit_w || !pfit_h))
+			return pixel_rate;
+
 		pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
 				     pfit_w * pfit_h);
 	}
@@ -1680,15 +1684,14 @@ uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 }
 
 /* latency must be in 0.1us units. */
-static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
-			       uint32_t latency)
+static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
 {
 	uint64_t ret;
 
 	if (WARN(latency == 0, "Latency value missing\n"))
 		return UINT_MAX;
 
-	ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
+	ret = (uint64_t) pixel_rate * cpp * latency;
 	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
 
 	return ret;
@@ -1696,24 +1699,37 @@ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
 
 /* latency must be in 0.1us units. */
 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
-			       uint32_t horiz_pixels, uint8_t bytes_per_pixel,
+			       uint32_t horiz_pixels, uint8_t cpp,
 			       uint32_t latency)
 {
 	uint32_t ret;
 
 	if (WARN(latency == 0, "Latency value missing\n"))
 		return UINT_MAX;
+	if (WARN_ON(!pipe_htotal))
+		return UINT_MAX;
 
 	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
-	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
+	ret = (ret + 1) * horiz_pixels * cpp;
 	ret = DIV_ROUND_UP(ret, 64) + 2;
 	return ret;
 }
 
 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
-			   uint8_t bytes_per_pixel)
+			   uint8_t cpp)
 {
-	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
+	/*
+	 * Neither of these should be possible since this function shouldn't be
+	 * called if the CRTC is off or the plane is invisible.  But let's be
+	 * extra paranoid to avoid a potential divide-by-zero if we screw up
+	 * elsewhere in the driver.
+	 */
+	if (WARN_ON(!cpp))
+		return 0;
+	if (WARN_ON(!horiz_pixels))
+		return 0;
+
+	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * cpp) + 2;
 }
 
 struct ilk_wm_maximums {
@@ -1732,13 +1748,14 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
 				   uint32_t mem_value,
 				   bool is_lp)
 {
-	int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+	int cpp = pstate->base.fb ?
+		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
 	uint32_t method1, method2;
 
 	if (!cstate->base.active || !pstate->visible)
 		return 0;
 
-	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
+	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
 
 	if (!is_lp)
 		return method1;
@@ -1746,8 +1763,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
 	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
 				 cstate->base.adjusted_mode.crtc_htotal,
 				 drm_rect_width(&pstate->dst),
-				 bpp,
-				 mem_value);
+				 cpp, mem_value);
 
 	return min(method1, method2);
 }
@@ -1760,18 +1776,18 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
 				   const struct intel_plane_state *pstate,
 				   uint32_t mem_value)
 {
-	int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+	int cpp = pstate->base.fb ?
+		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
 	uint32_t method1, method2;
 
 	if (!cstate->base.active || !pstate->visible)
 		return 0;
 
-	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), bpp, mem_value);
+	method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value);
 	method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate),
 				 cstate->base.adjusted_mode.crtc_htotal,
 				 drm_rect_width(&pstate->dst),
-				 bpp,
-				 mem_value);
+				 cpp, mem_value);
 	return min(method1, method2);
 }
 
@@ -1804,12 +1820,13 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
 				   const struct intel_plane_state *pstate,
 				   uint32_t pri_val)
 {
-	int bpp = pstate->base.fb ? pstate->base.fb->bits_per_pixel / 8 : 0;
+	int cpp = pstate->base.fb ?
+		drm_format_plane_cpp(pstate->base.fb->pixel_format, 0) : 0;
 
 	if (!cstate->base.active || !pstate->visible)
 		return 0;
 
-	return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), bpp);
+	return ilk_wm_fbc(pri_val, drm_rect_width(&pstate->dst), cpp);
 }
 
 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
@@ -2002,14 +2019,19 @@ static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
 }
 
 static uint32_t
-hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
+hsw_compute_linetime_wm(struct drm_device *dev,
+			struct intel_crtc_state *cstate)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode;
+	const struct drm_display_mode *adjusted_mode =
+		&cstate->base.adjusted_mode;
 	u32 linetime, ips_linetime;
 
-	if (!intel_crtc->active)
+	if (!cstate->base.active)
+		return 0;
+	if (WARN_ON(adjusted_mode->crtc_clock == 0))
+		return 0;
+	if (WARN_ON(dev_priv->cdclk_freq == 0))
 		return 0;
 
 	/* The WM are computed with base on how long it takes to fill a single
@@ -2281,6 +2303,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc *intel_crtc,
 		return PTR_ERR(cstate);
 
 	pipe_wm = &cstate->wm.optimal.ilk;
+	memset(pipe_wm, 0, sizeof(*pipe_wm));
 
 	for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
 		ps = drm_atomic_get_plane_state(state,
@@ -2317,8 +2340,7 @@ static int ilk_compute_pipe_wm(struct intel_crtc *intel_crtc,
 			     pristate, sprstate, curstate, &pipe_wm->wm[0]);
 
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-		pipe_wm->linetime = hsw_compute_linetime_wm(dev,
-							    &intel_crtc->base);
+		pipe_wm->linetime = hsw_compute_linetime_wm(dev, cstate);
 
 	/* LP0 watermarks always use 1/2 DDB partitioning */
 	ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
@@ -3028,26 +3050,25 @@ static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
 
 /*
  * The max latency should be 257 (max the punit can code is 255 and we add 2us
- * for the read latency) and bytes_per_pixel should always be <= 8, so that
+ * for the read latency) and cpp should always be <= 8, so that
  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
 */
-static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
-			       uint32_t latency)
+static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency)
 {
 	uint32_t wm_intermediate_val, ret;
 
 	if (latency == 0)
 		return UINT_MAX;
 
-	wm_intermediate_val = latency * pixel_rate * bytes_per_pixel / 512;
+	wm_intermediate_val = latency * pixel_rate * cpp / 512;
 	ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
 
 	return ret;
 }
 
 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
-			       uint32_t horiz_pixels, uint8_t bytes_per_pixel,
+			       uint32_t horiz_pixels, uint8_t cpp,
 			       uint64_t tiling, uint32_t latency)
 {
 	uint32_t ret;
@@ -3057,7 +3078,7 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
 	if (latency == 0)
 		return UINT_MAX;
 
-	plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
+	plane_bytes_per_line = horiz_pixels * cpp;
 
 	if (tiling == I915_FORMAT_MOD_Y_TILED ||
 	    tiling == I915_FORMAT_MOD_Yf_TILED) {
@@ -3107,23 +3128,21 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	uint32_t plane_bytes_per_line, plane_blocks_per_line;
 	uint32_t res_blocks, res_lines;
 	uint32_t selected_result;
-	uint8_t bytes_per_pixel;
+	uint8_t cpp;
 
 	if (latency == 0 || !cstate->base.active || !fb)
 		return false;
 
-	bytes_per_pixel = drm_format_plane_cpp(fb->pixel_format, 0);
+	cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 	method1 = skl_wm_method1(skl_pipe_pixel_rate(cstate),
-				 bytes_per_pixel,
-				 latency);
+				 cpp, latency);
 	method2 = skl_wm_method2(skl_pipe_pixel_rate(cstate),
 				 cstate->base.adjusted_mode.crtc_htotal,
 				 cstate->pipe_src_w,
-				 bytes_per_pixel,
-				 fb->modifier[0],
+				 cpp, fb->modifier[0],
 				 latency);
 
-	plane_bytes_per_line = cstate->pipe_src_w * bytes_per_pixel;
+	plane_bytes_per_line = cstate->pipe_src_w * cpp;
 	plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 
 	if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
@@ -3131,11 +3150,11 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 		uint32_t min_scanlines = 4;
 		uint32_t y_tile_minimum;
 		if (intel_rotation_90_or_270(plane->state->rotation)) {
-			int bpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
+			int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
 				drm_format_plane_cpp(fb->pixel_format, 1) :
 				drm_format_plane_cpp(fb->pixel_format, 0);
 
-			switch (bpp) {
+			switch (cpp) {
 			case 1:
 				min_scanlines = 16;
 				break;
@@ -3606,23 +3625,45 @@ static void skl_update_wm(struct drm_crtc *crtc)
 	dev_priv->wm.skl_hw = *results;
 }
 
-static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
+static void ilk_compute_wm_config(struct drm_device *dev,
+				  struct intel_wm_config *config)
 {
-	struct drm_device *dev = dev_priv->dev;
+	struct intel_crtc *crtc;
+
+	/* Compute the currently _active_ config */
+	for_each_intel_crtc(dev, crtc) {
+		const struct intel_pipe_wm *wm = &crtc->wm.active.ilk;
+
+		if (!wm->pipe_enabled)
+			continue;
+
+		config->sprites_enabled |= wm->sprites_enabled;
+		config->sprites_scaled |= wm->sprites_scaled;
+		config->num_pipes_active++;
+	}
+}
+
+static void ilk_program_watermarks(struct intel_crtc_state *cstate)
+{
+	struct drm_crtc *crtc = cstate->base.crtc;
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
 	struct ilk_wm_maximums max;
-	struct intel_wm_config *config = &dev_priv->wm.config;
+	struct intel_wm_config config = {};
 	struct ilk_wm_values results = {};
 	enum intel_ddb_partitioning partitioning;
 
-	ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_1_2, &max);
-	ilk_wm_merge(dev, config, &max, &lp_wm_1_2);
+	ilk_compute_wm_config(dev, &config);
+
+	ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
+	ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
 
 	/* 5/6 split only in single pipe config on IVB+ */
 	if (INTEL_INFO(dev)->gen >= 7 &&
-	    config->num_pipes_active == 1 && config->sprites_enabled) {
-		ilk_compute_wm_maximums(dev, 1, config, INTEL_DDB_PART_5_6, &max);
-		ilk_wm_merge(dev, config, &max, &lp_wm_5_6);
+	    config.num_pipes_active == 1 && config.sprites_enabled) {
+		ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
+		ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
 
 		best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
 	} else {
@@ -3639,7 +3680,6 @@ static void ilk_program_watermarks(struct drm_i915_private *dev_priv)
 
 static void ilk_update_wm(struct drm_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
 
@@ -3659,7 +3699,7 @@ static void ilk_update_wm(struct drm_crtc *crtc)
 
 	intel_crtc->wm.active.ilk = cstate->wm.optimal.ilk;
 
-	ilk_program_watermarks(dev_priv);
+	ilk_program_watermarks(cstate);
 }
 
 static void skl_pipe_wm_active_state(uint32_t val,
@@ -4045,7 +4085,7 @@ void intel_update_watermarks(struct drm_crtc *crtc)
 		dev_priv->display.update_wm(crtc);
 }
 
-/**
+/*
  * Lock protecting IPS related data structures
  */
 DEFINE_SPINLOCK(mchdev_lock);
@@ -4081,11 +4121,13 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val)
 static void ironlake_enable_drps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 rgvmodectl = I915_READ(MEMMODECTL);
+	u32 rgvmodectl;
 	u8 fmax, fmin, fstart, vstart;
 
 	spin_lock_irq(&mchdev_lock);
 
+	rgvmodectl = I915_READ(MEMMODECTL);
+
 	/* Enable temp reporting */
 	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
 	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
@@ -4518,21 +4560,71 @@ static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
 	}
 	if (HAS_RC6p(dev))
 		DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
-			      (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
-			      (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
-			      (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
+			      onoff(mode & GEN6_RC_CTL_RC6_ENABLE),
+			      onoff(mode & GEN6_RC_CTL_RC6p_ENABLE),
+			      onoff(mode & GEN6_RC_CTL_RC6pp_ENABLE));
 
 	else
 		DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
-			      (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
+			      onoff(mode & GEN6_RC_CTL_RC6_ENABLE));
 }
 
-static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
+static bool bxt_check_bios_rc6_setup(const struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool enable_rc6 = true;
+	unsigned long rc6_ctx_base;
+
+	if (!(I915_READ(RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+		DRM_DEBUG_KMS("RC6 Base location not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	/*
+	 * The exact context size is not known for BXT, so assume a page size
+	 * for this check.
+	 */
+	rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+	if (!((rc6_ctx_base >= dev_priv->gtt.stolen_reserved_base) &&
+	      (rc6_ctx_base + PAGE_SIZE <= dev_priv->gtt.stolen_reserved_base +
+					dev_priv->gtt.stolen_reserved_size))) {
+		DRM_DEBUG_KMS("RC6 Base address not as expected.\n");
+		enable_rc6 = false;
+	}
+
+	if (!(((I915_READ(PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1) &&
+	      ((I915_READ(PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1))) {
+		DRM_DEBUG_KMS("Engine Idle wait time not set properly.\n");
+		enable_rc6 = false;
+	}
+
+	if (!(I915_READ(GEN6_RC_CONTROL) & (GEN6_RC_CTL_RC6_ENABLE |
+					    GEN6_RC_CTL_HW_ENABLE)) &&
+	    ((I915_READ(GEN6_RC_CONTROL) & GEN6_RC_CTL_HW_ENABLE) ||
+	     !(I915_READ(GEN6_RC_STATE) & RC6_STATE))) {
+		DRM_DEBUG_KMS("HW/SW RC6 is not enabled by BIOS.\n");
+		enable_rc6 = false;
+	}
+
+	return enable_rc6;
+}
+
+int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
 {
 	/* No RC6 before Ironlake and code is gone for ilk. */
 	if (INTEL_INFO(dev)->gen < 6)
 		return 0;
 
+	if (!enable_rc6)
+		return 0;
+
+	if (IS_BROXTON(dev) && !bxt_check_bios_rc6_setup(dev)) {
+		DRM_INFO("RC6 disabled by BIOS\n");
+		return 0;
+	}
+
 	/* Respect the kernel parameter if it is set */
 	if (enable_rc6 >= 0) {
 		int mask;
@@ -4702,8 +4794,7 @@ static void gen9_enable_rc6(struct drm_device *dev)
 	/* 3a: Enable RC6 */
 	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-	DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
-			"on" : "off");
+	DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE));
 	/* WaRsUseTimeoutMode */
 	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
 	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
@@ -4722,8 +4813,7 @@ static void gen9_enable_rc6(struct drm_device *dev)
 	 * 3b: Enable Coarse Power Gating only when RC6 is enabled.
 	 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
 	 */
-	if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
+	if (NEEDS_WaRsDisableCoarsePowerGating(dev))
 		I915_WRITE(GEN9_PG_ENABLE, 0);
 	else
 		I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
@@ -5146,8 +5236,6 @@ static void cherryview_setup_pctx(struct drm_device *dev)
 	u32 pcbr;
 	int pctx_size = 32*1024;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
 	pcbr = I915_READ(VLV_PCBR);
 	if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
 		DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
@@ -5169,7 +5257,7 @@ static void valleyview_setup_pctx(struct drm_device *dev)
 	u32 pcbr;
 	int pctx_size = 24*1024;
 
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	mutex_lock(&dev->struct_mutex);
 
 	pcbr = I915_READ(VLV_PCBR);
 	if (pcbr) {
@@ -5197,7 +5285,7 @@ static void valleyview_setup_pctx(struct drm_device *dev)
 	pctx = i915_gem_object_create_stolen(dev, pctx_size);
 	if (!pctx) {
 		DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
-		return;
+		goto out;
 	}
 
 	pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
@@ -5206,6 +5294,7 @@ static void valleyview_setup_pctx(struct drm_device *dev)
 out:
 	DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
 	dev_priv->vlv_pctx = pctx;
+	mutex_unlock(&dev->struct_mutex);
 }
 
 static void valleyview_cleanup_pctx(struct drm_device *dev)
@@ -5215,7 +5304,7 @@ static void valleyview_cleanup_pctx(struct drm_device *dev)
 	if (WARN_ON(!dev_priv->vlv_pctx))
 		return;
 
-	drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
+	drm_gem_object_unreference_unlocked(&dev_priv->vlv_pctx->base);
 	dev_priv->vlv_pctx = NULL;
 }
 
@@ -6024,7 +6113,6 @@ void intel_init_gt_powersave(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
 	 * requirement.
@@ -6159,8 +6247,8 @@ void intel_enable_gt_powersave(struct drm_device *dev)
 		return;
 
 	if (IS_IRONLAKE_M(dev)) {
-		mutex_lock(&dev->struct_mutex);
 		ironlake_enable_drps(dev);
+		mutex_lock(&dev->struct_mutex);
 		intel_init_emon(dev);
 		mutex_unlock(&dev->struct_mutex);
 	} else if (INTEL_INFO(dev)->gen >= 6) {
@@ -6990,6 +7078,7 @@ void intel_init_pm(struct drm_device *dev)
 		     dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
 			dev_priv->display.update_wm = ilk_update_wm;
 			dev_priv->display.compute_pipe_wm = ilk_compute_pipe_wm;
+			dev_priv->display.program_watermarks = ilk_program_watermarks;
 		} else {
 			DRM_DEBUG_KMS("Failed to read display plane latency. "
 				      "Disable CxSR\n");
@@ -7155,9 +7244,10 @@ static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
-	div = vlv_gpu_freq_div(czclk_freq) / 2;
+	div = vlv_gpu_freq_div(czclk_freq);
 	if (div < 0)
 		return div;
+	div /= 2;
 
 	return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
 }
@@ -7166,9 +7256,10 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
 	int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->czclk_freq, 1000);
 
-	mul = vlv_gpu_freq_div(czclk_freq) / 2;
+	mul = vlv_gpu_freq_div(czclk_freq);
 	if (mul < 0)
 		return mul;
+	mul /= 2;
 
 	/* CHV needs even values */
 	return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 9ccff3011523..0b42ada338c8 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -225,7 +225,12 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
 		   (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT));
 	}
 
-	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, DP_PSR_ENABLE);
+	if (dev_priv->psr.link_standby)
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
+				   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
+	else
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
+				   DP_PSR_ENABLE);
 }
 
 static void vlv_psr_enable_source(struct intel_dp *intel_dp)
@@ -280,6 +285,9 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp)
 	if (IS_HASWELL(dev))
 		val |= EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES;
 
+	if (dev_priv->psr.link_standby)
+		val |= EDP_PSR_LINK_STANDBY;
+
 	I915_WRITE(EDP_PSR_CTL, val |
 		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
 		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
@@ -304,8 +312,15 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
 
 	dev_priv->psr.source_ok = false;
 
-	if (IS_HASWELL(dev) && dig_port->port != PORT_A) {
-		DRM_DEBUG_KMS("HSW ties PSR to DDI A (eDP)\n");
+	/*
+	 * HSW spec explicitly says PSR is tied to port A.
+	 * BDW+ platforms with DDI implementation of PSR have different
+	 * PSR registers per transcoder and we only implement transcoder EDP
+	 * ones. Since by Display design transcoder EDP is tied to port A
+	 * we can safely escape based on the port A.
+	 */
+	if (HAS_DDI(dev) && dig_port->port != PORT_A) {
+		DRM_DEBUG_KMS("PSR condition failed: Port not supported\n");
 		return false;
 	}
 
@@ -314,6 +329,12 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
 		return false;
 	}
 
+	if ((IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) &&
+	    !dev_priv->psr.link_standby) {
+		DRM_ERROR("PSR condition failed: Link off requested but not supported on this platform\n");
+		return false;
+	}
+
 	if (IS_HASWELL(dev) &&
 	    I915_READ(HSW_STEREO_3D_CTL(intel_crtc->config->cpu_transcoder)) &
 		      S3D_ENABLE) {
@@ -327,12 +348,6 @@ static bool intel_psr_match_conditions(struct intel_dp *intel_dp)
 		return false;
 	}
 
-	if (!IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
-	    ((dev_priv->vbt.psr.full_link) || (dig_port->port != PORT_A))) {
-		DRM_DEBUG_KMS("PSR condition failed: Link Standby requested/needed but not supported on this platform\n");
-		return false;
-	}
-
 	dev_priv->psr.source_ok = true;
 	return true;
 }
@@ -763,6 +778,36 @@ void intel_psr_init(struct drm_device *dev)
 	dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ?
 		HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE;
 
+	/* Per platform default */
+	if (i915.enable_psr == -1) {
+		if (IS_HASWELL(dev) || IS_BROADWELL(dev) ||
+		    IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
+			i915.enable_psr = 1;
+		else
+			i915.enable_psr = 0;
+	}
+
+	/* Set link_standby x link_off defaults */
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+		/* HSW and BDW require workarounds that we don't implement. */
+		dev_priv->psr.link_standby = false;
+	else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev))
+		/* On VLV and CHV only standby mode is supported. */
+		dev_priv->psr.link_standby = true;
+	else
+		/* For new platforms let's respect VBT back again */
+		dev_priv->psr.link_standby = dev_priv->vbt.psr.full_link;
+
+	/* Override link_standby x link_off defaults */
+	if (i915.enable_psr == 2 && !dev_priv->psr.link_standby) {
+		DRM_DEBUG_KMS("PSR: Forcing link standby\n");
+		dev_priv->psr.link_standby = true;
+	}
+	if (i915.enable_psr == 3 && dev_priv->psr.link_standby) {
+		DRM_DEBUG_KMS("PSR: Forcing main link off\n");
+		dev_priv->psr.link_standby = false;
+	}
+
 	INIT_DELAYED_WORK(&dev_priv->psr.work, intel_psr_work);
 	mutex_init(&dev_priv->psr.lock);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 40c6aff57256..45ce45a5e122 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -746,9 +746,9 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
 
 	ret = i915_gem_render_state_init(req);
 	if (ret)
-		DRM_ERROR("init render state: %d\n", ret);
+		return ret;
 
-	return ret;
+	return 0;
 }
 
 static int wa_add(struct drm_i915_private *dev_priv,
@@ -789,6 +789,22 @@ static int wa_add(struct drm_i915_private *dev_priv,
 
 #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
 
+static int wa_ring_whitelist_reg(struct intel_engine_cs *ring, i915_reg_t reg)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_workarounds *wa = &dev_priv->workarounds;
+	const uint32_t index = wa->hw_whitelist_count[ring->id];
+
+	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
+		return -EINVAL;
+
+	WA_WRITE(RING_FORCE_TO_NONPRIV(ring->mmio_base, index),
+		 i915_mmio_reg_offset(reg));
+	wa->hw_whitelist_count[ring->id]++;
+
+	return 0;
+}
+
 static int gen8_init_workarounds(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -894,6 +910,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
+	int ret;
 
 	/* WaEnableLbsSlaRetryTimerDecrement:skl */
 	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
@@ -964,6 +981,20 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
 	/* WaDisableSTUnitPowerOptimization:skl,bxt */
 	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
+	/* WaOCLCoherentLineFlush:skl,bxt */
+	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
+				    GEN8_LQSC_FLUSH_COHERENT_LINES));
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
+	ret= wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
+	if (ret)
+		return ret;
+
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt */
+	ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -1019,6 +1050,16 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
 	if (ret)
 		return ret;
 
+	/*
+	 * Actual WA is to disable percontext preemption granularity control
+	 * until D0 which is the default case so this is equivalent to
+	 * !WaDisablePerCtxtPreemptionGranularityControl:skl
+	 */
+	if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
+		I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+			   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	}
+
 	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
 		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
 		I915_WRITE(FF_SLICE_CS_CHICKEN2,
@@ -1071,6 +1112,11 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)
 			GEN7_HALF_SLICE_CHICKEN1,
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
+	/* WaDisableLSQCROPERFforOCL:skl */
+	ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
+	if (ret)
+		return ret;
+
 	return skl_tune_iz_hashing(ring);
 }
 
@@ -1106,6 +1152,20 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring)
 			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 	}
 
+	/* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
+	/* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
+	/* WaDisableObjectLevelPreemtionForInstanceId:bxt */
+	/* WaDisableLSQCROPERFforOCL:bxt */
+	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
+		ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
+		if (ret)
+			return ret;
+
+		ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -1117,6 +1177,7 @@ int init_workarounds_ring(struct intel_engine_cs *ring)
 	WARN_ON(ring->id != RCS);
 
 	dev_priv->workarounds.count = 0;
+	dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
 
 	if (IS_BROADWELL(dev))
 		return bdw_init_workarounds(ring);
@@ -1867,15 +1928,13 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
 		offset = cs_offset;
 	}
 
-	ret = intel_ring_begin(req, 4);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
+	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
 	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
 					0 : MI_BATCH_NON_SECURE));
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
 	return 0;
@@ -1901,6 +1960,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+static void cleanup_phys_status_page(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+
+	if (!dev_priv->status_page_dmah)
+		return;
+
+	drm_pci_free(ring->dev, dev_priv->status_page_dmah);
+	ring->status_page.page_addr = NULL;
+}
+
 static void cleanup_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_object *obj;
@@ -1917,9 +1987,9 @@ static void cleanup_status_page(struct intel_engine_cs *ring)
 
 static int init_status_page(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = ring->status_page.obj;
 
-	if ((obj = ring->status_page.obj) == NULL) {
+	if (obj == NULL) {
 		unsigned flags;
 		int ret;
 
@@ -1990,6 +2060,7 @@ void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 	else
 		iounmap(ringbuf->virtual_start);
 	ringbuf->virtual_start = NULL;
+	ringbuf->vma = NULL;
 	i915_gem_object_ggtt_unpin(ringbuf->obj);
 }
 
@@ -2048,6 +2119,9 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 			return ret;
 		}
 
+		/* Access through the GTT requires the device to be awake. */
+		assert_rpm_wakelock_held(dev_priv);
+
 		ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
 						    i915_gem_obj_ggtt_offset(obj), ringbuf->size);
 		if (ringbuf->virtual_start == NULL) {
@@ -2056,6 +2130,8 @@ int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
 		}
 	}
 
+	ringbuf->vma = i915_gem_obj_to_ggtt(obj);
+
 	return 0;
 }
 
@@ -2164,7 +2240,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		if (ret)
 			goto error;
 	} else {
-		BUG_ON(ring->id != RCS);
+		WARN_ON(ring->id != RCS);
 		ret = init_phys_status_page(ring);
 		if (ret)
 			goto error;
@@ -2210,7 +2286,12 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	if (ring->cleanup)
 		ring->cleanup(ring);
 
-	cleanup_status_page(ring);
+	if (I915_NEED_GFX_HWS(ring->dev)) {
+		cleanup_status_page(ring);
+	} else {
+		WARN_ON(ring->id != RCS);
+		cleanup_phys_status_page(ring);
+	}
 
 	i915_cmd_parser_fini_ring(ring);
 	i915_gem_batch_pool_fini(&ring->batch_pool);
@@ -2666,6 +2747,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	ring->name = "render ring";
 	ring->id = RCS;
+	ring->exec_id = I915_EXEC_RENDER;
 	ring->mmio_base = RENDER_RING_BASE;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
@@ -2814,6 +2896,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 
 	ring->name = "bsd ring";
 	ring->id = VCS;
+	ring->exec_id = I915_EXEC_BSD;
 
 	ring->write_tail = ring_write_tail;
 	if (INTEL_INFO(dev)->gen >= 6) {
@@ -2890,6 +2973,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 
 	ring->name = "bsd2 ring";
 	ring->id = VCS2;
+	ring->exec_id = I915_EXEC_BSD;
 
 	ring->write_tail = ring_write_tail;
 	ring->mmio_base = GEN8_BSD2_RING_BASE;
@@ -2920,6 +3004,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	ring->name = "blitter ring";
 	ring->id = BCS;
+	ring->exec_id = I915_EXEC_BLT;
 
 	ring->mmio_base = BLT_RING_BASE;
 	ring->write_tail = ring_write_tail;
@@ -2977,6 +3062,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 
 	ring->name = "video enhancement ring";
 	ring->id = VECS;
+	ring->exec_id = I915_EXEC_VEBOX;
 
 	ring->mmio_base = VEBOX_RING_BASE;
 	ring->write_tail = ring_write_tail;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 49574ffe54bc..566b0ae10ce0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -93,11 +93,13 @@ struct intel_ring_hangcheck {
 	int score;
 	enum intel_ring_hangcheck_action action;
 	int deadlock;
+	u32 instdone[I915_NUM_INSTDONE_REG];
 };
 
 struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
 	void __iomem *virtual_start;
+	struct i915_vma *vma;
 
 	struct intel_engine_cs *ring;
 	struct list_head link;
@@ -147,14 +149,16 @@ struct  i915_ctx_workarounds {
 struct  intel_engine_cs {
 	const char	*name;
 	enum intel_ring_id {
-		RCS = 0x0,
-		VCS,
+		RCS = 0,
 		BCS,
-		VECS,
-		VCS2
+		VCS,
+		VCS2,	/* Keep instances of the same type engine together. */
+		VECS
 	} id;
 #define I915_NUM_RINGS 5
-#define LAST_USER_RING (VECS + 1)
+#define _VCS(n) (VCS + (n))
+	unsigned int exec_id;
+	unsigned int guc_id;
 	u32		mmio_base;
 	struct		drm_device *dev;
 	struct intel_ringbuffer *buffer;
@@ -268,6 +272,8 @@ struct  intel_engine_cs {
 	struct list_head execlist_queue;
 	struct list_head execlist_retired_req_list;
 	u8 next_context_status_buffer;
+	bool disable_lite_restore_wa;
+	u32 ctx_desc_template;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
 	int		(*emit_request)(struct drm_i915_gem_request *request);
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
@@ -305,7 +311,6 @@ struct  intel_engine_cs {
 
 	wait_queue_head_t irq_queue;
 
-	struct intel_context *default_context;
 	struct intel_context *last_context;
 
 	struct intel_ring_hangcheck hangcheck;
@@ -406,7 +411,7 @@ intel_write_status_page(struct intel_engine_cs *ring,
 	ring->status_page.page_addr[reg] = value;
 }
 
-/**
+/*
  * Reads a dword out of the status page, which is written to from the command
  * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
  * MI_STORE_DATA_IMM.
@@ -423,6 +428,7 @@ intel_write_status_page(struct intel_engine_cs *ring,
  * The area from dword 0x30 to 0x3ff is available for driver usage.
  */
 #define I915_GEM_HWS_INDEX		0x30
+#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_SCRATCH_INDEX	0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 4f43d9b32e66..6e54d978d9d4 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -284,6 +284,13 @@ static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
 						1 << PIPE_C | 1 << PIPE_B);
 }
 
+static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv)
+{
+	if (IS_BROADWELL(dev_priv))
+		gen8_irq_power_well_pre_disable(dev_priv,
+						1 << PIPE_C | 1 << PIPE_B);
+}
+
 static void skl_power_well_post_enable(struct drm_i915_private *dev_priv,
 				       struct i915_power_well *power_well)
 {
@@ -309,6 +316,14 @@ static void skl_power_well_post_enable(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void skl_power_well_pre_disable(struct drm_i915_private *dev_priv,
+				       struct i915_power_well *power_well)
+{
+	if (power_well->data == SKL_DISP_PW_2)
+		gen8_irq_power_well_pre_disable(dev_priv,
+						1 << PIPE_C | 1 << PIPE_B);
+}
+
 static void hsw_set_power_well(struct drm_i915_private *dev_priv,
 			       struct i915_power_well *power_well, bool enable)
 {
@@ -334,6 +349,7 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv,
 
 	} else {
 		if (enable_requested) {
+			hsw_power_well_pre_disable(dev_priv);
 			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
 			POSTING_READ(HSW_PWR_WELL_DRIVER);
 			DRM_DEBUG_KMS("Requesting to disable the power well\n");
@@ -456,15 +472,19 @@ static void assert_can_disable_dc9(struct drm_i915_private *dev_priv)
 	  */
 }
 
-static void gen9_set_dc_state_debugmask_memory_up(
-			struct drm_i915_private *dev_priv)
+static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
 {
-	uint32_t val;
+	uint32_t val, mask;
+
+	mask = DC_STATE_DEBUG_MASK_MEMORY_UP;
+
+	if (IS_BROXTON(dev_priv))
+		mask |= DC_STATE_DEBUG_MASK_CORES;
 
 	/* The below bit doesn't need to be cleared ever afterwards */
 	val = I915_READ(DC_STATE_DEBUG);
-	if (!(val & DC_STATE_DEBUG_MASK_MEMORY_UP)) {
-		val |= DC_STATE_DEBUG_MASK_MEMORY_UP;
+	if ((val & mask) != mask) {
+		val |= mask;
 		I915_WRITE(DC_STATE_DEBUG, val);
 		POSTING_READ(DC_STATE_DEBUG);
 	}
@@ -525,9 +545,6 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
 	else if (i915.enable_dc == 1 && state > DC_STATE_EN_UPTO_DC5)
 		state = DC_STATE_EN_UPTO_DC5;
 
-	if (state & DC_STATE_EN_UPTO_DC5_DC6_MASK)
-		gen9_set_dc_state_debugmask_memory_up(dev_priv);
-
 	val = I915_READ(DC_STATE_EN);
 	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
 		      val & mask, state);
@@ -577,7 +594,8 @@ static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
 	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
 					SKL_DISP_PW_2);
 
-	WARN_ONCE(!IS_SKYLAKE(dev), "Platform doesn't support DC5.\n");
+	WARN_ONCE(!IS_SKYLAKE(dev) && !IS_KABYLAKE(dev),
+		  "Platform doesn't support DC5.\n");
 	WARN_ONCE(!HAS_RUNTIME_PM(dev), "Runtime PM not enabled.\n");
 	WARN_ONCE(pg2_enabled, "PG2 not disabled to enable DC5.\n");
 
@@ -613,7 +631,8 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	WARN_ONCE(!IS_SKYLAKE(dev), "Platform doesn't support DC6.\n");
+	WARN_ONCE(!IS_SKYLAKE(dev) && !IS_KABYLAKE(dev),
+		  "Platform doesn't support DC6.\n");
 	WARN_ONCE(!HAS_RUNTIME_PM(dev), "Runtime PM not enabled.\n");
 	WARN_ONCE(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
 		  "Backlight is not disabled.\n");
@@ -640,7 +659,8 @@ static void gen9_disable_dc5_dc6(struct drm_i915_private *dev_priv)
 {
 	assert_can_disable_dc5(dev_priv);
 
-	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+	if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
+	    i915.enable_dc != 0 && i915.enable_dc != 1)
 		assert_can_disable_dc6(dev_priv);
 
 	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
@@ -668,7 +688,6 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv)
 static void skl_set_power_well(struct drm_i915_private *dev_priv,
 			struct i915_power_well *power_well, bool enable)
 {
-	struct drm_device *dev = dev_priv->dev;
 	uint32_t tmp, fuse_status;
 	uint32_t req_mask, state_mask;
 	bool is_enabled, enable_requested, check_fuse_status = false;
@@ -706,23 +725,15 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv,
 	state_mask = SKL_POWER_WELL_STATE(power_well->data);
 	is_enabled = tmp & state_mask;
 
+	if (!enable && enable_requested)
+		skl_power_well_pre_disable(dev_priv, power_well);
+
 	if (enable) {
 		if (!enable_requested) {
 			WARN((tmp & state_mask) &&
 				!I915_READ(HSW_PWR_WELL_BIOS),
 				"Invalid for power well status to be enabled, unless done by the BIOS, \
 				when request is to disable!\n");
-			if (power_well->data == SKL_DISP_PW_2) {
-				/*
-				 * DDI buffer programming unnecessary during
-				 * driver-load/resume as it's already done
-				 * during modeset initialization then. It's
-				 * also invalid here as encoder list is still
-				 * uninitialized.
-				 */
-				if (!dev_priv->power_domains.initializing)
-					intel_prepare_ddi(dev);
-			}
 			I915_WRITE(HSW_PWR_WELL_DRIVER, tmp | req_mask);
 		}
 
@@ -828,7 +839,8 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv,
 static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
 					   struct i915_power_well *power_well)
 {
-	if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 && i915.enable_dc != 1)
+	if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
+	    i915.enable_dc != 0 && i915.enable_dc != 1)
 		skl_enable_dc6(dev_priv);
 	else
 		gen9_enable_dc5(dev_priv);
@@ -840,7 +852,8 @@ static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv,
 	if (power_well->count > 0) {
 		gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 	} else {
-		if (IS_SKYLAKE(dev_priv) && i915.enable_dc != 0 &&
+		if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
+		    i915.enable_dc != 0 &&
 		    i915.enable_dc != 1)
 			gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 		else
@@ -993,6 +1006,9 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
 	valleyview_disable_display_irqs(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
+	/* make sure we're done processing display irqs */
+	synchronize_irq(dev_priv->dev->irq);
+
 	vlv_power_sequencer_reset(dev_priv);
 }
 
@@ -1941,7 +1957,7 @@ void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_well *well;
 
-	if (!IS_SKYLAKE(dev_priv))
+	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		return;
 
 	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
@@ -1955,7 +1971,7 @@ void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv)
 {
 	struct i915_power_well *well;
 
-	if (!IS_SKYLAKE(dev_priv))
+	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		return;
 
 	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
@@ -2125,8 +2141,8 @@ static void skl_display_core_init(struct drm_i915_private *dev_priv,
 
 	skl_init_cdclk(dev_priv);
 
-	if (dev_priv->csr.dmc_payload)
-		intel_csr_load_program(dev_priv);
+	if (dev_priv->csr.dmc_payload && intel_csr_load_program(dev_priv))
+		gen9_set_dc_state_debugmask(dev_priv);
 }
 
 static void skl_display_core_uninit(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 2e1da060b0e1..4ecc076c4041 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1527,6 +1527,7 @@ intel_sdvo_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
+	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
@@ -1537,6 +1538,9 @@ intel_sdvo_mode_valid(struct drm_connector *connector,
 	if (intel_sdvo->pixel_clock_max < mode->clock)
 		return MODE_CLOCK_HIGH;
 
+	if (mode->clock > max_dotclk)
+		return MODE_CLOCK_HIGH;
+
 	if (intel_sdvo->is_lvds) {
 		if (mode->hdisplay > intel_sdvo->sdvo_lvds_fixed_mode->hdisplay)
 			return MODE_PANEL;
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
index 2e2d4eb4a00d..db0ed499268a 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -24,8 +24,8 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
-/**
- * @file SDVO command definitions and structures.
+/*
+ * SDVO command definitions and structures.
  */
 
 #define SDVO_OUTPUT_FIRST   (0)
@@ -66,39 +66,39 @@ struct intel_sdvo_caps {
 #define DTD_FLAG_VSYNC_POSITIVE (1 << 2)
 #define DTD_FLAG_INTERLACE	(1 << 7)
 
-/** This matches the EDID DTD structure, more or less */
+/* This matches the EDID DTD structure, more or less */
 struct intel_sdvo_dtd {
 	struct {
-		u16 clock;	/**< pixel clock, in 10kHz units */
-		u8 h_active;	/**< lower 8 bits (pixels) */
-		u8 h_blank;	/**< lower 8 bits (pixels) */
-		u8 h_high;	/**< upper 4 bits each h_active, h_blank */
-		u8 v_active;	/**< lower 8 bits (lines) */
-		u8 v_blank;	/**< lower 8 bits (lines) */
-		u8 v_high;	/**< upper 4 bits each v_active, v_blank */
+		u16 clock;	/* pixel clock, in 10kHz units */
+		u8 h_active;	/* lower 8 bits (pixels) */
+		u8 h_blank;	/* lower 8 bits (pixels) */
+		u8 h_high;	/* upper 4 bits each h_active, h_blank */
+		u8 v_active;	/* lower 8 bits (lines) */
+		u8 v_blank;	/* lower 8 bits (lines) */
+		u8 v_high;	/* upper 4 bits each v_active, v_blank */
 	} part1;
 
 	struct {
-		u8 h_sync_off;	/**< lower 8 bits, from hblank start */
-		u8 h_sync_width;	/**< lower 8 bits (pixels) */
-		/** lower 4 bits each vsync offset, vsync width */
+		u8 h_sync_off;	/* lower 8 bits, from hblank start */
+		u8 h_sync_width;	/* lower 8 bits (pixels) */
+		/* lower 4 bits each vsync offset, vsync width */
 		u8 v_sync_off_width;
-		/**
+		/*
 		* 2 high bits of hsync offset, 2 high bits of hsync width,
 		* bits 4-5 of vsync offset, and 2 high bits of vsync width.
 		*/
 		u8 sync_off_width_high;
 		u8 dtd_flags;
 		u8 sdvo_flags;
-		/** bits 6-7 of vsync offset at bits 6-7 */
+		/* bits 6-7 of vsync offset at bits 6-7 */
 		u8 v_sync_off_high;
 		u8 reserved;
 	} part2;
 } __packed;
 
 struct intel_sdvo_pixel_clock_range {
-	u16 min;	/**< pixel clock, in 10kHz units */
-	u16 max;	/**< pixel clock, in 10kHz units */
+	u16 min;	/* pixel clock, in 10kHz units */
+	u16 max;	/* pixel clock, in 10kHz units */
 } __packed;
 
 struct intel_sdvo_preferred_input_timing_args {
@@ -144,7 +144,7 @@ struct intel_sdvo_preferred_input_timing_args {
 
 #define SDVO_CMD_RESET					0x01
 
-/** Returns a struct intel_sdvo_caps */
+/* Returns a struct intel_sdvo_caps */
 #define SDVO_CMD_GET_DEVICE_CAPS			0x02
 
 #define SDVO_CMD_GET_FIRMWARE_REV			0x86
@@ -152,7 +152,7 @@ struct intel_sdvo_preferred_input_timing_args {
 # define SDVO_DEVICE_FIRMWARE_MAJOR			SDVO_I2C_RETURN_1
 # define SDVO_DEVICE_FIRMWARE_PATCH			SDVO_I2C_RETURN_2
 
-/**
+/*
  * Reports which inputs are trained (managed to sync).
  *
  * Devices must have trained within 2 vsyncs of a mode change.
@@ -164,10 +164,10 @@ struct intel_sdvo_get_trained_inputs_response {
 	unsigned int pad:6;
 } __packed;
 
-/** Returns a struct intel_sdvo_output_flags of active outputs. */
+/* Returns a struct intel_sdvo_output_flags of active outputs. */
 #define SDVO_CMD_GET_ACTIVE_OUTPUTS			0x04
 
-/**
+/*
  * Sets the current set of active outputs.
  *
  * Takes a struct intel_sdvo_output_flags.  Must be preceded by a SET_IN_OUT_MAP
@@ -175,7 +175,7 @@ struct intel_sdvo_get_trained_inputs_response {
  */
 #define SDVO_CMD_SET_ACTIVE_OUTPUTS			0x05
 
-/**
+/*
  * Returns the current mapping of SDVO inputs to outputs on the device.
  *
  * Returns two struct intel_sdvo_output_flags structures.
@@ -185,29 +185,29 @@ struct intel_sdvo_in_out_map {
 	u16 in0, in1;
 };
 
-/**
+/*
  * Sets the current mapping of SDVO inputs to outputs on the device.
  *
  * Takes two struct i380_sdvo_output_flags structures.
  */
 #define SDVO_CMD_SET_IN_OUT_MAP				0x07
 
-/**
+/*
  * Returns a struct intel_sdvo_output_flags of attached displays.
  */
 #define SDVO_CMD_GET_ATTACHED_DISPLAYS			0x0b
 
-/**
+/*
  * Returns a struct intel_sdvo_ouptut_flags of displays supporting hot plugging.
  */
 #define SDVO_CMD_GET_HOT_PLUG_SUPPORT			0x0c
 
-/**
+/*
  * Takes a struct intel_sdvo_output_flags.
  */
 #define SDVO_CMD_SET_ACTIVE_HOT_PLUG			0x0d
 
-/**
+/*
  * Returns a struct intel_sdvo_output_flags of displays with hot plug
  * interrupts enabled.
  */
@@ -221,7 +221,7 @@ struct intel_sdvo_get_interrupt_event_source_response {
 	unsigned int pad:6;
 } __packed;
 
-/**
+/*
  * Selects which input is affected by future input commands.
  *
  * Commands affected include SET_INPUT_TIMINGS_PART[12],
@@ -234,7 +234,7 @@ struct intel_sdvo_set_target_input_args {
 	unsigned int pad:7;
 } __packed;
 
-/**
+/*
  * Takes a struct intel_sdvo_output_flags of which outputs are targeted by
  * future output commands.
  *
@@ -280,7 +280,7 @@ struct intel_sdvo_set_target_input_args {
 # define SDVO_DTD_SDVO_FLAG_SCALING_SMOOTH			(2 << 4)
 # define SDVO_DTD_VSYNC_OFF_HIGH			SDVO_I2C_ARG_6
 
-/**
+/*
  * Generates a DTD based on the given width, height, and flags.
  *
  * This will be supported by any device supporting scaling or interlaced
@@ -300,24 +300,24 @@ struct intel_sdvo_set_target_input_args {
 #define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1	0x1b
 #define SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2	0x1c
 
-/** Returns a struct intel_sdvo_pixel_clock_range */
+/* Returns a struct intel_sdvo_pixel_clock_range */
 #define SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE		0x1d
-/** Returns a struct intel_sdvo_pixel_clock_range */
+/* Returns a struct intel_sdvo_pixel_clock_range */
 #define SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE		0x1e
 
-/** Returns a byte bitfield containing SDVO_CLOCK_RATE_MULT_* flags */
+/* Returns a byte bitfield containing SDVO_CLOCK_RATE_MULT_* flags */
 #define SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS		0x1f
 
-/** Returns a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
+/* Returns a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
 #define SDVO_CMD_GET_CLOCK_RATE_MULT			0x20
-/** Takes a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
+/* Takes a byte containing a SDVO_CLOCK_RATE_MULT_* flag */
 #define SDVO_CMD_SET_CLOCK_RATE_MULT			0x21
 # define SDVO_CLOCK_RATE_MULT_1X				(1 << 0)
 # define SDVO_CLOCK_RATE_MULT_2X				(1 << 1)
 # define SDVO_CLOCK_RATE_MULT_4X				(1 << 3)
 
 #define SDVO_CMD_GET_SUPPORTED_TV_FORMATS		0x27
-/** 6 bytes of bit flags for TV formats shared by all TV format functions */
+/* 6 bytes of bit flags for TV formats shared by all TV format functions */
 struct intel_sdvo_tv_format {
 	unsigned int ntsc_m:1;
 	unsigned int ntsc_j:1;
@@ -376,7 +376,7 @@ struct intel_sdvo_tv_format {
 
 #define SDVO_CMD_SET_TV_FORMAT				0x29
 
-/** Returns the resolutiosn that can be used with the given TV format */
+/* Returns the resolutiosn that can be used with the given TV format */
 #define SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT		0x83
 struct intel_sdvo_sdtv_resolution_request {
 	unsigned int ntsc_m:1;
@@ -539,7 +539,7 @@ struct intel_sdvo_hdtv_resolution_reply {
 #define SDVO_CMD_GET_MAX_PANEL_POWER_SEQUENCING		0x2d
 #define SDVO_CMD_GET_PANEL_POWER_SEQUENCING		0x2e
 #define SDVO_CMD_SET_PANEL_POWER_SEQUENCING		0x2f
-/**
+/*
  * The panel power sequencing parameters are in units of milliseconds.
  * The high fields are bits 8:9 of the 10-bit values.
  */
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 8831fc579ade..c3998188cf35 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -129,17 +129,18 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
 	return val;
 }
 
-u32 vlv_gpio_nc_read(struct drm_i915_private *dev_priv, u32 reg)
+u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPIO_NC,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port,
 			SB_CRRDDA_NP, reg, &val);
 	return val;
 }
 
-void vlv_gpio_nc_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+void vlv_iosf_sb_write(struct drm_i915_private *dev_priv,
+		       u8 port, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPIO_NC,
+	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port,
 			SB_CRWRDA_NP, reg, &val);
 }
 
@@ -171,20 +172,6 @@ void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
 			SB_CRWRDA_NP, reg, &val);
 }
 
-u32 vlv_gps_core_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPS_CORE,
-			SB_CRRDDA_NP, reg, &val);
-	return val;
-}
-
-void vlv_gps_core_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
-{
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_GPS_CORE,
-			SB_CRWRDA_NP, reg, &val);
-}
-
 u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
 {
 	u32 val = 0;
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 4ff7a1f4183e..a2582c455b36 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -178,28 +178,33 @@ void intel_pipe_update_end(struct intel_crtc *crtc)
 }
 
 static void
-skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
-		 struct drm_framebuffer *fb,
-		 int crtc_x, int crtc_y,
-		 unsigned int crtc_w, unsigned int crtc_h,
-		 uint32_t x, uint32_t y,
-		 uint32_t src_w, uint32_t src_h)
+skl_update_plane(struct drm_plane *drm_plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = drm_plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(drm_plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	const int pipe = intel_plane->pipe;
 	const int plane = intel_plane->plane + 1;
 	u32 plane_ctl, stride_div, stride;
-	const struct drm_intel_sprite_colorkey *key =
-		&to_intel_plane_state(drm_plane->state)->ckey;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 surf_addr;
 	u32 tile_height, plane_offset, plane_size;
 	unsigned int rotation;
 	int x_offset, y_offset;
-	struct intel_crtc_state *crtc_state = to_intel_crtc(crtc)->config;
-	int scaler_id;
+	int crtc_x = plane_state->dst.x1;
+	int crtc_y = plane_state->dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->dst);
+	uint32_t x = plane_state->src.x1 >> 16;
+	uint32_t y = plane_state->src.y1 >> 16;
+	uint32_t src_w = drm_rect_width(&plane_state->src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->src) >> 16;
+	const struct intel_scaler *scaler =
+		&crtc_state->scaler_state.scalers[plane_state->scaler_id];
 
 	plane_ctl = PLANE_CTL_ENABLE |
 		PLANE_CTL_PIPE_GAMMA_ENABLE |
@@ -208,14 +213,12 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 	plane_ctl |= skl_plane_ctl_format(fb->pixel_format);
 	plane_ctl |= skl_plane_ctl_tiling(fb->modifier[0]);
 
-	rotation = drm_plane->state->rotation;
+	rotation = plane_state->base.rotation;
 	plane_ctl |= skl_plane_ctl_rotation(rotation);
 
-	stride_div = intel_fb_stride_alignment(dev, fb->modifier[0],
+	stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0],
 					       fb->pixel_format);
 
-	scaler_id = to_intel_plane_state(drm_plane->state)->scaler_id;
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -236,9 +239,10 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 	surf_addr = intel_plane_obj_offset(intel_plane, obj, 0);
 
 	if (intel_rotation_90_or_270(rotation)) {
+		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+
 		/* stride: Surface height in tiles */
-		tile_height = intel_tile_height(dev, fb->pixel_format,
-						fb->modifier[0], 0);
+		tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp);
 		stride = DIV_ROUND_UP(fb->height, tile_height);
 		plane_size = (src_w << 16) | src_h;
 		x_offset = stride * tile_height - y - (src_h + 1);
@@ -256,13 +260,13 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 	I915_WRITE(PLANE_SIZE(pipe, plane), plane_size);
 
 	/* program plane scaler */
-	if (scaler_id >= 0) {
+	if (plane_state->scaler_id >= 0) {
 		uint32_t ps_ctrl = 0;
+		int scaler_id = plane_state->scaler_id;
 
 		DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n", plane,
 			PS_PLANE_SEL(plane));
-		ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane) |
-			crtc_state->scaler_state.scalers[scaler_id].mode;
+		ps_ctrl = PS_SCALER_EN | PS_PLANE_SEL(plane) | scaler->mode;
 		I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), ps_ctrl);
 		I915_WRITE(SKL_PS_PWR_GATE(pipe, scaler_id), 0);
 		I915_WRITE(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y);
@@ -334,24 +338,29 @@ chv_update_csc(struct intel_plane *intel_plane, uint32_t format)
 }
 
 static void
-vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
-		 struct drm_framebuffer *fb,
-		 int crtc_x, int crtc_y,
-		 unsigned int crtc_w, unsigned int crtc_h,
-		 uint32_t x, uint32_t y,
-		 uint32_t src_w, uint32_t src_h)
+vlv_update_plane(struct drm_plane *dplane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = dplane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(dplane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_plane->pipe;
 	int plane = intel_plane->plane;
 	u32 sprctl;
-	unsigned long sprsurf_offset, linear_offset;
-	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
-	const struct drm_intel_sprite_colorkey *key =
-		&to_intel_plane_state(dplane->state)->ckey;
+	u32 sprsurf_offset, linear_offset;
+	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->dst.x1;
+	int crtc_y = plane_state->dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->dst);
+	uint32_t x = plane_state->src.x1 >> 16;
+	uint32_t y = plane_state->src.y1 >> 16;
+	uint32_t src_w = drm_rect_width(&plane_state->src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->src) >> 16;
 
 	sprctl = SP_ENABLE;
 
@@ -413,20 +422,18 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
 	crtc_w--;
 	crtc_h--;
 
-	linear_offset = y * fb->pitches[0] + x * pixel_size;
-	sprsurf_offset = intel_gen4_compute_page_offset(dev_priv,
-							&x, &y,
-							obj->tiling_mode,
-							pixel_size,
-							fb->pitches[0]);
+	linear_offset = y * fb->pitches[0] + x * cpp;
+	sprsurf_offset = intel_compute_tile_offset(dev_priv, &x, &y,
+						   fb->modifier[0], cpp,
+						   fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
-	if (dplane->state->rotation == BIT(DRM_ROTATE_180)) {
+	if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 		sprctl |= SP_ROTATE_180;
 
 		x += src_w;
 		y += src_h;
-		linear_offset += src_h * fb->pitches[0] + src_w * pixel_size;
+		linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 	}
 
 	if (key->flags) {
@@ -474,23 +481,28 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
 }
 
 static void
-ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
-		 struct drm_framebuffer *fb,
-		 int crtc_x, int crtc_y,
-		 unsigned int crtc_w, unsigned int crtc_h,
-		 uint32_t x, uint32_t y,
-		 uint32_t src_w, uint32_t src_h)
+ivb_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	enum pipe pipe = intel_plane->pipe;
 	u32 sprctl, sprscale = 0;
-	unsigned long sprsurf_offset, linear_offset;
-	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
-	const struct drm_intel_sprite_colorkey *key =
-		&to_intel_plane_state(plane->state)->ckey;
+	u32 sprsurf_offset, linear_offset;
+	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->dst.x1;
+	int crtc_y = plane_state->dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->dst);
+	uint32_t x = plane_state->src.x1 >> 16;
+	uint32_t y = plane_state->src.y1 >> 16;
+	uint32_t src_w = drm_rect_width(&plane_state->src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->src) >> 16;
 
 	sprctl = SPRITE_ENABLE;
 
@@ -543,22 +555,20 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	if (crtc_w != src_w || crtc_h != src_h)
 		sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	linear_offset = y * fb->pitches[0] + x * pixel_size;
-	sprsurf_offset =
-		intel_gen4_compute_page_offset(dev_priv,
-					       &x, &y, obj->tiling_mode,
-					       pixel_size, fb->pitches[0]);
+	linear_offset = y * fb->pitches[0] + x * cpp;
+	sprsurf_offset = intel_compute_tile_offset(dev_priv, &x, &y,
+						   fb->modifier[0], cpp,
+						   fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
-	if (plane->state->rotation == BIT(DRM_ROTATE_180)) {
+	if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 		sprctl |= SPRITE_ROTATE_180;
 
 		/* HSW and BDW does this automagically in hardware */
 		if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
 			x += src_w;
 			y += src_h;
-			linear_offset += src_h * fb->pitches[0] +
-				src_w * pixel_size;
+			linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 		}
 	}
 
@@ -612,23 +622,28 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 }
 
 static void
-ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
-		 struct drm_framebuffer *fb,
-		 int crtc_x, int crtc_y,
-		 unsigned int crtc_w, unsigned int crtc_h,
-		 uint32_t x, uint32_t y,
-		 uint32_t src_w, uint32_t src_h)
+ilk_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
 {
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	int pipe = intel_plane->pipe;
-	unsigned long dvssurf_offset, linear_offset;
 	u32 dvscntr, dvsscale;
-	int pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
-	const struct drm_intel_sprite_colorkey *key =
-		&to_intel_plane_state(plane->state)->ckey;
+	u32 dvssurf_offset, linear_offset;
+	int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->dst.x1;
+	int crtc_y = plane_state->dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->dst);
+	uint32_t x = plane_state->src.x1 >> 16;
+	uint32_t y = plane_state->src.y1 >> 16;
+	uint32_t src_w = drm_rect_width(&plane_state->src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->src) >> 16;
 
 	dvscntr = DVS_ENABLE;
 
@@ -677,19 +692,18 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	if (crtc_w != src_w || crtc_h != src_h)
 		dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	linear_offset = y * fb->pitches[0] + x * pixel_size;
-	dvssurf_offset =
-		intel_gen4_compute_page_offset(dev_priv,
-					       &x, &y, obj->tiling_mode,
-					       pixel_size, fb->pitches[0]);
+	linear_offset = y * fb->pitches[0] + x * cpp;
+	dvssurf_offset = intel_compute_tile_offset(dev_priv, &x, &y,
+						   fb->modifier[0], cpp,
+						   fb->pitches[0]);
 	linear_offset -= dvssurf_offset;
 
-	if (plane->state->rotation == BIT(DRM_ROTATE_180)) {
+	if (plane_state->base.rotation == BIT(DRM_ROTATE_180)) {
 		dvscntr |= DVS_ROTATE_180;
 
 		x += src_w;
 		y += src_h;
-		linear_offset += src_h * fb->pitches[0] + src_w * pixel_size;
+		linear_offset += src_h * fb->pitches[0] + src_w * cpp;
 	}
 
 	if (key->flags) {
@@ -754,7 +768,6 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	int hscale, vscale;
 	int max_scale, min_scale;
 	bool can_scale;
-	int pixel_size;
 
 	if (!fb) {
 		state->visible = false;
@@ -876,6 +889,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	/* Check size restrictions when scaling */
 	if (state->visible && (src_w != crtc_w || src_h != crtc_h)) {
 		unsigned int width_bytes;
+		int cpp = drm_format_plane_cpp(fb->pixel_format, 0);
 
 		WARN_ON(!can_scale);
 
@@ -887,9 +901,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
 		if (src_w < 3 || src_h < 3)
 			state->visible = false;
 
-		pixel_size = drm_format_plane_cpp(fb->pixel_format, 0);
-		width_bytes = ((src_x * pixel_size) & 63) +
-					src_w * pixel_size;
+		width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
 
 		if (INTEL_INFO(dev)->gen < 9 && (src_w > 2048 || src_h > 2048 ||
 		    width_bytes > 4096 || fb->pitches[0] > 4096)) {
@@ -913,30 +925,6 @@ intel_check_sprite_plane(struct drm_plane *plane,
 	return 0;
 }
 
-static void
-intel_commit_sprite_plane(struct drm_plane *plane,
-			  struct intel_plane_state *state)
-{
-	struct drm_crtc *crtc = state->base.crtc;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = state->base.fb;
-
-	crtc = crtc ? crtc : plane->crtc;
-
-	if (state->visible) {
-		intel_plane->update_plane(plane, crtc, fb,
-					  state->dst.x1, state->dst.y1,
-					  drm_rect_width(&state->dst),
-					  drm_rect_height(&state->dst),
-					  state->src.x1 >> 16,
-					  state->src.y1 >> 16,
-					  drm_rect_width(&state->src) >> 16,
-					  drm_rect_height(&state->src) >> 16);
-	} else {
-		intel_plane->disable_plane(plane, crtc);
-	}
-}
-
 int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv)
 {
@@ -1118,7 +1106,6 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane)
 	intel_plane->plane = plane;
 	intel_plane->frontbuffer_bit = INTEL_FRONTBUFFER_SPRITE(pipe, plane);
 	intel_plane->check_plane = intel_check_sprite_plane;
-	intel_plane->commit_plane = intel_commit_sprite_plane;
 	possible_crtcs = (1 << pipe);
 	ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs,
 				       &intel_plane_funcs,
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 948cbff6c62e..6745bad5bff0 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -897,6 +897,10 @@ intel_tv_mode_valid(struct drm_connector *connector,
 {
 	struct intel_tv *intel_tv = intel_attached_tv(connector);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
+	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
+
+	if (mode->clock > max_dotclk)
+		return MODE_CLOCK_HIGH;
 
 	/* Ensure TV refresh is close to desired refresh */
 	if (tv_mode && abs(tv_mode->refresh - drm_mode_vrefresh(mode) * 1000)
@@ -1178,10 +1182,9 @@ static int
 intel_tv_detect_type(struct intel_tv *intel_tv,
 		      struct drm_connector *connector)
 {
-	struct drm_encoder *encoder = &intel_tv->base.base;
-	struct drm_crtc *crtc = encoder->crtc;
+	struct drm_crtc *crtc = connector->state->crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = connector->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 tv_ctl, save_tv_ctl;
 	u32 tv_dac, save_tv_dac;
@@ -1230,8 +1233,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
 	I915_WRITE(TV_DAC, tv_dac);
 	POSTING_READ(TV_DAC);
 
-	intel_wait_for_vblank(intel_tv->base.base.dev,
-			      to_intel_crtc(intel_tv->base.base.crtc)->pipe);
+	intel_wait_for_vblank(dev, intel_crtc->pipe);
 
 	type = -1;
 	tv_dac = I915_READ(TV_DAC);
@@ -1261,8 +1263,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv,
 	POSTING_READ(TV_CTL);
 
 	/* For unknown reasons the hw barfs if we don't do this vblank wait. */
-	intel_wait_for_vblank(intel_tv->base.base.dev,
-			      to_intel_crtc(intel_tv->base.base.crtc)->pipe);
+	intel_wait_for_vblank(dev, intel_crtc->pipe);
 
 	/* Restore interrupt config */
 	if (connector->polled & DRM_CONNECTOR_POLL_HPD) {
@@ -1420,6 +1421,7 @@ intel_tv_get_modes(struct drm_connector *connector)
 		if (!mode_ptr)
 			continue;
 		strncpy(mode_ptr->name, input->name, DRM_DISPLAY_MODE_LEN);
+		mode_ptr->name[DRM_DISPLAY_MODE_LEN - 1] = '\0';
 
 		mode_ptr->hdisplay = hactive_s;
 		mode_ptr->hsync_start = hactive_s + 1;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 277e60ae0e47..436d8f2b8682 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -327,13 +327,54 @@ static void intel_uncore_ellc_detect(struct drm_device *dev)
 	}
 }
 
+static bool
+fpga_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
+{
+	u32 dbg;
+
+	dbg = __raw_i915_read32(dev_priv, FPGA_DBG);
+	if (likely(!(dbg & FPGA_DBG_RM_NOCLAIM)))
+		return false;
+
+	__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+
+	return true;
+}
+
+static bool
+vlv_check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
+{
+	u32 cer;
+
+	cer = __raw_i915_read32(dev_priv, CLAIM_ER);
+	if (likely(!(cer & (CLAIM_ER_OVERFLOW | CLAIM_ER_CTR_MASK))))
+		return false;
+
+	__raw_i915_write32(dev_priv, CLAIM_ER, CLAIM_ER_CLR);
+
+	return true;
+}
+
+static bool
+check_for_unclaimed_mmio(struct drm_i915_private *dev_priv)
+{
+	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv))
+		return fpga_check_for_unclaimed_mmio(dev_priv);
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		return vlv_check_for_unclaimed_mmio(dev_priv);
+
+	return false;
+}
+
 static void __intel_uncore_early_sanitize(struct drm_device *dev,
 					  bool restore_forcewake)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (HAS_FPGA_DBG_UNCLAIMED(dev))
-		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	/* clear out unclaimed reg detection bit */
+	if (check_for_unclaimed_mmio(dev_priv))
+		DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
 
 	/* clear out old GT FIFO errors */
 	if (IS_GEN6(dev) || IS_GEN7(dev))
@@ -359,6 +400,8 @@ void intel_uncore_early_sanitize(struct drm_device *dev, bool restore_forcewake)
 
 void intel_uncore_sanitize(struct drm_device *dev)
 {
+	i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
+
 	/* BIOS often leaves RC6 enabled, but disable it for hw init */
 	intel_disable_gt_powersave(dev);
 }
@@ -585,38 +628,38 @@ ilk_dummy_write(struct drm_i915_private *dev_priv)
 }
 
 static void
-hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv,
-			i915_reg_t reg, bool read, bool before)
+__unclaimed_reg_debug(struct drm_i915_private *dev_priv,
+		      const i915_reg_t reg,
+		      const bool read,
+		      const bool before)
 {
-	const char *op = read ? "reading" : "writing to";
-	const char *when = before ? "before" : "after";
-
-	if (!i915.mmio_debug)
+	/* XXX. We limit the auto arming traces for mmio
+	 * debugs on these platforms. There are just too many
+	 * revealed by these and CI/Bat suffers from the noise.
+	 * Please fix and then re-enable the automatic traces.
+	 */
+	if (i915.mmio_debug < 2 &&
+	    (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)))
 		return;
 
-	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
-		WARN(1, "Unclaimed register detected %s %s register 0x%x\n",
-		     when, op, i915_mmio_reg_offset(reg));
-		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	if (WARN(check_for_unclaimed_mmio(dev_priv),
+		 "Unclaimed register detected %s %s register 0x%x\n",
+		 before ? "before" : "after",
+		 read ? "reading" : "writing to",
+		 i915_mmio_reg_offset(reg)))
 		i915.mmio_debug--; /* Only report the first N failures */
-	}
 }
 
-static void
-hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
+static inline void
+unclaimed_reg_debug(struct drm_i915_private *dev_priv,
+		    const i915_reg_t reg,
+		    const bool read,
+		    const bool before)
 {
-	static bool mmio_debug_once = true;
-
-	if (i915.mmio_debug || !mmio_debug_once)
+	if (likely(!i915.mmio_debug))
 		return;
 
-	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
-		DRM_DEBUG("Unclaimed register detected, "
-			  "enabling oneshot unclaimed register reporting. "
-			  "Please use i915.mmio_debug=N for more information.\n");
-		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-		i915.mmio_debug = mmio_debug_once--;
-	}
+	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
 #define GEN2_READ_HEADER(x) \
@@ -664,9 +707,11 @@ __gen2_read(64)
 	unsigned long irqflags; \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv); \
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
+	unclaimed_reg_debug(dev_priv, reg, true, true)
 
 #define GEN6_READ_FOOTER \
+	unclaimed_reg_debug(dev_priv, reg, true, false); \
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
 	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
 	return val
@@ -699,11 +744,9 @@ static inline void __force_wake_get(struct drm_i915_private *dev_priv,
 static u##x \
 gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN6_READ_HEADER(x); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
 	if (NEEDS_FORCE_WAKE(offset)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	val = __raw_i915_read##x(dev_priv, reg); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, true, false); \
 	GEN6_READ_FOOTER; \
 }
 
@@ -751,7 +794,6 @@ static u##x \
 gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, true, true); \
 	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
 		fw_engine = 0; \
 	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
@@ -765,7 +807,6 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	if (fw_engine) \
 		__force_wake_get(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, true, false); \
 	GEN6_READ_FOOTER; \
 }
 
@@ -864,9 +905,11 @@ __gen2_write(64)
 	unsigned long irqflags; \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
 	assert_rpm_wakelock_held(dev_priv); \
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags)
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
+	unclaimed_reg_debug(dev_priv, reg, false, true)
 
 #define GEN6_WRITE_FOOTER \
+	unclaimed_reg_debug(dev_priv, reg, false, false); \
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags)
 
 #define __gen6_write(x) \
@@ -892,13 +935,10 @@ hsw_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool t
 	if (NEEDS_FORCE_WAKE(offset)) { \
 		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
 	} \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	if (unlikely(__fifo_ret)) { \
 		gen6_gt_check_fifodbg(dev_priv); \
 	} \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -928,12 +968,9 @@ static bool is_gen8_shadowed(struct drm_i915_private *dev_priv,
 static void \
 gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
 	GEN6_WRITE_HEADER; \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
 	if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(dev_priv, reg)) \
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	__raw_i915_write##x(dev_priv, reg, val); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -987,7 +1024,6 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 		bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, true); \
 	if (!SKL_NEEDS_FORCE_WAKE(offset) || \
 	    is_gen9_shadowed(dev_priv, reg)) \
 		fw_engine = 0; \
@@ -1002,8 +1038,6 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 	if (fw_engine) \
 		__force_wake_get(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
-	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -1223,6 +1257,8 @@ void intel_uncore_init(struct drm_device *dev)
 	intel_uncore_fw_domains_init(dev);
 	__intel_uncore_early_sanitize(dev, false);
 
+	dev_priv->uncore.unclaimed_mmio_check = 1;
+
 	switch (INTEL_INFO(dev)->gen) {
 	default:
 	case 9:
@@ -1580,13 +1616,26 @@ bool intel_has_gpu_reset(struct drm_device *dev)
 	return intel_get_gpu_reset(dev) != NULL;
 }
 
-void intel_uncore_check_errors(struct drm_device *dev)
+bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	return check_for_unclaimed_mmio(dev_priv);
+}
+
+bool
+intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv)
+{
+	if (unlikely(i915.mmio_debug ||
+		     dev_priv->uncore.unclaimed_mmio_check <= 0))
+		return false;
 
-	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
-	    (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed register before interrupt\n");
-		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	if (unlikely(intel_uncore_unclaimed_mmio(dev_priv))) {
+		DRM_DEBUG("Unclaimed register detected, "
+			  "enabling oneshot unclaimed register reporting. "
+			  "Please use i915.mmio_debug=N for more information.\n");
+		i915.mmio_debug++;
+		dev_priv->uncore.unclaimed_mmio_check--;
+		return true;
 	}
+
+	return false;
 }
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index 063825fecbe2..2a95d10e9d92 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -109,13 +109,6 @@ static void dw_hdmi_imx_encoder_disable(struct drm_encoder *encoder)
 {
 }
 
-static bool dw_hdmi_imx_encoder_mode_fixup(struct drm_encoder *encoder,
-					   const struct drm_display_mode *mode,
-					   struct drm_display_mode *adj_mode)
-{
-	return true;
-}
-
 static void dw_hdmi_imx_encoder_mode_set(struct drm_encoder *encoder,
 					 struct drm_display_mode *mode,
 					 struct drm_display_mode *adj_mode)
@@ -125,7 +118,7 @@ static void dw_hdmi_imx_encoder_mode_set(struct drm_encoder *encoder,
 static void dw_hdmi_imx_encoder_commit(struct drm_encoder *encoder)
 {
 	struct imx_hdmi *hdmi = container_of(encoder, struct imx_hdmi, encoder);
-	int mux = imx_drm_encoder_get_mux_id(hdmi->dev->of_node, encoder);
+	int mux = drm_of_encoder_active_port_id(hdmi->dev->of_node, encoder);
 
 	regmap_update_bits(hdmi->regmap, IOMUXC_GPR3,
 			   IMX6Q_GPR3_HDMI_MUX_CTL_MASK,
@@ -138,7 +131,6 @@ static void dw_hdmi_imx_encoder_prepare(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = {
-	.mode_fixup = dw_hdmi_imx_encoder_mode_fixup,
 	.mode_set   = dw_hdmi_imx_encoder_mode_set,
 	.prepare    = dw_hdmi_imx_encoder_prepare,
 	.commit     = dw_hdmi_imx_encoder_commit,
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 2f57d7967417..9876e0f0c3e1 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -17,7 +17,6 @@
 #include <linux/device.h>
 #include <linux/fb.h>
 #include <linux/module.h>
-#include <linux/of_graph.h>
 #include <linux/platform_device.h>
 #include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
@@ -171,18 +170,6 @@ static void imx_drm_disable_vblank(struct drm_device *drm, unsigned int pipe)
 	imx_drm_crtc->imx_drm_helper_funcs.disable_vblank(imx_drm_crtc->crtc);
 }
 
-static void imx_drm_driver_preclose(struct drm_device *drm,
-		struct drm_file *file)
-{
-	int i;
-
-	if (!file->is_master)
-		return;
-
-	for (i = 0; i < MAX_CRTC; i++)
-		imx_drm_disable_vblank(drm, i);
-}
-
 static const struct file_operations imx_drm_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -424,36 +411,6 @@ int imx_drm_encoder_parse_of(struct drm_device *drm,
 }
 EXPORT_SYMBOL_GPL(imx_drm_encoder_parse_of);
 
-/*
- * @node: device tree node containing encoder input ports
- * @encoder: drm_encoder
- */
-int imx_drm_encoder_get_mux_id(struct device_node *node,
-			       struct drm_encoder *encoder)
-{
-	struct imx_drm_crtc *imx_crtc = imx_drm_find_crtc(encoder->crtc);
-	struct device_node *ep;
-	struct of_endpoint endpoint;
-	struct device_node *port;
-	int ret;
-
-	if (!node || !imx_crtc)
-		return -EINVAL;
-
-	for_each_endpoint_of_node(node, ep) {
-		port = of_graph_get_remote_port(ep);
-		of_node_put(port);
-		if (port == imx_crtc->crtc->port) {
-			ret = of_graph_parse_endpoint(ep, &endpoint);
-			of_node_put(ep);
-			return ret ? ret : endpoint.port;
-		}
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(imx_drm_encoder_get_mux_id);
-
 static const struct drm_ioctl_desc imx_drm_ioctls[] = {
 	/* none so far */
 };
@@ -463,7 +420,6 @@ static struct drm_driver imx_drm_driver = {
 	.load			= imx_drm_driver_load,
 	.unload			= imx_drm_driver_unload,
 	.lastclose		= imx_drm_driver_lastclose,
-	.preclose		= imx_drm_driver_preclose,
 	.set_busid		= drm_platform_set_busid,
 	.gem_free_object	= drm_gem_cma_free_object,
 	.gem_vm_ops		= &drm_gem_cma_vm_ops,
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 71cf6d9c714f..b0241b9d1334 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -46,8 +46,6 @@ int imx_drm_set_bus_format_pins(struct drm_encoder *encoder,
 int imx_drm_set_bus_format(struct drm_encoder *encoder,
 		u32 bus_format);
 
-int imx_drm_encoder_get_mux_id(struct device_node *node,
-		struct drm_encoder *encoder);
 int imx_drm_encoder_parse_of(struct drm_device *drm,
 	struct drm_encoder *encoder, struct device_node *np);
 
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 22ac482231ed..a58eee59550a 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -19,6 +19,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
@@ -139,13 +140,6 @@ static void imx_ldb_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
 }
 
-static bool imx_ldb_encoder_mode_fixup(struct drm_encoder *encoder,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
 		unsigned long serial_clk, unsigned long di_clk)
 {
@@ -215,7 +209,7 @@ static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
 	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
 	struct imx_ldb *ldb = imx_ldb_ch->ldb;
 	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
-	int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->child, encoder);
+	int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
 
 	drm_panel_prepare(imx_ldb_ch->panel);
 
@@ -265,7 +259,7 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
 	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
 	unsigned long serial_clk;
 	unsigned long di_clk = mode->clock * 1000;
-	int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->child, encoder);
+	int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
 
 	if (mode->clock > 170000) {
 		dev_warn(ldb->dev,
@@ -376,7 +370,6 @@ static const struct drm_encoder_funcs imx_ldb_encoder_funcs = {
 
 static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = {
 	.dpms = imx_ldb_encoder_dpms,
-	.mode_fixup = imx_ldb_encoder_mode_fixup,
 	.prepare = imx_ldb_encoder_prepare,
 	.commit = imx_ldb_encoder_commit,
 	.mode_set = imx_ldb_encoder_mode_set,
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index 292349f0b132..ae7a9fb3b8a2 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -286,13 +286,6 @@ static void imx_tve_encoder_dpms(struct drm_encoder *encoder, int mode)
 		dev_err(tve->dev, "failed to disable TVOUT: %d\n", ret);
 }
 
-static bool imx_tve_encoder_mode_fixup(struct drm_encoder *encoder,
-				       const struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void imx_tve_encoder_prepare(struct drm_encoder *encoder)
 {
 	struct imx_tve *tve = enc_to_tve(encoder);
@@ -379,7 +372,6 @@ static const struct drm_encoder_funcs imx_tve_encoder_funcs = {
 
 static const struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = {
 	.dpms = imx_tve_encoder_dpms,
-	.mode_fixup = imx_tve_encoder_mode_fixup,
 	.prepare = imx_tve_encoder_prepare,
 	.mode_set = imx_tve_encoder_mode_set,
 	.commit = imx_tve_encoder_commit,
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 287226311413..dee8e8b3523b 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -22,6 +22,8 @@
 #include <linux/fb.h>
 #include <linux/clk.h>
 #include <linux/errno.h>
+#include <linux/reservation.h>
+#include <linux/dma-buf.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 
@@ -31,6 +33,23 @@
 
 #define DRIVER_DESC		"i.MX IPUv3 Graphics"
 
+enum ipu_flip_status {
+	IPU_FLIP_NONE,
+	IPU_FLIP_PENDING,
+	IPU_FLIP_SUBMITTED,
+};
+
+struct ipu_flip_work {
+	struct work_struct		unref_work;
+	struct drm_gem_object		*bo;
+	struct drm_pending_vblank_event *page_flip_event;
+	struct work_struct		fence_work;
+	struct ipu_crtc			*crtc;
+	struct fence			*excl;
+	unsigned			shared_count;
+	struct fence			**shared;
+};
+
 struct ipu_crtc {
 	struct device		*dev;
 	struct drm_crtc		base;
@@ -42,8 +61,9 @@ struct ipu_crtc {
 	struct ipu_dc		*dc;
 	struct ipu_di		*di;
 	int			enabled;
-	struct drm_pending_vblank_event *page_flip_event;
-	struct drm_framebuffer	*newfb;
+	enum ipu_flip_status	flip_state;
+	struct workqueue_struct *flip_queue;
+	struct ipu_flip_work	*flip_work;
 	int			irq;
 	u32			bus_format;
 	int			di_hsync_pin;
@@ -104,15 +124,45 @@ static void ipu_crtc_dpms(struct drm_crtc *crtc, int mode)
 	}
 }
 
+static void ipu_flip_unref_work_func(struct work_struct *__work)
+{
+	struct ipu_flip_work *work =
+			container_of(__work, struct ipu_flip_work, unref_work);
+
+	drm_gem_object_unreference_unlocked(work->bo);
+	kfree(work);
+}
+
+static void ipu_flip_fence_work_func(struct work_struct *__work)
+{
+	struct ipu_flip_work *work =
+			container_of(__work, struct ipu_flip_work, fence_work);
+	int i;
+
+	/* wait for all fences attached to the FB obj to signal */
+	if (work->excl) {
+		fence_wait(work->excl, false);
+		fence_put(work->excl);
+	}
+	for (i = 0; i < work->shared_count; i++) {
+		fence_wait(work->shared[i], false);
+		fence_put(work->shared[i]);
+	}
+
+	work->crtc->flip_state = IPU_FLIP_SUBMITTED;
+}
+
 static int ipu_page_flip(struct drm_crtc *crtc,
 		struct drm_framebuffer *fb,
 		struct drm_pending_vblank_event *event,
 		uint32_t page_flip_flags)
 {
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+	struct ipu_flip_work *flip_work;
 	int ret;
 
-	if (ipu_crtc->newfb)
+	if (ipu_crtc->flip_state != IPU_FLIP_NONE)
 		return -EBUSY;
 
 	ret = imx_drm_crtc_vblank_get(ipu_crtc->imx_crtc);
@@ -123,11 +173,58 @@ static int ipu_page_flip(struct drm_crtc *crtc,
 		return ret;
 	}
 
-	ipu_crtc->newfb = fb;
-	ipu_crtc->page_flip_event = event;
-	crtc->primary->fb = fb;
+	flip_work = kzalloc(sizeof *flip_work, GFP_KERNEL);
+	if (!flip_work) {
+		ret = -ENOMEM;
+		goto put_vblank;
+	}
+	INIT_WORK(&flip_work->unref_work, ipu_flip_unref_work_func);
+	flip_work->page_flip_event = event;
+
+	/* get BO backing the old framebuffer and take a reference */
+	flip_work->bo = &drm_fb_cma_get_gem_obj(crtc->primary->fb, 0)->base;
+	drm_gem_object_reference(flip_work->bo);
+
+	ipu_crtc->flip_work = flip_work;
+	/*
+	 * If the object has a DMABUF attached, we need to wait on its fences
+	 * if there are any.
+	 */
+	if (cma_obj->base.dma_buf) {
+		INIT_WORK(&flip_work->fence_work, ipu_flip_fence_work_func);
+		flip_work->crtc = ipu_crtc;
+
+		ret = reservation_object_get_fences_rcu(
+				cma_obj->base.dma_buf->resv, &flip_work->excl,
+				&flip_work->shared_count, &flip_work->shared);
+
+		if (unlikely(ret)) {
+			DRM_ERROR("failed to get fences for buffer\n");
+			goto free_flip_work;
+		}
+
+		/* No need to queue the worker if the are no fences */
+		if (!flip_work->excl && !flip_work->shared_count) {
+			ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
+		} else {
+			ipu_crtc->flip_state = IPU_FLIP_PENDING;
+			queue_work(ipu_crtc->flip_queue,
+				   &flip_work->fence_work);
+		}
+	} else {
+		ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
+	}
 
 	return 0;
+
+free_flip_work:
+	drm_gem_object_unreference_unlocked(flip_work->bo);
+	kfree(flip_work);
+	ipu_crtc->flip_work = NULL;
+put_vblank:
+	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
+
+	return ret;
 }
 
 static const struct drm_crtc_funcs ipu_crtc_funcs = {
@@ -211,12 +308,12 @@ static void ipu_crtc_handle_pageflip(struct ipu_crtc *ipu_crtc)
 {
 	unsigned long flags;
 	struct drm_device *drm = ipu_crtc->base.dev;
+	struct ipu_flip_work *work = ipu_crtc->flip_work;
 
 	spin_lock_irqsave(&drm->event_lock, flags);
-	if (ipu_crtc->page_flip_event)
+	if (work->page_flip_event)
 		drm_crtc_send_vblank_event(&ipu_crtc->base,
-					   ipu_crtc->page_flip_event);
-	ipu_crtc->page_flip_event = NULL;
+					   work->page_flip_event);
 	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
 	spin_unlock_irqrestore(&drm->event_lock, flags);
 }
@@ -227,13 +324,15 @@ static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
 
 	imx_drm_handle_vblank(ipu_crtc->imx_crtc);
 
-	if (ipu_crtc->newfb) {
+	if (ipu_crtc->flip_state == IPU_FLIP_SUBMITTED) {
 		struct ipu_plane *plane = ipu_crtc->plane[0];
 
-		ipu_crtc->newfb = NULL;
 		ipu_plane_set_base(plane, ipu_crtc->base.primary->fb,
 				   plane->x, plane->y);
 		ipu_crtc_handle_pageflip(ipu_crtc);
+		queue_work(ipu_crtc->flip_queue,
+			   &ipu_crtc->flip_work->unref_work);
+		ipu_crtc->flip_state = IPU_FLIP_NONE;
 	}
 
 	return IRQ_HANDLED;
@@ -282,6 +381,10 @@ static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
 
 static int ipu_enable_vblank(struct drm_crtc *crtc)
 {
+	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+
+	enable_irq(ipu_crtc->irq);
+
 	return 0;
 }
 
@@ -289,8 +392,7 @@ static void ipu_disable_vblank(struct drm_crtc *crtc)
 {
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 
-	ipu_crtc->page_flip_event = NULL;
-	ipu_crtc->newfb = NULL;
+	disable_irq_nosync(ipu_crtc->irq);
 }
 
 static int ipu_set_interface_pix_fmt(struct drm_crtc *crtc,
@@ -401,6 +503,10 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
 		dev_err(ipu_crtc->dev, "irq request failed with %d.\n", ret);
 		goto err_put_plane_res;
 	}
+	/* Only enable IRQ when we actually need it to trigger work. */
+	disable_irq(ipu_crtc->irq);
+
+	ipu_crtc->flip_queue = create_singlethread_workqueue("ipu-crtc-flip");
 
 	return 0;
 
@@ -443,6 +549,7 @@ static void ipu_drm_unbind(struct device *dev, struct device *master,
 
 	imx_drm_remove_crtc(ipu_crtc->imx_crtc);
 
+	destroy_workqueue(ipu_crtc->flip_queue);
 	ipu_plane_put_resources(ipu_crtc->plane[0]);
 	ipu_put_resources(ipu_crtc);
 }
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 26bb1b626fe3..588827844f30 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -339,7 +339,7 @@ static int ipu_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	}
 
 	if (crtc != plane->crtc)
-		dev_info(plane->dev->dev, "crtc change: %p -> %p\n",
+		dev_dbg(plane->dev->dev, "crtc change: %p -> %p\n",
 				plane->crtc, crtc);
 	plane->crtc = crtc;
 
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 0ffef172afb4..363e2c7741e2 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -112,13 +112,6 @@ static void imx_pd_encoder_dpms(struct drm_encoder *encoder, int mode)
 		drm_panel_enable(imxpd->panel);
 }
 
-static bool imx_pd_encoder_mode_fixup(struct drm_encoder *encoder,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void imx_pd_encoder_prepare(struct drm_encoder *encoder)
 {
 	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
@@ -166,7 +159,6 @@ static const struct drm_encoder_funcs imx_pd_encoder_funcs = {
 
 static const struct drm_encoder_helper_funcs imx_pd_encoder_helper_funcs = {
 	.dpms = imx_pd_encoder_dpms,
-	.mode_fixup = imx_pd_encoder_mode_fixup,
 	.prepare = imx_pd_encoder_prepare,
 	.commit = imx_pd_encoder_commit,
 	.mode_set = imx_pd_encoder_mode_set,
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index dc13c4857e6f..14e64e08909e 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -92,18 +92,6 @@ static inline void mga_wait_busy(struct mga_device *mdev)
 	} while ((status & 0x01) && time_before(jiffies, timeout));
 }
 
-/*
- * The core passes the desired mode to the CRTC code to see whether any
- * CRTC-specific modifications need to be made to it. We're in a position
- * to just pass that straight through, so this does nothing
- */
-static bool mga_crtc_mode_fixup(struct drm_crtc *crtc,
-				const struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 #define P_ARRAY_SIZE 9
 
 static int mga_g200se_set_plls(struct mga_device *mdev, long clock)
@@ -1410,7 +1398,6 @@ static const struct drm_crtc_funcs mga_crtc_funcs = {
 static const struct drm_crtc_helper_funcs mga_helper_funcs = {
 	.disable = mga_crtc_disable,
 	.dpms = mga_crtc_dpms,
-	.mode_fixup = mga_crtc_mode_fixup,
 	.mode_set = mga_crtc_mode_set,
 	.mode_set_base = mga_crtc_mode_set_base,
 	.prepare = mga_crtc_prepare,
@@ -1479,13 +1466,6 @@ void mga_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
  * These functions are analagous to those in the CRTC code, but are intended
  * to handle any encoder-specific limitations
  */
-static bool mga_encoder_mode_fixup(struct drm_encoder *encoder,
-				   const struct drm_display_mode *mode,
-				   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mga_encoder_mode_set(struct drm_encoder *encoder,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode)
@@ -1515,7 +1495,6 @@ static void mga_encoder_destroy(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs mga_encoder_helper_funcs = {
 	.dpms = mga_encoder_dpms,
-	.mode_fixup = mga_encoder_mode_fixup,
 	.mode_set = mga_encoder_mode_set,
 	.prepare = mga_encoder_prepare,
 	.commit = mga_encoder_commit,
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 065ad4138799..ddb4c9d097e4 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -12,6 +12,7 @@ msm-y := \
 	hdmi/hdmi_connector.o \
 	hdmi/hdmi_hdcp.o \
 	hdmi/hdmi_i2c.o \
+	hdmi/hdmi_phy.o \
 	hdmi/hdmi_phy_8960.o \
 	hdmi/hdmi_phy_8x60.o \
 	hdmi/hdmi_phy_8x74.o \
@@ -52,6 +53,8 @@ msm-y := \
 
 msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o
 msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o
+msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o
+msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_phy_8996.o
 
 msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \
 			mdp/mdp4/mdp4_dsi_encoder.o \
diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
index 9e2aceb4ffe6..fee24297fb92 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          ( 109916 bytes, from 2016-02-20 18:44:48)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
index 97dc1c6ec107..27dabd5e57fb 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          ( 109916 bytes, from 2016-02-20 18:44:48)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -111,10 +112,14 @@ enum a3xx_vtx_fmt {
 	VFMT_8_8_SNORM = 53,
 	VFMT_8_8_8_SNORM = 54,
 	VFMT_8_8_8_8_SNORM = 55,
-	VFMT_10_10_10_2_UINT = 60,
-	VFMT_10_10_10_2_UNORM = 61,
-	VFMT_10_10_10_2_SINT = 62,
-	VFMT_10_10_10_2_SNORM = 63,
+	VFMT_10_10_10_2_UINT = 56,
+	VFMT_10_10_10_2_UNORM = 57,
+	VFMT_10_10_10_2_SINT = 58,
+	VFMT_10_10_10_2_SNORM = 59,
+	VFMT_2_10_10_10_UINT = 60,
+	VFMT_2_10_10_10_UNORM = 61,
+	VFMT_2_10_10_10_SINT = 62,
+	VFMT_2_10_10_10_SNORM = 63,
 };
 
 enum a3xx_tex_fmt {
@@ -138,10 +143,12 @@ enum a3xx_tex_fmt {
 	TFMT_DXT1 = 36,
 	TFMT_DXT3 = 37,
 	TFMT_DXT5 = 38,
+	TFMT_2_10_10_10_UNORM = 40,
 	TFMT_10_10_10_2_UNORM = 41,
 	TFMT_9_9_9_E5_FLOAT = 42,
 	TFMT_11_11_10_FLOAT = 43,
 	TFMT_A8_UNORM = 44,
+	TFMT_L8_UNORM = 45,
 	TFMT_L8_A8_UNORM = 47,
 	TFMT_8_UNORM = 48,
 	TFMT_8_8_UNORM = 49,
@@ -183,6 +190,8 @@ enum a3xx_tex_fmt {
 	TFMT_32_SINT = 92,
 	TFMT_32_32_SINT = 93,
 	TFMT_32_32_32_32_SINT = 95,
+	TFMT_2_10_10_10_UINT = 96,
+	TFMT_10_10_10_2_UINT = 97,
 	TFMT_ETC2_RG11_SNORM = 112,
 	TFMT_ETC2_RG11_UNORM = 113,
 	TFMT_ETC2_R11_SNORM = 114,
@@ -215,6 +224,9 @@ enum a3xx_color_fmt {
 	RB_R8_UINT = 14,
 	RB_R8_SINT = 15,
 	RB_R10G10B10A2_UNORM = 16,
+	RB_A2R10G10B10_UNORM = 17,
+	RB_R10G10B10A2_UINT = 18,
+	RB_A2R10G10B10_UINT = 19,
 	RB_A8_UNORM = 20,
 	RB_R8_UNORM = 21,
 	RB_R16_FLOAT = 24,
@@ -244,30 +256,273 @@ enum a3xx_color_fmt {
 	RB_R32G32B32A32_UINT = 59,
 };
 
+enum a3xx_cp_perfcounter_select {
+	CP_ALWAYS_COUNT = 0,
+	CP_AHB_PFPTRANS_WAIT = 3,
+	CP_AHB_NRTTRANS_WAIT = 6,
+	CP_CSF_NRT_READ_WAIT = 8,
+	CP_CSF_I1_FIFO_FULL = 9,
+	CP_CSF_I2_FIFO_FULL = 10,
+	CP_CSF_ST_FIFO_FULL = 11,
+	CP_RESERVED_12 = 12,
+	CP_CSF_RING_ROQ_FULL = 13,
+	CP_CSF_I1_ROQ_FULL = 14,
+	CP_CSF_I2_ROQ_FULL = 15,
+	CP_CSF_ST_ROQ_FULL = 16,
+	CP_RESERVED_17 = 17,
+	CP_MIU_TAG_MEM_FULL = 18,
+	CP_MIU_NRT_WRITE_STALLED = 22,
+	CP_MIU_NRT_READ_STALLED = 23,
+	CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
+	CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
+	CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
+	CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
+	CP_ME_MICRO_RB_STARVED = 30,
+	CP_AHB_RBBM_DWORD_SENT = 40,
+	CP_ME_BUSY_CLOCKS = 41,
+	CP_ME_WAIT_CONTEXT_AVAIL = 42,
+	CP_PFP_TYPE0_PACKET = 43,
+	CP_PFP_TYPE3_PACKET = 44,
+	CP_CSF_RB_WPTR_NEQ_RPTR = 45,
+	CP_CSF_I1_SIZE_NEQ_ZERO = 46,
+	CP_CSF_I2_SIZE_NEQ_ZERO = 47,
+	CP_CSF_RBI1I2_FETCHING = 48,
+};
+
+enum a3xx_gras_tse_perfcounter_select {
+	GRAS_TSEPERF_INPUT_PRIM = 0,
+	GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
+	GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
+	GRAS_TSEPERF_CLIPPED_PRIM = 3,
+	GRAS_TSEPERF_NEW_PRIM = 4,
+	GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
+	GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
+	GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
+	GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
+	GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
+	GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
+	GRAS_TSEPERF_POST_CLIP_PRIM = 11,
+	GRAS_TSEPERF_WORKING_CYCLES = 12,
+	GRAS_TSEPERF_PC_STARVE = 13,
+	GRAS_TSERASPERF_STALL = 14,
+};
+
+enum a3xx_gras_ras_perfcounter_select {
+	GRAS_RASPERF_16X16_TILES = 0,
+	GRAS_RASPERF_8X8_TILES = 1,
+	GRAS_RASPERF_4X4_TILES = 2,
+	GRAS_RASPERF_WORKING_CYCLES = 3,
+	GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
+	GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
+	GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
+};
+
+enum a3xx_hlsq_perfcounter_select {
+	HLSQ_PERF_SP_VS_CONSTANT = 0,
+	HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
+	HLSQ_PERF_SP_FS_CONSTANT = 2,
+	HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
+	HLSQ_PERF_TP_STATE = 4,
+	HLSQ_PERF_QUADS = 5,
+	HLSQ_PERF_PIXELS = 6,
+	HLSQ_PERF_VERTICES = 7,
+	HLSQ_PERF_FS8_THREADS = 8,
+	HLSQ_PERF_FS16_THREADS = 9,
+	HLSQ_PERF_FS32_THREADS = 10,
+	HLSQ_PERF_VS8_THREADS = 11,
+	HLSQ_PERF_VS16_THREADS = 12,
+	HLSQ_PERF_SP_VS_DATA_BYTES = 13,
+	HLSQ_PERF_SP_FS_DATA_BYTES = 14,
+	HLSQ_PERF_ACTIVE_CYCLES = 15,
+	HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
+	HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
+	HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
+	HLSQ_PERF_STALL_CYCLES_UCHE = 19,
+	HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
+	HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
+	HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
+	HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
+	HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
+	HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
+	HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
+	HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
+	HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
+};
+
+enum a3xx_pc_perfcounter_select {
+	PC_PCPERF_VISIBILITY_STREAMS = 0,
+	PC_PCPERF_TOTAL_INSTANCES = 1,
+	PC_PCPERF_PRIMITIVES_PC_VPC = 2,
+	PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
+	PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
+	PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
+	PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
+	PC_PCPERF_VERTICES_TO_VFD = 7,
+	PC_PCPERF_REUSED_VERTICES = 8,
+	PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
+	PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
+	PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
+	PC_PCPERF_CYCLES_IS_WORKING = 12,
+};
+
+enum a3xx_rb_perfcounter_select {
+	RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
+	RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
+	RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
+	RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
+	RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
+	RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
+	RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
+	RB_RBPERF_RB_MARB_DATA = 7,
+	RB_RBPERF_SP_RB_QUAD = 8,
+	RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
+	RB_RBPERF_GMEM_CH0_READ = 10,
+	RB_RBPERF_GMEM_CH1_READ = 11,
+	RB_RBPERF_GMEM_CH0_WRITE = 12,
+	RB_RBPERF_GMEM_CH1_WRITE = 13,
+	RB_RBPERF_CP_CONTEXT_DONE = 14,
+	RB_RBPERF_CP_CACHE_FLUSH = 15,
+	RB_RBPERF_CP_ZPASS_DONE = 16,
+};
+
+enum a3xx_rbbm_perfcounter_select {
+	RBBM_ALAWYS_ON = 0,
+	RBBM_VBIF_BUSY = 1,
+	RBBM_TSE_BUSY = 2,
+	RBBM_RAS_BUSY = 3,
+	RBBM_PC_DCALL_BUSY = 4,
+	RBBM_PC_VSD_BUSY = 5,
+	RBBM_VFD_BUSY = 6,
+	RBBM_VPC_BUSY = 7,
+	RBBM_UCHE_BUSY = 8,
+	RBBM_VSC_BUSY = 9,
+	RBBM_HLSQ_BUSY = 10,
+	RBBM_ANY_RB_BUSY = 11,
+	RBBM_ANY_TEX_BUSY = 12,
+	RBBM_ANY_USP_BUSY = 13,
+	RBBM_ANY_MARB_BUSY = 14,
+	RBBM_ANY_ARB_BUSY = 15,
+	RBBM_AHB_STATUS_BUSY = 16,
+	RBBM_AHB_STATUS_STALLED = 17,
+	RBBM_AHB_STATUS_TXFR = 18,
+	RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+	RBBM_AHB_STATUS_TXFR_ERROR = 20,
+	RBBM_AHB_STATUS_LONG_STALL = 21,
+	RBBM_RBBM_STATUS_MASKED = 22,
+};
+
 enum a3xx_sp_perfcounter_select {
+	SP_LM_LOAD_INSTRUCTIONS = 0,
+	SP_LM_STORE_INSTRUCTIONS = 1,
+	SP_LM_ATOMICS = 2,
+	SP_UCHE_LOAD_INSTRUCTIONS = 3,
+	SP_UCHE_STORE_INSTRUCTIONS = 4,
+	SP_UCHE_ATOMICS = 5,
+	SP_VS_TEX_INSTRUCTIONS = 6,
+	SP_VS_CFLOW_INSTRUCTIONS = 7,
+	SP_VS_EFU_INSTRUCTIONS = 8,
+	SP_VS_FULL_ALU_INSTRUCTIONS = 9,
+	SP_VS_HALF_ALU_INSTRUCTIONS = 10,
+	SP_FS_TEX_INSTRUCTIONS = 11,
 	SP_FS_CFLOW_INSTRUCTIONS = 12,
+	SP_FS_EFU_INSTRUCTIONS = 13,
 	SP_FS_FULL_ALU_INSTRUCTIONS = 14,
-	SP0_ICL1_MISSES = 26,
+	SP_FS_HALF_ALU_INSTRUCTIONS = 15,
+	SP_FS_BARY_INSTRUCTIONS = 16,
+	SP_VS_INSTRUCTIONS = 17,
+	SP_FS_INSTRUCTIONS = 18,
+	SP_ADDR_LOCK_COUNT = 19,
+	SP_UCHE_READ_TRANS = 20,
+	SP_UCHE_WRITE_TRANS = 21,
+	SP_EXPORT_VPC_TRANS = 22,
+	SP_EXPORT_RB_TRANS = 23,
+	SP_PIXELS_KILLED = 24,
+	SP_ICL1_REQUESTS = 25,
+	SP_ICL1_MISSES = 26,
+	SP_ICL0_REQUESTS = 27,
+	SP_ICL0_MISSES = 28,
 	SP_ALU_ACTIVE_CYCLES = 29,
+	SP_EFU_ACTIVE_CYCLES = 30,
+	SP_STALL_CYCLES_BY_VPC = 31,
+	SP_STALL_CYCLES_BY_TP = 32,
+	SP_STALL_CYCLES_BY_UCHE = 33,
+	SP_STALL_CYCLES_BY_RB = 34,
+	SP_ACTIVE_CYCLES_ANY = 35,
+	SP_ACTIVE_CYCLES_ALL = 36,
+};
+
+enum a3xx_tp_perfcounter_select {
+	TPL1_TPPERF_L1_REQUESTS = 0,
+	TPL1_TPPERF_TP0_L1_REQUESTS = 1,
+	TPL1_TPPERF_TP0_L1_MISSES = 2,
+	TPL1_TPPERF_TP1_L1_REQUESTS = 3,
+	TPL1_TPPERF_TP1_L1_MISSES = 4,
+	TPL1_TPPERF_TP2_L1_REQUESTS = 5,
+	TPL1_TPPERF_TP2_L1_MISSES = 6,
+	TPL1_TPPERF_TP3_L1_REQUESTS = 7,
+	TPL1_TPPERF_TP3_L1_MISSES = 8,
+	TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
+	TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
+	TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
+	TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
+	TPL1_TPPERF_BILINEAR_OPS = 13,
+	TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
+	TPL1_TPPERF_QUADQUADS_SHADOW = 15,
+	TPL1_TPPERF_QUADS_ARRAY = 16,
+	TPL1_TPPERF_QUADS_PROJECTION = 17,
+	TPL1_TPPERF_QUADS_GRADIENT = 18,
+	TPL1_TPPERF_QUADS_1D2D = 19,
+	TPL1_TPPERF_QUADS_3DCUBE = 20,
+	TPL1_TPPERF_ZERO_LOD = 21,
+	TPL1_TPPERF_OUTPUT_TEXELS = 22,
+	TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
+	TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
+	TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
+	TPL1_TPPERF_LATENCY = 26,
+	TPL1_TPPERF_LATENCY_TRANS = 27,
 };
 
-enum a3xx_rop_code {
-	ROP_CLEAR = 0,
-	ROP_NOR = 1,
-	ROP_AND_INVERTED = 2,
-	ROP_COPY_INVERTED = 3,
-	ROP_AND_REVERSE = 4,
-	ROP_INVERT = 5,
-	ROP_XOR = 6,
-	ROP_NAND = 7,
-	ROP_AND = 8,
-	ROP_EQUIV = 9,
-	ROP_NOOP = 10,
-	ROP_OR_INVERTED = 11,
-	ROP_COPY = 12,
-	ROP_OR_REVERSE = 13,
-	ROP_OR = 14,
-	ROP_SET = 15,
+enum a3xx_vfd_perfcounter_select {
+	VFD_PERF_UCHE_BYTE_FETCHED = 0,
+	VFD_PERF_UCHE_TRANS = 1,
+	VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
+	VFD_PERF_FETCH_INSTRUCTIONS = 3,
+	VFD_PERF_DECODE_INSTRUCTIONS = 4,
+	VFD_PERF_ACTIVE_CYCLES = 5,
+	VFD_PERF_STALL_CYCLES_UCHE = 6,
+	VFD_PERF_STALL_CYCLES_HLSQ = 7,
+	VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
+	VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
+};
+
+enum a3xx_vpc_perfcounter_select {
+	VPC_PERF_SP_LM_PRIMITIVES = 0,
+	VPC_PERF_COMPONENTS_FROM_SP = 1,
+	VPC_PERF_SP_LM_COMPONENTS = 2,
+	VPC_PERF_ACTIVE_CYCLES = 3,
+	VPC_PERF_STALL_CYCLES_LM = 4,
+	VPC_PERF_STALL_CYCLES_RAS = 5,
+};
+
+enum a3xx_uche_perfcounter_select {
+	UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
+	UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
+	UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
+	UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
+	UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
+	UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
+	UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
+	UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
+	UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
+	UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
+	UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
+	UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
+	UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
+	UCHE_UCHEPERF_EVICTS = 16,
+	UCHE_UCHEPERF_FLUSHES = 17,
+	UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
+	UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
+	UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
 };
 
 enum a3xx_rb_blend_opcode {
@@ -1429,15 +1684,23 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_
 #define REG_A3XX_PC_RESTART_INDEX				0x000021ed
 
 #define REG_A3XX_HLSQ_CONTROL_0_REG				0x00002200
-#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK		0x00000010
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK		0x00000030
 #define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT		4
 static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
 {
 	return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
 }
 #define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE		0x00000040
+#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE			0x00000100
 #define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART			0x00000200
 #define A3XX_HLSQ_CONTROL_0_REG_RESERVED2			0x00000400
+#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK	0x00fff000
+#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT	12
+static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK;
+}
+#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX			0x02000000
 #define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE			0x04000000
 #define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK			0x08000000
 #define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT		27
@@ -1451,17 +1714,39 @@ static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
 #define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT			0x80000000
 
 #define REG_A3XX_HLSQ_CONTROL_1_REG				0x00002201
-#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK		0x00000040
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK		0x000000c0
 #define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT		6
 static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
 {
 	return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
 }
 #define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE		0x00000100
-#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1			0x00000200
-#define A3XX_HLSQ_CONTROL_1_REG_ZWCOORD				0x02000000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK		0x00ff0000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT		16
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK;
+}
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK		0xff000000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT		24
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK;
+}
 
 #define REG_A3XX_HLSQ_CONTROL_2_REG				0x00002202
+#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK		0x000003fc
+#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT		2
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK;
+}
+#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK		0x03fc0000
+#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT		18
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK;
+}
 #define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK	0xfc000000
 #define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT	26
 static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
@@ -1478,13 +1763,13 @@ static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
 }
 
 #define REG_A3XX_HLSQ_VS_CONTROL_REG				0x00002204
-#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK		0x00000fff
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK		0x000003ff
 #define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT		0
 static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
 {
 	return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
 }
-#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x00fff000
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x001ff000
 #define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT	12
 static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
 {
@@ -1498,13 +1783,13 @@ static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
 }
 
 #define REG_A3XX_HLSQ_FS_CONTROL_REG				0x00002205
-#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK		0x00000fff
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK		0x000003ff
 #define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT		0
 static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
 {
 	return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
 }
-#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x00fff000
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x001ff000
 #define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT	12
 static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
 {
@@ -1518,13 +1803,13 @@ static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
 }
 
 #define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG			0x00002206
-#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK	0x0000ffff
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK	0x000001ff
 #define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT	0
 static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
 {
 	return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
 }
-#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK	0xffff0000
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK	0x01ff0000
 #define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT	16
 static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
 {
@@ -1532,13 +1817,13 @@ static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
 }
 
 #define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG			0x00002207
-#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK	0x0000ffff
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK	0x000001ff
 #define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT	0
 static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
 {
 	return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
 }
-#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK	0xffff0000
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK	0x01ff0000
 #define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT	16
 static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
 {
@@ -1620,12 +1905,24 @@ static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
 }
 
 #define REG_A3XX_VFD_CONTROL_1					0x00002241
-#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK			0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK			0x0000000f
 #define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT			0
 static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
 {
 	return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
 }
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK			0x000000f0
+#define A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT			4
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXTHRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_MAXTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MAXTHRESHOLD__MASK;
+}
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK			0x00000f00
+#define A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT			8
+static inline uint32_t A3XX_VFD_CONTROL_1_MINTHRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_MINTHRESHOLD__SHIFT) & A3XX_VFD_CONTROL_1_MINTHRESHOLD__MASK;
+}
 #define A3XX_VFD_CONTROL_1_REGID4VTX__MASK			0x00ff0000
 #define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT			16
 static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
@@ -2008,24 +2305,19 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
 	return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
 }
 #define A3XX_SP_VS_CTRL_REG0_CACHEINVALID			0x00000004
+#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE				0x00000008
 #define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK		0x000003f0
 #define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT		4
 static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
 }
-#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0003fc00
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0000fc00
 #define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT		10
 static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
 }
-#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK		0x000c0000
-#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT		18
-static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
-{
-	return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
-}
 #define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK			0x00100000
 #define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT			20
 static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
@@ -2033,8 +2325,6 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
 	return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
 }
 #define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE			0x00200000
-#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE			0x00400000
-#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE			0x00800000
 #define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK			0xff000000
 #define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT			24
 static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
@@ -2075,7 +2365,8 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
 }
-#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK		0xfff00000
+#define A3XX_SP_VS_PARAM_REG_POS2DMODE				0x00010000
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK		0x01f00000
 #define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT		20
 static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
 {
@@ -2085,24 +2376,26 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
 static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
 
 static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
-#define A3XX_SP_VS_OUT_REG_A_REGID__MASK			0x000001ff
+#define A3XX_SP_VS_OUT_REG_A_REGID__MASK			0x000000ff
 #define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT			0
 static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
 }
+#define A3XX_SP_VS_OUT_REG_A_HALF				0x00000100
 #define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK			0x00001e00
 #define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT			9
 static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
 }
-#define A3XX_SP_VS_OUT_REG_B_REGID__MASK			0x01ff0000
+#define A3XX_SP_VS_OUT_REG_B_REGID__MASK			0x00ff0000
 #define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT			16
 static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
 }
+#define A3XX_SP_VS_OUT_REG_B_HALF				0x01000000
 #define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK			0x1e000000
 #define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT			25
 static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
@@ -2113,25 +2406,25 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
 static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
 
 static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK			0x000000ff
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK			0x0000007f
 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT			0
 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
 }
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK			0x0000ff00
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK			0x00007f00
 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT			8
 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
 }
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK			0x00ff0000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK			0x007f0000
 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT			16
 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
 }
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK			0xff000000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK			0x7f000000
 #define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT			24
 static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
 {
@@ -2139,6 +2432,12 @@ static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
 }
 
 #define REG_A3XX_SP_VS_OBJ_OFFSET_REG				0x000022d4
+#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK	0x0000ffff
+#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT	0
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
+}
 #define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
 #define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
 static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
@@ -2155,8 +2454,38 @@ static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
 #define REG_A3XX_SP_VS_OBJ_START_REG				0x000022d5
 
 #define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG			0x000022d6
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK	0x000000ff
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT	0
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK	0x00ffff00
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT	8
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK	0xff000000
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT	24
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
+}
 
 #define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG				0x000022d7
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK		0x0000001f
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT		0
+static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK	0xffffffe0
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT	5
+static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
+{
+	return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
+}
 
 #define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG				0x000022d8
 
@@ -2182,24 +2511,22 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
 	return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
 }
 #define A3XX_SP_FS_CTRL_REG0_CACHEINVALID			0x00000004
+#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE				0x00000008
 #define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK		0x000003f0
 #define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT		4
 static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
 {
 	return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
 }
-#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0003fc00
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0000fc00
 #define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT		10
 static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
 {
 	return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
 }
-#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK		0x000c0000
-#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT		18
-static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
-{
-	return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
-}
+#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE			0x00020000
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP			0x00040000
+#define A3XX_SP_FS_CTRL_REG0_OUTORDERED				0x00080000
 #define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK			0x00100000
 #define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT			20
 static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
@@ -2235,7 +2562,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
 {
 	return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
 }
-#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK		0x3f000000
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK		0x7f000000
 #define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT		24
 static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
 {
@@ -2243,6 +2570,12 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
 }
 
 #define REG_A3XX_SP_FS_OBJ_OFFSET_REG				0x000022e2
+#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK	0x0000ffff
+#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT	0
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
+}
 #define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
 #define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
 static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
@@ -2259,8 +2592,38 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
 #define REG_A3XX_SP_FS_OBJ_START_REG				0x000022e3
 
 #define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG			0x000022e4
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK	0x000000ff
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT	0
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK	0x00ffff00
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT	8
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK	0xff000000
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT	24
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
+}
 
 #define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG				0x000022e5
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK		0x0000001f
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT		0
+static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK	0xffffffe0
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT	5
+static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
+{
+	return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
+}
 
 #define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG				0x000022e6
 
diff --git a/drivers/gpu/drm/msm/adreno/a4xx.xml.h b/drivers/gpu/drm/msm/adreno/a4xx.xml.h
index 99de8271dba8..3220b91f559a 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a4xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          ( 109916 bytes, from 2016-02-20 18:44:48)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -47,11 +48,13 @@ enum a4xx_color_fmt {
 	RB4_R8_UNORM = 2,
 	RB4_R4G4B4A4_UNORM = 8,
 	RB4_R5G5B5A1_UNORM = 10,
-	RB4_R5G6R5_UNORM = 14,
+	RB4_R5G6B5_UNORM = 14,
 	RB4_R8G8_UNORM = 15,
 	RB4_R8G8_SNORM = 16,
 	RB4_R8G8_UINT = 17,
 	RB4_R8G8_SINT = 18,
+	RB4_R16_UNORM = 19,
+	RB4_R16_SNORM = 20,
 	RB4_R16_FLOAT = 21,
 	RB4_R16_UINT = 22,
 	RB4_R16_SINT = 23,
@@ -63,12 +66,16 @@ enum a4xx_color_fmt {
 	RB4_R10G10B10A2_UNORM = 31,
 	RB4_R10G10B10A2_UINT = 34,
 	RB4_R11G11B10_FLOAT = 39,
+	RB4_R16G16_UNORM = 40,
+	RB4_R16G16_SNORM = 41,
 	RB4_R16G16_FLOAT = 42,
 	RB4_R16G16_UINT = 43,
 	RB4_R16G16_SINT = 44,
 	RB4_R32_FLOAT = 45,
 	RB4_R32_UINT = 46,
 	RB4_R32_SINT = 47,
+	RB4_R16G16B16A16_UNORM = 52,
+	RB4_R16G16B16A16_SNORM = 53,
 	RB4_R16G16B16A16_FLOAT = 54,
 	RB4_R16G16B16A16_UINT = 55,
 	RB4_R16G16B16A16_SINT = 56,
@@ -106,6 +113,7 @@ enum a4xx_vtx_fmt {
 	VFMT4_32_32_FIXED = 10,
 	VFMT4_32_32_32_FIXED = 11,
 	VFMT4_32_32_32_32_FIXED = 12,
+	VFMT4_11_11_10_FLOAT = 13,
 	VFMT4_16_SINT = 16,
 	VFMT4_16_16_SINT = 17,
 	VFMT4_16_16_16_SINT = 18,
@@ -146,52 +154,76 @@ enum a4xx_vtx_fmt {
 	VFMT4_8_8_SNORM = 53,
 	VFMT4_8_8_8_SNORM = 54,
 	VFMT4_8_8_8_8_SNORM = 55,
-	VFMT4_10_10_10_2_UINT = 60,
-	VFMT4_10_10_10_2_UNORM = 61,
-	VFMT4_10_10_10_2_SINT = 62,
-	VFMT4_10_10_10_2_SNORM = 63,
+	VFMT4_10_10_10_2_UINT = 56,
+	VFMT4_10_10_10_2_UNORM = 57,
+	VFMT4_10_10_10_2_SINT = 58,
+	VFMT4_10_10_10_2_SNORM = 59,
+	VFMT4_2_10_10_10_UINT = 60,
+	VFMT4_2_10_10_10_UNORM = 61,
+	VFMT4_2_10_10_10_SINT = 62,
+	VFMT4_2_10_10_10_SNORM = 63,
 };
 
 enum a4xx_tex_fmt {
-	TFMT4_5_6_5_UNORM = 11,
-	TFMT4_5_5_5_1_UNORM = 10,
-	TFMT4_4_4_4_4_UNORM = 8,
-	TFMT4_X8Z24_UNORM = 71,
-	TFMT4_10_10_10_2_UNORM = 33,
 	TFMT4_A8_UNORM = 3,
-	TFMT4_L8_A8_UNORM = 13,
 	TFMT4_8_UNORM = 4,
-	TFMT4_8_8_UNORM = 14,
-	TFMT4_8_8_8_8_UNORM = 28,
 	TFMT4_8_SNORM = 5,
-	TFMT4_8_8_SNORM = 15,
-	TFMT4_8_8_8_8_SNORM = 29,
 	TFMT4_8_UINT = 6,
-	TFMT4_8_8_UINT = 16,
-	TFMT4_8_8_8_8_UINT = 30,
 	TFMT4_8_SINT = 7,
+	TFMT4_4_4_4_4_UNORM = 8,
+	TFMT4_5_5_5_1_UNORM = 9,
+	TFMT4_5_6_5_UNORM = 11,
+	TFMT4_L8_A8_UNORM = 13,
+	TFMT4_8_8_UNORM = 14,
+	TFMT4_8_8_SNORM = 15,
+	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_SINT = 17,
-	TFMT4_8_8_8_8_SINT = 31,
+	TFMT4_16_UNORM = 18,
+	TFMT4_16_SNORM = 19,
+	TFMT4_16_FLOAT = 20,
 	TFMT4_16_UINT = 21,
-	TFMT4_16_16_UINT = 41,
-	TFMT4_16_16_16_16_UINT = 54,
 	TFMT4_16_SINT = 22,
+	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_8_8_8_SINT = 31,
+	TFMT4_9_9_9_E5_FLOAT = 32,
+	TFMT4_10_10_10_2_UNORM = 33,
+	TFMT4_10_10_10_2_UINT = 34,
+	TFMT4_11_11_10_FLOAT = 37,
+	TFMT4_16_16_UNORM = 38,
+	TFMT4_16_16_SNORM = 39,
+	TFMT4_16_16_FLOAT = 40,
+	TFMT4_16_16_UINT = 41,
 	TFMT4_16_16_SINT = 42,
-	TFMT4_16_16_16_16_SINT = 55,
+	TFMT4_32_FLOAT = 43,
 	TFMT4_32_UINT = 44,
-	TFMT4_32_32_UINT = 57,
-	TFMT4_32_32_32_32_UINT = 64,
 	TFMT4_32_SINT = 45,
-	TFMT4_32_32_SINT = 58,
-	TFMT4_32_32_32_32_SINT = 65,
-	TFMT4_16_FLOAT = 20,
-	TFMT4_16_16_FLOAT = 40,
+	TFMT4_16_16_16_16_UNORM = 51,
+	TFMT4_16_16_16_16_SNORM = 52,
 	TFMT4_16_16_16_16_FLOAT = 53,
-	TFMT4_32_FLOAT = 43,
+	TFMT4_16_16_16_16_UINT = 54,
+	TFMT4_16_16_16_16_SINT = 55,
 	TFMT4_32_32_FLOAT = 56,
+	TFMT4_32_32_UINT = 57,
+	TFMT4_32_32_SINT = 58,
+	TFMT4_32_32_32_FLOAT = 59,
+	TFMT4_32_32_32_UINT = 60,
+	TFMT4_32_32_32_SINT = 61,
 	TFMT4_32_32_32_32_FLOAT = 63,
-	TFMT4_9_9_9_E5_FLOAT = 32,
-	TFMT4_11_11_10_FLOAT = 37,
+	TFMT4_32_32_32_32_UINT = 64,
+	TFMT4_32_32_32_32_SINT = 65,
+	TFMT4_X8Z24_UNORM = 71,
+	TFMT4_DXT1 = 86,
+	TFMT4_DXT3 = 87,
+	TFMT4_DXT5 = 88,
+	TFMT4_RGTC1_UNORM = 90,
+	TFMT4_RGTC1_SNORM = 91,
+	TFMT4_RGTC2_UNORM = 94,
+	TFMT4_RGTC2_SNORM = 95,
+	TFMT4_BPTC_UFLOAT = 97,
+	TFMT4_BPTC_FLOAT = 98,
+	TFMT4_BPTC = 99,
 	TFMT4_ATC_RGB = 100,
 	TFMT4_ATC_RGBA_EXPLICIT = 101,
 	TFMT4_ATC_RGBA_INTERPOLATED = 102,
@@ -240,6 +272,545 @@ enum a4xx_tess_spacing {
 	EVEN_SPACING = 3,
 };
 
+enum a4xx_ccu_perfcounter_select {
+	CCU_BUSY_CYCLES = 0,
+	CCU_RB_DEPTH_RETURN_STALL = 2,
+	CCU_RB_COLOR_RETURN_STALL = 3,
+	CCU_DEPTH_BLOCKS = 6,
+	CCU_COLOR_BLOCKS = 7,
+	CCU_DEPTH_BLOCK_HIT = 8,
+	CCU_COLOR_BLOCK_HIT = 9,
+	CCU_DEPTH_FLAG1_COUNT = 10,
+	CCU_DEPTH_FLAG2_COUNT = 11,
+	CCU_DEPTH_FLAG3_COUNT = 12,
+	CCU_DEPTH_FLAG4_COUNT = 13,
+	CCU_COLOR_FLAG1_COUNT = 14,
+	CCU_COLOR_FLAG2_COUNT = 15,
+	CCU_COLOR_FLAG3_COUNT = 16,
+	CCU_COLOR_FLAG4_COUNT = 17,
+	CCU_PARTIAL_BLOCK_READ = 18,
+};
+
+enum a4xx_cp_perfcounter_select {
+	CP_ALWAYS_COUNT = 0,
+	CP_BUSY = 1,
+	CP_PFP_IDLE = 2,
+	CP_PFP_BUSY_WORKING = 3,
+	CP_PFP_STALL_CYCLES_ANY = 4,
+	CP_PFP_STARVE_CYCLES_ANY = 5,
+	CP_PFP_STARVED_PER_LOAD_ADDR = 6,
+	CP_PFP_STALLED_PER_STORE_ADDR = 7,
+	CP_PFP_PC_PROFILE = 8,
+	CP_PFP_MATCH_PM4_PKT_PROFILE = 9,
+	CP_PFP_COND_INDIRECT_DISCARDED = 10,
+	CP_LONG_RESUMPTIONS = 11,
+	CP_RESUME_CYCLES = 12,
+	CP_RESUME_TO_BOUNDARY_CYCLES = 13,
+	CP_LONG_PREEMPTIONS = 14,
+	CP_PREEMPT_CYCLES = 15,
+	CP_PREEMPT_TO_BOUNDARY_CYCLES = 16,
+	CP_ME_FIFO_EMPTY_PFP_IDLE = 17,
+	CP_ME_FIFO_EMPTY_PFP_BUSY = 18,
+	CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19,
+	CP_ME_FIFO_FULL_ME_BUSY = 20,
+	CP_ME_FIFO_FULL_ME_NON_WORKING = 21,
+	CP_ME_WAITING_FOR_PACKETS = 22,
+	CP_ME_BUSY_WORKING = 23,
+	CP_ME_STARVE_CYCLES_ANY = 24,
+	CP_ME_STARVE_CYCLES_PER_PROFILE = 25,
+	CP_ME_STALL_CYCLES_PER_PROFILE = 26,
+	CP_ME_PC_PROFILE = 27,
+	CP_RCIU_FIFO_EMPTY = 28,
+	CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29,
+	CP_RCIU_FIFO_FULL = 30,
+	CP_RCIU_FIFO_FULL_NO_CONTEXT = 31,
+	CP_RCIU_FIFO_FULL_AHB_MASTER = 32,
+	CP_RCIU_FIFO_FULL_OTHER = 33,
+	CP_AHB_IDLE = 34,
+	CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35,
+	CP_AHB_STALL_ON_GRANT_SPLIT = 36,
+	CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37,
+	CP_AHB_BUSY_WORKING = 38,
+	CP_AHB_BUSY_STALL_ON_HRDY = 39,
+	CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40,
+};
+
+enum a4xx_gras_ras_perfcounter_select {
+	RAS_SUPER_TILES = 0,
+	RAS_8X8_TILES = 1,
+	RAS_4X4_TILES = 2,
+	RAS_BUSY_CYCLES = 3,
+	RAS_STALL_CYCLES_BY_RB = 4,
+	RAS_STALL_CYCLES_BY_VSC = 5,
+	RAS_STARVE_CYCLES_BY_TSE = 6,
+	RAS_SUPERTILE_CYCLES = 7,
+	RAS_TILE_CYCLES = 8,
+	RAS_FULLY_COVERED_SUPER_TILES = 9,
+	RAS_FULLY_COVERED_8X8_TILES = 10,
+	RAS_4X4_PRIM = 11,
+	RAS_8X4_4X8_PRIM = 12,
+	RAS_8X8_PRIM = 13,
+};
+
+enum a4xx_gras_tse_perfcounter_select {
+	TSE_INPUT_PRIM = 0,
+	TSE_INPUT_NULL_PRIM = 1,
+	TSE_TRIVAL_REJ_PRIM = 2,
+	TSE_CLIPPED_PRIM = 3,
+	TSE_NEW_PRIM = 4,
+	TSE_ZERO_AREA_PRIM = 5,
+	TSE_FACENESS_CULLED_PRIM = 6,
+	TSE_ZERO_PIXEL_PRIM = 7,
+	TSE_OUTPUT_NULL_PRIM = 8,
+	TSE_OUTPUT_VISIBLE_PRIM = 9,
+	TSE_PRE_CLIP_PRIM = 10,
+	TSE_POST_CLIP_PRIM = 11,
+	TSE_BUSY_CYCLES = 12,
+	TSE_PC_STARVE = 13,
+	TSE_RAS_STALL = 14,
+	TSE_STALL_BARYPLANE_FIFO_FULL = 15,
+	TSE_STALL_ZPLANE_FIFO_FULL = 16,
+};
+
+enum a4xx_hlsq_perfcounter_select {
+	HLSQ_SP_VS_STAGE_CONSTANT = 0,
+	HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1,
+	HLSQ_SP_FS_STAGE_CONSTANT = 2,
+	HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3,
+	HLSQ_TP_STATE = 4,
+	HLSQ_QUADS = 5,
+	HLSQ_PIXELS = 6,
+	HLSQ_VERTICES = 7,
+	HLSQ_SP_VS_STAGE_DATA_BYTES = 13,
+	HLSQ_SP_FS_STAGE_DATA_BYTES = 14,
+	HLSQ_BUSY_CYCLES = 15,
+	HLSQ_STALL_CYCLES_SP_STATE = 16,
+	HLSQ_STALL_CYCLES_SP_VS_STAGE = 17,
+	HLSQ_STALL_CYCLES_SP_FS_STAGE = 18,
+	HLSQ_STALL_CYCLES_UCHE = 19,
+	HLSQ_RBBM_LOAD_CYCLES = 20,
+	HLSQ_DI_TO_VS_START_SP = 21,
+	HLSQ_DI_TO_FS_START_SP = 22,
+	HLSQ_VS_STAGE_START_TO_DONE_SP = 23,
+	HLSQ_FS_STAGE_START_TO_DONE_SP = 24,
+	HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25,
+	HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26,
+	HLSQ_UCHE_LATENCY_CYCLES = 27,
+	HLSQ_UCHE_LATENCY_COUNT = 28,
+	HLSQ_STARVE_CYCLES_VFD = 29,
+};
+
+enum a4xx_pc_perfcounter_select {
+	PC_VIS_STREAMS_LOADED = 0,
+	PC_VPC_PRIMITIVES = 2,
+	PC_DEAD_PRIM = 3,
+	PC_LIVE_PRIM = 4,
+	PC_DEAD_DRAWCALLS = 5,
+	PC_LIVE_DRAWCALLS = 6,
+	PC_VERTEX_MISSES = 7,
+	PC_STALL_CYCLES_VFD = 9,
+	PC_STALL_CYCLES_TSE = 10,
+	PC_STALL_CYCLES_UCHE = 11,
+	PC_WORKING_CYCLES = 12,
+	PC_IA_VERTICES = 13,
+	PC_GS_PRIMITIVES = 14,
+	PC_HS_INVOCATIONS = 15,
+	PC_DS_INVOCATIONS = 16,
+	PC_DS_PRIMITIVES = 17,
+	PC_STARVE_CYCLES_FOR_INDEX = 20,
+	PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21,
+	PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22,
+	PC_STALL_CYCLES_TESS = 23,
+	PC_STARVE_CYCLES_FOR_POSITION = 24,
+	PC_MODE0_DRAWCALL = 25,
+	PC_MODE1_DRAWCALL = 26,
+	PC_MODE2_DRAWCALL = 27,
+	PC_MODE3_DRAWCALL = 28,
+	PC_MODE4_DRAWCALL = 29,
+	PC_PREDICATED_DEAD_DRAWCALL = 30,
+	PC_STALL_CYCLES_BY_TSE_ONLY = 31,
+	PC_STALL_CYCLES_BY_VPC_ONLY = 32,
+	PC_VPC_POS_DATA_TRANSACTION = 33,
+	PC_BUSY_CYCLES = 34,
+	PC_STARVE_CYCLES_DI = 35,
+	PC_STALL_CYCLES_VPC = 36,
+	TESS_WORKING_CYCLES = 37,
+	TESS_NUM_CYCLES_SETUP_WORKING = 38,
+	TESS_NUM_CYCLES_PTGEN_WORKING = 39,
+	TESS_NUM_CYCLES_CONNGEN_WORKING = 40,
+	TESS_BUSY_CYCLES = 41,
+	TESS_STARVE_CYCLES_PC = 42,
+	TESS_STALL_CYCLES_PC = 43,
+};
+
+enum a4xx_pwr_perfcounter_select {
+	PWR_CORE_CLOCK_CYCLES = 0,
+	PWR_BUSY_CLOCK_CYCLES = 1,
+};
+
+enum a4xx_rb_perfcounter_select {
+	RB_BUSY_CYCLES = 0,
+	RB_BUSY_CYCLES_BINNING = 1,
+	RB_BUSY_CYCLES_RENDERING = 2,
+	RB_BUSY_CYCLES_RESOLVE = 3,
+	RB_STARVE_CYCLES_BY_SP = 4,
+	RB_STARVE_CYCLES_BY_RAS = 5,
+	RB_STARVE_CYCLES_BY_MARB = 6,
+	RB_STALL_CYCLES_BY_MARB = 7,
+	RB_STALL_CYCLES_BY_HLSQ = 8,
+	RB_RB_RB_MARB_DATA = 9,
+	RB_SP_RB_QUAD = 10,
+	RB_RAS_RB_Z_QUADS = 11,
+	RB_GMEM_CH0_READ = 12,
+	RB_GMEM_CH1_READ = 13,
+	RB_GMEM_CH0_WRITE = 14,
+	RB_GMEM_CH1_WRITE = 15,
+	RB_CP_CONTEXT_DONE = 16,
+	RB_CP_CACHE_FLUSH = 17,
+	RB_CP_ZPASS_DONE = 18,
+	RB_STALL_FIFO0_FULL = 19,
+	RB_STALL_FIFO1_FULL = 20,
+	RB_STALL_FIFO2_FULL = 21,
+	RB_STALL_FIFO3_FULL = 22,
+	RB_RB_HLSQ_TRANSACTIONS = 23,
+	RB_Z_READ = 24,
+	RB_Z_WRITE = 25,
+	RB_C_READ = 26,
+	RB_C_WRITE = 27,
+	RB_C_READ_LATENCY = 28,
+	RB_Z_READ_LATENCY = 29,
+	RB_STALL_BY_UCHE = 30,
+	RB_MARB_UCHE_TRANSACTIONS = 31,
+	RB_CACHE_STALL_MISS = 32,
+	RB_CACHE_STALL_FIFO_FULL = 33,
+	RB_8BIT_BLENDER_UNITS_ACTIVE = 34,
+	RB_16BIT_BLENDER_UNITS_ACTIVE = 35,
+	RB_SAMPLER_UNITS_ACTIVE = 36,
+	RB_TOTAL_PASS = 38,
+	RB_Z_PASS = 39,
+	RB_Z_FAIL = 40,
+	RB_S_FAIL = 41,
+	RB_POWER0 = 42,
+	RB_POWER1 = 43,
+	RB_POWER2 = 44,
+	RB_POWER3 = 45,
+	RB_POWER4 = 46,
+	RB_POWER5 = 47,
+	RB_POWER6 = 48,
+	RB_POWER7 = 49,
+};
+
+enum a4xx_rbbm_perfcounter_select {
+	RBBM_ALWAYS_ON = 0,
+	RBBM_VBIF_BUSY = 1,
+	RBBM_TSE_BUSY = 2,
+	RBBM_RAS_BUSY = 3,
+	RBBM_PC_DCALL_BUSY = 4,
+	RBBM_PC_VSD_BUSY = 5,
+	RBBM_VFD_BUSY = 6,
+	RBBM_VPC_BUSY = 7,
+	RBBM_UCHE_BUSY = 8,
+	RBBM_VSC_BUSY = 9,
+	RBBM_HLSQ_BUSY = 10,
+	RBBM_ANY_RB_BUSY = 11,
+	RBBM_ANY_TPL1_BUSY = 12,
+	RBBM_ANY_SP_BUSY = 13,
+	RBBM_ANY_MARB_BUSY = 14,
+	RBBM_ANY_ARB_BUSY = 15,
+	RBBM_AHB_STATUS_BUSY = 16,
+	RBBM_AHB_STATUS_STALLED = 17,
+	RBBM_AHB_STATUS_TXFR = 18,
+	RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+	RBBM_AHB_STATUS_TXFR_ERROR = 20,
+	RBBM_AHB_STATUS_LONG_STALL = 21,
+	RBBM_STATUS_MASKED = 22,
+	RBBM_CP_BUSY_GFX_CORE_IDLE = 23,
+	RBBM_TESS_BUSY = 24,
+	RBBM_COM_BUSY = 25,
+	RBBM_DCOM_BUSY = 32,
+	RBBM_ANY_CCU_BUSY = 33,
+	RBBM_DPM_BUSY = 34,
+};
+
+enum a4xx_sp_perfcounter_select {
+	SP_LM_LOAD_INSTRUCTIONS = 0,
+	SP_LM_STORE_INSTRUCTIONS = 1,
+	SP_LM_ATOMICS = 2,
+	SP_GM_LOAD_INSTRUCTIONS = 3,
+	SP_GM_STORE_INSTRUCTIONS = 4,
+	SP_GM_ATOMICS = 5,
+	SP_VS_STAGE_TEX_INSTRUCTIONS = 6,
+	SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7,
+	SP_VS_STAGE_EFU_INSTRUCTIONS = 8,
+	SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9,
+	SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10,
+	SP_FS_STAGE_TEX_INSTRUCTIONS = 11,
+	SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12,
+	SP_FS_STAGE_EFU_INSTRUCTIONS = 13,
+	SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14,
+	SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15,
+	SP_VS_INSTRUCTIONS = 17,
+	SP_FS_INSTRUCTIONS = 18,
+	SP_ADDR_LOCK_COUNT = 19,
+	SP_UCHE_READ_TRANS = 20,
+	SP_UCHE_WRITE_TRANS = 21,
+	SP_EXPORT_VPC_TRANS = 22,
+	SP_EXPORT_RB_TRANS = 23,
+	SP_PIXELS_KILLED = 24,
+	SP_ICL1_REQUESTS = 25,
+	SP_ICL1_MISSES = 26,
+	SP_ICL0_REQUESTS = 27,
+	SP_ICL0_MISSES = 28,
+	SP_ALU_WORKING_CYCLES = 29,
+	SP_EFU_WORKING_CYCLES = 30,
+	SP_STALL_CYCLES_BY_VPC = 31,
+	SP_STALL_CYCLES_BY_TP = 32,
+	SP_STALL_CYCLES_BY_UCHE = 33,
+	SP_STALL_CYCLES_BY_RB = 34,
+	SP_BUSY_CYCLES = 35,
+	SP_HS_INSTRUCTIONS = 36,
+	SP_DS_INSTRUCTIONS = 37,
+	SP_GS_INSTRUCTIONS = 38,
+	SP_CS_INSTRUCTIONS = 39,
+	SP_SCHEDULER_NON_WORKING = 40,
+	SP_WAVE_CONTEXTS = 41,
+	SP_WAVE_CONTEXT_CYCLES = 42,
+	SP_POWER0 = 43,
+	SP_POWER1 = 44,
+	SP_POWER2 = 45,
+	SP_POWER3 = 46,
+	SP_POWER4 = 47,
+	SP_POWER5 = 48,
+	SP_POWER6 = 49,
+	SP_POWER7 = 50,
+	SP_POWER8 = 51,
+	SP_POWER9 = 52,
+	SP_POWER10 = 53,
+	SP_POWER11 = 54,
+	SP_POWER12 = 55,
+	SP_POWER13 = 56,
+	SP_POWER14 = 57,
+	SP_POWER15 = 58,
+};
+
+enum a4xx_tp_perfcounter_select {
+	TP_L1_REQUESTS = 0,
+	TP_L1_MISSES = 1,
+	TP_QUADS_OFFSET = 8,
+	TP_QUAD_SHADOW = 9,
+	TP_QUADS_ARRAY = 10,
+	TP_QUADS_GRADIENT = 11,
+	TP_QUADS_1D2D = 12,
+	TP_QUADS_3DCUBE = 13,
+	TP_BUSY_CYCLES = 16,
+	TP_STALL_CYCLES_BY_ARB = 17,
+	TP_STATE_CACHE_REQUESTS = 20,
+	TP_STATE_CACHE_MISSES = 21,
+	TP_POWER0 = 22,
+	TP_POWER1 = 23,
+	TP_POWER2 = 24,
+	TP_POWER3 = 25,
+	TP_POWER4 = 26,
+	TP_POWER5 = 27,
+	TP_POWER6 = 28,
+	TP_POWER7 = 29,
+};
+
+enum a4xx_uche_perfcounter_select {
+	UCHE_VBIF_READ_BEATS_TP = 0,
+	UCHE_VBIF_READ_BEATS_VFD = 1,
+	UCHE_VBIF_READ_BEATS_HLSQ = 2,
+	UCHE_VBIF_READ_BEATS_MARB = 3,
+	UCHE_VBIF_READ_BEATS_SP = 4,
+	UCHE_READ_REQUESTS_TP = 5,
+	UCHE_READ_REQUESTS_VFD = 6,
+	UCHE_READ_REQUESTS_HLSQ = 7,
+	UCHE_READ_REQUESTS_MARB = 8,
+	UCHE_READ_REQUESTS_SP = 9,
+	UCHE_WRITE_REQUESTS_MARB = 10,
+	UCHE_WRITE_REQUESTS_SP = 11,
+	UCHE_TAG_CHECK_FAILS = 12,
+	UCHE_EVICTS = 13,
+	UCHE_FLUSHES = 14,
+	UCHE_VBIF_LATENCY_CYCLES = 15,
+	UCHE_VBIF_LATENCY_SAMPLES = 16,
+	UCHE_BUSY_CYCLES = 17,
+	UCHE_VBIF_READ_BEATS_PC = 18,
+	UCHE_READ_REQUESTS_PC = 19,
+	UCHE_WRITE_REQUESTS_VPC = 20,
+	UCHE_STALL_BY_VBIF = 21,
+	UCHE_WRITE_REQUESTS_VSC = 22,
+	UCHE_POWER0 = 23,
+	UCHE_POWER1 = 24,
+	UCHE_POWER2 = 25,
+	UCHE_POWER3 = 26,
+	UCHE_POWER4 = 27,
+	UCHE_POWER5 = 28,
+	UCHE_POWER6 = 29,
+	UCHE_POWER7 = 30,
+};
+
+enum a4xx_vbif_perfcounter_select {
+	AXI_READ_REQUESTS_ID_0 = 0,
+	AXI_READ_REQUESTS_ID_1 = 1,
+	AXI_READ_REQUESTS_ID_2 = 2,
+	AXI_READ_REQUESTS_ID_3 = 3,
+	AXI_READ_REQUESTS_ID_4 = 4,
+	AXI_READ_REQUESTS_ID_5 = 5,
+	AXI_READ_REQUESTS_ID_6 = 6,
+	AXI_READ_REQUESTS_ID_7 = 7,
+	AXI_READ_REQUESTS_ID_8 = 8,
+	AXI_READ_REQUESTS_ID_9 = 9,
+	AXI_READ_REQUESTS_ID_10 = 10,
+	AXI_READ_REQUESTS_ID_11 = 11,
+	AXI_READ_REQUESTS_ID_12 = 12,
+	AXI_READ_REQUESTS_ID_13 = 13,
+	AXI_READ_REQUESTS_ID_14 = 14,
+	AXI_READ_REQUESTS_ID_15 = 15,
+	AXI0_READ_REQUESTS_TOTAL = 16,
+	AXI1_READ_REQUESTS_TOTAL = 17,
+	AXI2_READ_REQUESTS_TOTAL = 18,
+	AXI3_READ_REQUESTS_TOTAL = 19,
+	AXI_READ_REQUESTS_TOTAL = 20,
+	AXI_WRITE_REQUESTS_ID_0 = 21,
+	AXI_WRITE_REQUESTS_ID_1 = 22,
+	AXI_WRITE_REQUESTS_ID_2 = 23,
+	AXI_WRITE_REQUESTS_ID_3 = 24,
+	AXI_WRITE_REQUESTS_ID_4 = 25,
+	AXI_WRITE_REQUESTS_ID_5 = 26,
+	AXI_WRITE_REQUESTS_ID_6 = 27,
+	AXI_WRITE_REQUESTS_ID_7 = 28,
+	AXI_WRITE_REQUESTS_ID_8 = 29,
+	AXI_WRITE_REQUESTS_ID_9 = 30,
+	AXI_WRITE_REQUESTS_ID_10 = 31,
+	AXI_WRITE_REQUESTS_ID_11 = 32,
+	AXI_WRITE_REQUESTS_ID_12 = 33,
+	AXI_WRITE_REQUESTS_ID_13 = 34,
+	AXI_WRITE_REQUESTS_ID_14 = 35,
+	AXI_WRITE_REQUESTS_ID_15 = 36,
+	AXI0_WRITE_REQUESTS_TOTAL = 37,
+	AXI1_WRITE_REQUESTS_TOTAL = 38,
+	AXI2_WRITE_REQUESTS_TOTAL = 39,
+	AXI3_WRITE_REQUESTS_TOTAL = 40,
+	AXI_WRITE_REQUESTS_TOTAL = 41,
+	AXI_TOTAL_REQUESTS = 42,
+	AXI_READ_DATA_BEATS_ID_0 = 43,
+	AXI_READ_DATA_BEATS_ID_1 = 44,
+	AXI_READ_DATA_BEATS_ID_2 = 45,
+	AXI_READ_DATA_BEATS_ID_3 = 46,
+	AXI_READ_DATA_BEATS_ID_4 = 47,
+	AXI_READ_DATA_BEATS_ID_5 = 48,
+	AXI_READ_DATA_BEATS_ID_6 = 49,
+	AXI_READ_DATA_BEATS_ID_7 = 50,
+	AXI_READ_DATA_BEATS_ID_8 = 51,
+	AXI_READ_DATA_BEATS_ID_9 = 52,
+	AXI_READ_DATA_BEATS_ID_10 = 53,
+	AXI_READ_DATA_BEATS_ID_11 = 54,
+	AXI_READ_DATA_BEATS_ID_12 = 55,
+	AXI_READ_DATA_BEATS_ID_13 = 56,
+	AXI_READ_DATA_BEATS_ID_14 = 57,
+	AXI_READ_DATA_BEATS_ID_15 = 58,
+	AXI0_READ_DATA_BEATS_TOTAL = 59,
+	AXI1_READ_DATA_BEATS_TOTAL = 60,
+	AXI2_READ_DATA_BEATS_TOTAL = 61,
+	AXI3_READ_DATA_BEATS_TOTAL = 62,
+	AXI_READ_DATA_BEATS_TOTAL = 63,
+	AXI_WRITE_DATA_BEATS_ID_0 = 64,
+	AXI_WRITE_DATA_BEATS_ID_1 = 65,
+	AXI_WRITE_DATA_BEATS_ID_2 = 66,
+	AXI_WRITE_DATA_BEATS_ID_3 = 67,
+	AXI_WRITE_DATA_BEATS_ID_4 = 68,
+	AXI_WRITE_DATA_BEATS_ID_5 = 69,
+	AXI_WRITE_DATA_BEATS_ID_6 = 70,
+	AXI_WRITE_DATA_BEATS_ID_7 = 71,
+	AXI_WRITE_DATA_BEATS_ID_8 = 72,
+	AXI_WRITE_DATA_BEATS_ID_9 = 73,
+	AXI_WRITE_DATA_BEATS_ID_10 = 74,
+	AXI_WRITE_DATA_BEATS_ID_11 = 75,
+	AXI_WRITE_DATA_BEATS_ID_12 = 76,
+	AXI_WRITE_DATA_BEATS_ID_13 = 77,
+	AXI_WRITE_DATA_BEATS_ID_14 = 78,
+	AXI_WRITE_DATA_BEATS_ID_15 = 79,
+	AXI0_WRITE_DATA_BEATS_TOTAL = 80,
+	AXI1_WRITE_DATA_BEATS_TOTAL = 81,
+	AXI2_WRITE_DATA_BEATS_TOTAL = 82,
+	AXI3_WRITE_DATA_BEATS_TOTAL = 83,
+	AXI_WRITE_DATA_BEATS_TOTAL = 84,
+	AXI_DATA_BEATS_TOTAL = 85,
+	CYCLES_HELD_OFF_ID_0 = 86,
+	CYCLES_HELD_OFF_ID_1 = 87,
+	CYCLES_HELD_OFF_ID_2 = 88,
+	CYCLES_HELD_OFF_ID_3 = 89,
+	CYCLES_HELD_OFF_ID_4 = 90,
+	CYCLES_HELD_OFF_ID_5 = 91,
+	CYCLES_HELD_OFF_ID_6 = 92,
+	CYCLES_HELD_OFF_ID_7 = 93,
+	CYCLES_HELD_OFF_ID_8 = 94,
+	CYCLES_HELD_OFF_ID_9 = 95,
+	CYCLES_HELD_OFF_ID_10 = 96,
+	CYCLES_HELD_OFF_ID_11 = 97,
+	CYCLES_HELD_OFF_ID_12 = 98,
+	CYCLES_HELD_OFF_ID_13 = 99,
+	CYCLES_HELD_OFF_ID_14 = 100,
+	CYCLES_HELD_OFF_ID_15 = 101,
+	AXI_READ_REQUEST_HELD_OFF = 102,
+	AXI_WRITE_REQUEST_HELD_OFF = 103,
+	AXI_REQUEST_HELD_OFF = 104,
+	AXI_WRITE_DATA_HELD_OFF = 105,
+	OCMEM_AXI_READ_REQUEST_HELD_OFF = 106,
+	OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107,
+	OCMEM_AXI_REQUEST_HELD_OFF = 108,
+	OCMEM_AXI_WRITE_DATA_HELD_OFF = 109,
+	ELAPSED_CYCLES_DDR = 110,
+	ELAPSED_CYCLES_OCMEM = 111,
+};
+
+enum a4xx_vfd_perfcounter_select {
+	VFD_UCHE_BYTE_FETCHED = 0,
+	VFD_UCHE_TRANS = 1,
+	VFD_FETCH_INSTRUCTIONS = 3,
+	VFD_BUSY_CYCLES = 5,
+	VFD_STALL_CYCLES_UCHE = 6,
+	VFD_STALL_CYCLES_HLSQ = 7,
+	VFD_STALL_CYCLES_VPC_BYPASS = 8,
+	VFD_STALL_CYCLES_VPC_ALLOC = 9,
+	VFD_MODE_0_FIBERS = 13,
+	VFD_MODE_1_FIBERS = 14,
+	VFD_MODE_2_FIBERS = 15,
+	VFD_MODE_3_FIBERS = 16,
+	VFD_MODE_4_FIBERS = 17,
+	VFD_BFIFO_STALL = 18,
+	VFD_NUM_VERTICES_TOTAL = 19,
+	VFD_PACKER_FULL = 20,
+	VFD_UCHE_REQUEST_FIFO_FULL = 21,
+	VFD_STARVE_CYCLES_PC = 22,
+	VFD_STARVE_CYCLES_UCHE = 23,
+};
+
+enum a4xx_vpc_perfcounter_select {
+	VPC_SP_LM_COMPONENTS = 2,
+	VPC_SP0_LM_BYTES = 3,
+	VPC_SP1_LM_BYTES = 4,
+	VPC_SP2_LM_BYTES = 5,
+	VPC_SP3_LM_BYTES = 6,
+	VPC_WORKING_CYCLES = 7,
+	VPC_STALL_CYCLES_LM = 8,
+	VPC_STARVE_CYCLES_RAS = 9,
+	VPC_STREAMOUT_CYCLES = 10,
+	VPC_UCHE_TRANSACTIONS = 12,
+	VPC_STALL_CYCLES_UCHE = 13,
+	VPC_BUSY_CYCLES = 14,
+	VPC_STARVE_CYCLES_SP = 15,
+};
+
+enum a4xx_vsc_perfcounter_select {
+	VSC_BUSY_CYCLES = 0,
+	VSC_WORKING_CYCLES = 1,
+	VSC_STALL_CYCLES_UCHE = 2,
+	VSC_STARVE_CYCLES_RAS = 3,
+	VSC_EOT_NUM = 4,
+};
+
 enum a4xx_tex_filter {
 	A4XX_TEX_NEAREST = 0,
 	A4XX_TEX_LINEAR = 1,
@@ -326,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val)
 
 #define REG_A4XX_RB_PERFCTR_RB_SEL_7				0x00000cce
 
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_0				0x00000ccf
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_1				0x00000cd0
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_2				0x00000cd1
+
 #define REG_A4XX_RB_PERFCTR_CCU_SEL_3				0x00000cd2
 
 #define REG_A4XX_RB_FRAME_BUFFER_DIMENSION			0x00000ce0
@@ -400,8 +977,13 @@ static inline uint32_t REG_A4XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020a4
 #define A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE			0x00000008
 #define A4XX_RB_MRT_CONTROL_BLEND				0x00000010
 #define A4XX_RB_MRT_CONTROL_BLEND2				0x00000020
-#define A4XX_RB_MRT_CONTROL_FASTCLEAR				0x00000400
-#define A4XX_RB_MRT_CONTROL_B11					0x00000800
+#define A4XX_RB_MRT_CONTROL_ROP_ENABLE				0x00000040
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__MASK			0x00000f00
+#define A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT			8
+static inline uint32_t A4XX_RB_MRT_CONTROL_ROP_CODE(enum a3xx_rop_code val)
+{
+	return ((val) << A4XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A4XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK		0x0f000000
 #define A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT		24
 static inline uint32_t A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
@@ -490,8 +1072,8 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
 	return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_RED					0x000020f3
-#define A4XX_RB_BLEND_RED_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_RED					0x000020f0
+#define A4XX_RB_BLEND_RED_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_RED_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val)
 {
@@ -504,8 +1086,16 @@ static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_GREEN					0x000020f4
-#define A4XX_RB_BLEND_GREEN_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_RED_F32				0x000020f1
+#define A4XX_RB_BLEND_RED_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_RED_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_RED_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_GREEN					0x000020f2
+#define A4XX_RB_BLEND_GREEN_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_GREEN_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val)
 {
@@ -518,8 +1108,16 @@ static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_BLUE					0x000020f5
-#define A4XX_RB_BLEND_BLUE_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_GREEN_F32				0x000020f3
+#define A4XX_RB_BLEND_GREEN_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_GREEN_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_BLUE					0x000020f4
+#define A4XX_RB_BLEND_BLUE_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_BLUE_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val)
 {
@@ -532,8 +1130,16 @@ static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK;
 }
 
+#define REG_A4XX_RB_BLEND_BLUE_F32				0x000020f5
+#define A4XX_RB_BLEND_BLUE_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_BLUE_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK;
+}
+
 #define REG_A4XX_RB_BLEND_ALPHA					0x000020f6
-#define A4XX_RB_BLEND_ALPHA_UINT__MASK				0x00007fff
+#define A4XX_RB_BLEND_ALPHA_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_ALPHA_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val)
 {
@@ -546,6 +1152,14 @@ static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK;
 }
 
+#define REG_A4XX_RB_BLEND_ALPHA_F32				0x000020f7
+#define A4XX_RB_BLEND_ALPHA_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_ALPHA_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK;
+}
+
 #define REG_A4XX_RB_ALPHA_CONTROL				0x000020f8
 #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK			0x000000ff
 #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT			0
@@ -568,7 +1182,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
 {
 	return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
 }
-#define A4XX_RB_FS_OUTPUT_FAST_CLEAR				0x00000100
+#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND			0x00000100
 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK			0xffff0000
 #define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT			16
 static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
@@ -736,6 +1350,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
 }
 #define A4XX_RB_DEPTH_CONTROL_BF_ENABLE				0x00000080
 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE			0x00010000
+#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS			0x00020000
 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE			0x80000000
 
 #define REG_A4XX_RB_DEPTH_CLEAR					0x00002102
@@ -996,8 +1611,386 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
 
 #define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D				0x0000004d
 
+#define REG_A4XX_RBBM_POWER_CNTL_IP				0x00000098
+#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE			0x00000001
+#define A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON			0x00100000
+
 #define REG_A4XX_RBBM_PERFCTR_CP_0_LO				0x0000009c
 
+#define REG_A4XX_RBBM_PERFCTR_CP_0_HI				0x0000009d
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_LO				0x0000009e
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_HI				0x0000009f
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_LO				0x000000a0
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_HI				0x000000a1
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_LO				0x000000a2
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_HI				0x000000a3
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_LO				0x000000a4
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_HI				0x000000a5
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_LO				0x000000a6
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_HI				0x000000a7
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_LO				0x000000a8
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_HI				0x000000a9
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_LO				0x000000aa
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_HI				0x000000ab
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO				0x000000ac
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI				0x000000ad
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO				0x000000ae
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI				0x000000af
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO				0x000000b0
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI				0x000000b1
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO				0x000000b2
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI				0x000000b3
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_LO				0x000000b4
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_HI				0x000000b5
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_LO				0x000000b6
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_HI				0x000000b7
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_LO				0x000000b8
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_HI				0x000000b9
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_LO				0x000000ba
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_HI				0x000000bb
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_LO				0x000000bc
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_HI				0x000000bd
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_LO				0x000000be
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_HI				0x000000bf
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_LO				0x000000c0
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_HI				0x000000c1
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_LO				0x000000c2
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_HI				0x000000c3
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO				0x000000c4
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI				0x000000c5
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO				0x000000c6
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI				0x000000c7
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO				0x000000c8
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI				0x000000c9
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO				0x000000ca
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI				0x000000cb
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO				0x000000cc
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI				0x000000cd
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO				0x000000ce
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI				0x000000cf
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO				0x000000d0
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI				0x000000d1
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO				0x000000d2
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI				0x000000d3
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO				0x000000d4
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI				0x000000d5
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO				0x000000d6
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI				0x000000d7
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO				0x000000d8
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI				0x000000d9
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO				0x000000da
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI				0x000000db
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO				0x000000dc
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI				0x000000dd
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO				0x000000de
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI				0x000000df
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO				0x000000e0
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI				0x000000e1
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO				0x000000e2
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI				0x000000e3
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO				0x000000e4
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI				0x000000e5
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO				0x000000e6
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI				0x000000e7
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO				0x000000e8
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI				0x000000e9
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO				0x000000ea
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI				0x000000eb
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO				0x000000ec
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI				0x000000ed
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO				0x000000ee
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI				0x000000ef
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO				0x000000f0
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI				0x000000f1
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO				0x000000f2
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI				0x000000f3
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO				0x000000f4
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI				0x000000f5
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO				0x000000f6
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI				0x000000f7
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO				0x000000f8
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI				0x000000f9
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO				0x000000fa
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI				0x000000fb
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO				0x000000fc
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI				0x000000fd
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO				0x000000fe
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI				0x000000ff
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO				0x00000100
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI				0x00000101
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO				0x00000102
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI				0x00000103
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO				0x00000104
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI				0x00000105
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO				0x00000106
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI				0x00000107
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO				0x00000108
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI				0x00000109
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO				0x0000010a
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI				0x0000010b
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO				0x0000010c
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI				0x0000010d
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO				0x0000010e
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI				0x0000010f
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO				0x00000110
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI				0x00000111
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO				0x00000112
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI				0x00000113
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO				0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI				0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO				0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI				0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_LO				0x00000116
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_HI				0x00000117
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_LO				0x00000118
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_HI				0x00000119
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_LO				0x0000011a
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_HI				0x0000011b
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_LO				0x0000011c
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_HI				0x0000011d
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_LO				0x0000011e
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_HI				0x0000011f
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_LO				0x00000120
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_HI				0x00000121
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_LO				0x00000122
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_HI				0x00000123
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_LO				0x00000124
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_HI				0x00000125
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_LO				0x00000126
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_HI				0x00000127
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_LO				0x00000128
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_HI				0x00000129
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_LO				0x0000012a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_HI				0x0000012b
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_LO				0x0000012c
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_HI				0x0000012d
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_LO				0x0000012e
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_HI				0x0000012f
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_LO				0x00000130
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_HI				0x00000131
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_LO				0x00000132
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_HI				0x00000133
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_LO				0x00000134
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_HI				0x00000135
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_LO				0x00000136
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_HI				0x00000137
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_LO				0x00000138
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_HI				0x00000139
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_LO				0x0000013a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_HI				0x0000013b
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_LO				0x0000013c
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_HI				0x0000013d
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_LO				0x0000013e
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_HI				0x0000013f
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_LO				0x00000140
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI				0x00000141
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_LO				0x00000142
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_HI				0x00000143
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_LO				0x00000144
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_HI				0x00000145
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_LO				0x00000146
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_HI				0x00000147
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_LO				0x00000148
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_HI				0x00000149
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_LO				0x0000014a
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_HI				0x0000014b
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO				0x0000014c
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI				0x0000014d
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO				0x0000014e
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI				0x0000014f
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO				0x00000166
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI				0x00000167
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO				0x00000168
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI				0x00000169
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO			0x0000016e
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI			0x0000016f
+
 static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
 
 static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; }
@@ -1046,6 +2039,10 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { r
 
 static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; }
 
+#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0			0x00000099
+
+#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1			0x0000009a
+
 #define REG_A4XX_RBBM_PERFCTR_PWR_1_LO				0x00000168
 
 #define REG_A4XX_RBBM_PERFCTR_CTL				0x00000170
@@ -1060,6 +2057,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
 
 #define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI			0x00000175
 
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0			0x00000176
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1			0x00000177
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2			0x00000178
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3			0x00000179
+
 #define REG_A4XX_RBBM_GPU_BUSY_MASKED				0x0000017a
 
 #define REG_A4XX_RBBM_INT_0_STATUS				0x0000017d
@@ -1099,6 +2104,11 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
 
 #define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5			0x0000019f
 
+#define REG_A4XX_RBBM_POWER_STATUS				0x000001b0
+#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON			0x00100000
+
+#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2			0x000001b8
+
 #define REG_A4XX_CP_SCRATCH_UMASK				0x00000228
 
 #define REG_A4XX_CP_SCRATCH_ADDR				0x00000229
@@ -1191,6 +2201,20 @@ static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240
 
 #define REG_A4XX_CP_PERFCTR_CP_SEL_0				0x00000500
 
+#define REG_A4XX_CP_PERFCTR_CP_SEL_1				0x00000501
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_2				0x00000502
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_3				0x00000503
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_4				0x00000504
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_5				0x00000505
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_6				0x00000506
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_7				0x00000507
+
 #define REG_A4XX_CP_PERFCOMBINER_SELECT				0x0000050b
 
 static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; }
@@ -1201,6 +2225,28 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578
 
 #define REG_A4XX_SP_MODE_CONTROL				0x00000ec3
 
+#define REG_A4XX_SP_PERFCTR_SP_SEL_0				0x00000ec4
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_1				0x00000ec5
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_2				0x00000ec6
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_3				0x00000ec7
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_4				0x00000ec8
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_5				0x00000ec9
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_6				0x00000eca
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_7				0x00000ecb
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_8				0x00000ecc
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_9				0x00000ecd
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_10				0x00000ece
+
 #define REG_A4XX_SP_PERFCTR_SP_SEL_11				0x00000ecf
 
 #define REG_A4XX_SP_SP_CTRL_REG					0x000022c0
@@ -1699,6 +2745,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
 
 #define REG_A4XX_VPC_DEBUG_ECO_CONTROL				0x00000e64
 
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0				0x00000e65
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1				0x00000e66
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2				0x00000e67
+
 #define REG_A4XX_VPC_PERFCTR_VPC_SEL_3				0x00000e68
 
 #define REG_A4XX_VPC_ATTR					0x00002140
@@ -1811,6 +2863,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0
 
 #define REG_A4XX_VFD_DEBUG_CONTROL				0x00000e40
 
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0				0x00000e43
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1				0x00000e44
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2				0x00000e45
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3				0x00000e46
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4				0x00000e47
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5				0x00000e48
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6				0x00000e49
+
 #define REG_A4XX_VFD_PERFCTR_VFD_SEL_7				0x00000e4a
 
 #define REG_A4XX_VGT_CL_INITIATOR				0x000021d0
@@ -1967,6 +3033,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
 
 #define REG_A4XX_TPL1_TP_MODE_CONTROL				0x00000f03
 
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0				0x00000f04
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1				0x00000f05
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2				0x00000f06
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3				0x00000f07
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4				0x00000f08
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5				0x00000f09
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6				0x00000f0a
+
 #define REG_A4XX_TPL1_PERFCTR_TP_SEL_7				0x00000f0b
 
 #define REG_A4XX_TPL1_TP_TEX_OFFSET				0x00002380
@@ -2021,9 +3101,23 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
 
 #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0				0x00000c88
 
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1				0x00000c89
+
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2				0x00000c8a
+
 #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3				0x00000c8b
 
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0				0x00000c8c
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1				0x00000c8d
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2				0x00000c8e
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3				0x00000c8f
+
 #define REG_A4XX_GRAS_CL_CLIP_CNTL				0x00002000
+#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE			0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z			0x00400000
 
 #define REG_A4XX_GRAS_CLEAR_CNTL				0x00002003
 #define A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR			0x00000001
@@ -2114,6 +3208,7 @@ static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val)
 
 #define REG_A4XX_GRAS_ALPHA_CONTROL				0x00002073
 #define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE		0x00000004
+#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS		0x00000008
 
 #define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE			0x00002074
 #define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK			0xffffffff
@@ -2285,6 +3380,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
 
 #define REG_A4XX_UCHE_CACHE_WAYS_VFD				0x00000e8c
 
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0			0x00000e8e
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1			0x00000e8f
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2			0x00000e90
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3			0x00000e91
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4			0x00000e92
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5			0x00000e93
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6			0x00000e94
+
 #define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7			0x00000e95
 
 #define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD				0x00000e00
@@ -2295,6 +3404,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
 
 #define REG_A4XX_HLSQ_PERF_PIPE_MASK				0x00000e0e
 
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0			0x00000e06
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1			0x00000e07
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2			0x00000e08
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3			0x00000e09
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4			0x00000e0a
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5			0x00000e0b
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6			0x00000e0c
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7			0x00000e0d
+
 #define REG_A4XX_HLSQ_CONTROL_0_REG				0x000023c0
 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK		0x00000010
 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT		4
@@ -2549,6 +3674,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
 
 #define REG_A4XX_PC_PERFCTR_PC_SEL_0				0x00000d10
 
+#define REG_A4XX_PC_PERFCTR_PC_SEL_1				0x00000d11
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_2				0x00000d12
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_3				0x00000d13
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_4				0x00000d14
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_5				0x00000d15
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_6				0x00000d16
+
 #define REG_A4XX_PC_PERFCTR_PC_SEL_7				0x00000d17
 
 #define REG_A4XX_PC_BIN_BASE					0x000021c0
@@ -2564,7 +3701,20 @@ static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
 #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST		0x02000000
 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE				0x04000000
 
-#define REG_A4XX_UNKNOWN_21C5					0x000021c5
+#define REG_A4XX_PC_PRIM_VTX_CNTL2				0x000021c5
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK	0x00000007
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT	0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK	0x00000038
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT	3
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__SHIFT) & A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A4XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE			0x00000040
 
 #define REG_A4XX_PC_RESTART_INDEX				0x000021c6
 
@@ -2646,20 +3796,6 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
 
 #define REG_A4XX_UNKNOWN_20EF					0x000020ef
 
-#define REG_A4XX_UNKNOWN_20F0					0x000020f0
-
-#define REG_A4XX_UNKNOWN_20F1					0x000020f1
-
-#define REG_A4XX_UNKNOWN_20F2					0x000020f2
-
-#define REG_A4XX_UNKNOWN_20F7					0x000020f7
-#define A4XX_UNKNOWN_20F7__MASK					0xffffffff
-#define A4XX_UNKNOWN_20F7__SHIFT				0
-static inline uint32_t A4XX_UNKNOWN_20F7(float val)
-{
-	return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK;
-}
-
 #define REG_A4XX_UNKNOWN_2152					0x00002152
 
 #define REG_A4XX_UNKNOWN_2153					0x00002153
@@ -2720,6 +3856,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
 {
 	return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
 }
+#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK				0xfff80000
+#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT				19
+static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
+{
+	return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
+}
 
 #define REG_A4XX_TEX_SAMP_1					0x00000001
 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK			0x0000000e
@@ -2728,6 +3870,7 @@ static inline uint32_t A4XX_TEX_SAMP_1_COMPARE_FUNC(enum adreno_compare_func val
 {
 	return ((val) << A4XX_TEX_SAMP_1_COMPARE_FUNC__SHIFT) & A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK;
 }
+#define A4XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF			0x00000010
 #define A4XX_TEX_SAMP_1_UNNORM_COORDS				0x00000020
 #define A4XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR			0x00000040
 #define A4XX_TEX_SAMP_1_MAX_LOD__MASK				0x000fff00
@@ -2796,7 +3939,7 @@ static inline uint32_t A4XX_TEX_CONST_1_HEIGHT(uint32_t val)
 {
 	return ((val) << A4XX_TEX_CONST_1_HEIGHT__SHIFT) & A4XX_TEX_CONST_1_HEIGHT__MASK;
 }
-#define A4XX_TEX_CONST_1_WIDTH__MASK				0x1fff8000
+#define A4XX_TEX_CONST_1_WIDTH__MASK				0x3fff8000
 #define A4XX_TEX_CONST_1_WIDTH__SHIFT				15
 static inline uint32_t A4XX_TEX_CONST_1_WIDTH(uint32_t val)
 {
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index a53f1be05f75..d0d3c7baa8fe 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -102,11 +102,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
-	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000);
-	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
+	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
+	/* Early A430's have a timing issue with SP/TP power collapse;
+	   disabling HW clock gating prevents it. */
+	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
+		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
+	else
+		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
 }
 
+
 static void a4xx_me_init(struct msm_gpu *gpu)
 {
 	struct msm_ringbuffer *ring = gpu->rb;
@@ -141,7 +147,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 	uint32_t *ptr, len;
 	int i, ret;
 
-	if (adreno_is_a4xx(adreno_gpu)) {
+	if (adreno_is_a420(adreno_gpu)) {
 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
@@ -150,6 +156,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
+	} else if (adreno_is_a430(adreno_gpu)) {
+		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
+		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
+		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
+		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 	} else {
 		BUG();
 	}
@@ -161,6 +174,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
 
+	if (adreno_is_a430(adreno_gpu)) {
+		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
+	}
+
 	 /* Enable the RBBM error reporting bits */
 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
 
@@ -183,6 +200,14 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 	/* Turn on performance counters: */
 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
 
+	/* use the first CP counter for timestamp queries.. userspace may set
+	 * this as well but it selects the same counter/countable:
+	 */
+	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
+
+	if (adreno_is_a430(adreno_gpu))
+		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
+
 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
@@ -190,6 +215,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
 
+	/* On A430 enable SP regfile sleep for power savings */
+	/* TODO downstream does this for !420, so maybe applies for 405 too? */
+	if (!adreno_is_a420(adreno_gpu)) {
+		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
+			0x00000441);
+		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
+			0x00000441);
+	}
+
 	a4xx_enable_hwcg(gpu);
 
 	/*
@@ -204,10 +238,6 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
 	}
 
-	ret = adreno_hw_init(gpu);
-	if (ret)
-		return ret;
-
 	/* setup access protection: */
 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
 
@@ -263,6 +293,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
 
 	a4xx_me_init(gpu);
+
 	return 0;
 }
 
@@ -317,6 +348,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
 	DBG("%s: Int status %08x", gpu->name, status);
 
+	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
+		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
+		printk("CP | Protected mode error| %s | addr=%x\n",
+			reg & (1 << 24) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2);
+	}
+
 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
 
 	msm_gpu_retire(gpu);
@@ -512,12 +550,63 @@ static void a4xx_dump(struct msm_gpu *gpu)
 	adreno_dump(gpu);
 }
 
+static int a4xx_pm_resume(struct msm_gpu *gpu) {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	int ret;
+
+	ret = msm_gpu_pm_resume(gpu);
+	if (ret)
+		return ret;
+
+	if (adreno_is_a430(adreno_gpu)) {
+		unsigned int reg;
+		/* Set the default register values; set SW_COLLAPSE to 0 */
+		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
+		do {
+			udelay(5);
+			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
+		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
+	}
+	return 0;
+}
+
+static int a4xx_pm_suspend(struct msm_gpu *gpu) {
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	int ret;
+
+	ret = msm_gpu_pm_suspend(gpu);
+	if (ret)
+		return ret;
+
+	if (adreno_is_a430(adreno_gpu)) {
+		/* Set the default register values; set SW_COLLAPSE to 1 */
+		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
+	}
+	return 0;
+}
+
+static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
+{
+	uint32_t hi, lo, tmp;
+
+	tmp = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+	do {
+		hi = tmp;
+		lo = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
+		tmp = gpu_read(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+	} while (tmp != hi);
+
+	*value = (((uint64_t)hi) << 32) | lo;
+
+	return 0;
+}
+
 static const struct adreno_gpu_funcs funcs = {
 	.base = {
 		.get_param = adreno_get_param,
 		.hw_init = a4xx_hw_init,
-		.pm_suspend = msm_gpu_pm_suspend,
-		.pm_resume = msm_gpu_pm_resume,
+		.pm_suspend = a4xx_pm_suspend,
+		.pm_resume = a4xx_pm_resume,
 		.recover = a4xx_recover,
 		.last_fence = adreno_last_fence,
 		.submit = adreno_submit,
@@ -529,6 +618,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.show = a4xx_show,
 #endif
 	},
+	.get_timestamp = a4xx_get_timestamp,
 };
 
 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
index c304468cf2bd..e81481d1b7df 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          ( 109916 bytes, from 2016-02-20 18:44:48)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -119,6 +120,23 @@ enum adreno_rb_copy_control_mode {
 	RB_COPY_DEPTH_STENCIL = 5,
 };
 
+enum a3xx_rop_code {
+	ROP_CLEAR = 0,
+	ROP_NOR = 1,
+	ROP_AND_INVERTED = 2,
+	ROP_COPY_INVERTED = 3,
+	ROP_AND_REVERSE = 4,
+	ROP_INVERT = 5,
+	ROP_NAND = 7,
+	ROP_AND = 8,
+	ROP_EQUIV = 9,
+	ROP_NOOP = 10,
+	ROP_OR_INVERTED = 11,
+	ROP_OR_REVERSE = 13,
+	ROP_OR = 14,
+	ROP_SET = 15,
+};
+
 enum a3xx_render_mode {
 	RB_RENDERING_PASS = 0,
 	RB_TILING_PASS = 1,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 950d27d26b30..5127b75dbf40 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -69,6 +69,14 @@ static const struct adreno_info gpulist[] = {
 		.pfpfw = "a420_pfp.fw",
 		.gmem  = (SZ_1M + SZ_512K),
 		.init  = a4xx_gpu_init,
+	}, {
+		.rev   = ADRENO_REV(4, 3, 0, ANY_ID),
+		.revn  = 430,
+		.name  = "A430",
+		.pm4fw = "a420_pm4.fw",
+		.pfpfw = "a420_pfp.fw",
+		.gmem  = (SZ_1M + SZ_512K),
+		.init  = a4xx_gpu_init,
 	},
 };
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index a3b54cc76495..4951172ede06 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -41,6 +41,13 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 				(adreno_gpu->rev.major << 16) |
 				(adreno_gpu->rev.core << 24);
 		return 0;
+	case MSM_PARAM_MAX_FREQ:
+		*value = adreno_gpu->base.fast_rate;
+		return 0;
+	case MSM_PARAM_TIMESTAMP:
+		if (adreno_gpu->funcs->get_timestamp)
+			return adreno_gpu->funcs->get_timestamp(gpu, value);
+		return -EINVAL;
 	default:
 		DBG("%s: invalid param: %u", gpu->name, param);
 		return -EINVAL;
@@ -68,18 +75,15 @@ int adreno_hw_init(struct msm_gpu *gpu)
 	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
 			/* size is log2(quad-words): */
 			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
-			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));
+			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
+			(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
 
 	/* Setup ringbuffer address: */
 	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova);
-	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
-			rbmemptr(adreno_gpu, rptr));
-
-	/* Setup scratch/timestamp: */
-	adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR,
-			rbmemptr(adreno_gpu, fence));
 
-	adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1);
+	if (!adreno_is_a430(adreno_gpu))
+		adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
+						rbmemptr(adreno_gpu, rptr));
 
 	return 0;
 }
@@ -89,6 +93,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring)
 	return ring->cur - ring->start;
 }
 
+/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
+static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
+{
+	if (adreno_is_a430(adreno_gpu))
+		return adreno_gpu->memptrs->rptr = adreno_gpu_read(
+			adreno_gpu, REG_ADRENO_CP_RB_RPTR);
+	else
+		return adreno_gpu->memptrs->rptr;
+}
+
 uint32_t adreno_last_fence(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -137,7 +151,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 			if (priv->lastctx == ctx)
 				break;
 		case MSM_SUBMIT_CMD_BUF:
-			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
+			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
+				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
 			OUT_RING(ring, submit->cmd[i].iova);
 			OUT_RING(ring, submit->cmd[i].size);
 			ibs++;
@@ -216,9 +231,12 @@ void adreno_idle(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	uint32_t wptr = get_wptr(gpu->rb);
+	int ret;
 
 	/* wait for CP to drain ringbuffer: */
-	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
+	ret = spin_until(get_rptr(adreno_gpu) == wptr);
+
+	if (ret)
 		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
 
 	/* TODO maybe we need to reset GPU here to recover from hang? */
@@ -237,7 +255,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 
 	seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
 			gpu->submitted_fence);
-	seq_printf(m, "rptr:     %d\n", adreno_gpu->memptrs->rptr);
+	seq_printf(m, "rptr:     %d\n", get_rptr(adreno_gpu));
 	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
 	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
 
@@ -278,7 +296,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
 
 	printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
 			gpu->submitted_fence);
-	printk("rptr:     %d\n", adreno_gpu->memptrs->rptr);
+	printk("rptr:     %d\n", get_rptr(adreno_gpu));
 	printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
 	printk("rb wptr:  %d\n", get_wptr(gpu->rb));
 
@@ -313,7 +331,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu)
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	uint32_t size = gpu->rb->size / 4;
 	uint32_t wptr = get_wptr(gpu->rb);
-	uint32_t rptr = adreno_gpu->memptrs->rptr;
+	uint32_t rptr = get_rptr(adreno_gpu);
 	return (rptr + (size - 1) - wptr) % size;
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 0a312e9d3afd..1d07511f4d22 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -114,6 +114,7 @@ struct adreno_rev {
 
 struct adreno_gpu_funcs {
 	struct msm_gpu_funcs base;
+	int (*get_timestamp)(struct msm_gpu *gpu, uint64_t *value);
 };
 
 struct adreno_info {
@@ -228,6 +229,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu)
 	return gpu->revn == 420;
 }
 
+static inline int adreno_is_a430(struct adreno_gpu *gpu)
+{
+       return gpu->revn == 430;
+}
+
 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
 int adreno_hw_init(struct msm_gpu *gpu);
 uint32_t adreno_last_fence(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
index a22fef569499..d7477ff867c9 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          ( 109916 bytes, from 2016-02-20 18:44:48)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
 	CP_UNKNOWN_1A = 26,
 	CP_UNKNOWN_4E = 78,
 	CP_WIDE_REG_WRITE = 116,
+	CP_SCRATCH_TO_REG = 77,
+	CP_REG_TO_SCRATCH = 74,
+	CP_WAIT_MEM_WRITES = 18,
+	CP_COND_REG_EXEC = 71,
+	CP_MEM_TO_REG = 66,
 	IN_IB_PREFETCH_END = 23,
 	IN_SUBBLK_PREFETCH = 31,
 	IN_INSTR_PREFETCH = 32,
@@ -199,7 +205,11 @@ enum adreno_state_type {
 
 enum adreno_state_src {
 	SS_DIRECT = 0,
+	SS_INVALID_ALL_IC = 2,
+	SS_INVALID_PART_IC = 3,
 	SS_INDIRECT = 4,
+	SS_INDIRECT_TCM = 5,
+	SS_INDIRECT_STM = 6,
 };
 
 enum a4xx_index_size {
@@ -227,7 +237,7 @@ static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val)
 {
 	return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK;
 }
-#define CP_LOAD_STATE_0_NUM_UNIT__MASK				0x7fc00000
+#define CP_LOAD_STATE_0_NUM_UNIT__MASK				0xffc00000
 #define CP_LOAD_STATE_0_NUM_UNIT__SHIFT				22
 static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val)
 {
@@ -499,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
 	return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
 }
 
+#define REG_CP_REG_TO_MEM_0					0x00000000
+#define CP_REG_TO_MEM_0_REG__MASK				0x0000ffff
+#define CP_REG_TO_MEM_0_REG__SHIFT				0
+static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
+{
+	return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK;
+}
+#define CP_REG_TO_MEM_0_CNT__MASK				0x3ff80000
+#define CP_REG_TO_MEM_0_CNT__SHIFT				19
+static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
+{
+	return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK;
+}
+#define CP_REG_TO_MEM_0_64B					0x40000000
+#define CP_REG_TO_MEM_0_ACCUMULATE				0x80000000
+
+#define REG_CP_REG_TO_MEM_1					0x00000001
+#define CP_REG_TO_MEM_1_DEST__MASK				0xffffffff
+#define CP_REG_TO_MEM_1_DEST__SHIFT				0
+static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
+{
+	return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK;
+}
+
 
 #endif /* ADRENO_PM4_XML */
diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
index b2b5f3dd1b4c..4958594d5266 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 2a827d8093a2..e58e9b91b34d 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -57,10 +57,9 @@ static const char * const dsi_8916_bus_clk_names[] = {
 static const struct msm_dsi_config msm8916_dsi_cfg = {
 	.io_offset = DSI_6G_REG_SHIFT,
 	.reg_cfg = {
-		.num = 4,
+		.num = 3,
 		.regs = {
 			{"gdsc", -1, -1, -1, -1},
-			{"vdd", 2850000, 2850000, 100000, 100},
 			{"vdda", 1200000, 1200000, 100000, 100},
 			{"vddio", 1800000, 1800000, 100000, 100},
 		},
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 48f9967b4a1b..4282ec6bbaaf 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -163,6 +163,10 @@ struct msm_dsi_host {
 	enum mipi_dsi_pixel_format format;
 	unsigned long mode_flags;
 
+	/* lane data parsed via DT */
+	int dlane_swap;
+	int num_data_lanes;
+
 	u32 dma_cmd_ctrl_restore;
 
 	bool registered;
@@ -845,19 +849,10 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
 	data = DSI_CTRL_CLK_EN;
 
 	DBG("lane number=%d", msm_host->lanes);
-	if (msm_host->lanes == 2) {
-		data |= DSI_CTRL_LANE1 | DSI_CTRL_LANE2;
-		/* swap lanes for 2-lane panel for better performance */
-		dsi_write(msm_host, REG_DSI_LANE_SWAP_CTRL,
-			DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(LANE_SWAP_1230));
-	} else {
-		/* Take 4 lanes as default */
-		data |= DSI_CTRL_LANE0 | DSI_CTRL_LANE1 | DSI_CTRL_LANE2 |
-			DSI_CTRL_LANE3;
-		/* Do not swap lanes for 4-lane panel */
-		dsi_write(msm_host, REG_DSI_LANE_SWAP_CTRL,
-			DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(LANE_SWAP_0123));
-	}
+	data |= ((DSI_CTRL_LANE0 << msm_host->lanes) - DSI_CTRL_LANE0);
+
+	dsi_write(msm_host, REG_DSI_LANE_SWAP_CTRL,
+		  DSI_LANE_SWAP_CTRL_DLN_SWAP_SEL(msm_host->dlane_swap));
 
 	if (!(flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
 		dsi_write(msm_host, REG_DSI_LANE_CTRL,
@@ -1479,13 +1474,14 @@ static int dsi_host_attach(struct mipi_dsi_host *host,
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
 	int ret;
 
+	if (dsi->lanes > msm_host->num_data_lanes)
+		return -EINVAL;
+
 	msm_host->channel = dsi->channel;
 	msm_host->lanes = dsi->lanes;
 	msm_host->format = dsi->format;
 	msm_host->mode_flags = dsi->mode_flags;
 
-	WARN_ON(dsi->dev.of_node != msm_host->device_node);
-
 	/* Some gpios defined in panel DT need to be controlled by host */
 	ret = dsi_host_init_panel_gpios(msm_host, &dsi->dev);
 	if (ret)
@@ -1534,6 +1530,75 @@ static struct mipi_dsi_host_ops dsi_host_ops = {
 	.transfer = dsi_host_transfer,
 };
 
+/*
+ * List of supported physical to logical lane mappings.
+ * For example, the 2nd entry represents the following mapping:
+ *
+ * "3012": Logic 3->Phys 0; Logic 0->Phys 1; Logic 1->Phys 2; Logic 2->Phys 3;
+ */
+static const int supported_data_lane_swaps[][4] = {
+	{ 0, 1, 2, 3 },
+	{ 3, 0, 1, 2 },
+	{ 2, 3, 0, 1 },
+	{ 1, 2, 3, 0 },
+	{ 0, 3, 2, 1 },
+	{ 1, 0, 3, 2 },
+	{ 2, 1, 0, 3 },
+	{ 3, 2, 1, 0 },
+};
+
+static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
+				    struct device_node *ep)
+{
+	struct device *dev = &msm_host->pdev->dev;
+	struct property *prop;
+	u32 lane_map[4];
+	int ret, i, len, num_lanes;
+
+	prop = of_find_property(ep, "qcom,data-lane-map", &len);
+	if (!prop) {
+		dev_dbg(dev, "failed to find data lane mapping\n");
+		return -EINVAL;
+	}
+
+	num_lanes = len / sizeof(u32);
+
+	if (num_lanes < 1 || num_lanes > 4) {
+		dev_err(dev, "bad number of data lanes\n");
+		return -EINVAL;
+	}
+
+	msm_host->num_data_lanes = num_lanes;
+
+	ret = of_property_read_u32_array(ep, "qcom,data-lane-map", lane_map,
+					 num_lanes);
+	if (ret) {
+		dev_err(dev, "failed to read lane data\n");
+		return ret;
+	}
+
+	/*
+	 * compare DT specified physical-logical lane mappings with the ones
+	 * supported by hardware
+	 */
+	for (i = 0; i < ARRAY_SIZE(supported_data_lane_swaps); i++) {
+		const int *swap = supported_data_lane_swaps[i];
+		int j;
+
+		for (j = 0; j < num_lanes; j++) {
+			if (swap[j] != lane_map[j])
+				break;
+		}
+
+		if (j == num_lanes) {
+			msm_host->dlane_swap = i;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
 static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 {
 	struct device *dev = &msm_host->pdev->dev;
@@ -1560,17 +1625,21 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 		return 0;
 	}
 
+	ret = dsi_host_parse_lane_data(msm_host, endpoint);
+	if (ret) {
+		dev_err(dev, "%s: invalid lane configuration %d\n",
+			__func__, ret);
+		goto err;
+	}
+
 	/* Get panel node from the output port's endpoint data */
 	device_node = of_graph_get_remote_port_parent(endpoint);
 	if (!device_node) {
 		dev_err(dev, "%s: no valid device\n", __func__);
-		of_node_put(endpoint);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err;
 	}
 
-	of_node_put(endpoint);
-	of_node_put(device_node);
-
 	msm_host->device_node = device_node;
 
 	if (of_property_read_bool(np, "syscon-sfpb")) {
@@ -1579,11 +1648,16 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 		if (IS_ERR(msm_host->sfpb)) {
 			dev_err(dev, "%s: failed to get sfpb regmap\n",
 				__func__);
-			return PTR_ERR(msm_host->sfpb);
+			ret = PTR_ERR(msm_host->sfpb);
 		}
 	}
 
-	return 0;
+	of_node_put(device_node);
+
+err:
+	of_node_put(endpoint);
+
+	return ret;
 }
 
 int msm_dsi_host_init(struct msm_dsi *msm_dsi)
diff --git a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
index 80ec65e47468..2d999494cdea 100644
--- a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
+++ b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
index 80b6038334a6..2cf1664723e8 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
@@ -97,8 +97,8 @@ static inline struct msm_dsi_pll *msm_dsi_pll_28nm_init(
 struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev,
 					       int id);
 #else
-struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev,
-					       int id)
+static inline struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(
+	struct platform_device *pdev, int id)
 {
 	return ERR_PTR(-ENODEV);
 }
diff --git a/drivers/gpu/drm/msm/dsi/sfpb.xml.h b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
index 7d7662e69e11..506434fac993 100644
--- a/drivers/gpu/drm/msm/dsi/sfpb.xml.h
+++ b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/edp/edp.xml.h b/drivers/gpu/drm/msm/edp/edp.xml.h
index 90bf5ed46746..f1072c18c81e 100644
--- a/drivers/gpu/drm/msm/edp/edp.xml.h
+++ b/drivers/gpu/drm/msm/edp/edp.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 9a0989c0b4de..51b9ea552f97 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -21,7 +21,7 @@
 
 #include "hdmi.h"
 
-void hdmi_set_mode(struct hdmi *hdmi, bool power_on)
+void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on)
 {
 	uint32_t ctrl = 0;
 	unsigned long flags;
@@ -46,29 +46,27 @@ void hdmi_set_mode(struct hdmi *hdmi, bool power_on)
 			power_on ? "Enable" : "Disable", ctrl);
 }
 
-static irqreturn_t hdmi_irq(int irq, void *dev_id)
+static irqreturn_t msm_hdmi_irq(int irq, void *dev_id)
 {
 	struct hdmi *hdmi = dev_id;
 
 	/* Process HPD: */
-	hdmi_connector_irq(hdmi->connector);
+	msm_hdmi_connector_irq(hdmi->connector);
 
 	/* Process DDC: */
-	hdmi_i2c_irq(hdmi->i2c);
+	msm_hdmi_i2c_irq(hdmi->i2c);
 
 	/* Process HDCP: */
 	if (hdmi->hdcp_ctrl)
-		hdmi_hdcp_irq(hdmi->hdcp_ctrl);
+		msm_hdmi_hdcp_irq(hdmi->hdcp_ctrl);
 
 	/* TODO audio.. */
 
 	return IRQ_HANDLED;
 }
 
-static void hdmi_destroy(struct hdmi *hdmi)
+static void msm_hdmi_destroy(struct hdmi *hdmi)
 {
-	struct hdmi_phy *phy = hdmi->phy;
-
 	/*
 	 * at this point, hpd has been disabled,
 	 * after flush workq, it's safe to deinit hdcp
@@ -77,21 +75,53 @@ static void hdmi_destroy(struct hdmi *hdmi)
 		flush_workqueue(hdmi->workq);
 		destroy_workqueue(hdmi->workq);
 	}
-	hdmi_hdcp_destroy(hdmi);
-	if (phy)
-		phy->funcs->destroy(phy);
+	msm_hdmi_hdcp_destroy(hdmi);
+
+	if (hdmi->phy_dev) {
+		put_device(hdmi->phy_dev);
+		hdmi->phy = NULL;
+		hdmi->phy_dev = NULL;
+	}
 
 	if (hdmi->i2c)
-		hdmi_i2c_destroy(hdmi->i2c);
+		msm_hdmi_i2c_destroy(hdmi->i2c);
 
 	platform_set_drvdata(hdmi->pdev, NULL);
 }
 
+static int msm_hdmi_get_phy(struct hdmi *hdmi)
+{
+	struct platform_device *pdev = hdmi->pdev;
+	struct platform_device *phy_pdev;
+	struct device_node *phy_node;
+
+	phy_node = of_parse_phandle(pdev->dev.of_node, "phys", 0);
+	if (!phy_node) {
+		dev_err(&pdev->dev, "cannot find phy device\n");
+		return -ENXIO;
+	}
+
+	phy_pdev = of_find_device_by_node(phy_node);
+	if (phy_pdev)
+		hdmi->phy = platform_get_drvdata(phy_pdev);
+
+	of_node_put(phy_node);
+
+	if (!phy_pdev || !hdmi->phy) {
+		dev_err(&pdev->dev, "phy driver is not ready\n");
+		return -EPROBE_DEFER;
+	}
+
+	hdmi->phy_dev = get_device(&phy_pdev->dev);
+
+	return 0;
+}
+
 /* construct hdmi at bind/probe time, grab all the resources.  If
  * we are to EPROBE_DEFER we want to do it here, rather than later
  * at modeset_init() time
  */
-static struct hdmi *hdmi_init(struct platform_device *pdev)
+static struct hdmi *msm_hdmi_init(struct platform_device *pdev)
 {
 	struct hdmi_platform_config *config = pdev->dev.platform_data;
 	struct hdmi *hdmi = NULL;
@@ -108,18 +138,6 @@ static struct hdmi *hdmi_init(struct platform_device *pdev)
 	hdmi->config = config;
 	spin_lock_init(&hdmi->reg_lock);
 
-	/* not sure about which phy maps to which msm.. probably I miss some */
-	if (config->phy_init) {
-		hdmi->phy = config->phy_init(hdmi);
-
-		if (IS_ERR(hdmi->phy)) {
-			ret = PTR_ERR(hdmi->phy);
-			dev_err(&pdev->dev, "failed to load phy: %d\n", ret);
-			hdmi->phy = NULL;
-			goto fail;
-		}
-	}
-
 	hdmi->mmio = msm_ioremap(pdev, config->mmio_name, "HDMI");
 	if (IS_ERR(hdmi->mmio)) {
 		ret = PTR_ERR(hdmi->mmio);
@@ -222,7 +240,7 @@ static struct hdmi *hdmi_init(struct platform_device *pdev)
 
 	hdmi->workq = alloc_ordered_workqueue("msm_hdmi", 0);
 
-	hdmi->i2c = hdmi_i2c_init(hdmi);
+	hdmi->i2c = msm_hdmi_i2c_init(hdmi);
 	if (IS_ERR(hdmi->i2c)) {
 		ret = PTR_ERR(hdmi->i2c);
 		dev_err(&pdev->dev, "failed to get i2c: %d\n", ret);
@@ -230,7 +248,13 @@ static struct hdmi *hdmi_init(struct platform_device *pdev)
 		goto fail;
 	}
 
-	hdmi->hdcp_ctrl = hdmi_hdcp_init(hdmi);
+	ret = msm_hdmi_get_phy(hdmi);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to get phy\n");
+		goto fail;
+	}
+
+	hdmi->hdcp_ctrl = msm_hdmi_hdcp_init(hdmi);
 	if (IS_ERR(hdmi->hdcp_ctrl)) {
 		dev_warn(&pdev->dev, "failed to init hdcp: disabled\n");
 		hdmi->hdcp_ctrl = NULL;
@@ -240,7 +264,7 @@ static struct hdmi *hdmi_init(struct platform_device *pdev)
 
 fail:
 	if (hdmi)
-		hdmi_destroy(hdmi);
+		msm_hdmi_destroy(hdmi);
 
 	return ERR_PTR(ret);
 }
@@ -250,10 +274,10 @@ fail:
  * driver (not hdmi sub-device's probe/bind!)
  *
  * Any resource (regulator/clk/etc) which could be missing at boot
- * should be handled in hdmi_init() so that failure happens from
+ * should be handled in msm_hdmi_init() so that failure happens from
  * hdmi sub-device's probe.
  */
-int hdmi_modeset_init(struct hdmi *hdmi,
+int msm_hdmi_modeset_init(struct hdmi *hdmi,
 		struct drm_device *dev, struct drm_encoder *encoder)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -265,7 +289,7 @@ int hdmi_modeset_init(struct hdmi *hdmi,
 
 	hdmi_audio_infoframe_init(&hdmi->audio.infoframe);
 
-	hdmi->bridge = hdmi_bridge_init(hdmi);
+	hdmi->bridge = msm_hdmi_bridge_init(hdmi);
 	if (IS_ERR(hdmi->bridge)) {
 		ret = PTR_ERR(hdmi->bridge);
 		dev_err(dev->dev, "failed to create HDMI bridge: %d\n", ret);
@@ -273,7 +297,7 @@ int hdmi_modeset_init(struct hdmi *hdmi,
 		goto fail;
 	}
 
-	hdmi->connector = hdmi_connector_init(hdmi);
+	hdmi->connector = msm_hdmi_connector_init(hdmi);
 	if (IS_ERR(hdmi->connector)) {
 		ret = PTR_ERR(hdmi->connector);
 		dev_err(dev->dev, "failed to create HDMI connector: %d\n", ret);
@@ -289,7 +313,7 @@ int hdmi_modeset_init(struct hdmi *hdmi,
 	}
 
 	ret = devm_request_irq(&pdev->dev, hdmi->irq,
-			hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+			msm_hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
 			"hdmi_isr", hdmi);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to request IRQ%u: %d\n",
@@ -309,7 +333,7 @@ int hdmi_modeset_init(struct hdmi *hdmi,
 fail:
 	/* bridge is normally destroyed by drm: */
 	if (hdmi->bridge) {
-		hdmi_bridge_destroy(hdmi->bridge);
+		msm_hdmi_bridge_destroy(hdmi->bridge);
 		hdmi->bridge = NULL;
 	}
 	if (hdmi->connector) {
@@ -331,15 +355,12 @@ fail:
 static const char *pwr_reg_names_none[] = {};
 static const char *hpd_reg_names_none[] = {};
 
-static struct hdmi_platform_config hdmi_tx_8660_config = {
-		.phy_init = hdmi_phy_8x60_init,
-};
+static struct hdmi_platform_config hdmi_tx_8660_config;
 
 static const char *hpd_reg_names_8960[] = {"core-vdda", "hdmi-mux"};
 static const char *hpd_clk_names_8960[] = {"core_clk", "master_iface_clk", "slave_iface_clk"};
 
 static struct hdmi_platform_config hdmi_tx_8960_config = {
-		.phy_init = hdmi_phy_8960_init,
 		HDMI_CFG(hpd_reg, 8960),
 		HDMI_CFG(hpd_clk, 8960),
 };
@@ -351,7 +372,6 @@ static const char *hpd_clk_names_8x74[] = {"iface_clk", "core_clk", "mdp_core_cl
 static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0};
 
 static struct hdmi_platform_config hdmi_tx_8974_config = {
-		.phy_init = hdmi_phy_8x74_init,
 		HDMI_CFG(pwr_reg, 8x74),
 		HDMI_CFG(hpd_reg, 8x74),
 		HDMI_CFG(pwr_clk, 8x74),
@@ -362,7 +382,6 @@ static struct hdmi_platform_config hdmi_tx_8974_config = {
 static const char *hpd_reg_names_8084[] = {"hpd-gdsc", "hpd-5v", "hpd-5v-en"};
 
 static struct hdmi_platform_config hdmi_tx_8084_config = {
-		.phy_init = hdmi_phy_8x74_init,
 		HDMI_CFG(pwr_reg, 8x74),
 		HDMI_CFG(hpd_reg, 8084),
 		HDMI_CFG(pwr_clk, 8x74),
@@ -371,7 +390,6 @@ static struct hdmi_platform_config hdmi_tx_8084_config = {
 };
 
 static struct hdmi_platform_config hdmi_tx_8994_config = {
-		.phy_init = NULL, /* nothing to do for this HDMI PHY 20nm */
 		HDMI_CFG(pwr_reg, 8x74),
 		HDMI_CFG(hpd_reg, none),
 		HDMI_CFG(pwr_clk, 8x74),
@@ -380,7 +398,6 @@ static struct hdmi_platform_config hdmi_tx_8994_config = {
 };
 
 static struct hdmi_platform_config hdmi_tx_8996_config = {
-		.phy_init = NULL,
 		HDMI_CFG(pwr_reg, none),
 		HDMI_CFG(hpd_reg, none),
 		HDMI_CFG(pwr_clk, 8x74),
@@ -388,7 +405,21 @@ static struct hdmi_platform_config hdmi_tx_8996_config = {
 		.hpd_freq      = hpd_clk_freq_8x74,
 };
 
-static int get_gpio(struct device *dev, struct device_node *of_node, const char *name)
+static const struct {
+	const char *name;
+	const bool output;
+	const int value;
+	const char *label;
+} msm_hdmi_gpio_pdata[] = {
+	{ "qcom,hdmi-tx-ddc-clk", true, 1, "HDMI_DDC_CLK" },
+	{ "qcom,hdmi-tx-ddc-data", true, 1, "HDMI_DDC_DATA" },
+	{ "qcom,hdmi-tx-hpd", false, 1, "HDMI_HPD" },
+	{ "qcom,hdmi-tx-mux-en", true, 1, "HDMI_MUX_EN" },
+	{ "qcom,hdmi-tx-mux-sel", true, 0, "HDMI_MUX_SEL" },
+	{ "qcom,hdmi-tx-mux-lpm", true, 1, "HDMI_MUX_LPM" },
+};
+
+static int msm_hdmi_get_gpio(struct device_node *of_node, const char *name)
 {
 	int gpio = of_get_named_gpio(of_node, name, 0);
 	if (gpio < 0) {
@@ -403,13 +434,14 @@ static int get_gpio(struct device *dev, struct device_node *of_node, const char
 	return gpio;
 }
 
-static int hdmi_bind(struct device *dev, struct device *master, void *data)
+static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
 	static struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
 	struct device_node *of_node = dev->of_node;
+	int i;
 
 	hdmi_cfg = (struct hdmi_platform_config *)
 			of_device_get_match_data(dev);
@@ -420,16 +452,18 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
 
 	hdmi_cfg->mmio_name     = "core_physical";
 	hdmi_cfg->qfprom_mmio_name = "qfprom_physical";
-	hdmi_cfg->ddc_clk_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-ddc-clk");
-	hdmi_cfg->ddc_data_gpio = get_gpio(dev, of_node, "qcom,hdmi-tx-ddc-data");
-	hdmi_cfg->hpd_gpio      = get_gpio(dev, of_node, "qcom,hdmi-tx-hpd");
-	hdmi_cfg->mux_en_gpio   = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-en");
-	hdmi_cfg->mux_sel_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-sel");
-	hdmi_cfg->mux_lpm_gpio  = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-lpm");
+
+	for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+		hdmi_cfg->gpios[i].num = msm_hdmi_get_gpio(of_node,
+						msm_hdmi_gpio_pdata[i].name);
+		hdmi_cfg->gpios[i].output = msm_hdmi_gpio_pdata[i].output;
+		hdmi_cfg->gpios[i].value = msm_hdmi_gpio_pdata[i].value;
+		hdmi_cfg->gpios[i].label = msm_hdmi_gpio_pdata[i].label;
+	}
 
 	dev->platform_data = hdmi_cfg;
 
-	hdmi = hdmi_init(to_platform_device(dev));
+	hdmi = msm_hdmi_init(to_platform_device(dev));
 	if (IS_ERR(hdmi))
 		return PTR_ERR(hdmi);
 	priv->hdmi = hdmi;
@@ -437,34 +471,34 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data)
 	return 0;
 }
 
-static void hdmi_unbind(struct device *dev, struct device *master,
+static void msm_hdmi_unbind(struct device *dev, struct device *master,
 		void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
 	if (priv->hdmi) {
-		hdmi_destroy(priv->hdmi);
+		msm_hdmi_destroy(priv->hdmi);
 		priv->hdmi = NULL;
 	}
 }
 
-static const struct component_ops hdmi_ops = {
-		.bind   = hdmi_bind,
-		.unbind = hdmi_unbind,
+static const struct component_ops msm_hdmi_ops = {
+		.bind   = msm_hdmi_bind,
+		.unbind = msm_hdmi_unbind,
 };
 
-static int hdmi_dev_probe(struct platform_device *pdev)
+static int msm_hdmi_dev_probe(struct platform_device *pdev)
 {
-	return component_add(&pdev->dev, &hdmi_ops);
+	return component_add(&pdev->dev, &msm_hdmi_ops);
 }
 
-static int hdmi_dev_remove(struct platform_device *pdev)
+static int msm_hdmi_dev_remove(struct platform_device *pdev)
 {
-	component_del(&pdev->dev, &hdmi_ops);
+	component_del(&pdev->dev, &msm_hdmi_ops);
 	return 0;
 }
 
-static const struct of_device_id dt_match[] = {
+static const struct of_device_id msm_hdmi_dt_match[] = {
 	{ .compatible = "qcom,hdmi-tx-8996", .data = &hdmi_tx_8996_config },
 	{ .compatible = "qcom,hdmi-tx-8994", .data = &hdmi_tx_8994_config },
 	{ .compatible = "qcom,hdmi-tx-8084", .data = &hdmi_tx_8084_config },
@@ -474,21 +508,23 @@ static const struct of_device_id dt_match[] = {
 	{}
 };
 
-static struct platform_driver hdmi_driver = {
-	.probe = hdmi_dev_probe,
-	.remove = hdmi_dev_remove,
+static struct platform_driver msm_hdmi_driver = {
+	.probe = msm_hdmi_dev_probe,
+	.remove = msm_hdmi_dev_remove,
 	.driver = {
 		.name = "hdmi_msm",
-		.of_match_table = dt_match,
+		.of_match_table = msm_hdmi_dt_match,
 	},
 };
 
-void __init hdmi_register(void)
+void __init msm_hdmi_register(void)
 {
-	platform_driver_register(&hdmi_driver);
+	msm_hdmi_phy_driver_register();
+	platform_driver_register(&msm_hdmi_driver);
 }
 
-void __exit hdmi_unregister(void)
+void __exit msm_hdmi_unregister(void)
 {
-	platform_driver_unregister(&hdmi_driver);
+	platform_driver_unregister(&msm_hdmi_driver);
+	msm_hdmi_phy_driver_unregister();
 }
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index d0e663192d01..b04a64664673 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -27,10 +27,18 @@
 #include "msm_drv.h"
 #include "hdmi.xml.h"
 
+#define HDMI_MAX_NUM_GPIO	6
 
 struct hdmi_phy;
 struct hdmi_platform_config;
 
+struct hdmi_gpio_data {
+	int num;
+	bool output;
+	int value;
+	const char *label;
+};
+
 struct hdmi_audio {
 	bool enabled;
 	struct hdmi_audio_infoframe infoframe;
@@ -62,6 +70,8 @@ struct hdmi {
 	struct clk **pwr_clks;
 
 	struct hdmi_phy *phy;
+	struct device *phy_dev;
+
 	struct i2c_adapter *i2c;
 	struct drm_connector *connector;
 	struct drm_bridge *bridge;
@@ -88,7 +98,6 @@ struct hdmi {
 
 /* platform config data (ie. from DT, or pdata) */
 struct hdmi_platform_config {
-	struct hdmi_phy *(*phy_init)(struct hdmi *hdmi);
 	const char *mmio_name;
 	const char *qfprom_mmio_name;
 
@@ -110,11 +119,10 @@ struct hdmi_platform_config {
 	int pwr_clk_cnt;
 
 	/* gpio's: */
-	int ddc_clk_gpio, ddc_data_gpio, hpd_gpio, mux_en_gpio, mux_sel_gpio;
-	int mux_lpm_gpio;
+	struct hdmi_gpio_data gpios[HDMI_MAX_NUM_GPIO];
 };
 
-void hdmi_set_mode(struct hdmi *hdmi, bool power_on);
+void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on);
 
 static inline void hdmi_write(struct hdmi *hdmi, u32 reg, u32 data)
 {
@@ -132,65 +140,113 @@ static inline u32 hdmi_qfprom_read(struct hdmi *hdmi, u32 reg)
 }
 
 /*
- * The phy appears to be different, for example between 8960 and 8x60,
- * so split the phy related functions out and load the correct one at
- * runtime:
+ * hdmi phy:
  */
 
-struct hdmi_phy_funcs {
-	void (*destroy)(struct hdmi_phy *phy);
+enum hdmi_phy_type {
+	MSM_HDMI_PHY_8x60,
+	MSM_HDMI_PHY_8960,
+	MSM_HDMI_PHY_8x74,
+	MSM_HDMI_PHY_8996,
+	MSM_HDMI_PHY_MAX,
+};
+
+struct hdmi_phy_cfg {
+	enum hdmi_phy_type type;
 	void (*powerup)(struct hdmi_phy *phy, unsigned long int pixclock);
 	void (*powerdown)(struct hdmi_phy *phy);
+	const char * const *reg_names;
+	int num_regs;
+	const char * const *clk_names;
+	int num_clks;
 };
 
+extern const struct hdmi_phy_cfg msm_hdmi_phy_8x60_cfg;
+extern const struct hdmi_phy_cfg msm_hdmi_phy_8960_cfg;
+extern const struct hdmi_phy_cfg msm_hdmi_phy_8x74_cfg;
+extern const struct hdmi_phy_cfg msm_hdmi_phy_8996_cfg;
+
 struct hdmi_phy {
+	struct platform_device *pdev;
+	void __iomem *mmio;
+	struct hdmi_phy_cfg *cfg;
 	const struct hdmi_phy_funcs *funcs;
+	struct regulator **regs;
+	struct clk **clks;
 };
 
-struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi);
-struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi);
-struct hdmi_phy *hdmi_phy_8x74_init(struct hdmi *hdmi);
+static inline void hdmi_phy_write(struct hdmi_phy *phy, u32 reg, u32 data)
+{
+	msm_writel(data, phy->mmio + reg);
+}
+
+static inline u32 hdmi_phy_read(struct hdmi_phy *phy, u32 reg)
+{
+	return msm_readl(phy->mmio + reg);
+}
+
+int msm_hdmi_phy_resource_enable(struct hdmi_phy *phy);
+void msm_hdmi_phy_resource_disable(struct hdmi_phy *phy);
+void msm_hdmi_phy_powerup(struct hdmi_phy *phy, unsigned long int pixclock);
+void msm_hdmi_phy_powerdown(struct hdmi_phy *phy);
+void __init msm_hdmi_phy_driver_register(void);
+void __exit msm_hdmi_phy_driver_unregister(void);
+
+#ifdef CONFIG_COMMON_CLK
+int msm_hdmi_pll_8960_init(struct platform_device *pdev);
+int msm_hdmi_pll_8996_init(struct platform_device *pdev);
+#else
+static inline int msm_hdmi_pll_8960_init(struct platform_device *pdev);
+{
+	return -ENODEV;
+}
+
+static inline int msm_hdmi_pll_8996_init(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
+#endif
 
 /*
  * audio:
  */
 
-int hdmi_audio_update(struct hdmi *hdmi);
-int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
+int msm_hdmi_audio_update(struct hdmi *hdmi);
+int msm_hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
 	uint32_t num_of_channels, uint32_t channel_allocation,
 	uint32_t level_shift, bool down_mix);
-void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate);
+void msm_hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate);
 
 
 /*
  * hdmi bridge:
  */
 
-struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi);
-void hdmi_bridge_destroy(struct drm_bridge *bridge);
+struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi);
+void msm_hdmi_bridge_destroy(struct drm_bridge *bridge);
 
 /*
  * hdmi connector:
  */
 
-void hdmi_connector_irq(struct drm_connector *connector);
-struct drm_connector *hdmi_connector_init(struct hdmi *hdmi);
+void msm_hdmi_connector_irq(struct drm_connector *connector);
+struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi);
 
 /*
  * i2c adapter for ddc:
  */
 
-void hdmi_i2c_irq(struct i2c_adapter *i2c);
-void hdmi_i2c_destroy(struct i2c_adapter *i2c);
-struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi);
+void msm_hdmi_i2c_irq(struct i2c_adapter *i2c);
+void msm_hdmi_i2c_destroy(struct i2c_adapter *i2c);
+struct i2c_adapter *msm_hdmi_i2c_init(struct hdmi *hdmi);
 
 /*
  * hdcp
  */
-struct hdmi_hdcp_ctrl *hdmi_hdcp_init(struct hdmi *hdmi);
-void hdmi_hdcp_destroy(struct hdmi *hdmi);
-void hdmi_hdcp_on(struct hdmi_hdcp_ctrl *hdcp_ctrl);
-void hdmi_hdcp_off(struct hdmi_hdcp_ctrl *hdcp_ctrl);
-void hdmi_hdcp_irq(struct hdmi_hdcp_ctrl *hdcp_ctrl);
+struct hdmi_hdcp_ctrl *msm_hdmi_hdcp_init(struct hdmi *hdmi);
+void msm_hdmi_hdcp_destroy(struct hdmi *hdmi);
+void msm_hdmi_hdcp_on(struct hdmi_hdcp_ctrl *hdcp_ctrl);
+void msm_hdmi_hdcp_off(struct hdmi_hdcp_ctrl *hdcp_ctrl);
+void msm_hdmi_hdcp_irq(struct hdmi_hdcp_ctrl *hdcp_ctrl);
 
 #endif /* __HDMI_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
index 10c45700aefe..34c7df6549c1 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
@@ -559,7 +560,7 @@ static inline uint32_t HDMI_VSYNC_TOTAL_F2_V_TOTAL(uint32_t val)
 
 #define REG_HDMI_CEC_WR_CHECK_CONFIG				0x00000370
 
-#define REG_HDMI_8x60_PHY_REG0					0x00000300
+#define REG_HDMI_8x60_PHY_REG0					0x00000000
 #define HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__MASK			0x0000001c
 #define HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__SHIFT		2
 static inline uint32_t HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(uint32_t val)
@@ -567,7 +568,7 @@ static inline uint32_t HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(uint32_t val)
 	return ((val) << HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__SHIFT) & HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__MASK;
 }
 
-#define REG_HDMI_8x60_PHY_REG1					0x00000304
+#define REG_HDMI_8x60_PHY_REG1					0x00000004
 #define HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__MASK			0x000000f0
 #define HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__SHIFT			4
 static inline uint32_t HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(uint32_t val)
@@ -581,7 +582,7 @@ static inline uint32_t HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(uint32_t val)
 	return ((val) << HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__SHIFT) & HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__MASK;
 }
 
-#define REG_HDMI_8x60_PHY_REG2					0x00000308
+#define REG_HDMI_8x60_PHY_REG2					0x00000008
 #define HDMI_8x60_PHY_REG2_PD_DESER				0x00000001
 #define HDMI_8x60_PHY_REG2_PD_DRIVE_1				0x00000002
 #define HDMI_8x60_PHY_REG2_PD_DRIVE_2				0x00000004
@@ -591,152 +592,152 @@ static inline uint32_t HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(uint32_t val)
 #define HDMI_8x60_PHY_REG2_PD_PWRGEN				0x00000040
 #define HDMI_8x60_PHY_REG2_RCV_SENSE_EN				0x00000080
 
-#define REG_HDMI_8x60_PHY_REG3					0x0000030c
+#define REG_HDMI_8x60_PHY_REG3					0x0000000c
 #define HDMI_8x60_PHY_REG3_PLL_ENABLE				0x00000001
 
-#define REG_HDMI_8x60_PHY_REG4					0x00000310
+#define REG_HDMI_8x60_PHY_REG4					0x00000010
 
-#define REG_HDMI_8x60_PHY_REG5					0x00000314
+#define REG_HDMI_8x60_PHY_REG5					0x00000014
 
-#define REG_HDMI_8x60_PHY_REG6					0x00000318
+#define REG_HDMI_8x60_PHY_REG6					0x00000018
 
-#define REG_HDMI_8x60_PHY_REG7					0x0000031c
+#define REG_HDMI_8x60_PHY_REG7					0x0000001c
 
-#define REG_HDMI_8x60_PHY_REG8					0x00000320
+#define REG_HDMI_8x60_PHY_REG8					0x00000020
 
-#define REG_HDMI_8x60_PHY_REG9					0x00000324
+#define REG_HDMI_8x60_PHY_REG9					0x00000024
 
-#define REG_HDMI_8x60_PHY_REG10					0x00000328
+#define REG_HDMI_8x60_PHY_REG10					0x00000028
 
-#define REG_HDMI_8x60_PHY_REG11					0x0000032c
+#define REG_HDMI_8x60_PHY_REG11					0x0000002c
 
-#define REG_HDMI_8x60_PHY_REG12					0x00000330
+#define REG_HDMI_8x60_PHY_REG12					0x00000030
 #define HDMI_8x60_PHY_REG12_RETIMING_EN				0x00000001
 #define HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN			0x00000002
 #define HDMI_8x60_PHY_REG12_FORCE_LOCK				0x00000010
 
-#define REG_HDMI_8960_PHY_REG0					0x00000400
+#define REG_HDMI_8960_PHY_REG0					0x00000000
 
-#define REG_HDMI_8960_PHY_REG1					0x00000404
+#define REG_HDMI_8960_PHY_REG1					0x00000004
 
-#define REG_HDMI_8960_PHY_REG2					0x00000408
+#define REG_HDMI_8960_PHY_REG2					0x00000008
 
-#define REG_HDMI_8960_PHY_REG3					0x0000040c
+#define REG_HDMI_8960_PHY_REG3					0x0000000c
 
-#define REG_HDMI_8960_PHY_REG4					0x00000410
+#define REG_HDMI_8960_PHY_REG4					0x00000010
 
-#define REG_HDMI_8960_PHY_REG5					0x00000414
+#define REG_HDMI_8960_PHY_REG5					0x00000014
 
-#define REG_HDMI_8960_PHY_REG6					0x00000418
+#define REG_HDMI_8960_PHY_REG6					0x00000018
 
-#define REG_HDMI_8960_PHY_REG7					0x0000041c
+#define REG_HDMI_8960_PHY_REG7					0x0000001c
 
-#define REG_HDMI_8960_PHY_REG8					0x00000420
+#define REG_HDMI_8960_PHY_REG8					0x00000020
 
-#define REG_HDMI_8960_PHY_REG9					0x00000424
+#define REG_HDMI_8960_PHY_REG9					0x00000024
 
-#define REG_HDMI_8960_PHY_REG10					0x00000428
+#define REG_HDMI_8960_PHY_REG10					0x00000028
 
-#define REG_HDMI_8960_PHY_REG11					0x0000042c
+#define REG_HDMI_8960_PHY_REG11					0x0000002c
 
-#define REG_HDMI_8960_PHY_REG12					0x00000430
+#define REG_HDMI_8960_PHY_REG12					0x00000030
 #define HDMI_8960_PHY_REG12_SW_RESET				0x00000020
 #define HDMI_8960_PHY_REG12_PWRDN_B				0x00000080
 
-#define REG_HDMI_8960_PHY_REG_BIST_CFG				0x00000434
+#define REG_HDMI_8960_PHY_REG_BIST_CFG				0x00000034
 
-#define REG_HDMI_8960_PHY_DEBUG_BUS_SEL				0x00000438
+#define REG_HDMI_8960_PHY_DEBUG_BUS_SEL				0x00000038
 
-#define REG_HDMI_8960_PHY_REG_MISC0				0x0000043c
+#define REG_HDMI_8960_PHY_REG_MISC0				0x0000003c
 
-#define REG_HDMI_8960_PHY_REG13					0x00000440
+#define REG_HDMI_8960_PHY_REG13					0x00000040
 
-#define REG_HDMI_8960_PHY_REG14					0x00000444
+#define REG_HDMI_8960_PHY_REG14					0x00000044
 
-#define REG_HDMI_8960_PHY_REG15					0x00000448
+#define REG_HDMI_8960_PHY_REG15					0x00000048
 
-#define REG_HDMI_8960_PHY_PLL_REFCLK_CFG			0x00000500
+#define REG_HDMI_8960_PHY_PLL_REFCLK_CFG			0x00000000
 
-#define REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG			0x00000504
+#define REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG			0x00000004
 
-#define REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0			0x00000508
+#define REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0			0x00000008
 
-#define REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1			0x0000050c
+#define REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1			0x0000000c
 
-#define REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG			0x00000510
+#define REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG			0x00000010
 
-#define REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG			0x00000514
+#define REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG			0x00000014
 
-#define REG_HDMI_8960_PHY_PLL_PWRDN_B				0x00000518
+#define REG_HDMI_8960_PHY_PLL_PWRDN_B				0x00000018
 #define HDMI_8960_PHY_PLL_PWRDN_B_PD_PLL			0x00000002
 #define HDMI_8960_PHY_PLL_PWRDN_B_PLL_PWRDN_B			0x00000008
 
-#define REG_HDMI_8960_PHY_PLL_SDM_CFG0				0x0000051c
+#define REG_HDMI_8960_PHY_PLL_SDM_CFG0				0x0000001c
 
-#define REG_HDMI_8960_PHY_PLL_SDM_CFG1				0x00000520
+#define REG_HDMI_8960_PHY_PLL_SDM_CFG1				0x00000020
 
-#define REG_HDMI_8960_PHY_PLL_SDM_CFG2				0x00000524
+#define REG_HDMI_8960_PHY_PLL_SDM_CFG2				0x00000024
 
-#define REG_HDMI_8960_PHY_PLL_SDM_CFG3				0x00000528
+#define REG_HDMI_8960_PHY_PLL_SDM_CFG3				0x00000028
 
-#define REG_HDMI_8960_PHY_PLL_SDM_CFG4				0x0000052c
+#define REG_HDMI_8960_PHY_PLL_SDM_CFG4				0x0000002c
 
-#define REG_HDMI_8960_PHY_PLL_SSC_CFG0				0x00000530
+#define REG_HDMI_8960_PHY_PLL_SSC_CFG0				0x00000030
 
-#define REG_HDMI_8960_PHY_PLL_SSC_CFG1				0x00000534
+#define REG_HDMI_8960_PHY_PLL_SSC_CFG1				0x00000034
 
-#define REG_HDMI_8960_PHY_PLL_SSC_CFG2				0x00000538
+#define REG_HDMI_8960_PHY_PLL_SSC_CFG2				0x00000038
 
-#define REG_HDMI_8960_PHY_PLL_SSC_CFG3				0x0000053c
+#define REG_HDMI_8960_PHY_PLL_SSC_CFG3				0x0000003c
 
-#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0			0x00000540
+#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0			0x00000040
 
-#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1			0x00000544
+#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1			0x00000044
 
-#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2			0x00000548
+#define REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2			0x00000048
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0			0x0000054c
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0			0x0000004c
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1			0x00000550
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1			0x00000050
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2			0x00000554
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2			0x00000054
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3			0x00000558
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3			0x00000058
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4			0x0000055c
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4			0x0000005c
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5			0x00000560
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5			0x00000060
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6			0x00000564
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6			0x00000064
 
-#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7			0x00000568
+#define REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7			0x00000068
 
-#define REG_HDMI_8960_PHY_PLL_DEBUG_SEL				0x0000056c
+#define REG_HDMI_8960_PHY_PLL_DEBUG_SEL				0x0000006c
 
-#define REG_HDMI_8960_PHY_PLL_MISC0				0x00000570
+#define REG_HDMI_8960_PHY_PLL_MISC0				0x00000070
 
-#define REG_HDMI_8960_PHY_PLL_MISC1				0x00000574
+#define REG_HDMI_8960_PHY_PLL_MISC1				0x00000074
 
-#define REG_HDMI_8960_PHY_PLL_MISC2				0x00000578
+#define REG_HDMI_8960_PHY_PLL_MISC2				0x00000078
 
-#define REG_HDMI_8960_PHY_PLL_MISC3				0x0000057c
+#define REG_HDMI_8960_PHY_PLL_MISC3				0x0000007c
 
-#define REG_HDMI_8960_PHY_PLL_MISC4				0x00000580
+#define REG_HDMI_8960_PHY_PLL_MISC4				0x00000080
 
-#define REG_HDMI_8960_PHY_PLL_MISC5				0x00000584
+#define REG_HDMI_8960_PHY_PLL_MISC5				0x00000084
 
-#define REG_HDMI_8960_PHY_PLL_MISC6				0x00000588
+#define REG_HDMI_8960_PHY_PLL_MISC6				0x00000088
 
-#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS0			0x0000058c
+#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS0			0x0000008c
 
-#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS1			0x00000590
+#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS1			0x00000090
 
-#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS2			0x00000594
+#define REG_HDMI_8960_PHY_PLL_DEBUG_BUS2			0x00000094
 
-#define REG_HDMI_8960_PHY_PLL_STATUS0				0x00000598
+#define REG_HDMI_8960_PHY_PLL_STATUS0				0x00000098
 #define HDMI_8960_PHY_PLL_STATUS0_PLL_LOCK			0x00000001
 
-#define REG_HDMI_8960_PHY_PLL_STATUS1				0x0000059c
+#define REG_HDMI_8960_PHY_PLL_STATUS1				0x0000009c
 
 #define REG_HDMI_8x74_ANA_CFG0					0x00000000
 
@@ -843,5 +844,501 @@ static inline uint32_t HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(uint32_t val)
 
 #define REG_HDMI_28nm_PHY_PLL_DEBUG_BUS_SEL			0x000000a0
 
+#define REG_HDMI_8996_PHY_CFG					0x00000000
+
+#define REG_HDMI_8996_PHY_PD_CTL				0x00000004
+
+#define REG_HDMI_8996_PHY_MODE					0x00000008
+
+#define REG_HDMI_8996_PHY_MISR_CLEAR				0x0000000c
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_CFG0			0x00000010
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_CFG1			0x00000014
+
+#define REG_HDMI_8996_PHY_TX0_TX1_PRBS_SEED_BYTE0		0x00000018
+
+#define REG_HDMI_8996_PHY_TX0_TX1_PRBS_SEED_BYTE1		0x0000001c
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_PATTERN0			0x00000020
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_PATTERN1			0x00000024
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_CFG0			0x00000028
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_CFG1			0x0000002c
+
+#define REG_HDMI_8996_PHY_TX2_TX3_PRBS_SEED_BYTE0		0x00000030
+
+#define REG_HDMI_8996_PHY_TX2_TX3_PRBS_SEED_BYTE1		0x00000034
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_PATTERN0			0x00000038
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_PATTERN1			0x0000003c
+
+#define REG_HDMI_8996_PHY_DEBUG_BUS_SEL				0x00000040
+
+#define REG_HDMI_8996_PHY_TXCAL_CFG0				0x00000044
+
+#define REG_HDMI_8996_PHY_TXCAL_CFG1				0x00000048
+
+#define REG_HDMI_8996_PHY_TX0_TX1_LANE_CTL			0x0000004c
+
+#define REG_HDMI_8996_PHY_TX2_TX3_LANE_CTL			0x00000050
+
+#define REG_HDMI_8996_PHY_LANE_BIST_CONFIG			0x00000054
+
+#define REG_HDMI_8996_PHY_CLOCK					0x00000058
+
+#define REG_HDMI_8996_PHY_MISC1					0x0000005c
+
+#define REG_HDMI_8996_PHY_MISC2					0x00000060
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_STATUS0			0x00000064
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_STATUS1			0x00000068
+
+#define REG_HDMI_8996_PHY_TX0_TX1_BIST_STATUS2			0x0000006c
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_STATUS0			0x00000070
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_STATUS1			0x00000074
+
+#define REG_HDMI_8996_PHY_TX2_TX3_BIST_STATUS2			0x00000078
+
+#define REG_HDMI_8996_PHY_PRE_MISR_STATUS0			0x0000007c
+
+#define REG_HDMI_8996_PHY_PRE_MISR_STATUS1			0x00000080
+
+#define REG_HDMI_8996_PHY_PRE_MISR_STATUS2			0x00000084
+
+#define REG_HDMI_8996_PHY_PRE_MISR_STATUS3			0x00000088
+
+#define REG_HDMI_8996_PHY_POST_MISR_STATUS0			0x0000008c
+
+#define REG_HDMI_8996_PHY_POST_MISR_STATUS1			0x00000090
+
+#define REG_HDMI_8996_PHY_POST_MISR_STATUS2			0x00000094
+
+#define REG_HDMI_8996_PHY_POST_MISR_STATUS3			0x00000098
+
+#define REG_HDMI_8996_PHY_STATUS				0x0000009c
+
+#define REG_HDMI_8996_PHY_MISC3_STATUS				0x000000a0
+
+#define REG_HDMI_8996_PHY_MISC4_STATUS				0x000000a4
+
+#define REG_HDMI_8996_PHY_DEBUG_BUS0				0x000000a8
+
+#define REG_HDMI_8996_PHY_DEBUG_BUS1				0x000000ac
+
+#define REG_HDMI_8996_PHY_DEBUG_BUS2				0x000000b0
+
+#define REG_HDMI_8996_PHY_DEBUG_BUS3				0x000000b4
+
+#define REG_HDMI_8996_PHY_PHY_REVISION_ID0			0x000000b8
+
+#define REG_HDMI_8996_PHY_PHY_REVISION_ID1			0x000000bc
+
+#define REG_HDMI_8996_PHY_PHY_REVISION_ID2			0x000000c0
+
+#define REG_HDMI_8996_PHY_PHY_REVISION_ID3			0x000000c4
+
+#define REG_HDMI_PHY_QSERDES_COM_ATB_SEL1			0x00000000
+
+#define REG_HDMI_PHY_QSERDES_COM_ATB_SEL2			0x00000004
+
+#define REG_HDMI_PHY_QSERDES_COM_FREQ_UPDATE			0x00000008
+
+#define REG_HDMI_PHY_QSERDES_COM_BG_TIMER			0x0000000c
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_EN_CENTER			0x00000010
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_ADJ_PER1			0x00000014
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_ADJ_PER2			0x00000018
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_PER1			0x0000001c
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_PER2			0x00000020
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_STEP_SIZE1			0x00000024
+
+#define REG_HDMI_PHY_QSERDES_COM_SSC_STEP_SIZE2			0x00000028
+
+#define REG_HDMI_PHY_QSERDES_COM_POST_DIV			0x0000002c
+
+#define REG_HDMI_PHY_QSERDES_COM_POST_DIV_MUX			0x00000030
+
+#define REG_HDMI_PHY_QSERDES_COM_BIAS_EN_CLKBUFLR_EN		0x00000034
+
+#define REG_HDMI_PHY_QSERDES_COM_CLK_ENABLE1			0x00000038
+
+#define REG_HDMI_PHY_QSERDES_COM_SYS_CLK_CTRL			0x0000003c
+
+#define REG_HDMI_PHY_QSERDES_COM_SYSCLK_BUF_ENABLE		0x00000040
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_EN				0x00000044
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_IVCO			0x00000048
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP1_MODE0		0x0000004c
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP2_MODE0		0x00000050
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP3_MODE0		0x00000054
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP1_MODE1		0x00000058
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP2_MODE1		0x0000005c
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP3_MODE1		0x00000060
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP1_MODE2		0x00000064
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD0			0x00000064
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP2_MODE2		0x00000068
+
+#define REG_HDMI_PHY_QSERDES_COM_EP_CLOCK_DETECT_CTRL		0x00000068
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP3_MODE2		0x0000006c
+
+#define REG_HDMI_PHY_QSERDES_COM_SYSCLK_DET_COMP_STATUS		0x0000006c
+
+#define REG_HDMI_PHY_QSERDES_COM_BG_TRIM			0x00000070
+
+#define REG_HDMI_PHY_QSERDES_COM_CLK_EP_DIV			0x00000074
+
+#define REG_HDMI_PHY_QSERDES_COM_CP_CTRL_MODE0			0x00000078
+
+#define REG_HDMI_PHY_QSERDES_COM_CP_CTRL_MODE1			0x0000007c
+
+#define REG_HDMI_PHY_QSERDES_COM_CP_CTRL_MODE2			0x00000080
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD1			0x00000080
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_RCTRL_MODE0		0x00000084
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_RCTRL_MODE1		0x00000088
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_RCTRL_MODE2		0x0000008c
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD2			0x0000008c
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_CCTRL_MODE0		0x00000090
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_CCTRL_MODE1		0x00000094
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_CCTRL_MODE2		0x00000098
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD3			0x00000098
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_CNTRL			0x0000009c
+
+#define REG_HDMI_PHY_QSERDES_COM_PHASE_SEL_CTRL			0x000000a0
+
+#define REG_HDMI_PHY_QSERDES_COM_PHASE_SEL_DC			0x000000a4
+
+#define REG_HDMI_PHY_QSERDES_COM_CORE_CLK_IN_SYNC_SEL		0x000000a8
+
+#define REG_HDMI_PHY_QSERDES_COM_BIAS_EN_CTRL_BY_PSM		0x000000a8
+
+#define REG_HDMI_PHY_QSERDES_COM_SYSCLK_EN_SEL			0x000000ac
+
+#define REG_HDMI_PHY_QSERDES_COM_CML_SYSCLK_SEL			0x000000b0
+
+#define REG_HDMI_PHY_QSERDES_COM_RESETSM_CNTRL			0x000000b4
+
+#define REG_HDMI_PHY_QSERDES_COM_RESETSM_CNTRL2			0x000000b8
+
+#define REG_HDMI_PHY_QSERDES_COM_RESTRIM_CTRL			0x000000bc
+
+#define REG_HDMI_PHY_QSERDES_COM_RESTRIM_CTRL2			0x000000c0
+
+#define REG_HDMI_PHY_QSERDES_COM_RESCODE_DIV_NUM		0x000000c4
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP_EN			0x000000c8
+
+#define REG_HDMI_PHY_QSERDES_COM_LOCK_CMP_CFG			0x000000cc
+
+#define REG_HDMI_PHY_QSERDES_COM_DEC_START_MODE0		0x000000d0
+
+#define REG_HDMI_PHY_QSERDES_COM_DEC_START_MODE1		0x000000d4
+
+#define REG_HDMI_PHY_QSERDES_COM_DEC_START_MODE2		0x000000d8
+
+#define REG_HDMI_PHY_QSERDES_COM_VCOCAL_DEADMAN_CTRL		0x000000d8
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START1_MODE0		0x000000dc
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START2_MODE0		0x000000e0
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START3_MODE0		0x000000e4
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START1_MODE1		0x000000e8
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START2_MODE1		0x000000ec
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START3_MODE1		0x000000f0
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START1_MODE2		0x000000f4
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MINVAL1		0x000000f4
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START2_MODE2		0x000000f8
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MINVAL2		0x000000f8
+
+#define REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START3_MODE2		0x000000fc
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD4			0x000000fc
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_INITVAL		0x00000100
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_EN			0x00000104
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN0_MODE0		0x00000108
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN1_MODE0		0x0000010c
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN0_MODE1		0x00000110
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN1_MODE1		0x00000114
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN0_MODE2		0x00000118
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MAXVAL1		0x00000118
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN1_MODE2		0x0000011c
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MAXVAL2		0x0000011c
+
+#define REG_HDMI_PHY_QSERDES_COM_RES_TRIM_CONTROL2		0x00000120
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_CTRL			0x00000124
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MAP			0x00000128
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE1_MODE0		0x0000012c
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE2_MODE0		0x00000130
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE1_MODE1		0x00000134
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE2_MODE1		0x00000138
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE1_MODE2		0x0000013c
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_INITVAL1		0x0000013c
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE2_MODE2		0x00000140
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_INITVAL2		0x00000140
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_TIMER1		0x00000144
+
+#define REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_TIMER2		0x00000148
+
+#define REG_HDMI_PHY_QSERDES_COM_SAR				0x0000014c
+
+#define REG_HDMI_PHY_QSERDES_COM_SAR_CLK			0x00000150
+
+#define REG_HDMI_PHY_QSERDES_COM_SAR_CODE_OUT_STATUS		0x00000154
+
+#define REG_HDMI_PHY_QSERDES_COM_SAR_CODE_READY_STATUS		0x00000158
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_STATUS			0x0000015c
+
+#define REG_HDMI_PHY_QSERDES_COM_RESET_SM_STATUS		0x00000160
+
+#define REG_HDMI_PHY_QSERDES_COM_RESTRIM_CODE_STATUS		0x00000164
+
+#define REG_HDMI_PHY_QSERDES_COM_PLLCAL_CODE1_STATUS		0x00000168
+
+#define REG_HDMI_PHY_QSERDES_COM_PLLCAL_CODE2_STATUS		0x0000016c
+
+#define REG_HDMI_PHY_QSERDES_COM_BG_CTRL			0x00000170
+
+#define REG_HDMI_PHY_QSERDES_COM_CLK_SELECT			0x00000174
+
+#define REG_HDMI_PHY_QSERDES_COM_HSCLK_SEL			0x00000178
+
+#define REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_BINCODE_STATUS	0x0000017c
+
+#define REG_HDMI_PHY_QSERDES_COM_PLL_ANALOG			0x00000180
+
+#define REG_HDMI_PHY_QSERDES_COM_CORECLK_DIV			0x00000184
+
+#define REG_HDMI_PHY_QSERDES_COM_SW_RESET			0x00000188
+
+#define REG_HDMI_PHY_QSERDES_COM_CORE_CLK_EN			0x0000018c
+
+#define REG_HDMI_PHY_QSERDES_COM_C_READY_STATUS			0x00000190
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_CONFIG			0x00000194
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RATE_OVERRIDE		0x00000198
+
+#define REG_HDMI_PHY_QSERDES_COM_SVS_MODE_CLK_SEL		0x0000019c
+
+#define REG_HDMI_PHY_QSERDES_COM_DEBUG_BUS0			0x000001a0
+
+#define REG_HDMI_PHY_QSERDES_COM_DEBUG_BUS1			0x000001a4
+
+#define REG_HDMI_PHY_QSERDES_COM_DEBUG_BUS2			0x000001a8
+
+#define REG_HDMI_PHY_QSERDES_COM_DEBUG_BUS3			0x000001ac
+
+#define REG_HDMI_PHY_QSERDES_COM_DEBUG_BUS_SEL			0x000001b0
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_MISC1			0x000001b4
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_MISC2			0x000001b8
+
+#define REG_HDMI_PHY_QSERDES_COM_CORECLK_DIV_MODE1		0x000001bc
+
+#define REG_HDMI_PHY_QSERDES_COM_CORECLK_DIV_MODE2		0x000001c0
+
+#define REG_HDMI_PHY_QSERDES_COM_CMN_RSVD5			0x000001c4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_MODE_LANENO		0x00000000
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_INVERT			0x00000004
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_CLKBUF_ENABLE		0x00000008
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_CMN_CONTROL_ONE		0x0000000c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_CMN_CONTROL_TWO		0x00000010
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_CMN_CONTROL_THREE		0x00000014
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_EMP_POST1_LVL		0x00000018
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_POST2_EMPH		0x0000001c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_BOOST_LVL_UP_DN		0x00000020
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_HP_PD_ENABLES		0x00000024
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_IDLE_LVL_LARGE_AMP	0x00000028
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_DRV_LVL			0x0000002c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_DRV_LVL_OFFSET		0x00000030
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RESET_TSYNC_EN		0x00000034
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PRE_STALL_LDO_BOOST_EN	0x00000038
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_BAND			0x0000003c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_SLEW_CNTL			0x00000040
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_INTERFACE_SELECT		0x00000044
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_LPB_EN			0x00000048
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RES_CODE_LANE_TX		0x0000004c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RES_CODE_LANE_RX		0x00000050
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RES_CODE_LANE_OFFSET		0x00000054
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PERL_LENGTH1			0x00000058
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PERL_LENGTH2			0x0000005c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_SERDES_BYP_EN_OUT		0x00000060
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_DEBUG_BUS_SEL		0x00000064
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_HIGHZ_TRANSCEIVEREN_BIAS_DRVR_EN	0x00000068
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_POL_INV			0x0000006c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PARRATE_REC_DETECT_IDLE_EN	0x00000070
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN1		0x00000074
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN2		0x00000078
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN3		0x0000007c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN4		0x00000080
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN5		0x00000084
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN6		0x00000088
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN7		0x0000008c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_PATTERN8		0x00000090
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_LANE_MODE			0x00000094
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_IDAC_CAL_LANE_MODE		0x00000098
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_IDAC_CAL_LANE_MODE_CONFIGURATION	0x0000009c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_ATB_SEL1			0x000000a0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_ATB_SEL2			0x000000a4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RCV_DETECT_LVL		0x000000a8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RCV_DETECT_LVL_2		0x000000ac
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PRBS_SEED1			0x000000b0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PRBS_SEED2			0x000000b4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PRBS_SEED3			0x000000b8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PRBS_SEED4			0x000000bc
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RESET_GEN			0x000000c0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_RESET_GEN_MUXES		0x000000c4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TRAN_DRVR_EMP_EN		0x000000c8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_INTERFACE_MODE		0x000000cc
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_CTRL			0x000000d0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_ENCODED_OR_DATA		0x000000d4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_1_DIVIDER_BAND2	0x000000d8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_2_DIVIDER_BAND2	0x000000dc
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_3_DIVIDER_BAND2	0x000000e0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_4_DIVIDER_BAND2	0x000000e4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_1_DIVIDER_BAND0_1	0x000000e8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_2_DIVIDER_BAND0_1	0x000000ec
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_3_DIVIDER_BAND0_1	0x000000f0
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_PWM_GEAR_4_DIVIDER_BAND0_1	0x000000f4
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_VMODE_CTRL1			0x000000f8
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_VMODE_CTRL2			0x000000fc
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_ALOG_INTF_OBSV_CNTL	0x00000100
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_STATUS			0x00000104
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_ERROR_COUNT1		0x00000108
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_BIST_ERROR_COUNT2		0x0000010c
+
+#define REG_HDMI_PHY_QSERDES_TX_LX_TX_ALOG_INTF_OBSV		0x00000110
+
 
 #endif /* HDMI_XML */
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
index df232e20c13e..a54d3bb5baad 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
@@ -89,7 +89,7 @@ static const struct hdmi_msm_audio_arcs *get_arcs(unsigned long int pixclock)
 	return NULL;
 }
 
-int hdmi_audio_update(struct hdmi *hdmi)
+int msm_hdmi_audio_update(struct hdmi *hdmi)
 {
 	struct hdmi_audio *audio = &hdmi->audio;
 	struct hdmi_audio_infoframe *info = &audio->infoframe;
@@ -232,7 +232,7 @@ int hdmi_audio_update(struct hdmi *hdmi)
 	return 0;
 }
 
-int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
+int msm_hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
 	uint32_t num_of_channels, uint32_t channel_allocation,
 	uint32_t level_shift, bool down_mix)
 {
@@ -252,10 +252,10 @@ int hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
 	audio->infoframe.level_shift_value = level_shift;
 	audio->infoframe.downmix_inhibit = down_mix;
 
-	return hdmi_audio_update(hdmi);
+	return msm_hdmi_audio_update(hdmi);
 }
 
-void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate)
+void msm_hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate)
 {
 	struct hdmi_audio *audio;
 
@@ -268,5 +268,5 @@ void hdmi_audio_set_sample_rate(struct hdmi *hdmi, int rate)
 		return;
 
 	audio->rate = rate;
-	hdmi_audio_update(hdmi);
+	msm_hdmi_audio_update(hdmi);
 }
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
index 92b69ae8caf9..bacbd5d8df0e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
@@ -23,11 +23,11 @@ struct hdmi_bridge {
 };
 #define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base)
 
-void hdmi_bridge_destroy(struct drm_bridge *bridge)
+void msm_hdmi_bridge_destroy(struct drm_bridge *bridge)
 {
 }
 
-static void power_on(struct drm_bridge *bridge)
+static void msm_hdmi_power_on(struct drm_bridge *bridge)
 {
 	struct drm_device *dev = bridge->dev;
 	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
@@ -86,7 +86,7 @@ static void power_off(struct drm_bridge *bridge)
 	}
 }
 
-static void hdmi_bridge_pre_enable(struct drm_bridge *bridge)
+static void msm_hdmi_bridge_pre_enable(struct drm_bridge *bridge)
 {
 	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
 	struct hdmi *hdmi = hdmi_bridge->hdmi;
@@ -95,51 +95,51 @@ static void hdmi_bridge_pre_enable(struct drm_bridge *bridge)
 	DBG("power up");
 
 	if (!hdmi->power_on) {
-		power_on(bridge);
+		msm_hdmi_phy_resource_enable(phy);
+		msm_hdmi_power_on(bridge);
 		hdmi->power_on = true;
-		hdmi_audio_update(hdmi);
+		msm_hdmi_audio_update(hdmi);
 	}
 
-	if (phy)
-		phy->funcs->powerup(phy, hdmi->pixclock);
+	msm_hdmi_phy_powerup(phy, hdmi->pixclock);
 
-	hdmi_set_mode(hdmi, true);
+	msm_hdmi_set_mode(hdmi, true);
 
 	if (hdmi->hdcp_ctrl)
-		hdmi_hdcp_on(hdmi->hdcp_ctrl);
+		msm_hdmi_hdcp_on(hdmi->hdcp_ctrl);
 }
 
-static void hdmi_bridge_enable(struct drm_bridge *bridge)
+static void msm_hdmi_bridge_enable(struct drm_bridge *bridge)
 {
 }
 
-static void hdmi_bridge_disable(struct drm_bridge *bridge)
+static void msm_hdmi_bridge_disable(struct drm_bridge *bridge)
 {
 }
 
-static void hdmi_bridge_post_disable(struct drm_bridge *bridge)
+static void msm_hdmi_bridge_post_disable(struct drm_bridge *bridge)
 {
 	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
 	struct hdmi *hdmi = hdmi_bridge->hdmi;
 	struct hdmi_phy *phy = hdmi->phy;
 
 	if (hdmi->hdcp_ctrl)
-		hdmi_hdcp_off(hdmi->hdcp_ctrl);
+		msm_hdmi_hdcp_off(hdmi->hdcp_ctrl);
 
 	DBG("power down");
-	hdmi_set_mode(hdmi, false);
+	msm_hdmi_set_mode(hdmi, false);
 
-	if (phy)
-		phy->funcs->powerdown(phy);
+	msm_hdmi_phy_powerdown(phy);
 
 	if (hdmi->power_on) {
 		power_off(bridge);
 		hdmi->power_on = false;
-		hdmi_audio_update(hdmi);
+		msm_hdmi_audio_update(hdmi);
+		msm_hdmi_phy_resource_disable(phy);
 	}
 }
 
-static void hdmi_bridge_mode_set(struct drm_bridge *bridge,
+static void msm_hdmi_bridge_mode_set(struct drm_bridge *bridge,
 		 struct drm_display_mode *mode,
 		 struct drm_display_mode *adjusted_mode)
 {
@@ -196,20 +196,20 @@ static void hdmi_bridge_mode_set(struct drm_bridge *bridge,
 	DBG("frame_ctrl=%08x", frame_ctrl);
 	hdmi_write(hdmi, REG_HDMI_FRAME_CTRL, frame_ctrl);
 
-	hdmi_audio_update(hdmi);
+	msm_hdmi_audio_update(hdmi);
 }
 
-static const struct drm_bridge_funcs hdmi_bridge_funcs = {
-		.pre_enable = hdmi_bridge_pre_enable,
-		.enable = hdmi_bridge_enable,
-		.disable = hdmi_bridge_disable,
-		.post_disable = hdmi_bridge_post_disable,
-		.mode_set = hdmi_bridge_mode_set,
+static const struct drm_bridge_funcs msm_hdmi_bridge_funcs = {
+		.pre_enable = msm_hdmi_bridge_pre_enable,
+		.enable = msm_hdmi_bridge_enable,
+		.disable = msm_hdmi_bridge_disable,
+		.post_disable = msm_hdmi_bridge_post_disable,
+		.mode_set = msm_hdmi_bridge_mode_set,
 };
 
 
 /* initialize bridge */
-struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi)
+struct drm_bridge *msm_hdmi_bridge_init(struct hdmi *hdmi)
 {
 	struct drm_bridge *bridge = NULL;
 	struct hdmi_bridge *hdmi_bridge;
@@ -225,7 +225,7 @@ struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi)
 	hdmi_bridge->hdmi = hdmi;
 
 	bridge = &hdmi_bridge->base;
-	bridge->funcs = &hdmi_bridge_funcs;
+	bridge->funcs = &msm_hdmi_bridge_funcs;
 
 	ret = drm_bridge_attach(hdmi->dev, bridge);
 	if (ret)
@@ -235,7 +235,7 @@ struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi)
 
 fail:
 	if (bridge)
-		hdmi_bridge_destroy(bridge);
+		msm_hdmi_bridge_destroy(bridge);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index a3b05ae52dae..26129bff2dd6 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -28,7 +28,7 @@ struct hdmi_connector {
 };
 #define to_hdmi_connector(x) container_of(x, struct hdmi_connector, base)
 
-static void hdmi_phy_reset(struct hdmi *hdmi)
+static void msm_hdmi_phy_reset(struct hdmi *hdmi)
 {
 	unsigned int val;
 
@@ -81,114 +81,54 @@ static int gpio_config(struct hdmi *hdmi, bool on)
 {
 	struct device *dev = &hdmi->pdev->dev;
 	const struct hdmi_platform_config *config = hdmi->config;
-	int ret;
+	int ret, i;
 
 	if (on) {
-		if (config->ddc_clk_gpio != -1) {
-			ret = gpio_request(config->ddc_clk_gpio, "HDMI_DDC_CLK");
-			if (ret) {
-				dev_err(dev, "'%s'(%d) gpio_request failed: %d\n",
-					"HDMI_DDC_CLK", config->ddc_clk_gpio, ret);
-				goto error1;
-			}
-			gpio_set_value_cansleep(config->ddc_clk_gpio, 1);
-		}
-
-		if (config->ddc_data_gpio != -1) {
-			ret = gpio_request(config->ddc_data_gpio, "HDMI_DDC_DATA");
-			if (ret) {
-				dev_err(dev, "'%s'(%d) gpio_request failed: %d\n",
-					"HDMI_DDC_DATA", config->ddc_data_gpio, ret);
-				goto error2;
-			}
-			gpio_set_value_cansleep(config->ddc_data_gpio, 1);
-		}
-
-		ret = gpio_request(config->hpd_gpio, "HDMI_HPD");
-		if (ret) {
-			dev_err(dev, "'%s'(%d) gpio_request failed: %d\n",
-				"HDMI_HPD", config->hpd_gpio, ret);
-			goto error3;
-		}
-		gpio_direction_input(config->hpd_gpio);
-		gpio_set_value_cansleep(config->hpd_gpio, 1);
-
-		if (config->mux_en_gpio != -1) {
-			ret = gpio_request(config->mux_en_gpio, "HDMI_MUX_EN");
-			if (ret) {
-				dev_err(dev, "'%s'(%d) gpio_request failed: %d\n",
-					"HDMI_MUX_EN", config->mux_en_gpio, ret);
-				goto error4;
-			}
-			gpio_set_value_cansleep(config->mux_en_gpio, 1);
-		}
-
-		if (config->mux_sel_gpio != -1) {
-			ret = gpio_request(config->mux_sel_gpio, "HDMI_MUX_SEL");
-			if (ret) {
-				dev_err(dev, "'%s'(%d) gpio_request failed: %d\n",
-					"HDMI_MUX_SEL", config->mux_sel_gpio, ret);
-				goto error5;
+		for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+			struct hdmi_gpio_data gpio = config->gpios[i];
+
+			if (gpio.num != -1) {
+				ret = gpio_request(gpio.num, gpio.label);
+				if (ret) {
+					dev_err(dev,
+						"'%s'(%d) gpio_request failed: %d\n",
+						gpio.label, gpio.num, ret);
+					goto err;
+				}
+
+				if (gpio.output) {
+					gpio_direction_output(gpio.num,
+							      gpio.value);
+				} else {
+					gpio_direction_input(gpio.num);
+					gpio_set_value_cansleep(gpio.num,
+								gpio.value);
+				}
 			}
-			gpio_set_value_cansleep(config->mux_sel_gpio, 0);
 		}
 
-		if (config->mux_lpm_gpio != -1) {
-			ret = gpio_request(config->mux_lpm_gpio,
-					"HDMI_MUX_LPM");
-			if (ret) {
-				dev_err(dev,
-					"'%s'(%d) gpio_request failed: %d\n",
-					"HDMI_MUX_LPM",
-					config->mux_lpm_gpio, ret);
-				goto error6;
-			}
-			gpio_set_value_cansleep(config->mux_lpm_gpio, 1);
-		}
 		DBG("gpio on");
 	} else {
-		if (config->ddc_clk_gpio != -1)
-			gpio_free(config->ddc_clk_gpio);
-
-		if (config->ddc_data_gpio != -1)
-			gpio_free(config->ddc_data_gpio);
+		for (i = 0; i < HDMI_MAX_NUM_GPIO; i++) {
+			struct hdmi_gpio_data gpio = config->gpios[i];
 
-		gpio_free(config->hpd_gpio);
+			if (gpio.output) {
+				int value = gpio.value ? 0 : 1;
 
-		if (config->mux_en_gpio != -1) {
-			gpio_set_value_cansleep(config->mux_en_gpio, 0);
-			gpio_free(config->mux_en_gpio);
-		}
+				gpio_set_value_cansleep(gpio.num, value);
+			}
 
-		if (config->mux_sel_gpio != -1) {
-			gpio_set_value_cansleep(config->mux_sel_gpio, 1);
-			gpio_free(config->mux_sel_gpio);
-		}
+			gpio_free(gpio.num);
+		};
 
-		if (config->mux_lpm_gpio != -1) {
-			gpio_set_value_cansleep(config->mux_lpm_gpio, 0);
-			gpio_free(config->mux_lpm_gpio);
-		}
 		DBG("gpio off");
 	}
 
 	return 0;
+err:
+	while (i--)
+		gpio_free(config->gpios[i].num);
 
-error6:
-	if (config->mux_sel_gpio != -1)
-		gpio_free(config->mux_sel_gpio);
-error5:
-	if (config->mux_en_gpio != -1)
-		gpio_free(config->mux_en_gpio);
-error4:
-	gpio_free(config->hpd_gpio);
-error3:
-	if (config->ddc_data_gpio != -1)
-		gpio_free(config->ddc_data_gpio);
-error2:
-	if (config->ddc_clk_gpio != -1)
-		gpio_free(config->ddc_clk_gpio);
-error1:
 	return ret;
 }
 
@@ -239,9 +179,9 @@ static int hpd_enable(struct hdmi_connector *hdmi_connector)
 		}
 	}
 
-	hdmi_set_mode(hdmi, false);
-	hdmi_phy_reset(hdmi);
-	hdmi_set_mode(hdmi, true);
+	msm_hdmi_set_mode(hdmi, false);
+	msm_hdmi_phy_reset(hdmi);
+	msm_hdmi_set_mode(hdmi, true);
 
 	hdmi_write(hdmi, REG_HDMI_USEC_REFTIMER, 0x0001001b);
 
@@ -278,7 +218,7 @@ static void hdp_disable(struct hdmi_connector *hdmi_connector)
 	/* Disable HPD interrupt */
 	hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, 0);
 
-	hdmi_set_mode(hdmi, false);
+	msm_hdmi_set_mode(hdmi, false);
 
 	for (i = 0; i < config->hpd_clk_cnt; i++)
 		clk_disable_unprepare(hdmi->hpd_clks[i]);
@@ -300,7 +240,7 @@ static void hdp_disable(struct hdmi_connector *hdmi_connector)
 }
 
 static void
-hotplug_work(struct work_struct *work)
+msm_hdmi_hotplug_work(struct work_struct *work)
 {
 	struct hdmi_connector *hdmi_connector =
 		container_of(work, struct hdmi_connector, hpd_work);
@@ -308,7 +248,7 @@ hotplug_work(struct work_struct *work)
 	drm_helper_hpd_irq_event(connector->dev);
 }
 
-void hdmi_connector_irq(struct drm_connector *connector)
+void msm_hdmi_connector_irq(struct drm_connector *connector)
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
@@ -345,10 +285,13 @@ static enum drm_connector_status detect_reg(struct hdmi *hdmi)
 			connector_status_connected : connector_status_disconnected;
 }
 
+#define HPD_GPIO_INDEX	2
 static enum drm_connector_status detect_gpio(struct hdmi *hdmi)
 {
 	const struct hdmi_platform_config *config = hdmi->config;
-	return gpio_get_value(config->hpd_gpio) ?
+	struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX];
+
+	return gpio_get_value(hpd_gpio.num) ?
 			connector_status_connected :
 			connector_status_disconnected;
 }
@@ -358,9 +301,18 @@ static enum drm_connector_status hdmi_connector_detect(
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
+	const struct hdmi_platform_config *config = hdmi->config;
+	struct hdmi_gpio_data hpd_gpio = config->gpios[HPD_GPIO_INDEX];
 	enum drm_connector_status stat_gpio, stat_reg;
 	int retry = 20;
 
+	/*
+	 * some platforms may not have hpd gpio. Rely only on the status
+	 * provided by REG_HDMI_HPD_INT_STATUS in this case.
+	 */
+	if (hpd_gpio.num == -1)
+		return detect_reg(hdmi);
+
 	do {
 		stat_gpio = detect_gpio(hdmi);
 		stat_reg  = detect_reg(hdmi);
@@ -395,7 +347,7 @@ static void hdmi_connector_destroy(struct drm_connector *connector)
 	kfree(hdmi_connector);
 }
 
-static int hdmi_connector_get_modes(struct drm_connector *connector)
+static int msm_hdmi_connector_get_modes(struct drm_connector *connector)
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
@@ -421,7 +373,7 @@ static int hdmi_connector_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int hdmi_connector_mode_valid(struct drm_connector *connector,
+static int msm_hdmi_connector_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
@@ -451,7 +403,7 @@ static int hdmi_connector_mode_valid(struct drm_connector *connector,
 }
 
 static struct drm_encoder *
-hdmi_connector_best_encoder(struct drm_connector *connector)
+msm_hdmi_connector_best_encoder(struct drm_connector *connector)
 {
 	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	return hdmi_connector->hdmi->encoder;
@@ -467,14 +419,14 @@ static const struct drm_connector_funcs hdmi_connector_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static const struct drm_connector_helper_funcs hdmi_connector_helper_funcs = {
-	.get_modes = hdmi_connector_get_modes,
-	.mode_valid = hdmi_connector_mode_valid,
-	.best_encoder = hdmi_connector_best_encoder,
+static const struct drm_connector_helper_funcs msm_hdmi_connector_helper_funcs = {
+	.get_modes = msm_hdmi_connector_get_modes,
+	.mode_valid = msm_hdmi_connector_mode_valid,
+	.best_encoder = msm_hdmi_connector_best_encoder,
 };
 
 /* initialize connector */
-struct drm_connector *hdmi_connector_init(struct hdmi *hdmi)
+struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
 {
 	struct drm_connector *connector = NULL;
 	struct hdmi_connector *hdmi_connector;
@@ -487,13 +439,13 @@ struct drm_connector *hdmi_connector_init(struct hdmi *hdmi)
 	}
 
 	hdmi_connector->hdmi = hdmi;
-	INIT_WORK(&hdmi_connector->hpd_work, hotplug_work);
+	INIT_WORK(&hdmi_connector->hpd_work, msm_hdmi_hotplug_work);
 
 	connector = &hdmi_connector->base;
 
 	drm_connector_init(hdmi->dev, connector, &hdmi_connector_funcs,
 			DRM_MODE_CONNECTOR_HDMIA);
-	drm_connector_helper_add(connector, &hdmi_connector_helper_funcs);
+	drm_connector_helper_add(connector, &msm_hdmi_connector_helper_funcs);
 
 	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
 			DRM_CONNECTOR_POLL_DISCONNECT;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
index 1dc9c34eb0df..0baaaaabd002 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
@@ -84,7 +84,7 @@ struct hdmi_hdcp_ctrl {
 	bool max_dev_exceeded;
 };
 
-static int hdmi_ddc_read(struct hdmi *hdmi, u16 addr, u8 offset,
+static int msm_hdmi_ddc_read(struct hdmi *hdmi, u16 addr, u8 offset,
 	u8 *data, u16 data_len)
 {
 	int rc;
@@ -122,7 +122,7 @@ retry:
 
 #define HDCP_DDC_WRITE_MAX_BYTE_NUM 32
 
-static int hdmi_ddc_write(struct hdmi *hdmi, u16 addr, u8 offset,
+static int msm_hdmi_ddc_write(struct hdmi *hdmi, u16 addr, u8 offset,
 	u8 *data, u16 data_len)
 {
 	int rc;
@@ -162,7 +162,7 @@ retry:
 	return rc;
 }
 
-static int hdmi_hdcp_scm_wr(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 *preg,
+static int msm_hdmi_hdcp_scm_wr(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 *preg,
 	u32 *pdata, u32 count)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -202,7 +202,7 @@ static int hdmi_hdcp_scm_wr(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 *preg,
 	return ret;
 }
 
-void hdmi_hdcp_irq(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+void msm_hdmi_hdcp_irq(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg_val, hdcp_int_status;
@@ -247,7 +247,7 @@ void hdmi_hdcp_irq(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	}
 }
 
-static int hdmi_hdcp_msleep(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 ms, u32 ev)
+static int msm_hdmi_hdcp_msleep(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 ms, u32 ev)
 {
 	int rc;
 
@@ -264,7 +264,7 @@ static int hdmi_hdcp_msleep(struct hdmi_hdcp_ctrl *hdcp_ctrl, u32 ms, u32 ev)
 	return 0;
 }
 
-static int hdmi_hdcp_read_validate_aksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_read_validate_aksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 
@@ -287,7 +287,7 @@ static int hdmi_hdcp_read_validate_aksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static int reset_hdcp_ddc_failures(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_reset_hdcp_ddc_failures(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg_val, failure, nack0;
@@ -337,7 +337,7 @@ static int reset_hdcp_ddc_failures(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 		reg_val |= HDMI_DDC_CTRL_SW_STATUS_RESET;
 		hdmi_write(hdmi, REG_HDMI_DDC_CTRL, reg_val);
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 
 		reg_val = hdmi_read(hdmi, REG_HDMI_DDC_CTRL);
 		reg_val &= ~HDMI_DDC_CTRL_SW_STATUS_RESET;
@@ -350,7 +350,7 @@ static int reset_hdcp_ddc_failures(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 
 		/* If previous msleep is aborted, skip this msleep */
 		if (!rc)
-			rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+			rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 
 		reg_val = hdmi_read(hdmi, REG_HDMI_DDC_CTRL);
 		reg_val &= ~HDMI_DDC_CTRL_SOFT_RESET;
@@ -362,7 +362,7 @@ static int reset_hdcp_ddc_failures(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return rc;
 }
 
-static int hdmi_hdcp_hw_ddc_clean(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_hw_ddc_clean(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
 	u32 hdcp_ddc_status, ddc_hw_status;
@@ -394,7 +394,7 @@ static int hdmi_hdcp_hw_ddc_clean(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
@@ -402,7 +402,7 @@ static int hdmi_hdcp_hw_ddc_clean(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static void hdmi_hdcp_reauth_work(struct work_struct *work)
+static void msm_hdmi_hdcp_reauth_work(struct work_struct *work)
 {
 	struct hdmi_hdcp_ctrl *hdcp_ctrl = container_of(work,
 		struct hdmi_hdcp_ctrl, hdcp_reauth_work);
@@ -430,7 +430,7 @@ static void hdmi_hdcp_reauth_work(struct work_struct *work)
 		HDMI_HDCP_RESET_LINK0_DEAUTHENTICATE);
 
 	/* Wait to be clean on DDC HW engine */
-	if (hdmi_hdcp_hw_ddc_clean(hdcp_ctrl)) {
+	if (msm_hdmi_hdcp_hw_ddc_clean(hdcp_ctrl)) {
 		pr_info("%s: reauth work aborted\n", __func__);
 		return;
 	}
@@ -461,7 +461,7 @@ static void hdmi_hdcp_reauth_work(struct work_struct *work)
 	queue_work(hdmi->workq, &hdcp_ctrl->hdcp_auth_work);
 }
 
-static int hdmi_hdcp_auth_prepare(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_auth_prepare(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 link0_status;
@@ -470,7 +470,7 @@ static int hdmi_hdcp_auth_prepare(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	int rc;
 
 	if (!hdcp_ctrl->aksv_valid) {
-		rc = hdmi_hdcp_read_validate_aksv(hdcp_ctrl);
+		rc = msm_hdmi_hdcp_read_validate_aksv(hdcp_ctrl);
 		if (rc) {
 			pr_err("%s: ASKV validation failed\n", __func__);
 			hdcp_ctrl->hdcp_state = HDCP_STATE_NO_AKSV;
@@ -538,12 +538,12 @@ static int hdmi_hdcp_auth_prepare(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 		DBG("An not ready after enabling HDCP");
 
 	/* Clear any DDC failures from previous tries before enable HDCP*/
-	rc = reset_hdcp_ddc_failures(hdcp_ctrl);
+	rc = msm_reset_hdcp_ddc_failures(hdcp_ctrl);
 
 	return rc;
 }
 
-static void hdmi_hdcp_auth_fail(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static void msm_hdmi_hdcp_auth_fail(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg_val;
@@ -561,7 +561,7 @@ static void hdmi_hdcp_auth_fail(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	queue_work(hdmi->workq, &hdcp_ctrl->hdcp_reauth_work);
 }
 
-static void hdmi_hdcp_auth_done(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static void msm_hdmi_hdcp_auth_done(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg_val;
@@ -596,7 +596,7 @@ static void hdmi_hdcp_auth_done(struct hdmi_hdcp_ctrl *hdcp_ctrl)
  * Write An and AKSV to sink
  * Read BKSV from sink and write into HDCP engine
  */
-static int hdmi_hdcp_wait_key_an_ready(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_wait_key_an_ready(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -621,7 +621,7 @@ static int hdmi_hdcp_wait_key_an_ready(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
@@ -643,7 +643,7 @@ static int hdmi_hdcp_wait_key_an_ready(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
@@ -651,7 +651,7 @@ static int hdmi_hdcp_wait_key_an_ready(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static int hdmi_hdcp_send_aksv_an(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_send_aksv_an(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc = 0;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -676,7 +676,7 @@ static int hdmi_hdcp_send_aksv_an(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	aksv[4] =  link0_aksv_1        & 0xFF;
 
 	/* Write An to offset 0x18 */
-	rc = hdmi_ddc_write(hdmi, HDCP_PORT_ADDR, 0x18, (u8 *)link0_an,
+	rc = msm_hdmi_ddc_write(hdmi, HDCP_PORT_ADDR, 0x18, (u8 *)link0_an,
 		(u16)sizeof(link0_an));
 	if (rc) {
 		pr_err("%s:An write failed\n", __func__);
@@ -685,7 +685,7 @@ static int hdmi_hdcp_send_aksv_an(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	DBG("Link0-An=%08x%08x", link0_an[0], link0_an[1]);
 
 	/* Write AKSV to offset 0x10 */
-	rc = hdmi_ddc_write(hdmi, HDCP_PORT_ADDR, 0x10, aksv, 5);
+	rc = msm_hdmi_ddc_write(hdmi, HDCP_PORT_ADDR, 0x10, aksv, 5);
 	if (rc) {
 		pr_err("%s:AKSV write failed\n", __func__);
 		return rc;
@@ -695,7 +695,7 @@ static int hdmi_hdcp_send_aksv_an(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static int hdmi_hdcp_recv_bksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_recv_bksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc = 0;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -703,7 +703,7 @@ static int hdmi_hdcp_recv_bksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	u32 reg[2], data[2];
 
 	/* Read BKSV at offset 0x00 */
-	rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x00, bksv, 5);
+	rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x00, bksv, 5);
 	if (rc) {
 		pr_err("%s:BKSV read failed\n", __func__);
 		return rc;
@@ -728,19 +728,19 @@ static int hdmi_hdcp_recv_bksv(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	data[0] = hdcp_ctrl->bksv_lsb;
 	reg[1] = REG_HDMI_HDCP_RCVPORT_DATA1;
 	data[1] = hdcp_ctrl->bksv_msb;
-	rc = hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, 2);
+	rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, 2);
 
 	return rc;
 }
 
-static int hdmi_hdcp_recv_bcaps(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_recv_bcaps(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc = 0;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg, data;
 	u8 bcaps;
 
-	rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x40, &bcaps, 1);
+	rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x40, &bcaps, 1);
 	if (rc) {
 		pr_err("%s:BCAPS read failed\n", __func__);
 		return rc;
@@ -753,26 +753,26 @@ static int hdmi_hdcp_recv_bcaps(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	/* Write BCAPS to the hardware */
 	reg = REG_HDMI_HDCP_RCVPORT_DATA12;
 	data = (u32)bcaps;
-	rc = hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
+	rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
 
 	return rc;
 }
 
-static int hdmi_hdcp_auth_part1_key_exchange(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_auth_part1_key_exchange(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	unsigned long flags;
 	int rc;
 
 	/* Wait for AKSV key and An ready */
-	rc = hdmi_hdcp_wait_key_an_ready(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_wait_key_an_ready(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: wait key and an ready failed\n", __func__);
 		return rc;
 	};
 
 	/* Read BCAPS and send to HDCP engine */
-	rc = hdmi_hdcp_recv_bcaps(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_recv_bcaps(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: read bcaps error, abort\n", __func__);
 		return rc;
@@ -785,14 +785,14 @@ static int hdmi_hdcp_auth_part1_key_exchange(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	hdmi_write(hdmi, REG_HDMI_HDCP_RCVPORT_DATA4, 0);
 
 	/* Send AKSV and An to sink */
-	rc = hdmi_hdcp_send_aksv_an(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_send_aksv_an(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s:An/Aksv write failed\n", __func__);
 		return rc;
 	}
 
 	/* Read BKSV and send to HDCP engine*/
-	rc = hdmi_hdcp_recv_bksv(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_recv_bksv(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s:BKSV Process failed\n", __func__);
 		return rc;
@@ -812,7 +812,7 @@ static int hdmi_hdcp_auth_part1_key_exchange(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 }
 
 /* read R0' from sink and pass it to HDCP engine */
-static int hdmi_hdcp_auth_part1_recv_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_auth_part1_recv_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	int rc = 0;
@@ -822,12 +822,12 @@ static int hdmi_hdcp_auth_part1_recv_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	 * HDCP Compliance Test case 1A-01:
 	 * Wait here at least 100ms before reading R0'
 	 */
-	rc = hdmi_hdcp_msleep(hdcp_ctrl, 125, AUTH_ABORT_EV);
+	rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 125, AUTH_ABORT_EV);
 	if (rc)
 		return rc;
 
 	/* Read R0' at offset 0x08 */
-	rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x08, buf, 2);
+	rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x08, buf, 2);
 	if (rc) {
 		pr_err("%s:R0' read failed\n", __func__);
 		return rc;
@@ -842,14 +842,14 @@ static int hdmi_hdcp_auth_part1_recv_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 }
 
 /* Wait for authenticating result: R0/R0' are matched or not */
-static int hdmi_hdcp_auth_part1_verify_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_auth_part1_verify_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 link0_status;
 	int rc;
 
 	/* wait for hdcp irq, 10 sec should be long enough */
-	rc = hdmi_hdcp_msleep(hdcp_ctrl, 10000, AUTH_RESULT_RDY_EV);
+	rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 10000, AUTH_RESULT_RDY_EV);
 	if (!rc) {
 		pr_err("%s: Wait Auth IRQ timeout\n", __func__);
 		return -ETIMEDOUT;
@@ -869,7 +869,7 @@ static int hdmi_hdcp_auth_part1_verify_r0(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static int hdmi_hdcp_recv_check_bstatus(struct hdmi_hdcp_ctrl *hdcp_ctrl,
+static int msm_hdmi_hdcp_recv_check_bstatus(struct hdmi_hdcp_ctrl *hdcp_ctrl,
 	u16 *pbstatus)
 {
 	int rc;
@@ -880,7 +880,7 @@ static int hdmi_hdcp_recv_check_bstatus(struct hdmi_hdcp_ctrl *hdcp_ctrl,
 	u8 buf[2];
 
 	/* Read BSTATUS at offset 0x41 */
-	rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x41, buf, 2);
+	rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x41, buf, 2);
 	if (rc) {
 		pr_err("%s: BSTATUS read failed\n", __func__);
 		goto error;
@@ -936,7 +936,7 @@ error:
 	return rc;
 }
 
-static int hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
+static int msm_hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
 	struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
@@ -953,7 +953,7 @@ static int hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
 	timeout_count = 100;
 	do {
 		/* Read BCAPS at offset 0x40 */
-		rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x40, &bcaps, 1);
+		rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x40, &bcaps, 1);
 		if (rc) {
 			pr_err("%s: BCAPS read failed\n", __func__);
 			return rc;
@@ -968,12 +968,12 @@ static int hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
 
-	rc = hdmi_hdcp_recv_check_bstatus(hdcp_ctrl, &bstatus);
+	rc = msm_hdmi_hdcp_recv_check_bstatus(hdcp_ctrl, &bstatus);
 	if (rc) {
 		pr_err("%s: bstatus error\n", __func__);
 		return rc;
@@ -982,7 +982,7 @@ static int hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
 	/* Write BSTATUS and BCAPS to HDCP registers */
 	reg = REG_HDMI_HDCP_RCVPORT_DATA12;
 	data = bcaps | (bstatus << 8);
-	rc = hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
+	rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
 	if (rc) {
 		pr_err("%s: BSTATUS write failed\n", __func__);
 		return rc;
@@ -997,7 +997,7 @@ static int hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(
  * transfer V' from sink to HDCP engine
  * reset SHA engine
  */
-static int hdmi_hdcp_transfer_v_h(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_transfer_v_h(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	int rc = 0;
@@ -1016,7 +1016,7 @@ static int hdmi_hdcp_transfer_v_h(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 
 	for (i = 0; i < size; i++) {
 		rd = &reg_data[i];
-		rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR,
+		rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR,
 			rd->off, (u8 *)&data[i], (u16)sizeof(data[i]));
 		if (rc) {
 			pr_err("%s: Read %s failed\n", __func__, rd->name);
@@ -1027,13 +1027,13 @@ static int hdmi_hdcp_transfer_v_h(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 		reg[i] = reg_data[i].reg_id;
 	}
 
-	rc = hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, size);
+	rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, size);
 
 error:
 	return rc;
 }
 
-static int hdmi_hdcp_recv_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_recv_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -1041,7 +1041,7 @@ static int hdmi_hdcp_recv_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 
 	ksv_bytes = 5 * hdcp_ctrl->dev_count;
 
-	rc = hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x43,
+	rc = msm_hdmi_ddc_read(hdmi, HDCP_PORT_ADDR, 0x43,
 		hdcp_ctrl->ksv_list, ksv_bytes);
 	if (rc)
 		pr_err("%s: KSV FIFO read failed\n", __func__);
@@ -1049,7 +1049,7 @@ static int hdmi_hdcp_recv_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return rc;
 }
 
-static int hdmi_hdcp_reset_sha_engine(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_reset_sha_engine(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	u32 reg[2], data[2];
 	u32 rc  = 0;
@@ -1059,12 +1059,12 @@ static int hdmi_hdcp_reset_sha_engine(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	reg[1] = REG_HDMI_HDCP_SHA_CTRL;
 	data[1] = HDCP_REG_DISABLE;
 
-	rc = hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, 2);
+	rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, reg, data, 2);
 
 	return rc;
 }
 
-static int hdmi_hdcp_auth_part2_recv_ksv_fifo(
+static int msm_hdmi_hdcp_auth_part2_recv_ksv_fifo(
 	struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
@@ -1081,7 +1081,7 @@ static int hdmi_hdcp_auth_part2_recv_ksv_fifo(
 	 */
 	timeout_count = 100;
 	do {
-		rc = hdmi_hdcp_recv_ksv_fifo(hdcp_ctrl);
+		rc = msm_hdmi_hdcp_recv_ksv_fifo(hdcp_ctrl);
 		if (!rc)
 			break;
 
@@ -1091,19 +1091,19 @@ static int hdmi_hdcp_auth_part2_recv_ksv_fifo(
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 25, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 25, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
 
-	rc = hdmi_hdcp_transfer_v_h(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_transfer_v_h(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: transfer V failed\n", __func__);
 		return rc;
 	}
 
 	/* reset SHA engine before write ksv fifo */
-	rc = hdmi_hdcp_reset_sha_engine(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_reset_sha_engine(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: fail to reset sha engine\n", __func__);
 		return rc;
@@ -1120,7 +1120,7 @@ static int hdmi_hdcp_auth_part2_recv_ksv_fifo(
  * If the last byte is written, we need to poll for
  * HDCP_SHA_COMP_DONE to wait until HW finish
  */
-static int hdmi_hdcp_write_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_write_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int i;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -1169,7 +1169,7 @@ static int hdmi_hdcp_write_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 
 		reg = REG_HDMI_HDCP_SHA_DATA;
 		data = reg_val;
-		rc = hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
+		rc = msm_hdmi_hdcp_scm_wr(hdcp_ctrl, &reg, &data, 1);
 
 		if (rc)
 			return rc;
@@ -1184,7 +1184,7 @@ static int hdmi_hdcp_write_ksv_fifo(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 }
 
 /* write ksv fifo into HDCP engine */
-static int hdmi_hdcp_auth_part2_write_ksv_fifo(
+static int msm_hdmi_hdcp_auth_part2_write_ksv_fifo(
 	struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc;
@@ -1193,7 +1193,7 @@ static int hdmi_hdcp_auth_part2_write_ksv_fifo(
 	hdcp_ctrl->ksv_fifo_w_index = 0;
 	timeout_count = 100;
 	do {
-		rc = hdmi_hdcp_write_ksv_fifo(hdcp_ctrl);
+		rc = msm_hdmi_hdcp_write_ksv_fifo(hdcp_ctrl);
 		if (!rc)
 			break;
 
@@ -1206,7 +1206,7 @@ static int hdmi_hdcp_auth_part2_write_ksv_fifo(
 			return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
@@ -1214,7 +1214,7 @@ static int hdmi_hdcp_auth_part2_write_ksv_fifo(
 	return 0;
 }
 
-static int hdmi_hdcp_auth_part2_check_v_match(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+static int msm_hdmi_hdcp_auth_part2_check_v_match(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	int rc = 0;
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
@@ -1232,7 +1232,7 @@ static int hdmi_hdcp_auth_part2_check_v_match(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 				return -ETIMEDOUT;
 		}
 
-		rc = hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
+		rc = msm_hdmi_hdcp_msleep(hdcp_ctrl, 20, AUTH_ABORT_EV);
 		if (rc)
 			return rc;
 	} while (1);
@@ -1240,32 +1240,32 @@ static int hdmi_hdcp_auth_part2_check_v_match(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	return 0;
 }
 
-static void hdmi_hdcp_auth_work(struct work_struct *work)
+static void msm_hdmi_hdcp_auth_work(struct work_struct *work)
 {
 	struct hdmi_hdcp_ctrl *hdcp_ctrl = container_of(work,
 		struct hdmi_hdcp_ctrl, hdcp_auth_work);
 	int rc;
 
-	rc = hdmi_hdcp_auth_prepare(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_prepare(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: auth prepare failed %d\n", __func__, rc);
 		goto end;
 	}
 
 	/* HDCP PartI */
-	rc = hdmi_hdcp_auth_part1_key_exchange(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part1_key_exchange(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: key exchange failed %d\n", __func__, rc);
 		goto end;
 	}
 
-	rc = hdmi_hdcp_auth_part1_recv_r0(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part1_recv_r0(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: receive r0 failed %d\n", __func__, rc);
 		goto end;
 	}
 
-	rc = hdmi_hdcp_auth_part1_verify_r0(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part1_verify_r0(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: verify r0 failed %d\n", __func__, rc);
 		goto end;
@@ -1275,25 +1275,25 @@ static void hdmi_hdcp_auth_work(struct work_struct *work)
 		goto end;
 
 	/* HDCP PartII */
-	rc = hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part2_wait_ksv_fifo_ready(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: wait ksv fifo ready failed %d\n", __func__, rc);
 		goto end;
 	}
 
-	rc = hdmi_hdcp_auth_part2_recv_ksv_fifo(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part2_recv_ksv_fifo(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: recv ksv fifo failed %d\n", __func__, rc);
 		goto end;
 	}
 
-	rc = hdmi_hdcp_auth_part2_write_ksv_fifo(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part2_write_ksv_fifo(hdcp_ctrl);
 	if (rc) {
 		pr_err("%s: write ksv fifo failed %d\n", __func__, rc);
 		goto end;
 	}
 
-	rc = hdmi_hdcp_auth_part2_check_v_match(hdcp_ctrl);
+	rc = msm_hdmi_hdcp_auth_part2_check_v_match(hdcp_ctrl);
 	if (rc)
 		pr_err("%s: check v match failed %d\n", __func__, rc);
 
@@ -1304,13 +1304,13 @@ end:
 		pr_info("%s: hdcp is not supported\n", __func__);
 	} else if (rc) {
 		pr_err("%s: hdcp authentication failed\n", __func__);
-		hdmi_hdcp_auth_fail(hdcp_ctrl);
+		msm_hdmi_hdcp_auth_fail(hdcp_ctrl);
 	} else {
-		hdmi_hdcp_auth_done(hdcp_ctrl);
+		msm_hdmi_hdcp_auth_done(hdcp_ctrl);
 	}
 }
 
-void hdmi_hdcp_on(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+void msm_hdmi_hdcp_on(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	u32 reg_val;
@@ -1335,7 +1335,7 @@ void hdmi_hdcp_on(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	queue_work(hdmi->workq, &hdcp_ctrl->hdcp_auth_work);
 }
 
-void hdmi_hdcp_off(struct hdmi_hdcp_ctrl *hdcp_ctrl)
+void msm_hdmi_hdcp_off(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 {
 	struct hdmi *hdmi = hdcp_ctrl->hdmi;
 	unsigned long flags;
@@ -1399,7 +1399,7 @@ void hdmi_hdcp_off(struct hdmi_hdcp_ctrl *hdcp_ctrl)
 	DBG("HDCP: Off");
 }
 
-struct hdmi_hdcp_ctrl *hdmi_hdcp_init(struct hdmi *hdmi)
+struct hdmi_hdcp_ctrl *msm_hdmi_hdcp_init(struct hdmi *hdmi)
 {
 	struct hdmi_hdcp_ctrl *hdcp_ctrl = NULL;
 
@@ -1413,8 +1413,8 @@ struct hdmi_hdcp_ctrl *hdmi_hdcp_init(struct hdmi *hdmi)
 	if (!hdcp_ctrl)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_WORK(&hdcp_ctrl->hdcp_auth_work, hdmi_hdcp_auth_work);
-	INIT_WORK(&hdcp_ctrl->hdcp_reauth_work, hdmi_hdcp_reauth_work);
+	INIT_WORK(&hdcp_ctrl->hdcp_auth_work, msm_hdmi_hdcp_auth_work);
+	INIT_WORK(&hdcp_ctrl->hdcp_reauth_work, msm_hdmi_hdcp_reauth_work);
 	init_waitqueue_head(&hdcp_ctrl->auth_event_queue);
 	hdcp_ctrl->hdmi = hdmi;
 	hdcp_ctrl->hdcp_state = HDCP_STATE_INACTIVE;
@@ -1428,7 +1428,7 @@ struct hdmi_hdcp_ctrl *hdmi_hdcp_init(struct hdmi *hdmi)
 	return hdcp_ctrl;
 }
 
-void hdmi_hdcp_destroy(struct hdmi *hdmi)
+void msm_hdmi_hdcp_destroy(struct hdmi *hdmi)
 {
 	if (hdmi && hdmi->hdcp_ctrl) {
 		kfree(hdmi->hdcp_ctrl);
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
index f4ab7f70fed1..de9007e72f4e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
@@ -97,7 +97,7 @@ static bool sw_done(struct hdmi_i2c_adapter *hdmi_i2c)
 	return hdmi_i2c->sw_done;
 }
 
-static int hdmi_i2c_xfer(struct i2c_adapter *i2c,
+static int msm_hdmi_i2c_xfer(struct i2c_adapter *i2c,
 		struct i2c_msg *msgs, int num)
 {
 	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
@@ -216,17 +216,17 @@ static int hdmi_i2c_xfer(struct i2c_adapter *i2c,
 	return i;
 }
 
-static u32 hdmi_i2c_func(struct i2c_adapter *adapter)
+static u32 msm_hdmi_i2c_func(struct i2c_adapter *adapter)
 {
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
-static const struct i2c_algorithm hdmi_i2c_algorithm = {
-	.master_xfer	= hdmi_i2c_xfer,
-	.functionality	= hdmi_i2c_func,
+static const struct i2c_algorithm msm_hdmi_i2c_algorithm = {
+	.master_xfer	= msm_hdmi_i2c_xfer,
+	.functionality	= msm_hdmi_i2c_func,
 };
 
-void hdmi_i2c_irq(struct i2c_adapter *i2c)
+void msm_hdmi_i2c_irq(struct i2c_adapter *i2c)
 {
 	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
 
@@ -234,14 +234,14 @@ void hdmi_i2c_irq(struct i2c_adapter *i2c)
 		wake_up_all(&hdmi_i2c->ddc_event);
 }
 
-void hdmi_i2c_destroy(struct i2c_adapter *i2c)
+void msm_hdmi_i2c_destroy(struct i2c_adapter *i2c)
 {
 	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
 	i2c_del_adapter(i2c);
 	kfree(hdmi_i2c);
 }
 
-struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi)
+struct i2c_adapter *msm_hdmi_i2c_init(struct hdmi *hdmi)
 {
 	struct drm_device *dev = hdmi->dev;
 	struct hdmi_i2c_adapter *hdmi_i2c;
@@ -264,7 +264,7 @@ struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi)
 	i2c->class = I2C_CLASS_DDC;
 	snprintf(i2c->name, sizeof(i2c->name), "msm hdmi i2c");
 	i2c->dev.parent = &hdmi->pdev->dev;
-	i2c->algo = &hdmi_i2c_algorithm;
+	i2c->algo = &msm_hdmi_i2c_algorithm;
 
 	ret = i2c_add_adapter(i2c);
 	if (ret) {
@@ -276,6 +276,6 @@ struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi)
 
 fail:
 	if (i2c)
-		hdmi_i2c_destroy(i2c);
+		msm_hdmi_i2c_destroy(i2c);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c
new file mode 100644
index 000000000000..534ce5b49781
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of_device.h>
+
+#include "hdmi.h"
+
+static int msm_hdmi_phy_resource_init(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_cfg *cfg = phy->cfg;
+	struct device *dev = &phy->pdev->dev;
+	int i, ret;
+
+	phy->regs = devm_kzalloc(dev, sizeof(phy->regs[0]) * cfg->num_regs,
+				 GFP_KERNEL);
+	if (!phy->regs)
+		return -ENOMEM;
+
+	phy->clks = devm_kzalloc(dev, sizeof(phy->clks[0]) * cfg->num_clks,
+				 GFP_KERNEL);
+	if (!phy->clks)
+		return -ENOMEM;
+
+	for (i = 0; i < cfg->num_regs; i++) {
+		struct regulator *reg;
+
+		reg = devm_regulator_get(dev, cfg->reg_names[i]);
+		if (IS_ERR(reg)) {
+			ret = PTR_ERR(reg);
+			dev_err(dev, "failed to get phy regulator: %s (%d)\n",
+				cfg->reg_names[i], ret);
+			return ret;
+		}
+
+		phy->regs[i] = reg;
+	}
+
+	for (i = 0; i < cfg->num_clks; i++) {
+		struct clk *clk;
+
+		clk = devm_clk_get(dev, cfg->clk_names[i]);
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			dev_err(dev, "failed to get phy clock: %s (%d)\n",
+				cfg->clk_names[i], ret);
+			return ret;
+		}
+
+		phy->clks[i] = clk;
+	}
+
+	return 0;
+}
+
+int msm_hdmi_phy_resource_enable(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_cfg *cfg = phy->cfg;
+	struct device *dev = &phy->pdev->dev;
+	int i, ret = 0;
+
+	pm_runtime_get_sync(dev);
+
+	for (i = 0; i < cfg->num_regs; i++) {
+		ret = regulator_enable(phy->regs[i]);
+		if (ret)
+			dev_err(dev, "failed to enable regulator: %s (%d)\n",
+				cfg->reg_names[i], ret);
+	}
+
+	for (i = 0; i < cfg->num_clks; i++) {
+		ret = clk_prepare_enable(phy->clks[i]);
+		if (ret)
+			dev_err(dev, "failed to enable clock: %s (%d)\n",
+				cfg->clk_names[i], ret);
+	}
+
+	return ret;
+}
+
+void msm_hdmi_phy_resource_disable(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_cfg *cfg = phy->cfg;
+	struct device *dev = &phy->pdev->dev;
+	int i;
+
+	for (i = cfg->num_clks - 1; i >= 0; i--)
+		clk_disable_unprepare(phy->clks[i]);
+
+	for (i = cfg->num_regs - 1; i >= 0; i--)
+		regulator_disable(phy->regs[i]);
+
+	pm_runtime_put_sync(dev);
+}
+
+void msm_hdmi_phy_powerup(struct hdmi_phy *phy, unsigned long int pixclock)
+{
+	if (!phy || !phy->cfg->powerup)
+		return;
+
+	phy->cfg->powerup(phy, pixclock);
+}
+
+void msm_hdmi_phy_powerdown(struct hdmi_phy *phy)
+{
+	if (!phy || !phy->cfg->powerdown)
+		return;
+
+	phy->cfg->powerdown(phy);
+}
+
+static int msm_hdmi_phy_pll_init(struct platform_device *pdev,
+			     enum hdmi_phy_type type)
+{
+	int ret;
+
+	switch (type) {
+	case MSM_HDMI_PHY_8960:
+		ret = msm_hdmi_pll_8960_init(pdev);
+		break;
+	case MSM_HDMI_PHY_8996:
+		ret = msm_hdmi_pll_8996_init(pdev);
+		break;
+	/*
+	 * we don't have PLL support for these, don't report an error for now
+	 */
+	case MSM_HDMI_PHY_8x60:
+	case MSM_HDMI_PHY_8x74:
+	default:
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
+static int msm_hdmi_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct hdmi_phy *phy;
+	int ret;
+
+	phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+	if (!phy)
+		return -ENODEV;
+
+	phy->cfg = (struct hdmi_phy_cfg *)of_device_get_match_data(dev);
+	if (!phy->cfg)
+		return -ENODEV;
+
+	phy->mmio = msm_ioremap(pdev, "hdmi_phy", "HDMI_PHY");
+	if (IS_ERR(phy->mmio)) {
+		dev_err(dev, "%s: failed to map phy base\n", __func__);
+		return -ENOMEM;
+	}
+
+	phy->pdev = pdev;
+
+	ret = msm_hdmi_phy_resource_init(phy);
+	if (ret)
+		return ret;
+
+	pm_runtime_enable(&pdev->dev);
+
+	ret = msm_hdmi_phy_resource_enable(phy);
+	if (ret)
+		return ret;
+
+	ret = msm_hdmi_phy_pll_init(pdev, phy->cfg->type);
+	if (ret) {
+		dev_err(dev, "couldn't init PLL\n");
+		msm_hdmi_phy_resource_disable(phy);
+		return ret;
+	}
+
+	msm_hdmi_phy_resource_disable(phy);
+
+	platform_set_drvdata(pdev, phy);
+
+	return 0;
+}
+
+static int msm_hdmi_phy_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id msm_hdmi_phy_dt_match[] = {
+	{ .compatible = "qcom,hdmi-phy-8660",
+	  .data = &msm_hdmi_phy_8x60_cfg },
+	{ .compatible = "qcom,hdmi-phy-8960",
+	  .data = &msm_hdmi_phy_8960_cfg },
+	{ .compatible = "qcom,hdmi-phy-8974",
+	  .data = &msm_hdmi_phy_8x74_cfg },
+	{ .compatible = "qcom,hdmi-phy-8084",
+	  .data = &msm_hdmi_phy_8x74_cfg },
+	{ .compatible = "qcom,hdmi-phy-8996",
+	  .data = &msm_hdmi_phy_8996_cfg },
+	{}
+};
+
+static struct platform_driver msm_hdmi_phy_platform_driver = {
+	.probe      = msm_hdmi_phy_probe,
+	.remove     = msm_hdmi_phy_remove,
+	.driver     = {
+		.name   = "msm_hdmi_phy",
+		.of_match_table = msm_hdmi_phy_dt_match,
+	},
+};
+
+void __init msm_hdmi_phy_driver_register(void)
+{
+	platform_driver_register(&msm_hdmi_phy_platform_driver);
+}
+
+void __exit msm_hdmi_phy_driver_unregister(void)
+{
+	platform_driver_unregister(&msm_hdmi_phy_platform_driver);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
index 3a01cb5051e2..e6ee6b745ab7 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
@@ -15,495 +15,48 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifdef CONFIG_COMMON_CLK
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#endif
-
 #include "hdmi.h"
 
-struct hdmi_phy_8960 {
-	struct hdmi_phy base;
-	struct hdmi *hdmi;
-#ifdef CONFIG_COMMON_CLK
-	struct clk_hw pll_hw;
-	struct clk *pll;
-	unsigned long pixclk;
-#endif
-};
-#define to_hdmi_phy_8960(x) container_of(x, struct hdmi_phy_8960, base)
-
-#ifdef CONFIG_COMMON_CLK
-#define clk_to_phy(x) container_of(x, struct hdmi_phy_8960, pll_hw)
-
-/*
- * HDMI PLL:
- *
- * To get the parent clock setup properly, we need to plug in hdmi pll
- * configuration into common-clock-framework.
- */
-
-struct pll_rate {
-	unsigned long rate;
-	struct {
-		uint32_t val;
-		uint32_t reg;
-	} conf[32];
-};
-
-/* NOTE: keep sorted highest freq to lowest: */
-static const struct pll_rate freqtbl[] = {
-	{ 154000000, {
-		{ 0x08, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x0d, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x4d, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x5e, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0x42, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0, 0 } }
-	},
-	/* 1080p60/1080p50 case */
-	{ 148500000, {
-		{ 0x02, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
-		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
-		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
-		{ 0x76, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
-		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
-		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
-		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
-		{ 0xe6, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
-		{ 0, 0 } }
-	},
-	{ 108000000, {
-		{ 0x08, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x21, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x1c, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x49, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x49, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0, 0 } }
-	},
-	/* 720p60/720p50/1080i60/1080i50/1080p24/1080p30/1080p25 */
-	{ 74250000, {
-		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
-		{ 0x12, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x76, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0xe6, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0, 0 } }
-	},
-	{ 74176000, {
-		{ 0x18, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0xe5, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x0c, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x7d, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0xbc, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0, 0 } }
-	},
-	{ 65000000, {
-		{ 0x18, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x8a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x0b, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x4b, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x7b, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0x09, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0, 0 } }
-	},
-	/* 480p60/480i60 */
-	{ 27030000, {
-		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
-		{ 0x38, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
-		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0xff, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x4e, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0xd7, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0x03, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0x2a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
-		{ 0, 0 } }
-	},
-	/* 576p50/576i50 */
-	{ 27000000, {
-		{ 0x32, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
-		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
-		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
-		{ 0x7b, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
-		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
-		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
-		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
-		{ 0x2a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
-		{ 0, 0 } }
-	},
-	/* 640x480p60 */
-	{ 25200000, {
-		{ 0x32, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
-		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
-		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
-		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
-		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
-		{ 0x77, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
-		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
-		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
-		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
-		{ 0x20, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
-		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
-		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
-		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
-		{ 0xf4, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
-		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
-		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
-		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
-		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
-		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
-		{ 0, 0 } }
-	},
-};
-
-static int hdmi_pll_enable(struct clk_hw *hw)
-{
-	struct hdmi_phy_8960 *phy_8960 = clk_to_phy(hw);
-	struct hdmi *hdmi = phy_8960->hdmi;
-	int timeout_count, pll_lock_retry = 10;
-	unsigned int val;
-
-	DBG("");
-
-	/* Assert PLL S/W reset */
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x8d);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0, 0x10);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1, 0x1a);
-
-	/* Wait for a short time before de-asserting
-	 * to allow the hardware to complete its job.
-	 * This much of delay should be fine for hardware
-	 * to assert and de-assert.
-	 */
-	udelay(10);
-
-	/* De-assert PLL S/W reset */
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x0d);
-
-	val = hdmi_read(hdmi, REG_HDMI_8960_PHY_REG12);
-	val |= HDMI_8960_PHY_REG12_SW_RESET;
-	/* Assert PHY S/W reset */
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG12, val);
-	val &= ~HDMI_8960_PHY_REG12_SW_RESET;
-	/* Wait for a short time before de-asserting
-	   to allow the hardware to complete its job.
-	   This much of delay should be fine for hardware
-	   to assert and de-assert. */
-	udelay(10);
-	/* De-assert PHY S/W reset */
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG12, val);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG2,  0x3f);
-
-	val = hdmi_read(hdmi, REG_HDMI_8960_PHY_REG12);
-	val |= HDMI_8960_PHY_REG12_PWRDN_B;
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG12, val);
-	/* Wait 10 us for enabling global power for PHY */
-	mb();
-	udelay(10);
-
-	val = hdmi_read(hdmi, REG_HDMI_8960_PHY_PLL_PWRDN_B);
-	val |= HDMI_8960_PHY_PLL_PWRDN_B_PLL_PWRDN_B;
-	val &= ~HDMI_8960_PHY_PLL_PWRDN_B_PD_PLL;
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_PWRDN_B, val);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG2, 0x80);
-
-	timeout_count = 1000;
-	while (--pll_lock_retry > 0) {
-
-		/* are we there yet? */
-		val = hdmi_read(hdmi, REG_HDMI_8960_PHY_PLL_STATUS0);
-		if (val & HDMI_8960_PHY_PLL_STATUS0_PLL_LOCK)
-			break;
-
-		udelay(1);
-
-		if (--timeout_count > 0)
-			continue;
-
-		/*
-		 * PLL has still not locked.
-		 * Do a software reset and try again
-		 * Assert PLL S/W reset first
-		 */
-		hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x8d);
-		udelay(10);
-		hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x0d);
-
-		/*
-		 * Wait for a short duration for the PLL calibration
-		 * before checking if the PLL gets locked
-		 */
-		udelay(350);
-
-		timeout_count = 1000;
-	}
-
-	return 0;
-}
-
-static void hdmi_pll_disable(struct clk_hw *hw)
-{
-	struct hdmi_phy_8960 *phy_8960 = clk_to_phy(hw);
-	struct hdmi *hdmi = phy_8960->hdmi;
-	unsigned int val;
-
-	DBG("");
-
-	val = hdmi_read(hdmi, REG_HDMI_8960_PHY_REG12);
-	val &= ~HDMI_8960_PHY_REG12_PWRDN_B;
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG12, val);
-
-	val = hdmi_read(hdmi, REG_HDMI_8960_PHY_PLL_PWRDN_B);
-	val |= HDMI_8960_PHY_REG12_SW_RESET;
-	val &= ~HDMI_8960_PHY_REG12_PWRDN_B;
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_PLL_PWRDN_B, val);
-	/* Make sure HDMI PHY/PLL are powered down */
-	mb();
-}
-
-static const struct pll_rate *find_rate(unsigned long rate)
-{
-	int i;
-	for (i = 1; i < ARRAY_SIZE(freqtbl); i++)
-		if (rate > freqtbl[i].rate)
-			return &freqtbl[i-1];
-	return &freqtbl[i-1];
-}
-
-static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw,
-				unsigned long parent_rate)
-{
-	struct hdmi_phy_8960 *phy_8960 = clk_to_phy(hw);
-	return phy_8960->pixclk;
-}
-
-static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *parent_rate)
-{
-	const struct pll_rate *pll_rate = find_rate(rate);
-	return pll_rate->rate;
-}
-
-static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long parent_rate)
-{
-	struct hdmi_phy_8960 *phy_8960 = clk_to_phy(hw);
-	struct hdmi *hdmi = phy_8960->hdmi;
-	const struct pll_rate *pll_rate = find_rate(rate);
-	int i;
-
-	DBG("rate=%lu", rate);
-
-	for (i = 0; pll_rate->conf[i].reg; i++)
-		hdmi_write(hdmi, pll_rate->conf[i].reg, pll_rate->conf[i].val);
-
-	phy_8960->pixclk = rate;
-
-	return 0;
-}
-
-
-static const struct clk_ops hdmi_pll_ops = {
-	.enable = hdmi_pll_enable,
-	.disable = hdmi_pll_disable,
-	.recalc_rate = hdmi_pll_recalc_rate,
-	.round_rate = hdmi_pll_round_rate,
-	.set_rate = hdmi_pll_set_rate,
-};
-
-static const char *hdmi_pll_parents[] = {
-	"pxo",
-};
-
-static struct clk_init_data pll_init = {
-	.name = "hdmi_pll",
-	.ops = &hdmi_pll_ops,
-	.parent_names = hdmi_pll_parents,
-	.num_parents = ARRAY_SIZE(hdmi_pll_parents),
-};
-#endif
-
-/*
- * HDMI Phy:
- */
-
-static void hdmi_phy_8960_destroy(struct hdmi_phy *phy)
-{
-	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
-	kfree(phy_8960);
-}
-
 static void hdmi_phy_8960_powerup(struct hdmi_phy *phy,
-		unsigned long int pixclock)
+				  unsigned long int pixclock)
 {
-	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
-	struct hdmi *hdmi = phy_8960->hdmi;
-
 	DBG("pixclock: %lu", pixclock);
 
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG2, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG0, 0x1b);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG1, 0xf2);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG4, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG5, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG6, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG7, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG8, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG9, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG10, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG11, 0x00);
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG3, 0x20);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG2, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG0, 0x1b);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG1, 0xf2);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG4, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG5, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG6, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG7, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG8, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG9, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG10, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG11, 0x00);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG3, 0x20);
 }
 
 static void hdmi_phy_8960_powerdown(struct hdmi_phy *phy)
 {
-	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
-	struct hdmi *hdmi = phy_8960->hdmi;
-
 	DBG("");
 
-	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG2, 0x7f);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG2, 0x7f);
 }
 
-static const struct hdmi_phy_funcs hdmi_phy_8960_funcs = {
-		.destroy = hdmi_phy_8960_destroy,
-		.powerup = hdmi_phy_8960_powerup,
-		.powerdown = hdmi_phy_8960_powerdown,
+static const char * const hdmi_phy_8960_reg_names[] = {
+	"core-vdda",
 };
 
-struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi)
-{
-	struct hdmi_phy_8960 *phy_8960;
-	struct hdmi_phy *phy = NULL;
-	int ret;
-#ifdef CONFIG_COMMON_CLK
-	int i;
-
-	/* sanity check: */
-	for (i = 0; i < (ARRAY_SIZE(freqtbl) - 1); i++)
-		if (WARN_ON(freqtbl[i].rate < freqtbl[i+1].rate))
-			return ERR_PTR(-EINVAL);
-#endif
-
-	phy_8960 = kzalloc(sizeof(*phy_8960), GFP_KERNEL);
-	if (!phy_8960) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	phy = &phy_8960->base;
-
-	phy->funcs = &hdmi_phy_8960_funcs;
-
-	phy_8960->hdmi = hdmi;
-
-#ifdef CONFIG_COMMON_CLK
-	phy_8960->pll_hw.init = &pll_init;
-	phy_8960->pll = devm_clk_register(&hdmi->pdev->dev, &phy_8960->pll_hw);
-	if (IS_ERR(phy_8960->pll)) {
-		ret = PTR_ERR(phy_8960->pll);
-		phy_8960->pll = NULL;
-		goto fail;
-	}
-#endif
-
-	return phy;
+static const char * const hdmi_phy_8960_clk_names[] = {
+	"slave_iface_clk",
+};
 
-fail:
-	if (phy)
-		hdmi_phy_8960_destroy(phy);
-	return ERR_PTR(ret);
-}
+const struct hdmi_phy_cfg msm_hdmi_phy_8960_cfg = {
+	.type = MSM_HDMI_PHY_8960,
+	.powerup = hdmi_phy_8960_powerup,
+	.powerdown = hdmi_phy_8960_powerdown,
+	.reg_names = hdmi_phy_8960_reg_names,
+	.num_regs = ARRAY_SIZE(hdmi_phy_8960_reg_names),
+	.clk_names = hdmi_phy_8960_clk_names,
+	.num_clks = ARRAY_SIZE(hdmi_phy_8960_clk_names),
+};
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
new file mode 100644
index 000000000000..aa94a553794f
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
@@ -0,0 +1,766 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+
+#include "hdmi.h"
+
+#define HDMI_VCO_MAX_FREQ			12000000000UL
+#define HDMI_VCO_MIN_FREQ			8000000000UL
+
+#define HDMI_PCLK_MAX_FREQ			600000000
+#define HDMI_PCLK_MIN_FREQ			25000000
+
+#define HDMI_HIGH_FREQ_BIT_CLK_THRESHOLD	3400000000UL
+#define HDMI_DIG_FREQ_BIT_CLK_THRESHOLD		1500000000UL
+#define HDMI_MID_FREQ_BIT_CLK_THRESHOLD		750000000UL
+#define HDMI_CORECLK_DIV			5
+#define HDMI_DEFAULT_REF_CLOCK			19200000
+#define HDMI_PLL_CMP_CNT			1024
+
+#define HDMI_PLL_POLL_MAX_READS			100
+#define HDMI_PLL_POLL_TIMEOUT_US		150
+
+#define HDMI_NUM_TX_CHANNEL			4
+
+struct hdmi_pll_8996 {
+	struct platform_device *pdev;
+	struct clk_hw clk_hw;
+
+	/* pll mmio base */
+	void __iomem *mmio_qserdes_com;
+	/* tx channel base */
+	void __iomem *mmio_qserdes_tx[HDMI_NUM_TX_CHANNEL];
+};
+
+#define hw_clk_to_pll(x) container_of(x, struct hdmi_pll_8996, clk_hw)
+
+struct hdmi_8996_phy_pll_reg_cfg {
+	u32 tx_lx_lane_mode[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_tx_band[HDMI_NUM_TX_CHANNEL];
+	u32 com_svs_mode_clk_sel;
+	u32 com_hsclk_sel;
+	u32 com_pll_cctrl_mode0;
+	u32 com_pll_rctrl_mode0;
+	u32 com_cp_ctrl_mode0;
+	u32 com_dec_start_mode0;
+	u32 com_div_frac_start1_mode0;
+	u32 com_div_frac_start2_mode0;
+	u32 com_div_frac_start3_mode0;
+	u32 com_integloop_gain0_mode0;
+	u32 com_integloop_gain1_mode0;
+	u32 com_lock_cmp_en;
+	u32 com_lock_cmp1_mode0;
+	u32 com_lock_cmp2_mode0;
+	u32 com_lock_cmp3_mode0;
+	u32 com_core_clk_en;
+	u32 com_coreclk_div;
+	u32 com_vco_tune_ctrl;
+
+	u32 tx_lx_tx_drv_lvl[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_tx_emp_post1_lvl[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_vmode_ctrl1[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_vmode_ctrl2[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_res_code_lane_tx[HDMI_NUM_TX_CHANNEL];
+	u32 tx_lx_hp_pd_enables[HDMI_NUM_TX_CHANNEL];
+
+	u32 phy_mode;
+};
+
+struct hdmi_8996_post_divider {
+	u64 vco_freq;
+	int hsclk_divsel;
+	int vco_ratio;
+	int tx_band_sel;
+	int half_rate_mode;
+};
+
+static inline struct hdmi_phy *pll_get_phy(struct hdmi_pll_8996 *pll)
+{
+	return platform_get_drvdata(pll->pdev);
+}
+
+static inline void hdmi_pll_write(struct hdmi_pll_8996 *pll, int offset,
+				  u32 data)
+{
+	msm_writel(data, pll->mmio_qserdes_com + offset);
+}
+
+static inline u32 hdmi_pll_read(struct hdmi_pll_8996 *pll, int offset)
+{
+	return msm_readl(pll->mmio_qserdes_com + offset);
+}
+
+static inline void hdmi_tx_chan_write(struct hdmi_pll_8996 *pll, int channel,
+				      int offset, int data)
+{
+	 msm_writel(data, pll->mmio_qserdes_tx[channel] + offset);
+}
+
+static inline u32 pll_get_cpctrl(u64 frac_start, unsigned long ref_clk,
+				 bool gen_ssc)
+{
+	if ((frac_start != 0) || gen_ssc)
+		return (11000000 / (ref_clk / 20));
+
+	return 0x23;
+}
+
+static inline u32 pll_get_rctrl(u64 frac_start, bool gen_ssc)
+{
+	if ((frac_start != 0) || gen_ssc)
+		return 0x16;
+
+	return 0x10;
+}
+
+static inline u32 pll_get_cctrl(u64 frac_start, bool gen_ssc)
+{
+	if ((frac_start != 0) || gen_ssc)
+		return 0x28;
+
+	return 0x1;
+}
+
+static inline u32 pll_get_integloop_gain(u64 frac_start, u64 bclk, u32 ref_clk,
+					 bool gen_ssc)
+{
+	int digclk_divsel = bclk >= HDMI_DIG_FREQ_BIT_CLK_THRESHOLD ? 1 : 2;
+	u64 base;
+
+	if ((frac_start != 0) || gen_ssc)
+		base = (64 * ref_clk) / HDMI_DEFAULT_REF_CLOCK;
+	else
+		base = (1022 * ref_clk) / 100;
+
+	base <<= digclk_divsel;
+
+	return (base <= 2046 ? base : 2046);
+}
+
+static inline u32 pll_get_pll_cmp(u64 fdata, unsigned long ref_clk)
+{
+	u64 dividend = HDMI_PLL_CMP_CNT * fdata;
+	u32 divisor = ref_clk * 10;
+	u32 rem;
+
+	rem = do_div(dividend, divisor);
+	if (rem > (divisor >> 1))
+		dividend++;
+
+	return dividend - 1;
+}
+
+static inline u64 pll_cmp_to_fdata(u32 pll_cmp, unsigned long ref_clk)
+{
+	u64 fdata = ((u64)pll_cmp) * ref_clk * 10;
+
+	do_div(fdata, HDMI_PLL_CMP_CNT);
+
+	return fdata;
+}
+
+static int pll_get_post_div(struct hdmi_8996_post_divider *pd, u64 bclk)
+{
+	int ratio[] = { 2, 3, 4, 5, 6, 9, 10, 12, 14, 15, 20, 21, 25, 28, 35 };
+	int hs_divsel[] = { 0, 4, 8, 12, 1, 5, 2, 9, 3, 13, 10, 7, 14, 11, 15 };
+	int tx_band_sel[] = { 0, 1, 2, 3 };
+	u64 vco_freq[60];
+	u64 vco, vco_optimal;
+	int half_rate_mode = 0;
+	int vco_optimal_index, vco_freq_index;
+	int i, j;
+
+retry:
+	vco_optimal = HDMI_VCO_MAX_FREQ;
+	vco_optimal_index = -1;
+	vco_freq_index = 0;
+	for (i = 0; i < 15; i++) {
+		for (j = 0; j < 4; j++) {
+			u32 ratio_mult = ratio[i] << tx_band_sel[j];
+
+			vco = bclk >> half_rate_mode;
+			vco *= ratio_mult;
+			vco_freq[vco_freq_index++] = vco;
+		}
+	}
+
+	for (i = 0; i < 60; i++) {
+		u64 vco_tmp = vco_freq[i];
+
+		if ((vco_tmp >= HDMI_VCO_MIN_FREQ) &&
+		    (vco_tmp <= vco_optimal)) {
+			vco_optimal = vco_tmp;
+			vco_optimal_index = i;
+		}
+	}
+
+	if (vco_optimal_index == -1) {
+		if (!half_rate_mode) {
+			half_rate_mode = 1;
+			goto retry;
+		}
+	} else {
+		pd->vco_freq = vco_optimal;
+		pd->tx_band_sel = tx_band_sel[vco_optimal_index % 4];
+		pd->vco_ratio = ratio[vco_optimal_index / 4];
+		pd->hsclk_divsel = hs_divsel[vco_optimal_index / 4];
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int pll_calculate(unsigned long pix_clk, unsigned long ref_clk,
+			 struct hdmi_8996_phy_pll_reg_cfg *cfg)
+{
+	struct hdmi_8996_post_divider pd;
+	u64 bclk;
+	u64 tmds_clk;
+	u64 dec_start;
+	u64 frac_start;
+	u64 fdata;
+	u32 pll_divisor;
+	u32 rem;
+	u32 cpctrl;
+	u32 rctrl;
+	u32 cctrl;
+	u32 integloop_gain;
+	u32 pll_cmp;
+	int i, ret;
+
+	/* bit clk = 10 * pix_clk */
+	bclk = ((u64)pix_clk) * 10;
+
+	if (bclk > HDMI_HIGH_FREQ_BIT_CLK_THRESHOLD)
+		tmds_clk = pix_clk >> 2;
+	else
+		tmds_clk = pix_clk;
+
+	ret = pll_get_post_div(&pd, bclk);
+	if (ret)
+		return ret;
+
+	dec_start = pd.vco_freq;
+	pll_divisor = 4 * ref_clk;
+	do_div(dec_start, pll_divisor);
+
+	frac_start = pd.vco_freq * (1 << 20);
+
+	rem = do_div(frac_start, pll_divisor);
+	frac_start -= dec_start * (1 << 20);
+	if (rem > (pll_divisor >> 1))
+		frac_start++;
+
+	cpctrl = pll_get_cpctrl(frac_start, ref_clk, false);
+	rctrl = pll_get_rctrl(frac_start, false);
+	cctrl = pll_get_cctrl(frac_start, false);
+	integloop_gain = pll_get_integloop_gain(frac_start, bclk,
+						ref_clk, false);
+
+	fdata = pd.vco_freq;
+	do_div(fdata, pd.vco_ratio);
+
+	pll_cmp = pll_get_pll_cmp(fdata, ref_clk);
+
+	DBG("VCO freq: %llu", pd.vco_freq);
+	DBG("fdata: %llu", fdata);
+	DBG("pix_clk: %lu", pix_clk);
+	DBG("tmds clk: %llu", tmds_clk);
+	DBG("HSCLK_SEL: %d", pd.hsclk_divsel);
+	DBG("DEC_START: %llu", dec_start);
+	DBG("DIV_FRAC_START: %llu", frac_start);
+	DBG("PLL_CPCTRL: %u", cpctrl);
+	DBG("PLL_RCTRL: %u", rctrl);
+	DBG("PLL_CCTRL: %u", cctrl);
+	DBG("INTEGLOOP_GAIN: %u", integloop_gain);
+	DBG("TX_BAND: %d", pd.tx_band_sel);
+	DBG("PLL_CMP: %u", pll_cmp);
+
+	/* Convert these values to register specific values */
+	if (bclk > HDMI_DIG_FREQ_BIT_CLK_THRESHOLD)
+		cfg->com_svs_mode_clk_sel = 1;
+	else
+		cfg->com_svs_mode_clk_sel = 2;
+
+	cfg->com_hsclk_sel = (0x20 | pd.hsclk_divsel);
+	cfg->com_pll_cctrl_mode0 = cctrl;
+	cfg->com_pll_rctrl_mode0 = rctrl;
+	cfg->com_cp_ctrl_mode0 = cpctrl;
+	cfg->com_dec_start_mode0 = dec_start;
+	cfg->com_div_frac_start1_mode0 = (frac_start & 0xff);
+	cfg->com_div_frac_start2_mode0 = ((frac_start & 0xff00) >> 8);
+	cfg->com_div_frac_start3_mode0 = ((frac_start & 0xf0000) >> 16);
+	cfg->com_integloop_gain0_mode0 = (integloop_gain & 0xff);
+	cfg->com_integloop_gain1_mode0 = ((integloop_gain & 0xf00) >> 8);
+	cfg->com_lock_cmp1_mode0 = (pll_cmp & 0xff);
+	cfg->com_lock_cmp2_mode0 = ((pll_cmp & 0xff00) >> 8);
+	cfg->com_lock_cmp3_mode0 = ((pll_cmp & 0x30000) >> 16);
+	cfg->com_lock_cmp_en = 0x0;
+	cfg->com_core_clk_en = 0x2c;
+	cfg->com_coreclk_div = HDMI_CORECLK_DIV;
+	cfg->phy_mode = (bclk > HDMI_HIGH_FREQ_BIT_CLK_THRESHOLD) ? 0x10 : 0x0;
+	cfg->com_vco_tune_ctrl = 0x0;
+
+	cfg->tx_lx_lane_mode[0] =
+		cfg->tx_lx_lane_mode[2] = 0x43;
+
+	cfg->tx_lx_hp_pd_enables[0] =
+		cfg->tx_lx_hp_pd_enables[1] =
+		cfg->tx_lx_hp_pd_enables[2] = 0x0c;
+	cfg->tx_lx_hp_pd_enables[3] = 0x3;
+
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++)
+		cfg->tx_lx_tx_band[i] = pd.tx_band_sel + 4;
+
+	if (bclk > HDMI_HIGH_FREQ_BIT_CLK_THRESHOLD) {
+		cfg->tx_lx_tx_drv_lvl[0] =
+			cfg->tx_lx_tx_drv_lvl[1] =
+			cfg->tx_lx_tx_drv_lvl[2] = 0x25;
+		cfg->tx_lx_tx_drv_lvl[3] = 0x22;
+
+		cfg->tx_lx_tx_emp_post1_lvl[0] =
+			cfg->tx_lx_tx_emp_post1_lvl[1] =
+			cfg->tx_lx_tx_emp_post1_lvl[2] = 0x23;
+		cfg->tx_lx_tx_emp_post1_lvl[3] = 0x27;
+
+		cfg->tx_lx_vmode_ctrl1[0] =
+			cfg->tx_lx_vmode_ctrl1[1] =
+			cfg->tx_lx_vmode_ctrl1[2] =
+			cfg->tx_lx_vmode_ctrl1[3] = 0x00;
+
+		cfg->tx_lx_vmode_ctrl2[0] =
+			cfg->tx_lx_vmode_ctrl2[1] =
+			cfg->tx_lx_vmode_ctrl2[2] = 0x0D;
+
+		cfg->tx_lx_vmode_ctrl2[3] = 0x00;
+	} else if (bclk > HDMI_MID_FREQ_BIT_CLK_THRESHOLD) {
+		for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+			cfg->tx_lx_tx_drv_lvl[i] = 0x25;
+			cfg->tx_lx_tx_emp_post1_lvl[i] = 0x23;
+			cfg->tx_lx_vmode_ctrl1[i] = 0x00;
+		}
+
+		cfg->tx_lx_vmode_ctrl2[0] =
+			cfg->tx_lx_vmode_ctrl2[1] =
+			cfg->tx_lx_vmode_ctrl2[2] = 0x0D;
+		cfg->tx_lx_vmode_ctrl2[3] = 0x00;
+	} else {
+		for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+			cfg->tx_lx_tx_drv_lvl[i] = 0x20;
+			cfg->tx_lx_tx_emp_post1_lvl[i] = 0x20;
+			cfg->tx_lx_vmode_ctrl1[i] = 0x00;
+			cfg->tx_lx_vmode_ctrl2[i] = 0x0E;
+		}
+	}
+
+	DBG("com_svs_mode_clk_sel = 0x%x", cfg->com_svs_mode_clk_sel);
+	DBG("com_hsclk_sel = 0x%x", cfg->com_hsclk_sel);
+	DBG("com_lock_cmp_en = 0x%x", cfg->com_lock_cmp_en);
+	DBG("com_pll_cctrl_mode0 = 0x%x", cfg->com_pll_cctrl_mode0);
+	DBG("com_pll_rctrl_mode0 = 0x%x", cfg->com_pll_rctrl_mode0);
+	DBG("com_cp_ctrl_mode0 = 0x%x", cfg->com_cp_ctrl_mode0);
+	DBG("com_dec_start_mode0 = 0x%x", cfg->com_dec_start_mode0);
+	DBG("com_div_frac_start1_mode0 = 0x%x", cfg->com_div_frac_start1_mode0);
+	DBG("com_div_frac_start2_mode0 = 0x%x", cfg->com_div_frac_start2_mode0);
+	DBG("com_div_frac_start3_mode0 = 0x%x", cfg->com_div_frac_start3_mode0);
+	DBG("com_integloop_gain0_mode0 = 0x%x", cfg->com_integloop_gain0_mode0);
+	DBG("com_integloop_gain1_mode0 = 0x%x", cfg->com_integloop_gain1_mode0);
+	DBG("com_lock_cmp1_mode0 = 0x%x", cfg->com_lock_cmp1_mode0);
+	DBG("com_lock_cmp2_mode0 = 0x%x", cfg->com_lock_cmp2_mode0);
+	DBG("com_lock_cmp3_mode0 = 0x%x", cfg->com_lock_cmp3_mode0);
+	DBG("com_core_clk_en = 0x%x", cfg->com_core_clk_en);
+	DBG("com_coreclk_div = 0x%x", cfg->com_coreclk_div);
+	DBG("phy_mode = 0x%x", cfg->phy_mode);
+
+	DBG("tx_l0_lane_mode = 0x%x", cfg->tx_lx_lane_mode[0]);
+	DBG("tx_l2_lane_mode = 0x%x", cfg->tx_lx_lane_mode[2]);
+
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+		DBG("tx_l%d_tx_band = 0x%x", i, cfg->tx_lx_tx_band[i]);
+		DBG("tx_l%d_tx_drv_lvl = 0x%x", i, cfg->tx_lx_tx_drv_lvl[i]);
+		DBG("tx_l%d_tx_emp_post1_lvl = 0x%x", i,
+		    cfg->tx_lx_tx_emp_post1_lvl[i]);
+		DBG("tx_l%d_vmode_ctrl1 = 0x%x", i, cfg->tx_lx_vmode_ctrl1[i]);
+		DBG("tx_l%d_vmode_ctrl2 = 0x%x", i, cfg->tx_lx_vmode_ctrl2[i]);
+	}
+
+	return 0;
+}
+
+static int hdmi_8996_pll_set_clk_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long parent_rate)
+{
+	struct hdmi_pll_8996 *pll = hw_clk_to_pll(hw);
+	struct hdmi_phy *phy = pll_get_phy(pll);
+	struct hdmi_8996_phy_pll_reg_cfg cfg;
+	int i, ret;
+
+	memset(&cfg, 0x00, sizeof(cfg));
+
+	ret = pll_calculate(rate, parent_rate, &cfg);
+	if (ret) {
+		DRM_ERROR("PLL calculation failed\n");
+		return ret;
+	}
+
+	/* Initially shut down PHY */
+	DBG("Disabling PHY");
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_PD_CTL, 0x0);
+	udelay(500);
+
+	/* Power up sequence */
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_BG_CTRL, 0x04);
+
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_PD_CTL, 0x1);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_RESETSM_CNTRL, 0x20);
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_TX0_TX1_LANE_CTL, 0x0F);
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_TX2_TX3_LANE_CTL, 0x0F);
+
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_CLKBUF_ENABLE,
+				   0x03);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_TX_BAND,
+				   cfg.tx_lx_tx_band[i]);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_RESET_TSYNC_EN,
+				   0x03);
+	}
+
+	hdmi_tx_chan_write(pll, 0, REG_HDMI_PHY_QSERDES_TX_LX_LANE_MODE,
+			   cfg.tx_lx_lane_mode[0]);
+	hdmi_tx_chan_write(pll, 2, REG_HDMI_PHY_QSERDES_TX_LX_LANE_MODE,
+			   cfg.tx_lx_lane_mode[2]);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SYSCLK_BUF_ENABLE, 0x1E);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x07);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SYSCLK_EN_SEL, 0x37);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SYS_CLK_CTRL, 0x02);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CLK_ENABLE1, 0x0E);
+
+	/* Bypass VCO calibration */
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SVS_MODE_CLK_SEL,
+		       cfg.com_svs_mode_clk_sel);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_BG_TRIM, 0x0F);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_PLL_IVCO, 0x0F);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_CTRL,
+		       cfg.com_vco_tune_ctrl);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_BG_CTRL, 0x06);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CLK_SELECT, 0x30);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_HSCLK_SEL,
+		       cfg.com_hsclk_sel);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP_EN,
+		       cfg.com_lock_cmp_en);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_PLL_CCTRL_MODE0,
+		       cfg.com_pll_cctrl_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_PLL_RCTRL_MODE0,
+		       cfg.com_pll_rctrl_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CP_CTRL_MODE0,
+		       cfg.com_cp_ctrl_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_DEC_START_MODE0,
+		       cfg.com_dec_start_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START1_MODE0,
+		       cfg.com_div_frac_start1_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START2_MODE0,
+		       cfg.com_div_frac_start2_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_DIV_FRAC_START3_MODE0,
+		       cfg.com_div_frac_start3_mode0);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN0_MODE0,
+		       cfg.com_integloop_gain0_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_INTEGLOOP_GAIN1_MODE0,
+		       cfg.com_integloop_gain1_mode0);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP1_MODE0,
+		       cfg.com_lock_cmp1_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP2_MODE0,
+		       cfg.com_lock_cmp2_mode0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP3_MODE0,
+		       cfg.com_lock_cmp3_mode0);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_VCO_TUNE_MAP, 0x00);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CORE_CLK_EN,
+		       cfg.com_core_clk_en);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CORECLK_DIV,
+		       cfg.com_coreclk_div);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_CMN_CONFIG, 0x02);
+
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_RESCODE_DIV_NUM, 0x15);
+
+	/* TX lanes setup (TX 0/1/2/3) */
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_TX_DRV_LVL,
+				   cfg.tx_lx_tx_drv_lvl[i]);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_TX_EMP_POST1_LVL,
+				   cfg.tx_lx_tx_emp_post1_lvl[i]);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_VMODE_CTRL1,
+				   cfg.tx_lx_vmode_ctrl1[i]);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_VMODE_CTRL2,
+				   cfg.tx_lx_vmode_ctrl2[i]);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_TX_DRV_LVL_OFFSET,
+				   0x00);
+		hdmi_tx_chan_write(pll, i,
+			REG_HDMI_PHY_QSERDES_TX_LX_RES_CODE_LANE_OFFSET,
+			0x00);
+		hdmi_tx_chan_write(pll, i,
+			REG_HDMI_PHY_QSERDES_TX_LX_TRAN_DRVR_EMP_EN,
+			0x03);
+		hdmi_tx_chan_write(pll, i,
+			REG_HDMI_PHY_QSERDES_TX_LX_PARRATE_REC_DETECT_IDLE_EN,
+			0x40);
+		hdmi_tx_chan_write(pll, i,
+				   REG_HDMI_PHY_QSERDES_TX_LX_HP_PD_ENABLES,
+				   cfg.tx_lx_hp_pd_enables[i]);
+	}
+
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_MODE, cfg.phy_mode);
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_PD_CTL, 0x1F);
+
+	/*
+	 * Ensure that vco configuration gets flushed to hardware before
+	 * enabling the PLL
+	 */
+	wmb();
+
+	return 0;
+}
+
+static int hdmi_8996_phy_ready_status(struct hdmi_phy *phy)
+{
+	u32 nb_tries = HDMI_PLL_POLL_MAX_READS;
+	unsigned long timeout = HDMI_PLL_POLL_TIMEOUT_US;
+	u32 status;
+	int phy_ready = 0;
+
+	DBG("Waiting for PHY ready");
+
+	while (nb_tries--) {
+		status = hdmi_phy_read(phy, REG_HDMI_8996_PHY_STATUS);
+		phy_ready = status & BIT(0);
+
+		if (phy_ready)
+			break;
+
+		udelay(timeout);
+	}
+
+	DBG("PHY is %sready", phy_ready ? "" : "*not* ");
+
+	return phy_ready;
+}
+
+static int hdmi_8996_pll_lock_status(struct hdmi_pll_8996 *pll)
+{
+	u32 status;
+	int nb_tries = HDMI_PLL_POLL_MAX_READS;
+	unsigned long timeout = HDMI_PLL_POLL_TIMEOUT_US;
+	int pll_locked = 0;
+
+	DBG("Waiting for PLL lock");
+
+	while (nb_tries--) {
+		status = hdmi_pll_read(pll,
+				       REG_HDMI_PHY_QSERDES_COM_C_READY_STATUS);
+		pll_locked = status & BIT(0);
+
+		if (pll_locked)
+			break;
+
+		udelay(timeout);
+	}
+
+	DBG("HDMI PLL is %slocked", pll_locked ? "" : "*not* ");
+
+	return pll_locked;
+}
+
+static int hdmi_8996_pll_prepare(struct clk_hw *hw)
+{
+	struct hdmi_pll_8996 *pll = hw_clk_to_pll(hw);
+	struct hdmi_phy *phy = pll_get_phy(pll);
+	int i, ret = 0;
+
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_CFG, 0x1);
+	udelay(100);
+
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_CFG, 0x19);
+	udelay(100);
+
+	ret = hdmi_8996_pll_lock_status(pll);
+	if (!ret)
+		return ret;
+
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++)
+		hdmi_tx_chan_write(pll, i,
+			REG_HDMI_PHY_QSERDES_TX_LX_HIGHZ_TRANSCEIVEREN_BIAS_DRVR_EN,
+			0x6F);
+
+	/* Disable SSC */
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SSC_PER1, 0x0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SSC_PER2, 0x0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SSC_STEP_SIZE1, 0x0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SSC_STEP_SIZE2, 0x0);
+	hdmi_pll_write(pll, REG_HDMI_PHY_QSERDES_COM_SSC_EN_CENTER, 0x2);
+
+	ret = hdmi_8996_phy_ready_status(phy);
+	if (!ret)
+		return ret;
+
+	/* Restart the retiming buffer */
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_CFG, 0x18);
+	udelay(1);
+	hdmi_phy_write(phy, REG_HDMI_8996_PHY_CFG, 0x19);
+
+	return 0;
+}
+
+static long hdmi_8996_pll_round_rate(struct clk_hw *hw,
+				     unsigned long rate,
+				     unsigned long *parent_rate)
+{
+	if (rate < HDMI_PCLK_MIN_FREQ)
+		return HDMI_PCLK_MIN_FREQ;
+	else if (rate > HDMI_PCLK_MAX_FREQ)
+		return HDMI_PCLK_MAX_FREQ;
+	else
+		return rate;
+}
+
+static unsigned long hdmi_8996_pll_recalc_rate(struct clk_hw *hw,
+					       unsigned long parent_rate)
+{
+	struct hdmi_pll_8996 *pll = hw_clk_to_pll(hw);
+	u64 fdata;
+	u32 cmp1, cmp2, cmp3, pll_cmp;
+
+	cmp1 = hdmi_pll_read(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP1_MODE0);
+	cmp2 = hdmi_pll_read(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP2_MODE0);
+	cmp3 = hdmi_pll_read(pll, REG_HDMI_PHY_QSERDES_COM_LOCK_CMP3_MODE0);
+
+	pll_cmp = cmp1 | (cmp2 << 8) | (cmp3 << 16);
+
+	fdata = pll_cmp_to_fdata(pll_cmp + 1, parent_rate);
+
+	do_div(fdata, 10);
+
+	return fdata;
+}
+
+static void hdmi_8996_pll_unprepare(struct clk_hw *hw)
+{
+}
+
+static int hdmi_8996_pll_is_enabled(struct clk_hw *hw)
+{
+	struct hdmi_pll_8996 *pll = hw_clk_to_pll(hw);
+	u32 status;
+	int pll_locked;
+
+	status = hdmi_pll_read(pll, REG_HDMI_PHY_QSERDES_COM_C_READY_STATUS);
+	pll_locked = status & BIT(0);
+
+	return pll_locked;
+}
+
+static struct clk_ops hdmi_8996_pll_ops = {
+	.set_rate = hdmi_8996_pll_set_clk_rate,
+	.round_rate = hdmi_8996_pll_round_rate,
+	.recalc_rate = hdmi_8996_pll_recalc_rate,
+	.prepare = hdmi_8996_pll_prepare,
+	.unprepare = hdmi_8996_pll_unprepare,
+	.is_enabled = hdmi_8996_pll_is_enabled,
+};
+
+static const char * const hdmi_pll_parents[] = {
+	"xo",
+};
+
+static struct clk_init_data pll_init = {
+	.name = "hdmipll",
+	.ops = &hdmi_8996_pll_ops,
+	.parent_names = hdmi_pll_parents,
+	.num_parents = ARRAY_SIZE(hdmi_pll_parents),
+};
+
+int msm_hdmi_pll_8996_init(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct hdmi_pll_8996 *pll;
+	struct clk *clk;
+	int i;
+
+	pll = devm_kzalloc(dev, sizeof(*pll), GFP_KERNEL);
+	if (!pll)
+		return -ENOMEM;
+
+	pll->pdev = pdev;
+
+	pll->mmio_qserdes_com = msm_ioremap(pdev, "hdmi_pll", "HDMI_PLL");
+	if (IS_ERR(pll->mmio_qserdes_com)) {
+		dev_err(dev, "failed to map pll base\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < HDMI_NUM_TX_CHANNEL; i++) {
+		char name[32], label[32];
+
+		snprintf(name, sizeof(name), "hdmi_tx_l%d", i);
+		snprintf(label, sizeof(label), "HDMI_TX_L%d", i);
+
+		pll->mmio_qserdes_tx[i] = msm_ioremap(pdev, name, label);
+		if (IS_ERR(pll->mmio_qserdes_tx[i])) {
+			dev_err(dev, "failed to map pll base\n");
+			return -ENOMEM;
+		}
+	}
+	pll->clk_hw.init = &pll_init;
+
+	clk = devm_clk_register(dev, &pll->clk_hw);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to register pll clock\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const char * const hdmi_phy_8996_reg_names[] = {
+	"vddio",
+	"vcca",
+};
+
+static const char * const hdmi_phy_8996_clk_names[] = {
+	"mmagic_iface_clk",
+	"iface_clk",
+	"ref_clk",
+};
+
+const struct hdmi_phy_cfg msm_hdmi_phy_8996_cfg = {
+	.type = MSM_HDMI_PHY_8996,
+	.reg_names = hdmi_phy_8996_reg_names,
+	.num_regs = ARRAY_SIZE(hdmi_phy_8996_reg_names),
+	.clk_names = hdmi_phy_8996_clk_names,
+	.num_clks = ARRAY_SIZE(hdmi_phy_8996_clk_names),
+};
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
index cb01421ae1e4..a68eea4153fc 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
@@ -17,166 +17,122 @@
 
 #include "hdmi.h"
 
-struct hdmi_phy_8x60 {
-	struct hdmi_phy base;
-	struct hdmi *hdmi;
-};
-#define to_hdmi_phy_8x60(x) container_of(x, struct hdmi_phy_8x60, base)
-
-static void hdmi_phy_8x60_destroy(struct hdmi_phy *phy)
-{
-	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
-	kfree(phy_8x60);
-}
-
 static void hdmi_phy_8x60_powerup(struct hdmi_phy *phy,
 		unsigned long int pixclock)
 {
-	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
-	struct hdmi *hdmi = phy_8x60->hdmi;
-
 	/* De-serializer delay D/C for non-lbk mode: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG0,
-			HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(3));
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG0,
+		       HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(3));
 
 	if (pixclock == 27000000) {
 		/* video_format == HDMI_VFRMT_720x480p60_16_9 */
-		hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG1,
-				HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
-				HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(3));
+		hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG1,
+			       HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
+			       HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(3));
 	} else {
-		hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG1,
-				HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
-				HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(4));
+		hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG1,
+			       HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
+			       HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(4));
 	}
 
 	/* No matter what, start from the power down mode: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_PD_PWRGEN |
-			HDMI_8x60_PHY_REG2_PD_PLL |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_PD_PWRGEN |
+		       HDMI_8x60_PHY_REG2_PD_PLL |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 
 	/* Turn PowerGen on: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_PD_PLL |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_PD_PLL |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 
 	/* Turn PLL power on: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 
 	/* Write to HIGH after PLL power down de-assert: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG3,
-			HDMI_8x60_PHY_REG3_PLL_ENABLE);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG3,
+		       HDMI_8x60_PHY_REG3_PLL_ENABLE);
 
 	/* ASIC power on; PHY REG9 = 0 */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG9, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG9, 0);
 
 	/* Enable PLL lock detect, PLL lock det will go high after lock
 	 * Enable the re-time logic
 	 */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG12,
-			HDMI_8x60_PHY_REG12_RETIMING_EN |
-			HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG12,
+		       HDMI_8x60_PHY_REG12_RETIMING_EN |
+		       HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN);
 
 	/* Drivers are on: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 
 	/* If the RX detector is needed: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
-			HDMI_8x60_PHY_REG2_PD_DESER);
-
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG4, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG5, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG6, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG7, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG8, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG9, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG10, 0);
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG11, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
+
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG4, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG5, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG6, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG7, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG8, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG9, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG10, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG11, 0);
 
 	/* If we want to use lock enable based on counting: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG12,
-			HDMI_8x60_PHY_REG12_RETIMING_EN |
-			HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN |
-			HDMI_8x60_PHY_REG12_FORCE_LOCK);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG12,
+		       HDMI_8x60_PHY_REG12_RETIMING_EN |
+		       HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN |
+		       HDMI_8x60_PHY_REG12_FORCE_LOCK);
 }
 
 static void hdmi_phy_8x60_powerdown(struct hdmi_phy *phy)
 {
-	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
-	struct hdmi *hdmi = phy_8x60->hdmi;
-
 	/* Assert RESET PHY from controller */
-	hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
-			HDMI_PHY_CTRL_SW_RESET);
+	hdmi_phy_write(phy, REG_HDMI_PHY_CTRL,
+		       HDMI_PHY_CTRL_SW_RESET);
 	udelay(10);
 	/* De-assert RESET PHY from controller */
-	hdmi_write(hdmi, REG_HDMI_PHY_CTRL, 0);
+	hdmi_phy_write(phy, REG_HDMI_PHY_CTRL, 0);
 	/* Turn off Driver */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 	udelay(10);
 	/* Disable PLL */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG3, 0);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG3, 0);
 	/* Power down PHY, but keep RX-sense: */
-	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
-			HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
-			HDMI_8x60_PHY_REG2_PD_PWRGEN |
-			HDMI_8x60_PHY_REG2_PD_PLL |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
-			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
-			HDMI_8x60_PHY_REG2_PD_DESER);
+	hdmi_phy_write(phy, REG_HDMI_8x60_PHY_REG2,
+		       HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
+		       HDMI_8x60_PHY_REG2_PD_PWRGEN |
+		       HDMI_8x60_PHY_REG2_PD_PLL |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+		       HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+		       HDMI_8x60_PHY_REG2_PD_DESER);
 }
 
-static const struct hdmi_phy_funcs hdmi_phy_8x60_funcs = {
-		.destroy = hdmi_phy_8x60_destroy,
-		.powerup = hdmi_phy_8x60_powerup,
-		.powerdown = hdmi_phy_8x60_powerdown,
+const struct hdmi_phy_cfg msm_hdmi_phy_8x60_cfg = {
+	.type = MSM_HDMI_PHY_8x60,
+	.powerup = hdmi_phy_8x60_powerup,
+	.powerdown = hdmi_phy_8x60_powerdown,
 };
-
-struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi)
-{
-	struct hdmi_phy_8x60 *phy_8x60;
-	struct hdmi_phy *phy = NULL;
-	int ret;
-
-	phy_8x60 = kzalloc(sizeof(*phy_8x60), GFP_KERNEL);
-	if (!phy_8x60) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	phy = &phy_8x60->base;
-
-	phy->funcs = &hdmi_phy_8x60_funcs;
-
-	phy_8x60->hdmi = hdmi;
-
-	return phy;
-
-fail:
-	if (phy)
-		hdmi_phy_8x60_destroy(phy);
-	return ERR_PTR(ret);
-}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c
index 56ab8917ee9a..c4a61e537851 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x74.c
@@ -17,84 +17,40 @@
 
 #include "hdmi.h"
 
-struct hdmi_phy_8x74 {
-	struct hdmi_phy base;
-	void __iomem *mmio;
-};
-#define to_hdmi_phy_8x74(x) container_of(x, struct hdmi_phy_8x74, base)
-
-
-static void phy_write(struct hdmi_phy_8x74 *phy, u32 reg, u32 data)
-{
-	msm_writel(data, phy->mmio + reg);
-}
-
-//static u32 phy_read(struct hdmi_phy_8x74 *phy, u32 reg)
-//{
-//	return msm_readl(phy->mmio + reg);
-//}
-
-static void hdmi_phy_8x74_destroy(struct hdmi_phy *phy)
-{
-	struct hdmi_phy_8x74 *phy_8x74 = to_hdmi_phy_8x74(phy);
-	kfree(phy_8x74);
-}
-
 static void hdmi_phy_8x74_powerup(struct hdmi_phy *phy,
 		unsigned long int pixclock)
 {
-	struct hdmi_phy_8x74 *phy_8x74 = to_hdmi_phy_8x74(phy);
-
-	phy_write(phy_8x74, REG_HDMI_8x74_ANA_CFG0,   0x1b);
-	phy_write(phy_8x74, REG_HDMI_8x74_ANA_CFG1,   0xf2);
-	phy_write(phy_8x74, REG_HDMI_8x74_BIST_CFG0,  0x0);
-	phy_write(phy_8x74, REG_HDMI_8x74_BIST_PATN0, 0x0);
-	phy_write(phy_8x74, REG_HDMI_8x74_BIST_PATN1, 0x0);
-	phy_write(phy_8x74, REG_HDMI_8x74_BIST_PATN2, 0x0);
-	phy_write(phy_8x74, REG_HDMI_8x74_BIST_PATN3, 0x0);
-	phy_write(phy_8x74, REG_HDMI_8x74_PD_CTRL1,   0x20);
+	hdmi_phy_write(phy, REG_HDMI_8x74_ANA_CFG0,   0x1b);
+	hdmi_phy_write(phy, REG_HDMI_8x74_ANA_CFG1,   0xf2);
+	hdmi_phy_write(phy, REG_HDMI_8x74_BIST_CFG0,  0x0);
+	hdmi_phy_write(phy, REG_HDMI_8x74_BIST_PATN0, 0x0);
+	hdmi_phy_write(phy, REG_HDMI_8x74_BIST_PATN1, 0x0);
+	hdmi_phy_write(phy, REG_HDMI_8x74_BIST_PATN2, 0x0);
+	hdmi_phy_write(phy, REG_HDMI_8x74_BIST_PATN3, 0x0);
+	hdmi_phy_write(phy, REG_HDMI_8x74_PD_CTRL1,   0x20);
 }
 
 static void hdmi_phy_8x74_powerdown(struct hdmi_phy *phy)
 {
-	struct hdmi_phy_8x74 *phy_8x74 = to_hdmi_phy_8x74(phy);
-	phy_write(phy_8x74, REG_HDMI_8x74_PD_CTRL0, 0x7f);
+	hdmi_phy_write(phy, REG_HDMI_8x74_PD_CTRL0, 0x7f);
 }
 
-static const struct hdmi_phy_funcs hdmi_phy_8x74_funcs = {
-		.destroy = hdmi_phy_8x74_destroy,
-		.powerup = hdmi_phy_8x74_powerup,
-		.powerdown = hdmi_phy_8x74_powerdown,
+static const char * const hdmi_phy_8x74_reg_names[] = {
+	"core-vdda",
+	"vddio",
 };
 
-struct hdmi_phy *hdmi_phy_8x74_init(struct hdmi *hdmi)
-{
-	struct hdmi_phy_8x74 *phy_8x74;
-	struct hdmi_phy *phy = NULL;
-	int ret;
-
-	phy_8x74 = kzalloc(sizeof(*phy_8x74), GFP_KERNEL);
-	if (!phy_8x74) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	phy = &phy_8x74->base;
-
-	phy->funcs = &hdmi_phy_8x74_funcs;
-
-	/* for 8x74, the phy mmio is mapped separately: */
-	phy_8x74->mmio = msm_ioremap(hdmi->pdev,
-			"phy_physical", "HDMI_8x74");
-	if (IS_ERR(phy_8x74->mmio)) {
-		ret = PTR_ERR(phy_8x74->mmio);
-		goto fail;
-	}
-
-	return phy;
+static const char * const hdmi_phy_8x74_clk_names[] = {
+	"iface_clk",
+	"alt_iface_clk"
+};
 
-fail:
-	if (phy)
-		hdmi_phy_8x74_destroy(phy);
-	return ERR_PTR(ret);
-}
+const struct hdmi_phy_cfg msm_hdmi_phy_8x74_cfg = {
+	.type = MSM_HDMI_PHY_8x74,
+	.powerup = hdmi_phy_8x74_powerup,
+	.powerdown = hdmi_phy_8x74_powerdown,
+	.reg_names = hdmi_phy_8x74_reg_names,
+	.num_regs = ARRAY_SIZE(hdmi_phy_8x74_reg_names),
+	.clk_names = hdmi_phy_8x74_clk_names,
+	.num_clks = ARRAY_SIZE(hdmi_phy_8x74_clk_names),
+};
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
new file mode 100644
index 000000000000..92da69aa6187
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
@@ -0,0 +1,461 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk-provider.h>
+#include "hdmi.h"
+
+struct hdmi_pll_8960 {
+	struct platform_device *pdev;
+	struct clk_hw clk_hw;
+	void __iomem *mmio;
+
+	unsigned long pixclk;
+};
+
+#define hw_clk_to_pll(x) container_of(x, struct hdmi_pll_8960, clk_hw)
+
+/*
+ * HDMI PLL:
+ *
+ * To get the parent clock setup properly, we need to plug in hdmi pll
+ * configuration into common-clock-framework.
+ */
+
+struct pll_rate {
+	unsigned long rate;
+	int num_reg;
+	struct {
+		u32 val;
+		u32 reg;
+	} conf[32];
+};
+
+/* NOTE: keep sorted highest freq to lowest: */
+static const struct pll_rate freqtbl[] = {
+	{ 154000000, 14, {
+		{ 0x08, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x0d, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x4d, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x5e, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0x42, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+			}
+	},
+	/* 1080p60/1080p50 case */
+	{ 148500000, 27, {
+		{ 0x02, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
+		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
+		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
+		{ 0x76, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
+		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
+		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
+		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
+		{ 0xe6, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
+			}
+	},
+	{ 108000000, 13, {
+		{ 0x08, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x21, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x1c, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x49, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x49, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+			}
+	},
+	/* 720p60/720p50/1080i60/1080i50/1080p24/1080p30/1080p25 */
+	{ 74250000, 8, {
+		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
+		{ 0x12, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x76, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0xe6, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+			}
+	},
+	{ 74176000, 14, {
+		{ 0x18, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0xe5, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x0c, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x7d, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0xbc, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+			}
+	},
+	{ 65000000, 14, {
+		{ 0x18, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0xf9, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x8a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x0b, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x4b, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x7b, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0x09, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+			}
+	},
+	/* 480p60/480i60 */
+	{ 27030000, 18, {
+		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
+		{ 0x38, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
+		{ 0x20, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0xff, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x4e, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0xd7, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0x03, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+		{ 0x2a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
+			}
+	},
+	/* 576p50/576i50 */
+	{ 27000000, 27, {
+		{ 0x32, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
+		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
+		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
+		{ 0x7b, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
+		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
+		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
+		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
+		{ 0x2a, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x03, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
+			}
+	},
+	/* 640x480p60 */
+	{ 25200000, 27, {
+		{ 0x32, REG_HDMI_8960_PHY_PLL_REFCLK_CFG    },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_CHRG_PUMP_CFG },
+		{ 0x01, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG0 },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_LOOP_FLT_CFG1 },
+		{ 0x2c, REG_HDMI_8960_PHY_PLL_IDAC_ADJ_CFG  },
+		{ 0x06, REG_HDMI_8960_PHY_PLL_I_VI_KVCO_CFG },
+		{ 0x0a, REG_HDMI_8960_PHY_PLL_PWRDN_B       },
+		{ 0x77, REG_HDMI_8960_PHY_PLL_SDM_CFG0      },
+		{ 0x4c, REG_HDMI_8960_PHY_PLL_SDM_CFG1      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG2      },
+		{ 0xc0, REG_HDMI_8960_PHY_PLL_SDM_CFG3      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SDM_CFG4      },
+		{ 0x9a, REG_HDMI_8960_PHY_PLL_SSC_CFG0      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG1      },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_SSC_CFG2      },
+		{ 0x20, REG_HDMI_8960_PHY_PLL_SSC_CFG3      },
+		{ 0x10, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0  },
+		{ 0x1a, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1  },
+		{ 0x0d, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2  },
+		{ 0xf4, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG0   },
+		{ 0x02, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG1   },
+		{ 0x3b, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG2   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG3   },
+		{ 0x86, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG4   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG5   },
+		{ 0x33, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG6   },
+		{ 0x00, REG_HDMI_8960_PHY_PLL_VCOCAL_CFG7   },
+			}
+	},
+};
+
+static inline void pll_write(struct hdmi_pll_8960 *pll, u32 reg, u32 data)
+{
+	msm_writel(data, pll->mmio + reg);
+}
+
+static inline u32 pll_read(struct hdmi_pll_8960 *pll, u32 reg)
+{
+	return msm_readl(pll->mmio + reg);
+}
+
+static inline struct hdmi_phy *pll_get_phy(struct hdmi_pll_8960 *pll)
+{
+	return platform_get_drvdata(pll->pdev);
+}
+
+static int hdmi_pll_enable(struct clk_hw *hw)
+{
+	struct hdmi_pll_8960 *pll = hw_clk_to_pll(hw);
+	struct hdmi_phy *phy = pll_get_phy(pll);
+	int timeout_count, pll_lock_retry = 10;
+	unsigned int val;
+
+	DBG("");
+
+	/* Assert PLL S/W reset */
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x8d);
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG0, 0x10);
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG1, 0x1a);
+
+	/* Wait for a short time before de-asserting
+	 * to allow the hardware to complete its job.
+	 * This much of delay should be fine for hardware
+	 * to assert and de-assert.
+	 */
+	udelay(10);
+
+	/* De-assert PLL S/W reset */
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x0d);
+
+	val = hdmi_phy_read(phy, REG_HDMI_8960_PHY_REG12);
+	val |= HDMI_8960_PHY_REG12_SW_RESET;
+	/* Assert PHY S/W reset */
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG12, val);
+	val &= ~HDMI_8960_PHY_REG12_SW_RESET;
+	/*
+	 * Wait for a short time before de-asserting to allow the hardware to
+	 * complete its job. This much of delay should be fine for hardware to
+	 * assert and de-assert.
+	 */
+	udelay(10);
+	/* De-assert PHY S/W reset */
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG12, val);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG2,  0x3f);
+
+	val = hdmi_phy_read(phy, REG_HDMI_8960_PHY_REG12);
+	val |= HDMI_8960_PHY_REG12_PWRDN_B;
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG12, val);
+	/* Wait 10 us for enabling global power for PHY */
+	mb();
+	udelay(10);
+
+	val = pll_read(pll, REG_HDMI_8960_PHY_PLL_PWRDN_B);
+	val |= HDMI_8960_PHY_PLL_PWRDN_B_PLL_PWRDN_B;
+	val &= ~HDMI_8960_PHY_PLL_PWRDN_B_PD_PLL;
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_PWRDN_B, val);
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG2, 0x80);
+
+	timeout_count = 1000;
+	while (--pll_lock_retry > 0) {
+		/* are we there yet? */
+		val = pll_read(pll, REG_HDMI_8960_PHY_PLL_STATUS0);
+		if (val & HDMI_8960_PHY_PLL_STATUS0_PLL_LOCK)
+			break;
+
+		udelay(1);
+
+		if (--timeout_count > 0)
+			continue;
+
+		/*
+		 * PLL has still not locked.
+		 * Do a software reset and try again
+		 * Assert PLL S/W reset first
+		 */
+		pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x8d);
+		udelay(10);
+		pll_write(pll, REG_HDMI_8960_PHY_PLL_LOCKDET_CFG2, 0x0d);
+
+		/*
+		 * Wait for a short duration for the PLL calibration
+		 * before checking if the PLL gets locked
+		 */
+		udelay(350);
+
+		timeout_count = 1000;
+	}
+
+	return 0;
+}
+
+static void hdmi_pll_disable(struct clk_hw *hw)
+{
+	struct hdmi_pll_8960 *pll = hw_clk_to_pll(hw);
+	struct hdmi_phy *phy = pll_get_phy(pll);
+	unsigned int val;
+
+	DBG("");
+
+	val = hdmi_phy_read(phy, REG_HDMI_8960_PHY_REG12);
+	val &= ~HDMI_8960_PHY_REG12_PWRDN_B;
+	hdmi_phy_write(phy, REG_HDMI_8960_PHY_REG12, val);
+
+	val = pll_read(pll, REG_HDMI_8960_PHY_PLL_PWRDN_B);
+	val |= HDMI_8960_PHY_REG12_SW_RESET;
+	val &= ~HDMI_8960_PHY_REG12_PWRDN_B;
+	pll_write(pll, REG_HDMI_8960_PHY_PLL_PWRDN_B, val);
+	/* Make sure HDMI PHY/PLL are powered down */
+	mb();
+}
+
+static const struct pll_rate *find_rate(unsigned long rate)
+{
+	int i;
+
+	for (i = 1; i < ARRAY_SIZE(freqtbl); i++)
+		if (rate > freqtbl[i].rate)
+			return &freqtbl[i - 1];
+
+	return &freqtbl[i - 1];
+}
+
+static unsigned long hdmi_pll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct hdmi_pll_8960 *pll = hw_clk_to_pll(hw);
+
+	return pll->pixclk;
+}
+
+static long hdmi_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *parent_rate)
+{
+	const struct pll_rate *pll_rate = find_rate(rate);
+
+	return pll_rate->rate;
+}
+
+static int hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+			     unsigned long parent_rate)
+{
+	struct hdmi_pll_8960 *pll = hw_clk_to_pll(hw);
+	const struct pll_rate *pll_rate = find_rate(rate);
+	int i;
+
+	DBG("rate=%lu", rate);
+
+	for (i = 0; i < pll_rate->num_reg; i++)
+		pll_write(pll, pll_rate->conf[i].reg, pll_rate->conf[i].val);
+
+	pll->pixclk = rate;
+
+	return 0;
+}
+
+static const struct clk_ops hdmi_pll_ops = {
+	.enable = hdmi_pll_enable,
+	.disable = hdmi_pll_disable,
+	.recalc_rate = hdmi_pll_recalc_rate,
+	.round_rate = hdmi_pll_round_rate,
+	.set_rate = hdmi_pll_set_rate,
+};
+
+static const char * const hdmi_pll_parents[] = {
+	"pxo",
+};
+
+static struct clk_init_data pll_init = {
+	.name = "hdmi_pll",
+	.ops = &hdmi_pll_ops,
+	.parent_names = hdmi_pll_parents,
+	.num_parents = ARRAY_SIZE(hdmi_pll_parents),
+};
+
+int msm_hdmi_pll_8960_init(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct hdmi_pll_8960 *pll;
+	struct clk *clk;
+	int i;
+
+	/* sanity check: */
+	for (i = 0; i < (ARRAY_SIZE(freqtbl) - 1); i++)
+		if (WARN_ON(freqtbl[i].rate < freqtbl[i + 1].rate))
+			return -EINVAL;
+
+	pll = devm_kzalloc(dev, sizeof(*pll), GFP_KERNEL);
+	if (!pll)
+		return -ENOMEM;
+
+	pll->mmio = msm_ioremap(pdev, "hdmi_pll", "HDMI_PLL");
+	if (IS_ERR(pll->mmio)) {
+		dev_err(dev, "failed to map pll base\n");
+		return -ENOMEM;
+	}
+
+	pll->pdev = pdev;
+	pll->clk_hw.init = &pll_init;
+
+	clk = devm_clk_register(dev, &pll->clk_hw);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to register pll clock\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
index dbd9cc4daf2e..6eab7d0cf6b5 100644
--- a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
+++ b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
index d5d94575fa1b..6688e79cc88e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 28df397c3b04..e233acf52334 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -147,13 +147,6 @@ static void mdp4_crtc_destroy(struct drm_crtc *crtc)
 	kfree(mdp4_crtc);
 }
 
-static bool mdp4_crtc_mode_fixup(struct drm_crtc *crtc,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 /* statically (for now) map planes to mixer stage (z-order): */
 static const int idxs[] = {
 		[VG1]  = 1,
@@ -361,13 +354,6 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc,
 	request_pending(crtc, PENDING_FLIP);
 }
 
-static int mdp4_crtc_set_property(struct drm_crtc *crtc,
-		struct drm_property *property, uint64_t val)
-{
-	// XXX
-	return -EINVAL;
-}
-
 #define CURSOR_WIDTH 64
 #define CURSOR_HEIGHT 64
 
@@ -499,7 +485,7 @@ static const struct drm_crtc_funcs mdp4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = mdp4_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
-	.set_property = mdp4_crtc_set_property,
+	.set_property = drm_atomic_helper_crtc_set_property,
 	.cursor_set = mdp4_crtc_cursor_set,
 	.cursor_move = mdp4_crtc_cursor_move,
 	.reset = drm_atomic_helper_crtc_reset,
@@ -508,7 +494,6 @@ static const struct drm_crtc_funcs mdp4_crtc_funcs = {
 };
 
 static const struct drm_crtc_helper_funcs mdp4_crtc_helper_funcs = {
-	.mode_fixup = mdp4_crtc_mode_fixup,
 	.mode_set_nofb = mdp4_crtc_mode_set_nofb,
 	.disable = mdp4_crtc_disable,
 	.enable = mdp4_crtc_enable,
@@ -575,13 +560,6 @@ uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc)
 	return mdp4_crtc->vblank.irqmask;
 }
 
-void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-	DBG("%s: cancel: %p", mdp4_crtc->name, file);
-	complete_flip(crtc, file);
-}
-
 /* set dma config, ie. the format the encoder wants. */
 void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config)
 {
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c
index 2f57e9453b67..106f0e772595 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c
@@ -47,13 +47,6 @@ static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = {
 	.destroy = mdp4_dsi_encoder_destroy,
 };
 
-static bool mdp4_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder,
 				      struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode)
@@ -163,7 +156,6 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = {
-	.mode_fixup = mdp4_dsi_encoder_mode_fixup,
 	.mode_set = mdp4_dsi_encoder_mode_set,
 	.disable = mdp4_dsi_encoder_disable,
 	.enable = mdp4_dsi_encoder_enable,
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
index a21df54cb50f..35ad78a1dc1c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
@@ -94,13 +94,6 @@ static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = {
 	.destroy = mdp4_dtv_encoder_destroy,
 };
 
-static bool mdp4_dtv_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -234,7 +227,6 @@ static void mdp4_dtv_encoder_enable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs mdp4_dtv_encoder_helper_funcs = {
-	.mode_fixup = mdp4_dtv_encoder_mode_fixup,
 	.mode_set = mdp4_dtv_encoder_mode_set,
 	.enable = mdp4_dtv_encoder_enable,
 	.disable = mdp4_dtv_encoder_disable,
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index 5a8e3d6bcbff..76e1dfb5d25e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -179,19 +179,20 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate,
 	}
 }
 
-static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file)
-{
-	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
-	struct msm_drm_private *priv = mdp4_kms->dev->dev_private;
-	unsigned i;
-
-	for (i = 0; i < priv->num_crtcs; i++)
-		mdp4_crtc_cancel_pending_flip(priv->crtcs[i], file);
-}
+static const char * const iommu_ports[] = {
+	"mdp_port0_cb0", "mdp_port1_cb0",
+};
 
 static void mdp4_destroy(struct msm_kms *kms)
 {
 	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	struct msm_mmu *mmu = mdp4_kms->mmu;
+
+	if (mmu) {
+		mmu->funcs->detach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports));
+		mmu->funcs->destroy(mmu);
+	}
+
 	if (mdp4_kms->blank_cursor_iova)
 		msm_gem_put_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id);
 	if (mdp4_kms->blank_cursor_bo)
@@ -213,7 +214,6 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.wait_for_crtc_commit_done = mdp4_wait_for_crtc_commit_done,
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp4_round_pixclk,
-		.preclose        = mdp4_preclose,
 		.destroy         = mdp4_destroy,
 	},
 	.set_irqmask         = mdp4_set_irqmask,
@@ -326,7 +326,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
 
 		if (priv->hdmi) {
 			/* Construct bridge/connector for HDMI: */
-			ret = hdmi_modeset_init(priv->hdmi, dev, encoder);
+			ret = msm_hdmi_modeset_init(priv->hdmi, dev, encoder);
 			if (ret) {
 				dev_err(dev->dev, "failed to initialize HDMI: %d\n", ret);
 				return ret;
@@ -457,10 +457,6 @@ fail:
 	return ret;
 }
 
-static const char *iommu_ports[] = {
-		"mdp_port0_cb0", "mdp_port1_cb0",
-};
-
 struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 {
 	struct platform_device *pdev = dev->platformdev;
@@ -565,6 +561,8 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 				ARRAY_SIZE(iommu_ports));
 		if (ret)
 			goto fail;
+
+		mdp4_kms->mmu = mmu;
 	} else {
 		dev_info(dev->dev, "no iommu, fallback to phys "
 				"contig buffers for scanout\n");
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index d2c96ef431f4..b2828717be2a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -45,6 +45,7 @@ struct mdp4_kms {
 	struct clk *pclk;
 	struct clk *lut_clk;
 	struct clk *axi_clk;
+	struct msm_mmu *mmu;
 
 	struct mdp_irq error_handler;
 
@@ -199,7 +200,6 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev,
 		enum mdp4_pipe pipe_id, bool private_plane);
 
 uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc);
-void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file);
 void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config);
 void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf, int mixer);
 void mdp4_crtc_wait_for_commit_done(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
index cd63fedb67cc..bc3d8e719c6c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
@@ -260,13 +260,6 @@ static void setup_phy(struct drm_encoder *encoder)
 	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_CFG0, lvds_phy_cfg0);
 }
 
-static bool mdp4_lcdc_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -430,7 +423,6 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs mdp4_lcdc_encoder_helper_funcs = {
-	.mode_fixup = mdp4_lcdc_encoder_mode_fixup,
 	.mode_set = mdp4_lcdc_encoder_mode_set,
 	.disable = mdp4_lcdc_encoder_disable,
 	.enable = mdp4_lcdc_encoder_enable,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
index c37da9c61e29..b275ce11b24b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index 1aa21dba663d..69094cb28103 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -188,13 +188,6 @@ static const struct drm_encoder_funcs mdp5_cmd_encoder_funcs = {
 	.destroy = mdp5_cmd_encoder_destroy,
 };
 
-static bool mdp5_cmd_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -256,7 +249,6 @@ static void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs mdp5_cmd_encoder_helper_funcs = {
-	.mode_fixup = mdp5_cmd_encoder_mode_fixup,
 	.mode_set = mdp5_cmd_encoder_mode_set,
 	.disable = mdp5_cmd_encoder_disable,
 	.enable = mdp5_cmd_encoder_enable,
@@ -340,4 +332,3 @@ fail:
 
 	return ERR_PTR(ret);
 }
-
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 20cee5ce4071..9673b9520b6a 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -185,13 +185,6 @@ static void mdp5_crtc_destroy(struct drm_crtc *crtc)
 	kfree(mdp5_crtc);
 }
 
-static bool mdp5_crtc_mode_fixup(struct drm_crtc *crtc,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 /*
  * blend_setup() - blend all the planes of a CRTC
  *
@@ -468,13 +461,6 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc,
 	request_pending(crtc, PENDING_FLIP);
 }
 
-static int mdp5_crtc_set_property(struct drm_crtc *crtc,
-		struct drm_property *property, uint64_t val)
-{
-	// XXX
-	return -EINVAL;
-}
-
 static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
@@ -625,7 +611,7 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
-	.set_property = mdp5_crtc_set_property,
+	.set_property = drm_atomic_helper_crtc_set_property,
 	.reset = drm_atomic_helper_crtc_reset,
 	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
@@ -634,7 +620,6 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 };
 
 static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = {
-	.mode_fixup = mdp5_crtc_mode_fixup,
 	.mode_set_nofb = mdp5_crtc_mode_set_nofb,
 	.disable = mdp5_crtc_disable,
 	.enable = mdp5_crtc_enable,
@@ -721,12 +706,6 @@ uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc)
 	return mdp5_crtc->vblank.irqmask;
 }
 
-void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	DBG("cancel: %p", file);
-	complete_flip(crtc, file);
-}
-
 void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
 		struct mdp5_interface *intf, struct mdp5_ctl *ctl)
 {
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 0d737cad03a6..1d95f9fd9dc7 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -112,13 +112,6 @@ static const struct drm_encoder_funcs mdp5_encoder_funcs = {
 	.destroy = mdp5_encoder_destroy,
 };
 
-static bool mdp5_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
 		struct drm_display_mode *mode,
 		struct drm_display_mode *adjusted_mode)
@@ -287,7 +280,6 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder)
 }
 
 static const struct drm_encoder_helper_funcs mdp5_encoder_helper_funcs = {
-	.mode_fixup = mdp5_encoder_mode_fixup,
 	.mode_set = mdp5_encoder_mode_set,
 	.disable = mdp5_encoder_disable,
 	.enable = mdp5_encoder_enable,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index e115318402bd..484b4d15e71d 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -117,16 +117,6 @@ static int mdp5_set_split_display(struct msm_kms *kms,
 		return mdp5_encoder_set_split_display(encoder, slave_encoder);
 }
 
-static void mdp5_preclose(struct msm_kms *kms, struct drm_file *file)
-{
-	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
-	struct msm_drm_private *priv = mdp5_kms->dev->dev_private;
-	unsigned i;
-
-	for (i = 0; i < priv->num_crtcs; i++)
-		mdp5_crtc_cancel_pending_flip(priv->crtcs[i], file);
-}
-
 static void mdp5_destroy(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
@@ -164,7 +154,6 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
-		.preclose        = mdp5_preclose,
 		.destroy         = mdp5_destroy,
 	},
 	.set_irqmask         = mdp5_set_irqmask,
@@ -295,7 +284,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 			break;
 		}
 
-		ret = hdmi_modeset_init(priv->hdmi, dev, encoder);
+		ret = msm_hdmi_modeset_init(priv->hdmi, dev, encoder);
 		break;
 	case INTF_DSI:
 	{
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 00730ba08a60..9a25898239d3 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -211,7 +211,6 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev,
 uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc);
 
 int mdp5_crtc_get_lm(struct drm_crtc *crtc);
-void mdp5_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file);
 void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
 		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
 void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/msm/mdp/mdp_common.xml.h b/drivers/gpu/drm/msm/mdp/mdp_common.xml.h
index 0aec1ac1f6d0..452e3518f98b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_common.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp_common.xml.h
@@ -9,7 +9,7 @@ git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
 - /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
@@ -17,11 +17,12 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  29154 bytes, from 2015-08-10 21:25:43)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
 - /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
+- Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9a30807b900b..d52910e2c26c 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -61,7 +61,7 @@ module_param(fbdev, bool, 0600);
 #endif
 
 static char *vram = "16m";
-MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU");
+MODULE_PARM_DESC(vram, "Configure VRAM size (for devices without IOMMU/GPUMMU)");
 module_param(vram, charp, 0);
 
 /*
@@ -196,6 +196,11 @@ static int msm_unload(struct drm_device *dev)
 	}
 
 	drm_kms_helper_poll_fini(dev);
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+	if (fbdev && priv->fbdev)
+		msm_fbdev_free(dev);
+#endif
 	drm_mode_config_cleanup(dev);
 	drm_vblank_cleanup(dev);
 
@@ -1116,7 +1121,7 @@ static int __init msm_drm_register(void)
 	DBG("init");
 	msm_dsi_register();
 	msm_edp_register();
-	hdmi_register();
+	msm_hdmi_register();
 	adreno_register();
 	return platform_driver_register(&msm_platform_driver);
 }
@@ -1125,7 +1130,7 @@ static void __exit msm_drm_unregister(void)
 {
 	DBG("fini");
 	platform_driver_unregister(&msm_platform_driver);
-	hdmi_unregister();
+	msm_hdmi_unregister();
 	adreno_unregister();
 	msm_edp_unregister();
 	msm_dsi_unregister();
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index c1e7bba2fdb7..870dbe58c259 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -240,12 +240,13 @@ struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
 		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd);
 
 struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev);
+void msm_fbdev_free(struct drm_device *dev);
 
 struct hdmi;
-int hdmi_modeset_init(struct hdmi *hdmi, struct drm_device *dev,
+int msm_hdmi_modeset_init(struct hdmi *hdmi, struct drm_device *dev,
 		struct drm_encoder *encoder);
-void __init hdmi_register(void);
-void __exit hdmi_unregister(void);
+void __init msm_hdmi_register(void);
+void __exit msm_hdmi_unregister(void);
 
 struct msm_edp;
 void __init msm_edp_register(void);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index d95af6eba602..d9759bf3482e 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -62,12 +62,8 @@ static int msm_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	struct drm_fb_helper *helper = (struct drm_fb_helper *)info->par;
 	struct msm_fbdev *fbdev = to_msm_fbdev(helper);
 	struct drm_gem_object *drm_obj = fbdev->bo;
-	struct drm_device *dev = helper->dev;
 	int ret = 0;
 
-	if (drm_device_is_unplugged(dev))
-		return -ENODEV;
-
 	ret = drm_gem_mmap_obj(drm_obj, drm_obj->size, vma);
 	if (ret) {
 		pr_err("%s:drm_gem_mmap_obj fail\n", __func__);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 6d7cd3fe21e7..43d2181231c0 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -323,28 +323,27 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct drm_msm_gem_submit *args = data;
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_gem_submit *submit;
-	struct msm_gpu *gpu;
+	struct msm_gpu *gpu = priv->gpu;
 	unsigned i;
 	int ret;
 
+	if (!gpu)
+		return -ENXIO;
+
 	/* for now, we just have 3d pipe.. eventually this would need to
 	 * be more clever to dispatch to appropriate gpu module:
 	 */
 	if (args->pipe != MSM_PIPE_3D0)
 		return -EINVAL;
 
-	gpu = priv->gpu;
-
 	if (args->nr_cmds > MAX_CMDS)
 		return -EINVAL;
 
-	mutex_lock(&dev->struct_mutex);
-
 	submit = submit_create(dev, gpu, args->nr_bos);
-	if (!submit) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!submit)
+		return -ENOMEM;
+
+	mutex_lock(&dev->struct_mutex);
 
 	ret = submit_lookup_objects(submit, args, file);
 	if (ret)
@@ -419,8 +418,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	args->fence = submit->fence;
 
 out:
-	if (submit)
-		submit_cleanup(submit, !!ret);
+	submit_cleanup(submit, !!ret);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 7ac2f1997e4a..a7a0b6d9b057 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -31,13 +31,15 @@ static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
 	return 0;
 }
 
-static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
+static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names,
+			    int cnt)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
 	return iommu_attach_device(iommu->domain, mmu->dev);
 }
 
-static void msm_iommu_detach(struct msm_mmu *mmu, const char **names, int cnt)
+static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names,
+			     int cnt)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
 	iommu_detach_device(iommu->domain, mmu->dev);
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index 7cd88d9dc155..b8ca9a0e9170 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -21,8 +21,8 @@
 #include <linux/iommu.h>
 
 struct msm_mmu_funcs {
-	int (*attach)(struct msm_mmu *mmu, const char **names, int cnt);
-	void (*detach)(struct msm_mmu *mmu, const char **names, int cnt);
+	int (*attach)(struct msm_mmu *mmu, const char * const *names, int cnt);
+	void (*detach)(struct msm_mmu *mmu, const char * const *names, int cnt);
 	int (*map)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
 			unsigned len, int prot);
 	int (*unmap)(struct msm_mmu *mmu, uint32_t iova, struct sg_table *sgt,
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 6f04397d43a7..55ccbf006b5e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -227,13 +227,6 @@ nv_crtc_dpms(struct drm_crtc *crtc, int mode)
 	NVWriteVgaCrtc(dev, nv_crtc->index, NV_CIO_CRE_RPC1_INDEX, crtc1A);
 }
 
-static bool
-nv_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode,
-		   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void
 nv_crtc_mode_set_vga(struct drm_crtc *crtc, struct drm_display_mode *mode)
 {
@@ -1093,7 +1086,6 @@ static const struct drm_crtc_helper_funcs nv04_crtc_helper_funcs = {
 	.dpms = nv_crtc_dpms,
 	.prepare = nv_crtc_prepare,
 	.commit = nv_crtc_commit,
-	.mode_fixup = nv_crtc_mode_fixup,
 	.mode_set = nv_crtc_mode_set,
 	.mode_set_base = nv04_crtc_mode_set_base,
 	.mode_set_base_atomic = nv04_crtc_mode_set_base_atomic,
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index 85b7827eb782..46301ec018ce 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -3,19 +3,27 @@
 
 struct kepler_channel_gpfifo_a_v0 {
 	__u8  version;
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_GR                               0x01
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_MSPDEC                           0x02
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_MSPPP                            0x04
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_MSVLD                            0x08
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE0                              0x10
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE1                              0x20
-#define KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_ENC                              0x40
-	__u8  engine;
+	__u8  pad01[5];
 	__u16 chid;
+#define NVA06F_V0_ENGINE_SW                                          0x00000001
+#define NVA06F_V0_ENGINE_GR                                          0x00000002
+#define NVA06F_V0_ENGINE_SEC                                         0x00000004
+#define NVA06F_V0_ENGINE_MSVLD                                       0x00000010
+#define NVA06F_V0_ENGINE_MSPDEC                                      0x00000020
+#define NVA06F_V0_ENGINE_MSPPP                                       0x00000040
+#define NVA06F_V0_ENGINE_MSENC                                       0x00000080
+#define NVA06F_V0_ENGINE_VIC                                         0x00000100
+#define NVA06F_V0_ENGINE_NVDEC                                       0x00000200
+#define NVA06F_V0_ENGINE_NVENC0                                      0x00000400
+#define NVA06F_V0_ENGINE_NVENC1                                      0x00000800
+#define NVA06F_V0_ENGINE_CE0                                         0x00010000
+#define NVA06F_V0_ENGINE_CE1                                         0x00020000
+#define NVA06F_V0_ENGINE_CE2                                         0x00040000
+	__u32 engines;
 	__u32 ilength;
 	__u64 ioffset;
 	__u64 vm;
 };
 
-#define KEPLER_CHANNEL_GPFIFO_A_V0_NTFY_UEVENT                             0x00
+#define NVA06F_V0_NTFY_UEVENT                                              0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 4179cd65ac0a..982aad8fa645 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -37,6 +37,7 @@
 #define G82_CHANNEL_GPFIFO                            /* cl826f.h */ 0x0000826f
 #define FERMI_CHANNEL_GPFIFO                          /* cl906f.h */ 0x0000906f
 #define KEPLER_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000a06f
+#define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
@@ -48,7 +49,7 @@
 #define GK104_DISP                                    /* cl5070.h */ 0x00009170
 #define GK110_DISP                                    /* cl5070.h */ 0x00009270
 #define GM107_DISP                                    /* cl5070.h */ 0x00009470
-#define GM204_DISP                                    /* cl5070.h */ 0x00009570
+#define GM200_DISP                                    /* cl5070.h */ 0x00009570
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -84,7 +85,7 @@
 #define GK104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000917d
 #define GK110_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000927d
 #define GM107_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000947d
-#define GM204_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
+#define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h
index e0ed2f4b2f43..bcb981711617 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/device.h
@@ -62,6 +62,7 @@ u64  nvif_device_time(struct nvif_device *);
 #define nvxx_gpio(a) nvxx_device(a)->gpio
 #define nvxx_clk(a) nvxx_device(a)->clk
 #define nvxx_i2c(a) nvxx_device(a)->i2c
+#define nvxx_iccsense(a) nvxx_device(a)->iccsense
 #define nvxx_therm(a) nvxx_device(a)->therm
 #define nvxx_volt(a) nvxx_device(a)->volt
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 913192c94876..4993a863adb9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -22,30 +22,41 @@ enum nvkm_devidx {
 	NVKM_SUBDEV_BAR,
 	NVKM_SUBDEV_PMU,
 	NVKM_SUBDEV_VOLT,
+	NVKM_SUBDEV_ICCSENSE,
 	NVKM_SUBDEV_THERM,
 	NVKM_SUBDEV_CLK,
+	NVKM_SUBDEV_SECBOOT,
 
-	NVKM_ENGINE_DMAOBJ,
-	NVKM_ENGINE_IFB,
-	NVKM_ENGINE_FIFO,
-	NVKM_ENGINE_SW,
-	NVKM_ENGINE_GR,
-	NVKM_ENGINE_MPEG,
-	NVKM_ENGINE_ME,
-	NVKM_ENGINE_VP,
-	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_BSP,
-	NVKM_ENGINE_MSPPP,
+
 	NVKM_ENGINE_CE0,
 	NVKM_ENGINE_CE1,
 	NVKM_ENGINE_CE2,
-	NVKM_ENGINE_VIC,
-	NVKM_ENGINE_MSENC,
+	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE2,
+
+	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_DISP,
-	NVKM_ENGINE_PM,
+	NVKM_ENGINE_DMAOBJ,
+	NVKM_ENGINE_FIFO,
+	NVKM_ENGINE_GR,
+	NVKM_ENGINE_IFB,
+	NVKM_ENGINE_ME,
+	NVKM_ENGINE_MPEG,
+	NVKM_ENGINE_MSENC,
+	NVKM_ENGINE_MSPDEC,
+	NVKM_ENGINE_MSPPP,
 	NVKM_ENGINE_MSVLD,
+
+	NVKM_ENGINE_NVENC0,
+	NVKM_ENGINE_NVENC1,
+	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC1,
+
+	NVKM_ENGINE_NVDEC,
+	NVKM_ENGINE_PM,
 	NVKM_ENGINE_SEC,
-	NVKM_ENGINE_MSPDEC,
+	NVKM_ENGINE_SW,
+	NVKM_ENGINE_VIC,
+	NVKM_ENGINE_VP,
 
 	NVKM_SUBDEV_NR
 };
@@ -109,6 +120,7 @@ struct nvkm_device {
 	struct nvkm_gpio *gpio;
 	struct nvkm_i2c *i2c;
 	struct nvkm_subdev *ibus;
+	struct nvkm_iccsense *iccsense;
 	struct nvkm_instmem *imem;
 	struct nvkm_ltc *ltc;
 	struct nvkm_mc *mc;
@@ -116,6 +128,7 @@ struct nvkm_device {
 	struct nvkm_subdev *mxm;
 	struct nvkm_pci *pci;
 	struct nvkm_pmu *pmu;
+	struct nvkm_secboot *secboot;
 	struct nvkm_therm *therm;
 	struct nvkm_timer *timer;
 	struct nvkm_volt *volt;
@@ -134,6 +147,8 @@ struct nvkm_device {
 	struct nvkm_engine *mspdec;
 	struct nvkm_engine *msppp;
 	struct nvkm_engine *msvld;
+	struct nvkm_engine *nvenc[2];
+	struct nvkm_engine *nvdec;
 	struct nvkm_pm *pm;
 	struct nvkm_engine *sec;
 	struct nvkm_sw *sw;
@@ -164,46 +179,50 @@ struct nvkm_device_quirk {
 struct nvkm_device_chip {
 	const char *name;
 
-	int (*bar    )(struct nvkm_device *, int idx, struct nvkm_bar **);
-	int (*bios   )(struct nvkm_device *, int idx, struct nvkm_bios **);
-	int (*bus    )(struct nvkm_device *, int idx, struct nvkm_bus **);
-	int (*clk    )(struct nvkm_device *, int idx, struct nvkm_clk **);
-	int (*devinit)(struct nvkm_device *, int idx, struct nvkm_devinit **);
-	int (*fb     )(struct nvkm_device *, int idx, struct nvkm_fb **);
-	int (*fuse   )(struct nvkm_device *, int idx, struct nvkm_fuse **);
-	int (*gpio   )(struct nvkm_device *, int idx, struct nvkm_gpio **);
-	int (*i2c    )(struct nvkm_device *, int idx, struct nvkm_i2c **);
-	int (*ibus   )(struct nvkm_device *, int idx, struct nvkm_subdev **);
-	int (*imem   )(struct nvkm_device *, int idx, struct nvkm_instmem **);
-	int (*ltc    )(struct nvkm_device *, int idx, struct nvkm_ltc **);
-	int (*mc     )(struct nvkm_device *, int idx, struct nvkm_mc **);
-	int (*mmu    )(struct nvkm_device *, int idx, struct nvkm_mmu **);
-	int (*mxm    )(struct nvkm_device *, int idx, struct nvkm_subdev **);
-	int (*pci    )(struct nvkm_device *, int idx, struct nvkm_pci **);
-	int (*pmu    )(struct nvkm_device *, int idx, struct nvkm_pmu **);
-	int (*therm  )(struct nvkm_device *, int idx, struct nvkm_therm **);
-	int (*timer  )(struct nvkm_device *, int idx, struct nvkm_timer **);
-	int (*volt   )(struct nvkm_device *, int idx, struct nvkm_volt **);
-
-	int (*bsp    )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*ce[3]  )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*cipher )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*disp   )(struct nvkm_device *, int idx, struct nvkm_disp **);
-	int (*dma    )(struct nvkm_device *, int idx, struct nvkm_dma **);
-	int (*fifo   )(struct nvkm_device *, int idx, struct nvkm_fifo **);
-	int (*gr     )(struct nvkm_device *, int idx, struct nvkm_gr **);
-	int (*ifb    )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*me     )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*mpeg   )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*msenc  )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*mspdec )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*msppp  )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*msvld  )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*pm     )(struct nvkm_device *, int idx, struct nvkm_pm **);
-	int (*sec    )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*sw     )(struct nvkm_device *, int idx, struct nvkm_sw **);
-	int (*vic    )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*vp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*bar     )(struct nvkm_device *, int idx, struct nvkm_bar **);
+	int (*bios    )(struct nvkm_device *, int idx, struct nvkm_bios **);
+	int (*bus     )(struct nvkm_device *, int idx, struct nvkm_bus **);
+	int (*clk     )(struct nvkm_device *, int idx, struct nvkm_clk **);
+	int (*devinit )(struct nvkm_device *, int idx, struct nvkm_devinit **);
+	int (*fb      )(struct nvkm_device *, int idx, struct nvkm_fb **);
+	int (*fuse    )(struct nvkm_device *, int idx, struct nvkm_fuse **);
+	int (*gpio    )(struct nvkm_device *, int idx, struct nvkm_gpio **);
+	int (*i2c     )(struct nvkm_device *, int idx, struct nvkm_i2c **);
+	int (*ibus    )(struct nvkm_device *, int idx, struct nvkm_subdev **);
+	int (*iccsense)(struct nvkm_device *, int idx, struct nvkm_iccsense **);
+	int (*imem    )(struct nvkm_device *, int idx, struct nvkm_instmem **);
+	int (*ltc     )(struct nvkm_device *, int idx, struct nvkm_ltc **);
+	int (*mc      )(struct nvkm_device *, int idx, struct nvkm_mc **);
+	int (*mmu     )(struct nvkm_device *, int idx, struct nvkm_mmu **);
+	int (*mxm     )(struct nvkm_device *, int idx, struct nvkm_subdev **);
+	int (*pci     )(struct nvkm_device *, int idx, struct nvkm_pci **);
+	int (*pmu     )(struct nvkm_device *, int idx, struct nvkm_pmu **);
+	int (*secboot )(struct nvkm_device *, int idx, struct nvkm_secboot **);
+	int (*therm   )(struct nvkm_device *, int idx, struct nvkm_therm **);
+	int (*timer   )(struct nvkm_device *, int idx, struct nvkm_timer **);
+	int (*volt    )(struct nvkm_device *, int idx, struct nvkm_volt **);
+
+	int (*bsp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*ce[3]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*cipher  )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*disp    )(struct nvkm_device *, int idx, struct nvkm_disp **);
+	int (*dma     )(struct nvkm_device *, int idx, struct nvkm_dma **);
+	int (*fifo    )(struct nvkm_device *, int idx, struct nvkm_fifo **);
+	int (*gr      )(struct nvkm_device *, int idx, struct nvkm_gr **);
+	int (*ifb     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*me      )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*mpeg    )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*msenc   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*mspdec  )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*msppp   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*msvld   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*nvenc[2])(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*nvdec   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*pm      )(struct nvkm_device *, int idx, struct nvkm_pm **);
+	int (*sec     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*sw      )(struct nvkm_device *, int idx, struct nvkm_sw **);
+	int (*vic     )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*vp      )(struct nvkm_device *, int idx, struct nvkm_engine **);
 };
 
 struct nvkm_device *nvkm_device_find(u64 name);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h
new file mode 100644
index 000000000000..a626ce378f04
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h
@@ -0,0 +1,11 @@
+#ifndef __NVKM_FIRMWARE_H__
+#define __NVKM_FIRMWARE_H__
+
+#include <core/device.h>
+
+int nvkm_firmware_get(struct nvkm_device *device, const char *fwname,
+		      const struct firmware **fw);
+
+void nvkm_firmware_put(const struct firmware *fw);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h
index d4f56eafb073..c23da4f05929 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/gpuobj.h
@@ -37,4 +37,8 @@ int nvkm_gpuobj_wrap(struct nvkm_memory *, struct nvkm_gpuobj **);
 int nvkm_gpuobj_map(struct nvkm_gpuobj *, struct nvkm_vm *, u32 access,
 		    struct nvkm_vma *);
 void nvkm_gpuobj_unmap(struct nvkm_vma *);
+void nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src,
+			   u32 length);
+void nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset,
+			     u32 length);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index e2e22cd5305b..594d719ba41e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -5,5 +5,6 @@
 int gt215_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gf100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gk104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
-int gm204_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index efc74d03346b..d4fdce27b297 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -31,5 +31,5 @@ int gf119_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gk104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gk110_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
-int gm204_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 9e6644955d19..15ddfcf5e8db 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -60,8 +60,10 @@ int nv50_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int g84_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gf100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gk104_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gk110_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gk208_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gk20a_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
-int gm204_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gm107_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gm200_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index f126e54d2e30..6515f5810a26 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -40,7 +40,6 @@ int gk110b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gk208_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gk20a_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm107_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
-int gm204_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
-int gm206_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h
new file mode 100644
index 000000000000..748ea9b7e559
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/msenc.h
@@ -0,0 +1,4 @@
+#ifndef __NVKM_MSENC_H__
+#define __NVKM_MSENC_H__
+#include <core/engine.h>
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h
new file mode 100644
index 000000000000..30b76d13fdcb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h
@@ -0,0 +1,4 @@
+#ifndef __NVKM_NVDEC_H__
+#define __NVKM_NVDEC_H__
+#include <core/engine.h>
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h
new file mode 100644
index 000000000000..8a819328059b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvenc.h
@@ -0,0 +1,4 @@
+#ifndef __NVKM_NVENC_H__
+#define __NVKM_NVENC_H__
+#include <core/engine.h>
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h
new file mode 100644
index 000000000000..2b0dc4c695c2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/vic.h
@@ -0,0 +1,4 @@
+#ifndef __NVKM_VIC_H__
+#define __NVKM_VIC_H__
+#include <core/engine.h>
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h
index 6d3bedc633b3..bb49bd5f879e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/extdev.h
@@ -5,6 +5,9 @@ enum nvbios_extdev_type {
 	NVBIOS_EXTDEV_VT1103M		= 0x40,
 	NVBIOS_EXTDEV_PX3540		= 0x41,
 	NVBIOS_EXTDEV_VT1105M		= 0x42, /* or close enough... */
+	NVBIOS_EXTDEV_INA219		= 0x4c,
+	NVBIOS_EXTDEV_INA209		= 0x4d,
+	NVBIOS_EXTDEV_INA3221		= 0x4e,
 	NVBIOS_EXTDEV_ADT7473		= 0x70, /* can also be a LM64 */
 	NVBIOS_EXTDEV_HDCP_EEPROM	= 0x90,
 	NVBIOS_EXTDEV_NONE		= 0xff,
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h
new file mode 100644
index 000000000000..9cb97477248b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h
@@ -0,0 +1,16 @@
+#ifndef __NVBIOS_ICCSENSE_H__
+#define __NVBIOS_ICCSENSE_H__
+struct pwr_rail_t {
+	u8 mode;
+	u8 extdev_id;
+	u8 resistor_mohm;
+	u8 rail;
+};
+
+struct nvbios_iccsense {
+	int nr_entry;
+	struct pwr_rail_t *rail;
+};
+
+int nvbios_iccsense_parse(struct nvkm_bios *, struct nvbios_iccsense *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
index 6b33bc058924..fb54417bc458 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
@@ -121,4 +121,5 @@ int gt215_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int gf100_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int gk104_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int gk20a_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
+int gm20b_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
index 6c1407fd317b..193626c69517 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
@@ -27,5 +27,5 @@ int gt215_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int mcp89_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
-int gm204_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
+int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
index 6b6224dbd5bb..a63c5ac69f66 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
@@ -89,7 +89,7 @@ int g94_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gf117_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gf119_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 int gk104_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
-int gm204_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
+int gm200_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **);
 
 static inline int
 nvkm_rdi2cr(struct i2c_adapter *adap, u8 addr, u8 reg)
@@ -108,6 +108,22 @@ nvkm_rdi2cr(struct i2c_adapter *adap, u8 addr, u8 reg)
 }
 
 static inline int
+nv_rd16i2cr(struct i2c_adapter *adap, u8 addr, u8 reg)
+{
+	u8 val[2];
+	struct i2c_msg msgs[] = {
+		{ .addr = addr, .flags = 0, .len = 1, .buf = &reg },
+		{ .addr = addr, .flags = I2C_M_RD, .len = 2, .buf = val },
+	};
+
+	int ret = i2c_transfer(adap, msgs, ARRAY_SIZE(msgs));
+	if (ret != 2)
+		return -EIO;
+
+	return val[0] << 8 | val[1];
+}
+
+static inline int
 nvkm_wri2cr(struct i2c_adapter *adap, u8 addr, u8 reg, u8 val)
 {
 	u8 buf[2] = { reg, val };
@@ -122,6 +138,21 @@ nvkm_wri2cr(struct i2c_adapter *adap, u8 addr, u8 reg, u8 val)
 	return 0;
 }
 
+static inline int
+nv_wr16i2cr(struct i2c_adapter *adap, u8 addr, u8 reg, u16 val)
+{
+	u8 buf[3] = { reg, val >> 8, val & 0xff};
+	struct i2c_msg msgs[] = {
+		{ .addr = addr, .flags = 0, .len = 3, .buf = buf },
+	};
+
+	int ret = i2c_transfer(adap, msgs, ARRAY_SIZE(msgs));
+	if (ret != 1)
+		return -EIO;
+
+	return 0;
+}
+
 static inline bool
 nvkm_probe_i2c(struct i2c_adapter *adap, u8 addr)
 {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
index ea23e24a246c..c4ecf255ff39 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
@@ -6,5 +6,5 @@ int gf100_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 int gf117_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 int gk104_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 int gk20a_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
-int gm204_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
+int gm200_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
new file mode 100644
index 000000000000..530c6215fe4f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
@@ -0,0 +1,17 @@
+#ifndef __NVKM_ICCSENSE_H__
+#define __NVKM_ICCSENSE_H__
+
+#include <core/subdev.h>
+
+struct nkvm_iccsense_rail;
+struct nvkm_iccsense {
+	struct nvkm_subdev subdev;
+	u8 rail_count;
+	bool data_valid;
+	struct nvkm_iccsense_rail *rails;
+};
+
+int gf100_iccsense_new(struct nvkm_device *, int index, struct nvkm_iccsense **);
+int nvkm_iccsense_read(struct nvkm_iccsense *iccsense, u8 idx);
+int nvkm_iccsense_read_all(struct nvkm_iccsense *iccsense);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index 0ffa2ec106d6..c6b90b6543b3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -37,5 +37,5 @@ int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
-int gm204_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
new file mode 100644
index 000000000000..c6edd95a5b69
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECURE_BOOT_H__
+#define __NVKM_SECURE_BOOT_H__
+
+#include <core/subdev.h>
+
+enum nvkm_secboot_falcon {
+	NVKM_SECBOOT_FALCON_PMU	= 0,
+	NVKM_SECBOOT_FALCON_RESERVED = 1,
+	NVKM_SECBOOT_FALCON_FECS = 2,
+	NVKM_SECBOOT_FALCON_GPCCS = 3,
+	NVKM_SECBOOT_FALCON_END = 4,
+	NVKM_SECBOOT_FALCON_INVALID = 0xffffffff,
+};
+
+/**
+ * @base:		base IO address of the falcon performing secure boot
+ * @irq_mask:		IRQ mask of the falcon performing secure boot
+ * @enable_mask:	enable mask of the falcon performing secure boot
+*/
+struct nvkm_secboot {
+	const struct nvkm_secboot_func *func;
+	struct nvkm_subdev subdev;
+
+	u32 base;
+	u32 irq_mask;
+	u32 enable_mask;
+};
+#define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
+
+bool nvkm_secboot_is_managed(struct nvkm_secboot *, enum nvkm_secboot_falcon);
+int nvkm_secboot_reset(struct nvkm_secboot *, u32 falcon);
+int nvkm_secboot_start(struct nvkm_secboot *, u32 falcon);
+
+int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
+int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index b458d046dba7..feff55cff05b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -20,4 +20,5 @@ int nvkm_volt_set_id(struct nvkm_volt *, u8 id, int condition);
 int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
+int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 50f52ffe5b0c..a59e524c028c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -263,13 +263,23 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	/* hack to allow channel engine type specification on kepler */
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
 		if (init->fb_ctxdma_handle != ~0)
-			init->fb_ctxdma_handle = KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_GR;
-		else
-			init->fb_ctxdma_handle = init->tt_ctxdma_handle;
+			init->fb_ctxdma_handle = NVA06F_V0_ENGINE_GR;
+		else {
+			init->fb_ctxdma_handle = 0;
+#define _(A,B) if (init->tt_ctxdma_handle & (A)) init->fb_ctxdma_handle |= (B)
+			_(0x01, NVA06F_V0_ENGINE_GR);
+			_(0x02, NVA06F_V0_ENGINE_MSPDEC);
+			_(0x04, NVA06F_V0_ENGINE_MSPPP);
+			_(0x08, NVA06F_V0_ENGINE_MSVLD);
+			_(0x10, NVA06F_V0_ENGINE_CE0);
+			_(0x20, NVA06F_V0_ENGINE_CE1);
+			_(0x40, NVA06F_V0_ENGINE_MSENC);
+#undef _
+		}
 
 		/* allow flips to be executed if this is a graphics channel */
 		init->tt_ctxdma_handle = 0;
-		if (init->fb_ctxdma_handle == KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_GR)
+		if (init->fb_ctxdma_handle == NVA06F_V0_ENGINE_GR)
 			init->tt_ctxdma_handle = 1;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index d5e6938cc6bc..cdf522770cfa 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -314,7 +314,7 @@ void nouveau_register_dsm_handler(void)
 	if (!r)
 		return;
 
-	vga_switcheroo_register_handler(&nouveau_dsm_handler);
+	vga_switcheroo_register_handler(&nouveau_dsm_handler, 0);
 }
 
 /* Must be called for Optimus models before the card can be turned off */
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index e3acc35e3805..2cdaea58678d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1502,7 +1502,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	}
 #endif
 
-#ifdef CONFIG_SWIOTLB
+#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
 	if (swiotlb_nr_tbl()) {
 		return ttm_dma_populate((void *)ttm, dev->dev);
 	}
@@ -1570,7 +1570,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	}
 #endif
 
-#ifdef CONFIG_SWIOTLB
+#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
 	if (swiotlb_nr_tbl()) {
 		ttm_dma_unpopulate((void *)ttm, dev->dev);
 		return;
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 3f804a8c590c..879655c03ae9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -192,6 +192,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		    u32 engine, struct nouveau_channel **pchan)
 {
 	static const u16 oclasses[] = { MAXWELL_CHANNEL_GPFIFO_A,
+					KEPLER_CHANNEL_GPFIFO_B,
 					KEPLER_CHANNEL_GPFIFO_A,
 					FERMI_CHANNEL_GPFIFO,
 					G82_CHANNEL_GPFIFO,
@@ -217,7 +218,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 	do {
 		if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) {
 			args.kepler.version = 0;
-			args.kepler.engine  = engine;
+			args.kepler.engines = engine;
 			args.kepler.ilength = 0x02000;
 			args.kepler.ioffset = 0x10000 + chan->push.vma.offset;
 			args.kepler.vm = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index fcebfae5d426..ae96ebc490fb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -27,6 +27,7 @@
 #include <acpi/button.h>
 
 #include <linux/pm_runtime.h>
+#include <linux/vga_switcheroo.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
@@ -153,6 +154,17 @@ nouveau_connector_ddc_detect(struct drm_connector *connector)
 			if (ret == 0)
 				break;
 		} else
+		if ((vga_switcheroo_handler_flags() &
+		     VGA_SWITCHEROO_CAN_SWITCH_DDC) &&
+		    nv_encoder->dcb->type == DCB_OUTPUT_LVDS &&
+		    nv_encoder->i2c) {
+			int ret;
+			vga_switcheroo_lock_ddc(dev->pdev);
+			ret = nvkm_probe_i2c(nv_encoder->i2c, 0x50);
+			vga_switcheroo_unlock_ddc(dev->pdev);
+			if (ret)
+				break;
+		} else
 		if (nv_encoder->i2c) {
 			if (nvkm_probe_i2c(nv_encoder->i2c, 0x50))
 				break;
@@ -265,7 +277,14 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
 
 	nv_encoder = nouveau_connector_ddc_detect(connector);
 	if (nv_encoder && (i2c = nv_encoder->i2c) != NULL) {
-		nv_connector->edid = drm_get_edid(connector, i2c);
+		if ((vga_switcheroo_handler_flags() &
+		     VGA_SWITCHEROO_CAN_SWITCH_DDC) &&
+		    nv_connector->type == DCB_CONNECTOR_LVDS)
+			nv_connector->edid = drm_get_edid_switcheroo(connector,
+								     i2c);
+		else
+			nv_connector->edid = drm_get_edid(connector, i2c);
+
 		drm_mode_connector_update_edid_property(connector,
 							nv_connector->edid);
 		if (!nv_connector->edid) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 20935eb2a09e..7ce7fa5cb5e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -495,7 +495,7 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
 		static const u16 oclass[] = {
-			GM204_DISP,
+			GM200_DISP,
 			GM107_DISP,
 			GK110_DISP,
 			GK104_DISP,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 2f2f252e3fb6..d06877d9c1ed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -22,11 +22,13 @@
  * Authors: Ben Skeggs
  */
 
+#include <linux/apple-gmux.h>
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
+#include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 
 #include "drmP.h"
@@ -196,6 +198,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 			break;
 		case FERMI_CHANNEL_GPFIFO:
 		case KEPLER_CHANNEL_GPFIFO_A:
+		case KEPLER_CHANNEL_GPFIFO_B:
 		case MAXWELL_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
@@ -213,13 +216,13 @@ nouveau_accel_init(struct nouveau_drm *drm)
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
 		ret = nouveau_channel_new(drm, &drm->device,
-					  KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE0|
-					  KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE1,
+					  NVA06F_V0_ENGINE_CE0 |
+					  NVA06F_V0_ENGINE_CE1,
 					  0, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 
-		arg0 = KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_GR;
+		arg0 = NVA06F_V0_ENGINE_GR;
 		arg1 = 1;
 	} else
 	if (device->info.chipset >= 0xa3 &&
@@ -312,6 +315,15 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	bool boot = false;
 	int ret;
 
+	/*
+	 * apple-gmux is needed on dual GPU MacBook Pro
+	 * to probe the panel if we're the inactive GPU.
+	 */
+	if (IS_ENABLED(CONFIG_VGA_ARB) && IS_ENABLED(CONFIG_VGA_SWITCHEROO) &&
+	    apple_gmux_present() && pdev != vga_default_device() &&
+	    !vga_switcheroo_handler_flags())
+		return -EPROBE_DEFER;
+
 	/* remove conflicting drivers (vesafb, efifb etc) */
 	aper = alloc_apertures(3);
 	if (!aper)
@@ -364,7 +376,7 @@ nouveau_get_hdmi_dev(struct nouveau_drm *drm)
 	struct pci_dev *pdev = drm->dev->pdev;
 
 	if (!pdev) {
-		DRM_INFO("not a PCI device; no HDMI\n");
+		NV_DEBUG(drm, "not a PCI device; no HDMI\n");
 		drm->hdmi_device = NULL;
 		return;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 8e13467d0ddb..67edd2f5b71a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -34,6 +34,7 @@
 #include "nouveau_drm.h"
 #include "nouveau_hwmon.h"
 
+#include <nvkm/subdev/iccsense.h>
 #include <nvkm/subdev/volt.h>
 
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
@@ -543,6 +544,24 @@ nouveau_hwmon_get_in0_label(struct device *d,
 static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO,
 			  nouveau_hwmon_get_in0_label, NULL, 0);
 
+static ssize_t
+nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
+			       char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	int result = nvkm_iccsense_read_all(iccsense);
+
+	if (result < 0)
+		return result;
+
+	return sprintf(buf, "%i\n", result);
+}
+
+static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO,
+			  nouveau_hwmon_get_power1_input, NULL, 0);
+
 static struct attribute *hwmon_default_attributes[] = {
 	&sensor_dev_attr_name.dev_attr.attr,
 	&sensor_dev_attr_update_rate.dev_attr.attr,
@@ -579,6 +598,11 @@ static struct attribute *hwmon_in0_attributes[] = {
 	NULL
 };
 
+static struct attribute *hwmon_power_attributes[] = {
+	&sensor_dev_attr_power1_input.dev_attr.attr,
+	NULL
+};
+
 static const struct attribute_group hwmon_default_attrgroup = {
 	.attrs = hwmon_default_attributes,
 };
@@ -594,6 +618,9 @@ static const struct attribute_group hwmon_pwm_fan_attrgroup = {
 static const struct attribute_group hwmon_in0_attrgroup = {
 	.attrs = hwmon_in0_attributes,
 };
+static const struct attribute_group hwmon_power_attrgroup = {
+	.attrs = hwmon_power_attributes,
+};
 #endif
 
 int
@@ -603,6 +630,7 @@ nouveau_hwmon_init(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nvkm_therm *therm = nvxx_therm(&drm->device);
 	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
 	struct nouveau_hwmon *hwmon;
 	struct device *hwmon_dev;
 	int ret = 0;
@@ -612,10 +640,7 @@ nouveau_hwmon_init(struct drm_device *dev)
 		return -ENOMEM;
 	hwmon->dev = dev;
 
-	if (!therm || !therm->attr_get || !therm->attr_set)
-		return -ENODEV;
-
-	hwmon_dev = hwmon_device_register(&dev->pdev->dev);
+	hwmon_dev = hwmon_device_register(dev->dev);
 	if (IS_ERR(hwmon_dev)) {
 		ret = PTR_ERR(hwmon_dev);
 		NV_ERROR(drm, "Unable to register hwmon device: %d\n", ret);
@@ -628,26 +653,28 @@ nouveau_hwmon_init(struct drm_device *dev)
 	if (ret)
 		goto error;
 
-	/* if the card has a working thermal sensor */
-	if (nvkm_therm_temp_get(therm) >= 0) {
-		ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_temp_attrgroup);
-		if (ret)
-			goto error;
-	}
-
-	/* if the card has a pwm fan */
-	/*XXX: incorrect, need better detection for this, some boards have
-	 *     the gpio entries for pwm fan control even when there's no
-	 *     actual fan connected to it... therm table? */
-	if (therm->fan_get && therm->fan_get(therm) >= 0) {
-		ret = sysfs_create_group(&hwmon_dev->kobj,
-					 &hwmon_pwm_fan_attrgroup);
-		if (ret)
-			goto error;
+	if (therm && therm->attr_get && therm->attr_set) {
+		/* if the card has a working thermal sensor */
+		if (nvkm_therm_temp_get(therm) >= 0) {
+			ret = sysfs_create_group(&hwmon_dev->kobj, &hwmon_temp_attrgroup);
+			if (ret)
+				goto error;
+		}
+
+		/* if the card has a pwm fan */
+		/*XXX: incorrect, need better detection for this, some boards have
+		 *     the gpio entries for pwm fan control even when there's no
+		 *     actual fan connected to it... therm table? */
+		if (therm->fan_get && therm->fan_get(therm) >= 0) {
+			ret = sysfs_create_group(&hwmon_dev->kobj,
+						 &hwmon_pwm_fan_attrgroup);
+			if (ret)
+				goto error;
+		}
 	}
 
 	/* if the card can read the fan rpm */
-	if (nvkm_therm_fan_sense(therm) >= 0) {
+	if (therm && nvkm_therm_fan_sense(therm) >= 0) {
 		ret = sysfs_create_group(&hwmon_dev->kobj,
 					 &hwmon_fan_rpm_attrgroup);
 		if (ret)
@@ -662,6 +689,13 @@ nouveau_hwmon_init(struct drm_device *dev)
 			goto error;
 	}
 
+	if (iccsense && iccsense->data_valid && iccsense->rail_count) {
+		ret = sysfs_create_group(&hwmon_dev->kobj,
+					 &hwmon_power_attrgroup);
+		if (ret)
+			goto error;
+	}
+
 	hwmon->hwmon = hwmon_dev;
 
 	return 0;
@@ -688,6 +722,7 @@ nouveau_hwmon_fini(struct drm_device *dev)
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_pwm_fan_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_fan_rpm_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_in0_attrgroup);
+		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_attrgroup);
 
 		hwmon_device_unregister(hwmon->hwmon);
 	}
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index ea3921652449..a43445caae60 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -297,7 +297,7 @@ nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
 		.pushbuf = 0xb0007d00,
 	};
 	static const s32 oclass[] = {
-		GM204_DISP_CORE_CHANNEL_DMA,
+		GM200_DISP_CORE_CHANNEL_DMA,
 		GM107_DISP_CORE_CHANNEL_DMA,
 		GK110_DISP_CORE_CHANNEL_DMA,
 		GK104_DISP_CORE_CHANNEL_DMA,
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/Kbuild b/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
index 7f66963f305c..86a31a8e1e51 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
@@ -2,6 +2,7 @@ nvkm-y := nvkm/core/client.o
 nvkm-y += nvkm/core/engine.o
 nvkm-y += nvkm/core/enum.o
 nvkm-y += nvkm/core/event.o
+nvkm-y += nvkm/core/firmware.o
 nvkm-y += nvkm/core/gpuobj.o
 nvkm-y += nvkm/core/ioctl.o
 nvkm-y += nvkm/core/memory.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
new file mode 100644
index 000000000000..34ecd4a7e0c1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <core/device.h>
+
+/**
+ * nvkm_firmware_get - load firmware from the official nvidia/chip/ directory
+ * @device	device that will use that firmware
+ * @fwname	name of firmware file to load
+ * @fw		firmware structure to load to
+ *
+ * Use this function to load firmware files in the form nvidia/chip/fwname.bin.
+ * Firmware files released by NVIDIA will always follow this format.
+ */
+int
+nvkm_firmware_get(struct nvkm_device *device, const char *fwname,
+		  const struct firmware **fw)
+{
+	char f[64];
+	char cname[16];
+	int i;
+
+	/* Convert device name to lowercase */
+	strncpy(cname, device->chip->name, sizeof(cname));
+	cname[sizeof(cname) - 1] = '\0';
+	i = strlen(cname);
+	while (i) {
+		--i;
+		cname[i] = tolower(cname[i]);
+	}
+
+	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
+	return request_firmware(fw, f, device->dev);
+}
+
+/**
+ * nvkm_firmware_put - release firmware loaded with nvkm_firmware_get
+ */
+void
+nvkm_firmware_put(const struct firmware *fw)
+{
+	release_firmware(fw);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c
index c3a790eb8d6a..a7bd22706b2a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/gpuobj.c
@@ -253,3 +253,23 @@ nvkm_gpuobj_wrap(struct nvkm_memory *memory, struct nvkm_gpuobj **pgpuobj)
 	(*pgpuobj)->size = nvkm_memory_size(memory);
 	return 0;
 }
+
+void
+nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src,
+		      u32 length)
+{
+	int i;
+
+	for (i = 0; i < length; i += 4)
+		nvkm_wo32(dst, dstoffset + i, *(u32 *)(src + i));
+}
+
+void
+nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset,
+			u32 length)
+{
+	int i;
+
+	for (i = 0; i < length; i += 4)
+		((u32 *)src)[i / 4] = nvkm_ro32(src, srcoffset + i);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
index 3216e157a8a0..89da47234016 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
@@ -131,7 +131,7 @@ nvkm_ramht_del(struct nvkm_ramht **pramht)
 	struct nvkm_ramht *ramht = *pramht;
 	if (ramht) {
 		nvkm_gpuobj_del(&ramht->gpuobj);
-		kfree(*pramht);
+		vfree(*pramht);
 		*pramht = NULL;
 	}
 }
@@ -143,8 +143,8 @@ nvkm_ramht_new(struct nvkm_device *device, u32 size, u32 align,
 	struct nvkm_ramht *ramht;
 	int ret, i;
 
-	if (!(ramht = *pramht = kzalloc(sizeof(*ramht) + (size >> 3) *
-					sizeof(*ramht->data), GFP_KERNEL)))
+	if (!(ramht = *pramht = vzalloc(sizeof(*ramht) +
+					(size >> 3) * sizeof(*ramht->data))))
 		return -ENOMEM;
 
 	ramht->device = device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index 7de98470a2a0..3bf08cb1a289 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -29,47 +29,52 @@ static struct lock_class_key nvkm_subdev_lock_class[NVKM_SUBDEV_NR];
 
 const char *
 nvkm_subdev_name[NVKM_SUBDEV_NR] = {
-	[NVKM_SUBDEV_BAR    ] = "bar",
-	[NVKM_SUBDEV_VBIOS  ] = "bios",
-	[NVKM_SUBDEV_BUS    ] = "bus",
-	[NVKM_SUBDEV_CLK    ] = "clk",
-	[NVKM_SUBDEV_DEVINIT] = "devinit",
-	[NVKM_SUBDEV_FB     ] = "fb",
-	[NVKM_SUBDEV_FUSE   ] = "fuse",
-	[NVKM_SUBDEV_GPIO   ] = "gpio",
-	[NVKM_SUBDEV_I2C    ] = "i2c",
-	[NVKM_SUBDEV_IBUS   ] = "priv",
-	[NVKM_SUBDEV_INSTMEM] = "imem",
-	[NVKM_SUBDEV_LTC    ] = "ltc",
-	[NVKM_SUBDEV_MC     ] = "mc",
-	[NVKM_SUBDEV_MMU    ] = "mmu",
-	[NVKM_SUBDEV_MXM    ] = "mxm",
-	[NVKM_SUBDEV_PCI    ] = "pci",
-	[NVKM_SUBDEV_PMU    ] = "pmu",
-	[NVKM_SUBDEV_THERM  ] = "therm",
-	[NVKM_SUBDEV_TIMER  ] = "tmr",
-	[NVKM_SUBDEV_VOLT   ] = "volt",
-	[NVKM_ENGINE_BSP    ] = "bsp",
-	[NVKM_ENGINE_CE0    ] = "ce0",
-	[NVKM_ENGINE_CE1    ] = "ce1",
-	[NVKM_ENGINE_CE2    ] = "ce2",
-	[NVKM_ENGINE_CIPHER ] = "cipher",
-	[NVKM_ENGINE_DISP   ] = "disp",
-	[NVKM_ENGINE_DMAOBJ ] = "dma",
-	[NVKM_ENGINE_FIFO   ] = "fifo",
-	[NVKM_ENGINE_GR     ] = "gr",
-	[NVKM_ENGINE_IFB    ] = "ifb",
-	[NVKM_ENGINE_ME     ] = "me",
-	[NVKM_ENGINE_MPEG   ] = "mpeg",
-	[NVKM_ENGINE_MSENC  ] = "msenc",
-	[NVKM_ENGINE_MSPDEC ] = "mspdec",
-	[NVKM_ENGINE_MSPPP  ] = "msppp",
-	[NVKM_ENGINE_MSVLD  ] = "msvld",
-	[NVKM_ENGINE_PM     ] = "pm",
-	[NVKM_ENGINE_SEC    ] = "sec",
-	[NVKM_ENGINE_SW     ] = "sw",
-	[NVKM_ENGINE_VIC    ] = "vic",
-	[NVKM_ENGINE_VP     ] = "vp",
+	[NVKM_SUBDEV_BAR     ] = "bar",
+	[NVKM_SUBDEV_VBIOS   ] = "bios",
+	[NVKM_SUBDEV_BUS     ] = "bus",
+	[NVKM_SUBDEV_CLK     ] = "clk",
+	[NVKM_SUBDEV_DEVINIT ] = "devinit",
+	[NVKM_SUBDEV_FB      ] = "fb",
+	[NVKM_SUBDEV_FUSE    ] = "fuse",
+	[NVKM_SUBDEV_GPIO    ] = "gpio",
+	[NVKM_SUBDEV_I2C     ] = "i2c",
+	[NVKM_SUBDEV_IBUS    ] = "priv",
+	[NVKM_SUBDEV_ICCSENSE] = "iccsense",
+	[NVKM_SUBDEV_INSTMEM ] = "imem",
+	[NVKM_SUBDEV_LTC     ] = "ltc",
+	[NVKM_SUBDEV_MC      ] = "mc",
+	[NVKM_SUBDEV_MMU     ] = "mmu",
+	[NVKM_SUBDEV_MXM     ] = "mxm",
+	[NVKM_SUBDEV_PCI     ] = "pci",
+	[NVKM_SUBDEV_PMU     ] = "pmu",
+	[NVKM_SUBDEV_SECBOOT ] = "secboot",
+	[NVKM_SUBDEV_THERM   ] = "therm",
+	[NVKM_SUBDEV_TIMER   ] = "tmr",
+	[NVKM_SUBDEV_VOLT    ] = "volt",
+	[NVKM_ENGINE_BSP     ] = "bsp",
+	[NVKM_ENGINE_CE0     ] = "ce0",
+	[NVKM_ENGINE_CE1     ] = "ce1",
+	[NVKM_ENGINE_CE2     ] = "ce2",
+	[NVKM_ENGINE_CIPHER  ] = "cipher",
+	[NVKM_ENGINE_DISP    ] = "disp",
+	[NVKM_ENGINE_DMAOBJ  ] = "dma",
+	[NVKM_ENGINE_FIFO    ] = "fifo",
+	[NVKM_ENGINE_GR      ] = "gr",
+	[NVKM_ENGINE_IFB     ] = "ifb",
+	[NVKM_ENGINE_ME      ] = "me",
+	[NVKM_ENGINE_MPEG    ] = "mpeg",
+	[NVKM_ENGINE_MSENC   ] = "msenc",
+	[NVKM_ENGINE_MSPDEC  ] = "mspdec",
+	[NVKM_ENGINE_MSPPP   ] = "msppp",
+	[NVKM_ENGINE_MSVLD   ] = "msvld",
+	[NVKM_ENGINE_NVENC0  ] = "nvenc0",
+	[NVKM_ENGINE_NVENC1  ] = "nvenc1",
+	[NVKM_ENGINE_NVDEC   ] = "nvdec",
+	[NVKM_ENGINE_PM      ] = "pm",
+	[NVKM_ENGINE_SEC     ] = "sec",
+	[NVKM_ENGINE_SW      ] = "sw",
+	[NVKM_ENGINE_VIC     ] = "vic",
+	[NVKM_ENGINE_VP      ] = "vp",
 };
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild
index 36f724763fde..c2c8d2ac01b8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild
@@ -10,10 +10,14 @@ include $(src)/nvkm/engine/dma/Kbuild
 include $(src)/nvkm/engine/fifo/Kbuild
 include $(src)/nvkm/engine/gr/Kbuild
 include $(src)/nvkm/engine/mpeg/Kbuild
+include $(src)/nvkm/engine/msenc/Kbuild
 include $(src)/nvkm/engine/mspdec/Kbuild
 include $(src)/nvkm/engine/msppp/Kbuild
 include $(src)/nvkm/engine/msvld/Kbuild
+include $(src)/nvkm/engine/nvenc/Kbuild
+include $(src)/nvkm/engine/nvdec/Kbuild
 include $(src)/nvkm/engine/pm/Kbuild
 include $(src)/nvkm/engine/sec/Kbuild
 include $(src)/nvkm/engine/sw/Kbuild
+include $(src)/nvkm/engine/vic/Kbuild
 include $(src)/nvkm/engine/vp/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index fa8cda7058cd..9c19d59b47df 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -1,4 +1,5 @@
 nvkm-y += nvkm/engine/ce/gt215.o
 nvkm-y += nvkm/engine/ce/gf100.o
 nvkm-y += nvkm/engine/ce/gk104.o
-nvkm-y += nvkm/engine/ce/gm204.o
+nvkm-y += nvkm/engine/ce/gm107.o
+nvkm-y += nvkm/engine/ce/gm200.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm107.c
new file mode 100644
index 000000000000..4c2f42919c1f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm107.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_engine_func
+gm107_ce = {
+	.intr = gk104_ce_intr,
+	.sclass = {
+		{ -1, -1, KEPLER_DMA_COPY_A },
+		{ -1, -1, MAXWELL_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gm107_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	if (index == NVKM_ENGINE_CE0) {
+		return nvkm_engine_new_(&gm107_ce, device, index,
+					0x00000040, true, pengine);
+	} else
+	if (index == NVKM_ENGINE_CE1) {
+		return nvkm_engine_new_(&gm107_ce, device, index,
+					0x00000080, true, pengine);
+	} else
+	if (index == NVKM_ENGINE_CE2) {
+		return nvkm_engine_new_(&gm107_ce, device, index,
+					0x00200000, true, pengine);
+	}
+	return -ENODEV;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm200.c
index 8eaa72a59f40..13f07b32cd9c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gm200.c
@@ -26,7 +26,7 @@
 #include <nvif/class.h>
 
 static const struct nvkm_engine_func
-gm204_ce = {
+gm200_ce = {
 	.intr = gk104_ce_intr,
 	.sclass = {
 		{ -1, -1, MAXWELL_DMA_COPY_A },
@@ -35,19 +35,19 @@ gm204_ce = {
 };
 
 int
-gm204_ce_new(struct nvkm_device *device, int index,
+gm200_ce_new(struct nvkm_device *device, int index,
 	     struct nvkm_engine **pengine)
 {
 	if (index == NVKM_ENGINE_CE0) {
-		return nvkm_engine_new_(&gm204_ce, device, index,
+		return nvkm_engine_new_(&gm200_ce, device, index,
 					0x00000040, true, pengine);
 	} else
 	if (index == NVKM_ENGINE_CE1) {
-		return nvkm_engine_new_(&gm204_ce, device, index,
+		return nvkm_engine_new_(&gm200_ce, device, index,
 					0x00000080, true, pengine);
 	} else
 	if (index == NVKM_ENGINE_CE2) {
-		return nvkm_engine_new_(&gm204_ce, device, index,
+		return nvkm_engine_new_(&gm200_ce, device, index,
 					0x00200000, true, pengine);
 	}
 	return -ENODEV;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index b1ba1c782a2b..9f32c8739254 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1347,6 +1347,7 @@ nvc0_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1383,6 +1384,7 @@ nvc1_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1418,6 +1420,7 @@ nvc3_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1453,6 +1456,7 @@ nvc4_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1489,6 +1493,7 @@ nvc8_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1525,6 +1530,7 @@ nvce_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1561,6 +1567,7 @@ nvcf_chipset = {
 	.gpio = g94_gpio_new,
 	.i2c = g94_i2c_new,
 	.ibus = gf100_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1596,6 +1603,7 @@ nvd7_chipset = {
 	.gpio = gf119_gpio_new,
 	.i2c = gf117_i2c_new,
 	.ibus = gf117_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1629,6 +1637,7 @@ nvd9_chipset = {
 	.gpio = gf119_gpio_new,
 	.i2c = gf119_i2c_new,
 	.ibus = gf117_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1664,6 +1673,7 @@ nve4_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gf100_mc_new,
@@ -1701,6 +1711,7 @@ nve6_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gf100_mc_new,
@@ -1738,6 +1749,7 @@ nve7_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gf100_mc_new,
@@ -1799,6 +1811,7 @@ nvf0_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gf100_mc_new,
@@ -1814,7 +1827,7 @@ nvf0_chipset = {
 	.ce[2] = gk104_ce_new,
 	.disp = gk110_disp_new,
 	.dma = gf119_dma_new,
-	.fifo = gk104_fifo_new,
+	.fifo = gk110_fifo_new,
 	.gr = gk110_gr_new,
 	.mspdec = gk104_mspdec_new,
 	.msppp = gf100_msppp_new,
@@ -1835,6 +1848,7 @@ nvf1_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gf119_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gf100_mc_new,
@@ -1850,7 +1864,7 @@ nvf1_chipset = {
 	.ce[2] = gk104_ce_new,
 	.disp = gk110_disp_new,
 	.dma = gf119_dma_new,
-	.fifo = gk104_fifo_new,
+	.fifo = gk110_fifo_new,
 	.gr = gk110b_gr_new,
 	.mspdec = gk104_mspdec_new,
 	.msppp = gf100_msppp_new,
@@ -1871,6 +1885,7 @@ nv106_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gk20a_mc_new,
@@ -1907,6 +1922,7 @@ nv108_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gk104_ltc_new,
 	.mc = gk20a_mc_new,
@@ -1943,6 +1959,7 @@ nv117_chipset = {
 	.gpio = gk104_gpio_new,
 	.i2c = gf119_i2c_new,
 	.ibus = gk104_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
 	.ltc = gm107_ltc_new,
 	.mc = gk20a_mc_new,
@@ -1953,43 +1970,78 @@ nv117_chipset = {
 	.therm = gm107_therm_new,
 	.timer = gk20a_timer_new,
 	.volt = gk104_volt_new,
-	.ce[0] = gk104_ce_new,
-	.ce[2] = gk104_ce_new,
+	.ce[0] = gm107_ce_new,
+	.ce[2] = gm107_ce_new,
 	.disp = gm107_disp_new,
 	.dma = gf119_dma_new,
-	.fifo = gk208_fifo_new,
+	.fifo = gm107_fifo_new,
 	.gr = gm107_gr_new,
 	.sw = gf100_sw_new,
 };
 
 static const struct nvkm_device_chip
+nv120_chipset = {
+	.name = "GM200",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gm107_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.iccsense = gf100_iccsense_new,
+	.imem = nv50_instmem_new,
+	.ltc = gm200_ltc_new,
+	.mc = gk20a_mc_new,
+	.mmu = gf100_mmu_new,
+	.mxm = nv50_mxm_new,
+	.pci = gk104_pci_new,
+	.pmu = gm107_pmu_new,
+	.secboot = gm200_secboot_new,
+	.timer = gk20a_timer_new,
+	.volt = gk104_volt_new,
+	.ce[0] = gm200_ce_new,
+	.ce[1] = gm200_ce_new,
+	.ce[2] = gm200_ce_new,
+	.disp = gm200_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gm200_fifo_new,
+	.gr = gm200_gr_new,
+	.sw = gf100_sw_new,
+};
+
+static const struct nvkm_device_chip
 nv124_chipset = {
 	.name = "GM204",
 	.bar = gf100_bar_new,
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
-	.devinit = gm204_devinit_new,
+	.devinit = gm200_devinit_new,
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gm204_i2c_new,
-	.ibus = gm204_ibus_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
-	.ltc = gm204_ltc_new,
+	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
 	.pci = gk104_pci_new,
 	.pmu = gm107_pmu_new,
+	.secboot = gm200_secboot_new,
 	.timer = gk20a_timer_new,
 	.volt = gk104_volt_new,
-	.ce[0] = gm204_ce_new,
-	.ce[1] = gm204_ce_new,
-	.ce[2] = gm204_ce_new,
-	.disp = gm204_disp_new,
+	.ce[0] = gm200_ce_new,
+	.ce[1] = gm200_ce_new,
+	.ce[2] = gm200_ce_new,
+	.disp = gm200_disp_new,
 	.dma = gf119_dma_new,
-	.fifo = gm204_fifo_new,
-	.gr = gm204_gr_new,
+	.fifo = gm200_fifo_new,
+	.gr = gm200_gr_new,
 	.sw = gf100_sw_new,
 };
 
@@ -1999,28 +2051,30 @@ nv126_chipset = {
 	.bar = gf100_bar_new,
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
-	.devinit = gm204_devinit_new,
+	.devinit = gm200_devinit_new,
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gm204_i2c_new,
-	.ibus = gm204_ibus_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
-	.ltc = gm204_ltc_new,
+	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
 	.pci = gk104_pci_new,
 	.pmu = gm107_pmu_new,
+	.secboot = gm200_secboot_new,
 	.timer = gk20a_timer_new,
 	.volt = gk104_volt_new,
-	.ce[0] = gm204_ce_new,
-	.ce[1] = gm204_ce_new,
-	.ce[2] = gm204_ce_new,
-	.disp = gm204_disp_new,
+	.ce[0] = gm200_ce_new,
+	.ce[1] = gm200_ce_new,
+	.ce[2] = gm200_ce_new,
+	.disp = gm200_disp_new,
 	.dma = gf119_dma_new,
-	.fifo = gm204_fifo_new,
-	.gr = gm206_gr_new,
+	.fifo = gm200_fifo_new,
+	.gr = gm200_gr_new,
 	.sw = gf100_sw_new,
 };
 
@@ -2029,15 +2083,18 @@ nv12b_chipset = {
 	.name = "GM20B",
 	.bar = gk20a_bar_new,
 	.bus = gf100_bus_new,
+	.clk = gm20b_clk_new,
 	.fb = gk20a_fb_new,
 	.fuse = gm107_fuse_new,
 	.ibus = gk20a_ibus_new,
 	.imem = gk20a_instmem_new,
-	.ltc = gm204_ltc_new,
+	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
+	.secboot = gm20b_secboot_new,
 	.timer = gk20a_timer_new,
-	.ce[2] = gm204_ce_new,
+	.ce[2] = gm200_ce_new,
+	.volt = gm20b_volt_new,
 	.dma = gf119_dma_new,
 	.fifo = gm20b_fifo_new,
 	.gr = gm20b_gr_new,
@@ -2072,26 +2129,28 @@ nvkm_device_subdev(struct nvkm_device *device, int index)
 
 	switch (index) {
 #define _(n,p,m) case NVKM_SUBDEV_##n: if (p) return (m); break
-	_(BAR    , device->bar    , &device->bar->subdev);
-	_(VBIOS  , device->bios   , &device->bios->subdev);
-	_(BUS    , device->bus    , &device->bus->subdev);
-	_(CLK    , device->clk    , &device->clk->subdev);
-	_(DEVINIT, device->devinit, &device->devinit->subdev);
-	_(FB     , device->fb     , &device->fb->subdev);
-	_(FUSE   , device->fuse   , &device->fuse->subdev);
-	_(GPIO   , device->gpio   , &device->gpio->subdev);
-	_(I2C    , device->i2c    , &device->i2c->subdev);
-	_(IBUS   , device->ibus   ,  device->ibus);
-	_(INSTMEM, device->imem   , &device->imem->subdev);
-	_(LTC    , device->ltc    , &device->ltc->subdev);
-	_(MC     , device->mc     , &device->mc->subdev);
-	_(MMU    , device->mmu    , &device->mmu->subdev);
-	_(MXM    , device->mxm    ,  device->mxm);
-	_(PCI    , device->pci    , &device->pci->subdev);
-	_(PMU    , device->pmu    , &device->pmu->subdev);
-	_(THERM  , device->therm  , &device->therm->subdev);
-	_(TIMER  , device->timer  , &device->timer->subdev);
-	_(VOLT   , device->volt   , &device->volt->subdev);
+	_(BAR     , device->bar     , &device->bar->subdev);
+	_(VBIOS   , device->bios    , &device->bios->subdev);
+	_(BUS     , device->bus     , &device->bus->subdev);
+	_(CLK     , device->clk     , &device->clk->subdev);
+	_(DEVINIT , device->devinit , &device->devinit->subdev);
+	_(FB      , device->fb      , &device->fb->subdev);
+	_(FUSE    , device->fuse    , &device->fuse->subdev);
+	_(GPIO    , device->gpio    , &device->gpio->subdev);
+	_(I2C     , device->i2c     , &device->i2c->subdev);
+	_(IBUS    , device->ibus    ,  device->ibus);
+	_(ICCSENSE, device->iccsense, &device->iccsense->subdev);
+	_(INSTMEM , device->imem    , &device->imem->subdev);
+	_(LTC     , device->ltc     , &device->ltc->subdev);
+	_(MC      , device->mc      , &device->mc->subdev);
+	_(MMU     , device->mmu     , &device->mmu->subdev);
+	_(MXM     , device->mxm     ,  device->mxm);
+	_(PCI     , device->pci     , &device->pci->subdev);
+	_(PMU     , device->pmu     , &device->pmu->subdev);
+	_(SECBOOT , device->secboot , &device->secboot->subdev);
+	_(THERM   , device->therm   , &device->therm->subdev);
+	_(TIMER   , device->timer   , &device->timer->subdev);
+	_(VOLT    , device->volt    , &device->volt->subdev);
 #undef _
 	default:
 		engine = nvkm_device_engine(device, index);
@@ -2110,27 +2169,30 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 
 	switch (index) {
 #define _(n,p,m) case NVKM_ENGINE_##n: if (p) return (m); break
-	_(BSP    , device->bsp    ,  device->bsp);
-	_(CE0    , device->ce[0]  ,  device->ce[0]);
-	_(CE1    , device->ce[1]  ,  device->ce[1]);
-	_(CE2    , device->ce[2]  ,  device->ce[2]);
-	_(CIPHER , device->cipher ,  device->cipher);
-	_(DISP   , device->disp   , &device->disp->engine);
-	_(DMAOBJ , device->dma    , &device->dma->engine);
-	_(FIFO   , device->fifo   , &device->fifo->engine);
-	_(GR     , device->gr     , &device->gr->engine);
-	_(IFB    , device->ifb    ,  device->ifb);
-	_(ME     , device->me     ,  device->me);
-	_(MPEG   , device->mpeg   ,  device->mpeg);
-	_(MSENC  , device->msenc  ,  device->msenc);
-	_(MSPDEC , device->mspdec ,  device->mspdec);
-	_(MSPPP  , device->msppp  ,  device->msppp);
-	_(MSVLD  , device->msvld  ,  device->msvld);
-	_(PM     , device->pm     , &device->pm->engine);
-	_(SEC    , device->sec    ,  device->sec);
-	_(SW     , device->sw     , &device->sw->engine);
-	_(VIC    , device->vic    ,  device->vic);
-	_(VP     , device->vp     ,  device->vp);
+	_(BSP    , device->bsp     ,  device->bsp);
+	_(CE0    , device->ce[0]   ,  device->ce[0]);
+	_(CE1    , device->ce[1]   ,  device->ce[1]);
+	_(CE2    , device->ce[2]   ,  device->ce[2]);
+	_(CIPHER , device->cipher  ,  device->cipher);
+	_(DISP   , device->disp    , &device->disp->engine);
+	_(DMAOBJ , device->dma     , &device->dma->engine);
+	_(FIFO   , device->fifo    , &device->fifo->engine);
+	_(GR     , device->gr      , &device->gr->engine);
+	_(IFB    , device->ifb     ,  device->ifb);
+	_(ME     , device->me      ,  device->me);
+	_(MPEG   , device->mpeg    ,  device->mpeg);
+	_(MSENC  , device->msenc   ,  device->msenc);
+	_(MSPDEC , device->mspdec  ,  device->mspdec);
+	_(MSPPP  , device->msppp   ,  device->msppp);
+	_(MSVLD  , device->msvld   ,  device->msvld);
+	_(NVENC0 , device->nvenc[0],  device->nvenc[0]);
+	_(NVENC1 , device->nvenc[1],  device->nvenc[1]);
+	_(NVDEC  , device->nvdec   ,  device->nvdec);
+	_(PM     , device->pm      , &device->pm->engine);
+	_(SEC    , device->sec     ,  device->sec);
+	_(SW     , device->sw      , &device->sw->engine);
+	_(VIC    , device->vic     ,  device->vic);
+	_(VP     , device->vp      ,  device->vp);
 #undef _
 	default:
 		WARN_ON(1);
@@ -2261,6 +2323,8 @@ fail_subdev:
 	} while (--i >= 0);
 
 fail:
+	nvkm_device_fini(device, false);
+
 	nvdev_error(device, "init failed with %d\n", ret);
 	return ret;
 }
@@ -2459,6 +2523,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x106: device->chip = &nv106_chipset; break;
 		case 0x108: device->chip = &nv108_chipset; break;
 		case 0x117: device->chip = &nv117_chipset; break;
+		case 0x120: device->chip = &nv120_chipset; break;
 		case 0x124: device->chip = &nv124_chipset; break;
 		case 0x126: device->chip = &nv126_chipset; break;
 		case 0x12b: device->chip = &nv12b_chipset; break;
@@ -2518,47 +2583,52 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 	}                                                                      \
 	break
 		switch (i) {
-		_(NVKM_SUBDEV_BAR    ,     bar);
-		_(NVKM_SUBDEV_VBIOS  ,    bios);
-		_(NVKM_SUBDEV_BUS    ,     bus);
-		_(NVKM_SUBDEV_CLK    ,     clk);
-		_(NVKM_SUBDEV_DEVINIT, devinit);
-		_(NVKM_SUBDEV_FB     ,      fb);
-		_(NVKM_SUBDEV_FUSE   ,    fuse);
-		_(NVKM_SUBDEV_GPIO   ,    gpio);
-		_(NVKM_SUBDEV_I2C    ,     i2c);
-		_(NVKM_SUBDEV_IBUS   ,    ibus);
-		_(NVKM_SUBDEV_INSTMEM,    imem);
-		_(NVKM_SUBDEV_LTC    ,     ltc);
-		_(NVKM_SUBDEV_MC     ,      mc);
-		_(NVKM_SUBDEV_MMU    ,     mmu);
-		_(NVKM_SUBDEV_MXM    ,     mxm);
-		_(NVKM_SUBDEV_PCI    ,     pci);
-		_(NVKM_SUBDEV_PMU    ,     pmu);
-		_(NVKM_SUBDEV_THERM  ,   therm);
-		_(NVKM_SUBDEV_TIMER  ,   timer);
-		_(NVKM_SUBDEV_VOLT   ,    volt);
-		_(NVKM_ENGINE_BSP    ,     bsp);
-		_(NVKM_ENGINE_CE0    ,   ce[0]);
-		_(NVKM_ENGINE_CE1    ,   ce[1]);
-		_(NVKM_ENGINE_CE2    ,   ce[2]);
-		_(NVKM_ENGINE_CIPHER ,  cipher);
-		_(NVKM_ENGINE_DISP   ,    disp);
-		_(NVKM_ENGINE_DMAOBJ ,     dma);
-		_(NVKM_ENGINE_FIFO   ,    fifo);
-		_(NVKM_ENGINE_GR     ,      gr);
-		_(NVKM_ENGINE_IFB    ,     ifb);
-		_(NVKM_ENGINE_ME     ,      me);
-		_(NVKM_ENGINE_MPEG   ,    mpeg);
-		_(NVKM_ENGINE_MSENC  ,   msenc);
-		_(NVKM_ENGINE_MSPDEC ,  mspdec);
-		_(NVKM_ENGINE_MSPPP  ,   msppp);
-		_(NVKM_ENGINE_MSVLD  ,   msvld);
-		_(NVKM_ENGINE_PM     ,      pm);
-		_(NVKM_ENGINE_SEC    ,     sec);
-		_(NVKM_ENGINE_SW     ,      sw);
-		_(NVKM_ENGINE_VIC    ,     vic);
-		_(NVKM_ENGINE_VP     ,      vp);
+		_(NVKM_SUBDEV_BAR     ,      bar);
+		_(NVKM_SUBDEV_VBIOS   ,     bios);
+		_(NVKM_SUBDEV_BUS     ,      bus);
+		_(NVKM_SUBDEV_CLK     ,      clk);
+		_(NVKM_SUBDEV_DEVINIT ,  devinit);
+		_(NVKM_SUBDEV_FB      ,       fb);
+		_(NVKM_SUBDEV_FUSE    ,     fuse);
+		_(NVKM_SUBDEV_GPIO    ,     gpio);
+		_(NVKM_SUBDEV_I2C     ,      i2c);
+		_(NVKM_SUBDEV_IBUS    ,     ibus);
+		_(NVKM_SUBDEV_ICCSENSE, iccsense);
+		_(NVKM_SUBDEV_INSTMEM ,     imem);
+		_(NVKM_SUBDEV_LTC     ,      ltc);
+		_(NVKM_SUBDEV_MC      ,       mc);
+		_(NVKM_SUBDEV_MMU     ,      mmu);
+		_(NVKM_SUBDEV_MXM     ,      mxm);
+		_(NVKM_SUBDEV_PCI     ,      pci);
+		_(NVKM_SUBDEV_PMU     ,      pmu);
+		_(NVKM_SUBDEV_SECBOOT ,  secboot);
+		_(NVKM_SUBDEV_THERM   ,    therm);
+		_(NVKM_SUBDEV_TIMER   ,    timer);
+		_(NVKM_SUBDEV_VOLT    ,     volt);
+		_(NVKM_ENGINE_BSP     ,      bsp);
+		_(NVKM_ENGINE_CE0     ,    ce[0]);
+		_(NVKM_ENGINE_CE1     ,    ce[1]);
+		_(NVKM_ENGINE_CE2     ,    ce[2]);
+		_(NVKM_ENGINE_CIPHER  ,   cipher);
+		_(NVKM_ENGINE_DISP    ,     disp);
+		_(NVKM_ENGINE_DMAOBJ  ,      dma);
+		_(NVKM_ENGINE_FIFO    ,     fifo);
+		_(NVKM_ENGINE_GR      ,       gr);
+		_(NVKM_ENGINE_IFB     ,      ifb);
+		_(NVKM_ENGINE_ME      ,       me);
+		_(NVKM_ENGINE_MPEG    ,     mpeg);
+		_(NVKM_ENGINE_MSENC   ,    msenc);
+		_(NVKM_ENGINE_MSPDEC  ,   mspdec);
+		_(NVKM_ENGINE_MSPPP   ,    msppp);
+		_(NVKM_ENGINE_MSVLD   ,    msvld);
+		_(NVKM_ENGINE_NVENC0  , nvenc[0]);
+		_(NVKM_ENGINE_NVENC1  , nvenc[1]);
+		_(NVKM_ENGINE_NVDEC   ,    nvdec);
+		_(NVKM_ENGINE_PM      ,       pm);
+		_(NVKM_ENGINE_SEC     ,      sec);
+		_(NVKM_ENGINE_SW      ,       sw);
+		_(NVKM_ENGINE_VIC     ,      vic);
+		_(NVKM_ENGINE_VP      ,       vp);
 		default:
 			WARN_ON(1);
 			continue;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 62ad0300cfa5..18fab3973ce5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,7 +1614,7 @@ nvkm_device_pci_func = {
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
-	.cpu_coherent = !IS_ENABLED(CONFIG_ARM),
+	.cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
index ed3ad2c30e17..e80f6ab1c415 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
@@ -12,6 +12,7 @@
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
 #include <subdev/ibus.h>
+#include <subdev/iccsense.h>
 #include <subdev/instmem.h>
 #include <subdev/ltc.h>
 #include <subdev/mc.h>
@@ -22,6 +23,7 @@
 #include <subdev/therm.h>
 #include <subdev/timer.h>
 #include <subdev/volt.h>
+#include <subdev/secboot.h>
 
 #include <engine/bsp.h>
 #include <engine/ce.h>
@@ -34,9 +36,12 @@
 #include <engine/mspdec.h>
 #include <engine/msppp.h>
 #include <engine/msvld.h>
+#include <engine/nvenc.h>
+#include <engine/nvdec.h>
 #include <engine/pm.h>
 #include <engine/sec.h>
 #include <engine/sw.h>
+#include <engine/vic.h>
 #include <engine/vp.h>
 
 int  nvkm_device_ctor(const struct nvkm_device_func *,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index e7e581d6a8ff..9afa5f3e3c1c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -255,7 +255,6 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 
 	tdev->func = func;
 	tdev->pdev = pdev;
-	tdev->irq = -1;
 
 	tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
 	if (IS_ERR(tdev->vdd)) {
@@ -281,6 +280,15 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 		goto free;
 	}
 
+	/**
+	 * The IOMMU bit defines the upper limit of the GPU-addressable space.
+	 * This will be refined in nouveau_ttm_init but we need to do it early
+	 * for instmem to behave properly
+	 */
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit));
+	if (ret)
+		goto free;
+
 	nvkm_device_tegra_probe_iommu(tdev);
 
 	ret = nvkm_device_tegra_power_up(tdev);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 04f60452011e..a74c5dd27dc0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -9,7 +9,7 @@ nvkm-y += nvkm/engine/disp/gf119.o
 nvkm-y += nvkm/engine/disp/gk104.o
 nvkm-y += nvkm/engine/disp/gk110.o
 nvkm-y += nvkm/engine/disp/gm107.o
-nvkm-y += nvkm/engine/disp/gm204.o
+nvkm-y += nvkm/engine/disp/gm200.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/outpdp.o
@@ -18,7 +18,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o
 nvkm-y += nvkm/engine/disp/sornv50.o
 nvkm-y += nvkm/engine/disp/sorg94.o
 nvkm-y += nvkm/engine/disp/sorgf119.o
-nvkm-y += nvkm/engine/disp/sorgm204.o
+nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/dport.o
 
 nvkm-y += nvkm/engine/disp/conn.o
@@ -43,7 +43,7 @@ nvkm-y += nvkm/engine/disp/rootgf119.o
 nvkm-y += nvkm/engine/disp/rootgk104.o
 nvkm-y += nvkm/engine/disp/rootgk110.o
 nvkm-y += nvkm/engine/disp/rootgm107.o
-nvkm-y += nvkm/engine/disp/rootgm204.o
+nvkm-y += nvkm/engine/disp/rootgm200.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
@@ -68,7 +68,7 @@ nvkm-y += nvkm/engine/disp/coregf119.o
 nvkm-y += nvkm/engine/disp/coregk104.o
 nvkm-y += nvkm/engine/disp/coregk110.o
 nvkm-y += nvkm/engine/disp/coregm107.o
-nvkm-y += nvkm/engine/disp/coregm204.o
+nvkm-y += nvkm/engine/disp/coregm200.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c
index 222f4a822f4d..bb23a8658ac0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c
@@ -27,8 +27,8 @@
 #include <nvif/class.h>
 
 const struct nv50_disp_dmac_oclass
-gm204_disp_core_oclass = {
-	.base.oclass = GM204_DISP_CORE_CHANNEL_DMA,
+gm200_disp_core_oclass = {
+	.base.oclass = GM200_DISP_CORE_CHANNEL_DMA,
 	.base.minver = 0,
 	.base.maxver = 0,
 	.ctor = nv50_disp_core_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
index c748ca23ab70..fc84eb8b5c45 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
@@ -87,5 +87,5 @@ extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass;
 
 extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass;
 
-extern const struct nv50_disp_dmac_oclass gm204_disp_core_oclass;
+extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 30f1987b5b40..67eec8620719 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -25,18 +25,18 @@
 #include "rootnv50.h"
 
 static const struct nv50_disp_func
-gm204_disp = {
+gm200_disp = {
 	.intr = gf119_disp_intr,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
-	.root = &gm204_disp_root_oclass,
+	.root = &gm200_disp_root_oclass,
 	.head.vblank_init = gf119_disp_vblank_init,
 	.head.vblank_fini = gf119_disp_vblank_fini,
 	.head.scanoutpos = gf119_disp_root_scanoutpos,
 	.outp.internal.crt = nv50_dac_output_new,
 	.outp.internal.tmds = nv50_sor_output_new,
 	.outp.internal.lvds = nv50_sor_output_new,
-	.outp.internal.dp = gm204_sor_dp_new,
+	.outp.internal.dp = gm200_sor_dp_new,
 	.dac.nr = 3,
 	.dac.power = nv50_dac_power,
 	.dac.sense = nv50_dac_sense,
@@ -44,11 +44,11 @@ gm204_disp = {
 	.sor.power = nv50_sor_power,
 	.sor.hda_eld = gf119_hda_eld,
 	.sor.hdmi = gk104_hdmi_ctrl,
-	.sor.magic = gm204_sor_magic,
+	.sor.magic = gm200_sor_magic,
 };
 
 int
-gm204_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+gm200_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gm204_disp, device, index, pdisp);
+	return gf119_disp_new_(&gm200_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
index 2590fec67ca9..07727198d7ce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
@@ -42,7 +42,7 @@ int nv50_pior_output_new(struct nvkm_disp *, int, struct dcb_output *,
 
 u32 g94_sor_dp_lane_map(struct nvkm_device *, u8 lane);
 
-void gm204_sor_magic(struct nvkm_output *outp);
+void gm200_sor_magic(struct nvkm_output *outp);
 
 #define OUTP_MSG(o,l,f,a...) do {                                              \
 	struct nvkm_output *_outp = (o);                                       \
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
index 731136d660b7..e9067ba4e179 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
@@ -63,6 +63,6 @@ int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
 int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
 
-int  gm204_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+int  gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		      struct nvkm_output **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c
index 168bffe0643c..38f5ee1dfc58 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c
@@ -27,11 +27,11 @@
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
-gm204_disp_root = {
+gm200_disp_root = {
 	.init = gf119_disp_root_init,
 	.fini = gf119_disp_root_fini,
 	.dmac = {
-		&gm204_disp_core_oclass,
+		&gm200_disp_core_oclass,
 		&gk110_disp_base_oclass,
 		&gk104_disp_ovly_oclass,
 	},
@@ -42,17 +42,17 @@ gm204_disp_root = {
 };
 
 static int
-gm204_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+gm200_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
 		    void *data, u32 size, struct nvkm_object **pobject)
 {
-	return nv50_disp_root_new_(&gm204_disp_root, disp, oclass,
+	return nv50_disp_root_new_(&gm200_disp_root, disp, oclass,
 				   data, size, pobject);
 }
 
 const struct nvkm_disp_oclass
-gm204_disp_root_oclass = {
-	.base.oclass = GM204_DISP,
+gm200_disp_root_oclass = {
+	.base.oclass = GM200_DISP,
 	.base.minver = -1,
 	.base.maxver = -1,
-	.ctor = gm204_disp_root_new,
+	.ctor = gm200_disp_root_new,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index 5b2c903ce9ee..cb449ed8d92c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -39,5 +39,5 @@ extern const struct nvkm_disp_oclass gf119_disp_root_oclass;
 extern const struct nvkm_disp_oclass gk104_disp_root_oclass;
 extern const struct nvkm_disp_oclass gk110_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm107_disp_root_oclass;
-extern const struct nvkm_disp_oclass gm204_disp_root_oclass;
+extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index 029e5f16c2a8..2cfbef9c344f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -27,19 +27,19 @@
 #include <subdev/timer.h>
 
 static inline u32
-gm204_sor_soff(struct nvkm_output_dp *outp)
+gm200_sor_soff(struct nvkm_output_dp *outp)
 {
 	return (ffs(outp->base.info.or) - 1) * 0x800;
 }
 
 static inline u32
-gm204_sor_loff(struct nvkm_output_dp *outp)
+gm200_sor_loff(struct nvkm_output_dp *outp)
 {
-	return gm204_sor_soff(outp) + !(outp->base.info.sorconf.link & 1) * 0x80;
+	return gm200_sor_soff(outp) + !(outp->base.info.sorconf.link & 1) * 0x80;
 }
 
 void
-gm204_sor_magic(struct nvkm_output *outp)
+gm200_sor_magic(struct nvkm_output *outp)
 {
 	struct nvkm_device *device = outp->disp->engine.subdev.device;
 	const u32 soff = outp->or * 0x100;
@@ -51,16 +51,16 @@ gm204_sor_magic(struct nvkm_output *outp)
 }
 
 static inline u32
-gm204_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
+gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
 {
 	return lane * 0x08;
 }
 
 static int
-gm204_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 soff = gm204_sor_soff(outp);
+	const u32 soff = gm200_sor_soff(outp);
 	const u32 data = 0x01010101 * pattern;
 	if (outp->base.info.sorconf.link & 1)
 		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
@@ -70,15 +70,15 @@ gm204_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 }
 
 static int
-gm204_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
+gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 soff = gm204_sor_soff(outp);
-	const u32 loff = gm204_sor_loff(outp);
+	const u32 soff = gm200_sor_soff(outp);
+	const u32 loff = gm200_sor_loff(outp);
 	u32 mask = 0, i;
 
 	for (i = 0; i < nr; i++)
-		mask |= 1 << (gm204_sor_dp_lane_map(device, i) >> 3);
+		mask |= 1 << (gm200_sor_dp_lane_map(device, i) >> 3);
 
 	nvkm_mask(device, 0x61c130 + loff, 0x0000000f, mask);
 	nvkm_mask(device, 0x61c034 + soff, 0x80000000, 0x80000000);
@@ -90,13 +90,13 @@ gm204_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 }
 
 static int
-gm204_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
+gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 		     int ln, int vs, int pe, int pc)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
 	struct nvkm_bios *bios = device->bios;
-	const u32 shift = gm204_sor_dp_lane_map(device, ln);
-	const u32 loff = gm204_sor_loff(outp);
+	const u32 shift = gm200_sor_dp_lane_map(device, ln);
+	const u32 loff = gm200_sor_loff(outp);
 	u32 addr, data[4];
 	u8  ver, hdr, cnt, len;
 	struct nvbios_dpout info;
@@ -128,16 +128,16 @@ gm204_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 }
 
 static const struct nvkm_output_dp_func
-gm204_sor_dp_func = {
-	.pattern = gm204_sor_dp_pattern,
-	.lnk_pwr = gm204_sor_dp_lnk_pwr,
+gm200_sor_dp_func = {
+	.pattern = gm200_sor_dp_pattern,
+	.lnk_pwr = gm200_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
-	.drv_ctl = gm204_sor_dp_drv_ctl,
+	.drv_ctl = gm200_sor_dp_drv_ctl,
 };
 
 int
-gm204_sor_dp_new(struct nvkm_disp *disp, int index, struct dcb_output *dcbE,
+gm200_sor_dp_new(struct nvkm_disp *disp, int index, struct dcb_output *dcbE,
 		 struct nvkm_output **poutp)
 {
-	return nvkm_output_dp_new_(&gm204_sor_dp_func, disp, index, dcbE, poutp);
+	return nvkm_output_dp_new_(&gm200_sor_dp_func, disp, index, dcbE, poutp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 74993c144a84..65e5d291ecda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -7,9 +7,11 @@ nvkm-y += nvkm/engine/fifo/nv50.o
 nvkm-y += nvkm/engine/fifo/g84.o
 nvkm-y += nvkm/engine/fifo/gf100.o
 nvkm-y += nvkm/engine/fifo/gk104.o
+nvkm-y += nvkm/engine/fifo/gk110.o
 nvkm-y += nvkm/engine/fifo/gk208.o
 nvkm-y += nvkm/engine/fifo/gk20a.o
-nvkm-y += nvkm/engine/fifo/gm204.o
+nvkm-y += nvkm/engine/fifo/gm107.o
+nvkm-y += nvkm/engine/fifo/gm200.o
 nvkm-y += nvkm/engine/fifo/gm20b.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
@@ -27,4 +29,5 @@ nvkm-y += nvkm/engine/fifo/gpfifonv50.o
 nvkm-y += nvkm/engine/fifo/gpfifog84.o
 nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
-nvkm-y += nvkm/engine/fifo/gpfifogm204.o
+nvkm-y += nvkm/engine/fifo/gpfifogk110.o
+nvkm-y += nvkm/engine/fifo/gpfifogm200.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index 97bdddb7644a..e06f4d46f802 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -7,7 +7,7 @@
 struct gk104_fifo_chan {
 	struct nvkm_fifo_chan base;
 	struct gk104_fifo *fifo;
-	int engine;
+	int runl;
 
 	struct list_head head;
 	bool killed;
@@ -25,5 +25,6 @@ int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
 			  void *data, u32 size, struct nvkm_object **);
 
 extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass gm204_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index 36a39c7fd8d2..352a0baec84d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -54,6 +54,7 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_memory *cur;
 	int nr = 0;
+	int target;
 
 	mutex_lock(&subdev->mutex);
 	cur = fifo->runlist.mem[fifo->runlist.active];
@@ -67,7 +68,10 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	}
 	nvkm_done(cur);
 
-	nvkm_wr32(device, 0x002270, nvkm_memory_addr(cur) >> 12);
+	target = (nvkm_memory_target(cur) == NVKM_MEM_TARGET_HOST) ? 0x3 : 0x0;
+
+	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
+				    (target << 28));
 	nvkm_wr32(device, 0x002274, 0x01f00000 | nr);
 
 	if (wait_event_timeout(fifo->runlist.wait,
@@ -130,9 +134,9 @@ gf100_fifo_engine(struct gf100_fifo *fifo, u32 engn)
 }
 
 static void
-gf100_fifo_recover_work(struct work_struct *work)
+gf100_fifo_recover_work(struct work_struct *w)
 {
-	struct gf100_fifo *fifo = container_of(work, typeof(*fifo), fault);
+	struct gf100_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	struct nvkm_engine *engine;
 	unsigned long flags;
@@ -140,15 +144,15 @@ gf100_fifo_recover_work(struct work_struct *work)
 	u64 mask, todo;
 
 	spin_lock_irqsave(&fifo->base.lock, flags);
-	mask = fifo->mask;
-	fifo->mask = 0ULL;
+	mask = fifo->recover.mask;
+	fifo->recover.mask = 0ULL;
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 
-	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~(1 << engn))
+	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~BIT_ULL(engn))
 		engm |= 1 << gf100_fifo_engidx(fifo, engn);
 	nvkm_mask(device, 0x002630, engm, engm);
 
-	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~(1 << engn)) {
+	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~BIT_ULL(engn)) {
 		if ((engine = nvkm_device_engine(device, engn))) {
 			nvkm_subdev_fini(&engine->subdev, false);
 			WARN_ON(nvkm_subdev_init(&engine->subdev));
@@ -176,8 +180,8 @@ gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
 	list_del_init(&chan->head);
 	chan->killed = true;
 
-	fifo->mask |= 1ULL << engine->subdev.index;
-	schedule_work(&fifo->fault);
+	fifo->recover.mask |= 1ULL << engine->subdev.index;
+	schedule_work(&fifo->recover.work);
 }
 
 static const struct nvkm_enum
@@ -330,7 +334,7 @@ gf100_fifo_intr_fault(struct gf100_fifo *fifo, int unit)
 		snprintf(gpcid, sizeof(gpcid), "GPC%d/", gpc);
 	}
 
-	if (eu) {
+	if (eu && eu->data2) {
 		switch (eu->data2) {
 		case NVKM_SUBDEV_BAR:
 			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
@@ -544,9 +548,16 @@ static int
 gf100_fifo_oneinit(struct nvkm_fifo *base)
 {
 	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 	int ret;
 
+	/* Determine number of PBDMAs by checking valid enable bits. */
+	nvkm_wr32(device, 0x002204, 0xffffffff);
+	fifo->pbdma_nr = hweight32(nvkm_rd32(device, 0x002204));
+	nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr);
+
+
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000,
 			      false, &fifo->runlist.mem[0]);
 	if (ret)
@@ -576,25 +587,22 @@ static void
 gf100_fifo_fini(struct nvkm_fifo *base)
 {
 	struct gf100_fifo *fifo = gf100_fifo(base);
-	flush_work(&fifo->fault);
+	flush_work(&fifo->recover.work);
 }
 
 static void
 gf100_fifo_init(struct nvkm_fifo *base)
 {
 	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	int i;
 
-	nvkm_wr32(device, 0x000204, 0xffffffff);
-	nvkm_wr32(device, 0x002204, 0xffffffff);
-
-	fifo->spoon_nr = hweight32(nvkm_rd32(device, 0x002204));
-	nvkm_debug(subdev, "%d PBDMA unit(s)\n", fifo->spoon_nr);
+	/* Enable PBDMAs. */
+	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
+	nvkm_wr32(device, 0x002204, (1 << fifo->pbdma_nr) - 1);
 
-	/* assign engines to PBDMAs */
-	if (fifo->spoon_nr >= 3) {
+	/* Assign engines to PBDMAs. */
+	if (fifo->pbdma_nr >= 3) {
 		nvkm_wr32(device, 0x002208, ~(1 << 0)); /* PGRAPH */
 		nvkm_wr32(device, 0x00220c, ~(1 << 1)); /* PVP */
 		nvkm_wr32(device, 0x002210, ~(1 << 1)); /* PMSPP */
@@ -604,7 +612,7 @@ gf100_fifo_init(struct nvkm_fifo *base)
 	}
 
 	/* PBDMA[n] */
-	for (i = 0; i < fifo->spoon_nr; i++) {
+	for (i = 0; i < fifo->pbdma_nr; i++) {
 		nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
 		nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
 		nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */
@@ -652,7 +660,7 @@ gf100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
 	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
 		return -ENOMEM;
 	INIT_LIST_HEAD(&fifo->chan);
-	INIT_WORK(&fifo->fault, gf100_fifo_recover_work);
+	INIT_WORK(&fifo->recover.work, gf100_fifo_recover_work);
 	*pfifo = &fifo->base;
 
 	return nvkm_fifo_ctor(&gf100_fifo, device, index, 128, &fifo->base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h
index 08c33c3ceaf7..70db58eab9c3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h
@@ -11,8 +11,12 @@ struct gf100_fifo {
 
 	struct list_head chan;
 
-	struct work_struct fault;
-	u64 mask;
+	struct {
+		struct work_struct work;
+		u64 mask;
+	} recover;
+
+	int pbdma_nr;
 
 	struct {
 		struct nvkm_memory *mem[2];
@@ -24,7 +28,6 @@ struct gf100_fifo {
 		struct nvkm_memory *mem;
 		struct nvkm_vma bar;
 	} user;
-	int spoon_nr;
 };
 
 void gf100_fifo_intr_engine(struct gf100_fifo *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 4fcd147d43c8..68acb36b3e6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -47,34 +47,41 @@ gk104_fifo_uevent_init(struct nvkm_fifo *fifo)
 }
 
 void
-gk104_fifo_runlist_commit(struct gk104_fifo *fifo, u32 engine)
+gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 {
-	struct gk104_fifo_engn *engn = &fifo->engine[engine];
 	struct gk104_fifo_chan *chan;
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_memory *cur;
+	struct nvkm_memory *mem;
 	int nr = 0;
+	int target;
 
 	mutex_lock(&subdev->mutex);
-	cur = engn->runlist[engn->cur_runlist];
-	engn->cur_runlist = !engn->cur_runlist;
+	mem = fifo->runlist[runl].mem[fifo->runlist[runl].next];
+	fifo->runlist[runl].next = !fifo->runlist[runl].next;
 
-	nvkm_kmap(cur);
-	list_for_each_entry(chan, &engn->chan, head) {
-		nvkm_wo32(cur, (nr * 8) + 0, chan->base.chid);
-		nvkm_wo32(cur, (nr * 8) + 4, 0x00000000);
+	nvkm_kmap(mem);
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		nvkm_wo32(mem, (nr * 8) + 0, chan->base.chid);
+		nvkm_wo32(mem, (nr * 8) + 4, 0x00000000);
 		nr++;
 	}
-	nvkm_done(cur);
-
-	nvkm_wr32(device, 0x002270, nvkm_memory_addr(cur) >> 12);
-	nvkm_wr32(device, 0x002274, (engine << 20) | nr);
-
-	if (wait_event_timeout(engn->wait, !(nvkm_rd32(device, 0x002284 +
-			       (engine * 0x08)) & 0x00100000),
-				msecs_to_jiffies(2000)) == 0)
-		nvkm_error(subdev, "runlist %d update timeout\n", engine);
+	nvkm_done(mem);
+
+	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
+		target = 0;
+	else
+		target = 3;
+
+	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
+				    (target << 28));
+	nvkm_wr32(device, 0x002274, (runl << 20) | nr);
+
+	if (wait_event_timeout(fifo->runlist[runl].wait,
+			       !(nvkm_rd32(device, 0x002284 + (runl * 0x08))
+				       & 0x00100000),
+			       msecs_to_jiffies(2000)) == 0)
+		nvkm_error(subdev, "runlist %d update timeout\n", runl);
 	mutex_unlock(&subdev->mutex);
 }
 
@@ -90,58 +97,51 @@ void
 gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
 {
 	mutex_lock(&fifo->base.engine.subdev.mutex);
-	list_add_tail(&chan->head, &fifo->engine[chan->engine].chan);
+	list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan);
 	mutex_unlock(&fifo->base.engine.subdev.mutex);
 }
 
-static inline struct nvkm_engine *
-gk104_fifo_engine(struct gk104_fifo *fifo, u32 engn)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u64 subdevs = gk104_fifo_engine_subdev(engn);
-	if (subdevs)
-		return nvkm_device_engine(device, __ffs(subdevs));
-	return NULL;
-}
-
 static void
-gk104_fifo_recover_work(struct work_struct *work)
+gk104_fifo_recover_work(struct work_struct *w)
 {
-	struct gk104_fifo *fifo = container_of(work, typeof(*fifo), fault);
+	struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	struct nvkm_engine *engine;
 	unsigned long flags;
-	u32 engn, engm = 0;
-	u64 mask, todo;
+	u32 engm, runm, todo;
+	int engn, runl;
 
 	spin_lock_irqsave(&fifo->base.lock, flags);
-	mask = fifo->mask;
-	fifo->mask = 0ULL;
+	runm = fifo->recover.runm;
+	engm = fifo->recover.engm;
+	fifo->recover.engm = 0;
+	fifo->recover.runm = 0;
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 
-	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~(1 << engn))
-		engm |= 1 << gk104_fifo_subdev_engine(engn);
-	nvkm_mask(device, 0x002630, engm, engm);
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (todo = mask; engn = __ffs64(todo), todo; todo &= ~(1 << engn)) {
-		if ((engine = nvkm_device_engine(device, engn))) {
+	for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
+		if ((engine = fifo->engine[engn].engine)) {
 			nvkm_subdev_fini(&engine->subdev, false);
 			WARN_ON(nvkm_subdev_init(&engine->subdev));
 		}
-		gk104_fifo_runlist_commit(fifo, gk104_fifo_subdev_engine(engn));
 	}
 
-	nvkm_wr32(device, 0x00262c, engm);
-	nvkm_mask(device, 0x002630, engm, 0x00000000);
+	for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
+		gk104_fifo_runlist_commit(fifo, runl);
+
+	nvkm_wr32(device, 0x00262c, runm);
+	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
 static void
 gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		  struct gk104_fifo_chan *chan)
+		   struct gk104_fifo_chan *chan)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 chid = chan->base.chid;
+	int engn;
 
 	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
 		   nvkm_subdev_name[engine->subdev.index], chid);
@@ -151,8 +151,15 @@ gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
 	list_del_init(&chan->head);
 	chan->killed = true;
 
-	fifo->mask |= 1ULL << engine->subdev.index;
-	schedule_work(&fifo->fault);
+	for (engn = 0; engn < fifo->engine_nr; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			fifo->recover.engm |= BIT(engn);
+			break;
+		}
+	}
+
+	fifo->recover.runm |= BIT(chan->runl);
+	schedule_work(&fifo->recover.work);
 }
 
 static const struct nvkm_enum
@@ -189,32 +196,31 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_engine *engine;
 	struct gk104_fifo_chan *chan;
 	unsigned long flags;
 	u32 engn;
 
 	spin_lock_irqsave(&fifo->base.lock, flags);
-	for (engn = 0; engn < ARRAY_SIZE(fifo->engine); engn++) {
+	for (engn = 0; engn < fifo->engine_nr; engn++) {
+		struct nvkm_engine *engine = fifo->engine[engn].engine;
+		int runl = fifo->engine[engn].runl;
 		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
 		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x07ff0000) >> 16;
+		u32 next = (stat & 0x0fff0000) >> 16;
 		u32 chsw = (stat & 0x00008000);
 		u32 save = (stat & 0x00004000);
 		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x000007ff);
+		u32 prev = (stat & 0x00000fff);
 		u32 chid = load ? next : prev;
 		(void)save;
 
-		if (busy && chsw) {
-			list_for_each_entry(chan, &fifo->engine[engn].chan, head) {
-				if (chan->base.chid == chid) {
-					engine = gk104_fifo_engine(fifo, engn);
-					if (!engine)
-						break;
-					gk104_fifo_recover(fifo, engine, chan);
-					break;
-				}
+		if (!busy || !chsw)
+			continue;
+
+		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+			if (chan->base.chid == chid && engine) {
+				gk104_fifo_recover(fifo, engine, chan);
+				break;
 			}
 		}
 	}
@@ -395,7 +401,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(gpcid, sizeof(gpcid), "GPC%d/", gpc);
 	}
 
-	if (eu) {
+	if (eu && eu->data2) {
 		switch (eu->data2) {
 		case NVKM_SUBDEV_BAR:
 			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
@@ -484,9 +490,10 @@ gk104_fifo_intr_pbdma_0(struct gk104_fifo *fifo, int unit)
 			if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data))
 				show &= ~0x00800000;
 		}
-		nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008);
 	}
 
+	nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008);
+
 	if (show) {
 		nvkm_snprintbf(msg, sizeof(msg), gk104_fifo_pbdma_intr_0, show);
 		chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags);
@@ -537,10 +544,10 @@ gk104_fifo_intr_runlist(struct gk104_fifo *fifo)
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	u32 mask = nvkm_rd32(device, 0x002a00);
 	while (mask) {
-		u32 engn = __ffs(mask);
-		wake_up(&fifo->engine[engn].wait);
-		nvkm_wr32(device, 0x002a00, 1 << engn);
-		mask &= ~(1 << engn);
+		int runl = __ffs(mask);
+		wake_up(&fifo->runlist[runl].wait);
+		nvkm_wr32(device, 0x002a00, 1 << runl);
+		mask &= ~(1 << runl);
 	}
 }
 
@@ -647,7 +654,7 @@ gk104_fifo_fini(struct nvkm_fifo *base)
 {
 	struct gk104_fifo *fifo = gk104_fifo(base);
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	flush_work(&fifo->fault);
+	flush_work(&fifo->recover.work);
 	/* allow mmu fault interrupts, even when we're not using fifo */
 	nvkm_mask(device, 0x002140, 0x10000000, 0x10000000);
 }
@@ -656,24 +663,122 @@ int
 gk104_fifo_oneinit(struct nvkm_fifo *base)
 {
 	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 	int ret, i;
+	u32 *map;
+
+	/* Determine number of PBDMAs by checking valid enable bits. */
+	nvkm_wr32(device, 0x000204, 0xffffffff);
+	fifo->pbdma_nr = hweight32(nvkm_rd32(device, 0x000204));
+	nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr);
+
+	/* Read PBDMA->runlist(s) mapping from HW. */
+	if (!(map = kzalloc(sizeof(*map) * fifo->pbdma_nr, GFP_KERNEL)))
+		return -ENOMEM;
+
+	for (i = 0; i < fifo->pbdma_nr; i++)
+		map[i] = nvkm_rd32(device, 0x002390 + (i * 0x04));
+
+	/* Read device topology from HW. */
+	for (i = 0; i < 64; i++) {
+		int type = -1, pbid = -1, engidx = -1;
+		int engn = -1, runl = -1, intr = -1, mcen = -1;
+		int fault = -1, j;
+		u32 data, addr = 0;
+
+		do {
+			data = nvkm_rd32(device, 0x022700 + (i * 0x04));
+			nvkm_trace(subdev, "%02x: %08x\n", i, data);
+			switch (data & 0x00000003) {
+			case 0x00000000: /* NOT_VALID */
+				continue;
+			case 0x00000001: /* DATA */
+				addr  = (data & 0x00fff000);
+				fault = (data & 0x000000f8) >> 3;
+				break;
+			case 0x00000002: /* ENUM */
+				if (data & 0x00000020)
+					engn = (data & 0x3c000000) >> 26;
+				if (data & 0x00000010)
+					runl = (data & 0x01e00000) >> 21;
+				if (data & 0x00000008)
+					intr = (data & 0x000f8000) >> 15;
+				if (data & 0x00000004)
+					mcen = (data & 0x00003e00) >> 9;
+				break;
+			case 0x00000003: /* ENGINE_TYPE */
+				type = (data & 0x7ffffffc) >> 2;
+				break;
+			}
+		} while ((data & 0x80000000) && ++i < 64);
+
+		if (!data)
+			continue;
+
+		/* Determine which PBDMA handles requests for this engine. */
+		for (j = 0; runl >= 0 && j < fifo->pbdma_nr; j++) {
+			if (map[j] & (1 << runl)) {
+				pbid = j;
+				break;
+			}
+		}
+
+		/* Translate engine type to NVKM engine identifier. */
+		switch (type) {
+		case 0x00000000: engidx = NVKM_ENGINE_GR; break;
+		case 0x00000001: engidx = NVKM_ENGINE_CE0; break;
+		case 0x00000002: engidx = NVKM_ENGINE_CE1; break;
+		case 0x00000003: engidx = NVKM_ENGINE_CE2; break;
+		case 0x00000008: engidx = NVKM_ENGINE_MSPDEC; break;
+		case 0x00000009: engidx = NVKM_ENGINE_MSPPP; break;
+		case 0x0000000a: engidx = NVKM_ENGINE_MSVLD; break;
+		case 0x0000000b: engidx = NVKM_ENGINE_MSENC; break;
+		case 0x0000000c: engidx = NVKM_ENGINE_VIC; break;
+		case 0x0000000d: engidx = NVKM_ENGINE_SEC; break;
+		case 0x0000000e: engidx = NVKM_ENGINE_NVENC0; break;
+		case 0x0000000f: engidx = NVKM_ENGINE_NVENC1; break;
+		case 0x00000010: engidx = NVKM_ENGINE_NVDEC; break;
+			break;
+		default:
+			break;
+		}
+
+		nvkm_debug(subdev, "%02x (%8s): engine %2d runlist %2d "
+				   "pbdma %2d intr %2d reset %2d "
+				   "fault %2d addr %06x\n", type,
+			   engidx < 0 ? NULL : nvkm_subdev_name[engidx],
+			   engn, runl, pbid, intr, mcen, fault, addr);
+
+		/* Mark the engine as supported if everything checks out. */
+		if (engn >= 0 && runl >= 0) {
+			fifo->engine[engn].engine = engidx < 0 ? NULL :
+				nvkm_device_engine(device, engidx);
+			fifo->engine[engn].runl = runl;
+			fifo->engine[engn].pbid = pbid;
+			fifo->engine_nr = max(fifo->engine_nr, engn + 1);
+			fifo->runlist[runl].engm |= 1 << engn;
+			fifo->runlist_nr = max(fifo->runlist_nr, runl + 1);
+		}
+	}
 
-	for (i = 0; i < ARRAY_SIZE(fifo->engine); i++) {
+	kfree(map);
+
+	for (i = 0; i < fifo->runlist_nr; i++) {
 		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
 				      0x8000, 0x1000, false,
-				      &fifo->engine[i].runlist[0]);
+				      &fifo->runlist[i].mem[0]);
 		if (ret)
 			return ret;
 
 		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
 				      0x8000, 0x1000, false,
-				      &fifo->engine[i].runlist[1]);
+				      &fifo->runlist[i].mem[1]);
 		if (ret)
 			return ret;
 
-		init_waitqueue_head(&fifo->engine[i].wait);
-		INIT_LIST_HEAD(&fifo->engine[i].chan);
+		init_waitqueue_head(&fifo->runlist[i].wait);
+		INIT_LIST_HEAD(&fifo->runlist[i].chan);
 	}
 
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
@@ -695,24 +800,21 @@ void
 gk104_fifo_init(struct nvkm_fifo *base)
 {
 	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	int i;
 
-	/* enable all available PBDMA units */
-	nvkm_wr32(device, 0x000204, 0xffffffff);
-	fifo->spoon_nr = hweight32(nvkm_rd32(device, 0x000204));
-	nvkm_debug(subdev, "%d PBDMA unit(s)\n", fifo->spoon_nr);
+	/* Enable PBDMAs. */
+	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
 
 	/* PBDMA[n] */
-	for (i = 0; i < fifo->spoon_nr; i++) {
+	for (i = 0; i < fifo->pbdma_nr; i++) {
 		nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
 		nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
 		nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */
 	}
 
 	/* PBDMA[n].HCE */
-	for (i = 0; i < fifo->spoon_nr; i++) {
+	for (i = 0; i < fifo->pbdma_nr; i++) {
 		nvkm_wr32(device, 0x040148 + (i * 0x2000), 0xffffffff); /* INTR */
 		nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */
 	}
@@ -732,9 +834,9 @@ gk104_fifo_dtor(struct nvkm_fifo *base)
 	nvkm_vm_put(&fifo->user.bar);
 	nvkm_memory_del(&fifo->user.mem);
 
-	for (i = 0; i < ARRAY_SIZE(fifo->engine); i++) {
-		nvkm_memory_del(&fifo->engine[i].runlist[1]);
-		nvkm_memory_del(&fifo->engine[i].runlist[0]);
+	for (i = 0; i < fifo->runlist_nr; i++) {
+		nvkm_memory_del(&fifo->runlist[i].mem[1]);
+		nvkm_memory_del(&fifo->runlist[i].mem[0]);
 	}
 
 	return fifo;
@@ -748,7 +850,7 @@ gk104_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 
 	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
 		return -ENOMEM;
-	INIT_WORK(&fifo->fault, gk104_fifo_recover_work);
+	INIT_WORK(&fifo->recover.work, gk104_fifo_recover_work);
 	*pfifo = &fifo->base;
 
 	return nvkm_fifo_ctor(func, device, index, nr, &fifo->base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
index bec519d8f91e..9e5d00ba34a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
@@ -6,25 +6,37 @@
 #include <subdev/mmu.h>
 
 struct gk104_fifo_chan;
-struct gk104_fifo_engn {
-	struct nvkm_memory *runlist[2];
-	int cur_runlist;
-	wait_queue_head_t wait;
-	struct list_head chan;
-};
-
 struct gk104_fifo {
 	struct nvkm_fifo base;
 
-	struct work_struct fault;
-	u64 mask;
+	struct {
+		struct work_struct work;
+		u32 engm;
+		u32 runm;
+	} recover;
+
+	int pbdma_nr;
+
+	struct {
+		struct nvkm_engine *engine;
+		int runl;
+		int pbid;
+	} engine[16];
+	int engine_nr;
+
+	struct {
+		struct nvkm_memory *mem[2];
+		int next;
+		wait_queue_head_t wait;
+		struct list_head chan;
+		u32 engm;
+	} runlist[16];
+	int runlist_nr;
 
-	struct gk104_fifo_engn engine[7];
 	struct {
 		struct nvkm_memory *mem;
 		struct nvkm_vma bar;
 	} user;
-	int spoon_nr;
 };
 
 int gk104_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *,
@@ -38,7 +50,7 @@ void gk104_fifo_uevent_init(struct nvkm_fifo *);
 void gk104_fifo_uevent_fini(struct nvkm_fifo *);
 void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
 void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
-void gk104_fifo_runlist_commit(struct gk104_fifo *, u32 engine);
+void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl);
 
 static inline u64
 gk104_fifo_engine_subdev(int engine)
@@ -58,23 +70,4 @@ gk104_fifo_engine_subdev(int engine)
 		return 0;
 	}
 }
-
-static inline int
-gk104_fifo_subdev_engine(int subdev)
-{
-	switch (subdev) {
-	case NVKM_ENGINE_GR:
-	case NVKM_ENGINE_SW:
-	case NVKM_ENGINE_CE2   : return 0;
-	case NVKM_ENGINE_MSPDEC: return 1;
-	case NVKM_ENGINE_MSPPP : return 2;
-	case NVKM_ENGINE_MSVLD : return 3;
-	case NVKM_ENGINE_CE0   : return 4;
-	case NVKM_ENGINE_CE1   : return 5;
-	case NVKM_ENGINE_MSENC : return 6;
-	default:
-		WARN_ON(1);
-		return 0;
-	}
-}
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
new file mode 100644
index 000000000000..41307fcd4bb3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "gk104.h"
+#include "changk104.h"
+
+static const struct nvkm_fifo_func
+gk110_fifo = {
+	.dtor = gk104_fifo_dtor,
+	.oneinit = gk104_fifo_oneinit,
+	.init = gk104_fifo_init,
+	.fini = gk104_fifo_fini,
+	.intr = gk104_fifo_intr,
+	.uevent_init = gk104_fifo_uevent_init,
+	.uevent_fini = gk104_fifo_uevent_fini,
+	.chan = {
+		&gk110_fifo_gpfifo_oclass,
+		NULL
+	},
+};
+
+int
+gk110_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&gk110_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
new file mode 100644
index 000000000000..6d59d65794a1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "gk104.h"
+#include "changk104.h"
+
+static const struct nvkm_fifo_func
+gm107_fifo = {
+	.dtor = gk104_fifo_dtor,
+	.oneinit = gk104_fifo_oneinit,
+	.init = gk104_fifo_init,
+	.fini = gk104_fifo_fini,
+	.intr = gk104_fifo_intr,
+	.uevent_init = gk104_fifo_uevent_init,
+	.uevent_fini = gk104_fifo_uevent_fini,
+	.chan = {
+		&gk110_fifo_gpfifo_oclass,
+		NULL
+	},
+};
+
+int
+gm107_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&gm107_fifo, device, index, 2048, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
index 2db629f1bf7e..4bdd43078df9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
@@ -25,7 +25,7 @@
 #include "changk104.h"
 
 static const struct nvkm_fifo_func
-gm204_fifo = {
+gm200_fifo = {
 	.dtor = gk104_fifo_dtor,
 	.oneinit = gk104_fifo_oneinit,
 	.init = gk104_fifo_init,
@@ -34,13 +34,13 @@ gm204_fifo = {
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
 	.chan = {
-		&gm204_fifo_gpfifo_oclass,
+		&gm200_fifo_gpfifo_oclass,
 		NULL
 	},
 };
 
 int
-gm204_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+gm200_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gm204_fifo, device, index, 4096, pfifo);
+	return gk104_fifo_new_(&gm200_fifo, device, index, 4096, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
index ae6375d9760f..4c91d4aa1e9e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
@@ -32,7 +32,7 @@ gm20b_fifo = {
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
 	.chan = {
-		&gm204_fifo_gpfifo_oclass,
+		&gm200_fifo_gpfifo_oclass,
 		NULL
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index 2e1df01bd928..ed4351032ed6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -63,9 +63,15 @@ gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 	case NVKM_ENGINE_CE1   :
 	case NVKM_ENGINE_CE2   : return 0x0000;
 	case NVKM_ENGINE_GR    : return 0x0210;
+	case NVKM_ENGINE_SEC   : return 0x0220;
 	case NVKM_ENGINE_MSPDEC: return 0x0250;
 	case NVKM_ENGINE_MSPPP : return 0x0260;
 	case NVKM_ENGINE_MSVLD : return 0x0270;
+	case NVKM_ENGINE_VIC   : return 0x0280;
+	case NVKM_ENGINE_MSENC : return 0x0290;
+	case NVKM_ENGINE_NVDEC : return 0x02100270;
+	case NVKM_ENGINE_NVENC0: return 0x02100290;
+	case NVKM_ENGINE_NVENC1: return 0x0210;
 	default:
 		WARN_ON(1);
 		return 0;
@@ -76,9 +82,9 @@ static int
 gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine, bool suspend)
 {
-	const u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
 	struct nvkm_gpuobj *inst = chan->base.inst;
+	u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
 	int ret;
 
 	ret = gk104_fifo_gpfifo_kick(chan);
@@ -87,8 +93,12 @@ gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 
 	if (offset) {
 		nvkm_kmap(inst);
-		nvkm_wo32(inst, offset + 0x00, 0x00000000);
-		nvkm_wo32(inst, offset + 0x04, 0x00000000);
+		nvkm_wo32(inst, (offset & 0xffff) + 0x00, 0x00000000);
+		nvkm_wo32(inst, (offset & 0xffff) + 0x04, 0x00000000);
+		if ((offset >>= 16)) {
+			nvkm_wo32(inst, offset + 0x00, 0x00000000);
+			nvkm_wo32(inst, offset + 0x04, 0x00000000);
+		}
 		nvkm_done(inst);
 	}
 
@@ -99,15 +109,21 @@ static int
 gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine)
 {
-	const u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
 	struct nvkm_gpuobj *inst = chan->base.inst;
+	u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
 
 	if (offset) {
-		u64 addr = chan->engn[engine->subdev.index].vma.offset;
+		u64   addr = chan->engn[engine->subdev.index].vma.offset;
+		u32 datalo = lower_32_bits(addr) | 0x00000004;
+		u32 datahi = upper_32_bits(addr);
 		nvkm_kmap(inst);
-		nvkm_wo32(inst, offset + 0x00, lower_32_bits(addr) | 4);
-		nvkm_wo32(inst, offset + 0x04, upper_32_bits(addr));
+		nvkm_wo32(inst, (offset & 0xffff) + 0x00, datalo);
+		nvkm_wo32(inst, (offset & 0xffff) + 0x04, datahi);
+		if ((offset >>= 16)) {
+			nvkm_wo32(inst, offset + 0x00, datalo);
+			nvkm_wo32(inst, offset + 0x04, datahi);
+		}
 		nvkm_done(inst);
 	}
 
@@ -154,7 +170,8 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
 	if (!list_empty(&chan->head)) {
 		gk104_fifo_runlist_remove(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
-		gk104_fifo_runlist_commit(fifo, chan->engine);
+		gk104_fifo_gpfifo_kick(chan);
+		gk104_fifo_runlist_commit(fifo, chan->runl);
 	}
 
 	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
@@ -169,13 +186,13 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 	u32 addr = chan->base.inst->addr >> 12;
 	u32 coff = chan->base.chid * 8;
 
-	nvkm_mask(device, 0x800004 + coff, 0x000f0000, chan->engine << 16);
+	nvkm_mask(device, 0x800004 + coff, 0x000f0000, chan->runl << 16);
 	nvkm_wr32(device, 0x800000 + coff, 0x80000000 | addr);
 
 	if (list_empty(&chan->head) && !chan->killed) {
 		gk104_fifo_runlist_insert(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-		gk104_fifo_runlist_commit(fifo, chan->engine);
+		gk104_fifo_runlist_commit(fifo, chan->runl);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
 	}
 }
@@ -201,73 +218,79 @@ gk104_fifo_gpfifo_func = {
 	.engine_fini = gk104_fifo_gpfifo_engine_fini,
 };
 
-int
-gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
+struct gk104_fifo_chan_func {
+	u32 engine;
+	u64 subdev;
+};
+
+static int
+gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func,
+		       struct gk104_fifo *fifo, u32 *engmask, u16 *chid,
+		       u64 vm, u64 ioffset, u64 ilength,
+		       const struct nvkm_oclass *oclass,
+		       struct nvkm_object **pobject)
 {
-	union {
-		struct kepler_channel_gpfifo_a_v0 v0;
-	} *args = data;
-	struct gk104_fifo *fifo = gk104_fifo(base);
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_object *parent = oclass->parent;
 	struct gk104_fifo_chan *chan;
-	u64 usermem, ioffset, ilength;
-	u32 engines;
-	int ret = -ENOSYS, i;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx "
-				   "ioffset %016llx ilength %08x engine %08x\n",
-			   args->v0.version, args->v0.vm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.engine);
-	} else
-		return ret;
-
-	/* determine which downstream engines are present */
-	for (i = 0, engines = 0; i < ARRAY_SIZE(fifo->engine); i++) {
-		u64 subdevs = gk104_fifo_engine_subdev(i);
-		if (!nvkm_device_engine(device, __ffs64(subdevs)))
-			continue;
-		engines |= (1 << i);
+	int runlist = -1, ret = -ENOSYS, i, j;
+	u32 engines = 0, present = 0;
+	u64 subdevs = 0;
+	u64 usermem;
+
+	/* Determine which downstream engines are present */
+	for (i = 0; i < fifo->engine_nr; i++) {
+		struct nvkm_engine *engine = fifo->engine[i].engine;
+		if (engine) {
+			u64 submask = BIT_ULL(engine->subdev.index);
+			for (j = 0; func[j].subdev; j++) {
+				if (func[j].subdev & submask) {
+					present |= func[j].engine;
+					break;
+				}
+			}
+
+			if (!func[j].subdev)
+				continue;
+
+			if (runlist < 0 && (*engmask & present))
+				runlist = fifo->engine[i].runl;
+			if (runlist == fifo->engine[i].runl) {
+				engines |= func[j].engine;
+				subdevs |= func[j].subdev;
+			}
+		}
 	}
 
-	/* if this is an engine mask query, we're done */
-	if (!args->v0.engine) {
-		args->v0.engine = engines;
+	/* Just an engine mask query?  All done here! */
+	if (!*engmask) {
+		*engmask = present;
 		return nvkm_object_new(oclass, NULL, 0, pobject);
 	}
 
-	/* check that we support a requested engine - note that the user
-	 * argument is a mask in order to allow the user to request (for
-	 * example) *any* copy engine, but doesn't matter which.
-	 */
-	args->v0.engine &= engines;
-	if (!args->v0.engine) {
-		nvif_ioctl(parent, "no supported engine\n");
+	/* No runlist?  No supported engines. */
+	*engmask = present;
+	if (runlist < 0)
 		return -ENODEV;
-	}
+	*engmask = engines;
 
-	/* allocate the channel */
+	/* Allocate the channel. */
 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
 		return -ENOMEM;
 	*pobject = &chan->base.object;
 	chan->fifo = fifo;
-	chan->engine = __ffs(args->v0.engine);
+	chan->runl = runlist;
 	INIT_LIST_HEAD(&chan->head);
 
 	ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base,
-				  0x1000, 0x1000, true, args->v0.vm, 0,
-				  gk104_fifo_engine_subdev(chan->engine),
+				  0x1000, 0x1000, true, vm, 0, subdevs,
 				  1, fifo->user.bar.offset, 0x200,
 				  oclass, &chan->base);
 	if (ret)
 		return ret;
 
-	args->v0.chid = chan->base.chid;
+	*chid = chan->base.chid;
 
-	/* page directory */
+	/* Page directory. */
 	ret = nvkm_gpuobj_new(device, 0x10000, 0x1000, false, NULL, &chan->pgd);
 	if (ret)
 		return ret;
@@ -283,10 +306,9 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
 	if (ret)
 		return ret;
 
-	/* clear channel control registers */
+	/* Clear channel control registers. */
 	usermem = chan->base.chid * 0x200;
-	ioffset = args->v0.ioffset;
-	ilength = order_base_2(args->v0.ilength / 8);
+	ilength = order_base_2(ilength / 8);
 
 	nvkm_kmap(fifo->user.mem);
 	for (i = 0; i < 0x200; i += 4)
@@ -315,6 +337,56 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
 	return 0;
 }
 
+static const struct gk104_fifo_chan_func
+gk104_fifo_gpfifo[] = {
+	{ NVA06F_V0_ENGINE_SW | NVA06F_V0_ENGINE_GR,
+		BIT_ULL(NVKM_ENGINE_SW) | BIT_ULL(NVKM_ENGINE_GR)
+	},
+	{ NVA06F_V0_ENGINE_SEC   , BIT_ULL(NVKM_ENGINE_SEC   ) },
+	{ NVA06F_V0_ENGINE_MSVLD , BIT_ULL(NVKM_ENGINE_MSVLD ) },
+	{ NVA06F_V0_ENGINE_MSPDEC, BIT_ULL(NVKM_ENGINE_MSPDEC) },
+	{ NVA06F_V0_ENGINE_MSPPP , BIT_ULL(NVKM_ENGINE_MSPPP ) },
+	{ NVA06F_V0_ENGINE_MSENC , BIT_ULL(NVKM_ENGINE_MSENC ) },
+	{ NVA06F_V0_ENGINE_VIC   , BIT_ULL(NVKM_ENGINE_VIC   ) },
+	{ NVA06F_V0_ENGINE_NVDEC , BIT_ULL(NVKM_ENGINE_NVDEC ) },
+	{ NVA06F_V0_ENGINE_NVENC0, BIT_ULL(NVKM_ENGINE_NVENC0) },
+	{ NVA06F_V0_ENGINE_NVENC1, BIT_ULL(NVKM_ENGINE_NVENC1) },
+	{ NVA06F_V0_ENGINE_CE0   , BIT_ULL(NVKM_ENGINE_CE0   ) },
+	{ NVA06F_V0_ENGINE_CE1   , BIT_ULL(NVKM_ENGINE_CE1   ) },
+	{ NVA06F_V0_ENGINE_CE2   , BIT_ULL(NVKM_ENGINE_CE2   ) },
+	{}
+};
+
+int
+gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+		      void *data, u32 size, struct nvkm_object **pobject)
+{
+	struct nvkm_object *parent = oclass->parent;
+	union {
+		struct kepler_channel_gpfifo_a_v0 v0;
+	} *args = data;
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	int ret = -ENOSYS;
+
+	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
+	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent, "create channel gpfifo vers %d vm %llx "
+				   "ioffset %016llx ilength %08x engine %08x\n",
+			   args->v0.version, args->v0.vm, args->v0.ioffset,
+			   args->v0.ilength, args->v0.engines);
+		return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo,
+					      &args->v0.engines,
+					      &args->v0.chid,
+					       args->v0.vm,
+					       args->v0.ioffset,
+					       args->v0.ilength,
+					      oclass, pobject);
+
+	}
+
+	return ret;
+}
+
 const struct nvkm_fifo_chan_oclass
 gk104_fifo_gpfifo_oclass = {
 	.base.oclass = KEPLER_CHANNEL_GPFIFO_A,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c
new file mode 100644
index 000000000000..a9aa69c82e8e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "changk104.h"
+
+#include <nvif/class.h>
+
+const struct nvkm_fifo_chan_oclass
+gk110_fifo_gpfifo_oclass = {
+	.base.oclass = KEPLER_CHANNEL_GPFIFO_B,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = gk104_fifo_gpfifo_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c
index 6511d6e21ecc..a13315147391 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c
@@ -26,7 +26,7 @@
 #include <nvif/class.h>
 
 const struct nvkm_fifo_chan_oclass
-gm204_fifo_gpfifo_oclass = {
+gm200_fifo_gpfifo_oclass = {
 	.base.oclass = MAXWELL_CHANNEL_GPFIFO_A,
 	.base.minver = 0,
 	.base.maxver = 0,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 9ad0d0e78a96..290ed0db8047 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -29,8 +29,7 @@ nvkm-y += nvkm/engine/gr/gk110b.o
 nvkm-y += nvkm/engine/gr/gk208.o
 nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gm107.o
-nvkm-y += nvkm/engine/gr/gm204.o
-nvkm-y += nvkm/engine/gr/gm206.o
+nvkm-y += nvkm/engine/gr/gm200.o
 nvkm-y += nvkm/engine/gr/gm20b.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
@@ -47,6 +46,5 @@ nvkm-y += nvkm/engine/gr/ctxgk110b.o
 nvkm-y += nvkm/engine/gr/ctxgk208.o
 nvkm-y += nvkm/engine/gr/ctxgk20a.o
 nvkm-y += nvkm/engine/gr/ctxgm107.o
-nvkm-y += nvkm/engine/gr/ctxgm204.o
-nvkm-y += nvkm/engine/gr/ctxgm206.o
+nvkm-y += nvkm/engine/gr/ctxgm200.o
 nvkm-y += nvkm/engine/gr/ctxgm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 3c64040ec5a2..3c8673958f22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -97,12 +97,11 @@ void gm107_grctx_generate_bundle(struct gf100_grctx *);
 void gm107_grctx_generate_pagepool(struct gf100_grctx *);
 void gm107_grctx_generate_attrib(struct gf100_grctx *);
 
-extern const struct gf100_grctx_func gm204_grctx;
-void gm204_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
-void gm204_grctx_generate_tpcid(struct gf100_gr *);
-void gm204_grctx_generate_405b60(struct gf100_gr *);
+extern const struct gf100_grctx_func gm200_grctx;
+void gm200_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
+void gm200_grctx_generate_tpcid(struct gf100_gr *);
+void gm200_grctx_generate_405b60(struct gf100_gr *);
 
-extern const struct gf100_grctx_func gm206_grctx;
 extern const struct gf100_grctx_func gm20b_grctx;
 
 /* context init value lists */
@@ -210,19 +209,4 @@ extern const struct gf100_gr_init gk208_grctx_init_crstr_0[];
 
 extern const struct gf100_gr_init gm107_grctx_init_gpc_unk_0[];
 extern const struct gf100_gr_init gm107_grctx_init_wwdx_0[];
-
-extern const struct gf100_gr_pack gm204_grctx_pack_icmd[];
-
-extern const struct gf100_gr_pack gm204_grctx_pack_mthd[];
-
-extern const struct gf100_gr_pack gm204_grctx_pack_hub[];
-
-extern const struct gf100_gr_init gm204_grctx_init_prop_0[];
-extern const struct gf100_gr_init gm204_grctx_init_setup_0[];
-extern const struct gf100_gr_init gm204_grctx_init_gpm_0[];
-extern const struct gf100_gr_init gm204_grctx_init_gpc_unk_2[];
-
-extern const struct gf100_gr_pack gm204_grctx_pack_tpc[];
-
-extern const struct gf100_gr_pack gm204_grctx_pack_ppc[];
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
new file mode 100644
index 000000000000..e586699fc43f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "ctxgf100.h"
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+void
+gm200_grctx_generate_tpcid(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int gpc, tpc, id;
+
+	for (tpc = 0, id = 0; tpc < 4; tpc++) {
+		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+			if (tpc < gr->tpc_nr[gpc]) {
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
+				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
+				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
+				id++;
+			}
+		}
+	}
+}
+
+static void
+gm200_grctx_generate_rop_active_fbps(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 fbp_count = nvkm_rd32(device, 0x12006c);
+	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
+}
+
+void
+gm200_grctx_generate_405b60(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
+	u32 dist[TPC_MAX / 4] = {};
+	u32 gpcs[GPC_MAX] = {};
+	u8  tpcnr[GPC_MAX];
+	int tpc, gpc, i;
+
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+
+	/* won't result in the same distribution as the binary driver where
+	 * some of the gpcs have more tpcs than others, but this shall do
+	 * for the moment.  the code for earlier gpus has this issue too.
+	 */
+	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while(!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+		gpcs[gpc] |= i << (tpc * 8);
+	}
+
+	for (i = 0; i < dist_nr; i++)
+		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
+	for (i = 0; i < gr->gpc_nr; i++)
+		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
+}
+
+void
+gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 tmp;
+	int i;
+
+	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+
+	nvkm_wr32(device, 0x404154, 0x00000000);
+
+	grctx->bundle(info);
+	grctx->pagepool(info);
+	grctx->attrib(info);
+	grctx->unkn(gr);
+
+	gm200_grctx_generate_tpcid(gr);
+	gf100_grctx_generate_r406028(gr);
+	gk104_grctx_generate_r418bb8(gr);
+
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000000);
+
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+
+	gm200_grctx_generate_rop_active_fbps(gr);
+
+	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
+		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
+	nvkm_wr32(device, 0x4041c4, tmp);
+
+	gm200_grctx_generate_405b60(gr);
+
+	gf100_gr_icmd(gr, gr->fuc_bundle);
+	nvkm_wr32(device, 0x404154, 0x00000800);
+	gf100_gr_mthd(gr, gr->fuc_method);
+
+	nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
+	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
+}
+
+const struct gf100_grctx_func
+gm200_grctx = {
+	.main  = gm200_grctx_generate_main,
+	.unkn  = gk104_grctx_generate_unkn,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x780,
+	.pagepool = gm107_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib = gm107_grctx_generate_attrib,
+	.attrib_nr_max = 0x600,
+	.attrib_nr = 0x400,
+	.alpha_nr_max = 0x1800,
+	.alpha_nr = 0x1000,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c
deleted file mode 100644
index 170cbfdbe1ae..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c
+++ /dev/null
@@ -1,1049 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "ctxgf100.h"
-
-/*******************************************************************************
- * PGRAPH context register lists
- ******************************************************************************/
-
-static const struct gf100_gr_init
-gm204_grctx_init_icmd_0[] = {
-	{ 0x001000,   1, 0x01, 0x00000002 },
-	{ 0x0006aa,   1, 0x01, 0x00000001 },
-	{ 0x0006ad,   2, 0x01, 0x00000100 },
-	{ 0x0006b1,   1, 0x01, 0x00000011 },
-	{ 0x00078c,   1, 0x01, 0x00000008 },
-	{ 0x000792,   1, 0x01, 0x00000001 },
-	{ 0x000794,   3, 0x01, 0x00000001 },
-	{ 0x000797,   1, 0x01, 0x000000cf },
-	{ 0x00079a,   1, 0x01, 0x00000002 },
-	{ 0x0007a1,   1, 0x01, 0x00000001 },
-	{ 0x0007a3,   3, 0x01, 0x00000001 },
-	{ 0x000831,   1, 0x01, 0x00000004 },
-	{ 0x01e100,   1, 0x01, 0x00000001 },
-	{ 0x001000,   1, 0x01, 0x00000008 },
-	{ 0x000039,   3, 0x01, 0x00000000 },
-	{ 0x000380,   1, 0x01, 0x00000001 },
-	{ 0x000366,   2, 0x01, 0x00000000 },
-	{ 0x000368,   1, 0x01, 0x00000fff },
-	{ 0x000370,   2, 0x01, 0x00000000 },
-	{ 0x000372,   1, 0x01, 0x000fffff },
-	{ 0x000374,   1, 0x01, 0x00000100 },
-	{ 0x000818,   8, 0x01, 0x00000000 },
-	{ 0x000848,  16, 0x01, 0x00000000 },
-	{ 0x000738,   1, 0x01, 0x00000000 },
-	{ 0x000b07,   1, 0x01, 0x00000002 },
-	{ 0x000b08,   2, 0x01, 0x00000100 },
-	{ 0x000b0a,   1, 0x01, 0x00000001 },
-	{ 0x000a04,   1, 0x01, 0x000000ff },
-	{ 0x000a0b,   1, 0x01, 0x00000040 },
-	{ 0x00097f,   1, 0x01, 0x00000100 },
-	{ 0x000a02,   1, 0x01, 0x00000001 },
-	{ 0x000809,   1, 0x01, 0x00000007 },
-	{ 0x00c221,   1, 0x01, 0x00000040 },
-	{ 0x00c401,   1, 0x01, 0x00000001 },
-	{ 0x00c402,   1, 0x01, 0x00010001 },
-	{ 0x00c403,   2, 0x01, 0x00000001 },
-	{ 0x00c40e,   1, 0x01, 0x00000020 },
-	{ 0x01e100,   1, 0x01, 0x00000001 },
-	{ 0x001000,   1, 0x01, 0x00000001 },
-	{ 0x000b07,   1, 0x01, 0x00000002 },
-	{ 0x000b08,   2, 0x01, 0x00000100 },
-	{ 0x000b0a,   1, 0x01, 0x00000001 },
-	{ 0x01e100,   1, 0x01, 0x00000001 },
-	{ 0x001000,   1, 0x01, 0x00000004 },
-	{ 0x000039,   3, 0x01, 0x00000000 },
-	{ 0x0000a9,   1, 0x01, 0x0000ffff },
-	{ 0x000038,   1, 0x01, 0x0fac6881 },
-	{ 0x00003d,   1, 0x01, 0x00000001 },
-	{ 0x0000e8,   8, 0x01, 0x00000400 },
-	{ 0x000078,   8, 0x01, 0x00000300 },
-	{ 0x000050,   1, 0x01, 0x00000011 },
-	{ 0x000058,   8, 0x01, 0x00000008 },
-	{ 0x000208,   8, 0x01, 0x00000001 },
-	{ 0x000081,   1, 0x01, 0x00000001 },
-	{ 0x000085,   1, 0x01, 0x00000004 },
-	{ 0x000088,   1, 0x01, 0x00000400 },
-	{ 0x000090,   1, 0x01, 0x00000300 },
-	{ 0x000098,   1, 0x01, 0x00001001 },
-	{ 0x0000e3,   1, 0x01, 0x00000001 },
-	{ 0x0000da,   1, 0x01, 0x00000001 },
-	{ 0x0000b4,   4, 0x01, 0x88888888 },
-	{ 0x0000f8,   1, 0x01, 0x00000003 },
-	{ 0x0000fa,   1, 0x01, 0x00000001 },
-	{ 0x0000b1,   2, 0x01, 0x00000001 },
-	{ 0x00009f,   4, 0x01, 0x0000ffff },
-	{ 0x0000a8,   1, 0x01, 0x0000ffff },
-	{ 0x0000ad,   1, 0x01, 0x0000013e },
-	{ 0x0000e1,   1, 0x01, 0x00000010 },
-	{ 0x000290,  16, 0x01, 0x00000000 },
-	{ 0x0003b0,  16, 0x01, 0x00000000 },
-	{ 0x0002a0,  16, 0x01, 0x00000000 },
-	{ 0x000420,  16, 0x01, 0x00000000 },
-	{ 0x0002b0,  16, 0x01, 0x00000000 },
-	{ 0x000430,  16, 0x01, 0x00000000 },
-	{ 0x0002c0,  16, 0x01, 0x00000000 },
-	{ 0x0004d0,  16, 0x01, 0x00000000 },
-	{ 0x000720,  16, 0x01, 0x00000000 },
-	{ 0x0008c0,  16, 0x01, 0x00000000 },
-	{ 0x000890,  16, 0x01, 0x00000000 },
-	{ 0x0008e0,  16, 0x01, 0x00000000 },
-	{ 0x0008a0,  16, 0x01, 0x00000000 },
-	{ 0x0008f0,  16, 0x01, 0x00000000 },
-	{ 0x00094c,   1, 0x01, 0x000000ff },
-	{ 0x00094d,   1, 0x01, 0xffffffff },
-	{ 0x00094e,   1, 0x01, 0x00000002 },
-	{ 0x0002f2,   2, 0x01, 0x00000001 },
-	{ 0x0002f5,   1, 0x01, 0x00000001 },
-	{ 0x0002f7,   1, 0x01, 0x00000001 },
-	{ 0x000303,   1, 0x01, 0x00000001 },
-	{ 0x0002e6,   1, 0x01, 0x00000001 },
-	{ 0x000466,   1, 0x01, 0x00000052 },
-	{ 0x000301,   1, 0x01, 0x3f800000 },
-	{ 0x000304,   1, 0x01, 0x30201000 },
-	{ 0x000305,   1, 0x01, 0x70605040 },
-	{ 0x000306,   1, 0x01, 0xb8a89888 },
-	{ 0x000307,   1, 0x01, 0xf8e8d8c8 },
-	{ 0x00030a,   1, 0x01, 0x00ffff00 },
-	{ 0x00030b,   1, 0x01, 0x0000001a },
-	{ 0x00030c,   1, 0x01, 0x00000001 },
-	{ 0x000318,   1, 0x01, 0x00000001 },
-	{ 0x000340,   1, 0x01, 0x00000000 },
-	{ 0x00037d,   1, 0x01, 0x00000006 },
-	{ 0x0003a0,   1, 0x01, 0x00000002 },
-	{ 0x0003aa,   1, 0x01, 0x00000001 },
-	{ 0x0003a9,   1, 0x01, 0x00000001 },
-	{ 0x000380,   1, 0x01, 0x00000001 },
-	{ 0x000383,   1, 0x01, 0x00000011 },
-	{ 0x000360,   1, 0x01, 0x00000040 },
-	{ 0x000366,   2, 0x01, 0x00000000 },
-	{ 0x000368,   1, 0x01, 0x00000fff },
-	{ 0x000370,   2, 0x01, 0x00000000 },
-	{ 0x000372,   1, 0x01, 0x000fffff },
-	{ 0x000374,   1, 0x01, 0x00000100 },
-	{ 0x00037a,   1, 0x01, 0x00000012 },
-	{ 0x000619,   1, 0x01, 0x00000003 },
-	{ 0x000811,   1, 0x01, 0x00000003 },
-	{ 0x000812,   1, 0x01, 0x00000004 },
-	{ 0x000813,   1, 0x01, 0x00000006 },
-	{ 0x000814,   1, 0x01, 0x00000008 },
-	{ 0x000815,   1, 0x01, 0x0000000b },
-	{ 0x000800,   6, 0x01, 0x00000001 },
-	{ 0x000632,   1, 0x01, 0x00000001 },
-	{ 0x000633,   1, 0x01, 0x00000002 },
-	{ 0x000634,   1, 0x01, 0x00000003 },
-	{ 0x000635,   1, 0x01, 0x00000004 },
-	{ 0x000654,   1, 0x01, 0x3f800000 },
-	{ 0x000657,   1, 0x01, 0x3f800000 },
-	{ 0x000655,   2, 0x01, 0x3f800000 },
-	{ 0x0006cd,   1, 0x01, 0x3f800000 },
-	{ 0x0007f5,   1, 0x01, 0x3f800000 },
-	{ 0x0007dc,   1, 0x01, 0x39291909 },
-	{ 0x0007dd,   1, 0x01, 0x79695949 },
-	{ 0x0007de,   1, 0x01, 0xb9a99989 },
-	{ 0x0007df,   1, 0x01, 0xf9e9d9c9 },
-	{ 0x0007e8,   1, 0x01, 0x00003210 },
-	{ 0x0007e9,   1, 0x01, 0x00007654 },
-	{ 0x0007ea,   1, 0x01, 0x00000098 },
-	{ 0x0007ec,   1, 0x01, 0x39291909 },
-	{ 0x0007ed,   1, 0x01, 0x79695949 },
-	{ 0x0007ee,   1, 0x01, 0xb9a99989 },
-	{ 0x0007ef,   1, 0x01, 0xf9e9d9c9 },
-	{ 0x0007f0,   1, 0x01, 0x00003210 },
-	{ 0x0007f1,   1, 0x01, 0x00007654 },
-	{ 0x0007f2,   1, 0x01, 0x00000098 },
-	{ 0x0005a5,   1, 0x01, 0x00000001 },
-	{ 0x0005aa,   1, 0x01, 0x00000002 },
-	{ 0x0005cb,   1, 0x01, 0x00000004 },
-	{ 0x0005d0,   1, 0x01, 0x20181008 },
-	{ 0x0005d1,   1, 0x01, 0x40383028 },
-	{ 0x0005d2,   1, 0x01, 0x60585048 },
-	{ 0x0005d3,   1, 0x01, 0x80787068 },
-	{ 0x000980, 128, 0x01, 0x00000000 },
-	{ 0x000468,   1, 0x01, 0x00000004 },
-	{ 0x00046c,   1, 0x01, 0x00000001 },
-	{ 0x000470,  96, 0x01, 0x00000000 },
-	{ 0x0005e0,  16, 0x01, 0x00000d10 },
-	{ 0x000510,  16, 0x01, 0x3f800000 },
-	{ 0x000520,   1, 0x01, 0x000002b6 },
-	{ 0x000529,   1, 0x01, 0x00000001 },
-	{ 0x000530,  16, 0x01, 0xffff0000 },
-	{ 0x000550,  32, 0x01, 0xffff0000 },
-	{ 0x000585,   1, 0x01, 0x0000003f },
-	{ 0x000576,   1, 0x01, 0x00000003 },
-	{ 0x00057b,   1, 0x01, 0x00000059 },
-	{ 0x000586,   1, 0x01, 0x00000040 },
-	{ 0x000582,   2, 0x01, 0x00000080 },
-	{ 0x000595,   1, 0x01, 0x00400040 },
-	{ 0x000596,   1, 0x01, 0x00000492 },
-	{ 0x000597,   1, 0x01, 0x08080203 },
-	{ 0x0005ad,   1, 0x01, 0x00000008 },
-	{ 0x000598,   1, 0x01, 0x00020001 },
-	{ 0x0005d4,   1, 0x01, 0x00000001 },
-	{ 0x0005c2,   1, 0x01, 0x00000001 },
-	{ 0x000638,   2, 0x01, 0x00000001 },
-	{ 0x00063a,   1, 0x01, 0x00000002 },
-	{ 0x00063b,   2, 0x01, 0x00000001 },
-	{ 0x00063d,   1, 0x01, 0x00000002 },
-	{ 0x00063e,   1, 0x01, 0x00000001 },
-	{ 0x0008b8,   8, 0x01, 0x00000001 },
-	{ 0x000900,   8, 0x01, 0x00000001 },
-	{ 0x000908,   8, 0x01, 0x00000002 },
-	{ 0x000910,  16, 0x01, 0x00000001 },
-	{ 0x000920,   8, 0x01, 0x00000002 },
-	{ 0x000928,   8, 0x01, 0x00000001 },
-	{ 0x000662,   1, 0x01, 0x00000001 },
-	{ 0x000648,   9, 0x01, 0x00000001 },
-	{ 0x000674,   1, 0x01, 0x00000001 },
-	{ 0x000658,   1, 0x01, 0x0000000f },
-	{ 0x0007ff,   1, 0x01, 0x0000000a },
-	{ 0x00066a,   1, 0x01, 0x40000000 },
-	{ 0x00066b,   1, 0x01, 0x10000000 },
-	{ 0x00066c,   2, 0x01, 0xffff0000 },
-	{ 0x0007af,   2, 0x01, 0x00000008 },
-	{ 0x0007f6,   1, 0x01, 0x00000001 },
-	{ 0x0006b2,   1, 0x01, 0x00000055 },
-	{ 0x0007ad,   1, 0x01, 0x00000003 },
-	{ 0x000971,   1, 0x01, 0x00000008 },
-	{ 0x000972,   1, 0x01, 0x00000040 },
-	{ 0x000973,   1, 0x01, 0x0000012c },
-	{ 0x00097c,   1, 0x01, 0x00000040 },
-	{ 0x000975,   1, 0x01, 0x00000020 },
-	{ 0x000976,   1, 0x01, 0x00000001 },
-	{ 0x000977,   1, 0x01, 0x00000020 },
-	{ 0x000978,   1, 0x01, 0x00000001 },
-	{ 0x000957,   1, 0x01, 0x00000003 },
-	{ 0x00095e,   1, 0x01, 0x20164010 },
-	{ 0x00095f,   1, 0x01, 0x00000020 },
-	{ 0x000a0d,   1, 0x01, 0x00000006 },
-	{ 0x00097d,   1, 0x01, 0x0000000c },
-	{ 0x000683,   1, 0x01, 0x00000006 },
-	{ 0x000687,   1, 0x01, 0x003fffff },
-	{ 0x0006a0,   1, 0x01, 0x00000005 },
-	{ 0x000840,   1, 0x01, 0x00400008 },
-	{ 0x000841,   1, 0x01, 0x08000080 },
-	{ 0x000842,   1, 0x01, 0x00400008 },
-	{ 0x000843,   1, 0x01, 0x08000080 },
-	{ 0x000818,   8, 0x01, 0x00000000 },
-	{ 0x000848,  16, 0x01, 0x00000000 },
-	{ 0x000738,   1, 0x01, 0x00000000 },
-	{ 0x0006aa,   1, 0x01, 0x00000001 },
-	{ 0x0006ab,   1, 0x01, 0x00000002 },
-	{ 0x0006ac,   1, 0x01, 0x00000080 },
-	{ 0x0006ad,   2, 0x01, 0x00000100 },
-	{ 0x0006b1,   1, 0x01, 0x00000011 },
-	{ 0x0006bb,   1, 0x01, 0x000000cf },
-	{ 0x0006ce,   1, 0x01, 0x2a712488 },
-	{ 0x000739,   1, 0x01, 0x4085c000 },
-	{ 0x00073a,   1, 0x01, 0x00000080 },
-	{ 0x000786,   1, 0x01, 0x80000100 },
-	{ 0x00073c,   1, 0x01, 0x00010100 },
-	{ 0x00073d,   1, 0x01, 0x02800000 },
-	{ 0x000787,   1, 0x01, 0x000000cf },
-	{ 0x00078c,   1, 0x01, 0x00000008 },
-	{ 0x000792,   1, 0x01, 0x00000001 },
-	{ 0x000794,   3, 0x01, 0x00000001 },
-	{ 0x000797,   1, 0x01, 0x000000cf },
-	{ 0x000836,   1, 0x01, 0x00000001 },
-	{ 0x00079a,   1, 0x01, 0x00000002 },
-	{ 0x000833,   1, 0x01, 0x04444480 },
-	{ 0x0007a1,   1, 0x01, 0x00000001 },
-	{ 0x0007a3,   3, 0x01, 0x00000001 },
-	{ 0x000831,   1, 0x01, 0x00000004 },
-	{ 0x000b07,   1, 0x01, 0x00000002 },
-	{ 0x000b08,   2, 0x01, 0x00000100 },
-	{ 0x000b0a,   1, 0x01, 0x00000001 },
-	{ 0x000a04,   1, 0x01, 0x000000ff },
-	{ 0x000a0b,   1, 0x01, 0x00000040 },
-	{ 0x00097f,   1, 0x01, 0x00000100 },
-	{ 0x000a02,   1, 0x01, 0x00000001 },
-	{ 0x000809,   1, 0x01, 0x00000007 },
-	{ 0x00c221,   1, 0x01, 0x00000040 },
-	{ 0x00c1b0,   8, 0x01, 0x0000000f },
-	{ 0x00c1b8,   1, 0x01, 0x0fac6881 },
-	{ 0x00c1b9,   1, 0x01, 0x00fac688 },
-	{ 0x00c401,   1, 0x01, 0x00000001 },
-	{ 0x00c402,   1, 0x01, 0x00010001 },
-	{ 0x00c403,   2, 0x01, 0x00000001 },
-	{ 0x00c40e,   1, 0x01, 0x00000020 },
-	{ 0x00c413,   4, 0x01, 0x88888888 },
-	{ 0x00c423,   1, 0x01, 0x0000ff00 },
-	{ 0x00c420,   1, 0x01, 0x00880101 },
-	{ 0x01e100,   1, 0x01, 0x00000001 },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_grctx_pack_icmd[] = {
-	{ gm204_grctx_init_icmd_0 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_b197_0[] = {
-	{ 0x000800,   8, 0x40, 0x00000000 },
-	{ 0x000804,   8, 0x40, 0x00000000 },
-	{ 0x000808,   8, 0x40, 0x00000400 },
-	{ 0x00080c,   8, 0x40, 0x00000300 },
-	{ 0x000810,   1, 0x04, 0x000000cf },
-	{ 0x000850,   7, 0x40, 0x00000000 },
-	{ 0x000814,   8, 0x40, 0x00000040 },
-	{ 0x000818,   8, 0x40, 0x00000001 },
-	{ 0x00081c,   8, 0x40, 0x00000000 },
-	{ 0x000820,   8, 0x40, 0x00000000 },
-	{ 0x001c00,  16, 0x10, 0x00000000 },
-	{ 0x001c04,  16, 0x10, 0x00000000 },
-	{ 0x001c08,  16, 0x10, 0x00000000 },
-	{ 0x001c0c,  16, 0x10, 0x00000000 },
-	{ 0x001d00,  16, 0x10, 0x00000000 },
-	{ 0x001d04,  16, 0x10, 0x00000000 },
-	{ 0x001d08,  16, 0x10, 0x00000000 },
-	{ 0x001d0c,  16, 0x10, 0x00000000 },
-	{ 0x001f00,  16, 0x08, 0x00000000 },
-	{ 0x001f04,  16, 0x08, 0x00000000 },
-	{ 0x001f80,  16, 0x08, 0x00000000 },
-	{ 0x001f84,  16, 0x08, 0x00000000 },
-	{ 0x002000,   1, 0x04, 0x00000000 },
-	{ 0x002040,   1, 0x04, 0x00000011 },
-	{ 0x002080,   1, 0x04, 0x00000020 },
-	{ 0x0020c0,   1, 0x04, 0x00000030 },
-	{ 0x002100,   1, 0x04, 0x00000040 },
-	{ 0x002140,   1, 0x04, 0x00000051 },
-	{ 0x00200c,   6, 0x40, 0x00000001 },
-	{ 0x002010,   1, 0x04, 0x00000000 },
-	{ 0x002050,   1, 0x04, 0x00000000 },
-	{ 0x002090,   1, 0x04, 0x00000001 },
-	{ 0x0020d0,   1, 0x04, 0x00000002 },
-	{ 0x002110,   1, 0x04, 0x00000003 },
-	{ 0x002150,   1, 0x04, 0x00000004 },
-	{ 0x000380,   4, 0x20, 0x00000000 },
-	{ 0x000384,   4, 0x20, 0x00000000 },
-	{ 0x000388,   4, 0x20, 0x00000000 },
-	{ 0x00038c,   4, 0x20, 0x00000000 },
-	{ 0x000700,   4, 0x10, 0x00000000 },
-	{ 0x000704,   4, 0x10, 0x00000000 },
-	{ 0x000708,   4, 0x10, 0x00000000 },
-	{ 0x002800, 128, 0x04, 0x00000000 },
-	{ 0x000a00,  16, 0x20, 0x00000000 },
-	{ 0x000a04,  16, 0x20, 0x00000000 },
-	{ 0x000a08,  16, 0x20, 0x00000000 },
-	{ 0x000a0c,  16, 0x20, 0x00000000 },
-	{ 0x000a10,  16, 0x20, 0x00000000 },
-	{ 0x000a14,  16, 0x20, 0x00000000 },
-	{ 0x000a18,  16, 0x20, 0x00006420 },
-	{ 0x000a1c,  16, 0x20, 0x00000000 },
-	{ 0x000c00,  16, 0x10, 0x00000000 },
-	{ 0x000c04,  16, 0x10, 0x00000000 },
-	{ 0x000c08,  16, 0x10, 0x00000000 },
-	{ 0x000c0c,  16, 0x10, 0x3f800000 },
-	{ 0x000d00,   8, 0x08, 0xffff0000 },
-	{ 0x000d04,   8, 0x08, 0xffff0000 },
-	{ 0x000e00,  16, 0x10, 0x00000000 },
-	{ 0x000e04,  16, 0x10, 0xffff0000 },
-	{ 0x000e08,  16, 0x10, 0xffff0000 },
-	{ 0x000d40,   4, 0x08, 0x00000000 },
-	{ 0x000d44,   4, 0x08, 0x00000000 },
-	{ 0x001e00,   8, 0x20, 0x00000001 },
-	{ 0x001e04,   8, 0x20, 0x00000001 },
-	{ 0x001e08,   8, 0x20, 0x00000002 },
-	{ 0x001e0c,   8, 0x20, 0x00000001 },
-	{ 0x001e10,   8, 0x20, 0x00000001 },
-	{ 0x001e14,   8, 0x20, 0x00000002 },
-	{ 0x001e18,   8, 0x20, 0x00000001 },
-	{ 0x001480,   8, 0x10, 0x00000000 },
-	{ 0x001484,   8, 0x10, 0x00000000 },
-	{ 0x001488,   8, 0x10, 0x00000000 },
-	{ 0x003400, 128, 0x04, 0x00000000 },
-	{ 0x00030c,   1, 0x04, 0x00000001 },
-	{ 0x001944,   1, 0x04, 0x00000000 },
-	{ 0x001514,   1, 0x04, 0x00000000 },
-	{ 0x000d68,   1, 0x04, 0x0000ffff },
-	{ 0x00121c,   1, 0x04, 0x0fac6881 },
-	{ 0x000fac,   1, 0x04, 0x00000001 },
-	{ 0x001538,   1, 0x04, 0x00000001 },
-	{ 0x000fe0,   2, 0x04, 0x00000000 },
-	{ 0x000fe8,   1, 0x04, 0x00000014 },
-	{ 0x000fec,   1, 0x04, 0x00000040 },
-	{ 0x000ff0,   1, 0x04, 0x00000000 },
-	{ 0x00179c,   1, 0x04, 0x00000000 },
-	{ 0x001228,   1, 0x04, 0x00000400 },
-	{ 0x00122c,   1, 0x04, 0x00000300 },
-	{ 0x001230,   1, 0x04, 0x00010001 },
-	{ 0x0007f8,   1, 0x04, 0x00000000 },
-	{ 0x001208,   1, 0x04, 0x00000000 },
-	{ 0x0015b4,   1, 0x04, 0x00000001 },
-	{ 0x0015cc,   1, 0x04, 0x00000000 },
-	{ 0x001534,   1, 0x04, 0x00000000 },
-	{ 0x000754,   1, 0x04, 0x00000001 },
-	{ 0x000fb0,   1, 0x04, 0x00000000 },
-	{ 0x0015d0,   1, 0x04, 0x00000000 },
-	{ 0x0011e0,   4, 0x04, 0x88888888 },
-	{ 0x00153c,   1, 0x04, 0x00000000 },
-	{ 0x0016b4,   1, 0x04, 0x00000003 },
-	{ 0x000fa4,   1, 0x04, 0x00000001 },
-	{ 0x000fbc,   4, 0x04, 0x0000ffff },
-	{ 0x000fa8,   1, 0x04, 0x0000ffff },
-	{ 0x000df8,   2, 0x04, 0x00000000 },
-	{ 0x001948,   1, 0x04, 0x00000000 },
-	{ 0x001970,   1, 0x04, 0x00000001 },
-	{ 0x00161c,   1, 0x04, 0x000009f0 },
-	{ 0x000dcc,   1, 0x04, 0x00000010 },
-	{ 0x0015e4,   1, 0x04, 0x00000000 },
-	{ 0x001160,  32, 0x04, 0x25e00040 },
-	{ 0x001880,  32, 0x04, 0x00000000 },
-	{ 0x000f84,   2, 0x04, 0x00000000 },
-	{ 0x0017c8,   2, 0x04, 0x00000000 },
-	{ 0x0017d0,   1, 0x04, 0x000000ff },
-	{ 0x0017d4,   1, 0x04, 0xffffffff },
-	{ 0x0017d8,   1, 0x04, 0x00000002 },
-	{ 0x0017dc,   1, 0x04, 0x00000000 },
-	{ 0x0015f4,   2, 0x04, 0x00000000 },
-	{ 0x001434,   2, 0x04, 0x00000000 },
-	{ 0x000d74,   1, 0x04, 0x00000000 },
-	{ 0x0013a4,   1, 0x04, 0x00000000 },
-	{ 0x001318,   1, 0x04, 0x00000001 },
-	{ 0x001080,   2, 0x04, 0x00000000 },
-	{ 0x001088,   2, 0x04, 0x00000001 },
-	{ 0x001090,   1, 0x04, 0x00000000 },
-	{ 0x001094,   1, 0x04, 0x00000001 },
-	{ 0x001098,   1, 0x04, 0x00000000 },
-	{ 0x00109c,   1, 0x04, 0x00000001 },
-	{ 0x0010a0,   2, 0x04, 0x00000000 },
-	{ 0x001644,   1, 0x04, 0x00000000 },
-	{ 0x000748,   1, 0x04, 0x00000000 },
-	{ 0x000de8,   1, 0x04, 0x00000000 },
-	{ 0x001648,   1, 0x04, 0x00000000 },
-	{ 0x0012a4,   1, 0x04, 0x00000000 },
-	{ 0x001120,   4, 0x04, 0x00000000 },
-	{ 0x001118,   1, 0x04, 0x00000000 },
-	{ 0x00164c,   1, 0x04, 0x00000000 },
-	{ 0x001658,   1, 0x04, 0x00000000 },
-	{ 0x001910,   1, 0x04, 0x00000290 },
-	{ 0x001518,   1, 0x04, 0x00000000 },
-	{ 0x00165c,   1, 0x04, 0x00000001 },
-	{ 0x001520,   1, 0x04, 0x00000000 },
-	{ 0x001604,   1, 0x04, 0x00000000 },
-	{ 0x001570,   1, 0x04, 0x00000000 },
-	{ 0x0013b0,   2, 0x04, 0x3f800000 },
-	{ 0x00020c,   1, 0x04, 0x00000000 },
-	{ 0x001670,   1, 0x04, 0x30201000 },
-	{ 0x001674,   1, 0x04, 0x70605040 },
-	{ 0x001678,   1, 0x04, 0xb8a89888 },
-	{ 0x00167c,   1, 0x04, 0xf8e8d8c8 },
-	{ 0x00166c,   1, 0x04, 0x00000000 },
-	{ 0x001680,   1, 0x04, 0x00ffff00 },
-	{ 0x0012d0,   1, 0x04, 0x00000003 },
-	{ 0x00113c,   1, 0x04, 0x00000000 },
-	{ 0x0012d4,   1, 0x04, 0x00000002 },
-	{ 0x001684,   2, 0x04, 0x00000000 },
-	{ 0x000dac,   2, 0x04, 0x00001b02 },
-	{ 0x000db4,   1, 0x04, 0x00000000 },
-	{ 0x00168c,   1, 0x04, 0x00000000 },
-	{ 0x0015bc,   1, 0x04, 0x00000000 },
-	{ 0x00156c,   1, 0x04, 0x00000000 },
-	{ 0x00187c,   1, 0x04, 0x00000000 },
-	{ 0x001110,   1, 0x04, 0x00000001 },
-	{ 0x000dc0,   3, 0x04, 0x00000000 },
-	{ 0x000f40,   5, 0x04, 0x00000000 },
-	{ 0x001234,   1, 0x04, 0x00000000 },
-	{ 0x001690,   1, 0x04, 0x00000000 },
-	{ 0x000790,   5, 0x04, 0x00000000 },
-	{ 0x00077c,   1, 0x04, 0x00000000 },
-	{ 0x001000,   1, 0x04, 0x00000010 },
-	{ 0x0010fc,   1, 0x04, 0x00000000 },
-	{ 0x001290,   1, 0x04, 0x00000000 },
-	{ 0x000218,   1, 0x04, 0x00000010 },
-	{ 0x0012d8,   1, 0x04, 0x00000000 },
-	{ 0x0012dc,   1, 0x04, 0x00000010 },
-	{ 0x000d94,   1, 0x04, 0x00000001 },
-	{ 0x00155c,   2, 0x04, 0x00000000 },
-	{ 0x001564,   1, 0x04, 0x00000fff },
-	{ 0x001574,   2, 0x04, 0x00000000 },
-	{ 0x00157c,   1, 0x04, 0x000fffff },
-	{ 0x001354,   1, 0x04, 0x00000000 },
-	{ 0x001610,   1, 0x04, 0x00000012 },
-	{ 0x001608,   2, 0x04, 0x00000000 },
-	{ 0x00260c,   1, 0x04, 0x00000000 },
-	{ 0x0007ac,   1, 0x04, 0x00000000 },
-	{ 0x00162c,   1, 0x04, 0x00000003 },
-	{ 0x000210,   1, 0x04, 0x00000000 },
-	{ 0x000320,   1, 0x04, 0x00000000 },
-	{ 0x000324,   6, 0x04, 0x3f800000 },
-	{ 0x000750,   1, 0x04, 0x00000000 },
-	{ 0x000760,   1, 0x04, 0x39291909 },
-	{ 0x000764,   1, 0x04, 0x79695949 },
-	{ 0x000768,   1, 0x04, 0xb9a99989 },
-	{ 0x00076c,   1, 0x04, 0xf9e9d9c9 },
-	{ 0x000770,   1, 0x04, 0x30201000 },
-	{ 0x000774,   1, 0x04, 0x70605040 },
-	{ 0x000778,   1, 0x04, 0x00009080 },
-	{ 0x000780,   1, 0x04, 0x39291909 },
-	{ 0x000784,   1, 0x04, 0x79695949 },
-	{ 0x000788,   1, 0x04, 0xb9a99989 },
-	{ 0x00078c,   1, 0x04, 0xf9e9d9c9 },
-	{ 0x0007d0,   1, 0x04, 0x30201000 },
-	{ 0x0007d4,   1, 0x04, 0x70605040 },
-	{ 0x0007d8,   1, 0x04, 0x00009080 },
-	{ 0x001004,   1, 0x04, 0x00000000 },
-	{ 0x001240,   8, 0x04, 0x00000000 },
-	{ 0x00037c,   1, 0x04, 0x00000001 },
-	{ 0x000740,   1, 0x04, 0x00000000 },
-	{ 0x001148,   1, 0x04, 0x00000000 },
-	{ 0x000fb4,   1, 0x04, 0x00000000 },
-	{ 0x000fb8,   1, 0x04, 0x00000002 },
-	{ 0x001130,   1, 0x04, 0x00000002 },
-	{ 0x000fd4,   2, 0x04, 0x00000000 },
-	{ 0x001030,   1, 0x04, 0x20181008 },
-	{ 0x001034,   1, 0x04, 0x40383028 },
-	{ 0x001038,   1, 0x04, 0x60585048 },
-	{ 0x00103c,   1, 0x04, 0x80787068 },
-	{ 0x000744,   1, 0x04, 0x00000000 },
-	{ 0x002600,   1, 0x04, 0x00000000 },
-	{ 0x001918,   1, 0x04, 0x00000000 },
-	{ 0x00191c,   1, 0x04, 0x00000900 },
-	{ 0x001920,   1, 0x04, 0x00000405 },
-	{ 0x001308,   1, 0x04, 0x00000001 },
-	{ 0x001924,   1, 0x04, 0x00000000 },
-	{ 0x0013ac,   1, 0x04, 0x00000000 },
-	{ 0x00192c,   1, 0x04, 0x00000001 },
-	{ 0x00193c,   1, 0x04, 0x00002c1c },
-	{ 0x000d7c,   1, 0x04, 0x00000000 },
-	{ 0x000f8c,   1, 0x04, 0x00000000 },
-	{ 0x0002c0,   1, 0x04, 0x00000001 },
-	{ 0x001510,   1, 0x04, 0x00000000 },
-	{ 0x001940,   1, 0x04, 0x00000000 },
-	{ 0x000ff4,   2, 0x04, 0x00000000 },
-	{ 0x00194c,   2, 0x04, 0x00000000 },
-	{ 0x001968,   1, 0x04, 0x00000000 },
-	{ 0x001590,   1, 0x04, 0x0000003f },
-	{ 0x0007e8,   4, 0x04, 0x00000000 },
-	{ 0x00196c,   1, 0x04, 0x00000011 },
-	{ 0x0002e4,   1, 0x04, 0x0000b001 },
-	{ 0x00036c,   2, 0x04, 0x00000000 },
-	{ 0x00197c,   1, 0x04, 0x00000000 },
-	{ 0x000fcc,   2, 0x04, 0x00000000 },
-	{ 0x0002d8,   1, 0x04, 0x00000040 },
-	{ 0x001980,   1, 0x04, 0x00000080 },
-	{ 0x001504,   1, 0x04, 0x00000080 },
-	{ 0x001984,   1, 0x04, 0x00000000 },
-	{ 0x000f60,   1, 0x04, 0x00000000 },
-	{ 0x000f64,   1, 0x04, 0x00400040 },
-	{ 0x000f68,   1, 0x04, 0x00002212 },
-	{ 0x000f6c,   1, 0x04, 0x08080203 },
-	{ 0x001108,   1, 0x04, 0x00000008 },
-	{ 0x000f70,   1, 0x04, 0x00080001 },
-	{ 0x000ffc,   1, 0x04, 0x00000000 },
-	{ 0x001134,   1, 0x04, 0x00000000 },
-	{ 0x000f1c,   1, 0x04, 0x00000000 },
-	{ 0x0011f8,   1, 0x04, 0x00000000 },
-	{ 0x001138,   1, 0x04, 0x00000001 },
-	{ 0x000300,   1, 0x04, 0x00000001 },
-	{ 0x0013a8,   1, 0x04, 0x00000000 },
-	{ 0x001224,   1, 0x04, 0x00000000 },
-	{ 0x0012ec,   1, 0x04, 0x00000000 },
-	{ 0x001310,   1, 0x04, 0x00000000 },
-	{ 0x001314,   1, 0x04, 0x00000001 },
-	{ 0x001380,   1, 0x04, 0x00000000 },
-	{ 0x001384,   4, 0x04, 0x00000001 },
-	{ 0x001394,   1, 0x04, 0x00000000 },
-	{ 0x00139c,   1, 0x04, 0x00000000 },
-	{ 0x001398,   1, 0x04, 0x00000000 },
-	{ 0x001594,   1, 0x04, 0x00000000 },
-	{ 0x001598,   4, 0x04, 0x00000001 },
-	{ 0x000f54,   3, 0x04, 0x00000000 },
-	{ 0x0019bc,   1, 0x04, 0x00000000 },
-	{ 0x000f9c,   2, 0x04, 0x00000000 },
-	{ 0x0012cc,   1, 0x04, 0x00000000 },
-	{ 0x0012e8,   1, 0x04, 0x00000000 },
-	{ 0x00130c,   1, 0x04, 0x00000001 },
-	{ 0x001360,   8, 0x04, 0x00000000 },
-	{ 0x00133c,   2, 0x04, 0x00000001 },
-	{ 0x001344,   1, 0x04, 0x00000002 },
-	{ 0x001348,   2, 0x04, 0x00000001 },
-	{ 0x001350,   1, 0x04, 0x00000002 },
-	{ 0x001358,   1, 0x04, 0x00000001 },
-	{ 0x0012e4,   1, 0x04, 0x00000000 },
-	{ 0x00131c,   4, 0x04, 0x00000000 },
-	{ 0x0019c0,   1, 0x04, 0x00000000 },
-	{ 0x001140,   1, 0x04, 0x00000000 },
-	{ 0x000dd0,   1, 0x04, 0x00000000 },
-	{ 0x000dd4,   1, 0x04, 0x00000001 },
-	{ 0x0002f4,   1, 0x04, 0x00000000 },
-	{ 0x0019c4,   1, 0x04, 0x00000000 },
-	{ 0x0019c8,   1, 0x04, 0x00001500 },
-	{ 0x00135c,   1, 0x04, 0x00000000 },
-	{ 0x000f90,   1, 0x04, 0x00000000 },
-	{ 0x0019e0,   8, 0x04, 0x00000001 },
-	{ 0x0019cc,   1, 0x04, 0x00000001 },
-	{ 0x00111c,   1, 0x04, 0x00000001 },
-	{ 0x0015b8,   1, 0x04, 0x00000000 },
-	{ 0x001a00,   1, 0x04, 0x00001111 },
-	{ 0x001a04,   7, 0x04, 0x00000000 },
-	{ 0x000d6c,   2, 0x04, 0xffff0000 },
-	{ 0x0010f8,   1, 0x04, 0x00001010 },
-	{ 0x000d80,   5, 0x04, 0x00000000 },
-	{ 0x000da0,   1, 0x04, 0x00000000 },
-	{ 0x0007a4,   2, 0x04, 0x00000000 },
-	{ 0x001508,   1, 0x04, 0x80000000 },
-	{ 0x00150c,   1, 0x04, 0x40000000 },
-	{ 0x001668,   1, 0x04, 0x00000000 },
-	{ 0x000318,   2, 0x04, 0x00000008 },
-	{ 0x000d9c,   1, 0x04, 0x00000001 },
-	{ 0x000f14,   1, 0x04, 0x00000000 },
-	{ 0x000374,   1, 0x04, 0x00000000 },
-	{ 0x000378,   1, 0x04, 0x0000000c },
-	{ 0x0007dc,   1, 0x04, 0x00000000 },
-	{ 0x00074c,   1, 0x04, 0x00000055 },
-	{ 0x001420,   1, 0x04, 0x00000003 },
-	{ 0x001008,   1, 0x04, 0x00000008 },
-	{ 0x00100c,   1, 0x04, 0x00000040 },
-	{ 0x001010,   1, 0x04, 0x0000012c },
-	{ 0x000d60,   1, 0x04, 0x00000040 },
-	{ 0x001018,   1, 0x04, 0x00000020 },
-	{ 0x00101c,   1, 0x04, 0x00000001 },
-	{ 0x001020,   1, 0x04, 0x00000020 },
-	{ 0x001024,   1, 0x04, 0x00000001 },
-	{ 0x001444,   3, 0x04, 0x00000000 },
-	{ 0x000360,   1, 0x04, 0x20164010 },
-	{ 0x000364,   1, 0x04, 0x00000020 },
-	{ 0x000368,   1, 0x04, 0x00000000 },
-	{ 0x000da8,   1, 0x04, 0x00000030 },
-	{ 0x000de4,   1, 0x04, 0x00000000 },
-	{ 0x000204,   1, 0x04, 0x00000006 },
-	{ 0x0002d0,   1, 0x04, 0x003fffff },
-	{ 0x001220,   1, 0x04, 0x00000005 },
-	{ 0x000fdc,   1, 0x04, 0x00000000 },
-	{ 0x000f98,   1, 0x04, 0x00400008 },
-	{ 0x001284,   1, 0x04, 0x08000080 },
-	{ 0x001450,   1, 0x04, 0x00400008 },
-	{ 0x001454,   1, 0x04, 0x08000080 },
-	{ 0x000214,   1, 0x04, 0x00000000 },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_grctx_pack_mthd[] = {
-	{ gm204_grctx_init_b197_0, 0xb197 },
-	{ gf100_grctx_init_902d_0, 0x902d },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_fe_0[] = {
-	{ 0x404004,   8, 0x04, 0x00000000 },
-	{ 0x404024,   1, 0x04, 0x0000e000 },
-	{ 0x404028,   8, 0x04, 0x00000000 },
-	{ 0x4040a8,   8, 0x04, 0x00000000 },
-	{ 0x4040c8,   1, 0x04, 0xf801008f },
-	{ 0x4040d0,   6, 0x04, 0x00000000 },
-	{ 0x4040f8,   1, 0x04, 0x00000000 },
-	{ 0x404100,  10, 0x04, 0x00000000 },
-	{ 0x404130,   2, 0x04, 0x00000000 },
-	{ 0x404150,   1, 0x04, 0x0000002e },
-	{ 0x404154,   2, 0x04, 0x00000800 },
-	{ 0x404164,   1, 0x04, 0x00000045 },
-	{ 0x40417c,   2, 0x04, 0x00000000 },
-	{ 0x404194,   1, 0x04, 0x33000700 },
-	{ 0x4041a0,   4, 0x04, 0x00000000 },
-	{ 0x4041c4,   2, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_ds_0[] = {
-	{ 0x405800,   1, 0x04, 0x8f8001bf },
-	{ 0x405830,   1, 0x04, 0x04001000 },
-	{ 0x405834,   1, 0x04, 0x08000000 },
-	{ 0x405838,   1, 0x04, 0x00010000 },
-	{ 0x405854,   1, 0x04, 0x00000000 },
-	{ 0x405870,   4, 0x04, 0x00000001 },
-	{ 0x405a00,   2, 0x04, 0x00000000 },
-	{ 0x405a18,   1, 0x04, 0x00000000 },
-	{ 0x405a1c,   1, 0x04, 0x000000ff },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_cwd_0[] = {
-	{ 0x405b00,   1, 0x04, 0x00000000 },
-	{ 0x405b10,   1, 0x04, 0x00001000 },
-	{ 0x405b20,   1, 0x04, 0x04000000 },
-	{ 0x405b60,   6, 0x04, 0x00000000 },
-	{ 0x405ba0,   6, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_pd_0[] = {
-	{ 0x406020,   1, 0x04, 0x17410001 },
-	{ 0x406028,   4, 0x04, 0x00000001 },
-	{ 0x4064a8,   1, 0x04, 0x00000000 },
-	{ 0x4064ac,   1, 0x04, 0x00003fff },
-	{ 0x4064b0,   3, 0x04, 0x00000000 },
-	{ 0x4064c0,   1, 0x04, 0x80400280 },
-	{ 0x4064c4,   1, 0x04, 0x0400ffff },
-	{ 0x4064c8,   1, 0x04, 0x01800780 },
-	{ 0x4064cc,   9, 0x04, 0x00000000 },
-	{ 0x4064fc,   1, 0x04, 0x0000022a },
-	{ 0x406500,   1, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_be_0[] = {
-	{ 0x408800,   1, 0x04, 0x32882a3c },
-	{ 0x408804,   1, 0x04, 0x00000040 },
-	{ 0x408808,   1, 0x04, 0x1003e005 },
-	{ 0x408840,   1, 0x04, 0x00000e0b },
-	{ 0x408900,   1, 0x04, 0xb080b801 },
-	{ 0x408904,   1, 0x04, 0x63038001 },
-	{ 0x408908,   1, 0x04, 0x12c8502f },
-	{ 0x408980,   1, 0x04, 0x0000011d },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_grctx_pack_hub[] = {
-	{ gf100_grctx_init_main_0 },
-	{ gm204_grctx_init_fe_0 },
-	{ gk110_grctx_init_pri_0 },
-	{ gk104_grctx_init_memfmt_0 },
-	{ gm204_grctx_init_ds_0 },
-	{ gm204_grctx_init_cwd_0 },
-	{ gm204_grctx_init_pd_0 },
-	{ gk208_grctx_init_rstr2d_0 },
-	{ gk104_grctx_init_scc_0 },
-	{ gm204_grctx_init_be_0 },
-	{}
-};
-
-const struct gf100_gr_init
-gm204_grctx_init_prop_0[] = {
-	{ 0x418400,   1, 0x04, 0x38e01e00 },
-	{ 0x418404,   1, 0x04, 0x70001fff },
-	{ 0x41840c,   1, 0x04, 0x20001008 },
-	{ 0x418410,   2, 0x04, 0x0fff0fff },
-	{ 0x418418,   1, 0x04, 0x07ff07ff },
-	{ 0x41841c,   1, 0x04, 0x3feffbff },
-	{ 0x418450,   6, 0x04, 0x00000000 },
-	{ 0x418468,   1, 0x04, 0x00000001 },
-	{ 0x41846c,   2, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_gpc_unk_1[] = {
-	{ 0x418600,   1, 0x04, 0x0000007f },
-	{ 0x418684,   1, 0x04, 0x0000001f },
-	{ 0x418700,   1, 0x04, 0x00000002 },
-	{ 0x418704,   1, 0x04, 0x00000080 },
-	{ 0x418708,   1, 0x04, 0x40000000 },
-	{ 0x41870c,   2, 0x04, 0x00000000 },
-	{ 0x418728,   1, 0x04, 0x00010000 },
-	{}
-};
-
-const struct gf100_gr_init
-gm204_grctx_init_setup_0[] = {
-	{ 0x418800,   1, 0x04, 0x7006863a },
-	{ 0x418808,   1, 0x04, 0x00000000 },
-	{ 0x418810,   1, 0x04, 0x00000000 },
-	{ 0x418828,   1, 0x04, 0x00000044 },
-	{ 0x418830,   1, 0x04, 0x10000001 },
-	{ 0x4188d8,   1, 0x04, 0x00000008 },
-	{ 0x4188e0,   1, 0x04, 0x01000000 },
-	{ 0x4188e8,   5, 0x04, 0x00000000 },
-	{ 0x4188fc,   1, 0x04, 0x20100058 },
-	{}
-};
-
-const struct gf100_gr_init
-gm204_grctx_init_gpm_0[] = {
-	{ 0x418c10,   8, 0x04, 0x00000000 },
-	{ 0x418c40,   1, 0x04, 0xffffffff },
-	{ 0x418c6c,   1, 0x04, 0x00000001 },
-	{ 0x418c80,   1, 0x04, 0x20200000 },
-	{}
-};
-
-const struct gf100_gr_init
-gm204_grctx_init_gpc_unk_2[] = {
-	{ 0x418e00,   1, 0x04, 0x90040000 },
-	{ 0x418e24,   1, 0x04, 0x00000000 },
-	{ 0x418e28,   1, 0x04, 0x00000030 },
-	{ 0x418e2c,   1, 0x04, 0x00000100 },
-	{ 0x418e30,   3, 0x04, 0x00000000 },
-	{ 0x418e40,  22, 0x04, 0x00000000 },
-	{ 0x418ea0,  12, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_pack
-gm204_grctx_pack_gpc[] = {
-	{ gm107_grctx_init_gpc_unk_0 },
-	{ gm204_grctx_init_prop_0 },
-	{ gm204_grctx_init_gpc_unk_1 },
-	{ gm204_grctx_init_setup_0 },
-	{ gf100_grctx_init_zcull_0 },
-	{ gk208_grctx_init_crstr_0 },
-	{ gm204_grctx_init_gpm_0 },
-	{ gm204_grctx_init_gpc_unk_2 },
-	{ gf100_grctx_init_gcc_0 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_pe_0[] = {
-	{ 0x419848,   1, 0x04, 0x00000000 },
-	{ 0x419864,   1, 0x04, 0x00000029 },
-	{ 0x419888,   1, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_tex_0[] = {
-	{ 0x419a00,   1, 0x04, 0x000100f0 },
-	{ 0x419a04,   1, 0x04, 0x00000005 },
-	{ 0x419a08,   1, 0x04, 0x00000621 },
-	{ 0x419a0c,   1, 0x04, 0x00320000 },
-	{ 0x419a10,   1, 0x04, 0x00000000 },
-	{ 0x419a14,   1, 0x04, 0x00000200 },
-	{ 0x419a1c,   1, 0x04, 0x0010c000 },
-	{ 0x419a20,   1, 0x04, 0x20008a00 },
-	{ 0x419a30,   1, 0x04, 0x00000001 },
-	{ 0x419a3c,   1, 0x04, 0x0000181e },
-	{ 0x419ac4,   1, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_mpc_0[] = {
-	{ 0x419c00,   1, 0x04, 0x0000009a },
-	{ 0x419c04,   1, 0x04, 0x80000bd6 },
-	{ 0x419c08,   1, 0x04, 0x00000002 },
-	{ 0x419c20,   1, 0x04, 0x00000000 },
-	{ 0x419c24,   1, 0x04, 0x00084210 },
-	{ 0x419c28,   1, 0x04, 0x3efbefbe },
-	{ 0x419c2c,   1, 0x04, 0x00000000 },
-	{ 0x419c34,   1, 0x04, 0x71ff1ff3 },
-	{ 0x419c3c,   1, 0x04, 0x00001919 },
-	{ 0x419c50,   1, 0x04, 0x00000005 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_l1c_0[] = {
-	{ 0x419c84,   1, 0x04, 0x0000003e },
-	{ 0x419c90,   1, 0x04, 0x0000000a },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_sm_0[] = {
-	{ 0x419e04,   3, 0x04, 0x00000000 },
-	{ 0x419e10,   1, 0x04, 0x00001c02 },
-	{ 0x419e44,   1, 0x04, 0x00d3eff2 },
-	{ 0x419e48,   1, 0x04, 0x00000000 },
-	{ 0x419e4c,   1, 0x04, 0x0000007f },
-	{ 0x419e50,   1, 0x04, 0x00000000 },
-	{ 0x419e58,   6, 0x04, 0x00000000 },
-	{ 0x419e74,  10, 0x04, 0x00000000 },
-	{ 0x419eac,   1, 0x04, 0x0001cf8b },
-	{ 0x419eb0,   1, 0x04, 0x00030300 },
-	{ 0x419eb8,   1, 0x04, 0x40000000 },
-	{ 0x419ef0,  24, 0x04, 0x00000000 },
-	{ 0x419f68,   2, 0x04, 0x00000000 },
-	{ 0x419f70,   1, 0x04, 0x00000020 },
-	{ 0x419f78,   1, 0x04, 0x00010beb },
-	{ 0x419f7c,   1, 0x04, 0x00000000 },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_grctx_pack_tpc[] = {
-	{ gm204_grctx_init_pe_0 },
-	{ gm204_grctx_init_tex_0 },
-	{ gm204_grctx_init_mpc_0 },
-	{ gm204_grctx_init_l1c_0 },
-	{ gm204_grctx_init_sm_0 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_pes_0[] = {
-	{ 0x41be24,   1, 0x04, 0x0000000e },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_grctx_init_cbm_0[] = {
-	{ 0x41bec0,   1, 0x04, 0x00000000 },
-	{ 0x41bec4,   1, 0x04, 0x01030000 },
-	{ 0x41bee4,   1, 0x04, 0x00000000 },
-	{ 0x41bef0,   1, 0x04, 0x000003ff },
-	{ 0x41bef4,   2, 0x04, 0x00000000 },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_grctx_pack_ppc[] = {
-	{ gm204_grctx_init_pes_0 },
-	{ gm204_grctx_init_cbm_0 },
-	{ gm107_grctx_init_wwdx_0 },
-	{}
-};
-
-/*******************************************************************************
- * PGRAPH context implementation
- ******************************************************************************/
-
-void
-gm204_grctx_generate_tpcid(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, id;
-
-	for (tpc = 0, id = 0; tpc < 4; tpc++) {
-		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-			if (tpc < gr->tpc_nr[gpc]) {
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
-				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
-				id++;
-			}
-		}
-	}
-}
-
-static void
-gm204_grctx_generate_rop_active_fbps(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 fbp_count = nvkm_rd32(device, 0x12006c);
-	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
-	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
-}
-
-void
-gm204_grctx_generate_405b60(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
-	u32 dist[TPC_MAX / 4] = {};
-	u32 gpcs[GPC_MAX] = {};
-	u8  tpcnr[GPC_MAX];
-	int tpc, gpc, i;
-
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
-	/* won't result in the same distribution as the binary driver where
-	 * some of the gpcs have more tpcs than others, but this shall do
-	 * for the moment.  the code for earlier gpus has this issue too.
-	 */
-	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while(!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
-		gpcs[gpc] |= i << (tpc * 8);
-	}
-
-	for (i = 0; i < dist_nr; i++)
-		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
-	for (i = 0; i < gr->gpc_nr; i++)
-		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
-}
-
-void
-gm204_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 tmp;
-	int i;
-
-	gf100_gr_mmio(gr, grctx->hub);
-	gf100_gr_mmio(gr, grctx->gpc);
-	gf100_gr_mmio(gr, grctx->zcull);
-	gf100_gr_mmio(gr, grctx->tpc);
-	gf100_gr_mmio(gr, grctx->ppc);
-
-	nvkm_wr32(device, 0x404154, 0x00000000);
-
-	grctx->bundle(info);
-	grctx->pagepool(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gm204_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-	nvkm_wr32(device, 0x406500, 0x00000000);
-
-	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
-	gm204_grctx_generate_rop_active_fbps(gr);
-
-	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
-		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
-	nvkm_wr32(device, 0x4041c4, tmp);
-
-	gm204_grctx_generate_405b60(gr);
-
-	gf100_gr_icmd(gr, grctx->icmd);
-	nvkm_wr32(device, 0x404154, 0x00000800);
-	gf100_gr_mthd(gr, grctx->mthd);
-
-	nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
-	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
-}
-
-const struct gf100_grctx_func
-gm204_grctx = {
-	.main  = gm204_grctx_generate_main,
-	.unkn  = gk104_grctx_generate_unkn,
-	.hub   = gm204_grctx_pack_hub,
-	.gpc   = gm204_grctx_pack_gpc,
-	.zcull = gf100_grctx_pack_zcull,
-	.tpc   = gm204_grctx_pack_tpc,
-	.ppc   = gm204_grctx_pack_ppc,
-	.icmd  = gm204_grctx_pack_icmd,
-	.mthd  = gm204_grctx_pack_mthd,
-	.bundle = gm107_grctx_generate_bundle,
-	.bundle_size = 0x3000,
-	.bundle_min_gpm_fifo_depth = 0x180,
-	.bundle_token_limit = 0x780,
-	.pagepool = gm107_grctx_generate_pagepool,
-	.pagepool_size = 0x20000,
-	.attrib = gm107_grctx_generate_attrib,
-	.attrib_nr_max = 0x600,
-	.attrib_nr = 0x400,
-	.alpha_nr_max = 0x1800,
-	.alpha_nr = 0x1000,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c
deleted file mode 100644
index d6be6034c2c2..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "ctxgf100.h"
-
-static const struct gf100_gr_init
-gm206_grctx_init_gpc_unk_1[] = {
-	{ 0x418600,   1, 0x04, 0x0000007f },
-	{ 0x418684,   1, 0x04, 0x0000001f },
-	{ 0x418700,   1, 0x04, 0x00000002 },
-	{ 0x418704,   1, 0x04, 0x00000080 },
-	{ 0x418708,   1, 0x04, 0x40000000 },
-	{ 0x41870c,   2, 0x04, 0x00000000 },
-	{ 0x418728,   1, 0x04, 0x00300020 },
-	{}
-};
-
-static const struct gf100_gr_pack
-gm206_grctx_pack_gpc[] = {
-	{ gm107_grctx_init_gpc_unk_0 },
-	{ gm204_grctx_init_prop_0 },
-	{ gm206_grctx_init_gpc_unk_1 },
-	{ gm204_grctx_init_setup_0 },
-	{ gf100_grctx_init_zcull_0 },
-	{ gk208_grctx_init_crstr_0 },
-	{ gm204_grctx_init_gpm_0 },
-	{ gm204_grctx_init_gpc_unk_2 },
-	{ gf100_grctx_init_gcc_0 },
-	{}
-};
-
-const struct gf100_grctx_func
-gm206_grctx = {
-	.main  = gm204_grctx_generate_main,
-	.unkn  = gk104_grctx_generate_unkn,
-	.hub   = gm204_grctx_pack_hub,
-	.gpc   = gm206_grctx_pack_gpc,
-	.zcull = gf100_grctx_pack_zcull,
-	.tpc   = gm204_grctx_pack_tpc,
-	.ppc   = gm204_grctx_pack_ppc,
-	.icmd  = gm204_grctx_pack_icmd,
-	.mthd  = gm204_grctx_pack_mthd,
-	.bundle = gm107_grctx_generate_bundle,
-	.bundle_size = 0x3000,
-	.bundle_min_gpm_fifo_depth = 0x180,
-	.bundle_token_limit = 0x780,
-	.pagepool = gm107_grctx_generate_pagepool,
-	.pagepool_size = 0x20000,
-	.attrib = gm107_grctx_generate_attrib,
-	.attrib_nr_max = 0x600,
-	.attrib_nr = 0x400,
-	.alpha_nr_max = 0x1800,
-	.alpha_nr = 0x1000,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index 670260402538..a8827efa90ae 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -54,7 +54,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	grctx->unkn(gr);
 
-	gm204_grctx_generate_tpcid(gr);
+	gm200_grctx_generate_tpcid(gr);
 	gm20b_grctx_generate_r406028(gr);
 	gk104_grctx_generate_r418bb8(gr);
 
@@ -70,7 +70,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
 	nvkm_wr32(device, 0x4041c4, tmp);
 
-	gm204_grctx_generate_405b60(gr);
+	gm200_grctx_generate_405b60(gr);
 
 	gf100_gr_wait_idle(gr);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
index e168b83a10c9..dc60509f76f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpc.fuc
@@ -322,6 +322,7 @@ main:
 
 // interrupt handler
 ih:
+	push $r0
 	push $r8
 	mov $r8 $flags
 	push $r8
@@ -358,6 +359,7 @@ ih:
 	pop $r8
 	mov $flags $r8
 	pop $r8
+	pop $r0
 	bclr $flags $p0
 	iret
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
index 231f696d1e0a..5f4ddfee48a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
@@ -382,56 +382,57 @@ uint32_t gf100_grgpc_code[] = {
 	0xb60412fd,
 	0x1efd01e4,
 	0x0018fe05,
-	0x05b021f5,
+	0x05b421f5,
 /* 0x04eb: main_not_ctx_xfer */
 	0x94d30ef4,
 	0xf5f010ef,
 	0x7e21f501,
 	0xc60ef403,
 /* 0x04f8: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x00a7f104,
-	0x00a3f002,
-	0xc400aacf,
-	0x0bf404ab,
-	0x1cd7f02c,
-	0x1a00e7f1,
-	0xcf00e3f0,
-	0xf7f100ee,
-	0xf3f01900,
-	0x00ffcf00,
-	0xf00421f4,
-	0x07f101e7,
-	0x03f01d00,
-	0x000ed000,
-/* 0x0546: ih_no_fifo */
-	0x07f104bd,
-	0x03f00100,
-	0x000ad000,
-	0xf0fc04bd,
-	0xd0fce0fc,
-	0xa0fcb0fc,
-	0x80fc90fc,
-	0xfc0088fe,
-	0x0032f480,
-/* 0x056a: hub_barrier_done */
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0xf104bdf0,
+	0xf00200a7,
+	0xaacf00a3,
+	0x04abc400,
+	0xf02c0bf4,
+	0xe7f11cd7,
+	0xe3f01a00,
+	0x00eecf00,
+	0x1900f7f1,
+	0xcf00f3f0,
+	0x21f400ff,
+	0x01e7f004,
+	0x1d0007f1,
+	0xd00003f0,
+	0x04bd000e,
+/* 0x0548: ih_no_fifo */
+	0x010007f1,
+	0xd00003f0,
+	0x04bd000a,
+	0xe0fcf0fc,
+	0xb0fcd0fc,
+	0x90fca0fc,
+	0x88fe80fc,
+	0xfc80fc00,
+	0x0032f400,
+/* 0x056e: hub_barrier_done */
 	0xf7f001f8,
 	0x040e9801,
 	0xb904febb,
 	0xe7f102ff,
 	0xe3f09418,
 	0x9d21f440,
-/* 0x0582: ctx_redswitch */
+/* 0x0586: ctx_redswitch */
 	0xf7f000f8,
 	0x0007f120,
 	0x0103f085,
 	0xbd000fd0,
 	0x08e7f004,
-/* 0x0594: ctx_redswitch_delay */
+/* 0x0598: ctx_redswitch_delay */
 	0xf401e2b6,
 	0xf5f1fd1b,
 	0xf5f10800,
@@ -439,13 +440,13 @@ uint32_t gf100_grgpc_code[] = {
 	0x03f08500,
 	0x000fd001,
 	0x00f804bd,
-/* 0x05b0: ctx_xfer */
+/* 0x05b4: ctx_xfer */
 	0x810007f1,
 	0xd00203f0,
 	0x04bd000f,
 	0xf50711f4,
-/* 0x05c3: ctx_xfer_not_load */
-	0xf5058221,
+/* 0x05c7: ctx_xfer_not_load */
+	0xf5058621,
 	0xbd026a21,
 	0xfc07f124,
 	0x0203f047,
@@ -475,12 +476,11 @@ uint32_t gf100_grgpc_code[] = {
 	0x6f21f508,
 	0x5e21f501,
 	0x0601f402,
-/* 0x063b: ctx_xfer_post */
+/* 0x063f: ctx_xfer_post */
 	0xf50712f4,
-/* 0x063f: ctx_xfer_done */
+/* 0x0643: ctx_xfer_done */
 	0xf5027f21,
-	0xf8056a21,
-	0x00000000,
+	0xf8056e21,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
index bb820ff28621..03381b163cfc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
@@ -408,56 +408,57 @@ uint32_t gf117_grgpc_code[] = {
 	0x0412fd20,
 	0xfd01e4b6,
 	0x18fe051e,
-	0xfd21f500,
-	0xd30ef405,
+	0x0121f500,
+	0xd30ef406,
 /* 0x0538: main_not_ctx_xfer */
 	0xf010ef94,
 	0x21f501f5,
 	0x0ef4037e,
 /* 0x0545: ih */
-	0xfe80f9c6,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0xa7f104bd,
-	0xa3f00200,
-	0x00aacf00,
-	0xf404abc4,
-	0xd7f02c0b,
-	0x00e7f124,
-	0x00e3f01a,
-	0xf100eecf,
-	0xf01900f7,
-	0xffcf00f3,
-	0x0421f400,
-	0xf101e7f0,
-	0xf01d0007,
-	0x0ed00003,
-/* 0x0593: ih_no_fifo */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x05b7: hub_barrier_done */
+	0xf900f9c6,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0x0200a7f1,
+	0xcf00a3f0,
+	0xabc400aa,
+	0x2c0bf404,
+	0xf124d7f0,
+	0xf01a00e7,
+	0xeecf00e3,
+	0x00f7f100,
+	0x00f3f019,
+	0xf400ffcf,
+	0xe7f00421,
+	0x0007f101,
+	0x0003f01d,
+	0xbd000ed0,
+/* 0x0595: ih_no_fifo */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x05bb: hub_barrier_done */
 	0xf001f800,
 	0x0e9801f7,
 	0x04febb04,
 	0xf102ffb9,
 	0xf09418e7,
 	0x21f440e3,
-/* 0x05cf: ctx_redswitch */
+/* 0x05d3: ctx_redswitch */
 	0xf000f89d,
 	0x07f120f7,
 	0x03f08500,
 	0x000fd001,
 	0xe7f004bd,
-/* 0x05e1: ctx_redswitch_delay */
+/* 0x05e5: ctx_redswitch_delay */
 	0x01e2b608,
 	0xf1fd1bf4,
 	0xf10800f5,
@@ -465,13 +466,13 @@ uint32_t gf117_grgpc_code[] = {
 	0xf0850007,
 	0x0fd00103,
 	0xf804bd00,
-/* 0x05fd: ctx_xfer */
+/* 0x0601: ctx_xfer */
 	0x0007f100,
 	0x0203f081,
 	0xbd000fd0,
 	0x0711f404,
-	0x05cf21f5,
-/* 0x0610: ctx_xfer_not_load */
+	0x05d321f5,
+/* 0x0614: ctx_xfer_not_load */
 	0x026a21f5,
 	0x07f124bd,
 	0x03f047fc,
@@ -511,10 +512,10 @@ uint32_t gf117_grgpc_code[] = {
 	0x21f5016f,
 	0x01f4025e,
 	0x0712f406,
-/* 0x06ac: ctx_xfer_post */
+/* 0x06b0: ctx_xfer_post */
 	0x027f21f5,
-/* 0x06b0: ctx_xfer_done */
-	0x05b721f5,
+/* 0x06b4: ctx_xfer_done */
+	0x05bb21f5,
 	0x000000f8,
 	0x00000000,
 	0x00000000,
@@ -533,5 +534,4 @@ uint32_t gf117_grgpc_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
index 911976d20940..99d9b48a3b50 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
@@ -408,56 +408,57 @@ uint32_t gk104_grgpc_code[] = {
 	0x0412fd20,
 	0xfd01e4b6,
 	0x18fe051e,
-	0xfd21f500,
-	0xd30ef405,
+	0x0121f500,
+	0xd30ef406,
 /* 0x0538: main_not_ctx_xfer */
 	0xf010ef94,
 	0x21f501f5,
 	0x0ef4037e,
 /* 0x0545: ih */
-	0xfe80f9c6,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0xa7f104bd,
-	0xa3f00200,
-	0x00aacf00,
-	0xf404abc4,
-	0xd7f02c0b,
-	0x00e7f124,
-	0x00e3f01a,
-	0xf100eecf,
-	0xf01900f7,
-	0xffcf00f3,
-	0x0421f400,
-	0xf101e7f0,
-	0xf01d0007,
-	0x0ed00003,
-/* 0x0593: ih_no_fifo */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x05b7: hub_barrier_done */
+	0xf900f9c6,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0x0200a7f1,
+	0xcf00a3f0,
+	0xabc400aa,
+	0x2c0bf404,
+	0xf124d7f0,
+	0xf01a00e7,
+	0xeecf00e3,
+	0x00f7f100,
+	0x00f3f019,
+	0xf400ffcf,
+	0xe7f00421,
+	0x0007f101,
+	0x0003f01d,
+	0xbd000ed0,
+/* 0x0595: ih_no_fifo */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x05bb: hub_barrier_done */
 	0xf001f800,
 	0x0e9801f7,
 	0x04febb04,
 	0xf102ffb9,
 	0xf09418e7,
 	0x21f440e3,
-/* 0x05cf: ctx_redswitch */
+/* 0x05d3: ctx_redswitch */
 	0xf000f89d,
 	0x07f120f7,
 	0x03f08500,
 	0x000fd001,
 	0xe7f004bd,
-/* 0x05e1: ctx_redswitch_delay */
+/* 0x05e5: ctx_redswitch_delay */
 	0x01e2b608,
 	0xf1fd1bf4,
 	0xf10800f5,
@@ -465,13 +466,13 @@ uint32_t gk104_grgpc_code[] = {
 	0xf0850007,
 	0x0fd00103,
 	0xf804bd00,
-/* 0x05fd: ctx_xfer */
+/* 0x0601: ctx_xfer */
 	0x0007f100,
 	0x0203f081,
 	0xbd000fd0,
 	0x0711f404,
-	0x05cf21f5,
-/* 0x0610: ctx_xfer_not_load */
+	0x05d321f5,
+/* 0x0614: ctx_xfer_not_load */
 	0x026a21f5,
 	0x07f124bd,
 	0x03f047fc,
@@ -511,10 +512,10 @@ uint32_t gk104_grgpc_code[] = {
 	0x21f5016f,
 	0x01f4025e,
 	0x0712f406,
-/* 0x06ac: ctx_xfer_post */
+/* 0x06b0: ctx_xfer_post */
 	0x027f21f5,
-/* 0x06b0: ctx_xfer_done */
-	0x05b721f5,
+/* 0x06b4: ctx_xfer_done */
+	0x05bb21f5,
 	0x000000f8,
 	0x00000000,
 	0x00000000,
@@ -533,5 +534,4 @@ uint32_t gk104_grgpc_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
index 1c6e11b05df2..f7267696cbfd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
@@ -408,56 +408,57 @@ uint32_t gk110_grgpc_code[] = {
 	0x0412fd20,
 	0xfd01e4b6,
 	0x18fe051e,
-	0xfd21f500,
-	0xd30ef405,
+	0x0121f500,
+	0xd30ef406,
 /* 0x0538: main_not_ctx_xfer */
 	0xf010ef94,
 	0x21f501f5,
 	0x0ef4037e,
 /* 0x0545: ih */
-	0xfe80f9c6,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0xa7f104bd,
-	0xa3f00200,
-	0x00aacf00,
-	0xf404abc4,
-	0xd7f02c0b,
-	0x00e7f124,
-	0x00e3f01a,
-	0xf100eecf,
-	0xf01900f7,
-	0xffcf00f3,
-	0x0421f400,
-	0xf101e7f0,
-	0xf01d0007,
-	0x0ed00003,
-/* 0x0593: ih_no_fifo */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x05b7: hub_barrier_done */
+	0xf900f9c6,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0x0200a7f1,
+	0xcf00a3f0,
+	0xabc400aa,
+	0x2c0bf404,
+	0xf124d7f0,
+	0xf01a00e7,
+	0xeecf00e3,
+	0x00f7f100,
+	0x00f3f019,
+	0xf400ffcf,
+	0xe7f00421,
+	0x0007f101,
+	0x0003f01d,
+	0xbd000ed0,
+/* 0x0595: ih_no_fifo */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x05bb: hub_barrier_done */
 	0xf001f800,
 	0x0e9801f7,
 	0x04febb04,
 	0xf102ffb9,
 	0xf09418e7,
 	0x21f440e3,
-/* 0x05cf: ctx_redswitch */
+/* 0x05d3: ctx_redswitch */
 	0xf000f89d,
 	0x07f120f7,
 	0x03f08500,
 	0x000fd001,
 	0xe7f004bd,
-/* 0x05e1: ctx_redswitch_delay */
+/* 0x05e5: ctx_redswitch_delay */
 	0x01e2b608,
 	0xf1fd1bf4,
 	0xf10800f5,
@@ -465,13 +466,13 @@ uint32_t gk110_grgpc_code[] = {
 	0xf0850007,
 	0x0fd00103,
 	0xf804bd00,
-/* 0x05fd: ctx_xfer */
+/* 0x0601: ctx_xfer */
 	0x0007f100,
 	0x0203f081,
 	0xbd000fd0,
 	0x0711f404,
-	0x05cf21f5,
-/* 0x0610: ctx_xfer_not_load */
+	0x05d321f5,
+/* 0x0614: ctx_xfer_not_load */
 	0x026a21f5,
 	0x07f124bd,
 	0x03f047fc,
@@ -511,10 +512,10 @@ uint32_t gk110_grgpc_code[] = {
 	0x21f5016f,
 	0x01f4025e,
 	0x0712f406,
-/* 0x06ac: ctx_xfer_post */
+/* 0x06b0: ctx_xfer_post */
 	0x027f21f5,
-/* 0x06b0: ctx_xfer_done */
-	0x05b721f5,
+/* 0x06b4: ctx_xfer_done */
+	0x05bb21f5,
 	0x000000f8,
 	0x00000000,
 	0x00000000,
@@ -533,5 +534,4 @@ uint32_t gk110_grgpc_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
index 84af7ec6a78e..387d1fa3e231 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
@@ -360,61 +360,62 @@ uint32_t gk208_grgpc_code[] = {
 	0xb60412fd,
 	0x1efd01e4,
 	0x0018fe05,
-	0x00051b7e,
+	0x00051f7e,
 /* 0x0477: main_not_ctx_xfer */
 	0x94d40ef4,
 	0xf5f010ef,
 	0x02f87e01,
 	0xc70ef400,
 /* 0x0484: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x02004a04,
-	0xc400aacf,
-	0x0bf404ab,
-	0x4e240d1f,
-	0xeecf1a00,
-	0x19004f00,
-	0x7e00ffcf,
-	0x0e000004,
-	0x1d004001,
-	0xbd000ef6,
-/* 0x04c1: ih_no_fifo */
-	0x01004004,
-	0xbd000af6,
-	0xfcf0fc04,
-	0xfcd0fce0,
-	0xfca0fcb0,
-	0xfe80fc90,
-	0x80fc0088,
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0x4a04bdf0,
+	0xaacf0200,
+	0x04abc400,
+	0x0d1f0bf4,
+	0x1a004e24,
+	0x4f00eecf,
+	0xffcf1900,
+	0x00047e00,
+	0x40010e00,
+	0x0ef61d00,
+/* 0x04c3: ih_no_fifo */
+	0x4004bd00,
+	0x0af60100,
+	0xfc04bd00,
+	0xfce0fcf0,
+	0xfcb0fcd0,
+	0xfc90fca0,
+	0x0088fe80,
+	0x00fc80fc,
 	0xf80032f4,
-/* 0x04e1: hub_barrier_done */
+/* 0x04e5: hub_barrier_done */
 	0x98010f01,
 	0xfebb040e,
 	0x8effb204,
 	0x7e409418,
 	0xf800008f,
-/* 0x04f5: ctx_redswitch */
+/* 0x04f9: ctx_redswitch */
 	0x80200f00,
 	0xf6018500,
 	0x04bd000f,
-/* 0x0502: ctx_redswitch_delay */
+/* 0x0506: ctx_redswitch_delay */
 	0xe2b6080e,
 	0xfd1bf401,
 	0x0800f5f1,
 	0x0200f5f1,
 	0x01850080,
 	0xbd000ff6,
-/* 0x051b: ctx_xfer */
+/* 0x051f: ctx_xfer */
 	0x8000f804,
 	0xf6028100,
 	0x04bd000f,
 	0x7e0711f4,
-/* 0x052b: ctx_xfer_not_load */
-	0x7e0004f5,
+/* 0x052f: ctx_xfer_not_load */
+	0x7e0004f9,
 	0xbd000216,
 	0x47fc8024,
 	0x0002f602,
@@ -449,10 +450,10 @@ uint32_t gk208_grgpc_code[] = {
 	0x7e00013d,
 	0xf400020a,
 	0x12f40601,
-/* 0x05b5: ctx_xfer_post */
+/* 0x05b9: ctx_xfer_post */
 	0x02277e07,
-/* 0x05b9: ctx_xfer_done */
-	0x04e17e00,
+/* 0x05bd: ctx_xfer_done */
+	0x04e57e00,
 	0x0000f800,
 	0x00000000,
 	0x00000000,
@@ -469,5 +470,4 @@ uint32_t gk208_grgpc_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
index 5136f9161706..fa9f3c0c5994 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
@@ -438,48 +438,49 @@ uint32_t gm107_grgpc_code[] = {
 	0x0412fd20,
 	0xfd01e4b6,
 	0x18fe051e,
-	0x06447e00,
+	0x06487e00,
 	0xd40ef400,
 /* 0x05a0: main_not_ctx_xfer */
 	0xf010ef94,
 	0xf87e01f5,
 	0x0ef40002,
 /* 0x05ad: ih */
-	0xfe80f9c7,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0x004a04bd,
-	0x00aacf02,
-	0xf404abc4,
-	0x240d1f0b,
-	0xcf1a004e,
-	0x004f00ee,
-	0x00ffcf19,
-	0x0000047e,
-	0x0040010e,
-	0x000ef61d,
-/* 0x05ea: ih_no_fifo */
-	0x004004bd,
-	0x000af601,
-	0xf0fc04bd,
-	0xd0fce0fc,
-	0xa0fcb0fc,
-	0x80fc90fc,
-	0xfc0088fe,
-	0x0032f480,
-/* 0x060a: hub_barrier_done */
+	0xf900f9c7,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0xcf02004a,
+	0xabc400aa,
+	0x1f0bf404,
+	0x004e240d,
+	0x00eecf1a,
+	0xcf19004f,
+	0x047e00ff,
+	0x010e0000,
+	0xf61d0040,
+	0x04bd000e,
+/* 0x05ec: ih_no_fifo */
+	0xf6010040,
+	0x04bd000a,
+	0xe0fcf0fc,
+	0xb0fcd0fc,
+	0x90fca0fc,
+	0x88fe80fc,
+	0xfc80fc00,
+	0x0032f400,
+/* 0x060e: hub_barrier_done */
 	0x010f01f8,
 	0xbb040e98,
 	0xffb204fe,
 	0x4094188e,
 	0x00008f7e,
-/* 0x061e: ctx_redswitch */
+/* 0x0622: ctx_redswitch */
 	0x200f00f8,
 	0x01850080,
 	0xbd000ff6,
-/* 0x062b: ctx_redswitch_delay */
+/* 0x062f: ctx_redswitch_delay */
 	0xb6080e04,
 	0x1bf401e2,
 	0x00f5f1fd,
@@ -487,15 +488,15 @@ uint32_t gm107_grgpc_code[] = {
 	0x85008002,
 	0x000ff601,
 	0x00f804bd,
-/* 0x0644: ctx_xfer */
+/* 0x0648: ctx_xfer */
 	0x02810080,
 	0xbd000ff6,
 	0x1dc48e04,
 	0x01e5f050,
 	0x8f7effb2,
 	0x11f40000,
-	0x061e7e07,
-/* 0x0661: ctx_xfer_not_load */
+	0x06227e07,
+/* 0x0665: ctx_xfer_not_load */
 	0x02167e00,
 	0x8024bd00,
 	0xf60247fc,
@@ -550,15 +551,15 @@ uint32_t gm107_grgpc_code[] = {
 	0x7e00020a,
 	0xf4000314,
 	0x12f40601,
-/* 0x0739: ctx_xfer_post */
+/* 0x073d: ctx_xfer_post */
 	0x02277e1a,
 	0x8e0d0f00,
 	0xf0501da8,
 	0xffb201e5,
 	0x00008f7e,
 	0x0003147e,
-/* 0x0750: ctx_xfer_done */
-	0x00060a7e,
+/* 0x0754: ctx_xfer_done */
+	0x00060e7e,
 	0x000000f8,
 	0x00000000,
 	0x00000000,
@@ -601,5 +602,4 @@ uint32_t gm107_grgpc_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hub.fuc b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hub.fuc
index 87f99e38acbf..e3a2fb308271 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hub.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hub.fuc
@@ -306,6 +306,7 @@ main:
 
 // interrupt handler
 ih:
+	push $r0
 	push $r8
 	mov $r8 $flags
 	push $r8
@@ -380,6 +381,7 @@ ih:
 	pop $r8
 	mov $flags $r8
 	pop $r8
+	pop $r0
 	bclr $flags $p0
 	iret
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
index f6acda505677..397921a9a46c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
@@ -528,10 +528,10 @@ uint32_t gf100_grhub_code[] = {
 	0x0001d001,
 	0x17f104bd,
 	0xf7f00100,
-	0x0d21f502,
-	0x1f21f508,
+	0x1121f502,
+	0x2321f508,
 	0x10f7f008,
-	0x086c21f5,
+	0x087021f5,
 	0x98000e98,
 	0x21f5010f,
 	0x14950150,
@@ -574,9 +574,9 @@ uint32_t gf100_grhub_code[] = {
 	0xb6800040,
 	0x1bf40132,
 	0x00f7f0be,
-	0x086c21f5,
+	0x087021f5,
 	0xf500f7f0,
-	0xf1080d21,
+	0xf1081121,
 	0xf0010007,
 	0x01d00203,
 	0xbd04bd00,
@@ -610,7 +610,7 @@ uint32_t gf100_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x31f40132,
-	0x4021f502,
+	0x4421f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -621,7 +621,7 @@ uint32_t gf100_grhub_code[] = {
 	0x0203f00f,
 	0xbd0009d0,
 	0x0131f404,
-	0x0a4021f5,
+	0x0a4421f5,
 	0x99f094bd,
 	0x0007f106,
 	0x0203f017,
@@ -631,7 +631,7 @@ uint32_t gf100_grhub_code[] = {
 	0x12b920f9,
 	0x0132f402,
 	0xf50232f4,
-	0xfc0a4021,
+	0xfc0a4421,
 	0x0007f120,
 	0x0203f0c0,
 	0xbd0002d0,
@@ -640,7 +640,7 @@ uint32_t gf100_grhub_code[] = {
 	0xf41f23c8,
 	0x31f40d0b,
 	0x0232f401,
-	0x0a4021f5,
+	0x0a4421f5,
 /* 0x063c: chsw_done */
 	0xf10127f0,
 	0xf0c30007,
@@ -654,7 +654,7 @@ uint32_t gf100_grhub_code[] = {
 /* 0x0660: main_not_ctx_switch */
 	0xf401e4b0,
 	0xf2b90d1b,
-	0xd021f502,
+	0xd421f502,
 	0x460ef409,
 /* 0x0670: main_not_ctx_chan */
 	0xf402e4b0,
@@ -664,7 +664,7 @@ uint32_t gf100_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x32f40132,
-	0x4021f502,
+	0x4421f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -682,107 +682,108 @@ uint32_t gf100_grhub_code[] = {
 	0x04bd0002,
 	0xfea00ef5,
 /* 0x06c8: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x00a7f104,
-	0x00a3f002,
-	0xc400aacf,
-	0x0bf404ab,
-	0x10d7f030,
-	0x1a00e7f1,
-	0xcf00e3f0,
-	0xf7f100ee,
-	0xf3f01900,
-	0x00ffcf00,
-	0xb70421f4,
-	0xf00400b0,
-	0x07f101e7,
-	0x03f01d00,
-	0x000ed000,
-/* 0x071a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x10d7f00d,
-	0x4001e7f1,
-/* 0x072b: ih_no_ctxsw */
-	0xe40421f4,
-	0xf40400ab,
-	0xe7f16c0b,
-	0xe3f00708,
-	0x6821f440,
-	0xf102ffb9,
-	0xf0040007,
-	0x0fd00203,
-	0xf104bd00,
-	0xf00704e7,
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0xf104bdf0,
+	0xf00200a7,
+	0xaacf00a3,
+	0x04abc400,
+	0xf0300bf4,
+	0xe7f110d7,
+	0xe3f01a00,
+	0x00eecf00,
+	0x1900f7f1,
+	0xcf00f3f0,
+	0x21f400ff,
+	0x00b0b704,
+	0x01e7f004,
+	0x1d0007f1,
+	0xd00003f0,
+	0x04bd000e,
+/* 0x071c: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f110d7,
+	0x21f44001,
+/* 0x072d: ih_no_ctxsw */
+	0x00abe404,
+	0x6c0bf404,
+	0x0708e7f1,
+	0xf440e3f0,
+	0xffb96821,
+	0x0007f102,
+	0x0203f004,
+	0xbd000fd0,
+	0x04e7f104,
+	0x40e3f007,
+	0xb96821f4,
+	0x07f102ff,
+	0x03f00300,
+	0x000fd002,
+	0xfec704bd,
+	0x02ee9450,
+	0x0700f7f1,
+	0xbb40f3f0,
+	0x21f400ef,
+	0x0007f168,
+	0x0203f002,
+	0xbd000fd0,
+	0x03f7f004,
+	0x037e21f5,
+	0x0100b7f1,
+	0xf102bfb9,
+	0xf00144e7,
 	0x21f440e3,
-	0x02ffb968,
-	0x030007f1,
-	0xd00203f0,
-	0x04bd000f,
-	0x9450fec7,
-	0xf7f102ee,
-	0xf3f00700,
-	0x00efbb40,
-	0xf16821f4,
-	0xf0020007,
-	0x0fd00203,
-	0xf004bd00,
-	0x21f503f7,
-	0xb7f1037e,
-	0xbfb90100,
-	0x44e7f102,
-	0x40e3f001,
-/* 0x079b: ih_no_fwmthd */
-	0xf19d21f4,
-	0xbd0504b7,
-	0xb4abffb0,
-	0xf10f0bf4,
-	0xf0070007,
-	0x0bd00303,
-/* 0x07b3: ih_no_other */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x07d7: ctx_4160s */
+/* 0x079d: ih_no_fwmthd */
+	0x04b7f19d,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0007f10f,
+	0x0303f007,
+	0xbd000bd0,
+/* 0x07b5: ih_no_other */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x07db: ctx_4160s */
 	0xf001f800,
 	0xffb901f7,
 	0x60e7f102,
 	0x40e3f041,
-/* 0x07e7: ctx_4160s_wait */
+/* 0x07eb: ctx_4160s_wait */
 	0xf19d21f4,
 	0xf04160e7,
 	0x21f440e3,
 	0x02ffb968,
 	0xf404ffc8,
 	0x00f8f00b,
-/* 0x07fc: ctx_4160c */
+/* 0x0800: ctx_4160c */
 	0xffb9f4bd,
 	0x60e7f102,
 	0x40e3f041,
 	0xf89d21f4,
-/* 0x080d: ctx_4170s */
+/* 0x0811: ctx_4170s */
 	0x10f5f000,
 	0xf102ffb9,
 	0xf04170e7,
 	0x21f440e3,
-/* 0x081f: ctx_4170w */
+/* 0x0823: ctx_4170w */
 	0xf100f89d,
 	0xf04170e7,
 	0x21f440e3,
 	0x02ffb968,
 	0xf410f4f0,
 	0x00f8f01b,
-/* 0x0834: ctx_redswitch */
+/* 0x0838: ctx_redswitch */
 	0x0200e7f1,
 	0xf040e5f0,
 	0xe5f020e5,
@@ -790,7 +791,7 @@ uint32_t gf100_grhub_code[] = {
 	0x0103f085,
 	0xbd000ed0,
 	0x08f7f004,
-/* 0x0850: ctx_redswitch_delay */
+/* 0x0854: ctx_redswitch_delay */
 	0xf401f2b6,
 	0xe5f1fd1b,
 	0xe5f10400,
@@ -798,7 +799,7 @@ uint32_t gf100_grhub_code[] = {
 	0x03f08500,
 	0x000ed001,
 	0x00f804bd,
-/* 0x086c: ctx_86c */
+/* 0x0870: ctx_86c */
 	0x1b0007f1,
 	0xd00203f0,
 	0x04bd000f,
@@ -809,16 +810,16 @@ uint32_t gf100_grhub_code[] = {
 	0xa86ce7f1,
 	0xf441e3f0,
 	0x00f89d21,
-/* 0x0894: ctx_mem */
+/* 0x0898: ctx_mem */
 	0x840007f1,
 	0xd00203f0,
 	0x04bd000f,
-/* 0x08a0: ctx_mem_wait */
+/* 0x08a4: ctx_mem_wait */
 	0x8400f7f1,
 	0xcf02f3f0,
 	0xfffd00ff,
 	0xf31bf405,
-/* 0x08b2: ctx_load */
+/* 0x08b6: ctx_load */
 	0x94bd00f8,
 	0xf10599f0,
 	0xf00f0007,
@@ -836,7 +837,7 @@ uint32_t gf100_grhub_code[] = {
 	0x02d00203,
 	0xf004bd00,
 	0x21f507f7,
-	0x07f10894,
+	0x07f10898,
 	0x03f0c000,
 	0x0002d002,
 	0x0bfe04bd,
@@ -891,31 +892,31 @@ uint32_t gf100_grhub_code[] = {
 	0x03f01700,
 	0x0009d002,
 	0x00f804bd,
-/* 0x09d0: ctx_chan */
-	0x07d721f5,
-	0x08b221f5,
+/* 0x09d4: ctx_chan */
+	0x07db21f5,
+	0x08b621f5,
 	0xf40ca7f0,
 	0xf7f0d021,
-	0x9421f505,
-	0xfc21f508,
-/* 0x09eb: ctx_mmio_exec */
-	0x9800f807,
+	0x9821f505,
+	0x0021f508,
+/* 0x09ef: ctx_mmio_exec */
+	0x9800f808,
 	0x07f14103,
 	0x03f08100,
 	0x0003d002,
 	0x34bd04bd,
-/* 0x09fc: ctx_mmio_loop */
+/* 0x0a00: ctx_mmio_loop */
 	0xf4ff34c4,
 	0x57f10f1b,
 	0x53f00200,
 	0x0535fa06,
-/* 0x0a0e: ctx_mmio_pull */
+/* 0x0a12: ctx_mmio_pull */
 	0x4e9803f8,
 	0x814f9880,
 	0xb69d21f4,
 	0x12b60830,
 	0xdf1bf401,
-/* 0x0a20: ctx_mmio_done */
+/* 0x0a24: ctx_mmio_done */
 	0xf1160398,
 	0xf0810007,
 	0x03d00203,
@@ -924,30 +925,30 @@ uint32_t gf100_grhub_code[] = {
 	0x13f00100,
 	0x0601fa06,
 	0x00f803f8,
-/* 0x0a40: ctx_xfer */
+/* 0x0a44: ctx_xfer */
 	0xf104e7f0,
 	0xf0020007,
 	0x0ed00303,
-/* 0x0a4f: ctx_xfer_idle */
+/* 0x0a53: ctx_xfer_idle */
 	0xf104bd00,
 	0xf00000e7,
 	0xeecf03e3,
 	0x00e4f100,
 	0xf21bf420,
 	0xf40611f4,
-/* 0x0a66: ctx_xfer_pre */
+/* 0x0a6a: ctx_xfer_pre */
 	0xf7f01102,
-	0x6c21f510,
-	0xd721f508,
+	0x7021f510,
+	0xdb21f508,
 	0x1c11f407,
-/* 0x0a74: ctx_xfer_pre_load */
+/* 0x0a78: ctx_xfer_pre_load */
 	0xf502f7f0,
-	0xf5080d21,
-	0xf5081f21,
-	0xbd083421,
-	0x0d21f5f4,
-	0xb221f508,
-/* 0x0a8d: ctx_xfer_exec */
+	0xf5081121,
+	0xf5082321,
+	0xbd083821,
+	0x1121f5f4,
+	0xb621f508,
+/* 0x0a91: ctx_xfer_exec */
 	0x16019808,
 	0x07f124bd,
 	0x03f00500,
@@ -982,24 +983,23 @@ uint32_t gf100_grhub_code[] = {
 	0x1301f402,
 	0xf40ca7f0,
 	0xf7f0d021,
-	0x9421f505,
+	0x9821f505,
 	0x3202f408,
-/* 0x0b1c: ctx_xfer_post */
+/* 0x0b20: ctx_xfer_post */
 	0xf502f7f0,
-	0xbd080d21,
-	0x6c21f5f4,
+	0xbd081121,
+	0x7021f5f4,
 	0x7f21f508,
-	0x1f21f502,
+	0x2321f502,
 	0xf5f4bd08,
-	0xf4080d21,
+	0xf4081121,
 	0x01981011,
 	0x0511fd40,
 	0xf5070bf4,
-/* 0x0b47: ctx_xfer_no_post_mmio */
-	0xf509eb21,
-/* 0x0b4b: ctx_xfer_done */
-	0xf807fc21,
-	0x00000000,
+/* 0x0b4b: ctx_xfer_no_post_mmio */
+	0xf509ef21,
+/* 0x0b4f: ctx_xfer_done */
+	0xf8080021,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
index 7cb14e59dea1..50c97163dcdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
@@ -528,10 +528,10 @@ uint32_t gf117_grhub_code[] = {
 	0x0001d001,
 	0x17f104bd,
 	0xf7f00100,
-	0x0d21f502,
-	0x1f21f508,
+	0x1121f502,
+	0x2321f508,
 	0x10f7f008,
-	0x086c21f5,
+	0x087021f5,
 	0x98000e98,
 	0x21f5010f,
 	0x14950150,
@@ -574,9 +574,9 @@ uint32_t gf117_grhub_code[] = {
 	0xb6800040,
 	0x1bf40132,
 	0x00f7f0be,
-	0x086c21f5,
+	0x087021f5,
 	0xf500f7f0,
-	0xf1080d21,
+	0xf1081121,
 	0xf0010007,
 	0x01d00203,
 	0xbd04bd00,
@@ -610,7 +610,7 @@ uint32_t gf117_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x31f40132,
-	0x4021f502,
+	0x4421f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -621,7 +621,7 @@ uint32_t gf117_grhub_code[] = {
 	0x0203f00f,
 	0xbd0009d0,
 	0x0131f404,
-	0x0a4021f5,
+	0x0a4421f5,
 	0x99f094bd,
 	0x0007f106,
 	0x0203f017,
@@ -631,7 +631,7 @@ uint32_t gf117_grhub_code[] = {
 	0x12b920f9,
 	0x0132f402,
 	0xf50232f4,
-	0xfc0a4021,
+	0xfc0a4421,
 	0x0007f120,
 	0x0203f0c0,
 	0xbd0002d0,
@@ -640,7 +640,7 @@ uint32_t gf117_grhub_code[] = {
 	0xf41f23c8,
 	0x31f40d0b,
 	0x0232f401,
-	0x0a4021f5,
+	0x0a4421f5,
 /* 0x063c: chsw_done */
 	0xf10127f0,
 	0xf0c30007,
@@ -654,7 +654,7 @@ uint32_t gf117_grhub_code[] = {
 /* 0x0660: main_not_ctx_switch */
 	0xf401e4b0,
 	0xf2b90d1b,
-	0xd021f502,
+	0xd421f502,
 	0x460ef409,
 /* 0x0670: main_not_ctx_chan */
 	0xf402e4b0,
@@ -664,7 +664,7 @@ uint32_t gf117_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x32f40132,
-	0x4021f502,
+	0x4421f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -682,107 +682,108 @@ uint32_t gf117_grhub_code[] = {
 	0x04bd0002,
 	0xfea00ef5,
 /* 0x06c8: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x00a7f104,
-	0x00a3f002,
-	0xc400aacf,
-	0x0bf404ab,
-	0x10d7f030,
-	0x1a00e7f1,
-	0xcf00e3f0,
-	0xf7f100ee,
-	0xf3f01900,
-	0x00ffcf00,
-	0xb70421f4,
-	0xf00400b0,
-	0x07f101e7,
-	0x03f01d00,
-	0x000ed000,
-/* 0x071a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x10d7f00d,
-	0x4001e7f1,
-/* 0x072b: ih_no_ctxsw */
-	0xe40421f4,
-	0xf40400ab,
-	0xe7f16c0b,
-	0xe3f00708,
-	0x6821f440,
-	0xf102ffb9,
-	0xf0040007,
-	0x0fd00203,
-	0xf104bd00,
-	0xf00704e7,
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0xf104bdf0,
+	0xf00200a7,
+	0xaacf00a3,
+	0x04abc400,
+	0xf0300bf4,
+	0xe7f110d7,
+	0xe3f01a00,
+	0x00eecf00,
+	0x1900f7f1,
+	0xcf00f3f0,
+	0x21f400ff,
+	0x00b0b704,
+	0x01e7f004,
+	0x1d0007f1,
+	0xd00003f0,
+	0x04bd000e,
+/* 0x071c: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f110d7,
+	0x21f44001,
+/* 0x072d: ih_no_ctxsw */
+	0x00abe404,
+	0x6c0bf404,
+	0x0708e7f1,
+	0xf440e3f0,
+	0xffb96821,
+	0x0007f102,
+	0x0203f004,
+	0xbd000fd0,
+	0x04e7f104,
+	0x40e3f007,
+	0xb96821f4,
+	0x07f102ff,
+	0x03f00300,
+	0x000fd002,
+	0xfec704bd,
+	0x02ee9450,
+	0x0700f7f1,
+	0xbb40f3f0,
+	0x21f400ef,
+	0x0007f168,
+	0x0203f002,
+	0xbd000fd0,
+	0x03f7f004,
+	0x037e21f5,
+	0x0100b7f1,
+	0xf102bfb9,
+	0xf00144e7,
 	0x21f440e3,
-	0x02ffb968,
-	0x030007f1,
-	0xd00203f0,
-	0x04bd000f,
-	0x9450fec7,
-	0xf7f102ee,
-	0xf3f00700,
-	0x00efbb40,
-	0xf16821f4,
-	0xf0020007,
-	0x0fd00203,
-	0xf004bd00,
-	0x21f503f7,
-	0xb7f1037e,
-	0xbfb90100,
-	0x44e7f102,
-	0x40e3f001,
-/* 0x079b: ih_no_fwmthd */
-	0xf19d21f4,
-	0xbd0504b7,
-	0xb4abffb0,
-	0xf10f0bf4,
-	0xf0070007,
-	0x0bd00303,
-/* 0x07b3: ih_no_other */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x07d7: ctx_4160s */
+/* 0x079d: ih_no_fwmthd */
+	0x04b7f19d,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0007f10f,
+	0x0303f007,
+	0xbd000bd0,
+/* 0x07b5: ih_no_other */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x07db: ctx_4160s */
 	0xf001f800,
 	0xffb901f7,
 	0x60e7f102,
 	0x40e3f041,
-/* 0x07e7: ctx_4160s_wait */
+/* 0x07eb: ctx_4160s_wait */
 	0xf19d21f4,
 	0xf04160e7,
 	0x21f440e3,
 	0x02ffb968,
 	0xf404ffc8,
 	0x00f8f00b,
-/* 0x07fc: ctx_4160c */
+/* 0x0800: ctx_4160c */
 	0xffb9f4bd,
 	0x60e7f102,
 	0x40e3f041,
 	0xf89d21f4,
-/* 0x080d: ctx_4170s */
+/* 0x0811: ctx_4170s */
 	0x10f5f000,
 	0xf102ffb9,
 	0xf04170e7,
 	0x21f440e3,
-/* 0x081f: ctx_4170w */
+/* 0x0823: ctx_4170w */
 	0xf100f89d,
 	0xf04170e7,
 	0x21f440e3,
 	0x02ffb968,
 	0xf410f4f0,
 	0x00f8f01b,
-/* 0x0834: ctx_redswitch */
+/* 0x0838: ctx_redswitch */
 	0x0200e7f1,
 	0xf040e5f0,
 	0xe5f020e5,
@@ -790,7 +791,7 @@ uint32_t gf117_grhub_code[] = {
 	0x0103f085,
 	0xbd000ed0,
 	0x08f7f004,
-/* 0x0850: ctx_redswitch_delay */
+/* 0x0854: ctx_redswitch_delay */
 	0xf401f2b6,
 	0xe5f1fd1b,
 	0xe5f10400,
@@ -798,7 +799,7 @@ uint32_t gf117_grhub_code[] = {
 	0x03f08500,
 	0x000ed001,
 	0x00f804bd,
-/* 0x086c: ctx_86c */
+/* 0x0870: ctx_86c */
 	0x1b0007f1,
 	0xd00203f0,
 	0x04bd000f,
@@ -809,16 +810,16 @@ uint32_t gf117_grhub_code[] = {
 	0xa86ce7f1,
 	0xf441e3f0,
 	0x00f89d21,
-/* 0x0894: ctx_mem */
+/* 0x0898: ctx_mem */
 	0x840007f1,
 	0xd00203f0,
 	0x04bd000f,
-/* 0x08a0: ctx_mem_wait */
+/* 0x08a4: ctx_mem_wait */
 	0x8400f7f1,
 	0xcf02f3f0,
 	0xfffd00ff,
 	0xf31bf405,
-/* 0x08b2: ctx_load */
+/* 0x08b6: ctx_load */
 	0x94bd00f8,
 	0xf10599f0,
 	0xf00f0007,
@@ -836,7 +837,7 @@ uint32_t gf117_grhub_code[] = {
 	0x02d00203,
 	0xf004bd00,
 	0x21f507f7,
-	0x07f10894,
+	0x07f10898,
 	0x03f0c000,
 	0x0002d002,
 	0x0bfe04bd,
@@ -891,31 +892,31 @@ uint32_t gf117_grhub_code[] = {
 	0x03f01700,
 	0x0009d002,
 	0x00f804bd,
-/* 0x09d0: ctx_chan */
-	0x07d721f5,
-	0x08b221f5,
+/* 0x09d4: ctx_chan */
+	0x07db21f5,
+	0x08b621f5,
 	0xf40ca7f0,
 	0xf7f0d021,
-	0x9421f505,
-	0xfc21f508,
-/* 0x09eb: ctx_mmio_exec */
-	0x9800f807,
+	0x9821f505,
+	0x0021f508,
+/* 0x09ef: ctx_mmio_exec */
+	0x9800f808,
 	0x07f14103,
 	0x03f08100,
 	0x0003d002,
 	0x34bd04bd,
-/* 0x09fc: ctx_mmio_loop */
+/* 0x0a00: ctx_mmio_loop */
 	0xf4ff34c4,
 	0x57f10f1b,
 	0x53f00200,
 	0x0535fa06,
-/* 0x0a0e: ctx_mmio_pull */
+/* 0x0a12: ctx_mmio_pull */
 	0x4e9803f8,
 	0x814f9880,
 	0xb69d21f4,
 	0x12b60830,
 	0xdf1bf401,
-/* 0x0a20: ctx_mmio_done */
+/* 0x0a24: ctx_mmio_done */
 	0xf1160398,
 	0xf0810007,
 	0x03d00203,
@@ -924,30 +925,30 @@ uint32_t gf117_grhub_code[] = {
 	0x13f00100,
 	0x0601fa06,
 	0x00f803f8,
-/* 0x0a40: ctx_xfer */
+/* 0x0a44: ctx_xfer */
 	0xf104e7f0,
 	0xf0020007,
 	0x0ed00303,
-/* 0x0a4f: ctx_xfer_idle */
+/* 0x0a53: ctx_xfer_idle */
 	0xf104bd00,
 	0xf00000e7,
 	0xeecf03e3,
 	0x00e4f100,
 	0xf21bf420,
 	0xf40611f4,
-/* 0x0a66: ctx_xfer_pre */
+/* 0x0a6a: ctx_xfer_pre */
 	0xf7f01102,
-	0x6c21f510,
-	0xd721f508,
+	0x7021f510,
+	0xdb21f508,
 	0x1c11f407,
-/* 0x0a74: ctx_xfer_pre_load */
+/* 0x0a78: ctx_xfer_pre_load */
 	0xf502f7f0,
-	0xf5080d21,
-	0xf5081f21,
-	0xbd083421,
-	0x0d21f5f4,
-	0xb221f508,
-/* 0x0a8d: ctx_xfer_exec */
+	0xf5081121,
+	0xf5082321,
+	0xbd083821,
+	0x1121f5f4,
+	0xb621f508,
+/* 0x0a91: ctx_xfer_exec */
 	0x16019808,
 	0x07f124bd,
 	0x03f00500,
@@ -982,24 +983,23 @@ uint32_t gf117_grhub_code[] = {
 	0x1301f402,
 	0xf40ca7f0,
 	0xf7f0d021,
-	0x9421f505,
+	0x9821f505,
 	0x3202f408,
-/* 0x0b1c: ctx_xfer_post */
+/* 0x0b20: ctx_xfer_post */
 	0xf502f7f0,
-	0xbd080d21,
-	0x6c21f5f4,
+	0xbd081121,
+	0x7021f5f4,
 	0x7f21f508,
-	0x1f21f502,
+	0x2321f502,
 	0xf5f4bd08,
-	0xf4080d21,
+	0xf4081121,
 	0x01981011,
 	0x0511fd40,
 	0xf5070bf4,
-/* 0x0b47: ctx_xfer_no_post_mmio */
-	0xf509eb21,
-/* 0x0b4b: ctx_xfer_done */
-	0xf807fc21,
-	0x00000000,
+/* 0x0b4b: ctx_xfer_no_post_mmio */
+	0xf509ef21,
+/* 0x0b4f: ctx_xfer_done */
+	0xf8080021,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
index 95ac15110049..125824b394bb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
@@ -528,10 +528,10 @@ uint32_t gk104_grhub_code[] = {
 	0x0001d001,
 	0x17f104bd,
 	0xf7f00100,
-	0xd721f502,
-	0xe921f507,
+	0xdb21f502,
+	0xed21f507,
 	0x10f7f007,
-	0x083621f5,
+	0x083a21f5,
 	0x98000e98,
 	0x21f5010f,
 	0x14950150,
@@ -574,9 +574,9 @@ uint32_t gk104_grhub_code[] = {
 	0xb6800040,
 	0x1bf40132,
 	0x00f7f0be,
-	0x083621f5,
+	0x083a21f5,
 	0xf500f7f0,
-	0xf107d721,
+	0xf107db21,
 	0xf0010007,
 	0x01d00203,
 	0xbd04bd00,
@@ -610,7 +610,7 @@ uint32_t gk104_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x31f40132,
-	0x0221f502,
+	0x0621f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -621,7 +621,7 @@ uint32_t gk104_grhub_code[] = {
 	0x0203f00f,
 	0xbd0009d0,
 	0x0131f404,
-	0x0a0221f5,
+	0x0a0621f5,
 	0x99f094bd,
 	0x0007f106,
 	0x0203f017,
@@ -631,7 +631,7 @@ uint32_t gk104_grhub_code[] = {
 	0x12b920f9,
 	0x0132f402,
 	0xf50232f4,
-	0xfc0a0221,
+	0xfc0a0621,
 	0x0007f120,
 	0x0203f0c0,
 	0xbd0002d0,
@@ -640,7 +640,7 @@ uint32_t gk104_grhub_code[] = {
 	0xf41f23c8,
 	0x31f40d0b,
 	0x0232f401,
-	0x0a0221f5,
+	0x0a0621f5,
 /* 0x063c: chsw_done */
 	0xf10127f0,
 	0xf0c30007,
@@ -654,7 +654,7 @@ uint32_t gk104_grhub_code[] = {
 /* 0x0660: main_not_ctx_switch */
 	0xf401e4b0,
 	0xf2b90d1b,
-	0x9a21f502,
+	0x9e21f502,
 	0x460ef409,
 /* 0x0670: main_not_ctx_chan */
 	0xf402e4b0,
@@ -664,7 +664,7 @@ uint32_t gk104_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x32f40132,
-	0x0221f502,
+	0x0621f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -682,90 +682,91 @@ uint32_t gk104_grhub_code[] = {
 	0x04bd0002,
 	0xfea00ef5,
 /* 0x06c8: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x00a7f104,
-	0x00a3f002,
-	0xc400aacf,
-	0x0bf404ab,
-	0x10d7f030,
-	0x1a00e7f1,
-	0xcf00e3f0,
-	0xf7f100ee,
-	0xf3f01900,
-	0x00ffcf00,
-	0xb70421f4,
-	0xf00400b0,
-	0x07f101e7,
-	0x03f01d00,
-	0x000ed000,
-/* 0x071a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x10d7f00d,
-	0x4001e7f1,
-/* 0x072b: ih_no_ctxsw */
-	0xe40421f4,
-	0xf40400ab,
-	0xe7f16c0b,
-	0xe3f00708,
-	0x6821f440,
-	0xf102ffb9,
-	0xf0040007,
-	0x0fd00203,
-	0xf104bd00,
-	0xf00704e7,
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0xf104bdf0,
+	0xf00200a7,
+	0xaacf00a3,
+	0x04abc400,
+	0xf0300bf4,
+	0xe7f110d7,
+	0xe3f01a00,
+	0x00eecf00,
+	0x1900f7f1,
+	0xcf00f3f0,
+	0x21f400ff,
+	0x00b0b704,
+	0x01e7f004,
+	0x1d0007f1,
+	0xd00003f0,
+	0x04bd000e,
+/* 0x071c: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f110d7,
+	0x21f44001,
+/* 0x072d: ih_no_ctxsw */
+	0x00abe404,
+	0x6c0bf404,
+	0x0708e7f1,
+	0xf440e3f0,
+	0xffb96821,
+	0x0007f102,
+	0x0203f004,
+	0xbd000fd0,
+	0x04e7f104,
+	0x40e3f007,
+	0xb96821f4,
+	0x07f102ff,
+	0x03f00300,
+	0x000fd002,
+	0xfec704bd,
+	0x02ee9450,
+	0x0700f7f1,
+	0xbb40f3f0,
+	0x21f400ef,
+	0x0007f168,
+	0x0203f002,
+	0xbd000fd0,
+	0x03f7f004,
+	0x037e21f5,
+	0x0100b7f1,
+	0xf102bfb9,
+	0xf00144e7,
 	0x21f440e3,
-	0x02ffb968,
-	0x030007f1,
-	0xd00203f0,
-	0x04bd000f,
-	0x9450fec7,
-	0xf7f102ee,
-	0xf3f00700,
-	0x00efbb40,
-	0xf16821f4,
-	0xf0020007,
-	0x0fd00203,
-	0xf004bd00,
-	0x21f503f7,
-	0xb7f1037e,
-	0xbfb90100,
-	0x44e7f102,
-	0x40e3f001,
-/* 0x079b: ih_no_fwmthd */
-	0xf19d21f4,
-	0xbd0504b7,
-	0xb4abffb0,
-	0xf10f0bf4,
-	0xf0070007,
-	0x0bd00303,
-/* 0x07b3: ih_no_other */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x07d7: ctx_4170s */
+/* 0x079d: ih_no_fwmthd */
+	0x04b7f19d,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0007f10f,
+	0x0303f007,
+	0xbd000bd0,
+/* 0x07b5: ih_no_other */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x07db: ctx_4170s */
 	0xf001f800,
 	0xffb910f5,
 	0x70e7f102,
 	0x40e3f041,
 	0xf89d21f4,
-/* 0x07e9: ctx_4170w */
+/* 0x07ed: ctx_4170w */
 	0x70e7f100,
 	0x40e3f041,
 	0xb96821f4,
 	0xf4f002ff,
 	0xf01bf410,
-/* 0x07fe: ctx_redswitch */
+/* 0x0802: ctx_redswitch */
 	0xe7f100f8,
 	0xe5f00200,
 	0x20e5f040,
@@ -773,7 +774,7 @@ uint32_t gk104_grhub_code[] = {
 	0xf0850007,
 	0x0ed00103,
 	0xf004bd00,
-/* 0x081a: ctx_redswitch_delay */
+/* 0x081e: ctx_redswitch_delay */
 	0xf2b608f7,
 	0xfd1bf401,
 	0x0400e5f1,
@@ -781,7 +782,7 @@ uint32_t gk104_grhub_code[] = {
 	0x850007f1,
 	0xd00103f0,
 	0x04bd000e,
-/* 0x0836: ctx_86c */
+/* 0x083a: ctx_86c */
 	0x07f100f8,
 	0x03f01b00,
 	0x000fd002,
@@ -792,17 +793,17 @@ uint32_t gk104_grhub_code[] = {
 	0xe7f102ff,
 	0xe3f0a86c,
 	0x9d21f441,
-/* 0x085e: ctx_mem */
+/* 0x0862: ctx_mem */
 	0x07f100f8,
 	0x03f08400,
 	0x000fd002,
-/* 0x086a: ctx_mem_wait */
+/* 0x086e: ctx_mem_wait */
 	0xf7f104bd,
 	0xf3f08400,
 	0x00ffcf02,
 	0xf405fffd,
 	0x00f8f31b,
-/* 0x087c: ctx_load */
+/* 0x0880: ctx_load */
 	0x99f094bd,
 	0x0007f105,
 	0x0203f00f,
@@ -819,7 +820,7 @@ uint32_t gk104_grhub_code[] = {
 	0x0203f083,
 	0xbd0002d0,
 	0x07f7f004,
-	0x085e21f5,
+	0x086221f5,
 	0xc00007f1,
 	0xd00203f0,
 	0x04bd0002,
@@ -874,29 +875,29 @@ uint32_t gk104_grhub_code[] = {
 	0x170007f1,
 	0xd00203f0,
 	0x04bd0009,
-/* 0x099a: ctx_chan */
+/* 0x099e: ctx_chan */
 	0x21f500f8,
-	0xa7f0087c,
+	0xa7f00880,
 	0xd021f40c,
 	0xf505f7f0,
-	0xf8085e21,
-/* 0x09ad: ctx_mmio_exec */
+	0xf8086221,
+/* 0x09b1: ctx_mmio_exec */
 	0x41039800,
 	0x810007f1,
 	0xd00203f0,
 	0x04bd0003,
-/* 0x09be: ctx_mmio_loop */
+/* 0x09c2: ctx_mmio_loop */
 	0x34c434bd,
 	0x0f1bf4ff,
 	0x020057f1,
 	0xfa0653f0,
 	0x03f80535,
-/* 0x09d0: ctx_mmio_pull */
+/* 0x09d4: ctx_mmio_pull */
 	0x98804e98,
 	0x21f4814f,
 	0x0830b69d,
 	0xf40112b6,
-/* 0x09e2: ctx_mmio_done */
+/* 0x09e6: ctx_mmio_done */
 	0x0398df1b,
 	0x0007f116,
 	0x0203f081,
@@ -905,30 +906,30 @@ uint32_t gk104_grhub_code[] = {
 	0x010017f1,
 	0xfa0613f0,
 	0x03f80601,
-/* 0x0a02: ctx_xfer */
+/* 0x0a06: ctx_xfer */
 	0xe7f000f8,
 	0x0007f104,
 	0x0303f002,
 	0xbd000ed0,
-/* 0x0a11: ctx_xfer_idle */
+/* 0x0a15: ctx_xfer_idle */
 	0x00e7f104,
 	0x03e3f000,
 	0xf100eecf,
 	0xf42000e4,
 	0x11f4f21b,
 	0x0d02f406,
-/* 0x0a28: ctx_xfer_pre */
+/* 0x0a2c: ctx_xfer_pre */
 	0xf510f7f0,
-	0xf4083621,
-/* 0x0a32: ctx_xfer_pre_load */
+	0xf4083a21,
+/* 0x0a36: ctx_xfer_pre_load */
 	0xf7f01c11,
-	0xd721f502,
-	0xe921f507,
-	0xfe21f507,
-	0xf5f4bd07,
-	0xf507d721,
-/* 0x0a4b: ctx_xfer_exec */
-	0x98087c21,
+	0xdb21f502,
+	0xed21f507,
+	0x0221f507,
+	0xf5f4bd08,
+	0xf507db21,
+/* 0x0a4f: ctx_xfer_exec */
+	0x98088021,
 	0x24bd1601,
 	0x050007f1,
 	0xd00103f0,
@@ -963,21 +964,21 @@ uint32_t gk104_grhub_code[] = {
 	0xa7f01301,
 	0xd021f40c,
 	0xf505f7f0,
-	0xf4085e21,
-/* 0x0ada: ctx_xfer_post */
+	0xf4086221,
+/* 0x0ade: ctx_xfer_post */
 	0xf7f02e02,
-	0xd721f502,
+	0xdb21f502,
 	0xf5f4bd07,
-	0xf5083621,
+	0xf5083a21,
 	0xf5027f21,
-	0xbd07e921,
-	0xd721f5f4,
+	0xbd07ed21,
+	0xdb21f5f4,
 	0x1011f407,
 	0xfd400198,
 	0x0bf40511,
-	0xad21f507,
-/* 0x0b05: ctx_xfer_no_post_mmio */
-/* 0x0b05: ctx_xfer_done */
+	0xb121f507,
+/* 0x0b09: ctx_xfer_no_post_mmio */
+/* 0x0b09: ctx_xfer_done */
 	0x0000f809,
 	0x00000000,
 	0x00000000,
@@ -1040,5 +1041,4 @@ uint32_t gk104_grhub_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
index 89986878480f..0a1b8c0b8b82 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
@@ -528,10 +528,10 @@ uint32_t gk110_grhub_code[] = {
 	0x0001d001,
 	0x17f104bd,
 	0xf7f00100,
-	0xd721f502,
-	0xe921f507,
+	0xdb21f502,
+	0xed21f507,
 	0x10f7f007,
-	0x083621f5,
+	0x083a21f5,
 	0x98000e98,
 	0x21f5010f,
 	0x14950150,
@@ -574,9 +574,9 @@ uint32_t gk110_grhub_code[] = {
 	0xb6800040,
 	0x1bf40132,
 	0x00f7f0be,
-	0x083621f5,
+	0x083a21f5,
 	0xf500f7f0,
-	0xf107d721,
+	0xf107db21,
 	0xf0010007,
 	0x01d00203,
 	0xbd04bd00,
@@ -610,7 +610,7 @@ uint32_t gk110_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x31f40132,
-	0x0221f502,
+	0x0621f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -621,7 +621,7 @@ uint32_t gk110_grhub_code[] = {
 	0x0203f037,
 	0xbd0009d0,
 	0x0131f404,
-	0x0a0221f5,
+	0x0a0621f5,
 	0x99f094bd,
 	0x0007f106,
 	0x0203f017,
@@ -631,7 +631,7 @@ uint32_t gk110_grhub_code[] = {
 	0x12b920f9,
 	0x0132f402,
 	0xf50232f4,
-	0xfc0a0221,
+	0xfc0a0621,
 	0x0007f120,
 	0x0203f0c0,
 	0xbd0002d0,
@@ -640,7 +640,7 @@ uint32_t gk110_grhub_code[] = {
 	0xf41f23c8,
 	0x31f40d0b,
 	0x0232f401,
-	0x0a0221f5,
+	0x0a0621f5,
 /* 0x063c: chsw_done */
 	0xf10127f0,
 	0xf0c30007,
@@ -654,7 +654,7 @@ uint32_t gk110_grhub_code[] = {
 /* 0x0660: main_not_ctx_switch */
 	0xf401e4b0,
 	0xf2b90d1b,
-	0x9a21f502,
+	0x9e21f502,
 	0x460ef409,
 /* 0x0670: main_not_ctx_chan */
 	0xf402e4b0,
@@ -664,7 +664,7 @@ uint32_t gk110_grhub_code[] = {
 	0x09d00203,
 	0xf404bd00,
 	0x32f40132,
-	0x0221f502,
+	0x0621f502,
 	0xf094bd0a,
 	0x07f10799,
 	0x03f01700,
@@ -682,90 +682,91 @@ uint32_t gk110_grhub_code[] = {
 	0x04bd0002,
 	0xfea00ef5,
 /* 0x06c8: ih */
-	0x88fe80f9,
-	0xf980f901,
-	0xf9a0f990,
-	0xf9d0f9b0,
-	0xbdf0f9e0,
-	0x00a7f104,
-	0x00a3f002,
-	0xc400aacf,
-	0x0bf404ab,
-	0x10d7f030,
-	0x1a00e7f1,
-	0xcf00e3f0,
-	0xf7f100ee,
-	0xf3f01900,
-	0x00ffcf00,
-	0xb70421f4,
-	0xf00400b0,
-	0x07f101e7,
-	0x03f01d00,
-	0x000ed000,
-/* 0x071a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x10d7f00d,
-	0x4001e7f1,
-/* 0x072b: ih_no_ctxsw */
-	0xe40421f4,
-	0xf40400ab,
-	0xe7f16c0b,
-	0xe3f00708,
-	0x6821f440,
-	0xf102ffb9,
-	0xf0040007,
-	0x0fd00203,
-	0xf104bd00,
-	0xf00704e7,
+	0x80f900f9,
+	0xf90188fe,
+	0xf990f980,
+	0xf9b0f9a0,
+	0xf9e0f9d0,
+	0xf104bdf0,
+	0xf00200a7,
+	0xaacf00a3,
+	0x04abc400,
+	0xf0300bf4,
+	0xe7f110d7,
+	0xe3f01a00,
+	0x00eecf00,
+	0x1900f7f1,
+	0xcf00f3f0,
+	0x21f400ff,
+	0x00b0b704,
+	0x01e7f004,
+	0x1d0007f1,
+	0xd00003f0,
+	0x04bd000e,
+/* 0x071c: ih_no_fifo */
+	0x0100abe4,
+	0xf00d0bf4,
+	0xe7f110d7,
+	0x21f44001,
+/* 0x072d: ih_no_ctxsw */
+	0x00abe404,
+	0x6c0bf404,
+	0x0708e7f1,
+	0xf440e3f0,
+	0xffb96821,
+	0x0007f102,
+	0x0203f004,
+	0xbd000fd0,
+	0x04e7f104,
+	0x40e3f007,
+	0xb96821f4,
+	0x07f102ff,
+	0x03f00300,
+	0x000fd002,
+	0xfec704bd,
+	0x02ee9450,
+	0x0700f7f1,
+	0xbb40f3f0,
+	0x21f400ef,
+	0x0007f168,
+	0x0203f002,
+	0xbd000fd0,
+	0x03f7f004,
+	0x037e21f5,
+	0x0100b7f1,
+	0xf102bfb9,
+	0xf00144e7,
 	0x21f440e3,
-	0x02ffb968,
-	0x030007f1,
-	0xd00203f0,
-	0x04bd000f,
-	0x9450fec7,
-	0xf7f102ee,
-	0xf3f00700,
-	0x00efbb40,
-	0xf16821f4,
-	0xf0020007,
-	0x0fd00203,
-	0xf004bd00,
-	0x21f503f7,
-	0xb7f1037e,
-	0xbfb90100,
-	0x44e7f102,
-	0x40e3f001,
-/* 0x079b: ih_no_fwmthd */
-	0xf19d21f4,
-	0xbd0504b7,
-	0xb4abffb0,
-	0xf10f0bf4,
-	0xf0070007,
-	0x0bd00303,
-/* 0x07b3: ih_no_other */
-	0xf104bd00,
-	0xf0010007,
-	0x0ad00003,
-	0xfc04bd00,
-	0xfce0fcf0,
-	0xfcb0fcd0,
-	0xfc90fca0,
-	0x0088fe80,
-	0x32f480fc,
-/* 0x07d7: ctx_4170s */
+/* 0x079d: ih_no_fwmthd */
+	0x04b7f19d,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0007f10f,
+	0x0303f007,
+	0xbd000bd0,
+/* 0x07b5: ih_no_other */
+	0x0007f104,
+	0x0003f001,
+	0xbd000ad0,
+	0xfcf0fc04,
+	0xfcd0fce0,
+	0xfca0fcb0,
+	0xfe80fc90,
+	0x80fc0088,
+	0x32f400fc,
+/* 0x07db: ctx_4170s */
 	0xf001f800,
 	0xffb910f5,
 	0x70e7f102,
 	0x40e3f041,
 	0xf89d21f4,
-/* 0x07e9: ctx_4170w */
+/* 0x07ed: ctx_4170w */
 	0x70e7f100,
 	0x40e3f041,
 	0xb96821f4,
 	0xf4f002ff,
 	0xf01bf410,
-/* 0x07fe: ctx_redswitch */
+/* 0x0802: ctx_redswitch */
 	0xe7f100f8,
 	0xe5f00200,
 	0x20e5f040,
@@ -773,7 +774,7 @@ uint32_t gk110_grhub_code[] = {
 	0xf0850007,
 	0x0ed00103,
 	0xf004bd00,
-/* 0x081a: ctx_redswitch_delay */
+/* 0x081e: ctx_redswitch_delay */
 	0xf2b608f7,
 	0xfd1bf401,
 	0x0400e5f1,
@@ -781,7 +782,7 @@ uint32_t gk110_grhub_code[] = {
 	0x850007f1,
 	0xd00103f0,
 	0x04bd000e,
-/* 0x0836: ctx_86c */
+/* 0x083a: ctx_86c */
 	0x07f100f8,
 	0x03f02300,
 	0x000fd002,
@@ -792,17 +793,17 @@ uint32_t gk110_grhub_code[] = {
 	0xe7f102ff,
 	0xe3f0a88c,
 	0x9d21f441,
-/* 0x085e: ctx_mem */
+/* 0x0862: ctx_mem */
 	0x07f100f8,
 	0x03f08400,
 	0x000fd002,
-/* 0x086a: ctx_mem_wait */
+/* 0x086e: ctx_mem_wait */
 	0xf7f104bd,
 	0xf3f08400,
 	0x00ffcf02,
 	0xf405fffd,
 	0x00f8f31b,
-/* 0x087c: ctx_load */
+/* 0x0880: ctx_load */
 	0x99f094bd,
 	0x0007f105,
 	0x0203f037,
@@ -819,7 +820,7 @@ uint32_t gk110_grhub_code[] = {
 	0x0203f083,
 	0xbd0002d0,
 	0x07f7f004,
-	0x085e21f5,
+	0x086221f5,
 	0xc00007f1,
 	0xd00203f0,
 	0x04bd0002,
@@ -874,29 +875,29 @@ uint32_t gk110_grhub_code[] = {
 	0x170007f1,
 	0xd00203f0,
 	0x04bd0009,
-/* 0x099a: ctx_chan */
+/* 0x099e: ctx_chan */
 	0x21f500f8,
-	0xa7f0087c,
+	0xa7f00880,
 	0xd021f40c,
 	0xf505f7f0,
-	0xf8085e21,
-/* 0x09ad: ctx_mmio_exec */
+	0xf8086221,
+/* 0x09b1: ctx_mmio_exec */
 	0x41039800,
 	0x810007f1,
 	0xd00203f0,
 	0x04bd0003,
-/* 0x09be: ctx_mmio_loop */
+/* 0x09c2: ctx_mmio_loop */
 	0x34c434bd,
 	0x0f1bf4ff,
 	0x020057f1,
 	0xfa0653f0,
 	0x03f80535,
-/* 0x09d0: ctx_mmio_pull */
+/* 0x09d4: ctx_mmio_pull */
 	0x98804e98,
 	0x21f4814f,
 	0x0830b69d,
 	0xf40112b6,
-/* 0x09e2: ctx_mmio_done */
+/* 0x09e6: ctx_mmio_done */
 	0x0398df1b,
 	0x0007f116,
 	0x0203f081,
@@ -905,30 +906,30 @@ uint32_t gk110_grhub_code[] = {
 	0x010017f1,
 	0xfa0613f0,
 	0x03f80601,
-/* 0x0a02: ctx_xfer */
+/* 0x0a06: ctx_xfer */
 	0xe7f000f8,
 	0x0007f104,
 	0x0303f002,
 	0xbd000ed0,
-/* 0x0a11: ctx_xfer_idle */
+/* 0x0a15: ctx_xfer_idle */
 	0x00e7f104,
 	0x03e3f000,
 	0xf100eecf,
 	0xf42000e4,
 	0x11f4f21b,
 	0x0d02f406,
-/* 0x0a28: ctx_xfer_pre */
+/* 0x0a2c: ctx_xfer_pre */
 	0xf510f7f0,
-	0xf4083621,
-/* 0x0a32: ctx_xfer_pre_load */
+	0xf4083a21,
+/* 0x0a36: ctx_xfer_pre_load */
 	0xf7f01c11,
-	0xd721f502,
-	0xe921f507,
-	0xfe21f507,
-	0xf5f4bd07,
-	0xf507d721,
-/* 0x0a4b: ctx_xfer_exec */
-	0x98087c21,
+	0xdb21f502,
+	0xed21f507,
+	0x0221f507,
+	0xf5f4bd08,
+	0xf507db21,
+/* 0x0a4f: ctx_xfer_exec */
+	0x98088021,
 	0x24bd1601,
 	0x050007f1,
 	0xd00103f0,
@@ -963,21 +964,21 @@ uint32_t gk110_grhub_code[] = {
 	0xa7f01301,
 	0xd021f40c,
 	0xf505f7f0,
-	0xf4085e21,
-/* 0x0ada: ctx_xfer_post */
+	0xf4086221,
+/* 0x0ade: ctx_xfer_post */
 	0xf7f02e02,
-	0xd721f502,
+	0xdb21f502,
 	0xf5f4bd07,
-	0xf5083621,
+	0xf5083a21,
 	0xf5027f21,
-	0xbd07e921,
-	0xd721f5f4,
+	0xbd07ed21,
+	0xdb21f5f4,
 	0x1011f407,
 	0xfd400198,
 	0x0bf40511,
-	0xad21f507,
-/* 0x0b05: ctx_xfer_no_post_mmio */
-/* 0x0b05: ctx_xfer_done */
+	0xb121f507,
+/* 0x0b09: ctx_xfer_no_post_mmio */
+/* 0x0b09: ctx_xfer_done */
 	0x0000f809,
 	0x00000000,
 	0x00000000,
@@ -1040,5 +1041,4 @@ uint32_t gk110_grhub_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
index 0e98fa4a386e..16869d0b109b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
@@ -478,10 +478,10 @@ uint32_t gk208_grhub_code[] = {
 	0x01040080,
 	0xbd0001f6,
 	0x01004104,
-	0xa87e020f,
-	0xb77e0006,
+	0xac7e020f,
+	0xbb7e0006,
 	0x100f0006,
-	0x0006f97e,
+	0x0006fd7e,
 	0x98000e98,
 	0x207e010f,
 	0x14950001,
@@ -523,8 +523,8 @@ uint32_t gk208_grhub_code[] = {
 	0x800040b7,
 	0xf40132b6,
 	0x000fb41b,
-	0x0006f97e,
-	0xa87e000f,
+	0x0006fd7e,
+	0xac7e000f,
 	0x00800006,
 	0x01f60201,
 	0xbd04bd00,
@@ -554,7 +554,7 @@ uint32_t gk208_grhub_code[] = {
 	0x0009f602,
 	0x32f404bd,
 	0x0231f401,
-	0x00087c7e,
+	0x0008807e,
 	0x99f094bd,
 	0x17008007,
 	0x0009f602,
@@ -563,7 +563,7 @@ uint32_t gk208_grhub_code[] = {
 	0x37008006,
 	0x0009f602,
 	0x31f404bd,
-	0x087c7e01,
+	0x08807e01,
 	0xf094bd00,
 	0x00800699,
 	0x09f60217,
@@ -572,7 +572,7 @@ uint32_t gk208_grhub_code[] = {
 	0x20f92f0e,
 	0x32f412b2,
 	0x0232f401,
-	0x00087c7e,
+	0x0008807e,
 	0x008020fc,
 	0x02f602c0,
 	0xf404bd00,
@@ -580,7 +580,7 @@ uint32_t gk208_grhub_code[] = {
 	0x23c8130e,
 	0x0d0bf41f,
 	0xf40131f4,
-	0x7c7e0232,
+	0x807e0232,
 /* 0x054e: chsw_done */
 	0x01020008,
 	0x02c30080,
@@ -593,7 +593,7 @@ uint32_t gk208_grhub_code[] = {
 	0xb0ff2a0e,
 	0x1bf401e4,
 	0x7ef2b20c,
-	0xf400081c,
+	0xf4000820,
 /* 0x057a: main_not_ctx_chan */
 	0xe4b0400e,
 	0x2c1bf402,
@@ -602,7 +602,7 @@ uint32_t gk208_grhub_code[] = {
 	0x0009f602,
 	0x32f404bd,
 	0x0232f401,
-	0x00087c7e,
+	0x0008807e,
 	0x99f094bd,
 	0x17008007,
 	0x0009f602,
@@ -618,91 +618,92 @@ uint32_t gk208_grhub_code[] = {
 	0xbd0002f6,
 	0xcc0ef504,
 /* 0x05c9: ih */
-	0xfe80f9fe,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0x004a04bd,
-	0x00aacf02,
-	0xf404abc4,
-	0x100d230b,
-	0xcf1a004e,
-	0x004f00ee,
-	0x00ffcf19,
+	0xf900f9fe,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0xcf02004a,
+	0xabc400aa,
+	0x230bf404,
+	0x004e100d,
+	0x00eecf1a,
+	0xcf19004f,
+	0x047e00ff,
+	0xb0b70000,
+	0x010e0400,
+	0xf61d0040,
+	0x04bd000e,
+/* 0x060c: ih_no_fifo */
+	0x0100abe4,
+	0x0d0c0bf4,
+	0x40014e10,
 	0x0000047e,
-	0x0400b0b7,
-	0x0040010e,
-	0x000ef61d,
-/* 0x060a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x4e100d0c,
-	0x047e4001,
-/* 0x061a: ih_no_ctxsw */
-	0xabe40000,
-	0x0bf40400,
-	0x07088e56,
-	0x00657e40,
-	0x80ffb200,
-	0xf6020400,
-	0x04bd000f,
-	0x4007048e,
-	0x0000657e,
-	0x0080ffb2,
-	0x0ff60203,
-	0xc704bd00,
-	0xee9450fe,
-	0x07008f02,
-	0x00efbb40,
-	0x0000657e,
-	0x02020080,
+/* 0x061c: ih_no_ctxsw */
+	0x0400abe4,
+	0x8e560bf4,
+	0x7e400708,
+	0xb2000065,
+	0x040080ff,
+	0x000ff602,
+	0x048e04bd,
+	0x657e4007,
+	0xffb20000,
+	0x02030080,
 	0xbd000ff6,
-	0x7e030f04,
-	0x4b0002f8,
-	0xbfb20100,
-	0x4001448e,
-	0x00008f7e,
-/* 0x0674: ih_no_fwmthd */
-	0xbd05044b,
-	0xb4abffb0,
-	0x800c0bf4,
-	0xf6030700,
-	0x04bd000b,
-/* 0x0688: ih_no_other */
-	0xf6010040,
-	0x04bd000a,
-	0xe0fcf0fc,
-	0xb0fcd0fc,
-	0x90fca0fc,
-	0x88fe80fc,
-	0xf480fc00,
+	0x50fec704,
+	0x8f02ee94,
+	0xbb400700,
+	0x657e00ef,
+	0x00800000,
+	0x0ff60202,
+	0x0f04bd00,
+	0x02f87e03,
+	0x01004b00,
+	0x448ebfb2,
+	0x8f7e4001,
+/* 0x0676: ih_no_fwmthd */
+	0x044b0000,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0700800c,
+	0x000bf603,
+/* 0x068a: ih_no_other */
+	0x004004bd,
+	0x000af601,
+	0xf0fc04bd,
+	0xd0fce0fc,
+	0xa0fcb0fc,
+	0x80fc90fc,
+	0xfc0088fe,
+	0xf400fc80,
 	0x01f80032,
-/* 0x06a8: ctx_4170s */
+/* 0x06ac: ctx_4170s */
 	0xb210f5f0,
 	0x41708eff,
 	0x008f7e40,
-/* 0x06b7: ctx_4170w */
+/* 0x06bb: ctx_4170w */
 	0x8e00f800,
 	0x7e404170,
 	0xb2000065,
 	0x10f4f0ff,
 	0xf8f31bf4,
-/* 0x06c9: ctx_redswitch */
+/* 0x06cd: ctx_redswitch */
 	0x02004e00,
 	0xf040e5f0,
 	0xe5f020e5,
 	0x85008010,
 	0x000ef601,
 	0x080f04bd,
-/* 0x06e0: ctx_redswitch_delay */
+/* 0x06e4: ctx_redswitch_delay */
 	0xf401f2b6,
 	0xe5f1fd1b,
 	0xe5f10400,
 	0x00800100,
 	0x0ef60185,
 	0xf804bd00,
-/* 0x06f9: ctx_86c */
+/* 0x06fd: ctx_86c */
 	0x23008000,
 	0x000ff602,
 	0xffb204bd,
@@ -711,15 +712,15 @@ uint32_t gk208_grhub_code[] = {
 	0x8c8effb2,
 	0x8f7e41a8,
 	0x00f80000,
-/* 0x0718: ctx_mem */
+/* 0x071c: ctx_mem */
 	0x02840080,
 	0xbd000ff6,
-/* 0x0721: ctx_mem_wait */
+/* 0x0725: ctx_mem_wait */
 	0x84008f04,
 	0x00ffcf02,
 	0xf405fffd,
 	0x00f8f61b,
-/* 0x0730: ctx_load */
+/* 0x0734: ctx_load */
 	0x99f094bd,
 	0x37008005,
 	0x0009f602,
@@ -733,7 +734,7 @@ uint32_t gk208_grhub_code[] = {
 	0x02830080,
 	0xbd0002f6,
 	0x7e070f04,
-	0x80000718,
+	0x8000071c,
 	0xf602c000,
 	0x04bd0002,
 	0xf0000bfe,
@@ -779,28 +780,28 @@ uint32_t gk208_grhub_code[] = {
 	0x17008005,
 	0x0009f602,
 	0x00f804bd,
-/* 0x081c: ctx_chan */
-	0x0007307e,
+/* 0x0820: ctx_chan */
+	0x0007347e,
 	0xb87e0c0a,
 	0x050f0000,
-	0x0007187e,
-/* 0x082e: ctx_mmio_exec */
+	0x00071c7e,
+/* 0x0832: ctx_mmio_exec */
 	0x039800f8,
 	0x81008041,
 	0x0003f602,
 	0x34bd04bd,
-/* 0x083c: ctx_mmio_loop */
+/* 0x0840: ctx_mmio_loop */
 	0xf4ff34c4,
 	0x00450e1b,
 	0x0653f002,
 	0xf80535fa,
-/* 0x084d: ctx_mmio_pull */
+/* 0x0851: ctx_mmio_pull */
 	0x804e9803,
 	0x7e814f98,
 	0xb600008f,
 	0x12b60830,
 	0xdf1bf401,
-/* 0x0860: ctx_mmio_done */
+/* 0x0864: ctx_mmio_done */
 	0x80160398,
 	0xf6028100,
 	0x04bd0003,
@@ -808,27 +809,27 @@ uint32_t gk208_grhub_code[] = {
 	0x13f00100,
 	0x0601fa06,
 	0x00f803f8,
-/* 0x087c: ctx_xfer */
+/* 0x0880: ctx_xfer */
 	0x0080040e,
 	0x0ef60302,
-/* 0x0887: ctx_xfer_idle */
+/* 0x088b: ctx_xfer_idle */
 	0x8e04bd00,
 	0xcf030000,
 	0xe4f100ee,
 	0x1bf42000,
 	0x0611f4f5,
-/* 0x089b: ctx_xfer_pre */
+/* 0x089f: ctx_xfer_pre */
 	0x0f0c02f4,
-	0x06f97e10,
+	0x06fd7e10,
 	0x1b11f400,
-/* 0x08a4: ctx_xfer_pre_load */
-	0xa87e020f,
-	0xb77e0006,
-	0xc97e0006,
+/* 0x08a8: ctx_xfer_pre_load */
+	0xac7e020f,
+	0xbb7e0006,
+	0xcd7e0006,
 	0xf4bd0006,
-	0x0006a87e,
-	0x0007307e,
-/* 0x08bc: ctx_xfer_exec */
+	0x0006ac7e,
+	0x0007347e,
+/* 0x08c0: ctx_xfer_exec */
 	0xbd160198,
 	0x05008024,
 	0x0002f601,
@@ -858,21 +859,21 @@ uint32_t gk208_grhub_code[] = {
 	0x01f40002,
 	0x7e0c0a12,
 	0x0f0000b8,
-	0x07187e05,
+	0x071c7e05,
 	0x2d02f400,
-/* 0x0938: ctx_xfer_post */
-	0xa87e020f,
+/* 0x093c: ctx_xfer_post */
+	0xac7e020f,
 	0xf4bd0006,
-	0x0006f97e,
+	0x0006fd7e,
 	0x0002277e,
-	0x0006b77e,
-	0xa87ef4bd,
+	0x0006bb7e,
+	0xac7ef4bd,
 	0x11f40006,
 	0x40019810,
 	0xf40511fd,
-	0x2e7e070b,
-/* 0x0962: ctx_xfer_no_post_mmio */
-/* 0x0962: ctx_xfer_done */
+	0x327e070b,
+/* 0x0966: ctx_xfer_no_post_mmio */
+/* 0x0966: ctx_xfer_done */
 	0x00f80008,
 	0x00000000,
 	0x00000000,
@@ -912,5 +913,4 @@ uint32_t gk208_grhub_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
index 5f953c5c20b7..d6343d2a614c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
@@ -478,10 +478,10 @@ uint32_t gm107_grhub_code[] = {
 	0x01040080,
 	0xbd0001f6,
 	0x01004104,
-	0xa87e020f,
-	0xb77e0006,
+	0xac7e020f,
+	0xbb7e0006,
 	0x100f0006,
-	0x0006f97e,
+	0x0006fd7e,
 	0x98000e98,
 	0x207e010f,
 	0x14950001,
@@ -523,8 +523,8 @@ uint32_t gm107_grhub_code[] = {
 	0x800040b7,
 	0xf40132b6,
 	0x000fb41b,
-	0x0006f97e,
-	0xa87e000f,
+	0x0006fd7e,
+	0xac7e000f,
 	0x00800006,
 	0x01f60201,
 	0xbd04bd00,
@@ -554,7 +554,7 @@ uint32_t gm107_grhub_code[] = {
 	0x0009f602,
 	0x32f404bd,
 	0x0231f401,
-	0x00087c7e,
+	0x0008807e,
 	0x99f094bd,
 	0x17008007,
 	0x0009f602,
@@ -563,7 +563,7 @@ uint32_t gm107_grhub_code[] = {
 	0x37008006,
 	0x0009f602,
 	0x31f404bd,
-	0x087c7e01,
+	0x08807e01,
 	0xf094bd00,
 	0x00800699,
 	0x09f60217,
@@ -572,7 +572,7 @@ uint32_t gm107_grhub_code[] = {
 	0x20f92f0e,
 	0x32f412b2,
 	0x0232f401,
-	0x00087c7e,
+	0x0008807e,
 	0x008020fc,
 	0x02f602c0,
 	0xf404bd00,
@@ -580,7 +580,7 @@ uint32_t gm107_grhub_code[] = {
 	0x23c8130e,
 	0x0d0bf41f,
 	0xf40131f4,
-	0x7c7e0232,
+	0x807e0232,
 /* 0x054e: chsw_done */
 	0x01020008,
 	0x02c30080,
@@ -593,7 +593,7 @@ uint32_t gm107_grhub_code[] = {
 	0xb0ff2a0e,
 	0x1bf401e4,
 	0x7ef2b20c,
-	0xf400081c,
+	0xf4000820,
 /* 0x057a: main_not_ctx_chan */
 	0xe4b0400e,
 	0x2c1bf402,
@@ -602,7 +602,7 @@ uint32_t gm107_grhub_code[] = {
 	0x0009f602,
 	0x32f404bd,
 	0x0232f401,
-	0x00087c7e,
+	0x0008807e,
 	0x99f094bd,
 	0x17008007,
 	0x0009f602,
@@ -618,91 +618,92 @@ uint32_t gm107_grhub_code[] = {
 	0xbd0002f6,
 	0xcc0ef504,
 /* 0x05c9: ih */
-	0xfe80f9fe,
-	0x80f90188,
-	0xa0f990f9,
-	0xd0f9b0f9,
-	0xf0f9e0f9,
-	0x004a04bd,
-	0x00aacf02,
-	0xf404abc4,
-	0x100d230b,
-	0xcf1a004e,
-	0x004f00ee,
-	0x00ffcf19,
+	0xf900f9fe,
+	0x0188fe80,
+	0x90f980f9,
+	0xb0f9a0f9,
+	0xe0f9d0f9,
+	0x04bdf0f9,
+	0xcf02004a,
+	0xabc400aa,
+	0x230bf404,
+	0x004e100d,
+	0x00eecf1a,
+	0xcf19004f,
+	0x047e00ff,
+	0xb0b70000,
+	0x010e0400,
+	0xf61d0040,
+	0x04bd000e,
+/* 0x060c: ih_no_fifo */
+	0x0100abe4,
+	0x0d0c0bf4,
+	0x40014e10,
 	0x0000047e,
-	0x0400b0b7,
-	0x0040010e,
-	0x000ef61d,
-/* 0x060a: ih_no_fifo */
-	0xabe404bd,
-	0x0bf40100,
-	0x4e100d0c,
-	0x047e4001,
-/* 0x061a: ih_no_ctxsw */
-	0xabe40000,
-	0x0bf40400,
-	0x07088e56,
-	0x00657e40,
-	0x80ffb200,
-	0xf6020400,
-	0x04bd000f,
-	0x4007048e,
-	0x0000657e,
-	0x0080ffb2,
-	0x0ff60203,
-	0xc704bd00,
-	0xee9450fe,
-	0x07008f02,
-	0x00efbb40,
-	0x0000657e,
-	0x02020080,
+/* 0x061c: ih_no_ctxsw */
+	0x0400abe4,
+	0x8e560bf4,
+	0x7e400708,
+	0xb2000065,
+	0x040080ff,
+	0x000ff602,
+	0x048e04bd,
+	0x657e4007,
+	0xffb20000,
+	0x02030080,
 	0xbd000ff6,
-	0x7e030f04,
-	0x4b0002f8,
-	0xbfb20100,
-	0x4001448e,
-	0x00008f7e,
-/* 0x0674: ih_no_fwmthd */
-	0xbd05044b,
-	0xb4abffb0,
-	0x800c0bf4,
-	0xf6030700,
-	0x04bd000b,
-/* 0x0688: ih_no_other */
-	0xf6010040,
-	0x04bd000a,
-	0xe0fcf0fc,
-	0xb0fcd0fc,
-	0x90fca0fc,
-	0x88fe80fc,
-	0xf480fc00,
+	0x50fec704,
+	0x8f02ee94,
+	0xbb400700,
+	0x657e00ef,
+	0x00800000,
+	0x0ff60202,
+	0x0f04bd00,
+	0x02f87e03,
+	0x01004b00,
+	0x448ebfb2,
+	0x8f7e4001,
+/* 0x0676: ih_no_fwmthd */
+	0x044b0000,
+	0xffb0bd05,
+	0x0bf4b4ab,
+	0x0700800c,
+	0x000bf603,
+/* 0x068a: ih_no_other */
+	0x004004bd,
+	0x000af601,
+	0xf0fc04bd,
+	0xd0fce0fc,
+	0xa0fcb0fc,
+	0x80fc90fc,
+	0xfc0088fe,
+	0xf400fc80,
 	0x01f80032,
-/* 0x06a8: ctx_4170s */
+/* 0x06ac: ctx_4170s */
 	0xb210f5f0,
 	0x41708eff,
 	0x008f7e40,
-/* 0x06b7: ctx_4170w */
+/* 0x06bb: ctx_4170w */
 	0x8e00f800,
 	0x7e404170,
 	0xb2000065,
 	0x10f4f0ff,
 	0xf8f31bf4,
-/* 0x06c9: ctx_redswitch */
+/* 0x06cd: ctx_redswitch */
 	0x02004e00,
 	0xf040e5f0,
 	0xe5f020e5,
 	0x85008010,
 	0x000ef601,
 	0x080f04bd,
-/* 0x06e0: ctx_redswitch_delay */
+/* 0x06e4: ctx_redswitch_delay */
 	0xf401f2b6,
 	0xe5f1fd1b,
 	0xe5f10400,
 	0x00800100,
 	0x0ef60185,
 	0xf804bd00,
-/* 0x06f9: ctx_86c */
+/* 0x06fd: ctx_86c */
 	0x23008000,
 	0x000ff602,
 	0xffb204bd,
@@ -711,15 +712,15 @@ uint32_t gm107_grhub_code[] = {
 	0x8c8effb2,
 	0x8f7e41a8,
 	0x00f80000,
-/* 0x0718: ctx_mem */
+/* 0x071c: ctx_mem */
 	0x02840080,
 	0xbd000ff6,
-/* 0x0721: ctx_mem_wait */
+/* 0x0725: ctx_mem_wait */
 	0x84008f04,
 	0x00ffcf02,
 	0xf405fffd,
 	0x00f8f61b,
-/* 0x0730: ctx_load */
+/* 0x0734: ctx_load */
 	0x99f094bd,
 	0x37008005,
 	0x0009f602,
@@ -733,7 +734,7 @@ uint32_t gm107_grhub_code[] = {
 	0x02830080,
 	0xbd0002f6,
 	0x7e070f04,
-	0x80000718,
+	0x8000071c,
 	0xf602c000,
 	0x04bd0002,
 	0xf0000bfe,
@@ -779,28 +780,28 @@ uint32_t gm107_grhub_code[] = {
 	0x17008005,
 	0x0009f602,
 	0x00f804bd,
-/* 0x081c: ctx_chan */
-	0x0007307e,
+/* 0x0820: ctx_chan */
+	0x0007347e,
 	0xb87e0c0a,
 	0x050f0000,
-	0x0007187e,
-/* 0x082e: ctx_mmio_exec */
+	0x00071c7e,
+/* 0x0832: ctx_mmio_exec */
 	0x039800f8,
 	0x81008041,
 	0x0003f602,
 	0x34bd04bd,
-/* 0x083c: ctx_mmio_loop */
+/* 0x0840: ctx_mmio_loop */
 	0xf4ff34c4,
 	0x00450e1b,
 	0x0653f002,
 	0xf80535fa,
-/* 0x084d: ctx_mmio_pull */
+/* 0x0851: ctx_mmio_pull */
 	0x804e9803,
 	0x7e814f98,
 	0xb600008f,
 	0x12b60830,
 	0xdf1bf401,
-/* 0x0860: ctx_mmio_done */
+/* 0x0864: ctx_mmio_done */
 	0x80160398,
 	0xf6028100,
 	0x04bd0003,
@@ -808,27 +809,27 @@ uint32_t gm107_grhub_code[] = {
 	0x13f00100,
 	0x0601fa06,
 	0x00f803f8,
-/* 0x087c: ctx_xfer */
+/* 0x0880: ctx_xfer */
 	0x0080040e,
 	0x0ef60302,
-/* 0x0887: ctx_xfer_idle */
+/* 0x088b: ctx_xfer_idle */
 	0x8e04bd00,
 	0xcf030000,
 	0xe4f100ee,
 	0x1bf42000,
 	0x0611f4f5,
-/* 0x089b: ctx_xfer_pre */
+/* 0x089f: ctx_xfer_pre */
 	0x0f0c02f4,
-	0x06f97e10,
+	0x06fd7e10,
 	0x1b11f400,
-/* 0x08a4: ctx_xfer_pre_load */
-	0xa87e020f,
-	0xb77e0006,
-	0xc97e0006,
+/* 0x08a8: ctx_xfer_pre_load */
+	0xac7e020f,
+	0xbb7e0006,
+	0xcd7e0006,
 	0xf4bd0006,
-	0x0006a87e,
-	0x0007307e,
-/* 0x08bc: ctx_xfer_exec */
+	0x0006ac7e,
+	0x0007347e,
+/* 0x08c0: ctx_xfer_exec */
 	0xbd160198,
 	0x05008024,
 	0x0002f601,
@@ -858,21 +859,21 @@ uint32_t gm107_grhub_code[] = {
 	0x01f40002,
 	0x7e0c0a12,
 	0x0f0000b8,
-	0x07187e05,
+	0x071c7e05,
 	0x2d02f400,
-/* 0x0938: ctx_xfer_post */
-	0xa87e020f,
+/* 0x093c: ctx_xfer_post */
+	0xac7e020f,
 	0xf4bd0006,
-	0x0006f97e,
+	0x0006fd7e,
 	0x0002277e,
-	0x0006b77e,
-	0xa87ef4bd,
+	0x0006bb7e,
+	0xac7ef4bd,
 	0x11f40006,
 	0x40019810,
 	0xf40511fd,
-	0x2e7e070b,
-/* 0x0962: ctx_xfer_no_post_mmio */
-/* 0x0962: ctx_xfer_done */
+	0x327e070b,
+/* 0x0966: ctx_xfer_no_post_mmio */
+/* 0x0966: ctx_xfer_done */
 	0x00f80008,
 	0x00000000,
 	0x00000000,
@@ -912,5 +913,4 @@ uint32_t gm107_grhub_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 1f81069edc58..c56a886229f1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -27,6 +27,8 @@
 
 #include <core/client.h>
 #include <core/option.h>
+#include <core/firmware.h>
+#include <subdev/secboot.h>
 #include <subdev/fb.h>
 #include <subdev/mc.h>
 #include <subdev/pmu.h>
@@ -1427,21 +1429,40 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
+	struct nvkm_secboot *sb = device->secboot;
 	int i;
 
 	if (gr->firmware) {
 		/* load fuc microcode */
 		nvkm_mc_unk260(device->mc, 0);
-		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
-		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
+
+		/* securely-managed falcons must be reset using secure boot */
+		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+		else
+			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
+					 &gr->fuc409d);
+		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+		else
+			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
+					 &gr->fuc41ad);
+
 		nvkm_mc_unk260(device->mc, 1);
 
 		/* start both of them running */
 		nvkm_wr32(device, 0x409840, 0xffffffff);
 		nvkm_wr32(device, 0x41a10c, 0x00000000);
 		nvkm_wr32(device, 0x40910c, 0x00000000);
-		nvkm_wr32(device, 0x41a100, 0x00000002);
-		nvkm_wr32(device, 0x409100, 0x00000002);
+
+		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
+		else
+			nvkm_wr32(device, 0x41a100, 0x00000002);
+		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
+		else
+			nvkm_wr32(device, 0x409100, 0x00000002);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800) & 0x00000001)
 				break;
@@ -1683,6 +1704,12 @@ gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
 	fuc->data = NULL;
 }
 
+static void
+gf100_gr_dtor_init(struct gf100_gr_pack *pack)
+{
+	vfree(pack);
+}
+
 void *
 gf100_gr_dtor(struct nvkm_gr *base)
 {
@@ -1697,6 +1724,11 @@ gf100_gr_dtor(struct nvkm_gr *base)
 	gf100_gr_dtor_fw(&gr->fuc41ac);
 	gf100_gr_dtor_fw(&gr->fuc41ad);
 
+	gf100_gr_dtor_init(gr->fuc_bundle);
+	gf100_gr_dtor_init(gr->fuc_method);
+	gf100_gr_dtor_init(gr->fuc_sw_ctx);
+	gf100_gr_dtor_init(gr->fuc_sw_nonctx);
+
 	nvkm_memory_del(&gr->unk4188b8);
 	nvkm_memory_del(&gr->unk4188b4);
 	return gr;
@@ -1720,22 +1752,9 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	const struct firmware *fw;
-	char f[64];
-	char cname[16];
 	int ret;
-	int i;
 
-	/* Convert device name to lowercase */
-	strncpy(cname, device->chip->name, sizeof(cname));
-	cname[sizeof(cname) - 1] = '\0';
-	i = strlen(cname);
-	while (i) {
-		--i;
-		cname[i] = tolower(cname[i]);
-	}
-
-	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
-	ret = request_firmware(&fw, f, device->dev);
+	ret = nvkm_firmware_get(device, fwname, &fw);
 	if (ret) {
 		nvkm_error(subdev, "failed to load %s\n", fwname);
 		return ret;
@@ -1743,7 +1762,7 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 
 	fuc->size = fw->size;
 	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
-	release_firmware(fw);
+	nvkm_firmware_put(fw);
 	return (fuc->data != NULL) ? 0 : -ENOMEM;
 }
 
@@ -1763,15 +1782,6 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	if (ret)
 		return ret;
 
-	if (gr->firmware) {
-		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
-		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
-		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
-		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
-		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
-			return -ENODEV;
-	}
-
 	return 0;
 }
 
@@ -1780,10 +1790,25 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
 	      int index, struct nvkm_gr **pgr)
 {
 	struct gf100_gr *gr;
+	int ret;
+
 	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
 		return -ENOMEM;
 	*pgr = &gr->base;
-	return gf100_gr_ctor(func, device, index, gr);
+
+	ret = gf100_gr_ctor(func, device, index, gr);
+	if (ret)
+		return ret;
+
+	if (gr->firmware) {
+		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
+		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
+		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
+		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
+			return -ENODEV;
+	}
+
+	return 0;
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 02e78b8d93f6..f0c6acb0f8fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -82,7 +82,7 @@ struct gf100_gr {
 
 	/*
 	 * Used if the register packs are loaded from NVIDIA fw instead of
-	 * using hardcoded arrays.
+	 * using hardcoded arrays. To be allocated with vzalloc().
 	 */
 	struct gf100_gr_pack *fuc_sw_nonctx;
 	struct gf100_gr_pack *fuc_sw_ctx;
@@ -138,12 +138,9 @@ int gf100_gr_init(struct gf100_gr *);
 
 int gk104_gr_init(struct gf100_gr *);
 
-int gk20a_gr_new_(const struct gf100_gr_func *, struct nvkm_device *,
-		  int, struct nvkm_gr **);
-void gk20a_gr_dtor(struct gf100_gr *);
 int gk20a_gr_init(struct gf100_gr *);
 
-int gm204_gr_init(struct gf100_gr *);
+int gm200_gr_init(struct gf100_gr *);
 
 #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object)
 
@@ -204,6 +201,17 @@ void gf100_gr_icmd(struct gf100_gr *, const struct gf100_gr_pack *);
 void gf100_gr_mthd(struct gf100_gr *, const struct gf100_gr_pack *);
 int  gf100_gr_init_ctxctl(struct gf100_gr *);
 
+/* external bundles loading functions */
+int gk20a_gr_av_to_init(struct gf100_gr *, const char *,
+			struct gf100_gr_pack **);
+int gk20a_gr_aiv_to_init(struct gf100_gr *, const char *,
+			 struct gf100_gr_pack **);
+int gk20a_gr_av_to_method(struct gf100_gr *, const char *,
+			  struct gf100_gr_pack **);
+
+int gm200_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, int,
+		  struct nvkm_gr **);
+
 /* register init value lists */
 
 extern const struct gf100_gr_init gf100_gr_init_main_0[];
@@ -279,6 +287,4 @@ extern const struct gf100_gr_init gm107_gr_init_l1c_0[];
 extern const struct gf100_gr_init gm107_gr_init_wwdx_0[];
 extern const struct gf100_gr_init gm107_gr_init_cbm_0[];
 void gm107_gr_init_bios(struct gf100_gr *);
-
-extern const struct gf100_gr_pack gm204_gr_pack_mmio[];
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index b8758d3b8b51..7ffb8a626196 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -26,37 +26,40 @@
 
 #include <nvif/class.h>
 
-static void
-gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
-{
-	vfree(pack);
-}
-
 struct gk20a_fw_av
 {
 	u32 addr;
 	u32 data;
 };
 
-static struct gf100_gr_pack *
-gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
+int
+gk20a_gr_av_to_init(struct gf100_gr *gr, const char *fw_name,
+		    struct gf100_gr_pack **ppack)
 {
+	struct gf100_gr_fuc fuc;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
-	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
+	int nent;
+	int ret;
 	int i;
 
+	ret = gf100_gr_ctor_fw(gr, fw_name, &fuc);
+	if (ret)
+		return ret;
+
+	nent = (fuc.size / sizeof(struct gk20a_fw_av));
 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
-	if (!pack)
-		return ERR_PTR(-ENOMEM);
+	if (!pack) {
+		ret = -ENOMEM;
+		goto end;
+	}
 
 	init = (void *)(pack + 2);
-
 	pack[0].init = init;
 
 	for (i = 0; i < nent; i++) {
 		struct gf100_gr_init *ent = &init[i];
-		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
+		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i];
 
 		ent->addr = av->addr;
 		ent->data = av->data;
@@ -64,7 +67,11 @@ gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
 		ent->pitch = 1;
 	}
 
-	return pack;
+	*ppack = pack;
+
+end:
+	gf100_gr_dtor_fw(&fuc);
+	return ret;
 }
 
 struct gk20a_fw_aiv
@@ -74,25 +81,34 @@ struct gk20a_fw_aiv
 	u32 data;
 };
 
-static struct gf100_gr_pack *
-gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
+int
+gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *fw_name,
+		     struct gf100_gr_pack **ppack)
 {
+	struct gf100_gr_fuc fuc;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
-	const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
+	int nent;
+	int ret;
 	int i;
 
+	ret = gf100_gr_ctor_fw(gr, fw_name, &fuc);
+	if (ret)
+		return ret;
+
+	nent = (fuc.size / sizeof(struct gk20a_fw_aiv));
 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
-	if (!pack)
-		return ERR_PTR(-ENOMEM);
+	if (!pack) {
+		ret = -ENOMEM;
+		goto end;
+	}
 
 	init = (void *)(pack + 2);
-
 	pack[0].init = init;
 
 	for (i = 0; i < nent; i++) {
 		struct gf100_gr_init *ent = &init[i];
-		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];
+		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc.data)[i];
 
 		ent->addr = av->addr;
 		ent->data = av->data;
@@ -100,30 +116,45 @@ gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
 		ent->pitch = 1;
 	}
 
-	return pack;
+	*ppack = pack;
+
+end:
+	gf100_gr_dtor_fw(&fuc);
+	return ret;
 }
 
-static struct gf100_gr_pack *
-gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
+int
+gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name,
+		      struct gf100_gr_pack **ppack)
 {
+	struct gf100_gr_fuc fuc;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
 	/* We don't suppose we will initialize more than 16 classes here... */
 	static const unsigned int max_classes = 16;
-	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
-	int i, classidx = 0;
-	u32 prevclass = 0;
+	u32 classidx = 0, prevclass = 0;
+	int nent;
+	int ret;
+	int i;
+
+	ret = gf100_gr_ctor_fw(gr, fw_name, &fuc);
+	if (ret)
+		return ret;
+
+	nent = (fuc.size / sizeof(struct gk20a_fw_av));
 
 	pack = vzalloc((sizeof(*pack) * max_classes) +
 		       (sizeof(*init) * (nent + 1)));
-	if (!pack)
-		return ERR_PTR(-ENOMEM);
+	if (!pack) {
+		ret = -ENOMEM;
+		goto end;
+	}
 
 	init = (void *)(pack + max_classes);
 
 	for (i = 0; i < nent; i++) {
 		struct gf100_gr_init *ent = &init[i];
-		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
+		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i];
 		u32 class = av->addr & 0xffff;
 		u32 addr = (av->addr & 0xffff0000) >> 14;
 
@@ -133,7 +164,8 @@ gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
 			prevclass = class;
 			if (++classidx >= max_classes) {
 				vfree(pack);
-				return ERR_PTR(-ENOSPC);
+				ret = -ENOSPC;
+				goto end;
 			}
 		}
 
@@ -143,7 +175,11 @@ gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
 		ent->pitch = 1;
 	}
 
-	return pack;
+	*ppack = pack;
+
+end:
+	gf100_gr_dtor_fw(&fuc);
+	return ret;
 }
 
 static int
@@ -273,20 +309,24 @@ gk20a_gr_init(struct gf100_gr *gr)
 	return gf100_gr_init_ctxctl(gr);
 }
 
-void
-gk20a_gr_dtor(struct gf100_gr *gr)
-{
-	gk20a_gr_init_dtor(gr->fuc_method);
-	gk20a_gr_init_dtor(gr->fuc_bundle);
-	gk20a_gr_init_dtor(gr->fuc_sw_ctx);
-	gk20a_gr_init_dtor(gr->fuc_sw_nonctx);
-}
+static const struct gf100_gr_func
+gk20a_gr = {
+	.init = gk20a_gr_init,
+	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
+	.ppc_nr = 1,
+	.grctx = &gk20a_grctx,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
+		{ -1, -1, KEPLER_C, &gf100_fermi },
+		{ -1, -1, KEPLER_COMPUTE_A },
+		{}
+	}
+};
 
 int
-gk20a_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
-	      int index, struct nvkm_gr **pgr)
+gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
 {
-	struct gf100_gr_fuc fuc;
 	struct gf100_gr *gr;
 	int ret;
 
@@ -294,63 +334,32 @@ gk20a_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
 		return -ENOMEM;
 	*pgr = &gr->base;
 
-	ret = gf100_gr_ctor(func, device, index, gr);
+	ret = gf100_gr_ctor(&gk20a_gr, device, index, gr);
 	if (ret)
 		return ret;
 
-	ret = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc);
+	if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
+	    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
+	    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
+	    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
+		return -ENODEV;
+
+	ret = gk20a_gr_av_to_init(gr, "sw_nonctx", &gr->fuc_sw_nonctx);
 	if (ret)
 		return ret;
-	gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
-	gf100_gr_dtor_fw(&fuc);
-	if (IS_ERR(gr->fuc_sw_nonctx))
-		return PTR_ERR(gr->fuc_sw_nonctx);
 
-	ret = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc);
+	ret = gk20a_gr_aiv_to_init(gr, "sw_ctx", &gr->fuc_sw_ctx);
 	if (ret)
 		return ret;
-	gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
-	gf100_gr_dtor_fw(&fuc);
-	if (IS_ERR(gr->fuc_sw_ctx))
-		return PTR_ERR(gr->fuc_sw_ctx);
 
-	ret = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc);
+	ret = gk20a_gr_av_to_init(gr, "sw_bundle_init", &gr->fuc_bundle);
 	if (ret)
 		return ret;
-	gr->fuc_bundle = gk20a_gr_av_to_init(&fuc);
-	gf100_gr_dtor_fw(&fuc);
-	if (IS_ERR(gr->fuc_bundle))
-		return PTR_ERR(gr->fuc_bundle);
 
-	ret = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc);
+	ret = gk20a_gr_av_to_method(gr, "sw_method_init", &gr->fuc_method);
 	if (ret)
 		return ret;
-	gr->fuc_method = gk20a_gr_av_to_method(&fuc);
-	gf100_gr_dtor_fw(&fuc);
-	if (IS_ERR(gr->fuc_method))
-		return PTR_ERR(gr->fuc_method);
 
-	return 0;
-}
 
-static const struct gf100_gr_func
-gk20a_gr = {
-	.dtor = gk20a_gr_dtor,
-	.init = gk20a_gr_init,
-	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
-	.ppc_nr = 1,
-	.grctx = &gk20a_grctx,
-	.sclass = {
-		{ -1, -1, FERMI_TWOD_A },
-		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
-		{ -1, -1, KEPLER_C, &gf100_fermi },
-		{ -1, -1, KEPLER_COMPUTE_A },
-		{}
-	}
-};
-
-int
-gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
-{
-	return gk20a_gr_new_(&gk20a_gr, device, index, pgr);
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
new file mode 100644
index 000000000000..058fc1d22c09
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <subdev/secboot.h>
+
+#include <nvif/class.h>
+
+/*******************************************************************************
+ * PGRAPH engine/subdev functions
+ ******************************************************************************/
+
+int
+gm200_gr_init(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+	u32 data[TPC_MAX / 8] = {}, tmp;
+	u8  tpcnr[GPC_MAX];
+	int gpc, tpc, ppc, rop;
+	int i;
+
+	tmp = nvkm_rd32(device, 0x100c80); /*XXX: mask? */
+	nvkm_wr32(device, 0x418880, 0x00001000 | (tmp & 0x00000fff));
+	nvkm_wr32(device, 0x418890, 0x00000000);
+	nvkm_wr32(device, 0x418894, 0x00000000);
+	nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(gr->unk4188b4) >> 8);
+	nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(gr->unk4188b8) >> 8);
+	nvkm_mask(device, 0x4188b0, 0x00040000, 0x00040000);
+
+	/*XXX: belongs in fb */
+	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8);
+	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8);
+	nvkm_mask(device, 0x100cc4, 0x00040000, 0x00040000);
+
+	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
+
+	gm107_gr_init_bios(gr);
+
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
+
+	memset(data, 0x00, sizeof(data));
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+			gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+
+	nvkm_wr32(device, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000e0000);
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_wr32(device, 0x40584c, 0x00000001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++)
+			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
+		}
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+	}
+
+	for (rop = 0; rop < gr->rop_nr; rop++) {
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
+
+	nvkm_wr32(device, 0x400054, 0x2c350f63);
+
+	gf100_gr_zbc_init(gr);
+
+	return gf100_gr_init_ctxctl(gr);
+}
+
+int
+gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
+	      int index, struct nvkm_gr **pgr)
+{
+	struct gf100_gr *gr;
+	int ret;
+
+	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
+		return -ENOMEM;
+	*pgr = &gr->base;
+
+	ret = gf100_gr_ctor(func, device, index, gr);
+	if (ret)
+		return ret;
+
+	/* Load firmwares for non-secure falcons */
+	if (!nvkm_secboot_is_managed(device->secboot,
+				     NVKM_SECBOOT_FALCON_FECS)) {
+		if ((ret = gf100_gr_ctor_fw(gr, "gr/fecs_inst", &gr->fuc409c)) ||
+		    (ret = gf100_gr_ctor_fw(gr, "gr/fecs_data", &gr->fuc409d)))
+			return ret;
+	}
+	if (!nvkm_secboot_is_managed(device->secboot,
+				     NVKM_SECBOOT_FALCON_GPCCS)) {
+		if ((ret = gf100_gr_ctor_fw(gr, "gr/gpccs_inst", &gr->fuc41ac)) ||
+		    (ret = gf100_gr_ctor_fw(gr, "gr/gpccs_data", &gr->fuc41ad)))
+			return ret;
+	}
+
+	if ((ret = gk20a_gr_av_to_init(gr, "gr/sw_nonctx", &gr->fuc_sw_nonctx)) ||
+	    (ret = gk20a_gr_aiv_to_init(gr, "gr/sw_ctx", &gr->fuc_sw_ctx)) ||
+	    (ret = gk20a_gr_av_to_init(gr, "gr/sw_bundle_init", &gr->fuc_bundle)) ||
+	    (ret = gk20a_gr_av_to_method(gr, "gr/sw_method_init", &gr->fuc_method)))
+		return ret;
+
+	return 0;
+}
+
+static const struct gf100_gr_func
+gm200_gr = {
+	.init = gm200_gr_init,
+	.ppc_nr = 2,
+	.grctx = &gm200_grctx,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, MAXWELL_B, &gf100_fermi },
+		{ -1, -1, MAXWELL_COMPUTE_B },
+		{}
+	}
+};
+
+int
+gm200_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gm200_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
deleted file mode 100644
index 90381dde451a..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
+++ /dev/null
@@ -1,373 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "gf100.h"
-#include "ctxgf100.h"
-
-#include <nvif/class.h>
-
-/*******************************************************************************
- * PGRAPH register lists
- ******************************************************************************/
-
-static const struct gf100_gr_init
-gm204_gr_init_main_0[] = {
-	{ 0x400080,   1, 0x04, 0x003003e2 },
-	{ 0x400088,   1, 0x04, 0xe007bfe7 },
-	{ 0x40008c,   1, 0x04, 0x00060000 },
-	{ 0x400090,   1, 0x04, 0x00000030 },
-	{ 0x40013c,   1, 0x04, 0x003901f3 },
-	{ 0x400140,   1, 0x04, 0x00000100 },
-	{ 0x400144,   1, 0x04, 0x00000000 },
-	{ 0x400148,   1, 0x04, 0x00000110 },
-	{ 0x400138,   1, 0x04, 0x00000000 },
-	{ 0x400130,   2, 0x04, 0x00000000 },
-	{ 0x400124,   1, 0x04, 0x00000002 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_fe_0[] = {
-	{ 0x40415c,   1, 0x04, 0x00000000 },
-	{ 0x404170,   1, 0x04, 0x00000000 },
-	{ 0x4041b4,   1, 0x04, 0x00000000 },
-	{ 0x4041b8,   1, 0x04, 0x00000010 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_ds_0[] = {
-	{ 0x40583c,   1, 0x04, 0x00000000 },
-	{ 0x405844,   1, 0x04, 0x00ffffff },
-	{ 0x40584c,   1, 0x04, 0x00000001 },
-	{ 0x405850,   1, 0x04, 0x00000000 },
-	{ 0x405900,   1, 0x04, 0x00000000 },
-	{ 0x405908,   1, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_sked_0[] = {
-	{ 0x407010,   1, 0x04, 0x00000000 },
-	{ 0x407040,   1, 0x04, 0x80440434 },
-	{ 0x407048,   1, 0x04, 0x00000008 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_tpccs_0[] = {
-	{ 0x419d60,   1, 0x04, 0x0000003f },
-	{ 0x419d88,   3, 0x04, 0x00000000 },
-	{ 0x419dc4,   1, 0x04, 0x00000000 },
-	{ 0x419dc8,   1, 0x04, 0x00000501 },
-	{ 0x419dd0,   1, 0x04, 0x00000000 },
-	{ 0x419dd4,   1, 0x04, 0x00000100 },
-	{ 0x419dd8,   1, 0x04, 0x00000001 },
-	{ 0x419ddc,   1, 0x04, 0x00000002 },
-	{ 0x419de0,   1, 0x04, 0x00000001 },
-	{ 0x419de8,   1, 0x04, 0x000000cc },
-	{ 0x419dec,   1, 0x04, 0x00000000 },
-	{ 0x419df0,   1, 0x04, 0x000000cc },
-	{ 0x419df4,   1, 0x04, 0x00000000 },
-	{ 0x419d0c,   1, 0x04, 0x00000000 },
-	{ 0x419d10,   1, 0x04, 0x00000014 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_pe_0[] = {
-	{ 0x419900,   1, 0x04, 0x000000ff },
-	{ 0x419810,   1, 0x04, 0x00000000 },
-	{ 0x41980c,   1, 0x04, 0x00000010 },
-	{ 0x419844,   1, 0x04, 0x00000000 },
-	{ 0x419838,   1, 0x04, 0x000000ff },
-	{ 0x419850,   1, 0x04, 0x00000004 },
-	{ 0x419854,   2, 0x04, 0x00000000 },
-	{ 0x419894,   3, 0x04, 0x00100401 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_sm_0[] = {
-	{ 0x419e30,   1, 0x04, 0x000000ff },
-	{ 0x419e00,   1, 0x04, 0x00000000 },
-	{ 0x419ea0,   1, 0x04, 0x00000000 },
-	{ 0x419ee4,   1, 0x04, 0x00000000 },
-	{ 0x419ea4,   1, 0x04, 0x00000100 },
-	{ 0x419ea8,   1, 0x04, 0x00000000 },
-	{ 0x419ee8,   1, 0x04, 0x00000091 },
-	{ 0x419eb4,   1, 0x04, 0x00000000 },
-	{ 0x419ebc,   2, 0x04, 0x00000000 },
-	{ 0x419edc,   1, 0x04, 0x000c1810 },
-	{ 0x419ed8,   1, 0x04, 0x00000000 },
-	{ 0x419ee0,   1, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_l1c_1[] = {
-	{ 0x419cf8,   2, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_sm_1[] = {
-	{ 0x419f74,   1, 0x04, 0x00055155 },
-	{ 0x419f80,   4, 0x04, 0x00000000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_l1c_2[] = {
-	{ 0x419ccc,   2, 0x04, 0x00000000 },
-	{ 0x419c80,   1, 0x04, 0x3f006022 },
-	{ 0x419c88,   1, 0x04, 0x00210000 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_pes_0[] = {
-	{ 0x41be50,   1, 0x04, 0x000000ff },
-	{ 0x41be04,   1, 0x04, 0x00000000 },
-	{ 0x41be08,   1, 0x04, 0x00000004 },
-	{ 0x41be0c,   1, 0x04, 0x00000008 },
-	{ 0x41be10,   1, 0x04, 0x2e3b8bc7 },
-	{ 0x41be14,   2, 0x04, 0x00000000 },
-	{ 0x41be3c,   5, 0x04, 0x00100401 },
-	{}
-};
-
-static const struct gf100_gr_init
-gm204_gr_init_be_0[] = {
-	{ 0x408890,   1, 0x04, 0x000000ff },
-	{ 0x40880c,   1, 0x04, 0x00000000 },
-	{ 0x408850,   1, 0x04, 0x00000004 },
-	{ 0x408878,   1, 0x04, 0x01b4201c },
-	{ 0x40887c,   1, 0x04, 0x80004c55 },
-	{ 0x408880,   1, 0x04, 0x0018c258 },
-	{ 0x408884,   1, 0x04, 0x0000160f },
-	{ 0x408974,   1, 0x04, 0x000000ff },
-	{ 0x408910,   9, 0x04, 0x00000000 },
-	{ 0x408950,   1, 0x04, 0x00000000 },
-	{ 0x408954,   1, 0x04, 0x0000ffff },
-	{ 0x408958,   1, 0x04, 0x00000034 },
-	{ 0x40895c,   1, 0x04, 0x84b17403 },
-	{ 0x408960,   1, 0x04, 0x04c1884f },
-	{ 0x408964,   1, 0x04, 0x04714445 },
-	{ 0x408968,   1, 0x04, 0x0280802f },
-	{ 0x40896c,   1, 0x04, 0x04304856 },
-	{ 0x408970,   1, 0x04, 0x00012800 },
-	{ 0x408984,   1, 0x04, 0x00000000 },
-	{ 0x408988,   1, 0x04, 0x08040201 },
-	{ 0x40898c,   1, 0x04, 0x80402010 },
-	{}
-};
-
-const struct gf100_gr_pack
-gm204_gr_pack_mmio[] = {
-	{ gm204_gr_init_main_0 },
-	{ gm204_gr_init_fe_0 },
-	{ gf100_gr_init_pri_0 },
-	{ gf100_gr_init_rstr2d_0 },
-	{ gf100_gr_init_pd_0 },
-	{ gm204_gr_init_ds_0 },
-	{ gm107_gr_init_scc_0 },
-	{ gm204_gr_init_sked_0 },
-	{ gk110_gr_init_cwd_0 },
-	{ gm107_gr_init_prop_0 },
-	{ gk208_gr_init_gpc_unk_0 },
-	{ gf100_gr_init_setup_0 },
-	{ gf100_gr_init_crstr_0 },
-	{ gm107_gr_init_setup_1 },
-	{ gm107_gr_init_zcull_0 },
-	{ gf100_gr_init_gpm_0 },
-	{ gm107_gr_init_gpc_unk_1 },
-	{ gf100_gr_init_gcc_0 },
-	{ gm204_gr_init_tpccs_0 },
-	{ gm107_gr_init_tex_0 },
-	{ gm204_gr_init_pe_0 },
-	{ gm107_gr_init_l1c_0 },
-	{ gf100_gr_init_mpc_0 },
-	{ gm204_gr_init_sm_0 },
-	{ gm204_gr_init_l1c_1 },
-	{ gm204_gr_init_sm_1 },
-	{ gm204_gr_init_l1c_2 },
-	{ gm204_gr_init_pes_0 },
-	{ gm107_gr_init_wwdx_0 },
-	{ gm107_gr_init_cbm_0 },
-	{ gm204_gr_init_be_0 },
-	{}
-};
-
-const struct gf100_gr_pack *
-gm204_gr_data[] = {
-	gm204_gr_pack_mmio,
-	NULL
-};
-
-/*******************************************************************************
- * PGRAPH engine/subdev functions
- ******************************************************************************/
-
-static int
-gm204_gr_init_ctxctl(struct gf100_gr *gr)
-{
-	return 0;
-}
-
-int
-gm204_gr_init(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {}, tmp;
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, ppc, rop;
-	int i;
-
-	tmp = nvkm_rd32(device, 0x100c80); /*XXX: mask? */
-	nvkm_wr32(device, 0x418880, 0x00001000 | (tmp & 0x00000fff));
-	nvkm_wr32(device, 0x418890, 0x00000000);
-	nvkm_wr32(device, 0x418894, 0x00000000);
-	nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(gr->unk4188b4) >> 8);
-	nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(gr->unk4188b8) >> 8);
-	nvkm_mask(device, 0x4188b0, 0x00040000, 0x00040000);
-
-	/*XXX: belongs in fb */
-	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8);
-	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8);
-	nvkm_mask(device, 0x100cc4, 0x00040000, 0x00040000);
-
-	gf100_gr_mmio(gr, gr->func->mmio);
-
-	gm107_gr_init_bios(gr);
-
-	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-			gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
-
-	nvkm_wr32(device, 0x400500, 0x00010001);
-	nvkm_wr32(device, 0x400100, 0xffffffff);
-	nvkm_wr32(device, 0x40013c, 0xffffffff);
-	nvkm_wr32(device, 0x400124, 0x00000002);
-	nvkm_wr32(device, 0x409c24, 0x000e0000);
-	nvkm_wr32(device, 0x405848, 0xc0000000);
-	nvkm_wr32(device, 0x40584c, 0x00000001);
-	nvkm_wr32(device, 0x404000, 0xc0000000);
-	nvkm_wr32(device, 0x404600, 0xc0000000);
-	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x404490, 0xc0000000);
-	nvkm_wr32(device, 0x406018, 0xc0000000);
-	nvkm_wr32(device, 0x407020, 0x40000000);
-	nvkm_wr32(device, 0x405840, 0xc0000000);
-	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++)
-			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
-		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
-		}
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
-	}
-
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
-
-	nvkm_wr32(device, 0x400108, 0xffffffff);
-	nvkm_wr32(device, 0x400138, 0xffffffff);
-	nvkm_wr32(device, 0x400118, 0xffffffff);
-	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
-
-	nvkm_wr32(device, 0x400054, 0x2c350f63);
-
-	gf100_gr_zbc_init(gr);
-
-	return gm204_gr_init_ctxctl(gr);
-}
-
-static const struct gf100_gr_func
-gm204_gr = {
-	.init = gm204_gr_init,
-	.mmio = gm204_gr_pack_mmio,
-	.ppc_nr = 2,
-	.grctx = &gm204_grctx,
-	.sclass = {
-		{ -1, -1, FERMI_TWOD_A },
-		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
-		{ -1, -1, MAXWELL_B, &gf100_fermi },
-		{ -1, -1, MAXWELL_COMPUTE_B },
-		{}
-	}
-};
-
-int
-gm204_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
-{
-	return gf100_gr_new_(&gm204_gr, device, index, pgr);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index 65b6e3d1e90d..29732bc14415 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -32,12 +32,15 @@ gm20b_gr_init_gpc_mmu(struct gf100_gr *gr)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 val;
 
-	/* TODO this needs to be removed once secure boot works */
-	if (1) {
+	/* Bypass MMU check for non-secure boot */
+	if (!device->secboot) {
 		nvkm_wr32(device, 0x100ce4, 0xffffffff);
+
+		if (nvkm_rd32(device, 0x100ce4) != 0xffffffff)
+			nvdev_warn(device,
+			  "cannot bypass secure boot - expect failure soon!\n");
 	}
 
-	/* TODO update once secure boot works */
 	val = nvkm_rd32(device, 0x100c80);
 	val &= 0xf000087f;
 	nvkm_wr32(device, 0x418880, val);
@@ -61,7 +64,6 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
 
 static const struct gf100_gr_func
 gm20b_gr = {
-	.dtor = gk20a_gr_dtor,
 	.init = gk20a_gr_init,
 	.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
 	.set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask,
@@ -79,5 +81,5 @@ gm20b_gr = {
 int
 gm20b_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
 {
-	return gk20a_gr_new_(&gm20b_gr, device, index, pgr);
+	return gm200_gr_new_(&gm20b_gr, device, index, pgr);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/msenc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/msenc/Kbuild
new file mode 100644
index 000000000000..b5119564f608
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/msenc/Kbuild
@@ -0,0 +1 @@
+#nvkm-y += nvkm/engine/msenc/base.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild
new file mode 100644
index 000000000000..13b7c71ff900
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild
@@ -0,0 +1 @@
+#nvkm-y += nvkm/engine/nvdec/base.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild
new file mode 100644
index 000000000000..ad8f1820fa53
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/Kbuild
@@ -0,0 +1 @@
+#nvkm-y += nvkm/engine/nvenc/base.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
index 4bef72a9d106..3fda594700e0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
@@ -59,9 +59,11 @@ static void
 nv40_perfctr_next(struct nvkm_pm *pm, struct nvkm_perfdom *dom)
 {
 	struct nvkm_device *device = pm->engine.subdev.device;
-	if (pm->sequence != pm->sequence) {
+	struct nv40_pm *nv40pm = container_of(pm, struct nv40_pm, base);
+
+	if (nv40pm->sequence != pm->sequence) {
 		nvkm_wr32(device, 0x400084, 0x00000020);
-		pm->sequence = pm->sequence;
+		nv40pm->sequence = pm->sequence;
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/vic/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/vic/Kbuild
new file mode 100644
index 000000000000..ed4fb6488013
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/vic/Kbuild
@@ -0,0 +1 @@
+#nvkm-y += nvkm/engine/vic/base.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
index ee2c38f50ef5..642d27dc99a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
@@ -8,6 +8,7 @@ include $(src)/nvkm/subdev/fuse/Kbuild
 include $(src)/nvkm/subdev/gpio/Kbuild
 include $(src)/nvkm/subdev/i2c/Kbuild
 include $(src)/nvkm/subdev/ibus/Kbuild
+include $(src)/nvkm/subdev/iccsense/Kbuild
 include $(src)/nvkm/subdev/instmem/Kbuild
 include $(src)/nvkm/subdev/ltc/Kbuild
 include $(src)/nvkm/subdev/mc/Kbuild
@@ -15,6 +16,7 @@ include $(src)/nvkm/subdev/mmu/Kbuild
 include $(src)/nvkm/subdev/mxm/Kbuild
 include $(src)/nvkm/subdev/pci/Kbuild
 include $(src)/nvkm/subdev/pmu/Kbuild
+include $(src)/nvkm/subdev/secboot/Kbuild
 include $(src)/nvkm/subdev/therm/Kbuild
 include $(src)/nvkm/subdev/timer/Kbuild
 include $(src)/nvkm/subdev/volt/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
index 64730d5e9351..dbcb0ef21587 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
@@ -10,6 +10,7 @@ nvkm-y += nvkm/subdev/bios/extdev.o
 nvkm-y += nvkm/subdev/bios/fan.o
 nvkm-y += nvkm/subdev/bios/gpio.o
 nvkm-y += nvkm/subdev/bios/i2c.o
+nvkm-y += nvkm/subdev/bios/iccsense.o
 nvkm-y += nvkm/subdev/bios/image.o
 nvkm-y += nvkm/subdev/bios/init.o
 nvkm-y += nvkm/subdev/bios/mxm.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/extdev.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/extdev.c
index c9e6f6ff7c50..b8578359e61b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/extdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/extdev.c
@@ -32,7 +32,7 @@ extdev_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *len, u8 *cnt)
 	u16 dcb, extdev = 0;
 
 	dcb = dcb_table(bios, &dcb_ver, &dcb_hdr, &dcb_cnt, &dcb_len);
-	if (!dcb || (dcb_ver != 0x30 && dcb_ver != 0x40))
+	if (!dcb || (dcb_ver != 0x30 && dcb_ver != 0x40 && dcb_ver != 0x41))
 		return 0x0000;
 
 	extdev = nvbios_rd16(bios, dcb + 18);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c
new file mode 100644
index 000000000000..084328028af1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2015 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/iccsense.h>
+
+static u16
+nvbios_iccsense_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt,
+		      u8 *len)
+{
+	struct bit_entry bit_P;
+	u16 iccsense;
+
+	if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 ||
+	    bit_P.length < 0x2c)
+		return 0;
+
+	iccsense = nvbios_rd16(bios, bit_P.offset + 0x28);
+	if (!iccsense)
+		return 0;
+
+	*ver = nvbios_rd08(bios, iccsense + 0);
+	switch (*ver) {
+	case 0x10:
+	case 0x20:
+		*hdr = nvbios_rd08(bios, iccsense + 1);
+		*len = nvbios_rd08(bios, iccsense + 2);
+		*cnt = nvbios_rd08(bios, iccsense + 3);
+		return iccsense;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int
+nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense)
+{
+	struct nvkm_subdev *subdev = &bios->subdev;
+	u8 ver, hdr, cnt, len, i;
+	u16 table, entry;
+
+	table = nvbios_iccsense_table(bios, &ver, &hdr, &cnt, &len);
+	if (!table || !cnt)
+		return -EINVAL;
+
+	if (ver != 0x10 && ver != 0x20) {
+		nvkm_error(subdev, "ICCSENSE version 0x%02x unknown\n", ver);
+		return -EINVAL;
+	}
+
+	iccsense->nr_entry = cnt;
+	iccsense->rail = kmalloc(sizeof(struct pwr_rail_t) * cnt, GFP_KERNEL);
+	if (!iccsense->rail)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; ++i) {
+		struct pwr_rail_t *rail = &iccsense->rail[i];
+		entry = table + hdr + i * len;
+
+		switch(ver) {
+		case 0x10:
+			rail->mode = nvbios_rd08(bios, entry + 0x1);
+			rail->extdev_id = nvbios_rd08(bios, entry + 0x2);
+			rail->resistor_mohm = nvbios_rd08(bios, entry + 0x3);
+			rail->rail = nvbios_rd08(bios, entry + 0x4);
+			break;
+		case 0x20:
+			rail->mode = nvbios_rd08(bios, entry);
+			rail->extdev_id = nvbios_rd08(bios, entry + 0x1);
+			rail->resistor_mohm = nvbios_rd08(bios, entry + 0x5);
+			rail->rail = nvbios_rd08(bios, entry + 0x6);
+			break;
+		};
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c
index a7d69ce7abc1..38ed09fd3d2f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c
@@ -786,20 +786,20 @@ init_io_flag_condition(struct nvbios_init *init)
 }
 
 /**
- * INIT_DP_CONDITION - opcode 0x3a
+ * INIT_GENERIC_CONDITION - opcode 0x3a
  *
  */
 static void
-init_dp_condition(struct nvbios_init *init)
+init_generic_condition(struct nvbios_init *init)
 {
 	struct nvkm_bios *bios = init->bios;
 	struct nvbios_dpout info;
 	u8  cond = nvbios_rd08(bios, init->offset + 1);
-	u8  unkn = nvbios_rd08(bios, init->offset + 2);
+	u8  size = nvbios_rd08(bios, init->offset + 2);
 	u8  ver, hdr, cnt, len;
 	u16 data;
 
-	trace("DP_CONDITION\t0x%02x 0x%02x\n", cond, unkn);
+	trace("GENERIC_CONDITION\t0x%02x 0x%02x\n", cond, size);
 	init->offset += 3;
 
 	switch (cond) {
@@ -828,7 +828,8 @@ init_dp_condition(struct nvbios_init *init)
 			init_exec_set(init, false);
 		break;
 	default:
-		warn("unknown dp condition 0x%02x\n", cond);
+		warn("INIT_GENERIC_CONDITON: unknown 0x%02x\n", cond);
+		init->offset += size;
 		break;
 	}
 }
@@ -2205,7 +2206,7 @@ static struct nvbios_init_opcode {
 	[0x37] = { init_copy },
 	[0x38] = { init_not },
 	[0x39] = { init_io_flag_condition },
-	[0x3a] = { init_dp_condition },
+	[0x3a] = { init_generic_condition },
 	[0x3b] = { init_io_mask_or },
 	[0x3c] = { init_io_or },
 	[0x47] = { init_andn_reg },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
index ed7717bcc3a1..87d94883f790 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/Kbuild
@@ -8,6 +8,7 @@ nvkm-y += nvkm/subdev/clk/mcp77.o
 nvkm-y += nvkm/subdev/clk/gf100.o
 nvkm-y += nvkm/subdev/clk/gk104.o
 nvkm-y += nvkm/subdev/clk/gk20a.o
+nvkm-y += nvkm/subdev/clk/gm20b.o
 
 nvkm-y += nvkm/subdev/clk/pllnv04.o
 nvkm-y += nvkm/subdev/clk/pllgt215.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
index 5da2aa8cc333..5f0ee24e31b8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -22,19 +22,17 @@
  * Shamelessly ripped off from ChromeOS's gk20a/clk_pllg.c
  *
  */
-#define gk20a_clk(p) container_of((p), struct gk20a_clk, base)
 #include "priv.h"
+#include "gk20a.h"
 
 #include <core/tegra.h>
 #include <subdev/timer.h>
 
-#define MHZ (1000 * 1000)
+#define KHZ (1000)
+#define MHZ (KHZ * 1000)
 
 #define MASK(w)	((1 << w) - 1)
 
-#define SYS_GPCPLL_CFG_BASE			0x00137000
-#define GPC_BCASE_GPCPLL_CFG_BASE		0x00132800
-
 #define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
 #define GPCPLL_CFG_ENABLE	BIT(0)
 #define GPCPLL_CFG_IDDQ		BIT(1)
@@ -56,6 +54,7 @@
 #define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
 #define GPCPLL_CFG3_PLL_STEPB_SHIFT	16
 
+#define GPC_BCASE_GPCPLL_CFG_BASE		0x00132800
 #define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
 #define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
 #define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
@@ -75,7 +74,7 @@
 #define GPC2CLK_OUT_VCODIV1		0
 #define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
 					GPC2CLK_OUT_VCODIV_SHIFT)
-#define	GPC2CLK_OUT_BYPDIV_WIDTH	6
+#define GPC2CLK_OUT_BYPDIV_WIDTH	6
 #define GPC2CLK_OUT_BYPDIV_SHIFT	0
 #define GPC2CLK_OUT_BYPDIV31		0x3c
 #define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
@@ -92,45 +91,49 @@
 #define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
 	    (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
 
-static const u8 pl_to_div[] = {
+static const u8 _pl_to_div[] = {
 /* PL:   0, 1, 2, 3, 4, 5, 6,  7,  8,  9, 10, 11, 12, 13, 14 */
 /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32,
 };
 
-/* All frequencies in Mhz */
-struct gk20a_clk_pllg_params {
-	u32 min_vco, max_vco;
-	u32 min_u, max_u;
-	u32 min_m, max_m;
-	u32 min_n, max_n;
-	u32 min_pl, max_pl;
-};
+static u32 pl_to_div(u32 pl)
+{
+	if (pl >= ARRAY_SIZE(_pl_to_div))
+		return 1;
+
+	return _pl_to_div[pl];
+}
+
+static u32 div_to_pl(u32 div)
+{
+	u32 pl;
+
+	for (pl = 0; pl < ARRAY_SIZE(_pl_to_div) - 1; pl++) {
+		if (_pl_to_div[pl] >= div)
+			return pl;
+	}
+
+	return ARRAY_SIZE(_pl_to_div) - 1;
+}
 
 static const struct gk20a_clk_pllg_params gk20a_pllg_params = {
-	.min_vco = 1000, .max_vco = 2064,
-	.min_u = 12, .max_u = 38,
+	.min_vco = 1000000, .max_vco = 2064000,
+	.min_u = 12000, .max_u = 38000,
 	.min_m = 1, .max_m = 255,
 	.min_n = 8, .max_n = 255,
 	.min_pl = 1, .max_pl = 32,
 };
 
-struct gk20a_clk {
-	struct nvkm_clk base;
-	const struct gk20a_clk_pllg_params *params;
-	u32 m, n, pl;
-	u32 parent_rate;
-};
-
 static void
-gk20a_pllg_read_mnp(struct gk20a_clk *clk)
+gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 	u32 val;
 
 	val = nvkm_rd32(device, GPCPLL_COEFF);
-	clk->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH);
-	clk->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH);
-	clk->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
+	pll->m = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH);
+	pll->n = (val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH);
+	pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
 }
 
 static u32
@@ -139,8 +142,8 @@ gk20a_pllg_calc_rate(struct gk20a_clk *clk)
 	u32 rate;
 	u32 divider;
 
-	rate = clk->parent_rate * clk->n;
-	divider = clk->m * pl_to_div[clk->pl];
+	rate = clk->parent_rate * clk->pll.n;
+	divider = clk->pll.m * clk->pl_to_div(clk->pll.pl);
 
 	return rate / divider / 2;
 }
@@ -152,15 +155,13 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 	u32 target_clk_f, ref_clk_f, target_freq;
 	u32 min_vco_f, max_vco_f;
 	u32 low_pl, high_pl, best_pl;
-	u32 target_vco_f, vco_f;
+	u32 target_vco_f;
 	u32 best_m, best_n;
-	u32 u_f;
-	u32 m, n, n2;
-	u32 delta, lwv, best_delta = ~0;
+	u32 best_delta = ~0;
 	u32 pl;
 
-	target_clk_f = rate * 2 / MHZ;
-	ref_clk_f = clk->parent_rate / MHZ;
+	target_clk_f = rate * 2 / KHZ;
+	ref_clk_f = clk->parent_rate / KHZ;
 
 	max_vco_f = clk->params->max_vco;
 	min_vco_f = clk->params->min_vco;
@@ -176,33 +177,26 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 	high_pl = (max_vco_f + target_vco_f - 1) / target_vco_f;
 	high_pl = min(high_pl, clk->params->max_pl);
 	high_pl = max(high_pl, clk->params->min_pl);
+	high_pl = clk->div_to_pl(high_pl);
 
 	/* min_pl <= low_pl <= max_pl */
 	low_pl = min_vco_f / target_vco_f;
 	low_pl = min(low_pl, clk->params->max_pl);
 	low_pl = max(low_pl, clk->params->min_pl);
-
-	/* Find Indices of high_pl and low_pl */
-	for (pl = 0; pl < ARRAY_SIZE(pl_to_div) - 1; pl++) {
-		if (pl_to_div[pl] >= low_pl) {
-			low_pl = pl;
-			break;
-		}
-	}
-	for (pl = 0; pl < ARRAY_SIZE(pl_to_div) - 1; pl++) {
-		if (pl_to_div[pl] >= high_pl) {
-			high_pl = pl;
-			break;
-		}
-	}
+	low_pl = clk->div_to_pl(low_pl);
 
 	nvkm_debug(subdev, "low_PL %d(div%d), high_PL %d(div%d)", low_pl,
-		   pl_to_div[low_pl], high_pl, pl_to_div[high_pl]);
+		   clk->pl_to_div(low_pl), high_pl, clk->pl_to_div(high_pl));
 
 	/* Select lowest possible VCO */
 	for (pl = low_pl; pl <= high_pl; pl++) {
-		target_vco_f = target_clk_f * pl_to_div[pl];
+		u32 m, n, n2;
+
+		target_vco_f = target_clk_f * clk->pl_to_div(pl);
+
 		for (m = clk->params->min_m; m <= clk->params->max_m; m++) {
+			u32 u_f, vco_f;
+
 			u_f = ref_clk_f / m;
 
 			if (u_f < clk->params->min_u)
@@ -225,8 +219,10 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 				vco_f = ref_clk_f * n / m;
 
 				if (vco_f >= min_vco_f && vco_f <= max_vco_f) {
-					lwv = (vco_f + (pl_to_div[pl] / 2))
-						/ pl_to_div[pl];
+					u32 delta, lwv;
+
+					lwv = (vco_f + (clk->pl_to_div(pl) / 2))
+						/ clk->pl_to_div(pl);
 					delta = abs(lwv - target_clk_f);
 
 					if (delta < best_delta) {
@@ -249,17 +245,18 @@ found_match:
 	if (best_delta != 0)
 		nvkm_debug(subdev,
 			   "no best match for target @ %dMHz on gpc_pll",
-			   target_clk_f);
+			   target_clk_f / KHZ);
 
-	clk->m = best_m;
-	clk->n = best_n;
-	clk->pl = best_pl;
+	clk->pll.m = best_m;
+	clk->pll.n = best_n;
+	clk->pll.pl = best_pl;
 
-	target_freq = gk20a_pllg_calc_rate(clk) / MHZ;
+	target_freq = gk20a_pllg_calc_rate(clk);
 
 	nvkm_debug(subdev,
 		   "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n",
-		   target_freq, clk->m, clk->n, clk->pl, pl_to_div[clk->pl]);
+		   target_freq / MHZ, clk->pll.m, clk->pll.n, clk->pll.pl,
+		   clk->pl_to_div(clk->pll.pl));
 	return 0;
 }
 
@@ -323,17 +320,19 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 }
 
 static void
-_gk20a_pllg_enable(struct gk20a_clk *clk)
+gk20a_pllg_enable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
+
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
 	nvkm_rd32(device, GPCPLL_CFG);
 }
 
 static void
-_gk20a_pllg_disable(struct gk20a_clk *clk)
+gk20a_pllg_disable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
+
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
 	nvkm_rd32(device, GPCPLL_CFG);
 }
@@ -344,25 +343,26 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 val, cfg;
-	u32 m_old, pl_old, n_lo;
+	struct gk20a_pll old_pll;
+	u32 n_lo;
 
 	/* get old coefficients */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
-	m_old = (val >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH);
-	pl_old = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
+	gk20a_pllg_read_mnp(clk, &old_pll);
 
 	/* do NDIV slide if there is no change in M and PL */
 	cfg = nvkm_rd32(device, GPCPLL_CFG);
-	if (allow_slide && clk->m == m_old && clk->pl == pl_old &&
-	    (cfg & GPCPLL_CFG_ENABLE)) {
-		return gk20a_pllg_slide(clk, clk->n);
+	if (allow_slide && clk->pll.m == old_pll.m &&
+	    clk->pll.pl == old_pll.pl && (cfg & GPCPLL_CFG_ENABLE)) {
+		return gk20a_pllg_slide(clk, clk->pll.n);
 	}
 
 	/* slide down to NDIV_LO */
-	n_lo = DIV_ROUND_UP(m_old * clk->params->min_vco,
-			    clk->parent_rate / MHZ);
 	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) {
-		int ret = gk20a_pllg_slide(clk, n_lo);
+		int ret;
+
+		n_lo = DIV_ROUND_UP(old_pll.m * clk->params->min_vco,
+				    clk->parent_rate / KHZ);
+		ret = gk20a_pllg_slide(clk, n_lo);
 
 		if (ret)
 			return ret;
@@ -387,19 +387,19 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
 		udelay(2);
 	}
 
-	_gk20a_pllg_disable(clk);
+	gk20a_pllg_disable(clk);
 
 	nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__,
-		   clk->m, clk->n, clk->pl);
+		   clk->pll.m, clk->pll.n, clk->pll.pl);
 
-	n_lo = DIV_ROUND_UP(clk->m * clk->params->min_vco,
-			    clk->parent_rate / MHZ);
-	val = clk->m << GPCPLL_COEFF_M_SHIFT;
-	val |= (allow_slide ? n_lo : clk->n) << GPCPLL_COEFF_N_SHIFT;
-	val |= clk->pl << GPCPLL_COEFF_P_SHIFT;
+	n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco,
+			    clk->parent_rate / KHZ);
+	val = clk->pll.m << GPCPLL_COEFF_M_SHIFT;
+	val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT;
+	val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT;
 	nvkm_wr32(device, GPCPLL_COEFF, val);
 
-	_gk20a_pllg_enable(clk);
+	gk20a_pllg_enable(clk);
 
 	val = nvkm_rd32(device, GPCPLL_CFG);
 	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
@@ -414,16 +414,24 @@ _gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
 		return -ETIMEDOUT;
 
 	/* switch to VCO mode */
-	nvkm_mask(device, SEL_VCO, 0, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
 
 	/* restore out divider 1:1 */
 	val = nvkm_rd32(device, GPC2CLK_OUT);
-	val &= ~GPC2CLK_OUT_VCODIV_MASK;
-	udelay(2);
-	nvkm_wr32(device, GPC2CLK_OUT, val);
+	if ((val & GPC2CLK_OUT_VCODIV_MASK) !=
+	    (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) {
+		val &= ~GPC2CLK_OUT_VCODIV_MASK;
+		val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT;
+		udelay(2);
+		nvkm_wr32(device, GPC2CLK_OUT, val);
+		/* Intentional 2nd write to assure linear divider operation */
+		nvkm_wr32(device, GPC2CLK_OUT, val);
+		nvkm_rd32(device, GPC2CLK_OUT);
+	}
 
 	/* slide up to new NDIV */
-	return allow_slide ? gk20a_pllg_slide(clk, clk->n) : 0;
+	return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0;
 }
 
 static int
@@ -438,32 +446,6 @@ gk20a_pllg_program_mnp(struct gk20a_clk *clk)
 	return err;
 }
 
-static void
-gk20a_pllg_disable(struct gk20a_clk *clk)
-{
-	struct nvkm_device *device = clk->base.subdev.device;
-	u32 val;
-
-	/* slide to VCO min */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_ENABLE) {
-		u32 coeff, m, n_lo;
-
-		coeff = nvkm_rd32(device, GPCPLL_COEFF);
-		m = (coeff >> GPCPLL_COEFF_M_SHIFT) & MASK(GPCPLL_COEFF_M_WIDTH);
-		n_lo = DIV_ROUND_UP(m * clk->params->min_vco,
-				    clk->parent_rate / MHZ);
-		gk20a_pllg_slide(clk, n_lo);
-	}
-
-	/* put PLL in bypass before disabling it */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
-
-	_gk20a_pllg_disable(clk);
-}
-
-#define GK20A_CLK_GPC_MDIV 1000
-
 static struct nvkm_pstate
 gk20a_pstates[] = {
 	{
@@ -558,7 +540,7 @@ gk20a_pstates[] = {
 	},
 };
 
-static int
+int
 gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
@@ -569,7 +551,7 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	case nv_clk_src_crystal:
 		return device->crystal;
 	case nv_clk_src_gpc:
-		gk20a_pllg_read_mnp(clk);
+		gk20a_pllg_read_mnp(clk, &clk->pll);
 		return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV;
 	default:
 		nvkm_error(subdev, "invalid clock source %d\n", src);
@@ -577,7 +559,7 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	}
 }
 
-static int
+int
 gk20a_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
@@ -586,7 +568,7 @@ gk20a_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
 					 GK20A_CLK_GPC_MDIV);
 }
 
-static int
+int
 gk20a_clk_prog(struct nvkm_clk *base)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
@@ -594,15 +576,33 @@ gk20a_clk_prog(struct nvkm_clk *base)
 	return gk20a_pllg_program_mnp(clk);
 }
 
-static void
+void
 gk20a_clk_tidy(struct nvkm_clk *base)
 {
 }
 
-static void
+void
 gk20a_clk_fini(struct nvkm_clk *base)
 {
+	struct nvkm_device *device = base->subdev.device;
 	struct gk20a_clk *clk = gk20a_clk(base);
+	u32 val;
+
+	/* slide to VCO min */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_ENABLE) {
+		struct gk20a_pll pll;
+		u32 n_lo;
+
+		gk20a_pllg_read_mnp(clk, &pll);
+		n_lo = DIV_ROUND_UP(pll.m * clk->params->min_vco,
+				    clk->parent_rate / KHZ);
+		gk20a_pllg_slide(clk, n_lo);
+	}
+
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
 	gk20a_pllg_disable(clk);
 }
 
@@ -614,9 +614,12 @@ gk20a_clk_init(struct nvkm_clk *base)
 	struct nvkm_device *device = subdev->device;
 	int ret;
 
-	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK, GPC2CLK_OUT_INIT_VAL);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
+		  GPC2CLK_OUT_INIT_VAL);
 
-	ret = gk20a_clk_prog(&clk->base);
+	/* Start with lowest frequency */
+	base->func->calc(base, &base->func->pstates[0].base);
+	ret = base->func->prog(&clk->base);
 	if (ret) {
 		nvkm_error(subdev, "cannot initialize clock\n");
 		return ret;
@@ -643,27 +646,50 @@ gk20a_clk = {
 };
 
 int
-gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+_gk20a_clk_ctor(struct nvkm_device *device, int index,
+		const struct nvkm_clk_func *func,
+		const struct gk20a_clk_pllg_params *params,
+		struct gk20a_clk *clk)
 {
 	struct nvkm_device_tegra *tdev = device->func->tegra(device);
+	int ret;
+	int i;
+
+	/* Finish initializing the pstates */
+	for (i = 0; i < func->nr_pstates; i++) {
+		INIT_LIST_HEAD(&func->pstates[i].list);
+		func->pstates[i].pstate = i + 1;
+	}
+
+	clk->params = params;
+	clk->parent_rate = clk_get_rate(tdev->clk);
+
+	ret = nvkm_clk_ctor(func, device, index, true, &clk->base);
+	if (ret)
+		return ret;
+
+	nvkm_debug(&clk->base.subdev, "parent clock rate: %d Khz\n",
+		   clk->parent_rate / KHZ);
+
+	return 0;
+}
+
+int
+gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+{
 	struct gk20a_clk *clk;
-	int ret, i;
+	int ret;
 
-	if (!(clk = kzalloc(sizeof(*clk), GFP_KERNEL)))
+	clk = kzalloc(sizeof(*clk), GFP_KERNEL);
+	if (!clk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	/* Finish initializing the pstates */
-	for (i = 0; i < ARRAY_SIZE(gk20a_pstates); i++) {
-		INIT_LIST_HEAD(&gk20a_pstates[i].list);
-		gk20a_pstates[i].pstate = i + 1;
-	}
+	ret = _gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
+			      clk);
 
-	clk->params = &gk20a_pllg_params;
-	clk->parent_rate = clk_get_rate(tdev->clk);
+	clk->pl_to_div = pl_to_div;
+	clk->div_to_pl = div_to_pl;
 
-	ret = nvkm_clk_ctor(&gk20a_clk, device, index, true, &clk->base);
-	nvkm_info(&clk->base.subdev, "parent clock rate: %d Mhz\n",
-		  clk->parent_rate / MHZ);
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
new file mode 100644
index 000000000000..13c46740197d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NVKM_CLK_GK20A_H__
+#define __NVKM_CLK_GK20A_H__
+
+#define GK20A_CLK_GPC_MDIV 1000
+
+#define SYS_GPCPLL_CFG_BASE	0x00137000
+
+/* All frequencies in Khz */
+struct gk20a_clk_pllg_params {
+	u32 min_vco, max_vco;
+	u32 min_u, max_u;
+	u32 min_m, max_m;
+	u32 min_n, max_n;
+	u32 min_pl, max_pl;
+};
+
+struct gk20a_pll {
+	u32 m;
+	u32 n;
+	u32 pl;
+};
+
+struct gk20a_clk {
+	struct nvkm_clk base;
+	const struct gk20a_clk_pllg_params *params;
+	struct gk20a_pll pll;
+	u32 parent_rate;
+
+	u32 (*div_to_pl)(u32);
+	u32 (*pl_to_div)(u32);
+};
+#define gk20a_clk(p) container_of((p), struct gk20a_clk, base)
+
+int _gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
+		    const struct gk20a_clk_pllg_params *, struct gk20a_clk *);
+void gk20a_clk_fini(struct nvkm_clk *);
+int gk20a_clk_read(struct nvkm_clk *, enum nv_clk_src);
+int gk20a_clk_calc(struct nvkm_clk *, struct nvkm_cstate *);
+int gk20a_clk_prog(struct nvkm_clk *);
+void gk20a_clk_tidy(struct nvkm_clk *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
new file mode 100644
index 000000000000..71b2bbb61973
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <subdev/clk.h>
+#include <core/device.h>
+
+#include "priv.h"
+#include "gk20a.h"
+
+#define KHZ (1000)
+#define MHZ (KHZ * 1000)
+
+#define MASK(w)	((1 << w) - 1)
+
+#define BYPASSCTRL_SYS	(SYS_GPCPLL_CFG_BASE + 0x340)
+#define BYPASSCTRL_SYS_GPCPLL_SHIFT	0
+#define BYPASSCTRL_SYS_GPCPLL_WIDTH	1
+
+static u32 pl_to_div(u32 pl)
+{
+	return pl;
+}
+
+static u32 div_to_pl(u32 div)
+{
+	return div;
+}
+
+static const struct gk20a_clk_pllg_params gm20b_pllg_params = {
+	.min_vco = 1300000, .max_vco = 2600000,
+	.min_u = 12000, .max_u = 38400,
+	.min_m = 1, .max_m = 255,
+	.min_n = 8, .max_n = 255,
+	.min_pl = 1, .max_pl = 31,
+};
+
+static struct nvkm_pstate
+gm20b_pstates[] = {
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 76800,
+			.voltage = 0,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 153600,
+			.voltage = 1,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 230400,
+			.voltage = 2,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 307200,
+			.voltage = 3,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 384000,
+			.voltage = 4,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 460800,
+			.voltage = 5,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 537600,
+			.voltage = 6,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 614400,
+			.voltage = 7,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 691200,
+			.voltage = 8,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 768000,
+			.voltage = 9,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 844800,
+			.voltage = 10,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 921600,
+			.voltage = 11,
+		},
+	},
+	{
+		.base = {
+			.domain[nv_clk_src_gpc] = 998400,
+			.voltage = 12,
+		},
+	},
+
+};
+
+static int
+gm20b_clk_init(struct nvkm_clk *base)
+{
+	struct gk20a_clk *clk = gk20a_clk(base);
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	int ret;
+
+	/* Set the global bypass control to VCO */
+	nvkm_mask(device, BYPASSCTRL_SYS,
+	       MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT,
+	       0);
+
+	/* Start with lowest frequency */
+	base->func->calc(base, &base->func->pstates[0].base);
+	ret = base->func->prog(&clk->base);
+	if (ret) {
+		nvkm_error(subdev, "cannot initialize clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct nvkm_clk_func
+gm20b_clk_speedo0 = {
+	.init = gm20b_clk_init,
+	.fini = gk20a_clk_fini,
+	.read = gk20a_clk_read,
+	.calc = gk20a_clk_calc,
+	.prog = gk20a_clk_prog,
+	.tidy = gk20a_clk_tidy,
+	.pstates = gm20b_pstates,
+	.nr_pstates = ARRAY_SIZE(gm20b_pstates) - 1,
+	.domains = {
+		{ nv_clk_src_crystal, 0xff },
+		{ nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV },
+		{ nv_clk_src_max },
+	},
+};
+
+int
+gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+{
+	struct gk20a_clk *clk;
+	int ret;
+
+	clk = kzalloc(sizeof(*clk), GFP_KERNEL);
+	if (!clk)
+		return -ENOMEM;
+	*pclk = &clk->base;
+
+	ret = _gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
+			      &gm20b_pllg_params, clk);
+
+	clk->pl_to_div = pl_to_div;
+	clk->div_to_pl = div_to_pl;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
index 793e73d16dac..eac88e3dc6e5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
@@ -11,4 +11,4 @@ nvkm-y += nvkm/subdev/devinit/gt215.o
 nvkm-y += nvkm/subdev/devinit/mcp89.o
 nvkm-y += nvkm/subdev/devinit/gf100.o
 nvkm-y += nvkm/subdev/devinit/gm107.o
-nvkm-y += nvkm/subdev/devinit/gm204.o
+nvkm-y += nvkm/subdev/devinit/gm200.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c
index 22b0140e28c6..2923598b5fe9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c
@@ -90,9 +90,21 @@ gf100_devinit_disable(struct nvkm_devinit *init)
 	return disable;
 }
 
+void
+gf100_devinit_preinit(struct nvkm_devinit *base)
+{
+	struct nv50_devinit *init = nv50_devinit(base);
+	struct nvkm_subdev *subdev = &init->base.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	/* This bit is set by devinit, and flips back to 0 on suspend */
+	if (!base->post)
+		base->post = ((nvkm_rd32(device, 0x2240c) & BIT(1)) == 0);
+}
+
 static const struct nvkm_devinit_func
 gf100_devinit = {
-	.preinit = nv50_devinit_preinit,
+	.preinit = gf100_devinit_preinit,
 	.init = nv50_devinit_init,
 	.post = nv04_devinit_post,
 	.pll_set = gf100_devinit_pll_set,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
index 2be98bd78214..28ca01be3d38 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
@@ -46,7 +46,7 @@ gm107_devinit_disable(struct nvkm_devinit *init)
 
 static const struct nvkm_devinit_func
 gm107_devinit = {
-	.preinit = nv50_devinit_preinit,
+	.preinit = gf100_devinit_preinit,
 	.init = nv50_devinit_init,
 	.post = nv04_devinit_post,
 	.pll_set = gf100_devinit_pll_set,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
index 2b9c3f11b7a8..a410c0db8a08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
@@ -107,7 +107,7 @@ pmu_load(struct nv50_devinit *init, u8 type, bool post,
 }
 
 static int
-gm204_devinit_post(struct nvkm_devinit *base, bool post)
+gm200_devinit_post(struct nvkm_devinit *base, bool post)
 {
 	struct nv50_devinit *init = nv50_devinit(base);
 	struct nvkm_subdev *subdev = &init->base.subdev;
@@ -165,17 +165,17 @@ gm204_devinit_post(struct nvkm_devinit *base, bool post)
 }
 
 static const struct nvkm_devinit_func
-gm204_devinit = {
-	.preinit = nv50_devinit_preinit,
+gm200_devinit = {
+	.preinit = gf100_devinit_preinit,
 	.init = nv50_devinit_init,
-	.post = gm204_devinit_post,
+	.post = gm200_devinit_post,
 	.pll_set = gf100_devinit_pll_set,
 	.disable = gm107_devinit_disable,
 };
 
 int
-gm204_devinit_new(struct nvkm_device *device, int index,
+gm200_devinit_new(struct nvkm_device *device, int index,
 		struct nvkm_devinit **pinit)
 {
-	return nv50_devinit_new_(&gm204_devinit, device, index, pinit);
+	return nv50_devinit_new_(&gm200_devinit, device, index, pinit);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
index 337c2c692dc7..c714b097719c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
@@ -93,28 +93,27 @@ nv50_devinit_disable(struct nvkm_devinit *init)
 void
 nv50_devinit_preinit(struct nvkm_devinit *base)
 {
-	struct nv50_devinit *init = nv50_devinit(base);
-	struct nvkm_subdev *subdev = &init->base.subdev;
+	struct nvkm_subdev *subdev = &base->subdev;
 	struct nvkm_device *device = subdev->device;
 
 	/* our heuristics can't detect whether the board has had its
 	 * devinit scripts executed or not if the display engine is
 	 * missing, assume it's a secondary gpu which requires post
 	 */
-	if (!init->base.post) {
-		u64 disable = nvkm_devinit_disable(&init->base);
+	if (!base->post) {
+		u64 disable = nvkm_devinit_disable(base);
 		if (disable & (1ULL << NVKM_ENGINE_DISP))
-			init->base.post = true;
+			base->post = true;
 	}
 
 	/* magic to detect whether or not x86 vbios code has executed
 	 * the devinit scripts to initialise the board
 	 */
-	if (!init->base.post) {
+	if (!base->post) {
 		if (!nvkm_rdvgac(device, 0, 0x00) &&
 		    !nvkm_rdvgac(device, 0, 0x1a)) {
 			nvkm_debug(subdev, "adaptor not initialised\n");
-			init->base.post = true;
+			base->post = true;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
index 5de70a8486b4..25d2ae3af1c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
@@ -20,6 +20,7 @@ int  gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *,
 			struct nvkm_oclass *, void *, u32,
 			struct nvkm_object **);
 int  gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32);
+void gf100_devinit_preinit(struct nvkm_devinit *);
 
 u64  gm107_devinit_disable(struct nvkm_devinit *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
index 1f730613c237..48f01e40b8fc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild
@@ -6,7 +6,7 @@ nvkm-y += nvkm/subdev/i2c/g94.o
 nvkm-y += nvkm/subdev/i2c/gf117.o
 nvkm-y += nvkm/subdev/i2c/gf119.o
 nvkm-y += nvkm/subdev/i2c/gk104.o
-nvkm-y += nvkm/subdev/i2c/gm204.o
+nvkm-y += nvkm/subdev/i2c/gm200.o
 
 nvkm-y += nvkm/subdev/i2c/pad.o
 nvkm-y += nvkm/subdev/i2c/padnv04.o
@@ -14,7 +14,7 @@ nvkm-y += nvkm/subdev/i2c/padnv4e.o
 nvkm-y += nvkm/subdev/i2c/padnv50.o
 nvkm-y += nvkm/subdev/i2c/padg94.o
 nvkm-y += nvkm/subdev/i2c/padgf119.o
-nvkm-y += nvkm/subdev/i2c/padgm204.o
+nvkm-y += nvkm/subdev/i2c/padgm200.o
 
 nvkm-y += nvkm/subdev/i2c/bus.o
 nvkm-y += nvkm/subdev/i2c/busnv04.o
@@ -25,6 +25,6 @@ nvkm-y += nvkm/subdev/i2c/bit.o
 
 nvkm-y += nvkm/subdev/i2c/aux.o
 nvkm-y += nvkm/subdev/i2c/auxg94.o
-nvkm-y += nvkm/subdev/i2c/auxgm204.o
+nvkm-y += nvkm/subdev/i2c/auxgm200.o
 
 nvkm-y += nvkm/subdev/i2c/anx9805.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
index 35a892e4a4c3..fc6b162fa0b1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h
@@ -18,7 +18,7 @@ int nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *, bool retry, u8 type,
 		      u32 addr, u8 *data, u8 size);
 
 int g94_i2c_aux_new(struct nvkm_i2c_pad *, int, u8, struct nvkm_i2c_aux **);
-int gm204_i2c_aux_new(struct nvkm_i2c_pad *, int, u8, struct nvkm_i2c_aux **);
+int gm200_i2c_aux_new(struct nvkm_i2c_pad *, int, u8, struct nvkm_i2c_aux **);
 
 #define AUX_MSG(b,l,f,a...) do {                                               \
 	struct nvkm_i2c_aux *_aux = (b);                                       \
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
index bed231b56dbd..61d729b82c69 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
@@ -21,23 +21,23 @@
  *
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#define gm204_i2c_aux(p) container_of((p), struct gm204_i2c_aux, base)
+#define gm200_i2c_aux(p) container_of((p), struct gm200_i2c_aux, base)
 #include "aux.h"
 
-struct gm204_i2c_aux {
+struct gm200_i2c_aux {
 	struct nvkm_i2c_aux base;
 	int ch;
 };
 
 static void
-gm204_i2c_aux_fini(struct gm204_i2c_aux *aux)
+gm200_i2c_aux_fini(struct gm200_i2c_aux *aux)
 {
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
 	nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000);
 }
 
 static int
-gm204_i2c_aux_init(struct gm204_i2c_aux *aux)
+gm200_i2c_aux_init(struct gm200_i2c_aux *aux)
 {
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
 	const u32 unksel = 1; /* nfi which to use, or if it matters.. */
@@ -64,7 +64,7 @@ gm204_i2c_aux_init(struct gm204_i2c_aux *aux)
 		udelay(1);
 		if (!timeout--) {
 			AUX_ERR(&aux->base, "magic wait %08x", ctrl);
-			gm204_i2c_aux_fini(aux);
+			gm200_i2c_aux_fini(aux);
 			return -EBUSY;
 		}
 	} while ((ctrl & 0x03000000) != urep);
@@ -73,10 +73,10 @@ gm204_i2c_aux_init(struct gm204_i2c_aux *aux)
 }
 
 static int
-gm204_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
+gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 		   u8 type, u32 addr, u8 *data, u8 size)
 {
-	struct gm204_i2c_aux *aux = gm204_i2c_aux(obj);
+	struct gm200_i2c_aux *aux = gm200_i2c_aux(obj);
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
 	const u32 base = aux->ch * 0x50;
 	u32 ctrl, stat, timeout, retries;
@@ -85,7 +85,7 @@ gm204_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 
 	AUX_TRACE(&aux->base, "%d: %08x %d", type, addr, size);
 
-	ret = gm204_i2c_aux_init(aux);
+	ret = gm200_i2c_aux_init(aux);
 	if (ret < 0)
 		goto out;
 
@@ -155,26 +155,26 @@ gm204_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 	}
 
 out:
-	gm204_i2c_aux_fini(aux);
+	gm200_i2c_aux_fini(aux);
 	return ret < 0 ? ret : (stat & 0x000f0000) >> 16;
 }
 
 static const struct nvkm_i2c_aux_func
-gm204_i2c_aux_func = {
-	.xfer = gm204_i2c_aux_xfer,
+gm200_i2c_aux_func = {
+	.xfer = gm200_i2c_aux_xfer,
 };
 
 int
-gm204_i2c_aux_new(struct nvkm_i2c_pad *pad, int index, u8 drive,
+gm200_i2c_aux_new(struct nvkm_i2c_pad *pad, int index, u8 drive,
 		struct nvkm_i2c_aux **paux)
 {
-	struct gm204_i2c_aux *aux;
+	struct gm200_i2c_aux *aux;
 
 	if (!(aux = kzalloc(sizeof(*aux), GFP_KERNEL)))
 		return -ENOMEM;
 	*paux = &aux->base;
 
-	nvkm_i2c_aux_ctor(&gm204_i2c_aux_func, pad, index, &aux->base);
+	nvkm_i2c_aux_ctor(&gm200_i2c_aux_func, pad, index, &aux->base);
 	aux->ch = drive;
 	aux->base.intr = 1 << aux->ch;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c
index ff9f7d62f6be..a23c5f315221 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c
@@ -25,16 +25,16 @@
 #include "pad.h"
 
 static const struct nvkm_i2c_func
-gm204_i2c = {
+gm200_i2c = {
 	.pad_x_new = gf119_i2c_pad_x_new,
-	.pad_s_new = gm204_i2c_pad_s_new,
+	.pad_s_new = gm200_i2c_pad_s_new,
 	.aux = 8,
 	.aux_stat = gk104_aux_stat,
 	.aux_mask = gk104_aux_mask,
 };
 
 int
-gm204_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c)
+gm200_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c)
 {
-	return nvkm_i2c_new_(&gm204_i2c, device, index, pi2c);
+	return nvkm_i2c_new_(&gm200_i2c, device, index, pi2c);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
index 9eeb992944c6..316c4536f29a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h
@@ -49,11 +49,11 @@ int nv4e_i2c_pad_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 int nv50_i2c_pad_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 int g94_i2c_pad_x_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 int gf119_i2c_pad_x_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
-int gm204_i2c_pad_x_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
+int gm200_i2c_pad_x_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 
 int g94_i2c_pad_s_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 int gf119_i2c_pad_s_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
-int gm204_i2c_pad_s_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
+int gm200_i2c_pad_s_new(struct nvkm_i2c *, int, struct nvkm_i2c_pad **);
 
 int anx9805_pad_new(struct nvkm_i2c_bus *, int, u8, struct nvkm_i2c_pad **);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm200.c
index 24a4d760c67b..7d417f6a816e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/padgm200.c
@@ -26,7 +26,7 @@
 #include "bus.h"
 
 static void
-gm204_i2c_pad_mode(struct nvkm_i2c_pad *pad, enum nvkm_i2c_pad_mode mode)
+gm200_i2c_pad_mode(struct nvkm_i2c_pad *pad, enum nvkm_i2c_pad_mode mode)
 {
 	struct nvkm_subdev *subdev = &pad->i2c->subdev;
 	struct nvkm_device *device = subdev->device;
@@ -51,26 +51,26 @@ gm204_i2c_pad_mode(struct nvkm_i2c_pad *pad, enum nvkm_i2c_pad_mode mode)
 }
 
 static const struct nvkm_i2c_pad_func
-gm204_i2c_pad_s_func = {
+gm200_i2c_pad_s_func = {
 	.bus_new_4 = gf119_i2c_bus_new,
-	.aux_new_6 = gm204_i2c_aux_new,
-	.mode = gm204_i2c_pad_mode,
+	.aux_new_6 = gm200_i2c_aux_new,
+	.mode = gm200_i2c_pad_mode,
 };
 
 int
-gm204_i2c_pad_s_new(struct nvkm_i2c *i2c, int id, struct nvkm_i2c_pad **ppad)
+gm200_i2c_pad_s_new(struct nvkm_i2c *i2c, int id, struct nvkm_i2c_pad **ppad)
 {
-	return nvkm_i2c_pad_new_(&gm204_i2c_pad_s_func, i2c, id, ppad);
+	return nvkm_i2c_pad_new_(&gm200_i2c_pad_s_func, i2c, id, ppad);
 }
 
 static const struct nvkm_i2c_pad_func
-gm204_i2c_pad_x_func = {
+gm200_i2c_pad_x_func = {
 	.bus_new_4 = gf119_i2c_bus_new,
-	.aux_new_6 = gm204_i2c_aux_new,
+	.aux_new_6 = gm200_i2c_aux_new,
 };
 
 int
-gm204_i2c_pad_x_new(struct nvkm_i2c *i2c, int id, struct nvkm_i2c_pad **ppad)
+gm200_i2c_pad_x_new(struct nvkm_i2c *i2c, int id, struct nvkm_i2c_pad **ppad)
 {
-	return nvkm_i2c_pad_new_(&gm204_i2c_pad_x_func, i2c, id, ppad);
+	return nvkm_i2c_pad_new_(&gm200_i2c_pad_x_func, i2c, id, ppad);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
index 7e77a7466992..ad572d3b5466 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
@@ -2,4 +2,4 @@ nvkm-y += nvkm/subdev/ibus/gf100.o
 nvkm-y += nvkm/subdev/ibus/gf117.o
 nvkm-y += nvkm/subdev/ibus/gk104.o
 nvkm-y += nvkm/subdev/ibus/gk20a.o
-nvkm-y += nvkm/subdev/ibus/gm204.o
+nvkm-y += nvkm/subdev/ibus/gm200.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c
index b3839dc254ee..ef0b7f3b1128 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gm200.c
@@ -24,17 +24,17 @@
 #include "priv.h"
 
 static const struct nvkm_subdev_func
-gm204_ibus = {
+gm200_ibus = {
 	.intr = gk104_ibus_intr,
 };
 
 int
-gm204_ibus_new(struct nvkm_device *device, int index,
+gm200_ibus_new(struct nvkm_device *device, int index,
 	       struct nvkm_subdev **pibus)
 {
 	struct nvkm_subdev *ibus;
 	if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_subdev_ctor(&gm204_ibus, device, index, 0, ibus);
+	nvkm_subdev_ctor(&gm200_ibus, device, index, 0, ibus);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/Kbuild
new file mode 100644
index 000000000000..98a4bd3e98ed
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/Kbuild
@@ -0,0 +1,2 @@
+nvkm-y += nvkm/subdev/iccsense/base.o
+nvkm-y += nvkm/subdev/iccsense/gf100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
new file mode 100644
index 000000000000..c44a85228074
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2015 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+#include "priv.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/extdev.h>
+#include <subdev/bios/iccsense.h>
+#include <subdev/i2c.h>
+
+static bool
+nvkm_iccsense_validate_device(struct i2c_adapter *i2c, u8 addr,
+			      enum nvbios_extdev_type type, u8 rail)
+{
+	switch (type) {
+	case NVBIOS_EXTDEV_INA209:
+	case NVBIOS_EXTDEV_INA219:
+		return rail == 0 && nv_rd16i2cr(i2c, addr, 0x0) >= 0;
+	case NVBIOS_EXTDEV_INA3221:
+		return rail <= 3 &&
+		       nv_rd16i2cr(i2c, addr, 0xff) == 0x3220 &&
+		       nv_rd16i2cr(i2c, addr, 0xfe) == 0x5449;
+	default:
+		return false;
+	}
+}
+
+static int
+nvkm_iccsense_poll_lane(struct i2c_adapter *i2c, u8 addr, u8 shunt_reg,
+			u8 shunt_shift, u8 bus_reg, u8 bus_shift, u8 shunt,
+			u16 lsb)
+{
+	int vshunt = nv_rd16i2cr(i2c, addr, shunt_reg);
+	int vbus = nv_rd16i2cr(i2c, addr, bus_reg);
+
+	if (vshunt < 0 || vbus < 0)
+		return -EINVAL;
+
+	vshunt >>= shunt_shift;
+	vbus >>= bus_shift;
+
+	return vbus * vshunt * lsb / shunt;
+}
+
+static int
+nvkm_iccsense_ina2x9_read(struct nvkm_iccsense *iccsense,
+                          struct nvkm_iccsense_rail *rail,
+			  u8 shunt_reg, u8 bus_reg)
+{
+	return nvkm_iccsense_poll_lane(rail->i2c, rail->addr, shunt_reg, 0,
+				       bus_reg, 3, rail->mohm, 10 * 4);
+}
+
+static int
+nvkm_iccsense_ina209_read(struct nvkm_iccsense *iccsense,
+			  struct nvkm_iccsense_rail *rail)
+{
+	return nvkm_iccsense_ina2x9_read(iccsense, rail, 3, 4);
+}
+
+static int
+nvkm_iccsense_ina219_read(struct nvkm_iccsense *iccsense,
+			  struct nvkm_iccsense_rail *rail)
+{
+	return nvkm_iccsense_ina2x9_read(iccsense, rail, 1, 2);
+}
+
+static int
+nvkm_iccsense_ina3221_read(struct nvkm_iccsense *iccsense,
+			   struct nvkm_iccsense_rail *rail)
+{
+	return nvkm_iccsense_poll_lane(rail->i2c, rail->addr,
+				       1 + (rail->rail * 2), 3,
+				       2 + (rail->rail * 2), 3, rail->mohm,
+				       40 * 8);
+}
+
+int
+nvkm_iccsense_read(struct nvkm_iccsense *iccsense, u8 idx)
+{
+	struct nvkm_iccsense_rail *rail;
+
+	if (!iccsense || idx >= iccsense->rail_count)
+		return -EINVAL;
+
+	rail = &iccsense->rails[idx];
+	if (!rail->read)
+		return -ENODEV;
+
+	return rail->read(iccsense, rail);
+}
+
+int
+nvkm_iccsense_read_all(struct nvkm_iccsense *iccsense)
+{
+	int result = 0, i;
+	for (i = 0; i < iccsense->rail_count; ++i) {
+		int res = nvkm_iccsense_read(iccsense, i);
+		if (res >= 0)
+			result += res;
+		else
+			return res;
+	}
+	return result;
+}
+
+static void *
+nvkm_iccsense_dtor(struct nvkm_subdev *subdev)
+{
+	struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev);
+
+	if (iccsense->rails)
+		kfree(iccsense->rails);
+
+	return iccsense;
+}
+
+static int
+nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev);
+	struct nvkm_bios *bios = subdev->device->bios;
+	struct nvkm_i2c *i2c = subdev->device->i2c;
+	struct nvbios_iccsense stbl;
+	int i;
+
+	if (!i2c || !bios || nvbios_iccsense_parse(bios, &stbl)
+	    || !stbl.nr_entry)
+		return 0;
+
+	iccsense->rails = kmalloc(sizeof(*iccsense->rails) * stbl.nr_entry,
+	                          GFP_KERNEL);
+	if (!iccsense->rails)
+		return -ENOMEM;
+
+	iccsense->data_valid = true;
+	for (i = 0; i < stbl.nr_entry; ++i) {
+		struct pwr_rail_t *r = &stbl.rail[i];
+		struct nvbios_extdev_func extdev;
+		struct nvkm_iccsense_rail *rail;
+		struct nvkm_i2c_bus *i2c_bus;
+		u8 addr;
+
+		if (!r->mode || r->resistor_mohm == 0)
+			continue;
+
+		if (nvbios_extdev_parse(bios, r->extdev_id, &extdev))
+			continue;
+
+		if (extdev.type == 0xff)
+			continue;
+
+		if (extdev.bus)
+			i2c_bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_SEC);
+		else
+			i2c_bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_PRI);
+		if (!i2c_bus)
+			continue;
+
+		addr = extdev.addr >> 1;
+		if (!nvkm_iccsense_validate_device(&i2c_bus->i2c, addr,
+						   extdev.type, r->rail)) {
+			iccsense->data_valid = false;
+			nvkm_warn(subdev, "found unknown or invalid rail entry"
+				  " type 0x%x rail %i, power reading might be"
+				  " invalid\n", extdev.type, r->rail);
+			continue;
+		}
+
+		rail = &iccsense->rails[iccsense->rail_count];
+		switch (extdev.type) {
+		case NVBIOS_EXTDEV_INA209:
+			rail->read = nvkm_iccsense_ina209_read;
+			break;
+		case NVBIOS_EXTDEV_INA219:
+			rail->read = nvkm_iccsense_ina219_read;
+			break;
+		case NVBIOS_EXTDEV_INA3221:
+			rail->read = nvkm_iccsense_ina3221_read;
+			break;
+		}
+
+		rail->addr = addr;
+		rail->rail = r->rail;
+		rail->mohm = r->resistor_mohm;
+		rail->i2c = &i2c_bus->i2c;
+		++iccsense->rail_count;
+	}
+	return 0;
+}
+
+struct nvkm_subdev_func iccsense_func = {
+	.oneinit = nvkm_iccsense_oneinit,
+	.dtor = nvkm_iccsense_dtor,
+};
+
+void
+nvkm_iccsense_ctor(struct nvkm_device *device, int index,
+		   struct nvkm_iccsense *iccsense)
+{
+	nvkm_subdev_ctor(&iccsense_func, device, index, 0, &iccsense->subdev);
+}
+
+int
+nvkm_iccsense_new_(struct nvkm_device *device, int index,
+		   struct nvkm_iccsense **iccsense)
+{
+	if (!(*iccsense = kzalloc(sizeof(**iccsense), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_iccsense_ctor(device, index, *iccsense);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/gf100.c
new file mode 100644
index 000000000000..cccff1c8a409
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/gf100.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2015 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include "priv.h"
+
+int
+gf100_iccsense_new(struct nvkm_device *device, int index,
+		   struct nvkm_iccsense **piccsense)
+{
+	return nvkm_iccsense_new_(device, index, piccsense);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h
new file mode 100644
index 000000000000..ed398b81e86e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h
@@ -0,0 +1,16 @@
+#ifndef __NVKM_ICCSENSE_PRIV_H__
+#define __NVKM_ICCSENSE_PRIV_H__
+#define nvkm_iccsense(p) container_of((p), struct nvkm_iccsense, subdev)
+#include <subdev/iccsense.h>
+
+struct nvkm_iccsense_rail {
+	int (*read)(struct nvkm_iccsense *, struct nvkm_iccsense_rail *);
+	struct i2c_adapter *i2c;
+	u8 addr;
+	u8 rail;
+	u8 mohm;
+};
+
+void nvkm_iccsense_ctor(struct nvkm_device *, int, struct nvkm_iccsense *);
+int nvkm_iccsense_new_(struct nvkm_device *, int, struct nvkm_iccsense **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 4c20fec64d96..6b8f2a19b2d9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -228,6 +228,8 @@ gk20a_instobj_release_dma(struct nvkm_memory *memory)
 	struct gk20a_instmem *imem = node->imem;
 	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
 
+	/* in case we got a write-combined mapping */
+	wmb();
 	nvkm_ltc_invalidate(ltc);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index f8108df3cb38..932b366598aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -2,4 +2,4 @@ nvkm-y += nvkm/subdev/ltc/base.o
 nvkm-y += nvkm/subdev/ltc/gf100.o
 nvkm-y += nvkm/subdev/ltc/gk104.o
 nvkm-y += nvkm/subdev/ltc/gm107.o
-nvkm-y += nvkm/subdev/ltc/gm204.o
+nvkm-y += nvkm/subdev/ltc/gm200.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index fb0de83da13c..c9eb677967a8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -129,9 +129,7 @@ gf100_ltc_invalidate(struct nvkm_ltc *ltc)
 	s64 taken;
 
 	nvkm_wr32(device, 0x70004, 0x00000001);
-	taken = nvkm_wait_msec(device, 2, 0x70004, 0x00000003, 0x00000000);
-	if (taken < 0)
-		nvkm_warn(&ltc->subdev, "LTC invalidate timeout\n");
+	taken = nvkm_wait_msec(device, 2000, 0x70004, 0x00000003, 0x00000000);
 
 	if (taken > 0)
 		nvkm_debug(&ltc->subdev, "LTC invalidate took %lld ns\n", taken);
@@ -144,9 +142,7 @@ gf100_ltc_flush(struct nvkm_ltc *ltc)
 	s64 taken;
 
 	nvkm_wr32(device, 0x70010, 0x00000001);
-	taken = nvkm_wait_msec(device, 2, 0x70010, 0x00000003, 0x00000000);
-	if (taken < 0)
-		nvkm_warn(&ltc->subdev, "LTC flush timeout\n");
+	taken = nvkm_wait_msec(device, 2000, 0x70010, 0x00000003, 0x00000000);
 
 	if (taken > 0)
 		nvkm_debug(&ltc->subdev, "LTC flush took %lld ns\n", taken);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index 2af1f9e100fc..e292f5679418 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -43,10 +43,8 @@ gm107_ltc_cbc_wait(struct nvkm_ltc *ltc)
 	for (c = 0; c < ltc->ltc_nr; c++) {
 		for (s = 0; s < ltc->lts_nr; s++) {
 			const u32 addr = 0x14046c + (c * 0x2000) + (s * 0x200);
-			nvkm_msec(device, 2000,
-				if (!nvkm_rd32(device, addr))
-					break;
-			);
+			nvkm_wait_msec(device, 2000, addr,
+				       0x00000004, 0x00000000);
 		}
 	}
 }
@@ -75,7 +73,7 @@ gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s)
 {
 	struct nvkm_subdev *subdev = &ltc->subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 base = 0x140000 + (c * 0x2000) + (s * 0x400);
+	u32 base = 0x140000 + (c * 0x2000) + (s * 0x200);
 	u32 stat = nvkm_rd32(device, base + 0x00c);
 
 	if (stat) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm204.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
index 5ad6fb9d022d..2a29bfd5125a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm204.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
@@ -27,7 +27,7 @@
 #include <subdev/timer.h>
 
 static int
-gm204_ltc_oneinit(struct nvkm_ltc *ltc)
+gm200_ltc_oneinit(struct nvkm_ltc *ltc)
 {
 	struct nvkm_device *device = ltc->subdev.device;
 
@@ -37,15 +37,15 @@ gm204_ltc_oneinit(struct nvkm_ltc *ltc)
 	return gf100_ltc_oneinit_tag_ram(ltc);
 }
 static void
-gm204_ltc_init(struct nvkm_ltc *ltc)
+gm200_ltc_init(struct nvkm_ltc *ltc)
 {
 	nvkm_wr32(ltc->subdev.device, 0x17e278, ltc->tag_base);
 }
 
 static const struct nvkm_ltc_func
-gm204_ltc = {
-	.oneinit = gm204_ltc_oneinit,
-	.init = gm204_ltc_init,
+gm200_ltc = {
+	.oneinit = gm200_ltc_oneinit,
+	.init = gm200_ltc_init,
 	.intr = gm107_ltc_intr, /*XXX: not validated */
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
@@ -57,7 +57,7 @@ gm204_ltc = {
 };
 
 int
-gm204_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
+gm200_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
 {
-	return nvkm_ltc_new_(&gm204_ltc, device, index, pltc);
+	return nvkm_ltc_new_(&gm200_ltc, device, index, pltc);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
index 770294457274..e2faccffee6f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
@@ -24,8 +24,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000507,
-	0x000004a4,
+	0x0000050a,
+	0x000004a7,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000753,
-	0x00000745,
+	0x00000756,
+	0x00000748,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000757,
-	0x00000755,
+	0x0000075a,
+	0x00000758,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000b87,
-	0x00000a2a,
+	0x00000b8a,
+	0x00000a2d,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000bb0,
-	0x00000b89,
+	0x00000bb3,
+	0x00000b8c,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000bbc,
-	0x00000bba,
+	0x00000bbf,
+	0x00000bbd,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -229,26 +229,26 @@ uint32_t gf100_pmu_data[] = {
 /* 0x0370: memx_func_head */
 	0x00000001,
 	0x00000000,
-	0x00000546,
+	0x00000549,
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x000005d0,
+	0x000005d3,
 	0x00000003,
 	0x00000002,
-	0x0000069a,
+	0x0000069d,
 	0x00040004,
 	0x00000000,
-	0x000006b6,
+	0x000006b9,
 	0x00010005,
 	0x00000000,
-	0x000006d3,
+	0x000006d6,
 	0x00010006,
 	0x00000000,
-	0x00000658,
+	0x0000065b,
 	0x00000007,
 	0x00000000,
-	0x000006de,
+	0x000006e1,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -917,887 +917,887 @@ uint32_t gf100_pmu_data[] = {
 };
 
 uint32_t gf100_pmu_code[] = {
-	0x03930ef5,
+	0x03920ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xd00604b6,
 	0x04bd000e,
-	0xf001d7f0,
-	0x07f101d3,
-	0x04b607ac,
-	0x000dd006,
-/* 0x0022: rd32_wait */
-	0xd7f104bd,
-	0xd4b607ac,
-	0x00ddcf06,
-	0x7000d4f1,
-	0xf1f21bf4,
-	0xb607a4d7,
-	0xddcf06d4,
-/* 0x003f: wr32 */
-	0xf100f800,
-	0xb607a007,
-	0x0ed00604,
-	0xf104bd00,
-	0xb607a407,
+	0x0001d7f1,
+	0xf101d3f0,
+	0xb607ac07,
 	0x0dd00604,
-	0xf004bd00,
-	0xd5f002d7,
-	0x01d3f0f0,
-	0x07ac07f1,
+/* 0x0023: rd32_wait */
+	0xf104bd00,
+	0xb607acd7,
+	0xddcf06d4,
+	0x00d4f100,
+	0xf21bf470,
+	0x07a4d7f1,
+	0xcf06d4b6,
+	0x00f800dd,
+/* 0x0040: wr32 */
+	0x07a007f1,
+	0xd00604b6,
+	0x04bd000e,
+	0x07a407f1,
 	0xd00604b6,
 	0x04bd000d,
-/* 0x006c: wr32_wait */
-	0x07acd7f1,
-	0xcf06d4b6,
-	0xd4f100dd,
-	0x1bf47000,
-/* 0x007f: nsec */
-	0xf900f8f2,
-	0xf080f990,
-	0x84b62c87,
-	0x0088cf06,
-/* 0x008c: nsec_loop */
-	0xb62c97f0,
-	0x99cf0694,
-	0x0298bb00,
-	0xf4069eb8,
-	0x80fcf11e,
-	0x00f890fc,
-/* 0x00a4: wait */
-	0x80f990f9,
-	0xb62c87f0,
-	0x88cf0684,
-/* 0x00b1: wait_loop */
-	0x02eeb900,
-	0xb90421f4,
-	0xadfd02da,
-	0x06acb804,
-	0xf0150bf4,
+	0x00f2d7f1,
+	0xf101d3f0,
+	0xb607ac07,
+	0x0dd00604,
+/* 0x006b: wr32_wait */
+	0xf104bd00,
+	0xb607acd7,
+	0xddcf06d4,
+	0x00d4f100,
+	0xf21bf470,
+/* 0x007e: nsec */
+	0x90f900f8,
+	0x87f080f9,
+	0x0684b62c,
+/* 0x008b: nsec_loop */
+	0xf00088cf,
 	0x94b62c97,
 	0x0099cf06,
 	0xb80298bb,
-	0x1ef4069b,
-/* 0x00d5: wait_done */
-	0xfc80fcdf,
-/* 0x00db: intr_watchdog */
-	0x9800f890,
-	0x96b003e9,
-	0x2a0bf400,
-	0xbb9a0a98,
-	0x1cf4029a,
-	0x01d7f00f,
-	0x02d221f5,
-	0x0ef494bd,
-/* 0x00f9: intr_watchdog_next_time */
-	0x9b0a9815,
-	0xf400a6b0,
-	0x9ab8090b,
-	0x061cf406,
-/* 0x0108: intr_watchdog_next_time_set */
-/* 0x010b: intr_watchdog_next_proc */
-	0x809b0980,
-	0xe0b603e9,
-	0x68e6b158,
-	0xc61bf402,
-/* 0x011a: intr */
-	0x00f900f8,
-	0x80f904bd,
-	0xa0f990f9,
-	0xc0f9b0f9,
-	0xe0f9d0f9,
-	0xf7f0f0f9,
-	0x0188fe00,
-	0x87f180f9,
-	0x84b605d0,
+	0x1ef4069e,
+	0xfc80fcf1,
+/* 0x00a3: wait */
+	0xf900f890,
+	0xf080f990,
+	0x84b62c87,
 	0x0088cf06,
-	0xf10180b6,
-	0xb605d007,
+/* 0x00b0: wait_loop */
+	0xf402eeb9,
+	0xdab90421,
+	0x04adfd02,
+	0xf406acb8,
+	0x97f0150b,
+	0x0694b62c,
+	0xbb0099cf,
+	0x9bb80298,
+	0xdf1ef406,
+/* 0x00d4: wait_done */
+	0x90fc80fc,
+/* 0x00da: intr_watchdog */
+	0xe99800f8,
+	0x0096b003,
+	0x982a0bf4,
+	0x9abb9a0a,
+	0x0f1cf402,
+	0xf501d7f0,
+	0xbd02d121,
+	0x150ef494,
+/* 0x00f8: intr_watchdog_next_time */
+	0xb09b0a98,
+	0x0bf400a6,
+	0x069ab809,
+/* 0x0107: intr_watchdog_next_time_set */
+	0x80061cf4,
+/* 0x010a: intr_watchdog_next_proc */
+	0xe9809b09,
+	0x58e0b603,
+	0x0268e6b1,
+	0xf8c61bf4,
+/* 0x0119: intr */
+	0xbd00f900,
+	0xf980f904,
+	0xf9a0f990,
+	0xf9c0f9b0,
+	0xf9e0f9d0,
+	0x00f7f0f0,
+	0xf90188fe,
+	0xd087f180,
+	0x0684b605,
+	0xb60088cf,
+	0x07f10180,
+	0x04b605d0,
+	0x0008d006,
+	0x87f004bd,
+	0x0684b608,
+	0xc40088cf,
+	0x0bf40289,
+	0x9b008023,
+	0xf458e7f0,
+	0x0998da21,
+	0x0096b09b,
+	0xf0110bf4,
+	0x04b63407,
+	0x0009d006,
+	0x098004bd,
+/* 0x017d: intr_skip_watchdog */
+	0x0089e49a,
+	0x480bf408,
+	0x068897f1,
+	0xcf0694b6,
+	0x9ac40099,
+	0x2c0bf402,
+	0x04c0c7f1,
+	0xcf06c4b6,
+	0xc0f900cc,
+	0x4f48e7f1,
+	0x5453e3f1,
+	0xf500d7f0,
+	0xfc033621,
+	0xc007f1c0,
+	0x0604b604,
+	0xbd000cd0,
+/* 0x01bd: intr_subintr_skip_fifo */
+	0x8807f104,
+	0x0604b606,
+	0xbd0009d0,
+/* 0x01c9: intr_skip_subintr */
+	0xe097f104,
+	0xfd90bd00,
+	0x07f00489,
+	0x0604b604,
+	0xbd0008d0,
+	0xfe80fc04,
+	0xf0fc0088,
+	0xd0fce0fc,
+	0xb0fcc0fc,
+	0x90fca0fc,
+	0x00fc80fc,
+	0xf80032f4,
+/* 0x01f9: ticks_from_ns */
+	0xf9c0f901,
+	0xcbd7f1b0,
+	0x00d3f000,
+	0x040b21f5,
+	0x03e8ccec,
+	0xf400b4b0,
+	0xeeec120b,
+	0xd7f103e8,
+	0xd3f000cb,
+	0x0b21f500,
+/* 0x0221: ticks_from_ns_quit */
+	0x02ceb904,
+	0xc0fcb0fc,
+/* 0x022a: ticks_from_us */
+	0xc0f900f8,
+	0xd7f1b0f9,
+	0xd3f000cb,
+	0x0b21f500,
+	0x02ceb904,
+	0xf400b4b0,
+	0xe4bd050b,
+/* 0x0244: ticks_from_us_quit */
+	0xc0fcb0fc,
+/* 0x024a: ticks_to_us */
+	0xd7f100f8,
+	0xd3f000cb,
+	0xecedff00,
+/* 0x0256: timer */
+	0x90f900f8,
+	0x32f480f9,
+	0x03f89810,
+	0xf40086b0,
+	0x84bd651c,
+	0xb63807f0,
 	0x08d00604,
 	0xf004bd00,
-	0x84b60887,
+	0x84b63487,
 	0x0088cf06,
-	0xf40289c4,
-	0x0080230b,
-	0x58e7f09b,
-	0x98db21f4,
-	0x96b09b09,
-	0x110bf400,
+	0xbb9a0998,
+	0xe9bb0298,
+	0x03fe8000,
+	0xb60887f0,
+	0x88cf0684,
+	0x0284f000,
+	0xf0261bf4,
+	0x84b63487,
+	0x0088cf06,
+	0xf406e0b8,
+	0xe8b8090b,
+	0x111cf406,
+/* 0x02ac: timer_reset */
 	0xb63407f0,
-	0x09d00604,
+	0x0ed00604,
 	0x8004bd00,
-/* 0x017e: intr_skip_watchdog */
-	0x89e49a09,
-	0x0bf40800,
-	0x8897f148,
-	0x0694b606,
-	0xc40099cf,
-	0x0bf4029a,
-	0xc0c7f12c,
-	0x06c4b604,
-	0xf900cccf,
-	0x48e7f1c0,
-	0x53e3f14f,
-	0x00d7f054,
-	0x033721f5,
-	0x07f1c0fc,
-	0x04b604c0,
-	0x000cd006,
-/* 0x01be: intr_subintr_skip_fifo */
-	0x07f104bd,
-	0x04b60688,
-	0x0009d006,
-/* 0x01ca: intr_skip_subintr */
-	0x97f104bd,
-	0x90bd00e0,
-	0xf00489fd,
-	0x04b60407,
-	0x0008d006,
-	0x80fc04bd,
-	0xfc0088fe,
-	0xfce0fcf0,
-	0xfcc0fcd0,
-	0xfca0fcb0,
-	0xfc80fc90,
-	0x0032f400,
-/* 0x01fa: ticks_from_ns */
-	0xc0f901f8,
-	0xd7f1b0f9,
-	0xd3f000cb,
-	0x0821f500,
-	0xe8ccec04,
-	0x00b4b003,
-	0xec120bf4,
-	0xf103e8ee,
-	0xf000cbd7,
-	0x21f500d3,
-/* 0x0222: ticks_from_ns_quit */
-	0xceb90408,
-	0xfcb0fc02,
-/* 0x022b: ticks_from_us */
-	0xf900f8c0,
-	0xf1b0f9c0,
-	0xf000cbd7,
-	0x21f500d3,
-	0xceb90408,
-	0x00b4b002,
-	0xbd050bf4,
-/* 0x0245: ticks_from_us_quit */
-	0xfcb0fce4,
-/* 0x024b: ticks_to_us */
-	0xf100f8c0,
-	0xf000cbd7,
-	0xedff00d3,
-/* 0x0257: timer */
-	0xf900f8ec,
-	0xf480f990,
-	0xf8981032,
-	0x0086b003,
-	0xbd651cf4,
-	0x3807f084,
+/* 0x02ba: timer_enable */
+	0x87f09a0e,
+	0x3807f001,
 	0xd00604b6,
 	0x04bd0008,
-	0xb63487f0,
-	0x88cf0684,
-	0x9a099800,
-	0xbb0298bb,
-	0xfe8000e9,
-	0x0887f003,
-	0xcf0684b6,
-	0x84f00088,
-	0x261bf402,
-	0xb63487f0,
-	0x88cf0684,
-	0x06e0b800,
-	0xb8090bf4,
-	0x1cf406e8,
-/* 0x02ad: timer_reset */
-	0x3407f011,
-	0xd00604b6,
-	0x04bd000e,
-/* 0x02bb: timer_enable */
-	0xf09a0e80,
-	0x07f00187,
-	0x0604b638,
-	0xbd0008d0,
-/* 0x02c9: timer_done */
-	0x1031f404,
+/* 0x02c8: timer_done */
+	0xfc1031f4,
+	0xf890fc80,
+/* 0x02d1: send_proc */
+	0xf980f900,
+	0x05e89890,
+	0xf004e998,
+	0x89b80486,
+	0x2a0bf406,
+	0x940398c4,
+	0x80b60488,
+	0x008ebb18,
+	0x8000fa98,
+	0x8d80008a,
+	0x028c8001,
+	0xb6038b80,
+	0x94f00190,
+	0x04e98007,
+/* 0x030b: send_done */
+	0xfc0231f4,
+	0xf880fc90,
+/* 0x0311: find */
+	0xf080f900,
+	0x31f45887,
+/* 0x0319: find_loop */
+	0x008a9801,
+	0xf406aeb8,
+	0x80b6100b,
+	0x6886b158,
+	0xf01bf402,
+/* 0x032f: find_done */
+	0xb90132f4,
+	0x80fc028e,
+/* 0x0336: send */
+	0x21f500f8,
+	0x01f40311,
+/* 0x033f: recv */
+	0xf900f897,
+	0x9880f990,
+	0xe99805e8,
+	0x0132f404,
+	0xf40689b8,
+	0x89c43d0b,
+	0x0180b603,
+	0x800784f0,
+	0xea9805e8,
+	0xfef0f902,
+	0xf0f9018f,
+	0x9402efb9,
+	0xe9bb0499,
+	0x18e0b600,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0xf0fca5f9,
+	0xf400f8fe,
+	0xf0fc0131,
+/* 0x038c: recv_done */
 	0x90fc80fc,
-/* 0x02d2: send_proc */
-	0x80f900f8,
-	0xe89890f9,
-	0x04e99805,
-	0xb80486f0,
-	0x0bf40689,
-	0x0398c42a,
-	0xb6048894,
-	0x8ebb1880,
-	0x00fa9800,
-	0x80008a80,
-	0x8c80018d,
-	0x038b8002,
-	0xf00190b6,
-	0xe9800794,
-	0x0231f404,
-/* 0x030c: send_done */
-	0x80fc90fc,
-/* 0x0312: find */
-	0x80f900f8,
-	0xf45887f0,
-/* 0x031a: find_loop */
-	0x8a980131,
-	0x06aeb800,
-	0xb6100bf4,
-	0x86b15880,
-	0x1bf40268,
-	0x0132f4f0,
-/* 0x0330: find_done */
-	0xfc028eb9,
-/* 0x0337: send */
-	0xf500f880,
-	0xf4031221,
-	0x00f89701,
-/* 0x0340: recv */
-	0x80f990f9,
-	0x9805e898,
-	0x32f404e9,
-	0x0689b801,
-	0xc43d0bf4,
-	0x80b60389,
-	0x0784f001,
-	0x9805e880,
-	0xf0f902ea,
-	0xf9018ffe,
-	0x02efb9f0,
-	0xbb049994,
-	0xe0b600e9,
-	0x03eb9818,
-	0x9802ec98,
-	0xee9801ed,
-	0xfca5f900,
-	0x00f8fef0,
-	0xfc0131f4,
-/* 0x038d: recv_done */
-	0xfc80fcf0,
-/* 0x0393: init */
-	0xf100f890,
-	0xb6010817,
-	0x11cf0614,
-	0x0911e700,
-	0x0814b601,
-	0xf10014fe,
-	0xf000e017,
-	0x07f00013,
-	0x0604b61c,
-	0xbd0001d0,
-	0xff17f004,
-	0xb61407f0,
-	0x01d00604,
-	0xf004bd00,
-	0x15f10217,
-	0x07f00800,
-	0x0604b610,
-	0xbd0001d0,
-	0x1a17f104,
-	0x0013f001,
-	0xf40010fe,
-	0x17f01031,
-	0x3807f001,
+/* 0x0392: init */
+	0x17f100f8,
+	0x14b60108,
+	0x0011cf06,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
+	0xd00604b6,
+	0x04bd0001,
+	0xf0ff17f0,
+	0x04b61407,
+	0x0001d006,
+	0x17f004bd,
+	0x0015f102,
+	0x1007f008,
 	0xd00604b6,
 	0x04bd0001,
-/* 0x03f7: init_proc */
-	0x9858f7f0,
-	0x16b001f1,
-	0xfa0bf400,
-	0xf0b615f9,
-	0xf20ef458,
-/* 0x0408: mulu32_32_64 */
-	0x20f910f9,
-	0x40f930f9,
-	0x9510e195,
-	0xc4bd10d2,
-	0xedffb4bd,
-	0x301dffc0,
-	0xf10234b9,
-	0xb6ffff34,
-	0x45b61034,
-	0x00c3bb10,
-	0xff01b4bb,
-	0x34b930e2,
-	0xff34f102,
-	0x1034b6ff,
-	0xbb1045b6,
-	0xb4bb00c3,
-	0x3012ff01,
-	0xfc00b3bb,
-	0xfc30fc40,
-	0xf810fc20,
-/* 0x0459: host_send */
-	0xb017f100,
+	0x011917f1,
+	0xf10013f0,
+	0xfeffff14,
+	0x31f40010,
+	0x0117f010,
+	0xb63807f0,
+	0x01d00604,
+	0xf004bd00,
+/* 0x03fa: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x040b: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x045c: host_send */
+	0x04b017f1,
+	0xcf0614b6,
+	0x27f10011,
+	0x24b604a0,
+	0x0022cf06,
+	0xf40612b8,
+	0x1ec4320b,
+	0x04ee9407,
+	0x0270e0b7,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0x033621f5,
+	0xc40110b6,
+	0x07f10f1e,
+	0x04b604b0,
+	0x000ed006,
+	0x0ef404bd,
+/* 0x04a5: host_send_done */
+/* 0x04a7: host_recv */
+	0xf100f8ba,
+	0xf14e4917,
+	0xb8525413,
+	0x0bf406e1,
+/* 0x04b5: host_recv_wait */
+	0xcc17f1aa,
 	0x0614b604,
 	0xf10011cf,
-	0xb604a027,
+	0xb604c827,
 	0x22cf0624,
-	0x0612b800,
-	0xc4320bf4,
-	0xee94071e,
-	0x70e0b704,
-	0x03eb9802,
-	0x9802ec98,
-	0xee9801ed,
-	0x3721f500,
-	0x0110b603,
-	0xf10f1ec4,
-	0xb604b007,
-	0x0ed00604,
-	0xf404bd00,
-/* 0x04a2: host_send_done */
-	0x00f8ba0e,
-/* 0x04a4: host_recv */
-	0x4e4917f1,
-	0x525413f1,
-	0xf406e1b8,
-/* 0x04b2: host_recv_wait */
-	0x17f1aa0b,
-	0x14b604cc,
-	0x0011cf06,
-	0x04c827f1,
-	0xcf0624b6,
-	0x16f00022,
-	0x0612b808,
-	0xc4e60bf4,
-	0x34b60723,
-	0xf030b704,
-	0x033b8002,
-	0x80023c80,
-	0x3e80013d,
-	0x0120b600,
-	0xf10f24f0,
-	0xb604c807,
-	0x02d00604,
-	0xf004bd00,
-	0x07f04027,
-	0x0604b600,
-	0xbd0002d0,
-/* 0x0507: host_init */
-	0xf100f804,
-	0xb6008017,
-	0x15f11014,
-	0x07f10270,
-	0x04b604d0,
-	0x0001d006,
-	0x17f104bd,
+	0x0816f000,
+	0xf40612b8,
+	0x23c4e60b,
+	0x0434b607,
+	0x02f030b7,
+	0x80033b80,
+	0x3d80023c,
+	0x003e8001,
+	0xf00120b6,
+	0x07f10f24,
+	0x04b604c8,
+	0x0002d006,
+	0x27f004bd,
+	0x0007f040,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x050a: host_init */
+	0x17f100f8,
 	0x14b60080,
-	0xf015f110,
-	0xdc07f102,
+	0x7015f110,
+	0xd007f102,
 	0x0604b604,
 	0xbd0001d0,
-	0x0117f004,
-	0x04c407f1,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
 	0xd00604b6,
 	0x04bd0001,
-/* 0x0546: memx_func_enter */
-	0x67f100f8,
-	0x77f11620,
-	0x73f1f55d,
-	0x6eb9ffff,
-	0x0421f402,
-	0xfd02d8b9,
-	0x60f90487,
-	0xd0fc80f9,
-	0x21f4e0fc,
-	0xfe77f13f,
-	0xff73f1ff,
+	0xf10117f0,
+	0xb604c407,
+	0x01d00604,
+	0xf804bd00,
+/* 0x0549: memx_func_enter */
+	0x2067f100,
+	0x5d77f116,
+	0xff73f1f5,
 	0x026eb9ff,
 	0xb90421f4,
 	0x87fd02d8,
 	0xf960f904,
 	0xfcd0fc80,
-	0x3f21f4e0,
-	0x26f067f1,
+	0x4021f4e0,
+	0xfffe77f1,
+	0xffff73f1,
 	0xf4026eb9,
 	0xd8b90421,
 	0x0487fd02,
 	0x80f960f9,
 	0xe0fcd0fc,
-	0xf03f21f4,
+	0xf14021f4,
+	0xb926f067,
+	0x21f4026e,
+	0x02d8b904,
+	0xf90487fd,
+	0xfc80f960,
+	0xf4e0fcd0,
+	0x67f04021,
+	0xe007f104,
+	0x0604b607,
+	0xbd0006d0,
+/* 0x05b5: memx_func_enter_wait */
+	0xc067f104,
+	0x0664b607,
+	0xf00066cf,
+	0x0bf40464,
+	0x2c67f0f3,
+	0xcf0664b6,
+	0x06800066,
+/* 0x05d3: memx_func_leave */
+	0xf000f8f1,
+	0x64b62c67,
+	0x0066cf06,
+	0xf0f20680,
 	0x07f10467,
-	0x04b607e0,
+	0x04b607e4,
 	0x0006d006,
-/* 0x05b2: memx_func_enter_wait */
+/* 0x05ee: memx_func_leave_wait */
 	0x67f104bd,
 	0x64b607c0,
 	0x0066cf06,
 	0xf40464f0,
-	0x67f0f30b,
-	0x0664b62c,
-	0x800066cf,
-	0x00f8f106,
-/* 0x05d0: memx_func_leave */
-	0xb62c67f0,
-	0x66cf0664,
-	0xf2068000,
-	0xf10467f0,
-	0xb607e407,
-	0x06d00604,
-/* 0x05eb: memx_func_leave_wait */
-	0xf104bd00,
-	0xb607c067,
-	0x66cf0664,
-	0x0464f000,
-	0xf1f31bf4,
-	0xf126f067,
-	0xf0000177,
+	0x67f1f31b,
+	0x77f126f0,
+	0x73f00001,
+	0x026eb900,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f905,
+	0xfcd0fc80,
+	0x4021f4e0,
+	0x162067f1,
+	0xf4026eb9,
+	0xd8b90421,
+	0x0587fd02,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0xf14021f4,
+	0xf00aa277,
 	0x6eb90073,
 	0x0421f402,
 	0xfd02d8b9,
 	0x60f90587,
 	0xd0fc80f9,
 	0x21f4e0fc,
-	0x2067f13f,
-	0x026eb916,
-	0xb90421f4,
-	0x87fd02d8,
-	0xf960f905,
-	0xfcd0fc80,
-	0x3f21f4e0,
-	0x0aa277f1,
-	0xb90073f0,
-	0x21f4026e,
-	0x02d8b904,
-	0xf90587fd,
-	0xfc80f960,
-	0xf4e0fcd0,
-	0x00f83f21,
-/* 0x0658: memx_func_wait_vblank */
-	0xb0001698,
-	0x0bf40066,
-	0x0166b013,
-	0xf4060bf4,
-/* 0x066a: memx_func_wait_vblank_head1 */
-	0x77f12e0e,
-	0x0ef40020,
-/* 0x0671: memx_func_wait_vblank_head0 */
-	0x0877f107,
-/* 0x0675: memx_func_wait_vblank_0 */
-	0xc467f100,
-	0x0664b607,
-	0xfd0066cf,
-	0x1bf40467,
-/* 0x0685: memx_func_wait_vblank_1 */
-	0xc467f1f3,
-	0x0664b607,
-	0xfd0066cf,
-	0x0bf40467,
-/* 0x0695: memx_func_wait_vblank_fini */
-	0x0410b6f3,
-/* 0x069a: memx_func_wr32 */
-	0x169800f8,
-	0x01159800,
-	0xf90810b6,
-	0xfc50f960,
-	0xf4e0fcd0,
-	0x42b63f21,
-	0xe91bf402,
-/* 0x06b6: memx_func_wait */
-	0x87f000f8,
-	0x0684b62c,
-	0x980088cf,
-	0x1d98001e,
-	0x021c9801,
-	0xb6031b98,
-	0x21f41010,
-/* 0x06d3: memx_func_delay */
-	0x9800f8a4,
-	0x10b6001e,
-	0x7f21f404,
-/* 0x06de: memx_func_train */
-	0x00f800f8,
-/* 0x06e0: memx_exec */
-	0xd0f9e0f9,
-	0xb902c1b9,
-/* 0x06ea: memx_exec_next */
-	0x139802b2,
+/* 0x065b: memx_func_wait_vblank */
+	0x9800f840,
+	0x66b00016,
+	0x130bf400,
+	0xf40166b0,
+	0x0ef4060b,
+/* 0x066d: memx_func_wait_vblank_head1 */
+	0x2077f12e,
+	0x070ef400,
+/* 0x0674: memx_func_wait_vblank_head0 */
+	0x000877f1,
+/* 0x0678: memx_func_wait_vblank_0 */
+	0x07c467f1,
+	0xcf0664b6,
+	0x67fd0066,
+	0xf31bf404,
+/* 0x0688: memx_func_wait_vblank_1 */
+	0x07c467f1,
+	0xcf0664b6,
+	0x67fd0066,
+	0xf30bf404,
+/* 0x0698: memx_func_wait_vblank_fini */
+	0xf80410b6,
+/* 0x069d: memx_func_wr32 */
+	0x00169800,
+	0xb6011598,
+	0x60f90810,
+	0xd0fc50f9,
+	0x21f4e0fc,
+	0x0242b640,
+	0xf8e91bf4,
+/* 0x06b9: memx_func_wait */
+	0x2c87f000,
+	0xcf0684b6,
+	0x1e980088,
+	0x011d9800,
+	0x98021c98,
+	0x10b6031b,
+	0xa321f410,
+/* 0x06d6: memx_func_delay */
+	0x1e9800f8,
 	0x0410b600,
-	0x01f034e7,
-	0x01e033e7,
-	0xf00132b6,
-	0x35980c30,
-	0xb855f9de,
-	0x1ef40612,
-	0xf10b98e4,
-	0xbbf20c98,
-	0xb7f102cb,
-	0xb4b607c4,
-	0x00bbcf06,
-	0xe0fcd0fc,
-	0x033721f5,
-/* 0x0726: memx_info */
-	0xc67000f8,
-	0x0e0bf401,
-/* 0x072c: memx_info_data */
-	0x03ccc7f1,
-	0x0800b7f1,
-/* 0x0737: memx_info_train */
-	0xf10b0ef4,
-	0xf10bccc7,
-/* 0x073f: memx_info_send */
-	0xf50100b7,
-	0xf8033721,
-/* 0x0745: memx_recv */
-	0x01d6b000,
-	0xb0980bf4,
-	0x0bf400d6,
-/* 0x0753: memx_init */
-	0xf800f8d8,
-/* 0x0755: perf_recv */
-/* 0x0757: perf_init */
-	0xf800f800,
-/* 0x0759: i2c_drive_scl */
-	0x0036b000,
-	0xf1110bf4,
-	0xb607e007,
-	0x01d00604,
-	0xf804bd00,
-/* 0x076d: i2c_drive_scl_lo */
-	0xe407f100,
-	0x0604b607,
-	0xbd0001d0,
-/* 0x077b: i2c_drive_sda */
-	0xb000f804,
-	0x0bf40036,
-	0xe007f111,
-	0x0604b607,
-	0xbd0002d0,
-/* 0x078f: i2c_drive_sda_lo */
-	0xf100f804,
-	0xb607e407,
-	0x02d00604,
-	0xf804bd00,
-/* 0x079d: i2c_sense_scl */
-	0x0132f400,
-	0x07c437f1,
-	0xcf0634b6,
-	0x31fd0033,
-	0x060bf404,
-/* 0x07b3: i2c_sense_scl_done */
-	0xf80131f4,
-/* 0x07b5: i2c_sense_sda */
-	0x0132f400,
-	0x07c437f1,
-	0xcf0634b6,
-	0x32fd0033,
-	0x060bf404,
-/* 0x07cb: i2c_sense_sda_done */
-	0xf80131f4,
-/* 0x07cd: i2c_raise_scl */
-	0xf140f900,
-	0xf0089847,
-	0x21f50137,
-/* 0x07da: i2c_raise_scl_wait */
-	0xe7f10759,
-	0x21f403e8,
-	0x9d21f57f,
-	0x0901f407,
-	0xf40142b6,
-/* 0x07ee: i2c_raise_scl_done */
-	0x40fcef1b,
-/* 0x07f2: i2c_start */
-	0x21f500f8,
-	0x11f4079d,
-	0xb521f50d,
-	0x0611f407,
-/* 0x0803: i2c_start_rep */
-	0xf0300ef4,
-	0x21f50037,
-	0x37f00759,
-	0x7b21f501,
-	0x0076bb07,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b607cd,
-	0x1f11f404,
-/* 0x0830: i2c_start_send */
-	0xf50037f0,
-	0xf1077b21,
-	0xf41388e7,
-	0x37f07f21,
-	0x5921f500,
-	0x88e7f107,
-	0x7f21f413,
-/* 0x084c: i2c_start_out */
-/* 0x084e: i2c_stop */
-	0x37f000f8,
-	0x5921f500,
-	0x0037f007,
-	0x077b21f5,
-	0x03e8e7f1,
-	0xf07f21f4,
-	0x21f50137,
-	0xe7f10759,
-	0x21f41388,
-	0x0137f07f,
-	0x077b21f5,
-	0x1388e7f1,
-	0xf87f21f4,
-/* 0x0881: i2c_bitw */
-	0x7b21f500,
-	0xe8e7f107,
-	0x7f21f403,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xcd21f550,
-	0x0464b607,
-	0xf11811f4,
-	0xf41388e7,
-	0x37f07f21,
-	0x5921f500,
-	0x88e7f107,
-	0x7f21f413,
-/* 0x08c0: i2c_bitw_out */
-/* 0x08c2: i2c_bitr */
-	0x37f000f8,
-	0x7b21f501,
+	0xf87e21f4,
+/* 0x06e1: memx_func_train */
+/* 0x06e3: memx_exec */
+	0xf900f800,
+	0xb9d0f9e0,
+	0xb2b902c1,
+/* 0x06ed: memx_exec_next */
+	0x00139802,
+	0xe70410b6,
+	0xe701f034,
+	0xb601e033,
+	0x30f00132,
+	0xde35980c,
+	0x12b855f9,
+	0xe41ef406,
+	0x98f10b98,
+	0xcbbbf20c,
+	0xc4b7f102,
+	0x06b4b607,
+	0xfc00bbcf,
+	0xf5e0fcd0,
+	0xf8033621,
+/* 0x0729: memx_info */
+	0x01c67000,
+/* 0x072f: memx_info_data */
+	0xf10e0bf4,
+	0xf103ccc7,
+	0xf40800b7,
+/* 0x073a: memx_info_train */
+	0xc7f10b0e,
+	0xb7f10bcc,
+/* 0x0742: memx_info_send */
+	0x21f50100,
+	0x00f80336,
+/* 0x0748: memx_recv */
+	0xf401d6b0,
+	0xd6b0980b,
+	0xd80bf400,
+/* 0x0756: memx_init */
+	0x00f800f8,
+/* 0x0758: perf_recv */
+/* 0x075a: perf_init */
+	0x00f800f8,
+/* 0x075c: i2c_drive_scl */
+	0xf40036b0,
+	0x07f1110b,
+	0x04b607e0,
+	0x0001d006,
+	0x00f804bd,
+/* 0x0770: i2c_drive_scl_lo */
+	0x07e407f1,
+	0xd00604b6,
+	0x04bd0001,
+/* 0x077e: i2c_drive_sda */
+	0x36b000f8,
+	0x110bf400,
+	0x07e007f1,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x0792: i2c_drive_sda_lo */
+	0x07f100f8,
+	0x04b607e4,
+	0x0002d006,
+	0x00f804bd,
+/* 0x07a0: i2c_sense_scl */
+	0xf10132f4,
+	0xb607c437,
+	0x33cf0634,
+	0x0431fd00,
+	0xf4060bf4,
+/* 0x07b6: i2c_sense_scl_done */
+	0x00f80131,
+/* 0x07b8: i2c_sense_sda */
+	0xf10132f4,
+	0xb607c437,
+	0x33cf0634,
+	0x0432fd00,
+	0xf4060bf4,
+/* 0x07ce: i2c_sense_sda_done */
+	0x00f80131,
+/* 0x07d0: i2c_raise_scl */
+	0x47f140f9,
+	0x37f00898,
+	0x5c21f501,
+/* 0x07dd: i2c_raise_scl_wait */
 	0xe8e7f107,
-	0x7f21f403,
+	0x7e21f403,
+	0x07a021f5,
+	0xb60901f4,
+	0x1bf40142,
+/* 0x07f1: i2c_raise_scl_done */
+	0xf840fcef,
+/* 0x07f5: i2c_start */
+	0xa021f500,
+	0x0d11f407,
+	0x07b821f5,
+	0xf40611f4,
+/* 0x0806: i2c_start_rep */
+	0x37f0300e,
+	0x5c21f500,
+	0x0137f007,
+	0x077e21f5,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xcd21f550,
+	0xd021f550,
 	0x0464b607,
-	0xf51b11f4,
-	0xf007b521,
+/* 0x0833: i2c_start_send */
+	0xf01f11f4,
 	0x21f50037,
-	0xe7f10759,
+	0xe7f1077e,
 	0x21f41388,
-	0x013cf07f,
-/* 0x0907: i2c_bitr_done */
-	0xf80131f4,
-/* 0x0909: i2c_get_byte */
-	0x0057f000,
-/* 0x090f: i2c_get_byte_next */
-	0xb60847f0,
-	0x76bb0154,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb608c221,
-	0x11f40464,
-	0x0553fd2b,
-	0xf40142b6,
-	0x37f0d81b,
+	0x0037f07e,
+	0x075c21f5,
+	0x1388e7f1,
+/* 0x084f: i2c_start_out */
+	0xf87e21f4,
+/* 0x0851: i2c_stop */
+	0x0037f000,
+	0x075c21f5,
+	0xf50037f0,
+	0xf1077e21,
+	0xf403e8e7,
+	0x37f07e21,
+	0x5c21f501,
+	0x88e7f107,
+	0x7e21f413,
+	0xf50137f0,
+	0xf1077e21,
+	0xf41388e7,
+	0x00f87e21,
+/* 0x0884: i2c_bitw */
+	0x077e21f5,
+	0x03e8e7f1,
+	0xbb7e21f4,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x07d021f5,
+	0xf40464b6,
+	0xe7f11811,
+	0x21f41388,
+	0x0037f07e,
+	0x075c21f5,
+	0x1388e7f1,
+/* 0x08c3: i2c_bitw_out */
+	0xf87e21f4,
+/* 0x08c5: i2c_bitr */
+	0x0137f000,
+	0x077e21f5,
+	0x03e8e7f1,
+	0xbb7e21f4,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x07d021f5,
+	0xf40464b6,
+	0x21f51b11,
+	0x37f007b8,
+	0x5c21f500,
+	0x88e7f107,
+	0x7e21f413,
+	0xf4013cf0,
+/* 0x090a: i2c_bitr_done */
+	0x00f80131,
+/* 0x090c: i2c_get_byte */
+	0xf00057f0,
+/* 0x0912: i2c_get_byte_next */
+	0x54b60847,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b60881,
-/* 0x0959: i2c_get_byte_done */
-/* 0x095b: i2c_put_byte */
-	0xf000f804,
-/* 0x095e: i2c_put_byte_next */
-	0x42b60847,
-	0x3854ff01,
+	0x64b608c5,
+	0x2b11f404,
+	0xb60553fd,
+	0x1bf40142,
+	0x0137f0d8,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x8421f550,
+	0x0464b608,
+/* 0x095c: i2c_get_byte_done */
+/* 0x095e: i2c_put_byte */
+	0x47f000f8,
+/* 0x0961: i2c_put_byte_next */
+	0x0142b608,
+	0xbb3854ff,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x088421f5,
+	0xf40464b6,
+	0x46b03411,
+	0xd81bf400,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x8121f550,
+	0xc521f550,
 	0x0464b608,
-	0xb03411f4,
-	0x1bf40046,
-	0x0076bbd8,
+	0xbb0f11f4,
+	0x36b00076,
+	0x061bf401,
+/* 0x09b7: i2c_put_byte_done */
+	0xf80132f4,
+/* 0x09b9: i2c_addr */
+	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b608c2,
-	0x0f11f404,
-	0xb00076bb,
-	0x1bf40136,
-	0x0132f406,
-/* 0x09b4: i2c_put_byte_done */
-/* 0x09b6: i2c_addr */
-	0x76bb00f8,
+	0x64b607f5,
+	0x2911f404,
+	0x012ec3e7,
+	0xfd0134b6,
+	0x76bb0553,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb607f221,
-	0x11f40464,
-	0x2ec3e729,
-	0x0134b601,
-	0xbb0553fd,
+	0xb6095e21,
+/* 0x09fe: i2c_addr_done */
+	0x00f80464,
+/* 0x0a00: i2c_acquire_addr */
+	0xb6f8cec7,
+	0xe0b702e4,
+	0xee980d1c,
+/* 0x0a0f: i2c_acquire */
+	0xf500f800,
+	0xf40a0021,
+	0xd9f00421,
+	0x4021f403,
+/* 0x0a1e: i2c_release */
+	0x21f500f8,
+	0x21f40a00,
+	0x03daf004,
+	0xf84021f4,
+/* 0x0a2d: i2c_recv */
+	0x0132f400,
+	0xb6f8c1c7,
+	0x16b00214,
+	0x3a1ff528,
+	0xf413a001,
+	0x0032980c,
+	0x0ccc13a0,
+	0xf4003198,
+	0xd0f90231,
+	0xd0f9e0f9,
+	0x000067f1,
+	0x100063f1,
+	0xbb016792,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x095b21f5,
-/* 0x09fb: i2c_addr_done */
-	0xf80464b6,
-/* 0x09fd: i2c_acquire_addr */
-	0xf8cec700,
-	0xb702e4b6,
-	0x980d1ce0,
-	0x00f800ee,
-/* 0x0a0c: i2c_acquire */
-	0x09fd21f5,
-	0xf00421f4,
-	0x21f403d9,
-/* 0x0a1b: i2c_release */
-	0xf500f83f,
-	0xf409fd21,
-	0xdaf00421,
-	0x3f21f403,
-/* 0x0a2a: i2c_recv */
-	0x32f400f8,
-	0xf8c1c701,
-	0xb00214b6,
-	0x1ff52816,
-	0x13a0013a,
-	0x32980cf4,
-	0xcc13a000,
-	0x0031980c,
-	0xf90231f4,
-	0xf9e0f9d0,
-	0x0067f1d0,
-	0x0063f100,
-	0x01679210,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x0c21f550,
-	0x0464b60a,
-	0xd6b0d0fc,
-	0xb31bf500,
-	0x0057f000,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xb621f550,
-	0x0464b609,
-	0x00d011f5,
-	0xbbe0c5c7,
+	0x0a0f21f5,
+	0xfc0464b6,
+	0x00d6b0d0,
+	0x00b31bf5,
+	0xbb0057f0,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x095b21f5,
+	0x09b921f5,
 	0xf50464b6,
-	0xf000ad11,
-	0x76bb0157,
+	0xc700d011,
+	0x76bbe0c5,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb609b621,
+	0xb6095e21,
 	0x11f50464,
-	0x76bb008a,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb6090921,
-	0x11f40464,
-	0xe05bcb6a,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x4e21f550,
-	0x0464b608,
-	0xbd025bb9,
-	0x430ef474,
-/* 0x0b30: i2c_recv_not_rd08 */
-	0xf401d6b0,
-	0x57f03d1b,
-	0xb621f500,
-	0x3311f409,
-	0xf5e0c5c7,
-	0xf4095b21,
-	0x57f02911,
-	0xb621f500,
-	0x1f11f409,
-	0xf5e0b5c7,
-	0xf4095b21,
-	0x21f51511,
-	0x74bd084e,
-	0xf408c5c7,
-	0x32f4091b,
-	0x030ef402,
-/* 0x0b70: i2c_recv_not_wr08 */
-/* 0x0b70: i2c_recv_done */
-	0xf5f8cec7,
-	0xfc0a1b21,
-	0xf4d0fce0,
-	0x7cb90a12,
-	0x3721f502,
-/* 0x0b85: i2c_recv_exit */
-/* 0x0b87: i2c_init */
-	0xf800f803,
-/* 0x0b89: test_recv */
-	0xd817f100,
-	0x0614b605,
-	0xb60011cf,
-	0x07f10110,
-	0x04b605d8,
-	0x0001d006,
-	0xe7f104bd,
-	0xe3f1d900,
-	0x21f5134f,
-	0x00f80257,
-/* 0x0bb0: test_init */
-	0x0800e7f1,
-	0x025721f5,
-/* 0x0bba: idle_recv */
+	0x57f000ad,
+	0x0076bb01,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b609b9,
+	0x8a11f504,
+	0x0076bb00,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b6090c,
+	0x6a11f404,
+	0xbbe05bcb,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x085121f5,
+	0xb90464b6,
+	0x74bd025b,
+/* 0x0b33: i2c_recv_not_rd08 */
+	0xb0430ef4,
+	0x1bf401d6,
+	0x0057f03d,
+	0x09b921f5,
+	0xc73311f4,
+	0x21f5e0c5,
+	0x11f4095e,
+	0x0057f029,
+	0x09b921f5,
+	0xc71f11f4,
+	0x21f5e0b5,
+	0x11f4095e,
+	0x5121f515,
+	0xc774bd08,
+	0x1bf408c5,
+	0x0232f409,
+/* 0x0b73: i2c_recv_not_wr08 */
+/* 0x0b73: i2c_recv_done */
+	0xc7030ef4,
+	0x21f5f8ce,
+	0xe0fc0a1e,
+	0x12f4d0fc,
+	0x027cb90a,
+	0x033621f5,
+/* 0x0b88: i2c_recv_exit */
+/* 0x0b8a: i2c_init */
 	0x00f800f8,
-/* 0x0bbc: idle */
-	0xf10031f4,
-	0xb605d417,
-	0x11cf0614,
-	0x0110b600,
-	0x05d407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x0bd8: idle_loop */
-	0xf45817f0,
-/* 0x0bde: idle_proc */
-/* 0x0bde: idle_proc_exec */
-	0x10f90232,
-	0xf5021eb9,
-	0xfc034021,
-	0x0911f410,
-	0xf40231f4,
-/* 0x0bf2: idle_proc_next */
-	0x10b6ef0e,
-	0x061fb858,
-	0xf4e61bf4,
-	0x28f4dd02,
-	0xbb0ef400,
-	0x00000000,
+/* 0x0b8c: test_recv */
+	0x05d817f1,
+	0xcf0614b6,
+	0x10b60011,
+	0xd807f101,
+	0x0604b605,
+	0xbd0001d0,
+	0x00e7f104,
+	0x4fe3f1d9,
+	0x5621f513,
+/* 0x0bb3: test_init */
+	0xf100f802,
+	0xf50800e7,
+	0xf8025621,
+/* 0x0bbd: idle_recv */
+/* 0x0bbf: idle */
+	0xf400f800,
+	0x17f10031,
+	0x14b605d4,
+	0x0011cf06,
+	0xf10110b6,
+	0xb605d407,
+	0x01d00604,
+/* 0x0bdb: idle_loop */
+	0xf004bd00,
+	0x32f45817,
+/* 0x0be1: idle_proc */
+/* 0x0be1: idle_proc_exec */
+	0xb910f902,
+	0x21f5021e,
+	0x10fc033f,
+	0xf40911f4,
+	0x0ef40231,
+/* 0x0bf5: idle_proc_next */
+	0x5810b6ef,
+	0xf4061fb8,
+	0x02f4e61b,
+	0x0028f4dd,
+	0x00bb0ef4,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
index 7bf6b39ed205..2d5bdc539697 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
@@ -24,8 +24,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000492,
-	0x0000043b,
+	0x00000495,
+	0x0000043e,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000680,
-	0x00000672,
+	0x00000683,
+	0x00000675,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000684,
-	0x00000682,
+	0x00000687,
+	0x00000685,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000a9f,
-	0x00000942,
+	0x00000aa2,
+	0x00000945,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000ac2,
-	0x00000aa1,
+	0x00000ac5,
+	0x00000aa4,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000ace,
-	0x00000acc,
+	0x00000ad1,
+	0x00000acf,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -229,26 +229,26 @@ uint32_t gf119_pmu_data[] = {
 /* 0x0370: memx_func_head */
 	0x00000001,
 	0x00000000,
-	0x000004c8,
+	0x000004cb,
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x00000549,
+	0x0000054c,
 	0x00000003,
 	0x00000002,
-	0x000005cd,
+	0x000005d0,
 	0x00040004,
 	0x00000000,
-	0x000005e9,
+	0x000005ec,
 	0x00010005,
 	0x00000000,
-	0x00000603,
+	0x00000606,
 	0x00010006,
 	0x00000000,
-	0x000005c8,
+	0x000005cb,
 	0x00000007,
 	0x00000000,
-	0x0000060e,
+	0x00000611,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -916,821 +916,821 @@ uint32_t gf119_pmu_data[] = {
 };
 
 uint32_t gf119_pmu_code[] = {
-	0x03420ef5,
+	0x03410ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xbd000ed0,
-	0x01d7f004,
-	0xf101d3f0,
-	0xd007ac07,
-	0x04bd000d,
-/* 0x001c: rd32_wait */
-	0x07acd7f1,
-	0xf100ddcf,
-	0xf47000d4,
-	0xd7f1f51b,
-	0xddcf07a4,
-/* 0x0033: wr32 */
-	0xf100f800,
-	0xd007a007,
-	0x04bd000e,
-	0x07a407f1,
+	0x01d7f104,
+	0x01d3f000,
+	0x07ac07f1,
 	0xbd000dd0,
-	0x02d7f004,
-	0xf0f0d5f0,
-	0x07f101d3,
-	0x0dd007ac,
-/* 0x0057: wr32_wait */
-	0xf104bd00,
-	0xcf07acd7,
-	0xd4f100dd,
-	0x1bf47000,
-/* 0x0067: nsec */
-	0xf900f8f5,
-	0xf080f990,
-	0x88cf2c87,
-/* 0x0071: nsec_loop */
-	0x2c97f000,
-	0xbb0099cf,
-	0x9eb80298,
-	0xf41ef406,
-	0x90fc80fc,
-/* 0x0086: wait */
+/* 0x001d: rd32_wait */
+	0xacd7f104,
+	0x00ddcf07,
+	0x7000d4f1,
+	0xf1f51bf4,
+	0xcf07a4d7,
+	0x00f800dd,
+/* 0x0034: wr32 */
+	0x07a007f1,
+	0xbd000ed0,
+	0xa407f104,
+	0x000dd007,
+	0xd7f104bd,
+	0xd3f000f2,
+	0xac07f101,
+	0x000dd007,
+/* 0x0056: wr32_wait */
+	0xd7f104bd,
+	0xddcf07ac,
+	0x00d4f100,
+	0xf51bf470,
+/* 0x0066: nsec */
 	0x90f900f8,
 	0x87f080f9,
 	0x0088cf2c,
-/* 0x0090: wait_loop */
-	0xf402eeb9,
-	0xdab90421,
-	0x04adfd02,
-	0xf406acb8,
-	0x97f0120b,
-	0x0099cf2c,
-	0xb80298bb,
-	0x1ef4069b,
-/* 0x00b1: wait_done */
-	0xfc80fce2,
-/* 0x00b7: intr_watchdog */
-	0x9800f890,
-	0x96b003e9,
-	0x2a0bf400,
-	0xbb9a0a98,
-	0x1cf4029a,
-	0x01d7f00f,
-	0x028121f5,
-	0x0ef494bd,
-/* 0x00d5: intr_watchdog_next_time */
-	0x9b0a9815,
-	0xf400a6b0,
-	0x9ab8090b,
-	0x061cf406,
-/* 0x00e4: intr_watchdog_next_time_set */
-/* 0x00e7: intr_watchdog_next_proc */
-	0x809b0980,
-	0xe0b603e9,
-	0x68e6b158,
-	0xc61bf402,
-/* 0x00f6: intr */
-	0x00f900f8,
-	0x80f904bd,
-	0xa0f990f9,
-	0xc0f9b0f9,
-	0xe0f9d0f9,
-	0xf7f0f0f9,
-	0x0188fe00,
-	0x87f180f9,
-	0x88cf05d0,
-	0x0180b600,
-	0x05d007f1,
-	0xbd0008d0,
-	0x0887f004,
-	0xc40088cf,
-	0x0bf40289,
-	0x9b008020,
-	0xf458e7f0,
-	0x0998b721,
-	0x0096b09b,
-	0xf00e0bf4,
-	0x09d03407,
-	0x8004bd00,
-/* 0x014e: intr_skip_watchdog */
-	0x89e49a09,
-	0x0bf40800,
-	0x8897f13c,
-	0x0099cf06,
-	0xf4029ac4,
-	0xc7f1260b,
-	0xcccf04c0,
-	0xf1c0f900,
-	0xf14f48e7,
-	0xf05453e3,
-	0x21f500d7,
-	0xc0fc02e6,
-	0x04c007f1,
-	0xbd000cd0,
-/* 0x0185: intr_subintr_skip_fifo */
-	0x8807f104,
-	0x0009d006,
-/* 0x018e: intr_skip_subintr */
-	0x97f104bd,
-	0x90bd00e0,
-	0xf00489fd,
-	0x08d00407,
-	0xfc04bd00,
-	0x0088fe80,
-	0xe0fcf0fc,
-	0xc0fcd0fc,
-	0xa0fcb0fc,
-	0x80fc90fc,
-	0x32f400fc,
-/* 0x01bb: ticks_from_ns */
-	0xf901f800,
+/* 0x0070: nsec_loop */
+	0xcf2c97f0,
+	0x98bb0099,
+	0x069eb802,
+	0xfcf41ef4,
+	0xf890fc80,
+/* 0x0085: wait */
+	0xf990f900,
+	0x2c87f080,
+/* 0x008f: wait_loop */
+	0xb90088cf,
+	0x21f402ee,
+	0x02dab904,
+	0xb804adfd,
+	0x0bf406ac,
+	0x2c97f012,
+	0xbb0099cf,
+	0x9bb80298,
+	0xe21ef406,
+/* 0x00b0: wait_done */
+	0x90fc80fc,
+/* 0x00b6: intr_watchdog */
+	0xe99800f8,
+	0x0096b003,
+	0x982a0bf4,
+	0x9abb9a0a,
+	0x0f1cf402,
+	0xf501d7f0,
+	0xbd028021,
+	0x150ef494,
+/* 0x00d4: intr_watchdog_next_time */
+	0xb09b0a98,
+	0x0bf400a6,
+	0x069ab809,
+/* 0x00e3: intr_watchdog_next_time_set */
+	0x80061cf4,
+/* 0x00e6: intr_watchdog_next_proc */
+	0xe9809b09,
+	0x58e0b603,
+	0x0268e6b1,
+	0xf8c61bf4,
+/* 0x00f5: intr */
+	0xbd00f900,
+	0xf980f904,
+	0xf9a0f990,
+	0xf9c0f9b0,
+	0xf9e0f9d0,
+	0x00f7f0f0,
+	0xf90188fe,
+	0xd087f180,
+	0x0088cf05,
+	0xf10180b6,
+	0xd005d007,
+	0x04bd0008,
+	0xcf0887f0,
+	0x89c40088,
+	0x200bf402,
+	0xf09b0080,
+	0x21f458e7,
+	0x9b0998b6,
+	0xf40096b0,
+	0x07f00e0b,
+	0x0009d034,
+	0x098004bd,
+/* 0x014d: intr_skip_watchdog */
+	0x0089e49a,
+	0x3c0bf408,
+	0x068897f1,
+	0xc40099cf,
+	0x0bf4029a,
+	0xc0c7f126,
+	0x00cccf04,
+	0xe7f1c0f9,
+	0xe3f14f48,
+	0xd7f05453,
+	0xe521f500,
+	0xf1c0fc02,
+	0xd004c007,
+	0x04bd000c,
+/* 0x0184: intr_subintr_skip_fifo */
+	0x068807f1,
+	0xbd0009d0,
+/* 0x018d: intr_skip_subintr */
+	0xe097f104,
+	0xfd90bd00,
+	0x07f00489,
+	0x0008d004,
+	0x80fc04bd,
+	0xfc0088fe,
+	0xfce0fcf0,
+	0xfcc0fcd0,
+	0xfca0fcb0,
+	0xfc80fc90,
+	0x0032f400,
+/* 0x01ba: ticks_from_ns */
+	0xc0f901f8,
+	0xd7f1b0f9,
+	0xd3f00144,
+	0xab21f500,
+	0xe8ccec03,
+	0x00b4b003,
+	0xec120bf4,
+	0xf103e8ee,
+	0xf00144d7,
+	0x21f500d3,
+/* 0x01e2: ticks_from_ns_quit */
+	0xceb903ab,
+	0xfcb0fc02,
+/* 0x01eb: ticks_from_us */
+	0xf900f8c0,
 	0xf1b0f9c0,
 	0xf00144d7,
 	0x21f500d3,
-	0xccec03a8,
-	0xb4b003e8,
-	0x120bf400,
-	0x03e8eeec,
-	0x0144d7f1,
-	0xf500d3f0,
-/* 0x01e3: ticks_from_ns_quit */
-	0xb903a821,
-	0xb0fc02ce,
-	0x00f8c0fc,
-/* 0x01ec: ticks_from_us */
-	0xb0f9c0f9,
-	0x0144d7f1,
-	0xf500d3f0,
-	0xb903a821,
-	0xb4b002ce,
-	0x050bf400,
-/* 0x0206: ticks_from_us_quit */
-	0xb0fce4bd,
-	0x00f8c0fc,
-/* 0x020c: ticks_to_us */
-	0x0144d7f1,
-	0xff00d3f0,
-	0x00f8eced,
-/* 0x0218: timer */
-	0x80f990f9,
-	0x981032f4,
-	0x86b003f8,
-	0x531cf400,
-	0x07f084bd,
-	0x0008d038,
-	0x87f004bd,
-	0x0088cf34,
-	0xbb9a0998,
-	0xe9bb0298,
-	0x03fe8000,
-	0xcf0887f0,
-	0x84f00088,
-	0x201bf402,
-	0xcf3487f0,
-	0xe0b80088,
-	0x090bf406,
-	0xf406e8b8,
-/* 0x0262: timer_reset */
-	0x07f00e1c,
-	0x000ed034,
-	0x0e8004bd,
-/* 0x026d: timer_enable */
-	0x0187f09a,
-	0xd03807f0,
-	0x04bd0008,
-/* 0x0278: timer_done */
-	0xfc1031f4,
+	0xceb903ab,
+	0x00b4b002,
+	0xbd050bf4,
+/* 0x0205: ticks_from_us_quit */
+	0xfcb0fce4,
+/* 0x020b: ticks_to_us */
+	0xf100f8c0,
+	0xf00144d7,
+	0xedff00d3,
+/* 0x0217: timer */
+	0xf900f8ec,
+	0xf480f990,
+	0xf8981032,
+	0x0086b003,
+	0xbd531cf4,
+	0x3807f084,
+	0xbd0008d0,
+	0x3487f004,
+	0x980088cf,
+	0x98bb9a09,
+	0x00e9bb02,
+	0xf003fe80,
+	0x88cf0887,
+	0x0284f000,
+	0xf0201bf4,
+	0x88cf3487,
+	0x06e0b800,
+	0xb8090bf4,
+	0x1cf406e8,
+/* 0x0261: timer_reset */
+	0x3407f00e,
+	0xbd000ed0,
+	0x9a0e8004,
+/* 0x026c: timer_enable */
+	0xf00187f0,
+	0x08d03807,
+/* 0x0277: timer_done */
+	0xf404bd00,
+	0x80fc1031,
+	0x00f890fc,
+/* 0x0280: send_proc */
+	0x90f980f9,
+	0x9805e898,
+	0x86f004e9,
+	0x0689b804,
+	0xc42a0bf4,
+	0x88940398,
+	0x1880b604,
+	0x98008ebb,
+	0x8a8000fa,
+	0x018d8000,
+	0x80028c80,
+	0x90b6038b,
+	0x0794f001,
+	0xf404e980,
+/* 0x02ba: send_done */
+	0x90fc0231,
+	0x00f880fc,
+/* 0x02c0: find */
+	0x87f080f9,
+	0x0131f458,
+/* 0x02c8: find_loop */
+	0xb8008a98,
+	0x0bf406ae,
+	0x5880b610,
+	0x026886b1,
+	0xf4f01bf4,
+/* 0x02de: find_done */
+	0x8eb90132,
+	0xf880fc02,
+/* 0x02e5: send */
+	0xc021f500,
+	0x9701f402,
+/* 0x02ee: recv */
+	0x90f900f8,
+	0xe89880f9,
+	0x04e99805,
+	0xb80132f4,
+	0x0bf40689,
+	0x0389c43d,
+	0xf00180b6,
+	0xe8800784,
+	0x02ea9805,
+	0x8ffef0f9,
+	0xb9f0f901,
+	0x999402ef,
+	0x00e9bb04,
+	0x9818e0b6,
+	0xec9803eb,
+	0x01ed9802,
+	0xf900ee98,
+	0xfef0fca5,
+	0x31f400f8,
+/* 0x033b: recv_done */
+	0xfcf0fc01,
 	0xf890fc80,
-/* 0x0281: send_proc */
-	0xf980f900,
-	0x05e89890,
-	0xf004e998,
-	0x89b80486,
-	0x2a0bf406,
-	0x940398c4,
-	0x80b60488,
-	0x008ebb18,
-	0x8000fa98,
-	0x8d80008a,
-	0x028c8001,
-	0xb6038b80,
-	0x94f00190,
-	0x04e98007,
-/* 0x02bb: send_done */
-	0xfc0231f4,
-	0xf880fc90,
-/* 0x02c1: find */
-	0xf080f900,
-	0x31f45887,
-/* 0x02c9: find_loop */
-	0x008a9801,
-	0xf406aeb8,
-	0x80b6100b,
-	0x6886b158,
-	0xf01bf402,
-/* 0x02df: find_done */
-	0xb90132f4,
-	0x80fc028e,
-/* 0x02e6: send */
-	0x21f500f8,
-	0x01f402c1,
-/* 0x02ef: recv */
-	0xf900f897,
-	0x9880f990,
-	0xe99805e8,
-	0x0132f404,
-	0xf40689b8,
-	0x89c43d0b,
-	0x0180b603,
-	0x800784f0,
-	0xea9805e8,
-	0xfef0f902,
-	0xf0f9018f,
-	0x9402efb9,
-	0xe9bb0499,
-	0x18e0b600,
-	0x9803eb98,
-	0xed9802ec,
-	0x00ee9801,
-	0xf0fca5f9,
-	0xf400f8fe,
-	0xf0fc0131,
-/* 0x033c: recv_done */
-	0x90fc80fc,
-/* 0x0342: init */
-	0x17f100f8,
-	0x11cf0108,
-	0x0911e700,
-	0x0814b601,
-	0xf10014fe,
-	0xf000e017,
-	0x07f00013,
-	0x0001d01c,
-	0x17f004bd,
-	0x1407f0ff,
+/* 0x0341: init */
+	0x0817f100,
+	0x0011cf01,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
 	0xbd0001d0,
-	0x0217f004,
-	0x080015f1,
-	0xd01007f0,
-	0x04bd0001,
-	0x00f617f1,
-	0xfe0013f0,
-	0x31f40010,
-	0x0117f010,
-	0xd03807f0,
+	0xff17f004,
+	0xd01407f0,
 	0x04bd0001,
-/* 0x0397: init_proc */
-	0x9858f7f0,
-	0x16b001f1,
-	0xfa0bf400,
-	0xf0b615f9,
-	0xf20ef458,
-/* 0x03a8: mulu32_32_64 */
-	0x20f910f9,
-	0x40f930f9,
-	0x9510e195,
-	0xc4bd10d2,
-	0xedffb4bd,
-	0x301dffc0,
-	0xf10234b9,
-	0xb6ffff34,
-	0x45b61034,
-	0x00c3bb10,
-	0xff01b4bb,
-	0x34b930e2,
-	0xff34f102,
-	0x1034b6ff,
-	0xbb1045b6,
-	0xb4bb00c3,
-	0x3012ff01,
-	0xfc00b3bb,
-	0xfc30fc40,
-	0xf810fc20,
-/* 0x03f9: host_send */
-	0xb017f100,
-	0x0011cf04,
-	0x04a027f1,
-	0xb80022cf,
-	0x0bf40612,
-	0x071ec42f,
-	0xb704ee94,
-	0x980270e0,
-	0xec9803eb,
-	0x01ed9802,
-	0xf500ee98,
-	0xb602e621,
-	0x1ec40110,
-	0xb007f10f,
-	0x000ed004,
-	0x0ef404bd,
-/* 0x0439: host_send_done */
-/* 0x043b: host_recv */
-	0xf100f8c3,
-	0xf14e4917,
-	0xb8525413,
-	0x0bf406e1,
-/* 0x0449: host_recv_wait */
-	0xcc17f1b3,
-	0x0011cf04,
-	0x04c827f1,
-	0xf00022cf,
-	0x12b80816,
-	0xec0bf406,
-	0xb60723c4,
-	0x30b70434,
-	0x3b8002f0,
-	0x023c8003,
-	0x80013d80,
-	0x20b6003e,
-	0x0f24f001,
-	0x04c807f1,
-	0xbd0002d0,
-	0x4027f004,
-	0xd00007f0,
-	0x04bd0002,
-/* 0x0492: host_init */
+	0xf10217f0,
+	0xf0080015,
+	0x01d01007,
+	0xf104bd00,
+	0xf000f517,
+	0x14f10013,
+	0x10feffff,
+	0x1031f400,
+	0xf00117f0,
+	0x01d03807,
+	0xf004bd00,
+/* 0x039a: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x03ab: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x03fc: host_send */
+	0x04b017f1,
+	0xf10011cf,
+	0xcf04a027,
+	0x12b80022,
+	0x2f0bf406,
+	0x94071ec4,
+	0xe0b704ee,
+	0xeb980270,
+	0x02ec9803,
+	0x9801ed98,
+	0x21f500ee,
+	0x10b602e5,
+	0x0f1ec401,
+	0x04b007f1,
+	0xbd000ed0,
+	0xc30ef404,
+/* 0x043c: host_send_done */
+/* 0x043e: host_recv */
 	0x17f100f8,
-	0x14b60080,
-	0x7015f110,
-	0xd007f102,
-	0x0001d004,
-	0x17f104bd,
-	0x14b60080,
-	0xf015f110,
-	0xdc07f102,
-	0x0001d004,
-	0x17f004bd,
-	0xc407f101,
-	0x0001d004,
-	0x00f804bd,
-/* 0x04c8: memx_func_enter */
-	0x162067f1,
-	0xf55d77f1,
-	0xffff73f1,
-	0xf4026eb9,
-	0xd8b90421,
-	0x0487fd02,
-	0x80f960f9,
-	0xe0fcd0fc,
-	0xf13321f4,
-	0xf1fffe77,
+	0x13f14e49,
+	0xe1b85254,
+	0xb30bf406,
+/* 0x044c: host_recv_wait */
+	0x04cc17f1,
+	0xf10011cf,
+	0xcf04c827,
+	0x16f00022,
+	0x0612b808,
+	0xc4ec0bf4,
+	0x34b60723,
+	0xf030b704,
+	0x033b8002,
+	0x80023c80,
+	0x3e80013d,
+	0x0120b600,
+	0xf10f24f0,
+	0xd004c807,
+	0x04bd0002,
+	0xf04027f0,
+	0x02d00007,
+	0xf804bd00,
+/* 0x0495: host_init */
+	0x8017f100,
+	0x1014b600,
+	0x027015f1,
+	0x04d007f1,
+	0xbd0001d0,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
+	0xbd0001d0,
+	0x0117f004,
+	0x04c407f1,
+	0xbd0001d0,
+/* 0x04cb: memx_func_enter */
+	0xf100f804,
+	0xf1162067,
+	0xf1f55d77,
 	0xb9ffff73,
 	0x21f4026e,
 	0x02d8b904,
 	0xf90487fd,
 	0xfc80f960,
 	0xf4e0fcd0,
-	0x67f13321,
-	0x6eb926f0,
+	0x77f13421,
+	0x73f1fffe,
+	0x6eb9ffff,
 	0x0421f402,
 	0xfd02d8b9,
 	0x60f90487,
 	0xd0fc80f9,
 	0x21f4e0fc,
-	0x0467f033,
-	0x07e007f1,
+	0xf067f134,
+	0x026eb926,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f904,
+	0xfcd0fc80,
+	0x3421f4e0,
+	0xf10467f0,
+	0xd007e007,
+	0x04bd0006,
+/* 0x0534: memx_func_enter_wait */
+	0x07c067f1,
+	0xf00066cf,
+	0x0bf40464,
+	0x2c67f0f6,
+	0x800066cf,
+	0x00f8f106,
+/* 0x054c: memx_func_leave */
+	0xcf2c67f0,
+	0x06800066,
+	0x0467f0f2,
+	0x07e407f1,
 	0xbd0006d0,
-/* 0x0531: memx_func_enter_wait */
+/* 0x0561: memx_func_leave_wait */
 	0xc067f104,
 	0x0066cf07,
 	0xf40464f0,
-	0x67f0f60b,
-	0x0066cf2c,
-	0xf8f10680,
-/* 0x0549: memx_func_leave */
-	0x2c67f000,
-	0x800066cf,
-	0x67f0f206,
-	0xe407f104,
-	0x0006d007,
-/* 0x055e: memx_func_leave_wait */
-	0x67f104bd,
-	0x66cf07c0,
-	0x0464f000,
-	0xf1f61bf4,
-	0xf126f067,
-	0xf0000177,
+	0x67f1f61b,
+	0x77f126f0,
+	0x73f00001,
+	0x026eb900,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f905,
+	0xfcd0fc80,
+	0x3421f4e0,
+	0x162067f1,
+	0xf4026eb9,
+	0xd8b90421,
+	0x0587fd02,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0xf13421f4,
+	0xf00aa277,
 	0x6eb90073,
 	0x0421f402,
 	0xfd02d8b9,
 	0x60f90587,
 	0xd0fc80f9,
 	0x21f4e0fc,
-	0x2067f133,
-	0x026eb916,
-	0xb90421f4,
-	0x87fd02d8,
-	0xf960f905,
-	0xfcd0fc80,
-	0x3321f4e0,
-	0x0aa277f1,
-	0xb90073f0,
-	0x21f4026e,
-	0x02d8b904,
-	0xf90587fd,
-	0xfc80f960,
-	0xf4e0fcd0,
-	0x00f83321,
-/* 0x05c8: memx_func_wait_vblank */
-	0xf80410b6,
-/* 0x05cd: memx_func_wr32 */
-	0x00169800,
-	0xb6011598,
-	0x60f90810,
-	0xd0fc50f9,
-	0x21f4e0fc,
-	0x0242b633,
-	0xf8e91bf4,
-/* 0x05e9: memx_func_wait */
-	0x2c87f000,
-	0x980088cf,
-	0x1d98001e,
-	0x021c9801,
-	0xb6031b98,
-	0x21f41010,
-/* 0x0603: memx_func_delay */
-	0x9800f886,
-	0x10b6001e,
-	0x6721f404,
-/* 0x060e: memx_func_train */
-	0x00f800f8,
-/* 0x0610: memx_exec */
-	0xd0f9e0f9,
-	0xb902c1b9,
-/* 0x061a: memx_exec_next */
-	0x139802b2,
+/* 0x05cb: memx_func_wait_vblank */
+	0xb600f834,
+	0x00f80410,
+/* 0x05d0: memx_func_wr32 */
+	0x98001698,
+	0x10b60115,
+	0xf960f908,
+	0xfcd0fc50,
+	0x3421f4e0,
+	0xf40242b6,
+	0x00f8e91b,
+/* 0x05ec: memx_func_wait */
+	0xcf2c87f0,
+	0x1e980088,
+	0x011d9800,
+	0x98021c98,
+	0x10b6031b,
+	0x8521f410,
+/* 0x0606: memx_func_delay */
+	0x1e9800f8,
 	0x0410b600,
-	0x01f034e7,
-	0x01e033e7,
-	0xf00132b6,
-	0x35980c30,
-	0xb855f9de,
-	0x1ef40612,
-	0xf10b98e4,
-	0xbbf20c98,
-	0xb7f102cb,
-	0xbbcf07c4,
-	0xfcd0fc00,
-	0xe621f5e0,
-/* 0x0653: memx_info */
-	0x7000f802,
-	0x0bf401c6,
-/* 0x0659: memx_info_data */
-	0xccc7f10e,
-	0x00b7f103,
-	0x0b0ef408,
-/* 0x0664: memx_info_train */
-	0x0bccc7f1,
-	0x0100b7f1,
-/* 0x066c: memx_info_send */
-	0x02e621f5,
-/* 0x0672: memx_recv */
-	0xd6b000f8,
-	0x9b0bf401,
-	0xf400d6b0,
-	0x00f8d80b,
-/* 0x0680: memx_init */
-/* 0x0682: perf_recv */
-	0x00f800f8,
-/* 0x0684: perf_init */
-/* 0x0686: i2c_drive_scl */
-	0x36b000f8,
-	0x0e0bf400,
-	0x07e007f1,
-	0xbd0001d0,
-/* 0x0697: i2c_drive_scl_lo */
-	0xf100f804,
-	0xd007e407,
+	0xf86621f4,
+/* 0x0611: memx_func_train */
+/* 0x0613: memx_exec */
+	0xf900f800,
+	0xb9d0f9e0,
+	0xb2b902c1,
+/* 0x061d: memx_exec_next */
+	0x00139802,
+	0xe70410b6,
+	0xe701f034,
+	0xb601e033,
+	0x30f00132,
+	0xde35980c,
+	0x12b855f9,
+	0xe41ef406,
+	0x98f10b98,
+	0xcbbbf20c,
+	0xc4b7f102,
+	0x00bbcf07,
+	0xe0fcd0fc,
+	0x02e521f5,
+/* 0x0656: memx_info */
+	0xc67000f8,
+	0x0e0bf401,
+/* 0x065c: memx_info_data */
+	0x03ccc7f1,
+	0x0800b7f1,
+/* 0x0667: memx_info_train */
+	0xf10b0ef4,
+	0xf10bccc7,
+/* 0x066f: memx_info_send */
+	0xf50100b7,
+	0xf802e521,
+/* 0x0675: memx_recv */
+	0x01d6b000,
+	0xb09b0bf4,
+	0x0bf400d6,
+/* 0x0683: memx_init */
+	0xf800f8d8,
+/* 0x0685: perf_recv */
+/* 0x0687: perf_init */
+	0xf800f800,
+/* 0x0689: i2c_drive_scl */
+	0x0036b000,
+	0xf10e0bf4,
+	0xd007e007,
 	0x04bd0001,
-/* 0x06a2: i2c_drive_sda */
-	0x36b000f8,
-	0x0e0bf400,
-	0x07e007f1,
-	0xbd0002d0,
-/* 0x06b3: i2c_drive_sda_lo */
-	0xf100f804,
-	0xd007e407,
+/* 0x069a: i2c_drive_scl_lo */
+	0x07f100f8,
+	0x01d007e4,
+	0xf804bd00,
+/* 0x06a5: i2c_drive_sda */
+	0x0036b000,
+	0xf10e0bf4,
+	0xd007e007,
 	0x04bd0002,
-/* 0x06be: i2c_sense_scl */
+/* 0x06b6: i2c_drive_sda_lo */
+	0x07f100f8,
+	0x02d007e4,
+	0xf804bd00,
+/* 0x06c1: i2c_sense_scl */
+	0x0132f400,
+	0x07c437f1,
+	0xfd0033cf,
+	0x0bf40431,
+	0x0131f406,
+/* 0x06d4: i2c_sense_scl_done */
+/* 0x06d6: i2c_sense_sda */
 	0x32f400f8,
 	0xc437f101,
 	0x0033cf07,
-	0xf40431fd,
+	0xf40432fd,
 	0x31f4060b,
-/* 0x06d1: i2c_sense_scl_done */
-/* 0x06d3: i2c_sense_sda */
-	0xf400f801,
-	0x37f10132,
-	0x33cf07c4,
-	0x0432fd00,
-	0xf4060bf4,
-/* 0x06e6: i2c_sense_sda_done */
-	0x00f80131,
-/* 0x06e8: i2c_raise_scl */
-	0x47f140f9,
-	0x37f00898,
-	0x8621f501,
-/* 0x06f5: i2c_raise_scl_wait */
-	0xe8e7f106,
-	0x6721f403,
-	0x06be21f5,
-	0xb60901f4,
-	0x1bf40142,
-/* 0x0709: i2c_raise_scl_done */
-	0xf840fcef,
-/* 0x070d: i2c_start */
-	0xbe21f500,
-	0x0d11f406,
-	0x06d321f5,
-	0xf40611f4,
-/* 0x071e: i2c_start_rep */
-	0x37f0300e,
-	0x8621f500,
-	0x0137f006,
-	0x06a221f5,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xe821f550,
-	0x0464b606,
-/* 0x074b: i2c_start_send */
-	0xf01f11f4,
-	0x21f50037,
-	0xe7f106a2,
-	0x21f41388,
-	0x0037f067,
-	0x068621f5,
-	0x1388e7f1,
-/* 0x0767: i2c_start_out */
-	0xf86721f4,
-/* 0x0769: i2c_stop */
-	0x0037f000,
-	0x068621f5,
-	0xf50037f0,
-	0xf106a221,
-	0xf403e8e7,
-	0x37f06721,
-	0x8621f501,
-	0x88e7f106,
-	0x6721f413,
-	0xf50137f0,
-	0xf106a221,
-	0xf41388e7,
-	0x00f86721,
-/* 0x079c: i2c_bitw */
-	0x06a221f5,
+/* 0x06e9: i2c_sense_sda_done */
+/* 0x06eb: i2c_raise_scl */
+	0xf900f801,
+	0x9847f140,
+	0x0137f008,
+	0x068921f5,
+/* 0x06f8: i2c_raise_scl_wait */
 	0x03e8e7f1,
-	0xbb6721f4,
+	0xf56621f4,
+	0xf406c121,
+	0x42b60901,
+	0xef1bf401,
+/* 0x070c: i2c_raise_scl_done */
+	0x00f840fc,
+/* 0x0710: i2c_start */
+	0x06c121f5,
+	0xf50d11f4,
+	0xf406d621,
+	0x0ef40611,
+/* 0x0721: i2c_start_rep */
+	0x0037f030,
+	0x068921f5,
+	0xf50137f0,
+	0xbb06a521,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x06e821f5,
+	0x06eb21f5,
 	0xf40464b6,
-	0xe7f11811,
+/* 0x074e: i2c_start_send */
+	0x37f01f11,
+	0xa521f500,
+	0x88e7f106,
+	0x6621f413,
+	0xf50037f0,
+	0xf1068921,
+	0xf41388e7,
+/* 0x076a: i2c_start_out */
+	0x00f86621,
+/* 0x076c: i2c_stop */
+	0xf50037f0,
+	0xf0068921,
+	0x21f50037,
+	0xe7f106a5,
+	0x21f403e8,
+	0x0137f066,
+	0x068921f5,
+	0x1388e7f1,
+	0xf06621f4,
+	0x21f50137,
+	0xe7f106a5,
 	0x21f41388,
-	0x0037f067,
-	0x068621f5,
+/* 0x079f: i2c_bitw */
+	0xf500f866,
+	0xf106a521,
+	0xf403e8e7,
+	0x76bb6621,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb606eb21,
+	0x11f40464,
+	0x88e7f118,
+	0x6621f413,
+	0xf50037f0,
+	0xf1068921,
+	0xf41388e7,
+/* 0x07de: i2c_bitw_out */
+	0x00f86621,
+/* 0x07e0: i2c_bitr */
+	0xf50137f0,
+	0xf106a521,
+	0xf403e8e7,
+	0x76bb6621,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb606eb21,
+	0x11f40464,
+	0xd621f51b,
+	0x0037f006,
+	0x068921f5,
 	0x1388e7f1,
-/* 0x07db: i2c_bitw_out */
-	0xf86721f4,
-/* 0x07dd: i2c_bitr */
-	0x0137f000,
-	0x06a221f5,
-	0x03e8e7f1,
-	0xbb6721f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x06e821f5,
-	0xf40464b6,
-	0x21f51b11,
-	0x37f006d3,
-	0x8621f500,
-	0x88e7f106,
-	0x6721f413,
-	0xf4013cf0,
-/* 0x0822: i2c_bitr_done */
-	0x00f80131,
-/* 0x0824: i2c_get_byte */
-	0xf00057f0,
-/* 0x082a: i2c_get_byte_next */
-	0x54b60847,
-	0x0076bb01,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b607dd,
-	0x2b11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0x0137f0d8,
+	0xf06621f4,
+	0x31f4013c,
+/* 0x0825: i2c_bitr_done */
+/* 0x0827: i2c_get_byte */
+	0xf000f801,
+	0x47f00057,
+/* 0x082d: i2c_get_byte_next */
+	0x0154b608,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x9c21f550,
+	0xe021f550,
 	0x0464b607,
-/* 0x0874: i2c_get_byte_done */
-/* 0x0876: i2c_put_byte */
-	0x47f000f8,
-/* 0x0879: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
+	0xfd2b11f4,
+	0x42b60553,
+	0xd81bf401,
+	0xbb0137f0,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x079f21f5,
+/* 0x0877: i2c_get_byte_done */
+	0xf80464b6,
+/* 0x0879: i2c_put_byte */
+	0x0847f000,
+/* 0x087c: i2c_put_byte_next */
+	0xff0142b6,
+	0x76bb3854,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb6079f21,
+	0x11f40464,
+	0x0046b034,
+	0xbbd81bf4,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x079c21f5,
+	0x07e021f5,
 	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
+	0x76bb0f11,
+	0x0136b000,
+	0xf4061bf4,
+/* 0x08d2: i2c_put_byte_done */
+	0x00f80132,
+/* 0x08d4: i2c_addr */
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xdd21f550,
+	0x1021f550,
 	0x0464b607,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x08cf: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x08d1: i2c_addr */
-	0x0076bb00,
+	0xe72911f4,
+	0xb6012ec3,
+	0x53fd0134,
+	0x0076bb05,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b6070d,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb6087621,
-/* 0x0916: i2c_addr_done */
-	0x00f80464,
-/* 0x0918: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b705e4,
-	0x00f8d014,
-/* 0x0924: i2c_acquire */
-	0x091821f5,
-	0xf00421f4,
-	0x21f403d9,
-/* 0x0933: i2c_release */
-	0xf500f833,
-	0xf4091821,
-	0xdaf00421,
-	0x3321f403,
-/* 0x0942: i2c_recv */
-	0x32f400f8,
-	0xf8c1c701,
-	0xb00214b6,
-	0x1ff52816,
-	0x13a0013a,
-	0x32980cf4,
-	0xcc13a000,
-	0x0031980c,
-	0xf90231f4,
-	0xf9e0f9d0,
-	0x0067f1d0,
-	0x0063f100,
-	0x01679210,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x2421f550,
-	0x0464b609,
-	0xd6b0d0fc,
-	0xb31bf500,
-	0x0057f000,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xd121f550,
-	0x0464b608,
-	0x00d011f5,
-	0xbbe0c5c7,
+	0x64b60879,
+/* 0x0919: i2c_addr_done */
+/* 0x091b: i2c_acquire_addr */
+	0xc700f804,
+	0xe4b6f8ce,
+	0x14e0b705,
+/* 0x0927: i2c_acquire */
+	0xf500f8d0,
+	0xf4091b21,
+	0xd9f00421,
+	0x3421f403,
+/* 0x0936: i2c_release */
+	0x21f500f8,
+	0x21f4091b,
+	0x03daf004,
+	0xf83421f4,
+/* 0x0945: i2c_recv */
+	0x0132f400,
+	0xb6f8c1c7,
+	0x16b00214,
+	0x3a1ff528,
+	0xf413a001,
+	0x0032980c,
+	0x0ccc13a0,
+	0xf4003198,
+	0xd0f90231,
+	0xd0f9e0f9,
+	0x000067f1,
+	0x100063f1,
+	0xbb016792,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x087621f5,
+	0x092721f5,
+	0xfc0464b6,
+	0x00d6b0d0,
+	0x00b31bf5,
+	0xbb0057f0,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x08d421f5,
 	0xf50464b6,
-	0xf000ad11,
-	0x76bb0157,
+	0xc700d011,
+	0x76bbe0c5,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb608d121,
+	0xb6087921,
 	0x11f50464,
-	0x76bb008a,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb6082421,
-	0x11f40464,
-	0xe05bcb6a,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x6921f550,
-	0x0464b607,
-	0xbd025bb9,
-	0x430ef474,
-/* 0x0a48: i2c_recv_not_rd08 */
-	0xf401d6b0,
-	0x57f03d1b,
-	0xd121f500,
-	0x3311f408,
-	0xf5e0c5c7,
-	0xf4087621,
-	0x57f02911,
-	0xd121f500,
-	0x1f11f408,
-	0xf5e0b5c7,
-	0xf4087621,
-	0x21f51511,
-	0x74bd0769,
-	0xf408c5c7,
-	0x32f4091b,
-	0x030ef402,
-/* 0x0a88: i2c_recv_not_wr08 */
-/* 0x0a88: i2c_recv_done */
-	0xf5f8cec7,
-	0xfc093321,
-	0xf4d0fce0,
-	0x7cb90a12,
-	0xe621f502,
-/* 0x0a9d: i2c_recv_exit */
-/* 0x0a9f: i2c_init */
+	0x57f000ad,
+	0x0076bb01,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b608d4,
+	0x8a11f504,
+	0x0076bb00,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b60827,
+	0x6a11f404,
+	0xbbe05bcb,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x076c21f5,
+	0xb90464b6,
+	0x74bd025b,
+/* 0x0a4b: i2c_recv_not_rd08 */
+	0xb0430ef4,
+	0x1bf401d6,
+	0x0057f03d,
+	0x08d421f5,
+	0xc73311f4,
+	0x21f5e0c5,
+	0x11f40879,
+	0x0057f029,
+	0x08d421f5,
+	0xc71f11f4,
+	0x21f5e0b5,
+	0x11f40879,
+	0x6c21f515,
+	0xc774bd07,
+	0x1bf408c5,
+	0x0232f409,
+/* 0x0a8b: i2c_recv_not_wr08 */
+/* 0x0a8b: i2c_recv_done */
+	0xc7030ef4,
+	0x21f5f8ce,
+	0xe0fc0936,
+	0x12f4d0fc,
+	0x027cb90a,
+	0x02e521f5,
+/* 0x0aa0: i2c_recv_exit */
+/* 0x0aa2: i2c_init */
+	0x00f800f8,
+/* 0x0aa4: test_recv */
+	0x05d817f1,
+	0xb60011cf,
+	0x07f10110,
+	0x01d005d8,
+	0xf104bd00,
+	0xf1d900e7,
+	0xf5134fe3,
+	0xf8021721,
+/* 0x0ac5: test_init */
+	0x00e7f100,
+	0x1721f508,
+/* 0x0acf: idle_recv */
 	0xf800f802,
-/* 0x0aa1: test_recv */
-	0xd817f100,
-	0x0011cf05,
-	0xf10110b6,
-	0xd005d807,
-	0x04bd0001,
-	0xd900e7f1,
-	0x134fe3f1,
-	0x021821f5,
-/* 0x0ac2: test_init */
-	0xe7f100f8,
-	0x21f50800,
-	0x00f80218,
-/* 0x0acc: idle_recv */
-/* 0x0ace: idle */
-	0x31f400f8,
-	0xd417f100,
-	0x0011cf05,
-	0xf10110b6,
-	0xd005d407,
-	0x04bd0001,
-/* 0x0ae4: idle_loop */
-	0xf45817f0,
-/* 0x0aea: idle_proc */
-/* 0x0aea: idle_proc_exec */
-	0x10f90232,
-	0xf5021eb9,
-	0xfc02ef21,
-	0x0911f410,
-	0xf40231f4,
-/* 0x0afe: idle_proc_next */
-	0x10b6ef0e,
-	0x061fb858,
-	0xf4e61bf4,
-	0x28f4dd02,
-	0xc10ef400,
-	0x00000000,
+/* 0x0ad1: idle */
+	0x0031f400,
+	0x05d417f1,
+	0xb60011cf,
+	0x07f10110,
+	0x01d005d4,
+/* 0x0ae7: idle_loop */
+	0xf004bd00,
+	0x32f45817,
+/* 0x0aed: idle_proc */
+/* 0x0aed: idle_proc_exec */
+	0xb910f902,
+	0x21f5021e,
+	0x10fc02ee,
+	0xf40911f4,
+	0x0ef40231,
+/* 0x0b01: idle_proc_next */
+	0x5810b6ef,
+	0xf4061fb8,
+	0x02f4e61b,
+	0x0028f4dd,
+	0x00c10ef4,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
index 8a2b628642ac..3c731ff12871 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
@@ -24,8 +24,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000447,
-	0x000003f8,
+	0x0000042c,
+	0x000003df,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000621,
-	0x00000613,
+	0x000005f3,
+	0x000005e5,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000625,
-	0x00000623,
+	0x000005f7,
+	0x000005f5,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000a29,
-	0x000008d0,
+	0x000009f8,
+	0x000008a2,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000a4a,
-	0x00000a2b,
+	0x00000a16,
+	0x000009fa,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000a55,
-	0x00000a53,
+	0x00000a21,
+	0x00000a1f,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -229,26 +229,26 @@ uint32_t gk208_pmu_data[] = {
 /* 0x0370: memx_func_head */
 	0x00000001,
 	0x00000000,
-	0x00000477,
+	0x0000045c,
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x000004f4,
+	0x000004cf,
 	0x00000003,
 	0x00000002,
-	0x00000574,
+	0x00000546,
 	0x00040004,
 	0x00000000,
-	0x00000591,
+	0x00000563,
 	0x00010005,
 	0x00000000,
-	0x000005ab,
+	0x0000057d,
 	0x00010006,
 	0x00000000,
-	0x0000056f,
+	0x00000541,
 	0x00000007,
 	0x00000000,
-	0x000005b7,
+	0x00000589,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -916,784 +916,771 @@ uint32_t gk208_pmu_data[] = {
 };
 
 uint32_t gk208_pmu_code[] = {
-	0x03100ef5,
+	0x02f90ef5,
 /* 0x0004: rd32 */
 	0xf607a040,
 	0x04bd000e,
-	0xd3f0010d,
+	0x0100018d,
+	0xf607ac40,
+	0x04bd000d,
+/* 0x0018: rd32_wait */
+	0xcf07ac4d,
+	0xd4f100dd,
+	0x1bf47000,
+	0x07a44df6,
+	0xf800ddcf,
+/* 0x002d: wr32 */
+	0x07a04000,
+	0xbd000ef6,
+	0x07a44004,
+	0xbd000df6,
+	0x00f28d04,
 	0x07ac4001,
 	0xbd000df6,
-/* 0x0019: rd32_wait */
+/* 0x0049: wr32_wait */
 	0x07ac4d04,
 	0xf100ddcf,
 	0xf47000d4,
-	0xa44df61b,
-	0x00ddcf07,
-/* 0x002e: wr32 */
-	0xa04000f8,
-	0x000ef607,
-	0xa44004bd,
-	0x000df607,
-	0x020d04bd,
-	0xf0f0d5f0,
-	0xac4001d3,
-	0x000df607,
-/* 0x004e: wr32_wait */
-	0xac4d04bd,
-	0x00ddcf07,
-	0x7000d4f1,
-	0xf8f61bf4,
-/* 0x005d: nsec */
-	0xf990f900,
-	0xcf2c0880,
-/* 0x0066: nsec_loop */
-	0x2c090088,
-	0xbb0099cf,
-	0x9ea60298,
-	0xfcf61ef4,
-	0xf890fc80,
-/* 0x0079: wait */
-	0xf990f900,
-	0xcf2c0880,
-/* 0x0082: wait_loop */
-	0xeeb20088,
-	0x0000047e,
-	0xadfddab2,
-	0xf4aca604,
-	0x2c09100b,
-	0xbb0099cf,
-	0x9ba60298,
-/* 0x009f: wait_done */
-	0xfce61ef4,
-	0xf890fc80,
-/* 0x00a5: intr_watchdog */
-	0x03e99800,
+	0x00f8f61b,
+/* 0x0058: nsec */
+	0x80f990f9,
+	0x88cf2c08,
+/* 0x0061: nsec_loop */
+	0xcf2c0900,
+	0x98bb0099,
+	0xf49ea602,
+	0x80fcf61e,
+	0x00f890fc,
+/* 0x0074: wait */
+	0x80f990f9,
+	0x88cf2c08,
+/* 0x007d: wait_loop */
+	0x7eeeb200,
+	0xb2000004,
+	0x04adfdda,
+	0x0bf4aca6,
+	0xcf2c0910,
+	0x98bb0099,
+	0xf49ba602,
+/* 0x009a: wait_done */
+	0x80fce61e,
+	0x00f890fc,
+/* 0x00a0: intr_watchdog */
+	0xb003e998,
+	0x0bf40096,
+	0x9a0a9828,
+	0xf4029abb,
+	0x010d0e1c,
+	0x00023e7e,
+	0x0ef494bd,
+/* 0x00bd: intr_watchdog_next_time */
+	0x9b0a9814,
+	0xf400a6b0,
+	0x9aa6080b,
+/* 0x00cb: intr_watchdog_next_time_set */
+	0xb5061cf4,
+/* 0x00ce: intr_watchdog_next_proc */
+	0xe9b59b09,
+	0x58e0b603,
+	0x0268e6b1,
+	0xf8c81bf4,
+/* 0x00dd: intr */
+	0xbd00f900,
+	0xf980f904,
+	0xf9a0f990,
+	0xf9c0f9b0,
+	0xf9e0f9d0,
+	0xfe000ff0,
+	0x80f90188,
+	0xcf045048,
+	0x80b60088,
+	0x04504001,
+	0xbd0008f6,
+	0xcf080804,
+	0x89c40088,
+	0x1f0bf402,
+	0x0e9b00b5,
+	0x00a07e58,
+	0x9b099800,
 	0xf40096b0,
-	0x0a98280b,
-	0x029abb9a,
-	0x0d0e1cf4,
-	0x02557e01,
-	0xf494bd00,
-/* 0x00c2: intr_watchdog_next_time */
-	0x0a98140e,
-	0x00a6b09b,
-	0xa6080bf4,
-	0x061cf49a,
-/* 0x00d0: intr_watchdog_next_time_set */
-/* 0x00d3: intr_watchdog_next_proc */
-	0xb59b09b5,
-	0xe0b603e9,
-	0x68e6b158,
-	0xc81bf402,
-/* 0x00e2: intr */
-	0x00f900f8,
-	0x80f904bd,
-	0xa0f990f9,
-	0xc0f9b0f9,
-	0xe0f9d0f9,
-	0x000ff0f9,
-	0xf90188fe,
-	0x04504880,
-	0xb60088cf,
-	0x50400180,
-	0x0008f604,
-	0x080804bd,
-	0xc40088cf,
-	0x0bf40289,
-	0x9b00b51f,
-	0xa57e580e,
-	0x09980000,
-	0x0096b09b,
-	0x000d0bf4,
-	0x0009f634,
-	0x09b504bd,
-/* 0x0135: intr_skip_watchdog */
-	0x0089e49a,
-	0x360bf408,
-	0xcf068849,
-	0x9ac40099,
-	0x220bf402,
-	0xcf04c04c,
-	0xc0f900cc,
-	0xf14f484e,
-	0x0d5453e3,
-	0x02b67e00,
-	0x40c0fc00,
-	0x0cf604c0,
-/* 0x0167: intr_subintr_skip_fifo */
-	0x4004bd00,
-	0x09f60688,
-/* 0x016f: intr_skip_subintr */
-	0x4904bd00,
-	0x90bd00e0,
-	0x000489fd,
-	0x0008f604,
-	0x80fc04bd,
-	0xfc0088fe,
-	0xfce0fcf0,
-	0xfcc0fcd0,
-	0xfca0fcb0,
-	0xfc80fc90,
-	0x0032f400,
-/* 0x019a: ticks_from_ns */
-	0xc0f901f8,
-	0xd7f1b0f9,
-	0xd3f00144,
-	0x6b21f500,
-	0xe8ccec03,
-	0x00b4b003,
-	0xec120bf4,
-	0xf103e8ee,
-	0xf00144d7,
-	0x21f500d3,
-/* 0x01c2: ticks_from_ns_quit */
-	0xceb2036b,
+	0x34000d0b,
+	0xbd0009f6,
+	0x9a09b504,
+/* 0x0130: intr_skip_watchdog */
+	0x080089e4,
+	0x49340bf4,
+	0x99cf0688,
+	0x029ac400,
+	0x4c200bf4,
+	0xcccf04c0,
+	0xdec0f900,
+	0x54534f48,
+	0x9f7e000d,
+	0xc0fc0002,
+	0xf604c040,
+	0x04bd000c,
+/* 0x0160: intr_subintr_skip_fifo */
+	0xf6068840,
+	0x04bd0009,
+/* 0x0168: intr_skip_subintr */
+	0xbd00e049,
+	0x0489fd90,
+	0x08f60400,
+	0xfc04bd00,
+	0x0088fe80,
+	0xe0fcf0fc,
+	0xc0fcd0fc,
+	0xa0fcb0fc,
+	0x80fc90fc,
+	0x32f400fc,
+/* 0x0193: ticks_from_ns */
+	0xf901f800,
+	0x4db0f9c0,
+	0x527e0144,
+	0xccec0003,
+	0xb4b003e8,
+	0x0e0bf400,
+	0x03e8eeec,
+	0x7e01444d,
+/* 0x01b3: ticks_from_ns_quit */
+	0xb2000352,
+	0xfcb0fcce,
+/* 0x01bb: ticks_from_us */
+	0xf900f8c0,
+	0x4db0f9c0,
+	0x527e0144,
+	0xceb20003,
+	0xf400b4b0,
+	0xe4bd050b,
+/* 0x01d0: ticks_from_us_quit */
 	0xc0fcb0fc,
-/* 0x01ca: ticks_from_us */
-	0xc0f900f8,
-	0xd7f1b0f9,
-	0xd3f00144,
-	0x6b21f500,
-	0xb0ceb203,
-	0x0bf400b4,
-/* 0x01e3: ticks_from_us_quit */
-	0xfce4bd05,
-	0xf8c0fcb0,
-/* 0x01e9: ticks_to_us */
-	0x44d7f100,
-	0x00d3f001,
-	0xf8ecedff,
-/* 0x01f5: timer */
-	0xf990f900,
-	0x1032f480,
-	0xb003f898,
-	0x1cf40086,
-	0x0084bd4a,
-	0x0008f638,
-	0x340804bd,
-	0x980088cf,
-	0x98bb9a09,
-	0x00e9bb02,
-	0x0803feb5,
-	0x0088cf08,
-	0xf40284f0,
-	0x34081c1b,
-	0xa60088cf,
-	0x080bf4e0,
-	0x1cf4e8a6,
-/* 0x0239: timer_reset */
-	0xf634000d,
-	0x04bd000e,
-/* 0x0243: timer_enable */
-	0x089a0eb5,
-	0xf6380001,
-	0x04bd0008,
-/* 0x024c: timer_done */
-	0xfc1031f4,
+/* 0x01d6: ticks_to_us */
+	0x444d00f8,
+	0xecedff01,
+/* 0x01de: timer */
+	0x90f900f8,
+	0x32f480f9,
+	0x03f89810,
+	0xf40086b0,
+	0x84bd4a1c,
+	0x08f63800,
+	0x0804bd00,
+	0x0088cf34,
+	0xbb9a0998,
+	0xe9bb0298,
+	0x03feb500,
+	0x88cf0808,
+	0x0284f000,
+	0x081c1bf4,
+	0x0088cf34,
+	0x0bf4e0a6,
+	0xf4e8a608,
+/* 0x0222: timer_reset */
+	0x34000d1c,
+	0xbd000ef6,
+	0x9a0eb504,
+/* 0x022c: timer_enable */
+	0x38000108,
+	0xbd0008f6,
+/* 0x0235: timer_done */
+	0x1031f404,
+	0x90fc80fc,
+/* 0x023e: send_proc */
+	0x80f900f8,
+	0xe89890f9,
+	0x04e99805,
+	0xa60486f0,
+	0x2a0bf489,
+	0x940398c4,
+	0x80b60488,
+	0x008ebb18,
+	0xb500fa98,
+	0x8db5008a,
+	0x028cb501,
+	0xb6038bb5,
+	0x94f00190,
+	0x04e9b507,
+/* 0x0277: send_done */
+	0xfc0231f4,
+	0xf880fc90,
+/* 0x027d: find */
+	0x0880f900,
+	0x0131f458,
+/* 0x0284: find_loop */
+	0xa6008a98,
+	0x100bf4ae,
+	0xb15880b6,
+	0xf4026886,
+	0x32f4f11b,
+/* 0x0299: find_done */
+	0xfc8eb201,
+/* 0x029f: send */
+	0x7e00f880,
+	0xf400027d,
+	0x00f89b01,
+/* 0x02a8: recv */
+	0x80f990f9,
+	0x9805e898,
+	0x32f404e9,
+	0xf489a601,
+	0x89c43c0b,
+	0x0180b603,
+	0xb50784f0,
+	0xea9805e8,
+	0xfef0f902,
+	0xf0f9018f,
+	0x9994efb2,
+	0x00e9bb04,
+	0x9818e0b6,
+	0xec9803eb,
+	0x01ed9802,
+	0xf900ee98,
+	0xfef0fca5,
+	0x31f400f8,
+/* 0x02f3: recv_done */
+	0xfcf0fc01,
 	0xf890fc80,
-/* 0x0255: send_proc */
-	0xf980f900,
-	0x05e89890,
-	0xf004e998,
-	0x89a60486,
-	0xc42a0bf4,
-	0x88940398,
-	0x1880b604,
-	0x98008ebb,
-	0x8ab500fa,
-	0x018db500,
-	0xb5028cb5,
-	0x90b6038b,
-	0x0794f001,
-	0xf404e9b5,
-/* 0x028e: send_done */
-	0x90fc0231,
-	0x00f880fc,
-/* 0x0294: find */
-	0x580880f9,
-/* 0x029b: find_loop */
-	0x980131f4,
-	0xaea6008a,
-	0xb6100bf4,
-	0x86b15880,
-	0x1bf40268,
-	0x0132f4f1,
-/* 0x02b0: find_done */
-	0x80fc8eb2,
-/* 0x02b6: send */
-	0x947e00f8,
-	0x01f40002,
-/* 0x02bf: recv */
-	0xf900f89b,
-	0x9880f990,
-	0xe99805e8,
-	0x0132f404,
-	0x0bf489a6,
-	0x0389c43c,
-	0xf00180b6,
-	0xe8b50784,
-	0x02ea9805,
-	0x8ffef0f9,
-	0xb2f0f901,
-	0x049994ef,
-	0xb600e9bb,
-	0xeb9818e0,
-	0x02ec9803,
-	0x9801ed98,
-	0xa5f900ee,
-	0xf8fef0fc,
-	0x0131f400,
-/* 0x030a: recv_done */
-	0x80fcf0fc,
-	0x00f890fc,
-/* 0x0310: init */
-	0xcf010841,
-	0x11e70011,
-	0x14b60109,
-	0x0014fe08,
-	0xf000e041,
-	0x1c000013,
-	0xbd0001f6,
-	0x00ff0104,
-	0x0001f614,
-	0x020104bd,
-	0x080015f1,
-	0x01f61000,
-	0x4104bd00,
-	0x13f000e2,
-	0x0010fe00,
-	0x011031f4,
-	0xf6380001,
+/* 0x02f9: init */
+	0x01084100,
+	0xe70011cf,
+	0xb6010911,
+	0x14fe0814,
+	0x00e04100,
+	0x01f61c00,
+	0x0104bd00,
+	0xf61400ff,
 	0x04bd0001,
-/* 0x035a: init_proc */
-	0xf198580f,
-	0x0016b001,
-	0xf9fa0bf4,
-	0x58f0b615,
-/* 0x036b: mulu32_32_64 */
-	0xf9f20ef4,
-	0xf920f910,
-	0x9540f930,
-	0xd29510e1,
-	0xbdc4bd10,
-	0xc0edffb4,
-	0xb2301dff,
+	0x15f10201,
+	0x10000800,
+	0xbd0001f6,
+	0x00dd4104,
+	0xffff14f1,
+	0xf40010fe,
+	0x01011031,
+	0x01f63800,
+	0x0f04bd00,
+/* 0x0341: init_proc */
+	0x01f19858,
+	0xf40016b0,
+	0x15f9fa0b,
+	0xf458f0b6,
+/* 0x0352: mulu32_32_64 */
+	0x10f9f20e,
+	0x30f920f9,
+	0xe19540f9,
+	0x10d29510,
+	0xb4bdc4bd,
+	0xffc0edff,
+	0x34b2301d,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xb230e2ff,
 	0xff34f134,
 	0x1034b6ff,
 	0xbb1045b6,
 	0xb4bb00c3,
-	0x30e2ff01,
-	0x34f134b2,
-	0x34b6ffff,
-	0x1045b610,
-	0xbb00c3bb,
-	0x12ff01b4,
-	0x00b3bb30,
-	0x30fc40fc,
-	0x10fc20fc,
-/* 0x03ba: host_send */
-	0xb04100f8,
-	0x0011cf04,
-	0xcf04a042,
-	0x12a60022,
-	0xc42e0bf4,
-	0xee94071e,
-	0x70e0b704,
-	0x03eb9802,
-	0x9802ec98,
-	0xee9801ed,
-	0x02b67e00,
-	0x0110b600,
-	0x400f1ec4,
-	0x0ef604b0,
-	0xf404bd00,
-/* 0x03f6: host_send_done */
-	0x00f8c70e,
-/* 0x03f8: host_recv */
-	0xf14e4941,
-	0xa6525413,
-	0xb90bf4e1,
-/* 0x0404: host_recv_wait */
-	0xcf04cc41,
-	0xc8420011,
-	0x0022cf04,
-	0xa60816f0,
-	0xef0bf412,
-	0xb60723c4,
-	0x30b70434,
-	0x3bb502f0,
-	0x023cb503,
-	0xb5013db5,
-	0x20b6003e,
-	0x0f24f001,
-	0xf604c840,
-	0x04bd0002,
-	0x00004002,
+	0x3012ff01,
+	0xfc00b3bb,
+	0xfc30fc40,
+	0xf810fc20,
+/* 0x03a1: host_send */
+	0x04b04100,
+	0x420011cf,
+	0x22cf04a0,
+	0xf412a600,
+	0x1ec42e0b,
+	0x04ee9407,
+	0x0270e0b7,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0x00029f7e,
+	0xc40110b6,
+	0xb0400f1e,
+	0x000ef604,
+	0x0ef404bd,
+/* 0x03dd: host_send_done */
+/* 0x03df: host_recv */
+	0xd100f8c7,
+	0x52544e49,
+	0x0bf4e1a6,
+/* 0x03e9: host_recv_wait */
+	0x04cc41bb,
+	0x420011cf,
+	0x22cf04c8,
+	0x0816f000,
+	0x0bf412a6,
+	0x0723c4ef,
+	0xb70434b6,
+	0xb502f030,
+	0x3cb5033b,
+	0x013db502,
+	0xb6003eb5,
+	0x24f00120,
+	0x04c8400f,
 	0xbd0002f6,
-/* 0x0447: host_init */
-	0x4100f804,
-	0x14b60080,
-	0x7015f110,
-	0x04d04002,
-	0xbd0001f6,
-	0x00804104,
-	0xf11014b6,
-	0x4002f015,
-	0x01f604dc,
-	0x0104bd00,
-	0x04c44001,
-	0xbd0001f6,
-/* 0x0477: memx_func_enter */
-	0xf100f804,
-	0xf1162067,
-	0xf1f55d77,
-	0xb2ffff73,
-	0x00047e6e,
-	0xfdd8b200,
-	0x60f90487,
-	0xd0fc80f9,
-	0x2e7ee0fc,
-	0x77f10000,
-	0x73f1fffe,
-	0x6eb2ffff,
-	0x0000047e,
-	0x87fdd8b2,
-	0xf960f904,
-	0xfcd0fc80,
-	0x002e7ee0,
-	0xf067f100,
-	0x7e6eb226,
+	0x00400204,
+	0x0002f600,
+	0x00f804bd,
+/* 0x042c: host_init */
+	0xb6008041,
+	0x15f11014,
+	0xd0400270,
+	0x0001f604,
+	0x804104bd,
+	0x1014b600,
+	0x02f015f1,
+	0xf604dc40,
+	0x04bd0001,
+	0xc4400101,
+	0x0001f604,
+	0x00f804bd,
+/* 0x045c: memx_func_enter */
+	0x162067f1,
+	0xf55d77f1,
+	0x047e6eb2,
+	0xd8b20000,
+	0xf90487fd,
+	0xfc80f960,
+	0x7ee0fcd0,
+	0x0700002d,
+	0x7e6eb2fe,
 	0xb2000004,
 	0x0487fdd8,
 	0x80f960f9,
 	0xe0fcd0fc,
-	0x00002e7e,
-	0xe0400406,
-	0x0006f607,
-/* 0x04de: memx_func_enter_wait */
-	0xc04604bd,
-	0x0066cf07,
-	0xf40464f0,
-	0x2c06f70b,
-	0xb50066cf,
-	0x00f8f106,
-/* 0x04f4: memx_func_leave */
-	0x66cf2c06,
-	0xf206b500,
-	0xe4400406,
-	0x0006f607,
-/* 0x0506: memx_func_leave_wait */
-	0xc04604bd,
-	0x0066cf07,
-	0xf40464f0,
-	0x67f1f71b,
-	0x77f126f0,
-	0x73f00001,
-	0x7e6eb200,
-	0xb2000004,
-	0x0587fdd8,
-	0x80f960f9,
-	0xe0fcd0fc,
-	0x00002e7e,
-	0x162067f1,
+	0x00002d7e,
+	0x26f067f1,
 	0x047e6eb2,
 	0xd8b20000,
-	0xf90587fd,
+	0xf90487fd,
 	0xfc80f960,
 	0x7ee0fcd0,
-	0xf100002e,
-	0xf00aa277,
-	0x6eb20073,
+	0x0600002d,
+	0x07e04004,
+	0xbd0006f6,
+/* 0x04b9: memx_func_enter_wait */
+	0x07c04604,
+	0xf00066cf,
+	0x0bf40464,
+	0xcf2c06f7,
+	0x06b50066,
+/* 0x04cf: memx_func_leave */
+	0x0600f8f1,
+	0x0066cf2c,
+	0x06f206b5,
+	0x07e44004,
+	0xbd0006f6,
+/* 0x04e1: memx_func_leave_wait */
+	0x07c04604,
+	0xf00066cf,
+	0x1bf40464,
+	0xf067f1f7,
+	0xb2010726,
+	0x00047e6e,
+	0xfdd8b200,
+	0x60f90587,
+	0xd0fc80f9,
+	0x2d7ee0fc,
+	0x67f10000,
+	0x6eb21620,
 	0x0000047e,
 	0x87fdd8b2,
 	0xf960f905,
 	0xfcd0fc80,
-	0x002e7ee0,
-/* 0x056f: memx_func_wait_vblank */
-	0xb600f800,
-	0x00f80410,
-/* 0x0574: memx_func_wr32 */
-	0x98001698,
-	0x10b60115,
-	0xf960f908,
-	0xfcd0fc50,
-	0x002e7ee0,
-	0x0242b600,
-	0xf8e81bf4,
-/* 0x0591: memx_func_wait */
-	0xcf2c0800,
-	0x1e980088,
-	0x011d9800,
-	0x98021c98,
-	0x10b6031b,
-	0x00797e10,
-/* 0x05ab: memx_func_delay */
-	0x9800f800,
-	0x10b6001e,
-	0x005d7e04,
-/* 0x05b7: memx_func_train */
-	0xf800f800,
-/* 0x05b9: memx_exec */
-	0xf9e0f900,
-	0xb2c1b2d0,
-/* 0x05c1: memx_exec_next */
-	0x001398b2,
-	0xe70410b6,
-	0xe701f034,
-	0xb601e033,
-	0x30f00132,
-	0xde35980c,
-	0x12a655f9,
-	0x98e51ef4,
-	0x0c98f10b,
-	0x02cbbbf2,
-	0xcf07c44b,
-	0xd0fc00bb,
-	0xb67ee0fc,
-	0x00f80002,
-/* 0x05f8: memx_info */
-	0xf401c670,
-/* 0x05fe: memx_info_data */
-	0xcc4c0c0b,
-	0x08004b03,
-/* 0x0607: memx_info_train */
-	0x4c090ef4,
-	0x004b0bcc,
-/* 0x060d: memx_info_send */
-	0x02b67e01,
-/* 0x0613: memx_recv */
-	0xb000f800,
-	0x0bf401d6,
-	0x00d6b0a3,
-	0xf8dc0bf4,
-/* 0x0621: memx_init */
-/* 0x0623: perf_recv */
+	0x002d7ee0,
+	0x0aa24700,
+	0x047e6eb2,
+	0xd8b20000,
+	0xf90587fd,
+	0xfc80f960,
+	0x7ee0fcd0,
+	0xf800002d,
+/* 0x0541: memx_func_wait_vblank */
+	0x0410b600,
+/* 0x0546: memx_func_wr32 */
+	0x169800f8,
+	0x01159800,
+	0xf90810b6,
+	0xfc50f960,
+	0x7ee0fcd0,
+	0xb600002d,
+	0x1bf40242,
+/* 0x0563: memx_func_wait */
+	0x0800f8e8,
+	0x0088cf2c,
+	0x98001e98,
+	0x1c98011d,
+	0x031b9802,
+	0x7e1010b6,
+	0xf8000074,
+/* 0x057d: memx_func_delay */
+	0x001e9800,
+	0x7e0410b6,
+	0xf8000058,
+/* 0x0589: memx_func_train */
+/* 0x058b: memx_exec */
+	0xf900f800,
+	0xb2d0f9e0,
+/* 0x0593: memx_exec_next */
+	0x98b2b2c1,
+	0x10b60013,
+	0xf034e704,
+	0xe033e701,
+	0x0132b601,
+	0x980c30f0,
+	0x55f9de35,
+	0x1ef412a6,
+	0xf10b98e5,
+	0xbbf20c98,
+	0xc44b02cb,
+	0x00bbcf07,
+	0xe0fcd0fc,
+	0x00029f7e,
+/* 0x05ca: memx_info */
+	0xc67000f8,
+	0x0c0bf401,
+/* 0x05d0: memx_info_data */
+	0x4b03cc4c,
+	0x0ef40800,
+/* 0x05d9: memx_info_train */
+	0x0bcc4c09,
+/* 0x05df: memx_info_send */
+	0x7e01004b,
+	0xf800029f,
+/* 0x05e5: memx_recv */
+	0x01d6b000,
+	0xb0a30bf4,
+	0x0bf400d6,
+/* 0x05f3: memx_init */
+	0xf800f8dc,
+/* 0x05f5: perf_recv */
+/* 0x05f7: perf_init */
 	0xf800f800,
-/* 0x0625: perf_init */
-/* 0x0627: i2c_drive_scl */
-	0xb000f800,
-	0x0bf40036,
-	0x07e0400d,
-	0xbd0001f6,
-/* 0x0637: i2c_drive_scl_lo */
-	0x4000f804,
-	0x01f607e4,
-	0xf804bd00,
-/* 0x0641: i2c_drive_sda */
+/* 0x05f9: i2c_drive_scl */
 	0x0036b000,
 	0x400d0bf4,
-	0x02f607e0,
+	0x01f607e0,
 	0xf804bd00,
-/* 0x0651: i2c_drive_sda_lo */
+/* 0x0609: i2c_drive_scl_lo */
 	0x07e44000,
+	0xbd0001f6,
+/* 0x0613: i2c_drive_sda */
+	0xb000f804,
+	0x0bf40036,
+	0x07e0400d,
 	0xbd0002f6,
-/* 0x065b: i2c_sense_scl */
-	0xf400f804,
-	0xc4430132,
-	0x0033cf07,
-	0xf40431fd,
-	0x31f4060b,
-/* 0x066d: i2c_sense_scl_done */
-/* 0x066f: i2c_sense_sda */
-	0xf400f801,
-	0xc4430132,
-	0x0033cf07,
-	0xf40432fd,
-	0x31f4060b,
-/* 0x0681: i2c_sense_sda_done */
-/* 0x0683: i2c_raise_scl */
-	0xf900f801,
-	0x08984440,
-	0x277e0103,
-/* 0x068e: i2c_raise_scl_wait */
-	0xe84e0006,
-	0x005d7e03,
-	0x065b7e00,
-	0x0901f400,
-	0xf40142b6,
-/* 0x06a2: i2c_raise_scl_done */
-	0x40fcef1b,
-/* 0x06a6: i2c_start */
-	0x5b7e00f8,
-	0x11f40006,
-	0x066f7e0d,
-	0x0611f400,
-/* 0x06b7: i2c_start_rep */
-	0x032e0ef4,
-	0x06277e00,
-	0x7e010300,
-	0xbb000641,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x0006837e,
-	0xf40464b6,
-/* 0x06e2: i2c_start_send */
-	0x00031d11,
-	0x0006417e,
-	0x7e13884e,
-	0x0300005d,
-	0x06277e00,
+/* 0x0623: i2c_drive_sda_lo */
+	0x4000f804,
+	0x02f607e4,
+	0xf804bd00,
+/* 0x062d: i2c_sense_scl */
+	0x0132f400,
+	0xcf07c443,
+	0x31fd0033,
+	0x060bf404,
+/* 0x063f: i2c_sense_scl_done */
+	0xf80131f4,
+/* 0x0641: i2c_sense_sda */
+	0x0132f400,
+	0xcf07c443,
+	0x32fd0033,
+	0x060bf404,
+/* 0x0653: i2c_sense_sda_done */
+	0xf80131f4,
+/* 0x0655: i2c_raise_scl */
+	0x4440f900,
+	0x01030898,
+	0x0005f97e,
+/* 0x0660: i2c_raise_scl_wait */
+	0x7e03e84e,
+	0x7e000058,
+	0xf400062d,
+	0x42b60901,
+	0xef1bf401,
+/* 0x0674: i2c_raise_scl_done */
+	0x00f840fc,
+/* 0x0678: i2c_start */
+	0x00062d7e,
+	0x7e0d11f4,
+	0xf4000641,
+	0x0ef40611,
+/* 0x0689: i2c_start_rep */
+	0x7e00032e,
+	0x030005f9,
+	0x06137e01,
+	0x0076bb00,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x557e50fc,
+	0x64b60006,
+	0x1d11f404,
+/* 0x06b4: i2c_start_send */
+	0x137e0003,
+	0x884e0006,
+	0x00587e13,
+	0x7e000300,
+	0x4e0005f9,
+	0x587e1388,
+/* 0x06ce: i2c_start_out */
+	0x00f80000,
+/* 0x06d0: i2c_stop */
+	0xf97e0003,
+	0x00030005,
+	0x0006137e,
+	0x7e03e84e,
+	0x03000058,
+	0x05f97e01,
 	0x13884e00,
-	0x00005d7e,
-/* 0x06fc: i2c_start_out */
-/* 0x06fe: i2c_stop */
-	0x000300f8,
-	0x0006277e,
-	0x417e0003,
-	0xe84e0006,
-	0x005d7e03,
-	0x7e010300,
-	0x4e000627,
-	0x5d7e1388,
-	0x01030000,
-	0x0006417e,
-	0x7e13884e,
-	0xf800005d,
-/* 0x072d: i2c_bitw */
-	0x06417e00,
-	0x03e84e00,
-	0x00005d7e,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x06837e50,
-	0x0464b600,
-	0x4e1711f4,
-	0x5d7e1388,
-	0x00030000,
-	0x0006277e,
-	0x7e13884e,
-/* 0x076b: i2c_bitw_out */
-	0xf800005d,
-/* 0x076d: i2c_bitr */
-	0x7e010300,
-	0x4e000641,
-	0x5d7e03e8,
+	0x0000587e,
+	0x137e0103,
+	0x884e0006,
+	0x00587e13,
+/* 0x06ff: i2c_bitw */
+	0x7e00f800,
+	0x4e000613,
+	0x587e03e8,
 	0x76bb0000,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb6000683,
+	0xb6000655,
 	0x11f40464,
-	0x066f7e1a,
-	0x7e000300,
-	0x4e000627,
-	0x5d7e1388,
-	0x3cf00000,
-	0x0131f401,
-/* 0x07b0: i2c_bitr_done */
-/* 0x07b2: i2c_get_byte */
-	0x000500f8,
-/* 0x07b6: i2c_get_byte_next */
-	0x54b60804,
-	0x0076bb01,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x6d7e50fc,
-	0x64b60007,
-	0x2a11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0xbb0103d8,
+	0x13884e17,
+	0x0000587e,
+	0xf97e0003,
+	0x884e0005,
+	0x00587e13,
+/* 0x073d: i2c_bitw_out */
+/* 0x073f: i2c_bitr */
+	0x0300f800,
+	0x06137e01,
+	0x03e84e00,
+	0x0000587e,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x06557e50,
+	0x0464b600,
+	0x7e1a11f4,
+	0x03000641,
+	0x05f97e00,
+	0x13884e00,
+	0x0000587e,
+	0xf4013cf0,
+/* 0x0782: i2c_bitr_done */
+	0x00f80131,
+/* 0x0784: i2c_get_byte */
+	0x08040005,
+/* 0x0788: i2c_get_byte_next */
+	0xbb0154b6,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x00072d7e,
-/* 0x07ff: i2c_get_byte_done */
-	0xf80464b6,
-/* 0x0801: i2c_put_byte */
-/* 0x0803: i2c_put_byte_next */
-	0xb6080400,
-	0x54ff0142,
-	0x0076bb38,
+	0x00073f7e,
+	0xf40464b6,
+	0x53fd2a11,
+	0x0142b605,
+	0x03d81bf4,
+	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0x2d7e50fc,
-	0x64b60007,
-	0x3411f404,
-	0xf40046b0,
-	0x76bbd81b,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0x7e50fc04,
-	0xb600076d,
-	0x11f40464,
-	0x0076bb0f,
-	0xf40136b0,
-	0x32f4061b,
-/* 0x0859: i2c_put_byte_done */
-/* 0x085b: i2c_addr */
-	0xbb00f801,
+	0xff7e50fc,
+	0x64b60006,
+/* 0x07d1: i2c_get_byte_done */
+/* 0x07d3: i2c_put_byte */
+	0x0400f804,
+/* 0x07d5: i2c_put_byte_next */
+	0x0142b608,
+	0xbb3854ff,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0006a67e,
+	0x0006ff7e,
 	0xf40464b6,
-	0xc3e72911,
-	0x34b6012e,
-	0x0553fd01,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x08017e50,
-	0x0464b600,
-/* 0x08a0: i2c_addr_done */
-/* 0x08a2: i2c_acquire_addr */
-	0xcec700f8,
-	0x05e4b6f8,
-	0xd014e0b7,
-/* 0x08ae: i2c_acquire */
-	0xa27e00f8,
-	0x047e0008,
-	0xd9f00000,
-	0x002e7e03,
-/* 0x08bf: i2c_release */
-	0x7e00f800,
-	0x7e0008a2,
-	0xf0000004,
-	0x2e7e03da,
-	0x00f80000,
-/* 0x08d0: i2c_recv */
-	0xc70132f4,
-	0x14b6f8c1,
-	0x2816b002,
-	0x01371ff5,
-	0x0cf413b8,
-	0x00329800,
-	0x0ccc13b8,
-	0x00319800,
-	0xf90231f4,
-	0xf9e0f9d0,
-	0x0067f1d0,
-	0x0063f100,
-	0x01679210,
+	0x46b03411,
+	0xd81bf400,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x08ae7e50,
+	0x073f7e50,
 	0x0464b600,
-	0xd6b0d0fc,
-	0xb01bf500,
-	0xbb000500,
+	0xbb0f11f4,
+	0x36b00076,
+	0x061bf401,
+/* 0x082b: i2c_put_byte_done */
+	0xf80132f4,
+/* 0x082d: i2c_addr */
+	0x0076bb00,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x787e50fc,
+	0x64b60006,
+	0x2911f404,
+	0x012ec3e7,
+	0xfd0134b6,
+	0x76bb0553,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0x7e50fc04,
+	0xb60007d3,
+/* 0x0872: i2c_addr_done */
+	0x00f80464,
+/* 0x0874: i2c_acquire_addr */
+	0xb6f8cec7,
+	0xe0b705e4,
+	0x00f8d014,
+/* 0x0880: i2c_acquire */
+	0x0008747e,
+	0x0000047e,
+	0x7e03d9f0,
+	0xf800002d,
+/* 0x0891: i2c_release */
+	0x08747e00,
+	0x00047e00,
+	0x03daf000,
+	0x00002d7e,
+/* 0x08a2: i2c_recv */
+	0x32f400f8,
+	0xf8c1c701,
+	0xb00214b6,
+	0x1ff52816,
+	0x13b80134,
+	0x98000cf4,
+	0x13b80032,
+	0x98000ccc,
+	0x31f40031,
+	0xf9d0f902,
+	0xd6d0f9e0,
+	0x10000000,
+	0xbb016792,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x00085b7e,
-	0xf50464b6,
-	0xc700cc11,
-	0x76bbe0c5,
+	0x0008807e,
+	0xfc0464b6,
+	0x00d6b0d0,
+	0x00b01bf5,
+	0x76bb0005,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb6000801,
+	0xb600082d,
 	0x11f50464,
-	0x010500a9,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x085b7e50,
-	0x0464b600,
-	0x008711f5,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x07b27e50,
-	0x0464b600,
-	0xcb6711f4,
-	0x76bbe05b,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0x7e50fc04,
-	0xb60006fe,
-	0x5bb20464,
-	0x0ef474bd,
-/* 0x09d5: i2c_recv_not_rd08 */
-	0x01d6b041,
-	0x053b1bf4,
-	0x085b7e00,
-	0x3211f400,
-	0x7ee0c5c7,
-	0xf4000801,
-	0x00052811,
-	0x00085b7e,
-	0xc71f11f4,
-	0x017ee0b5,
-	0x11f40008,
-	0x06fe7e15,
-	0xc774bd00,
-	0x1bf408c5,
-	0x0232f409,
-/* 0x0a13: i2c_recv_not_wr08 */
-/* 0x0a13: i2c_recv_done */
-	0xc7030ef4,
-	0xbf7ef8ce,
-	0xe0fc0008,
-	0x12f4d0fc,
-	0x7e7cb209,
-/* 0x0a27: i2c_recv_exit */
-	0xf80002b6,
-/* 0x0a29: i2c_init */
-/* 0x0a2b: test_recv */
-	0x4100f800,
-	0x11cf0458,
-	0x0110b600,
-	0xf6045840,
-	0x04bd0001,
-	0xd900e7f1,
-	0x134fe3f1,
-	0x0001f57e,
-/* 0x0a4a: test_init */
+	0xc5c700cc,
+	0x0076bbe0,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0xd37e50fc,
+	0x64b60007,
+	0xa911f504,
+	0xbb010500,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x00082d7e,
+	0xf50464b6,
+	0xbb008711,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x0007847e,
+	0xf40464b6,
+	0x5bcb6711,
+	0x0076bbe0,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0xd07e50fc,
+	0x64b60006,
+	0xbd5bb204,
+	0x410ef474,
+/* 0x09a4: i2c_recv_not_rd08 */
+	0xf401d6b0,
+	0x00053b1b,
+	0x00082d7e,
+	0xc73211f4,
+	0xd37ee0c5,
+	0x11f40007,
+	0x7e000528,
+	0xf400082d,
+	0xb5c71f11,
+	0x07d37ee0,
+	0x1511f400,
+	0x0006d07e,
+	0xc5c774bd,
+	0x091bf408,
+	0xf40232f4,
+/* 0x09e2: i2c_recv_not_wr08 */
+/* 0x09e2: i2c_recv_done */
+	0xcec7030e,
+	0x08917ef8,
+	0xfce0fc00,
+	0x0912f4d0,
+	0x9f7e7cb2,
+/* 0x09f6: i2c_recv_exit */
+	0x00f80002,
+/* 0x09f8: i2c_init */
+/* 0x09fa: test_recv */
+	0x584100f8,
+	0x0011cf04,
+	0x400110b6,
+	0x01f60458,
+	0xde04bd00,
+	0x134fd900,
+	0x0001de7e,
+/* 0x0a16: test_init */
 	0x004e00f8,
-	0x01f57e08,
-/* 0x0a53: idle_recv */
+	0x01de7e08,
+/* 0x0a1f: idle_recv */
 	0xf800f800,
-/* 0x0a55: idle */
+/* 0x0a21: idle */
 	0x0031f400,
 	0xcf045441,
 	0x10b60011,
 	0x04544001,
 	0xbd0001f6,
-/* 0x0a69: idle_loop */
+/* 0x0a35: idle_loop */
 	0xf4580104,
-/* 0x0a6e: idle_proc */
-/* 0x0a6e: idle_proc_exec */
+/* 0x0a3a: idle_proc */
+/* 0x0a3a: idle_proc_exec */
 	0x10f90232,
-	0xbf7e1eb2,
+	0xa87e1eb2,
 	0x10fc0002,
 	0xf40911f4,
 	0x0ef40231,
-/* 0x0a81: idle_proc_next */
+/* 0x0a4d: idle_proc_next */
 	0x5810b6f0,
 	0x1bf41fa6,
 	0xe002f4e8,
@@ -1726,4 +1713,17 @@ uint32_t gk208_pmu_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
index 516569270bac..e83341815ec6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
@@ -24,8 +24,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000507,
-	0x000004a4,
+	0x0000050a,
+	0x000004a7,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000837,
-	0x00000829,
+	0x0000083a,
+	0x0000082c,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x0000083b,
-	0x00000839,
+	0x0000083e,
+	0x0000083c,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000c6b,
-	0x00000b0e,
+	0x00000c6e,
+	0x00000b11,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x00000c94,
-	0x00000c6d,
+	0x00000c97,
+	0x00000c70,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000ca0,
-	0x00000c9e,
+	0x00000ca3,
+	0x00000ca1,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -229,26 +229,26 @@ uint32_t gt215_pmu_data[] = {
 /* 0x0370: memx_func_head */
 	0x00000001,
 	0x00000000,
-	0x00000546,
+	0x00000549,
 /* 0x037c: memx_func_next */
 	0x00000002,
 	0x00000000,
-	0x0000059d,
+	0x000005a0,
 	0x00000003,
 	0x00000002,
-	0x0000062f,
+	0x00000632,
 	0x00040004,
 	0x00000000,
-	0x0000064b,
+	0x0000064e,
 	0x00010005,
 	0x00000000,
-	0x00000668,
+	0x0000066b,
 	0x00010006,
 	0x00000000,
-	0x000005ed,
+	0x000005f0,
 	0x00000007,
 	0x00000000,
-	0x00000673,
+	0x00000676,
 /* 0x03c4: memx_func_tail */
 /* 0x03c4: memx_ts_start */
 	0x00000000,
@@ -917,947 +917,947 @@ uint32_t gt215_pmu_data[] = {
 };
 
 uint32_t gt215_pmu_code[] = {
-	0x03930ef5,
+	0x03920ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xd00604b6,
 	0x04bd000e,
-	0xf001d7f0,
-	0x07f101d3,
-	0x04b607ac,
-	0x000dd006,
-/* 0x0022: rd32_wait */
-	0xd7f104bd,
-	0xd4b607ac,
-	0x00ddcf06,
-	0x7000d4f1,
-	0xf1f21bf4,
-	0xb607a4d7,
-	0xddcf06d4,
-/* 0x003f: wr32 */
-	0xf100f800,
-	0xb607a007,
-	0x0ed00604,
-	0xf104bd00,
-	0xb607a407,
+	0x0001d7f1,
+	0xf101d3f0,
+	0xb607ac07,
 	0x0dd00604,
-	0xf004bd00,
-	0xd5f002d7,
-	0x01d3f0f0,
-	0x07ac07f1,
+/* 0x0023: rd32_wait */
+	0xf104bd00,
+	0xb607acd7,
+	0xddcf06d4,
+	0x00d4f100,
+	0xf21bf470,
+	0x07a4d7f1,
+	0xcf06d4b6,
+	0x00f800dd,
+/* 0x0040: wr32 */
+	0x07a007f1,
+	0xd00604b6,
+	0x04bd000e,
+	0x07a407f1,
 	0xd00604b6,
 	0x04bd000d,
-/* 0x006c: wr32_wait */
-	0x07acd7f1,
-	0xcf06d4b6,
-	0xd4f100dd,
-	0x1bf47000,
-/* 0x007f: nsec */
-	0xf900f8f2,
-	0xf080f990,
-	0x84b62c87,
-	0x0088cf06,
-/* 0x008c: nsec_loop */
-	0xb62c97f0,
-	0x99cf0694,
-	0x0298bb00,
-	0xf4069eb8,
-	0x80fcf11e,
-	0x00f890fc,
-/* 0x00a4: wait */
-	0x80f990f9,
-	0xb62c87f0,
-	0x88cf0684,
-/* 0x00b1: wait_loop */
-	0x02eeb900,
-	0xb90421f4,
-	0xadfd02da,
-	0x06acb804,
-	0xf0150bf4,
+	0x00f2d7f1,
+	0xf101d3f0,
+	0xb607ac07,
+	0x0dd00604,
+/* 0x006b: wr32_wait */
+	0xf104bd00,
+	0xb607acd7,
+	0xddcf06d4,
+	0x00d4f100,
+	0xf21bf470,
+/* 0x007e: nsec */
+	0x90f900f8,
+	0x87f080f9,
+	0x0684b62c,
+/* 0x008b: nsec_loop */
+	0xf00088cf,
 	0x94b62c97,
 	0x0099cf06,
 	0xb80298bb,
-	0x1ef4069b,
-/* 0x00d5: wait_done */
-	0xfc80fcdf,
-/* 0x00db: intr_watchdog */
-	0x9800f890,
-	0x96b003e9,
-	0x2a0bf400,
-	0xbb9a0a98,
-	0x1cf4029a,
-	0x01d7f00f,
-	0x02d221f5,
-	0x0ef494bd,
-/* 0x00f9: intr_watchdog_next_time */
-	0x9b0a9815,
-	0xf400a6b0,
-	0x9ab8090b,
-	0x061cf406,
-/* 0x0108: intr_watchdog_next_time_set */
-/* 0x010b: intr_watchdog_next_proc */
-	0x809b0980,
-	0xe0b603e9,
-	0x68e6b158,
-	0xc61bf402,
-/* 0x011a: intr */
-	0x00f900f8,
-	0x80f904bd,
-	0xa0f990f9,
-	0xc0f9b0f9,
-	0xe0f9d0f9,
-	0xf7f0f0f9,
-	0x0188fe00,
-	0x87f180f9,
-	0x84b605d0,
+	0x1ef4069e,
+	0xfc80fcf1,
+/* 0x00a3: wait */
+	0xf900f890,
+	0xf080f990,
+	0x84b62c87,
 	0x0088cf06,
-	0xf10180b6,
-	0xb605d007,
+/* 0x00b0: wait_loop */
+	0xf402eeb9,
+	0xdab90421,
+	0x04adfd02,
+	0xf406acb8,
+	0x97f0150b,
+	0x0694b62c,
+	0xbb0099cf,
+	0x9bb80298,
+	0xdf1ef406,
+/* 0x00d4: wait_done */
+	0x90fc80fc,
+/* 0x00da: intr_watchdog */
+	0xe99800f8,
+	0x0096b003,
+	0x982a0bf4,
+	0x9abb9a0a,
+	0x0f1cf402,
+	0xf501d7f0,
+	0xbd02d121,
+	0x150ef494,
+/* 0x00f8: intr_watchdog_next_time */
+	0xb09b0a98,
+	0x0bf400a6,
+	0x069ab809,
+/* 0x0107: intr_watchdog_next_time_set */
+	0x80061cf4,
+/* 0x010a: intr_watchdog_next_proc */
+	0xe9809b09,
+	0x58e0b603,
+	0x0268e6b1,
+	0xf8c61bf4,
+/* 0x0119: intr */
+	0xbd00f900,
+	0xf980f904,
+	0xf9a0f990,
+	0xf9c0f9b0,
+	0xf9e0f9d0,
+	0x00f7f0f0,
+	0xf90188fe,
+	0xd087f180,
+	0x0684b605,
+	0xb60088cf,
+	0x07f10180,
+	0x04b605d0,
+	0x0008d006,
+	0x87f004bd,
+	0x0684b608,
+	0xc40088cf,
+	0x0bf40289,
+	0x9b008023,
+	0xf458e7f0,
+	0x0998da21,
+	0x0096b09b,
+	0xf0110bf4,
+	0x04b63407,
+	0x0009d006,
+	0x098004bd,
+/* 0x017d: intr_skip_watchdog */
+	0x0089e49a,
+	0x480bf408,
+	0x068897f1,
+	0xcf0694b6,
+	0x9ac40099,
+	0x2c0bf402,
+	0x04c0c7f1,
+	0xcf06c4b6,
+	0xc0f900cc,
+	0x4f48e7f1,
+	0x5453e3f1,
+	0xf500d7f0,
+	0xfc033621,
+	0xc007f1c0,
+	0x0604b604,
+	0xbd000cd0,
+/* 0x01bd: intr_subintr_skip_fifo */
+	0x8807f104,
+	0x0604b606,
+	0xbd0009d0,
+/* 0x01c9: intr_skip_subintr */
+	0xe097f104,
+	0xfd90bd00,
+	0x07f00489,
+	0x0604b604,
+	0xbd0008d0,
+	0xfe80fc04,
+	0xf0fc0088,
+	0xd0fce0fc,
+	0xb0fcc0fc,
+	0x90fca0fc,
+	0x00fc80fc,
+	0xf80032f4,
+/* 0x01f9: ticks_from_ns */
+	0xf9c0f901,
+	0xcbd7f1b0,
+	0x00d3f000,
+	0x040b21f5,
+	0x03e8ccec,
+	0xf400b4b0,
+	0xeeec120b,
+	0xd7f103e8,
+	0xd3f000cb,
+	0x0b21f500,
+/* 0x0221: ticks_from_ns_quit */
+	0x02ceb904,
+	0xc0fcb0fc,
+/* 0x022a: ticks_from_us */
+	0xc0f900f8,
+	0xd7f1b0f9,
+	0xd3f000cb,
+	0x0b21f500,
+	0x02ceb904,
+	0xf400b4b0,
+	0xe4bd050b,
+/* 0x0244: ticks_from_us_quit */
+	0xc0fcb0fc,
+/* 0x024a: ticks_to_us */
+	0xd7f100f8,
+	0xd3f000cb,
+	0xecedff00,
+/* 0x0256: timer */
+	0x90f900f8,
+	0x32f480f9,
+	0x03f89810,
+	0xf40086b0,
+	0x84bd651c,
+	0xb63807f0,
 	0x08d00604,
 	0xf004bd00,
-	0x84b60887,
+	0x84b63487,
 	0x0088cf06,
-	0xf40289c4,
-	0x0080230b,
-	0x58e7f09b,
-	0x98db21f4,
-	0x96b09b09,
-	0x110bf400,
+	0xbb9a0998,
+	0xe9bb0298,
+	0x03fe8000,
+	0xb60887f0,
+	0x88cf0684,
+	0x0284f000,
+	0xf0261bf4,
+	0x84b63487,
+	0x0088cf06,
+	0xf406e0b8,
+	0xe8b8090b,
+	0x111cf406,
+/* 0x02ac: timer_reset */
 	0xb63407f0,
-	0x09d00604,
+	0x0ed00604,
 	0x8004bd00,
-/* 0x017e: intr_skip_watchdog */
-	0x89e49a09,
-	0x0bf40800,
-	0x8897f148,
-	0x0694b606,
-	0xc40099cf,
-	0x0bf4029a,
-	0xc0c7f12c,
-	0x06c4b604,
-	0xf900cccf,
-	0x48e7f1c0,
-	0x53e3f14f,
-	0x00d7f054,
-	0x033721f5,
-	0x07f1c0fc,
-	0x04b604c0,
-	0x000cd006,
-/* 0x01be: intr_subintr_skip_fifo */
-	0x07f104bd,
-	0x04b60688,
-	0x0009d006,
-/* 0x01ca: intr_skip_subintr */
-	0x97f104bd,
-	0x90bd00e0,
-	0xf00489fd,
-	0x04b60407,
-	0x0008d006,
-	0x80fc04bd,
-	0xfc0088fe,
-	0xfce0fcf0,
-	0xfcc0fcd0,
-	0xfca0fcb0,
-	0xfc80fc90,
-	0x0032f400,
-/* 0x01fa: ticks_from_ns */
-	0xc0f901f8,
-	0xd7f1b0f9,
-	0xd3f000cb,
-	0x0821f500,
-	0xe8ccec04,
-	0x00b4b003,
-	0xec120bf4,
-	0xf103e8ee,
-	0xf000cbd7,
-	0x21f500d3,
-/* 0x0222: ticks_from_ns_quit */
-	0xceb90408,
-	0xfcb0fc02,
-/* 0x022b: ticks_from_us */
-	0xf900f8c0,
-	0xf1b0f9c0,
-	0xf000cbd7,
-	0x21f500d3,
-	0xceb90408,
-	0x00b4b002,
-	0xbd050bf4,
-/* 0x0245: ticks_from_us_quit */
-	0xfcb0fce4,
-/* 0x024b: ticks_to_us */
-	0xf100f8c0,
-	0xf000cbd7,
-	0xedff00d3,
-/* 0x0257: timer */
-	0xf900f8ec,
-	0xf480f990,
-	0xf8981032,
-	0x0086b003,
-	0xbd651cf4,
-	0x3807f084,
+/* 0x02ba: timer_enable */
+	0x87f09a0e,
+	0x3807f001,
 	0xd00604b6,
 	0x04bd0008,
-	0xb63487f0,
-	0x88cf0684,
-	0x9a099800,
-	0xbb0298bb,
-	0xfe8000e9,
-	0x0887f003,
-	0xcf0684b6,
-	0x84f00088,
-	0x261bf402,
-	0xb63487f0,
-	0x88cf0684,
-	0x06e0b800,
-	0xb8090bf4,
-	0x1cf406e8,
-/* 0x02ad: timer_reset */
-	0x3407f011,
-	0xd00604b6,
-	0x04bd000e,
-/* 0x02bb: timer_enable */
-	0xf09a0e80,
-	0x07f00187,
-	0x0604b638,
-	0xbd0008d0,
-/* 0x02c9: timer_done */
-	0x1031f404,
+/* 0x02c8: timer_done */
+	0xfc1031f4,
+	0xf890fc80,
+/* 0x02d1: send_proc */
+	0xf980f900,
+	0x05e89890,
+	0xf004e998,
+	0x89b80486,
+	0x2a0bf406,
+	0x940398c4,
+	0x80b60488,
+	0x008ebb18,
+	0x8000fa98,
+	0x8d80008a,
+	0x028c8001,
+	0xb6038b80,
+	0x94f00190,
+	0x04e98007,
+/* 0x030b: send_done */
+	0xfc0231f4,
+	0xf880fc90,
+/* 0x0311: find */
+	0xf080f900,
+	0x31f45887,
+/* 0x0319: find_loop */
+	0x008a9801,
+	0xf406aeb8,
+	0x80b6100b,
+	0x6886b158,
+	0xf01bf402,
+/* 0x032f: find_done */
+	0xb90132f4,
+	0x80fc028e,
+/* 0x0336: send */
+	0x21f500f8,
+	0x01f40311,
+/* 0x033f: recv */
+	0xf900f897,
+	0x9880f990,
+	0xe99805e8,
+	0x0132f404,
+	0xf40689b8,
+	0x89c43d0b,
+	0x0180b603,
+	0x800784f0,
+	0xea9805e8,
+	0xfef0f902,
+	0xf0f9018f,
+	0x9402efb9,
+	0xe9bb0499,
+	0x18e0b600,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0xf0fca5f9,
+	0xf400f8fe,
+	0xf0fc0131,
+/* 0x038c: recv_done */
 	0x90fc80fc,
-/* 0x02d2: send_proc */
-	0x80f900f8,
-	0xe89890f9,
-	0x04e99805,
-	0xb80486f0,
-	0x0bf40689,
-	0x0398c42a,
-	0xb6048894,
-	0x8ebb1880,
-	0x00fa9800,
-	0x80008a80,
-	0x8c80018d,
-	0x038b8002,
-	0xf00190b6,
-	0xe9800794,
-	0x0231f404,
-/* 0x030c: send_done */
-	0x80fc90fc,
-/* 0x0312: find */
-	0x80f900f8,
-	0xf45887f0,
-/* 0x031a: find_loop */
-	0x8a980131,
-	0x06aeb800,
-	0xb6100bf4,
-	0x86b15880,
-	0x1bf40268,
-	0x0132f4f0,
-/* 0x0330: find_done */
-	0xfc028eb9,
-/* 0x0337: send */
-	0xf500f880,
-	0xf4031221,
-	0x00f89701,
-/* 0x0340: recv */
-	0x80f990f9,
-	0x9805e898,
-	0x32f404e9,
-	0x0689b801,
-	0xc43d0bf4,
-	0x80b60389,
-	0x0784f001,
-	0x9805e880,
-	0xf0f902ea,
-	0xf9018ffe,
-	0x02efb9f0,
-	0xbb049994,
-	0xe0b600e9,
-	0x03eb9818,
-	0x9802ec98,
-	0xee9801ed,
-	0xfca5f900,
-	0x00f8fef0,
-	0xfc0131f4,
-/* 0x038d: recv_done */
-	0xfc80fcf0,
-/* 0x0393: init */
-	0xf100f890,
-	0xb6010817,
-	0x11cf0614,
-	0x0911e700,
-	0x0814b601,
-	0xf10014fe,
-	0xf000e017,
-	0x07f00013,
-	0x0604b61c,
-	0xbd0001d0,
-	0xff17f004,
-	0xb61407f0,
-	0x01d00604,
-	0xf004bd00,
-	0x15f10217,
-	0x07f00800,
-	0x0604b610,
-	0xbd0001d0,
-	0x1a17f104,
-	0x0013f001,
-	0xf40010fe,
-	0x17f01031,
-	0x3807f001,
+/* 0x0392: init */
+	0x17f100f8,
+	0x14b60108,
+	0x0011cf06,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
+	0xd00604b6,
+	0x04bd0001,
+	0xf0ff17f0,
+	0x04b61407,
+	0x0001d006,
+	0x17f004bd,
+	0x0015f102,
+	0x1007f008,
 	0xd00604b6,
 	0x04bd0001,
-/* 0x03f7: init_proc */
-	0x9858f7f0,
-	0x16b001f1,
-	0xfa0bf400,
-	0xf0b615f9,
-	0xf20ef458,
-/* 0x0408: mulu32_32_64 */
-	0x20f910f9,
-	0x40f930f9,
-	0x9510e195,
-	0xc4bd10d2,
-	0xedffb4bd,
-	0x301dffc0,
-	0xf10234b9,
-	0xb6ffff34,
-	0x45b61034,
-	0x00c3bb10,
-	0xff01b4bb,
-	0x34b930e2,
-	0xff34f102,
-	0x1034b6ff,
-	0xbb1045b6,
-	0xb4bb00c3,
-	0x3012ff01,
-	0xfc00b3bb,
-	0xfc30fc40,
-	0xf810fc20,
-/* 0x0459: host_send */
-	0xb017f100,
+	0x011917f1,
+	0xf10013f0,
+	0xfeffff14,
+	0x31f40010,
+	0x0117f010,
+	0xb63807f0,
+	0x01d00604,
+	0xf004bd00,
+/* 0x03fa: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x040b: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x045c: host_send */
+	0x04b017f1,
+	0xcf0614b6,
+	0x27f10011,
+	0x24b604a0,
+	0x0022cf06,
+	0xf40612b8,
+	0x1ec4320b,
+	0x04ee9407,
+	0x0270e0b7,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0x033621f5,
+	0xc40110b6,
+	0x07f10f1e,
+	0x04b604b0,
+	0x000ed006,
+	0x0ef404bd,
+/* 0x04a5: host_send_done */
+/* 0x04a7: host_recv */
+	0xf100f8ba,
+	0xf14e4917,
+	0xb8525413,
+	0x0bf406e1,
+/* 0x04b5: host_recv_wait */
+	0xcc17f1aa,
 	0x0614b604,
 	0xf10011cf,
-	0xb604a027,
+	0xb604c827,
 	0x22cf0624,
-	0x0612b800,
-	0xc4320bf4,
-	0xee94071e,
-	0x70e0b704,
-	0x03eb9802,
-	0x9802ec98,
-	0xee9801ed,
-	0x3721f500,
-	0x0110b603,
-	0xf10f1ec4,
-	0xb604b007,
-	0x0ed00604,
-	0xf404bd00,
-/* 0x04a2: host_send_done */
-	0x00f8ba0e,
-/* 0x04a4: host_recv */
-	0x4e4917f1,
-	0x525413f1,
-	0xf406e1b8,
-/* 0x04b2: host_recv_wait */
-	0x17f1aa0b,
-	0x14b604cc,
-	0x0011cf06,
-	0x04c827f1,
-	0xcf0624b6,
-	0x16f00022,
-	0x0612b808,
-	0xc4e60bf4,
-	0x34b60723,
-	0xf030b704,
-	0x033b8002,
-	0x80023c80,
-	0x3e80013d,
-	0x0120b600,
-	0xf10f24f0,
-	0xb604c807,
-	0x02d00604,
-	0xf004bd00,
-	0x07f04027,
-	0x0604b600,
-	0xbd0002d0,
-/* 0x0507: host_init */
-	0xf100f804,
-	0xb6008017,
-	0x15f11014,
-	0x07f10270,
-	0x04b604d0,
-	0x0001d006,
-	0x17f104bd,
+	0x0816f000,
+	0xf40612b8,
+	0x23c4e60b,
+	0x0434b607,
+	0x02f030b7,
+	0x80033b80,
+	0x3d80023c,
+	0x003e8001,
+	0xf00120b6,
+	0x07f10f24,
+	0x04b604c8,
+	0x0002d006,
+	0x27f004bd,
+	0x0007f040,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x050a: host_init */
+	0x17f100f8,
 	0x14b60080,
-	0xf015f110,
-	0xdc07f102,
+	0x7015f110,
+	0xd007f102,
 	0x0604b604,
 	0xbd0001d0,
-	0x0117f004,
-	0x04c407f1,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
 	0xd00604b6,
 	0x04bd0001,
-/* 0x0546: memx_func_enter */
-	0x87f100f8,
-	0x8eb91610,
-	0x0421f402,
-	0xf102d7b9,
-	0xf1fffc67,
-	0xfdffff63,
-	0x67f10476,
-	0x76fd0002,
-	0xf980f905,
-	0xfcd0fc70,
-	0x3f21f4e0,
+	0xf10117f0,
+	0xb604c407,
+	0x01d00604,
+	0xf804bd00,
+/* 0x0549: memx_func_enter */
+	0x1087f100,
+	0x028eb916,
+	0xb90421f4,
+	0x67f102d7,
+	0x63f1fffc,
+	0x76fdffff,
+	0x0267f104,
+	0x0576fd00,
+	0x70f980f9,
+	0xe0fcd0fc,
+	0xf04021f4,
+	0x07f10467,
+	0x04b607e0,
+	0x0006d006,
+/* 0x0582: memx_func_enter_wait */
+	0x67f104bd,
+	0x64b607c0,
+	0x0066cf06,
+	0xf40464f0,
+	0x67f0f30b,
+	0x0664b62c,
+	0x800066cf,
+	0x00f8f106,
+/* 0x05a0: memx_func_leave */
+	0xb62c67f0,
+	0x66cf0664,
+	0xf2068000,
 	0xf10467f0,
-	0xb607e007,
+	0xb607e407,
 	0x06d00604,
-/* 0x057f: memx_func_enter_wait */
+/* 0x05bb: memx_func_leave_wait */
 	0xf104bd00,
 	0xb607c067,
 	0x66cf0664,
 	0x0464f000,
-	0xf0f30bf4,
-	0x64b62c67,
-	0x0066cf06,
-	0xf8f10680,
-/* 0x059d: memx_func_leave */
-	0x2c67f000,
-	0xcf0664b6,
-	0x06800066,
-	0x0467f0f2,
-	0x07e407f1,
-	0xd00604b6,
-	0x04bd0006,
-/* 0x05b8: memx_func_leave_wait */
-	0x07c067f1,
-	0xcf0664b6,
-	0x64f00066,
-	0xf31bf404,
-	0x161087f1,
-	0xf4028eb9,
-	0xd7b90421,
-	0xcc67f102,
-	0xff63f1ff,
-	0x0476fdff,
-	0x70f980f9,
-	0xe0fcd0fc,
-	0xf83f21f4,
-/* 0x05ed: memx_func_wait_vblank */
-	0x00169800,
-	0xf40066b0,
-	0x66b0130b,
-	0x060bf401,
-/* 0x05ff: memx_func_wait_vblank_head1 */
-	0xf12e0ef4,
-	0xf4002077,
-/* 0x0606: memx_func_wait_vblank_head0 */
-	0x77f1070e,
-/* 0x060a: memx_func_wait_vblank_0 */
-	0x67f10008,
-	0x64b607c4,
-	0x0066cf06,
-	0xf40467fd,
-/* 0x061a: memx_func_wait_vblank_1 */
-	0x67f1f31b,
-	0x64b607c4,
-	0x0066cf06,
-	0xf40467fd,
-/* 0x062a: memx_func_wait_vblank_fini */
-	0x10b6f30b,
-/* 0x062f: memx_func_wr32 */
-	0x9800f804,
-	0x15980016,
-	0x0810b601,
-	0x50f960f9,
-	0xe0fcd0fc,
-	0xb63f21f4,
-	0x1bf40242,
-/* 0x064b: memx_func_wait */
-	0xf000f8e9,
-	0x84b62c87,
-	0x0088cf06,
-	0x98001e98,
-	0x1c98011d,
-	0x031b9802,
-	0xf41010b6,
-	0x00f8a421,
-/* 0x0668: memx_func_delay */
-	0xb6001e98,
-	0x21f40410,
-/* 0x0673: memx_func_train */
-	0xf100f87f,
-	0xf1000357,
-	0xf1000077,
-	0xf0000097,
-	0x9eb97093,
-	0x0421f402,
-	0xf102d8b9,
-	0xf42710e7,
-/* 0x0692: memx_func_train_loop_outer */
-	0x58e07f21,
-	0x83f10101,
-	0x97f10200,
-	0x93f011e0,
-	0xf990f911,
-	0xfcd0fc80,
-	0x3f21f4e0,
-	0x67f150f9,
-/* 0x06b2: memx_func_train_loop_inner */
-	0x87f10000,
-	0x68ff1111,
-	0x10989490,
-	0xf10589fd,
-	0xf0072097,
-	0x90f91093,
-	0xd0fc80f9,
-	0x21f4e0fc,
-	0x8097f13f,
-	0x1093f000,
-	0xf4029eb9,
-	0xd8b90421,
-	0x2088c502,
+	0xf1f31bf4,
+	0xb9161087,
+	0x21f4028e,
+	0x02d7b904,
+	0xffcc67f1,
+	0xffff63f1,
+	0xf90476fd,
+	0xfc70f980,
+	0xf4e0fcd0,
+	0x00f84021,
+/* 0x05f0: memx_func_wait_vblank */
+	0xb0001698,
+	0x0bf40066,
+	0x0166b013,
+	0xf4060bf4,
+/* 0x0602: memx_func_wait_vblank_head1 */
+	0x77f12e0e,
+	0x0ef40020,
+/* 0x0609: memx_func_wait_vblank_head0 */
+	0x0877f107,
+/* 0x060d: memx_func_wait_vblank_0 */
+	0xc467f100,
+	0x0664b607,
+	0xfd0066cf,
+	0x1bf40467,
+/* 0x061d: memx_func_wait_vblank_1 */
+	0xc467f1f3,
+	0x0664b607,
+	0xfd0066cf,
+	0x0bf40467,
+/* 0x062d: memx_func_wait_vblank_fini */
+	0x0410b6f3,
+/* 0x0632: memx_func_wr32 */
+	0x169800f8,
+	0x01159800,
+	0xf90810b6,
+	0xfc50f960,
+	0xf4e0fcd0,
+	0x42b64021,
+	0xe91bf402,
+/* 0x064e: memx_func_wait */
+	0x87f000f8,
+	0x0684b62c,
+	0x980088cf,
+	0x1d98001e,
+	0x021c9801,
+	0xb6031b98,
+	0x21f41010,
+/* 0x066b: memx_func_delay */
+	0x9800f8a3,
+	0x10b6001e,
+	0x7e21f404,
+/* 0x0676: memx_func_train */
+	0x57f100f8,
+	0x77f10003,
+	0x97f10000,
+	0x93f00000,
+	0x029eb970,
+	0xb90421f4,
+	0xe7f102d8,
+	0x21f42710,
+/* 0x0695: memx_func_train_loop_outer */
+	0x0158e07e,
+	0x0083f101,
+	0xe097f102,
+	0x1193f011,
 	0x80f990f9,
 	0xe0fcd0fc,
-	0xf13f21f4,
-	0xf0053c97,
-	0x87f11093,
-	0x83f13002,
-	0x90f98000,
-	0xd0fc80f9,
-	0x21f4e0fc,
-	0x60e7f13f,
-	0x10e3f005,
-	0x0000d7f1,
-	0x8000d3f1,
-	0xf100dc90,
-	0xf08480b7,
-	0x21f41eb3,
-	0x0057f1a4,
-	0xff97f100,
-	0x0093f1ff,
-/* 0x0731: memx_func_train_loop_4x */
-	0x80a7f183,
-	0x10a3f000,
-	0xf402aeb9,
-	0xd8b90421,
-	0xdfb7f102,
-	0xffb3f1ff,
-	0x048bfdff,
-	0x80f9a0f9,
-	0xe0fcd0fc,
-	0xf13f21f4,
-	0xf0053ca7,
-	0x87f110a3,
-	0x83f13002,
-	0xa0f98000,
-	0xd0fc80f9,
-	0x21f4e0fc,
-	0x60e7f13f,
-	0x10e3f005,
-	0x0000d7f1,
-	0x8000d3f1,
-	0xf102dcb9,
-	0xf02710b7,
-	0x21f400b3,
-	0x02eeb9a4,
-	0xb90421f4,
-	0x9dff02dd,
-	0x0150b694,
-	0xf4045670,
-	0x7aa0921e,
-	0xa9800bcc,
-	0x0160b600,
-	0x700470b6,
-	0x1ef51066,
-	0x50fcff00,
+	0xf94021f4,
+	0x0067f150,
+/* 0x06b5: memx_func_train_loop_inner */
+	0x1187f100,
+	0x9068ff11,
+	0xfd109894,
+	0x97f10589,
+	0x93f00720,
+	0xf990f910,
+	0xfcd0fc80,
+	0x4021f4e0,
+	0x008097f1,
+	0xb91093f0,
+	0x21f4029e,
+	0x02d8b904,
+	0xf92088c5,
+	0xfc80f990,
+	0xf4e0fcd0,
+	0x97f14021,
+	0x93f0053c,
+	0x0287f110,
+	0x0083f130,
+	0xf990f980,
+	0xfcd0fc80,
+	0x4021f4e0,
+	0x0560e7f1,
+	0xf110e3f0,
+	0xf10000d7,
+	0x908000d3,
+	0xb7f100dc,
+	0xb3f08480,
+	0xa321f41e,
+	0x000057f1,
+	0xffff97f1,
+	0x830093f1,
+/* 0x0734: memx_func_train_loop_4x */
+	0x0080a7f1,
+	0xb910a3f0,
+	0x21f402ae,
+	0x02d8b904,
+	0xffdfb7f1,
+	0xffffb3f1,
+	0xf9048bfd,
+	0xfc80f9a0,
+	0xf4e0fcd0,
+	0xa7f14021,
+	0xa3f0053c,
+	0x0287f110,
+	0x0083f130,
+	0xf9a0f980,
+	0xfcd0fc80,
+	0x4021f4e0,
+	0x0560e7f1,
+	0xf110e3f0,
+	0xf10000d7,
+	0xb98000d3,
+	0xb7f102dc,
+	0xb3f02710,
+	0xa321f400,
+	0xf402eeb9,
+	0xddb90421,
+	0x949dff02,
 	0x700150b6,
-	0x1ef50756,
-	0x00f8fed4,
-/* 0x07c4: memx_exec */
-	0xd0f9e0f9,
-	0xb902c1b9,
-/* 0x07ce: memx_exec_next */
-	0x139802b2,
-	0x0410b600,
-	0x01f034e7,
-	0x01e033e7,
-	0xf00132b6,
-	0x35980c30,
-	0xb855f9de,
-	0x1ef40612,
-	0xf10b98e4,
-	0xbbf20c98,
-	0xb7f102cb,
-	0xb4b607c4,
-	0x00bbcf06,
-	0xe0fcd0fc,
-	0x033721f5,
-/* 0x080a: memx_info */
-	0xc67000f8,
-	0x0e0bf401,
-/* 0x0810: memx_info_data */
-	0x03ccc7f1,
-	0x0800b7f1,
-/* 0x081b: memx_info_train */
-	0xf10b0ef4,
-	0xf10bccc7,
-/* 0x0823: memx_info_send */
-	0xf50100b7,
-	0xf8033721,
-/* 0x0829: memx_recv */
-	0x01d6b000,
-	0xb0980bf4,
-	0x0bf400d6,
-/* 0x0837: memx_init */
-	0xf800f8d8,
-/* 0x0839: perf_recv */
-/* 0x083b: perf_init */
-	0xf800f800,
-/* 0x083d: i2c_drive_scl */
-	0x0036b000,
-	0xf1110bf4,
-	0xb607e007,
-	0x01d00604,
-	0xf804bd00,
-/* 0x0851: i2c_drive_scl_lo */
-	0xe407f100,
-	0x0604b607,
-	0xbd0001d0,
-/* 0x085f: i2c_drive_sda */
-	0xb000f804,
-	0x0bf40036,
-	0xe007f111,
-	0x0604b607,
-	0xbd0002d0,
-/* 0x0873: i2c_drive_sda_lo */
-	0xf100f804,
-	0xb607e407,
-	0x02d00604,
-	0xf804bd00,
-/* 0x0881: i2c_sense_scl */
-	0x0132f400,
-	0x07c437f1,
-	0xcf0634b6,
-	0x31fd0033,
-	0x060bf404,
-/* 0x0897: i2c_sense_scl_done */
-	0xf80131f4,
-/* 0x0899: i2c_sense_sda */
-	0x0132f400,
-	0x07c437f1,
-	0xcf0634b6,
-	0x32fd0033,
-	0x060bf404,
-/* 0x08af: i2c_sense_sda_done */
-	0xf80131f4,
-/* 0x08b1: i2c_raise_scl */
-	0xf140f900,
-	0xf0089847,
-	0x21f50137,
-/* 0x08be: i2c_raise_scl_wait */
-	0xe7f1083d,
-	0x21f403e8,
-	0x8121f57f,
-	0x0901f408,
-	0xf40142b6,
-/* 0x08d2: i2c_raise_scl_done */
-	0x40fcef1b,
-/* 0x08d6: i2c_start */
-	0x21f500f8,
-	0x11f40881,
-	0x9921f50d,
-	0x0611f408,
-/* 0x08e7: i2c_start_rep */
-	0xf0300ef4,
-	0x21f50037,
-	0x37f0083d,
-	0x5f21f501,
-	0x0076bb08,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b608b1,
-	0x1f11f404,
-/* 0x0914: i2c_start_send */
-	0xf50037f0,
-	0xf1085f21,
-	0xf41388e7,
-	0x37f07f21,
-	0x3d21f500,
-	0x88e7f108,
-	0x7f21f413,
-/* 0x0930: i2c_start_out */
-/* 0x0932: i2c_stop */
-	0x37f000f8,
-	0x3d21f500,
-	0x0037f008,
-	0x085f21f5,
-	0x03e8e7f1,
-	0xf07f21f4,
-	0x21f50137,
-	0xe7f1083d,
-	0x21f41388,
-	0x0137f07f,
-	0x085f21f5,
-	0x1388e7f1,
-	0xf87f21f4,
-/* 0x0965: i2c_bitw */
-	0x5f21f500,
-	0xe8e7f108,
-	0x7f21f403,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xb121f550,
-	0x0464b608,
-	0xf11811f4,
-	0xf41388e7,
-	0x37f07f21,
-	0x3d21f500,
-	0x88e7f108,
-	0x7f21f413,
-/* 0x09a4: i2c_bitw_out */
-/* 0x09a6: i2c_bitr */
-	0x37f000f8,
-	0x5f21f501,
+	0x1ef40456,
+	0xcc7aa092,
+	0x00a9800b,
+	0xb60160b6,
+	0x66700470,
+	0x001ef510,
+	0xb650fcff,
+	0x56700150,
+	0xd41ef507,
+/* 0x07c7: memx_exec */
+	0xf900f8fe,
+	0xb9d0f9e0,
+	0xb2b902c1,
+/* 0x07d1: memx_exec_next */
+	0x00139802,
+	0xe70410b6,
+	0xe701f034,
+	0xb601e033,
+	0x30f00132,
+	0xde35980c,
+	0x12b855f9,
+	0xe41ef406,
+	0x98f10b98,
+	0xcbbbf20c,
+	0xc4b7f102,
+	0x06b4b607,
+	0xfc00bbcf,
+	0xf5e0fcd0,
+	0xf8033621,
+/* 0x080d: memx_info */
+	0x01c67000,
+/* 0x0813: memx_info_data */
+	0xf10e0bf4,
+	0xf103ccc7,
+	0xf40800b7,
+/* 0x081e: memx_info_train */
+	0xc7f10b0e,
+	0xb7f10bcc,
+/* 0x0826: memx_info_send */
+	0x21f50100,
+	0x00f80336,
+/* 0x082c: memx_recv */
+	0xf401d6b0,
+	0xd6b0980b,
+	0xd80bf400,
+/* 0x083a: memx_init */
+	0x00f800f8,
+/* 0x083c: perf_recv */
+/* 0x083e: perf_init */
+	0x00f800f8,
+/* 0x0840: i2c_drive_scl */
+	0xf40036b0,
+	0x07f1110b,
+	0x04b607e0,
+	0x0001d006,
+	0x00f804bd,
+/* 0x0854: i2c_drive_scl_lo */
+	0x07e407f1,
+	0xd00604b6,
+	0x04bd0001,
+/* 0x0862: i2c_drive_sda */
+	0x36b000f8,
+	0x110bf400,
+	0x07e007f1,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x0876: i2c_drive_sda_lo */
+	0x07f100f8,
+	0x04b607e4,
+	0x0002d006,
+	0x00f804bd,
+/* 0x0884: i2c_sense_scl */
+	0xf10132f4,
+	0xb607c437,
+	0x33cf0634,
+	0x0431fd00,
+	0xf4060bf4,
+/* 0x089a: i2c_sense_scl_done */
+	0x00f80131,
+/* 0x089c: i2c_sense_sda */
+	0xf10132f4,
+	0xb607c437,
+	0x33cf0634,
+	0x0432fd00,
+	0xf4060bf4,
+/* 0x08b2: i2c_sense_sda_done */
+	0x00f80131,
+/* 0x08b4: i2c_raise_scl */
+	0x47f140f9,
+	0x37f00898,
+	0x4021f501,
+/* 0x08c1: i2c_raise_scl_wait */
 	0xe8e7f108,
-	0x7f21f403,
+	0x7e21f403,
+	0x088421f5,
+	0xb60901f4,
+	0x1bf40142,
+/* 0x08d5: i2c_raise_scl_done */
+	0xf840fcef,
+/* 0x08d9: i2c_start */
+	0x8421f500,
+	0x0d11f408,
+	0x089c21f5,
+	0xf40611f4,
+/* 0x08ea: i2c_start_rep */
+	0x37f0300e,
+	0x4021f500,
+	0x0137f008,
+	0x086221f5,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xb121f550,
+	0xb421f550,
 	0x0464b608,
-	0xf51b11f4,
-	0xf0089921,
+/* 0x0917: i2c_start_send */
+	0xf01f11f4,
 	0x21f50037,
-	0xe7f1083d,
+	0xe7f10862,
 	0x21f41388,
-	0x013cf07f,
-/* 0x09eb: i2c_bitr_done */
-	0xf80131f4,
-/* 0x09ed: i2c_get_byte */
-	0x0057f000,
-/* 0x09f3: i2c_get_byte_next */
-	0xb60847f0,
-	0x76bb0154,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb609a621,
-	0x11f40464,
-	0x0553fd2b,
-	0xf40142b6,
-	0x37f0d81b,
+	0x0037f07e,
+	0x084021f5,
+	0x1388e7f1,
+/* 0x0933: i2c_start_out */
+	0xf87e21f4,
+/* 0x0935: i2c_stop */
+	0x0037f000,
+	0x084021f5,
+	0xf50037f0,
+	0xf1086221,
+	0xf403e8e7,
+	0x37f07e21,
+	0x4021f501,
+	0x88e7f108,
+	0x7e21f413,
+	0xf50137f0,
+	0xf1086221,
+	0xf41388e7,
+	0x00f87e21,
+/* 0x0968: i2c_bitw */
+	0x086221f5,
+	0x03e8e7f1,
+	0xbb7e21f4,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x08b421f5,
+	0xf40464b6,
+	0xe7f11811,
+	0x21f41388,
+	0x0037f07e,
+	0x084021f5,
+	0x1388e7f1,
+/* 0x09a7: i2c_bitw_out */
+	0xf87e21f4,
+/* 0x09a9: i2c_bitr */
+	0x0137f000,
+	0x086221f5,
+	0x03e8e7f1,
+	0xbb7e21f4,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x08b421f5,
+	0xf40464b6,
+	0x21f51b11,
+	0x37f0089c,
+	0x4021f500,
+	0x88e7f108,
+	0x7e21f413,
+	0xf4013cf0,
+/* 0x09ee: i2c_bitr_done */
+	0x00f80131,
+/* 0x09f0: i2c_get_byte */
+	0xf00057f0,
+/* 0x09f6: i2c_get_byte_next */
+	0x54b60847,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b60965,
-/* 0x0a3d: i2c_get_byte_done */
-/* 0x0a3f: i2c_put_byte */
-	0xf000f804,
-/* 0x0a42: i2c_put_byte_next */
-	0x42b60847,
-	0x3854ff01,
+	0x64b609a9,
+	0x2b11f404,
+	0xb60553fd,
+	0x1bf40142,
+	0x0137f0d8,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x6521f550,
+	0x6821f550,
 	0x0464b609,
-	0xb03411f4,
-	0x1bf40046,
-	0x0076bbd8,
+/* 0x0a40: i2c_get_byte_done */
+/* 0x0a42: i2c_put_byte */
+	0x47f000f8,
+/* 0x0a45: i2c_put_byte_next */
+	0x0142b608,
+	0xbb3854ff,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x096821f5,
+	0xf40464b6,
+	0x46b03411,
+	0xd81bf400,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xa921f550,
+	0x0464b609,
+	0xbb0f11f4,
+	0x36b00076,
+	0x061bf401,
+/* 0x0a9b: i2c_put_byte_done */
+	0xf80132f4,
+/* 0x0a9d: i2c_addr */
+	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b609a6,
-	0x0f11f404,
-	0xb00076bb,
-	0x1bf40136,
-	0x0132f406,
-/* 0x0a98: i2c_put_byte_done */
-/* 0x0a9a: i2c_addr */
-	0x76bb00f8,
+	0x64b608d9,
+	0x2911f404,
+	0x012ec3e7,
+	0xfd0134b6,
+	0x76bb0553,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb608d621,
-	0x11f40464,
-	0x2ec3e729,
-	0x0134b601,
-	0xbb0553fd,
+	0xb60a4221,
+/* 0x0ae2: i2c_addr_done */
+	0x00f80464,
+/* 0x0ae4: i2c_acquire_addr */
+	0xb6f8cec7,
+	0xe0b702e4,
+	0xee980d1c,
+/* 0x0af3: i2c_acquire */
+	0xf500f800,
+	0xf40ae421,
+	0xd9f00421,
+	0x4021f403,
+/* 0x0b02: i2c_release */
+	0x21f500f8,
+	0x21f40ae4,
+	0x03daf004,
+	0xf84021f4,
+/* 0x0b11: i2c_recv */
+	0x0132f400,
+	0xb6f8c1c7,
+	0x16b00214,
+	0x3a1ff528,
+	0xf413a001,
+	0x0032980c,
+	0x0ccc13a0,
+	0xf4003198,
+	0xd0f90231,
+	0xd0f9e0f9,
+	0x000067f1,
+	0x100063f1,
+	0xbb016792,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0a3f21f5,
-/* 0x0adf: i2c_addr_done */
-	0xf80464b6,
-/* 0x0ae1: i2c_acquire_addr */
-	0xf8cec700,
-	0xb702e4b6,
-	0x980d1ce0,
-	0x00f800ee,
-/* 0x0af0: i2c_acquire */
-	0x0ae121f5,
-	0xf00421f4,
-	0x21f403d9,
-/* 0x0aff: i2c_release */
-	0xf500f83f,
-	0xf40ae121,
-	0xdaf00421,
-	0x3f21f403,
-/* 0x0b0e: i2c_recv */
-	0x32f400f8,
-	0xf8c1c701,
-	0xb00214b6,
-	0x1ff52816,
-	0x13a0013a,
-	0x32980cf4,
-	0xcc13a000,
-	0x0031980c,
-	0xf90231f4,
-	0xf9e0f9d0,
-	0x0067f1d0,
-	0x0063f100,
-	0x01679210,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xf021f550,
-	0x0464b60a,
-	0xd6b0d0fc,
-	0xb31bf500,
-	0x0057f000,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x9a21f550,
-	0x0464b60a,
-	0x00d011f5,
-	0xbbe0c5c7,
+	0x0af321f5,
+	0xfc0464b6,
+	0x00d6b0d0,
+	0x00b31bf5,
+	0xbb0057f0,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0a3f21f5,
+	0x0a9d21f5,
 	0xf50464b6,
-	0xf000ad11,
-	0x76bb0157,
+	0xc700d011,
+	0x76bbe0c5,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb60a9a21,
+	0xb60a4221,
 	0x11f50464,
-	0x76bb008a,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb609ed21,
-	0x11f40464,
-	0xe05bcb6a,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x3221f550,
-	0x0464b609,
-	0xbd025bb9,
-	0x430ef474,
-/* 0x0c14: i2c_recv_not_rd08 */
-	0xf401d6b0,
-	0x57f03d1b,
-	0x9a21f500,
-	0x3311f40a,
-	0xf5e0c5c7,
-	0xf40a3f21,
-	0x57f02911,
-	0x9a21f500,
-	0x1f11f40a,
-	0xf5e0b5c7,
-	0xf40a3f21,
-	0x21f51511,
-	0x74bd0932,
-	0xf408c5c7,
-	0x32f4091b,
-	0x030ef402,
-/* 0x0c54: i2c_recv_not_wr08 */
-/* 0x0c54: i2c_recv_done */
-	0xf5f8cec7,
-	0xfc0aff21,
-	0xf4d0fce0,
-	0x7cb90a12,
-	0x3721f502,
-/* 0x0c69: i2c_recv_exit */
-/* 0x0c6b: i2c_init */
-	0xf800f803,
-/* 0x0c6d: test_recv */
-	0xd817f100,
-	0x0614b605,
-	0xb60011cf,
-	0x07f10110,
-	0x04b605d8,
-	0x0001d006,
-	0xe7f104bd,
-	0xe3f1d900,
-	0x21f5134f,
-	0x00f80257,
-/* 0x0c94: test_init */
-	0x0800e7f1,
-	0x025721f5,
-/* 0x0c9e: idle_recv */
+	0x57f000ad,
+	0x0076bb01,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b60a9d,
+	0x8a11f504,
+	0x0076bb00,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b609f0,
+	0x6a11f404,
+	0xbbe05bcb,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x093521f5,
+	0xb90464b6,
+	0x74bd025b,
+/* 0x0c17: i2c_recv_not_rd08 */
+	0xb0430ef4,
+	0x1bf401d6,
+	0x0057f03d,
+	0x0a9d21f5,
+	0xc73311f4,
+	0x21f5e0c5,
+	0x11f40a42,
+	0x0057f029,
+	0x0a9d21f5,
+	0xc71f11f4,
+	0x21f5e0b5,
+	0x11f40a42,
+	0x3521f515,
+	0xc774bd09,
+	0x1bf408c5,
+	0x0232f409,
+/* 0x0c57: i2c_recv_not_wr08 */
+/* 0x0c57: i2c_recv_done */
+	0xc7030ef4,
+	0x21f5f8ce,
+	0xe0fc0b02,
+	0x12f4d0fc,
+	0x027cb90a,
+	0x033621f5,
+/* 0x0c6c: i2c_recv_exit */
+/* 0x0c6e: i2c_init */
 	0x00f800f8,
-/* 0x0ca0: idle */
-	0xf10031f4,
-	0xb605d417,
-	0x11cf0614,
-	0x0110b600,
-	0x05d407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x0cbc: idle_loop */
-	0xf45817f0,
-/* 0x0cc2: idle_proc */
-/* 0x0cc2: idle_proc_exec */
-	0x10f90232,
-	0xf5021eb9,
-	0xfc034021,
-	0x0911f410,
-	0xf40231f4,
-/* 0x0cd6: idle_proc_next */
-	0x10b6ef0e,
-	0x061fb858,
-	0xf4e61bf4,
-	0x28f4dd02,
-	0xbb0ef400,
-	0x00000000,
+/* 0x0c70: test_recv */
+	0x05d817f1,
+	0xcf0614b6,
+	0x10b60011,
+	0xd807f101,
+	0x0604b605,
+	0xbd0001d0,
+	0x00e7f104,
+	0x4fe3f1d9,
+	0x5621f513,
+/* 0x0c97: test_init */
+	0xf100f802,
+	0xf50800e7,
+	0xf8025621,
+/* 0x0ca1: idle_recv */
+/* 0x0ca3: idle */
+	0xf400f800,
+	0x17f10031,
+	0x14b605d4,
+	0x0011cf06,
+	0xf10110b6,
+	0xb605d407,
+	0x01d00604,
+/* 0x0cbf: idle_loop */
+	0xf004bd00,
+	0x32f45817,
+/* 0x0cc5: idle_proc */
+/* 0x0cc5: idle_proc_exec */
+	0xb910f902,
+	0x21f5021e,
+	0x10fc033f,
+	0xf40911f4,
+	0x0ef40231,
+/* 0x0cd9: idle_proc_next */
+	0x5810b6ef,
+	0xf4061fb8,
+	0x02f4e61b,
+	0x0028f4dd,
+	0x00bb0ef4,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/host.fuc b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/host.fuc
index c2bb616a8da5..f2420a37f45b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/host.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/host.fuc
@@ -98,8 +98,7 @@ host_send:
 // $r0  - zero
 host_recv:
 	// message from intr handler == HOST->PWR comms pending
-	mov $r1 (PROC_KERN & 0x0000ffff)
-	sethi $r1 (PROC_KERN & 0xffff0000)
+	imm32($r1, PROC_KERN)
 	cmp b32 $r14 $r1
 	bra e #host_send
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/kernel.fuc b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/kernel.fuc
index ad35fa57be94..c20a3bd33775 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/kernel.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/kernel.fuc
@@ -51,8 +51,7 @@ time_next: .b32 0
 // $r0  - zero
 rd32:
 	nv_iowr(NV_PPWR_MMIO_ADDR, $r14)
-	mov $r13 NV_PPWR_MMIO_CTRL_OP_RD
-	sethi $r13 NV_PPWR_MMIO_CTRL_TRIGGER
+	imm32($r13, NV_PPWR_MMIO_CTRL_OP_RD | NV_PPWR_MMIO_CTRL_TRIGGER)
 	nv_iowr(NV_PPWR_MMIO_CTRL, $r13)
 	rd32_wait:
 		nv_iord($r13, NV_PPWR_MMIO_CTRL)
@@ -70,9 +69,7 @@ rd32:
 wr32:
 	nv_iowr(NV_PPWR_MMIO_ADDR, $r14)
 	nv_iowr(NV_PPWR_MMIO_DATA, $r13)
-	mov $r13 NV_PPWR_MMIO_CTRL_OP_WR
-	or $r13 NV_PPWR_MMIO_CTRL_MASK_B32_0
-	sethi $r13 NV_PPWR_MMIO_CTRL_TRIGGER
+	imm32($r13, NV_PPWR_MMIO_CTRL_OP_WR | NV_PPWR_MMIO_CTRL_MASK_B32_0 | NV_PPWR_MMIO_CTRL_TRIGGER)
 
 #ifdef NVKM_FALCON_MMIO_TRAP
 	push $r13
@@ -215,8 +212,7 @@ intr:
 		bra z #intr_subintr_skip_fifo
 			nv_iord($r12, NV_PPWR_FIFO_INTR)
 			push $r12
-			mov $r14 (PROC_HOST & 0x0000ffff)
-			sethi $r14 (PROC_HOST & 0xffff0000)
+			imm32($r14, PROC_HOST)
 			mov $r13 KMSG_FIFO
 			call(send)
 			pop $r12
@@ -256,7 +252,7 @@ ticks_from_ns:
 
 	/* try not losing precision (multiply then divide) */
 	imm32($r13, HW_TICKS_PER_US)
-	call #mulu32_32_64
+	call(mulu32_32_64)
 
 	/* use an immeditate, it's ok because HW_TICKS_PER_US < 16 bits */
 	div $r12 $r12 1000
@@ -268,7 +264,7 @@ ticks_from_ns:
 	/* let's divide then multiply, too bad for the precision! */
 	div $r14 $r14 1000
 	imm32($r13, HW_TICKS_PER_US)
-	call #mulu32_32_64
+	call(mulu32_32_64)
 
 	/* this cannot overflow as long as HW_TICKS_PER_US < 1000 */
 
@@ -290,7 +286,7 @@ ticks_from_us:
 
 	/* simply multiply $us by HW_TICKS_PER_US */
 	imm32($r13, HW_TICKS_PER_US)
-	call #mulu32_32_64
+	call(mulu32_32_64)
 	mov b32 $r14 $r12
 
 	/* check if there wasn't any overflow */
@@ -511,14 +507,12 @@ init:
 #ifdef NVKM_FALCON_MMIO_UAS
 	// somehow allows the magic "access mmio via D[]" stuff that's
 	// used by the nv_rd32/nv_wr32 macros to work
-	mov $r1 0x0010
-	sethi $r1 NV_PPWR_UAS_CONFIG_ENABLE
+	imm32($r1, 0x10 | NV_PPWR_UAS_CONFIG_ENABLE)
 	nv_iowrs(NV_PPWR_UAS_CONFIG, $r1)
 #endif
 
 	// route all interrupts except user0/1 and pause to fuc
-	mov $r1 0x00e0
-	sethi $r1 0x00000000
+	imm32($r1, 0xe0)
 	nv_iowr(NV_PPWR_INTR_ROUTE, $r1)
 
 	// enable watchdog and subintr intrs
@@ -529,8 +523,8 @@ init:
 	nv_iowr(NV_PPWR_INTR_EN_SET, $r1)
 
 	// enable interrupts globally
-	mov $r1 #intr
-	sethi $r1 0x00000000
+	imm32($r1, #intr)
+	and $r1 0xffff
 	mov $iv0 $r1
 	bset $flags ie0
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/macros.fuc b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/macros.fuc
index 96fc984dafdc..3737bd27f74e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/macros.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/macros.fuc
@@ -169,7 +169,7 @@
 */	.b32 0 /*
 */	.skip 64
 
-#if NV_PPWR_CHIPSET < GK208
+#if NVKM_PPWR_CHIPSET < GK208
 #define imm32(reg,val) /*
 */	movw reg  ((val) & 0x0000ffff) /*
 */	sethi reg ((val) & 0xffff0000)
@@ -252,12 +252,12 @@
 #endif
 
 #define st(size, addr, reg) /*
-*/	movw $r0 addr /*
+*/	imm32($r0, addr) /*
 */	st size D[$r0] reg /*
 */	clear b32 $r0
 
 #define ld(size, reg, addr) /*
-*/	movw $r0 addr /*
+*/	imm32($r0, addr)  /*
 */	ld size reg D[$r0] /*
 */	clear b32 $r0
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/test.fuc b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/test.fuc
index 0c3a71bf5459..9e3f4e690dd1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/test.fuc
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/test.fuc
@@ -48,8 +48,7 @@ test_recv:
 	nv_iord($r1, NV_PPWR_DSCRATCH(2))
 	add b32 $r1 1
 	nv_iowr(NV_PPWR_DSCRATCH(2), $r1)
-	mov $r14 -0x2700 /* 0xd900, envyas grrr! */
-	sethi $r14 0x134f0000
+	imm32($r14, 0x134fd900)
 	call(timer)
 	ret
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
new file mode 100644
index 000000000000..b02b868a6589
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
@@ -0,0 +1,3 @@
+nvkm-y += nvkm/subdev/secboot/base.o
+nvkm-y += nvkm/subdev/secboot/gm200.o
+nvkm-y += nvkm/subdev/secboot/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
new file mode 100644
index 000000000000..520facf9bc07
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+#include <subdev/timer.h>
+
+static const char *
+managed_falcons_names[] = {
+	[NVKM_SECBOOT_FALCON_PMU] = "PMU",
+	[NVKM_SECBOOT_FALCON_RESERVED] = "<reserved>",
+	[NVKM_SECBOOT_FALCON_FECS] = "FECS",
+	[NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS",
+	[NVKM_SECBOOT_FALCON_END] = "<invalid>",
+};
+
+/*
+ * Helper falcon functions
+ */
+
+static int
+falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base)
+{
+	int ret;
+
+	/* clear halt interrupt */
+	nvkm_mask(device, base + 0x004, 0x10, 0x10);
+	/* wait until halt interrupt is cleared */
+	ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+falcon_wait_idle(struct nvkm_device *device, u32 base)
+{
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	int ret;
+
+	/* enable engine */
+	nvkm_mask(device, 0x200, sb->enable_mask, sb->enable_mask);
+	nvkm_rd32(device, 0x200);
+	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
+	if (ret < 0) {
+		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
+		return ret;
+	}
+
+	ret = falcon_wait_idle(device, sb->base);
+	if (ret)
+		return ret;
+
+	/* enable IRQs */
+	nvkm_wr32(device, sb->base + 0x010, 0xff);
+	nvkm_mask(device, 0x640, sb->irq_mask, sb->irq_mask);
+	nvkm_mask(device, 0x644, sb->irq_mask, sb->irq_mask);
+
+	return 0;
+}
+
+static int
+nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+
+	/* disable IRQs and wait for any previous code to complete */
+	nvkm_mask(device, 0x644, sb->irq_mask, 0x0);
+	nvkm_mask(device, 0x640, sb->irq_mask, 0x0);
+	nvkm_wr32(device, sb->base + 0x014, 0xff);
+
+	falcon_wait_idle(device, sb->base);
+
+	/* disable engine */
+	nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+
+	return 0;
+}
+
+int
+nvkm_secboot_falcon_reset(struct nvkm_secboot *sb)
+{
+	int ret;
+
+	ret = nvkm_secboot_falcon_disable(sb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_secboot_falcon_enable(sb);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * nvkm_secboot_falcon_run - run the falcon that will perform secure boot
+ *
+ * This function is to be called after all chip-specific preparations have
+ * been completed. It will start the falcon to perform secure boot, wait for
+ * it to halt, and report if an error occurred.
+ */
+int
+nvkm_secboot_falcon_run(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	int ret;
+
+	/* Start falcon */
+	nvkm_wr32(device, sb->base + 0x100, 0x2);
+
+	/* Wait for falcon halt */
+	ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10);
+	if (ret < 0)
+		return ret;
+
+	/* If mailbox register contains an error code, then ACR has failed */
+	ret = nvkm_rd32(device, sb->base + 0x040);
+	if (ret) {
+		nvkm_error(&sb->subdev, "ACR boot failed, ret 0x%08x", ret);
+		falcon_clear_halt_interrupt(device, sb->base);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+
+/**
+ * nvkm_secboot_reset() - reset specified falcon
+ */
+int
+nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcon)
+{
+	/* Unmanaged falcon? */
+	if (!(BIT(falcon) & sb->func->managed_falcons)) {
+		nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n");
+		return -EINVAL;
+	}
+
+	return sb->func->reset(sb, falcon);
+}
+
+/**
+ * nvkm_secboot_start() - start specified falcon
+ */
+int
+nvkm_secboot_start(struct nvkm_secboot *sb, u32 falcon)
+{
+	/* Unmanaged falcon? */
+	if (!(BIT(falcon) & sb->func->managed_falcons)) {
+		nvkm_error(&sb->subdev, "cannot start unmanaged falcon!\n");
+		return -EINVAL;
+	}
+
+	return sb->func->start(sb, falcon);
+}
+
+/**
+ * nvkm_secboot_is_managed() - check whether a given falcon is securely-managed
+ */
+bool
+nvkm_secboot_is_managed(struct nvkm_secboot *secboot,
+			enum nvkm_secboot_falcon fid)
+{
+	if (!secboot)
+		return false;
+
+	return secboot->func->managed_falcons & BIT(fid);
+}
+
+static int
+nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_secboot *sb = nvkm_secboot(subdev);
+	int ret = 0;
+
+	/* Call chip-specific init function */
+	if (sb->func->init)
+		ret = sb->func->init(sb);
+	if (ret) {
+		nvkm_error(subdev, "Secure Boot initialization failed: %d\n",
+			   ret);
+		return ret;
+	}
+
+	/*
+	 * Build all blobs - the same blobs can be used to perform secure boot
+	 * multiple times
+	 */
+	if (sb->func->prepare_blobs)
+		ret = sb->func->prepare_blobs(sb);
+
+	return ret;
+}
+
+static int
+nvkm_secboot_fini(struct nvkm_subdev *subdev, bool suspend)
+{
+	struct nvkm_secboot *sb = nvkm_secboot(subdev);
+	int ret = 0;
+
+	if (sb->func->fini)
+		ret = sb->func->fini(sb, suspend);
+
+	return ret;
+}
+
+static void *
+nvkm_secboot_dtor(struct nvkm_subdev *subdev)
+{
+	struct nvkm_secboot *sb = nvkm_secboot(subdev);
+	void *ret = NULL;
+
+	if (sb->func->dtor)
+		ret = sb->func->dtor(sb);
+
+	return ret;
+}
+
+static const struct nvkm_subdev_func
+nvkm_secboot = {
+	.oneinit = nvkm_secboot_oneinit,
+	.fini = nvkm_secboot_fini,
+	.dtor = nvkm_secboot_dtor,
+};
+
+int
+nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
+		  struct nvkm_device *device, int index,
+		  struct nvkm_secboot *sb)
+{
+	unsigned long fid;
+
+	nvkm_subdev_ctor(&nvkm_secboot, device, index, 0, &sb->subdev);
+	sb->func = func;
+
+	/* setup the performing falcon's base address and masks */
+	switch (func->boot_falcon) {
+	case NVKM_SECBOOT_FALCON_PMU:
+		sb->base = 0x10a000;
+		sb->irq_mask = 0x1000000;
+		sb->enable_mask = 0x2000;
+		break;
+	default:
+		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
+		return -EINVAL;
+	};
+
+	nvkm_debug(&sb->subdev, "securely managed falcons:\n");
+	for_each_set_bit(fid, &sb->func->managed_falcons,
+			 NVKM_SECBOOT_FALCON_END)
+		nvkm_debug(&sb->subdev, "- %s\n", managed_falcons_names[fid]);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
new file mode 100644
index 000000000000..cc100dc940ea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -0,0 +1,1489 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ *   architectures before security modes were introduced (pre-Maxwell), but
+ *   capability is restricted. In particular, certain registers may be
+ *   inaccessible for reads and/or writes, and physical memory access may be
+ *   disabled (on certain Falcon instances). This is the only possible mode that
+ *   can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ *   not possible to read or write any Falcon internal state or Falcon registers
+ *   from outside the Falcon (for example, from the host system). The only way
+ *   to enable this mode is by loading microcode that has been signed by NVIDIA.
+ *   (The loading process involves tagging the IMEM block as secure, writing the
+ *   signature into a Falcon register, and starting execution. The hardware will
+ *   validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ *   than NS but fewer than HS. Some of the microprocessor state is visible to
+ *   host software to ease debugging. The only way to enable this mode is by HS
+ *   microcode enabling LS mode. Some privileges available to HS mode are not
+ *   available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switching a HS-capable falcon (typically
+ * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
+ * load them, and switch managed falcons into LS mode. Once secure boot
+ * completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ *    load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ *    firmware in charge of loading the LS firmwares into their respective
+ *    falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ *    HS-capable falcon. It authenticates itself, switches the secure falcon to
+ *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
+ *    LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ *    checks the signatures of the LS firmwares and, if valid, switches the
+ *    managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
+ *
+ */
+
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <core/firmware.h>
+#include <subdev/fb.h>
+
+enum {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
+/**
+ * struct fw_bin_header - header of firmware files
+ * @bin_magic:		always 0x3b1d14f0
+ * @bin_ver:		version of the bin format
+ * @bin_size:		entire image size including this header
+ * @header_offset:	offset of the firmware/bootloader header in the file
+ * @data_offset:	offset of the firmware/bootloader payload in the file
+ * @data_size:		size of the payload
+ *
+ * This header is located at the beginning of the HS firmware and HS bootloader
+ * files, to describe where the headers and data can be found.
+ */
+struct fw_bin_header {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+	u32 data_offset;
+	u32 data_size;
+};
+
+/**
+ * struct fw_bl_desc - firmware bootloader descriptor
+ * @start_tag:		starting tag of bootloader
+ * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
+ * @code_off:		offset of code section
+ * @code_size:		size of code section
+ * @data_off:		offset of data section
+ * @data_size:		size of data section
+ *
+ * This structure is embedded in bootloader firmware files at to describe the
+ * IMEM and DMEM layout expected by the bootloader.
+ */
+struct fw_bl_desc {
+	u32 start_tag;
+	u32 dmem_load_off;
+	u32 code_off;
+	u32 code_size;
+	u32 data_off;
+	u32 data_size;
+};
+
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct lsf_ucode_desc - LS falcon signatures
+ * @prd_keys:		signature to use when the GPU is in production mode
+ * @dgb_keys:		signature to use when the GPU is in debug mode
+ * @b_prd_present:	whether the production key is present
+ * @b_dgb_present:	whether the debug key is present
+ * @falcon_id:		ID of the falcon the ucode applies to
+ *
+ * Directly loaded from a signature file.
+ */
+struct lsf_ucode_desc {
+	u8  prd_keys[2][16];
+	u8  dbg_keys[2][16];
+	u32 b_prd_present;
+	u32 b_dbg_present;
+	u32 falcon_id;
+};
+
+/**
+ * struct lsf_lsb_header - LS firmware header
+ * @signature:		signature to verify the firmware against
+ * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
+ *                      blob contains the bootloader, code and data of the
+ *                      LS falcon
+ * @ucode_size:		size of the ucode blob, including bootloader
+ * @data_size:		size of the ucode blob data
+ * @bl_code_size:	size of the bootloader code
+ * @bl_imem_off:	offset in imem of the bootloader
+ * @bl_data_off:	offset of the bootloader data in WPR region
+ * @bl_data_size:	size of the bootloader data
+ * @app_code_off:	offset of the app code relative to ucode_off
+ * @app_code_size:	size of the app code
+ * @app_data_off:	offset of the app data relative to ucode_off
+ * @app_data_size:	size of the app data
+ * @flags:		flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct lsf_lsb_header {
+	struct lsf_ucode_desc signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct lsf_wpr_header - LS blob WPR Header
+ * @falcon_id:		LS falcon ID
+ * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
+ * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
+ * @lazy_bootstrap:	skip bootstrapping by ACR
+ * @status:		bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance which falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct lsf_wpr_header {
+	u32  falcon_id;
+	u32  lsb_offset;
+	u32  bootstrap_owner;
+	u32  lazy_bootstrap;
+	u32  status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+
+/**
+ * struct ls_ucode_img_desc - descriptor of firmware image
+ * @descriptor_size:		size of this descriptor
+ * @image_size:			size of the whole image
+ * @bootloader_start_offset:	start offset of the bootloader in ucode image
+ * @bootloader_size:		size of the bootloader
+ * @bootloader_imem_offset:	start off set of the bootloader in IMEM
+ * @bootloader_entry_point:	entry point of the bootloader in IMEM
+ * @app_start_offset:		start offset of the LS firmware
+ * @app_size:			size of the LS firmware's code and data
+ * @app_imem_offset:		offset of the app in IMEM
+ * @app_imem_entry:		entry point of the app in IMEM
+ * @app_dmem_offset:		offset of the data in DMEM
+ * @app_resident_code_offset:	offset of app code from app_start_offset
+ * @app_resident_code_size:	size of the code
+ * @app_resident_data_offset:	offset of data from app_start_offset
+ * @app_resident_data_size:	size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file.
+ */
+struct ls_ucode_img_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[64];
+	u32 compressed;
+};
+
+/**
+ * struct ls_ucode_img - temporary storage for loaded LS firmwares
+ * @node:		to link within lsf_ucode_mgr
+ * @falcon_id:		ID of the falcon this LS firmware is for
+ * @ucode_desc:		loaded or generated map of ucode_data
+ * @ucode_header:	header of the firmware
+ * @ucode_data:		firmware payload (code and data)
+ * @ucode_size:		size in bytes of data in ucode_data
+ * @wpr_header:		WPR header to be written to the LS blob
+ * @lsb_header:		LSB header to be written to the LS blob
+ *
+ * Preparing the WPR LS blob requires information about all the LS firmwares
+ * (size, etc) to be known. This structure contains all the data of one LS
+ * firmware.
+ */
+struct ls_ucode_img {
+	struct list_head node;
+	enum nvkm_secboot_falcon falcon_id;
+
+	struct ls_ucode_img_desc ucode_desc;
+	u32 *ucode_header;
+	u8 *ucode_data;
+	u32 ucode_size;
+
+	struct lsf_wpr_header wpr_header;
+	struct lsf_lsb_header lsb_header;
+};
+
+/**
+ * struct ls_ucode_mgr - manager for all LS falcon firmwares
+ * @count:	number of managed LS falcons
+ * @wpr_size:	size of the required WPR region in bytes
+ * @img_list:	linked list of lsf_ucode_img
+ */
+struct ls_ucode_mgr {
+	u16 count;
+	u32 wpr_size;
+	struct list_head img_list;
+};
+
+
+/*
+ *
+ * HS blob structures
+ *
+ */
+
+/**
+ * struct hsf_fw_header - HS firmware descriptor
+ * @sig_dbg_offset:	offset of the debug signature
+ * @sig_dbg_size:	size of the debug signature
+ * @sig_prod_offset:	offset of the production signature
+ * @sig_prod_size:	size of the production signature
+ * @patch_loc:		offset of the offset (sic) of where the signature is
+ * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
+ * @hdr_offset:		offset of the load header (see struct hs_load_header)
+ * @hdr_size:		size of above header
+ *
+ * This structure is embedded in the HS firmware image at
+ * hs_bin_hdr.header_offset.
+ */
+struct hsf_fw_header {
+	u32 sig_dbg_offset;
+	u32 sig_dbg_size;
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 hdr_offset;
+	u32 hdr_size;
+};
+
+/**
+ * struct hsf_load_header - HS firmware load header
+ */
+struct hsf_load_header {
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 num_apps;
+	struct {
+		u32 sec_code_off;
+		u32 sec_code_size;
+	} app[0];
+};
+
+/**
+ * Convenience function to duplicate a firmware file in memory and check that
+ * it has the required minimum size.
+ */
+static void *
+gm200_secboot_load_firmware(struct nvkm_subdev *subdev, const char *name,
+		    size_t min_size)
+{
+	const struct firmware *fw;
+	void *blob;
+	int ret;
+
+	ret = nvkm_firmware_get(subdev->device, name, &fw);
+	if (ret)
+		return ERR_PTR(ret);
+	if (fw->size < min_size) {
+		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
+			   name, min_size);
+		nvkm_firmware_put(fw);
+		return ERR_PTR(-EINVAL);
+	}
+	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	nvkm_firmware_put(fw);
+	if (!blob)
+		return ERR_PTR(-ENOMEM);
+
+	return blob;
+}
+
+
+/*
+ * Low-secure blob creation
+ */
+
+#define BL_DESC_BLK_SIZE 256
+/**
+ * Build a ucode image and descriptor from provided bootloader, code and data.
+ *
+ * @bl:		bootloader image, including 16-bytes descriptor
+ * @code:	LS firmware code segment
+ * @data:	LS firmware data segment
+ * @desc:	ucode descriptor to be written
+ *
+ * Return: allocated ucode image with corresponding descriptor information. desc
+ *         is also updated to contain the right offsets within returned image.
+ */
+static void *
+ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
+		   const struct firmware *data, struct ls_ucode_img_desc *desc)
+{
+	struct fw_bin_header *bin_hdr = (void *)bl->data;
+	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
+	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
+	u32 pos = 0;
+	void *image;
+
+	desc->bootloader_start_offset = pos;
+	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
+	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
+	desc->bootloader_entry_point = bl_desc->start_tag * 256;
+
+	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
+	desc->app_start_offset = pos;
+	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
+			 ALIGN(data->size, BL_DESC_BLK_SIZE);
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
+
+	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
+	desc->app_resident_data_offset = pos - desc->app_start_offset;
+	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
+
+	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
+			   desc->app_size;
+
+	image = kzalloc(desc->image_size, GFP_KERNEL);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(image + desc->bootloader_start_offset, bl_data,
+	       bl_desc->code_size);
+	memcpy(image + desc->app_start_offset, code->data, code->size);
+	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
+	       data->data, data->size);
+
+	return image;
+}
+
+/**
+ * ls_ucode_img_load_generic() - load and prepare a LS ucode image
+ *
+ * Load the LS microcode, bootloader and signature and pack them into a single
+ * blob. Also generate the corresponding ucode descriptor.
+ */
+static int
+ls_ucode_img_load_generic(struct nvkm_subdev *subdev,
+			  struct ls_ucode_img *img, const char *falcon_name,
+			  const u32 falcon_id)
+{
+	const struct firmware *bl, *code, *data;
+	struct lsf_ucode_desc *lsf_desc;
+	char f[64];
+	int ret;
+
+	img->ucode_header = NULL;
+
+	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &bl);
+	if (ret)
+		goto error;
+
+	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &code);
+	if (ret)
+		goto free_bl;
+
+	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &data);
+	if (ret)
+		goto free_inst;
+
+	img->ucode_data = ls_ucode_img_build(bl, code, data,
+					     &img->ucode_desc);
+	if (IS_ERR(img->ucode_data)) {
+		ret = PTR_ERR(img->ucode_data);
+		goto free_data;
+	}
+	img->ucode_size = img->ucode_desc.image_size;
+
+	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
+	lsf_desc = gm200_secboot_load_firmware(subdev, f, sizeof(*lsf_desc));
+	if (IS_ERR(lsf_desc)) {
+		ret = PTR_ERR(lsf_desc);
+		goto free_image;
+	}
+	/* not needed? the signature should already have the right value */
+	lsf_desc->falcon_id = falcon_id;
+	memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc));
+	img->falcon_id = lsf_desc->falcon_id;
+	kfree(lsf_desc);
+
+	/* success path - only free requested firmware files */
+	goto free_data;
+
+free_image:
+	kfree(img->ucode_data);
+free_data:
+	nvkm_firmware_put(data);
+free_inst:
+	nvkm_firmware_put(code);
+free_bl:
+	nvkm_firmware_put(bl);
+error:
+	return ret;
+}
+
+typedef int (*lsf_load_func)(struct nvkm_subdev *, struct ls_ucode_img *);
+
+static int
+ls_ucode_img_load_fecs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_generic(subdev, img, "fecs",
+					 NVKM_SECBOOT_FALCON_FECS);
+}
+
+static int
+ls_ucode_img_load_gpccs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_generic(subdev, img, "gpccs",
+					 NVKM_SECBOOT_FALCON_GPCCS);
+}
+
+/**
+ * ls_ucode_img_load() - create a lsf_ucode_img and load it
+ */
+static struct ls_ucode_img *
+ls_ucode_img_load(struct nvkm_subdev *subdev, lsf_load_func load_func)
+{
+	struct ls_ucode_img *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	ret = load_func(subdev, img);
+	if (ret) {
+		kfree(img);
+		return ERR_PTR(ret);
+	}
+
+	return img;
+}
+
+static const lsf_load_func lsf_load_funcs[] = {
+	[NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */
+	[NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs,
+	[NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs,
+};
+
+/**
+ * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image
+ * @img:	ucode image to generate against
+ * @desc:	descriptor to populate
+ * @sb:		secure boot state to use for base addresses
+ *
+ * Populate the DMEM BL descriptor with the information contained in a
+ * ls_ucode_desc.
+ *
+ */
+static void
+ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr,
+			      struct gm200_flcn_bl_desc *desc)
+{
+	struct ls_ucode_img_desc *pdesc = &img->ucode_desc;
+	u64 addr_base;
+
+	addr_base = wpr_addr + img->lsb_header.ucode_off +
+		    pdesc->app_start_offset;
+
+	memset(desc, 0, sizeof(*desc));
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base.lo = lower_32_bits(
+		(addr_base + pdesc->app_resident_code_offset));
+	desc->code_dma_base.hi = upper_32_bits(
+		(addr_base + pdesc->app_resident_code_offset));
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->data_dma_base.lo = lower_32_bits(
+		(addr_base + pdesc->app_resident_data_offset));
+	desc->data_dma_base.hi = upper_32_bits(
+		(addr_base + pdesc->app_resident_data_offset));
+	desc->data_size = pdesc->app_resident_data_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+}
+
+#define LSF_LSB_HEADER_ALIGN 256
+#define LSF_BL_DATA_ALIGN 256
+#define LSF_BL_DATA_SIZE_ALIGN 256
+#define LSF_BL_CODE_SIZE_ALIGN 256
+#define LSF_UCODE_DATA_ALIGN 4096
+
+/**
+ * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image
+ * @gsb:	secure boot device used
+ * @img:	image to generate for
+ * @offset:	offset in the WPR region where this image starts
+ *
+ * Allocate space in the WPR area from offset and write the WPR and LSB headers
+ * accordingly.
+ *
+ * Return: offset at the end of this image.
+ */
+static u32
+ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img,
+			  u32 offset)
+{
+	struct lsf_wpr_header *whdr = &img->wpr_header;
+	struct lsf_lsb_header *lhdr = &img->lsb_header;
+	struct ls_ucode_img_desc *desc = &img->ucode_desc;
+
+	if (img->ucode_header) {
+		nvkm_fatal(&gsb->base.subdev,
+			    "images withough loader are not supported yet!\n");
+		return offset;
+	}
+
+	/* Fill WPR header */
+	whdr->falcon_id = img->falcon_id;
+	whdr->bootstrap_owner = gsb->base.func->boot_falcon;
+	whdr->status = LSF_IMAGE_STATUS_COPY;
+
+	/* Align, save off, and include an LSB header size */
+	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
+	whdr->lsb_offset = offset;
+	offset += sizeof(struct lsf_lsb_header);
+
+	/*
+	 * Align, save off, and include the original (static) ucode
+	 * image size
+	 */
+	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
+	lhdr->ucode_off = offset;
+	offset += img->ucode_size;
+
+	/*
+	 * For falcons that use a boot loader (BL), we append a loader
+	 * desc structure on the end of the ucode image and consider
+	 * this the boot loader data. The host will then copy the loader
+	 * desc args to this space within the WPR region (before locking
+	 * down) and the HS bin will then copy them to DMEM 0 for the
+	 * loader.
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+				lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at 0th offset of the image, the VA
+	 * is different to make sure that it doesn't collide the actual
+	 * OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	lhdr->flags = 0;
+	if (img->falcon_id == gsb->base.func->boot_falcon)
+		lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* GPCCS will be loaded using PRI */
+	if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS)
+		lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD;
+
+	/* Align (size bloat) and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc),
+				   LSF_BL_DATA_SIZE_ALIGN);
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+static void
+ls_ucode_mgr_init(struct ls_ucode_mgr *mgr)
+{
+	memset(mgr, 0, sizeof(*mgr));
+	INIT_LIST_HEAD(&mgr->img_list);
+}
+
+static void
+ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr)
+{
+	struct ls_ucode_img *img, *t;
+
+	list_for_each_entry_safe(img, t, &mgr->img_list, node) {
+		kfree(img->ucode_data);
+		kfree(img->ucode_header);
+		kfree(img);
+	}
+}
+
+static void
+ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img)
+{
+	mgr->count++;
+	list_add_tail(&img->node, &mgr->img_list);
+}
+
+/**
+ * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images
+ */
+static void
+ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr)
+{
+	struct ls_ucode_img *img;
+	u32 offset;
+
+	/*
+	 * Start with an array of WPR headers at the base of the WPR.
+	 * The expectation here is that the secure falcon will do a single DMA
+	 * read of this array and cache it internally so it's ok to pack these.
+	 * Also, we add 1 to the falcon count to indicate the end of the array.
+	 */
+	offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1);
+
+	/*
+	 * Walk the managed falcons, accounting for the LSB structs
+	 * as well as the ucode images.
+	 */
+	list_for_each_entry(img, &mgr->img_list, node) {
+		offset = ls_ucode_img_fill_headers(gsb, img, offset);
+	}
+
+	mgr->wpr_size = offset;
+}
+
+/**
+ * ls_ucode_mgr_write_wpr - write the WPR blob contents
+ */
+static int
+ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr,
+		       struct nvkm_gpuobj *wpr_blob)
+{
+	struct ls_ucode_img *img;
+	u32 pos = 0;
+
+	nvkm_kmap(wpr_blob);
+
+	list_for_each_entry(img, &mgr->img_list, node) {
+		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
+				      sizeof(img->wpr_header));
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
+				     &img->lsb_header, sizeof(img->lsb_header));
+
+		/* Generate and write BL descriptor */
+		if (!img->ucode_header) {
+			u8 desc[gsb->func->bl_desc_size];
+			struct gm200_flcn_bl_desc gdesc;
+
+			ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr,
+						      &gdesc);
+			gsb->func->fixup_bl_desc(&gdesc, &desc);
+			nvkm_gpuobj_memcpy_to(wpr_blob,
+					      img->lsb_header.bl_data_off,
+					      &desc, gsb->func->bl_desc_size);
+		}
+
+		/* Copy ucode */
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
+				      img->ucode_data, img->ucode_size);
+
+		pos += sizeof(img->wpr_header);
+	}
+
+	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
+
+	nvkm_done(wpr_blob);
+
+	return 0;
+}
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+/**
+ * gm200_secboot_prepare_ls_blob() - prepare the LS blob
+ *
+ * For each securely managed falcon, load the FW, signatures and bootloaders and
+ * prepare a ucode blob. Then, compute the offsets in the WPR region for each
+ * blob, and finally write the headers and ucode blobs into a GPU object that
+ * will be copied into the WPR region by the HS firmware.
+ */
+static int
+gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	struct nvkm_device *device = sb->subdev.device;
+	struct ls_ucode_mgr mgr;
+	int falcon_id;
+	int ret;
+
+	ls_ucode_mgr_init(&mgr);
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons,
+			 NVKM_SECBOOT_FALCON_END) {
+		struct ls_ucode_img *img;
+
+		img = ls_ucode_img_load(&sb->subdev, lsf_load_funcs[falcon_id]);
+
+		if (IS_ERR(img)) {
+			ret = PTR_ERR(img);
+			goto cleanup;
+		}
+		ls_ucode_mgr_add_img(&mgr, img);
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	ls_ucode_mgr_fill_headers(gsb, &mgr);
+	mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT);
+
+	/* Allocate GPU object that will contain the WPR region */
+	ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL,
+			      &gsb->ls_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n",
+		    mgr.count, mgr.wpr_size);
+
+	/* If WPR address and size are not fixed, set them to fit the LS blob */
+	if (!gsb->wpr_size) {
+		gsb->wpr_addr = gsb->ls_blob->addr;
+		gsb->wpr_size = gsb->ls_blob->size;
+	}
+
+	/* Write LS blob */
+	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
+
+cleanup:
+	ls_ucode_mgr_cleanup(&mgr);
+
+	return ret;
+}
+
+/*
+ * High-secure blob creation
+ */
+
+/**
+ * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature
+ */
+static void
+gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	struct fw_bin_header *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	void *sig;
+	u32 sig_size;
+
+	/* Falcon in debug or production mode? */
+	if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) {
+		sig = acr_image + fw_hdr->sig_dbg_offset;
+		sig_size = fw_hdr->sig_dbg_size;
+	} else {
+		sig = acr_image + fw_hdr->sig_prod_offset;
+		sig_size = fw_hdr->sig_prod_size;
+	}
+
+	/* Patch signature */
+	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
+}
+
+/**
+ * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image
+ */
+static void
+gm200_secboot_populate_hsf_bl_desc(void *acr_image,
+				   struct gm200_flcn_bl_desc *bl_desc)
+{
+	struct fw_bin_header *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset;
+
+	/*
+	 * Descriptor for the bootloader that will load the ACR image into
+	 * IMEM/DMEM memory.
+	 */
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	memset(bl_desc, 0, sizeof(*bl_desc));
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->non_sec_code_off = load_hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = load_hdr->non_sec_code_size;
+	bl_desc->sec_code_off = load_hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = load_hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	/*
+	 * We need to set code_dma_base to the virtual address of the acr_blob,
+	 * and add this address to data_dma_base before writing it into DMEM
+	 */
+	bl_desc->code_dma_base.lo = 0;
+	bl_desc->data_dma_base.lo = load_hdr->data_dma_base;
+	bl_desc->data_size = load_hdr->data_size;
+}
+
+/**
+ * gm200_secboot_prepare_hs_blob - load and prepare a HS blob and BL descriptor
+ *
+ * @gsb secure boot instance to prepare for
+ * @fw name of the HS firmware to load
+ * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
+ * @bl_desc pointer to the BL descriptor to write for this firmware
+ * @patch whether we should patch the HS descriptor (only for HS loaders)
+ */
+static int
+gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb, const char *fw,
+			      struct nvkm_gpuobj **blob,
+			      struct gm200_flcn_bl_desc *bl_desc, bool patch)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	void *acr_image;
+	struct fw_bin_header *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	void *acr_data;
+	struct hsf_load_header *load_hdr;
+	struct hsflcn_acr_desc *desc;
+	int ret;
+
+	acr_image = gm200_secboot_load_firmware(subdev, fw, 0);
+	if (IS_ERR(acr_image))
+		return PTR_ERR(acr_image);
+	hsbin_hdr = acr_image;
+
+	/* Patch signature */
+	gm200_secboot_hsf_patch_signature(gsb, acr_image);
+
+	acr_data = acr_image + hsbin_hdr->data_offset;
+
+	/* Patch descriptor? */
+	if (patch) {
+		fw_hdr = acr_image + hsbin_hdr->header_offset;
+		load_hdr = acr_image + fw_hdr->hdr_offset;
+		desc = acr_data + load_hdr->data_dma_base;
+		gsb->func->fixup_hs_desc(gsb, desc);
+	}
+
+	/* Generate HS BL descriptor */
+	gm200_secboot_populate_hsf_bl_desc(acr_image, bl_desc);
+
+	/* Create ACR blob and copy HS data to it */
+	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
+			      0x1000, false, NULL, blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_kmap(*blob);
+	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
+	nvkm_done(*blob);
+
+cleanup:
+	kfree(acr_image);
+
+	return ret;
+}
+
+/*
+ * High-secure bootloader blob creation
+ */
+
+static int
+gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+
+	gsb->hsbl_blob = gm200_secboot_load_firmware(subdev, "acr/bl", 0);
+	if (IS_ERR(gsb->hsbl_blob)) {
+		int ret = PTR_ERR(gsb->hsbl_blob);
+
+		gsb->hsbl_blob = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * gm20x_secboot_prepare_blobs - load blobs common to all GM20X GPUs.
+ *
+ * This includes the LS blob, HS ucode loading blob, and HS bootloader.
+ *
+ * The HS ucode unload blob is only used on dGPU.
+ */
+int
+gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
+{
+	int ret;
+
+	/* Load and prepare the managed falcon's firmwares */
+	ret = gm200_secboot_prepare_ls_blob(gsb);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware that will load the LS firmwares */
+	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
+					    &gsb->acr_load_blob,
+					    &gsb->acr_load_bl_desc, true);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware bootloader */
+	ret = gm200_secboot_prepare_hsbl_blob(gsb);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int
+gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	ret = gm20x_secboot_prepare_blobs(gsb);
+	if (ret)
+		return ret;
+
+	/* dGPU only: load the HS firmware that unprotects the WPR region */
+	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
+					    &gsb->acr_unload_blob,
+					    &gsb->acr_unload_bl_desc, false);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+
+
+/*
+ * Secure Boot Execution
+ */
+
+/**
+ * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM
+ */
+static void
+gm200_secboot_load_hs_bl(struct gm200_secboot *gsb, void *data, u32 data_size)
+{
+	struct nvkm_device *device = gsb->base.subdev.device;
+	struct fw_bin_header *hdr = gsb->hsbl_blob;
+	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
+	void *blob_data = gsb->hsbl_blob + hdr->data_offset;
+	void *hsbl_code = blob_data + hsbl_desc->code_off;
+	void *hsbl_data = blob_data + hsbl_desc->data_off;
+	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
+	const u32 base = gsb->base.base;
+	u32 blk;
+	u32 tag;
+	int i;
+
+	/*
+	 * Copy HS bootloader data
+	 */
+	nvkm_wr32(device, base + 0x1c0, (0x00000000 | (0x1 << 24)));
+	for (i = 0; i < hsbl_desc->data_size / 4; i++)
+		nvkm_wr32(device, base + 0x1c4, ((u32 *)hsbl_data)[i]);
+
+	/*
+	 * Copy HS bootloader interface structure where the HS descriptor
+	 * expects it to be
+	 */
+	nvkm_wr32(device, base + 0x1c0,
+		  (hsbl_desc->dmem_load_off | (0x1 << 24)));
+	for (i = 0; i < data_size / 4; i++)
+		nvkm_wr32(device, base + 0x1c4, ((u32 *)data)[i]);
+
+	/* Copy HS bootloader code to end of IMEM */
+	blk = (nvkm_rd32(device, base + 0x108) & 0x1ff) - (code_size >> 8);
+	tag = hsbl_desc->start_tag;
+	nvkm_wr32(device, base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24));
+	for (i = 0; i < code_size / 4; i++) {
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0) {
+			nvkm_wr32(device, base + 0x188, tag & 0xffff);
+			tag++;
+		}
+		nvkm_wr32(device, base + 0x184, ((u32 *)hsbl_code)[i]);
+	}
+	nvkm_wr32(device, base + 0x188, 0);
+}
+
+/**
+ * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot
+ */
+static int
+gm200_secboot_setup_falcon(struct gm200_secboot *gsb)
+{
+	struct nvkm_device *device = gsb->base.subdev.device;
+	struct fw_bin_header *hdr = gsb->hsbl_blob;
+	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
+	/* virtual start address for boot vector */
+	u32 virt_addr = hsbl_desc->start_tag << 8;
+	const u32 base = gsb->base.base;
+	const u32 reg_base = base + 0xe00;
+	u32 inst_loc;
+	int ret;
+
+	ret = nvkm_secboot_falcon_reset(&gsb->base);
+	if (ret)
+		return ret;
+
+	/* setup apertures - virtual */
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0);
+	/* setup apertures - physical */
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH),
+		  0x4 | 0x1);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH),
+		  0x4 | 0x2);
+
+	/* Set context */
+	if (nvkm_memory_target(gsb->inst->memory) == NVKM_MEM_TARGET_VRAM)
+		inst_loc = 0x0; /* FB */
+	else
+		inst_loc = 0x3; /* Non-coherent sysmem */
+
+	nvkm_mask(device, base + 0x048, 0x1, 0x1);
+	nvkm_wr32(device, base + 0x480,
+		  ((gsb->inst->addr >> 12) & 0xfffffff) |
+		  (inst_loc << 28) | (1 << 30));
+
+	/* Set boot vector to code's starting virtual address */
+	nvkm_wr32(device, base + 0x104, virt_addr);
+
+	return 0;
+}
+
+/**
+ * gm200_secboot_run_hs_blob() - run the given high-secure blob
+ */
+static int
+gm200_secboot_run_hs_blob(struct gm200_secboot *gsb, struct nvkm_gpuobj *blob,
+			  struct gm200_flcn_bl_desc *desc)
+{
+	struct nvkm_vma vma;
+	u64 vma_addr;
+	const u32 bl_desc_size = gsb->func->bl_desc_size;
+	u8 bl_desc[bl_desc_size];
+	int ret;
+
+	/* Map the HS firmware so the HS bootloader can see it */
+	ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma);
+	if (ret)
+		return ret;
+
+	/* Add the mapping address to the DMA bases */
+	vma_addr = flcn64_to_u64(desc->code_dma_base) + vma.offset;
+	desc->code_dma_base.lo = lower_32_bits(vma_addr);
+	desc->code_dma_base.hi = upper_32_bits(vma_addr);
+	vma_addr = flcn64_to_u64(desc->data_dma_base) + vma.offset;
+	desc->data_dma_base.lo = lower_32_bits(vma_addr);
+	desc->data_dma_base.hi = upper_32_bits(vma_addr);
+
+	/* Fixup the BL header */
+	gsb->func->fixup_bl_desc(desc, &bl_desc);
+
+	/* Reset the falcon and make it ready to run the HS bootloader */
+	ret = gm200_secboot_setup_falcon(gsb);
+	if (ret)
+		goto done;
+
+	/* Load the HS bootloader into the falcon's IMEM/DMEM */
+	gm200_secboot_load_hs_bl(gsb, &bl_desc, bl_desc_size);
+
+	/* Start the HS bootloader */
+	ret = nvkm_secboot_falcon_run(&gsb->base);
+	if (ret)
+		goto done;
+
+done:
+	/* Restore the original DMA addresses */
+	vma_addr = flcn64_to_u64(desc->code_dma_base) - vma.offset;
+	desc->code_dma_base.lo = lower_32_bits(vma_addr);
+	desc->code_dma_base.hi = upper_32_bits(vma_addr);
+	vma_addr = flcn64_to_u64(desc->data_dma_base) - vma.offset;
+	desc->data_dma_base.lo = lower_32_bits(vma_addr);
+	desc->data_dma_base.hi = upper_32_bits(vma_addr);
+
+	/* We don't need the ACR firmware anymore */
+	nvkm_gpuobj_unmap(&vma);
+
+	return ret;
+}
+
+/*
+ * gm200_secboot_reset() - execute secure boot from the prepared state
+ *
+ * Load the HS bootloader and ask the falcon to run it. This will in turn
+ * load the HS firmware and run it, so once the falcon stops all the managed
+ * falcons should have their LS firmware loaded and be ready to run.
+ */
+int
+gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	/*
+	 * Dummy GM200 implementation: perform secure boot each time we are
+	 * called on FECS. Since only FECS and GPCCS are managed and started
+	 * together, this ought to be safe.
+	 *
+	 * Once we have proper PMU firmware and support, this will be changed
+	 * to a proper call to the PMU method.
+	 */
+	if (falcon != NVKM_SECBOOT_FALCON_FECS)
+		goto end;
+
+	/* If WPR is set and we have an unload blob, run it to unlock WPR */
+	if (gsb->acr_unload_blob &&
+	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) {
+		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
+						&gsb->acr_unload_bl_desc);
+		if (ret)
+			return ret;
+	}
+
+	/* Reload all managed falcons */
+	ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_load_blob,
+					&gsb->acr_load_bl_desc);
+	if (ret)
+		return ret;
+
+end:
+	gsb->falcon_state[falcon] = RESET;
+	return 0;
+}
+
+int
+gm200_secboot_start(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int base;
+
+	switch (falcon) {
+	case NVKM_SECBOOT_FALCON_FECS:
+		base = 0x409000;
+		break;
+	case NVKM_SECBOOT_FALCON_GPCCS:
+		base = 0x41a000;
+		break;
+	default:
+		nvkm_error(&sb->subdev, "cannot start unhandled falcon!\n");
+		return -EINVAL;
+	}
+
+	nvkm_wr32(sb->subdev.device, base + 0x130, 0x00000002);
+	gsb->falcon_state[falcon] = RUNNING;
+
+	return 0;
+}
+
+
+
+int
+gm200_secboot_init(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	struct nvkm_device *device = sb->subdev.device;
+	struct nvkm_vm *vm;
+	const u64 vm_area_len = 600 * 1024;
+	int ret;
+
+	/* Allocate instance block and VM */
+	ret = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &gsb->inst);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x8000, 0, true, NULL, &gsb->pgd);
+	if (ret)
+		return ret;
+
+	ret = nvkm_vm_new(device, 0, vm_area_len, 0, NULL, &vm);
+	if (ret)
+		return ret;
+
+	atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]);
+
+	ret = nvkm_vm_ref(vm, &gsb->vm, gsb->pgd);
+	nvkm_vm_ref(NULL, &vm, NULL);
+	if (ret)
+		return ret;
+
+	nvkm_kmap(gsb->inst);
+	nvkm_wo32(gsb->inst, 0x200, lower_32_bits(gsb->pgd->addr));
+	nvkm_wo32(gsb->inst, 0x204, upper_32_bits(gsb->pgd->addr));
+	nvkm_wo32(gsb->inst, 0x208, lower_32_bits(vm_area_len - 1));
+	nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1));
+	nvkm_done(gsb->inst);
+
+	return 0;
+}
+
+int
+gm200_secboot_fini(struct nvkm_secboot *sb, bool suspend)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret = 0;
+	int i;
+
+	/* Run the unload blob to unprotect the WPR region */
+	if (gsb->acr_unload_blob &&
+	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE)
+		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
+						&gsb->acr_unload_bl_desc);
+
+	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
+		gsb->falcon_state[i] = NON_SECURE;
+
+	return ret;
+}
+
+void *
+gm200_secboot_dtor(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+
+	nvkm_gpuobj_del(&gsb->acr_unload_blob);
+
+	kfree(gsb->hsbl_blob);
+	nvkm_gpuobj_del(&gsb->acr_load_blob);
+	nvkm_gpuobj_del(&gsb->ls_blob);
+
+	nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd);
+	nvkm_gpuobj_del(&gsb->pgd);
+	nvkm_gpuobj_del(&gsb->inst);
+
+	return gsb;
+}
+
+
+static const struct nvkm_secboot_func
+gm200_secboot = {
+	.dtor = gm200_secboot_dtor,
+	.init = gm200_secboot_init,
+	.fini = gm200_secboot_fini,
+	.prepare_blobs = gm200_secboot_prepare_blobs,
+	.reset = gm200_secboot_reset,
+	.start = gm200_secboot_start,
+	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
+			   BIT(NVKM_SECBOOT_FALCON_GPCCS),
+	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+};
+
+/**
+ * gm200_fixup_bl_desc - just copy the BL descriptor
+ *
+ * Use the GM200 descriptor format by default.
+ */
+static void
+gm200_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
+{
+	memcpy(ret, desc, sizeof(*desc));
+}
+
+static void
+gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
+			    struct hsflcn_acr_desc *desc)
+{
+	desc->ucode_blob_base = gsb->ls_blob->addr;
+	desc->ucode_blob_size = gsb->ls_blob->size;
+
+	desc->wpr_offset = 0;
+
+	/* WPR region information for the HS binary to set up */
+	desc->wpr_region_id = 1;
+	desc->regions.no_regions = 1;
+	desc->regions.region_props[0].region_id = 1;
+	desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8;
+	desc->regions.region_props[0].end_addr =
+		(gsb->wpr_addr + gsb->wpr_size) >> 8;
+}
+
+static const struct gm200_secboot_func
+gm200_secboot_func = {
+	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
+	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
+	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
+};
+
+int
+gm200_secboot_new(struct nvkm_device *device, int index,
+		  struct nvkm_secboot **psb)
+{
+	int ret;
+	struct gm200_secboot *gsb;
+
+	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
+	if (!gsb) {
+		psb = NULL;
+		return -ENOMEM;
+	}
+	*psb = &gsb->base;
+
+	ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base);
+	if (ret)
+		return ret;
+
+	gsb->func = &gm200_secboot_func;
+
+	return 0;
+}
+
+MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gm200/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gm200/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/gm204/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gm204/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gm204/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gm204/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/gm206/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gm206/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gm206/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gm206/gr/sw_method_init.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
new file mode 100644
index 000000000000..684320484b70
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+#include <core/gpuobj.h>
+
+/*
+ * The BL header format used by GM20B's firmware is slightly different
+ * from the one of GM200. Fix the differences here.
+ */
+struct gm20b_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+};
+
+/**
+ * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
+ *
+ * There is only a slight format difference (DMA addresses being 32-bits and
+ * 256B-aligned) to address.
+ */
+static void
+gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
+{
+	struct gm20b_flcn_bl_desc *gdesc = ret;
+	u64 addr;
+
+	memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved));
+	memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature));
+	gdesc->ctx_dma = desc->ctx_dma;
+	addr = desc->code_dma_base.hi;
+	addr <<= 32;
+	addr |= desc->code_dma_base.lo;
+	gdesc->code_dma_base = lower_32_bits(addr >> 8);
+	gdesc->non_sec_code_off = desc->non_sec_code_off;
+	gdesc->non_sec_code_size = desc->non_sec_code_size;
+	gdesc->sec_code_off = desc->sec_code_off;
+	gdesc->sec_code_size = desc->sec_code_size;
+	gdesc->code_entry_point = desc->code_entry_point;
+	addr = desc->data_dma_base.hi;
+	addr <<= 32;
+	addr |= desc->data_dma_base.lo;
+	gdesc->data_dma_base = lower_32_bits(addr >> 8);
+	gdesc->data_size = desc->data_size;
+}
+
+static void
+gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
+			    struct hsflcn_acr_desc *desc)
+{
+	desc->ucode_blob_base = gsb->ls_blob->addr;
+	desc->ucode_blob_size = gsb->ls_blob->size;
+
+	desc->wpr_offset = 0;
+}
+
+static const struct gm200_secboot_func
+gm20b_secboot_func = {
+	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
+	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
+	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
+};
+
+
+#ifdef CONFIG_ARCH_TEGRA
+#define TEGRA_MC_BASE				0x70019000
+#define MC_SECURITY_CARVEOUT2_CFG0		0xc58
+#define MC_SECURITY_CARVEOUT2_BOM_0		0xc5c
+#define MC_SECURITY_CARVEOUT2_BOM_HI_0		0xc60
+#define MC_SECURITY_CARVEOUT2_SIZE_128K		0xc64
+#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED	(1 << 1)
+/**
+ * sb_tegra_read_wpr() - read the WPR registers on Tegra
+ *
+ * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region
+ * is reserved from system memory by the bootloader and irreversibly locked.
+ * This function reads the address and size of the pre-configured WPR region.
+ */
+static int
+gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	void __iomem *mc;
+	u32 cfg;
+
+	mc = ioremap(TEGRA_MC_BASE, 0xd00);
+	if (!mc) {
+		nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
+		return PTR_ERR(mc);
+	}
+	gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
+	gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+		<< 17;
+	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
+	iounmap(mc);
+
+	/* Check that WPR settings are valid */
+	if (gsb->wpr_size == 0) {
+		nvkm_error(&sb->subdev, "WPR region is empty\n");
+		return -EINVAL;
+	}
+
+	if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) {
+		nvkm_error(&sb->subdev, "WPR region not locked\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int
+gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
+{
+	nvkm_error(&gsb->base.subdev, "Tegra support not compiled in\n");
+	return -EINVAL;
+}
+#endif
+
+static int
+gm20b_secboot_prepare_blobs(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int acr_size;
+	int ret;
+
+	ret = gm20x_secboot_prepare_blobs(gsb);
+	if (ret)
+		return ret;
+
+	acr_size = gsb->acr_load_blob->size;
+	/*
+	 * On Tegra the WPR region is set by the bootloader. It is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	if (acr_size > gsb->wpr_size) {
+		nvkm_error(&sb->subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(&sb->subdev, "required: %dB\n", acr_size);
+		nvkm_error(&sb->subdev, "WPR size: %dB\n", gsb->wpr_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int
+gm20b_secboot_init(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	ret = gm20b_tegra_read_wpr(gsb);
+	if (ret)
+		return ret;
+
+	return gm200_secboot_init(sb);
+}
+
+static const struct nvkm_secboot_func
+gm20b_secboot = {
+	.dtor = gm200_secboot_dtor,
+	.init = gm20b_secboot_init,
+	.prepare_blobs = gm20b_secboot_prepare_blobs,
+	.reset = gm200_secboot_reset,
+	.start = gm200_secboot_start,
+	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
+	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+};
+
+int
+gm20b_secboot_new(struct nvkm_device *device, int index,
+		  struct nvkm_secboot **psb)
+{
+	int ret;
+	struct gm200_secboot *gsb;
+
+	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
+	if (!gsb) {
+		psb = NULL;
+		return -ENOMEM;
+	}
+	*psb = &gsb->base;
+
+	ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base);
+	if (ret)
+		return ret;
+
+	gsb->func = &gm20b_secboot_func;
+
+	return 0;
+}
+
+MODULE_FIRMWARE("nvidia/gm20b/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gm20b/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gm20b/gr/sw_method_init.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
new file mode 100644
index 000000000000..f2b09dee7c5d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_PRIV_H__
+#define __NVKM_SECBOOT_PRIV_H__
+
+#include <subdev/secboot.h>
+#include <subdev/mmu.h>
+
+struct nvkm_secboot_func {
+	int (*init)(struct nvkm_secboot *);
+	int (*fini)(struct nvkm_secboot *, bool suspend);
+	void *(*dtor)(struct nvkm_secboot *);
+	int (*prepare_blobs)(struct nvkm_secboot *);
+	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
+	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
+
+	/* ID of the falcon that will perform secure boot */
+	enum nvkm_secboot_falcon boot_falcon;
+	/* Bit-mask of IDs of managed falcons */
+	unsigned long managed_falcons;
+};
+
+int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *,
+		      int index, struct nvkm_secboot *);
+int nvkm_secboot_falcon_reset(struct nvkm_secboot *);
+int nvkm_secboot_falcon_run(struct nvkm_secboot *);
+
+struct flcn_u64 {
+	u32 lo;
+	u32 hi;
+};
+static inline u64 flcn64_to_u64(const struct flcn_u64 f)
+{
+	return ((u64)f.hi) << 32 | f.lo;
+}
+
+/**
+ * struct gm200_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct gm200_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	struct flcn_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct flcn_u64 data_dma_base;
+	u32 data_size;
+};
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @signature:		signature of ACR ucode
+ * @wpr_region_id:	region ID holding the WPR header and its details
+ * @wpr_offset:		offset from the WPR region holding the wpr header
+ * @regions:		region descriptors
+ * @nonwpr_ucode_blob_size:	size of LS blob
+ * @nonwpr_ucode_blob_start:	FB location of LS blob is
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+/**
+ * Contains the whole secure boot state, allowing it to be performed as needed
+ * @wpr_addr:		physical address of the WPR region
+ * @wpr_size:		size in bytes of the WPR region
+ * @ls_blob:		LS blob of all the LS firmwares, signatures, bootloaders
+ * @ls_blob_size:	size of the LS blob
+ * @ls_blob_nb_regions:	number of LS firmwares that will be loaded
+ * @acr_blob:		HS blob
+ * @acr_blob_vma:	mapping of the HS blob into the secure falcon's VM
+ * @acr_bl_desc:	bootloader descriptor of the HS blob
+ * @hsbl_blob:		HS blob bootloader
+ * @inst:		instance block for HS falcon
+ * @pgd:		page directory for the HS falcon
+ * @vm:			address space used by the HS falcon
+ * @bl_desc_size:	size of the BL descriptor used by this chip.
+ * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
+ *			the generic GM200 format into a data array of size
+ *			bl_desc_size
+ */
+struct gm200_secboot {
+	struct nvkm_secboot base;
+	const struct gm200_secboot_func *func;
+
+	/*
+	 * Address and size of the WPR region. On dGPU this will be the
+	 * address of the LS blob. On Tegra this is a fixed region set by the
+	 * bootloader
+	 */
+	u64 wpr_addr;
+	u32 wpr_size;
+
+	/*
+	 * HS FW - lock WPR region (dGPU only) and load LS FWs
+	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
+	 */
+	struct nvkm_gpuobj *acr_load_blob;
+	struct gm200_flcn_bl_desc acr_load_bl_desc;
+
+	/* HS FW - unlock WPR region (dGPU only) */
+	struct nvkm_gpuobj *acr_unload_blob;
+	struct gm200_flcn_bl_desc acr_unload_bl_desc;
+
+	/* HS bootloader */
+	void *hsbl_blob;
+
+	/* LS FWs, to be loaded by the HS ACR */
+	struct nvkm_gpuobj *ls_blob;
+
+	/* Instance block & address space used for HS FW execution */
+	struct nvkm_gpuobj *inst;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+
+	/* To keep track of the state of all managed falcons */
+	enum {
+		/* In non-secure state, no firmware loaded, no privileges*/
+		NON_SECURE = 0,
+		/* In low-secure mode and ready to be started */
+		RESET,
+		/* In low-secure mode and running */
+		RUNNING,
+	} falcon_state[NVKM_SECBOOT_FALCON_END];
+
+};
+#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
+
+struct gm200_secboot_func {
+	/*
+	 * Size of the bootloader descriptor for this chip. A block of this
+	 * size is allocated before booting a falcon and the fixup_bl_desc
+	 * callback is called on it
+	 */
+	u32 bl_desc_size;
+	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
+
+	/*
+	 * Chip-specific modifications of the HS descriptor can be done here.
+	 * On dGPU this is used to fill the information about the WPR region
+	 * we want the HS FW to set up.
+	 */
+	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
+};
+
+int gm200_secboot_init(struct nvkm_secboot *);
+void *gm200_secboot_dtor(struct nvkm_secboot *);
+int gm200_secboot_reset(struct nvkm_secboot *, u32);
+int gm200_secboot_start(struct nvkm_secboot *, u32);
+
+int gm20x_secboot_prepare_blobs(struct gm200_secboot *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
index b035c6e28be8..c34076223b7b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
@@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/volt/gpio.o
 nvkm-y += nvkm/subdev/volt/nv40.o
 nvkm-y += nvkm/subdev/volt/gk104.o
 nvkm-y += nvkm/subdev/volt/gk20a.o
+nvkm-y += nvkm/subdev/volt/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
index fd56c6476064..d554455326da 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,21 +24,9 @@
 
 #include <core/tegra.h>
 
-struct cvb_coef {
-	int c0;
-	int c1;
-	int c2;
-	int c3;
-	int c4;
-	int c5;
-};
-
-struct gk20a_volt {
-	struct nvkm_volt base;
-	struct regulator *vdd;
-};
+#include "gk20a.h"
 
-const struct cvb_coef gk20a_cvb_coef[] = {
+static const struct cvb_coef gk20a_cvb_coef[] = {
 	/* MHz,        c0,     c1,   c2,    c3,     c4,   c5 */
 	/*  72 */ { 1209886, -36468,  515,   417, -13123,  203},
 	/* 108 */ { 1130804, -27659,  296,   298, -10834,  221},
@@ -89,7 +77,7 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale,
 	return mv;
 }
 
-static int
+int
 gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo)
 {
 	int mv;
@@ -100,7 +88,7 @@ gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo)
 	return mv * 1000;
 }
 
-static int
+int
 gk20a_volt_vid_get(struct nvkm_volt *base)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -115,7 +103,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base)
 	return -EINVAL;
 }
 
-static int
+int
 gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -125,7 +113,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 	return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000);
 }
 
-static int
+int
 gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -155,30 +143,25 @@ gk20a_volt = {
 };
 
 int
-gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
+_gk20a_volt_ctor(struct nvkm_device *device, int index,
+		 const struct cvb_coef *coefs, int nb_coefs,
+		 struct gk20a_volt *volt)
 {
 	struct nvkm_device_tegra *tdev = device->func->tegra(device);
-	struct gk20a_volt *volt;
 	int i, uv;
 
-	if (!(volt = kzalloc(sizeof(*volt), GFP_KERNEL)))
-		return -ENOMEM;
-
 	nvkm_volt_ctor(&gk20a_volt, device, index, &volt->base);
-	*pvolt = &volt->base;
 
 	uv = regulator_get_voltage(tdev->vdd);
-	nvkm_info(&volt->base.subdev, "The default voltage is %duV\n", uv);
+	nvkm_debug(&volt->base.subdev, "the default voltage is %duV\n", uv);
 
 	volt->vdd = tdev->vdd;
 
-	volt->base.vid_nr = ARRAY_SIZE(gk20a_cvb_coef);
-	nvkm_debug(&volt->base.subdev, "%s - vid_nr = %d\n", __func__,
-		   volt->base.vid_nr);
+	volt->base.vid_nr = nb_coefs;
 	for (i = 0; i < volt->base.vid_nr; i++) {
 		volt->base.vid[i].vid = i;
 		volt->base.vid[i].uv =
-			gk20a_volt_calc_voltage(&gk20a_cvb_coef[i],
+			gk20a_volt_calc_voltage(&coefs[i],
 						tdev->gpu_speedo);
 		nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i,
 			   volt->base.vid[i].vid, volt->base.vid[i].uv);
@@ -186,3 +169,17 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 
 	return 0;
 }
+
+int
+gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
+{
+	struct gk20a_volt *volt;
+
+	volt = kzalloc(sizeof(*volt), GFP_KERNEL);
+	if (!volt)
+		return -ENOMEM;
+	*pvolt = &volt->base;
+
+	return _gk20a_volt_ctor(device, index, gk20a_cvb_coef,
+				ARRAY_SIZE(gk20a_cvb_coef), volt);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
new file mode 100644
index 000000000000..0fa3b502bcf8
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __GK20A_VOLT_H__
+#define __GK20A_VOLT_H__
+
+struct cvb_coef {
+	int c0;
+	int c1;
+	int c2;
+	int c3;
+	int c4;
+	int c5;
+};
+
+struct gk20a_volt {
+	struct nvkm_volt base;
+	struct regulator *vdd;
+};
+
+int _gk20a_volt_ctor(struct nvkm_device *device, int index,
+		     const struct cvb_coef *coefs, int nb_coefs,
+		     struct gk20a_volt *volt);
+
+int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo);
+int gk20a_volt_vid_get(struct nvkm_volt *volt);
+int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid);
+int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
new file mode 100644
index 000000000000..49b5ecb701e4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+#include "gk20a.h"
+
+#include <core/tegra.h>
+
+const struct cvb_coef gm20b_cvb_coef[] = {
+	/* KHz,             c0,      c1,   c2 */
+	/*  76800 */ { 1786666,  -85625, 1632 },
+	/* 153600 */ { 1846729,  -87525, 1632 },
+	/* 230400 */ { 1910480,  -89425, 1632 },
+	/* 307200 */ { 1977920,  -91325, 1632 },
+	/* 384000 */ { 2049049,  -93215, 1632 },
+	/* 460800 */ { 2122872,  -95095, 1632 },
+	/* 537600 */ { 2201331,  -96985, 1632 },
+	/* 614400 */ { 2283479,  -98885, 1632 },
+	/* 691200 */ { 2369315, -100785, 1632 },
+	/* 768000 */ { 2458841, -102685, 1632 },
+	/* 844800 */ { 2550821, -104555, 1632 },
+	/* 921600 */ { 2647676, -106455, 1632 },
+};
+
+int
+gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
+{
+	struct gk20a_volt *volt;
+
+	volt = kzalloc(sizeof(*volt), GFP_KERNEL);
+	if (!volt)
+		return -ENOMEM;
+	*pvolt = &volt->base;
+
+	return _gk20a_volt_ctor(device, index, gm20b_cvb_coef,
+				ARRAY_SIZE(gm20b_cvb_coef), volt);
+}
diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig
index 336ad4de9981..73241c4eb7aa 100644
--- a/drivers/gpu/drm/omapdrm/Kconfig
+++ b/drivers/gpu/drm/omapdrm/Kconfig
@@ -2,7 +2,6 @@ config DRM_OMAP
 	tristate "OMAP DRM"
 	depends on DRM
 	depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
-	select OMAP2_DSS
 	select DRM_KMS_HELPER
 	select DRM_KMS_FB_HELPER
 	select FB_SYS_FILLRECT
diff --git a/drivers/gpu/drm/omapdrm/Makefile b/drivers/gpu/drm/omapdrm/Makefile
index fe4c2228bc18..48b7b750c05c 100644
--- a/drivers/gpu/drm/omapdrm/Makefile
+++ b/drivers/gpu/drm/omapdrm/Makefile
@@ -6,7 +6,7 @@
 obj-y += dss/
 obj-y += displays/
 
-ccflags-y := -Iinclude/drm -Werror
+ccflags-y := -Iinclude/drm
 omapdrm-y := omap_drv.o \
 	omap_irq.o \
 	omap_debugfs.o \
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
index d811e6dcaef7..747f26a55e43 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
@@ -236,46 +236,6 @@ static struct omap_dss_driver dvic_driver = {
 	.detect		= dvic_detect,
 };
 
-static int dvic_probe_pdata(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct connector_dvi_platform_data *pdata;
-	struct omap_dss_device *in, *dssdev;
-	int i2c_bus_num;
-
-	pdata = dev_get_platdata(&pdev->dev);
-	i2c_bus_num = pdata->i2c_bus_num;
-
-	if (i2c_bus_num != -1) {
-		struct i2c_adapter *adapter;
-
-		adapter = i2c_get_adapter(i2c_bus_num);
-		if (!adapter) {
-			dev_err(&pdev->dev,
-					"Failed to get I2C adapter, bus %d\n",
-					i2c_bus_num);
-			return -EPROBE_DEFER;
-		}
-
-		ddata->i2c_adapter = adapter;
-	}
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		i2c_put_adapter(ddata->i2c_adapter);
-
-		dev_err(&pdev->dev, "Failed to find video source\n");
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int dvic_probe_of(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
@@ -319,17 +279,12 @@ static int dvic_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ddata);
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = dvic_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = dvic_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = dvic_probe_of(pdev);
+	if (r)
+		return r;
 
 	ddata->timings = dvic_default_timings;
 
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c b/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
index 6ee4129bc0c0..225fd8d6ab31 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-hdmi.c
@@ -206,30 +206,6 @@ static struct omap_dss_driver hdmic_driver = {
 	.set_hdmi_infoframe	= hdmic_set_infoframe,
 };
 
-static int hdmic_probe_pdata(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct connector_hdmi_platform_data *pdata;
-	struct omap_dss_device *in, *dssdev;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	ddata->hpd_gpio = -ENODEV;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "Failed to find video source\n");
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int hdmic_probe_of(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
@@ -268,17 +244,12 @@ static int hdmic_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ddata);
 	ddata->dev = &pdev->dev;
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = hdmic_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = hdmic_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = hdmic_probe_of(pdev);
+	if (r)
+		return r;
 
 	if (gpio_is_valid(ddata->hpd_gpio)) {
 		r = devm_gpio_request_one(&pdev->dev, ddata->hpd_gpio,
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
index d9048b3df495..2fd5602880a7 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
@@ -166,32 +166,6 @@ static const struct omapdss_dvi_ops tfp410_dvi_ops = {
 	.get_timings	= tfp410_get_timings,
 };
 
-static int tfp410_probe_pdata(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct encoder_tfp410_platform_data *pdata;
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	ddata->pd_gpio = pdata->power_down_gpio;
-
-	ddata->data_lines = pdata->data_lines;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "Failed to find video source\n");
-		return -ENODEV;
-	}
-
-	ddata->in = in;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int tfp410_probe_of(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
@@ -231,17 +205,12 @@ static int tfp410_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ddata);
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = tfp410_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = tfp410_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = tfp410_probe_of(pdev);
+	if (r)
+		return r;
 
 	if (gpio_is_valid(ddata->pd_gpio)) {
 		r = devm_gpio_request_one(&pdev->dev, ddata->pd_gpio,
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
index 990af6baeb0f..916a89978387 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tpd12s015.c
@@ -13,9 +13,8 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/gpio.h>
 #include <linux/platform_device.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 
 #include <video/omapdss.h>
 #include <video/omap-panel-data.h>
@@ -24,9 +23,9 @@ struct panel_drv_data {
 	struct omap_dss_device dssdev;
 	struct omap_dss_device *in;
 
-	int ct_cp_hpd_gpio;
-	int ls_oe_gpio;
-	int hpd_gpio;
+	struct gpio_desc *ct_cp_hpd_gpio;
+	struct gpio_desc *ls_oe_gpio;
+	struct gpio_desc *hpd_gpio;
 
 	struct omap_video_timings timings;
 };
@@ -47,7 +46,7 @@ static int tpd_connect(struct omap_dss_device *dssdev,
 	dst->src = dssdev;
 	dssdev->dst = dst;
 
-	gpio_set_value_cansleep(ddata->ct_cp_hpd_gpio, 1);
+	gpiod_set_value_cansleep(ddata->ct_cp_hpd_gpio, 1);
 	/* DC-DC converter needs at max 300us to get to 90% of 5V */
 	udelay(300);
 
@@ -65,7 +64,7 @@ static void tpd_disconnect(struct omap_dss_device *dssdev,
 	if (dst != dssdev->dst)
 		return;
 
-	gpio_set_value_cansleep(ddata->ct_cp_hpd_gpio, 0);
+	gpiod_set_value_cansleep(ddata->ct_cp_hpd_gpio, 0);
 
 	dst->src = NULL;
 	dssdev->dst = NULL;
@@ -145,16 +144,14 @@ static int tpd_read_edid(struct omap_dss_device *dssdev,
 	struct omap_dss_device *in = ddata->in;
 	int r;
 
-	if (!gpio_get_value_cansleep(ddata->hpd_gpio))
+	if (!gpiod_get_value_cansleep(ddata->hpd_gpio))
 		return -ENODEV;
 
-	if (gpio_is_valid(ddata->ls_oe_gpio))
-		gpio_set_value_cansleep(ddata->ls_oe_gpio, 1);
+	gpiod_set_value_cansleep(ddata->ls_oe_gpio, 1);
 
 	r = in->ops.hdmi->read_edid(in, edid, len);
 
-	if (gpio_is_valid(ddata->ls_oe_gpio))
-		gpio_set_value_cansleep(ddata->ls_oe_gpio, 0);
+	gpiod_set_value_cansleep(ddata->ls_oe_gpio, 0);
 
 	return r;
 }
@@ -163,7 +160,7 @@ static bool tpd_detect(struct omap_dss_device *dssdev)
 {
 	struct panel_drv_data *ddata = to_panel_data(dssdev);
 
-	return gpio_get_value_cansleep(ddata->hpd_gpio);
+	return gpiod_get_value_cansleep(ddata->hpd_gpio);
 }
 
 static int tpd_set_infoframe(struct omap_dss_device *dssdev,
@@ -201,63 +198,11 @@ static const struct omapdss_hdmi_ops tpd_hdmi_ops = {
 	.set_hdmi_mode		= tpd_set_hdmi_mode,
 };
 
-static int tpd_probe_pdata(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct encoder_tpd12s015_platform_data *pdata;
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	ddata->ct_cp_hpd_gpio = pdata->ct_cp_hpd_gpio;
-	ddata->ls_oe_gpio = pdata->ls_oe_gpio;
-	ddata->hpd_gpio = pdata->hpd_gpio;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "Failed to find video source\n");
-		return -ENODEV;
-	}
-
-	ddata->in = in;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int tpd_probe_of(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
 	struct device_node *node = pdev->dev.of_node;
 	struct omap_dss_device *in;
-	int gpio;
-
-	/* CT CP HPD GPIO */
-	gpio = of_get_gpio(node, 0);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&pdev->dev, "failed to parse CT CP HPD gpio\n");
-		return gpio;
-	}
-	ddata->ct_cp_hpd_gpio = gpio;
-
-	/* LS OE GPIO */
-	gpio = of_get_gpio(node, 1);
-	if (gpio_is_valid(gpio) || gpio == -ENOENT) {
-		ddata->ls_oe_gpio = gpio;
-	} else {
-		dev_err(&pdev->dev, "failed to parse LS OE gpio\n");
-		return gpio;
-	}
-
-	/* HPD GPIO */
-	gpio = of_get_gpio(node, 2);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&pdev->dev, "failed to parse HPD gpio\n");
-		return gpio;
-	}
-	ddata->hpd_gpio = gpio;
 
 	in = omapdss_of_find_source_for_first_ep(node);
 	if (IS_ERR(in)) {
@@ -275,6 +220,7 @@ static int tpd_probe(struct platform_device *pdev)
 	struct omap_dss_device *in, *dssdev;
 	struct panel_drv_data *ddata;
 	int r;
+	struct gpio_desc *gpio;
 
 	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
 	if (!ddata)
@@ -282,35 +228,35 @@ static int tpd_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ddata);
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = tpd_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = tpd_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
 
-	r = devm_gpio_request_one(&pdev->dev, ddata->ct_cp_hpd_gpio,
-			GPIOF_OUT_INIT_LOW, "hdmi_ct_cp_hpd");
+	r = tpd_probe_of(pdev);
 	if (r)
+		return r;
+
+
+	gpio = devm_gpiod_get_index_optional(&pdev->dev, NULL, 0,
+		 GPIOD_OUT_LOW);
+	if (IS_ERR(gpio))
 		goto err_gpio;
 
-	if (gpio_is_valid(ddata->ls_oe_gpio)) {
-		r = devm_gpio_request_one(&pdev->dev, ddata->ls_oe_gpio,
-				GPIOF_OUT_INIT_LOW, "hdmi_ls_oe");
-		if (r)
-			goto err_gpio;
-	}
+	ddata->ct_cp_hpd_gpio = gpio;
 
-	r = devm_gpio_request_one(&pdev->dev, ddata->hpd_gpio,
-			GPIOF_DIR_IN, "hdmi_hpd");
-	if (r)
+	gpio = devm_gpiod_get_index_optional(&pdev->dev, NULL, 1,
+		 GPIOD_OUT_LOW);
+	if (IS_ERR(gpio))
+		goto err_gpio;
+
+	ddata->ls_oe_gpio = gpio;
+
+	gpio = devm_gpiod_get_index(&pdev->dev, NULL, 2,
+		GPIOD_IN);
+	if (IS_ERR(gpio))
 		goto err_gpio;
 
+	ddata->hpd_gpio = gpio;
+
 	dssdev = &ddata->dssdev;
 	dssdev->ops.hdmi = &tpd_hdmi_ops;
 	dssdev->dev = &pdev->dev;
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index 3414c2609320..36485c2137ce 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -1127,40 +1127,6 @@ static struct omap_dss_driver dsicm_ops = {
 	.memory_read	= dsicm_memory_read,
 };
 
-static int dsicm_probe_pdata(struct platform_device *pdev)
-{
-	const struct panel_dsicm_platform_data *pdata;
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "failed to find video source\n");
-		return -EPROBE_DEFER;
-	}
-	ddata->in = in;
-
-	ddata->reset_gpio = pdata->reset_gpio;
-
-	if (pdata->use_ext_te)
-		ddata->ext_te_gpio = pdata->ext_te_gpio;
-	else
-		ddata->ext_te_gpio = -1;
-
-	ddata->ulps_timeout = pdata->ulps_timeout;
-
-	ddata->use_dsi_backlight = pdata->use_dsi_backlight;
-
-	ddata->pin_config = pdata->pin_config;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int dsicm_probe_of(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
@@ -1214,17 +1180,12 @@ static int dsicm_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ddata);
 	ddata->pdev = pdev;
 
-	if (dev_get_platdata(dev)) {
-		r = dsicm_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = dsicm_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = dsicm_probe_of(pdev);
+	if (r)
+		return r;
 
 	ddata->timings.x_res = 864;
 	ddata->timings.y_res = 480;
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
index 18eb60e9c9ec..458f77bc473d 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
@@ -240,44 +240,6 @@ static struct omap_dss_driver lb035q02_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-static int lb035q02_probe_pdata(struct spi_device *spi)
-{
-	const struct panel_lb035q02_platform_data *pdata;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *dssdev, *in;
-	int r;
-
-	pdata = dev_get_platdata(&spi->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&spi->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	r = devm_gpio_request_one(&spi->dev, pdata->enable_gpio,
-					GPIOF_OUT_INIT_LOW, "panel enable");
-	if (r)
-		goto err_gpio;
-
-	ddata->enable_gpio = gpio_to_desc(pdata->enable_gpio);
-
-	ddata->backlight_gpio = pdata->backlight_gpio;
-
-	return 0;
-err_gpio:
-	omap_dss_put_device(ddata->in);
-	return r;
-}
-
 static int lb035q02_probe_of(struct spi_device *spi)
 {
 	struct device_node *node = spi->dev.of_node;
@@ -320,17 +282,12 @@ static int lb035q02_panel_spi_probe(struct spi_device *spi)
 
 	ddata->spi = spi;
 
-	if (dev_get_platdata(&spi->dev)) {
-		r = lb035q02_probe_pdata(spi);
-		if (r)
-			return r;
-	} else if (spi->dev.of_node) {
-		r = lb035q02_probe_of(spi);
-		if (r)
-			return r;
-	} else {
+	if (!spi->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = lb035q02_probe_of(spi);
+	if (r)
+		return r;
 
 	if (gpio_is_valid(ddata->backlight_gpio)) {
 		r = devm_gpio_request_one(&spi->dev, ddata->backlight_gpio,
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
index 8a928c9a2fc9..780cb263a318 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
@@ -19,7 +19,6 @@
 #include <linux/of_gpio.h>
 
 #include <video/omapdss.h>
-#include <video/omap-panel-data.h>
 
 struct panel_drv_data {
 	struct omap_dss_device	dssdev;
@@ -232,34 +231,6 @@ static struct omap_dss_driver nec_8048_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-
-static int nec_8048_probe_pdata(struct spi_device *spi)
-{
-	const struct panel_nec_nl8048hl11_platform_data *pdata;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&spi->dev);
-
-	ddata->qvga_gpio = pdata->qvga_gpio;
-	ddata->res_gpio = pdata->res_gpio;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&spi->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int nec_8048_probe_of(struct spi_device *spi)
 {
 	struct device_node *node = spi->dev.of_node;
@@ -315,17 +286,12 @@ static int nec_8048_probe(struct spi_device *spi)
 
 	ddata->spi = spi;
 
-	if (dev_get_platdata(&spi->dev)) {
-		r = nec_8048_probe_pdata(spi);
-		if (r)
-			return r;
-	} else if (spi->dev.of_node) {
-		r = nec_8048_probe_of(spi);
-		if (r)
-			return r;
-	} else {
+	if (!spi->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = nec_8048_probe_of(spi);
+	if (r)
+		return r;
 
 	if (gpio_is_valid(ddata->qvga_gpio)) {
 		r = devm_gpio_request_one(&spi->dev, ddata->qvga_gpio,
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
index abfd1f6e3327..529a017602e4 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-data.h>
 
 struct panel_drv_data {
 	struct omap_dss_device dssdev;
@@ -197,73 +196,6 @@ static struct omap_dss_driver sharp_ls_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-static int sharp_ls_get_gpio(struct device *dev, int gpio, unsigned long flags,
-		  char *desc, struct gpio_desc **gpiod)
-{
-	struct gpio_desc *gd;
-	int r;
-
-	*gpiod = NULL;
-
-	r = devm_gpio_request_one(dev, gpio, flags, desc);
-	if (r)
-		return r == -ENOENT ? 0 : r;
-
-	gd = gpio_to_desc(gpio);
-	if (IS_ERR(gd))
-		return PTR_ERR(gd) == -ENOENT ? 0 : PTR_ERR(gd);
-
-	*gpiod = gd;
-	return 0;
-}
-
-static int sharp_ls_probe_pdata(struct platform_device *pdev)
-{
-	const struct panel_sharp_ls037v7dw01_platform_data *pdata;
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct omap_dss_device *dssdev, *in;
-	int r;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	r = sharp_ls_get_gpio(&pdev->dev, pdata->mo_gpio, GPIOF_OUT_INIT_LOW,
-		"lcd MO", &ddata->mo_gpio);
-	if (r)
-		return r;
-	r = sharp_ls_get_gpio(&pdev->dev, pdata->lr_gpio, GPIOF_OUT_INIT_HIGH,
-		"lcd LR", &ddata->lr_gpio);
-	if (r)
-		return r;
-	r = sharp_ls_get_gpio(&pdev->dev, pdata->ud_gpio, GPIOF_OUT_INIT_HIGH,
-		"lcd UD", &ddata->ud_gpio);
-	if (r)
-		return r;
-	r = sharp_ls_get_gpio(&pdev->dev, pdata->resb_gpio, GPIOF_OUT_INIT_LOW,
-		"lcd RESB", &ddata->resb_gpio);
-	if (r)
-		return r;
-	r = sharp_ls_get_gpio(&pdev->dev, pdata->ini_gpio, GPIOF_OUT_INIT_LOW,
-		"lcd INI", &ddata->ini_gpio);
-	if (r)
-		return r;
-
-	return 0;
-}
-
 static  int sharp_ls_get_gpio_of(struct device *dev, int index, int val,
 	const char *desc, struct gpio_desc **gpiod)
 {
@@ -340,17 +272,12 @@ static int sharp_ls_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ddata);
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = sharp_ls_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = sharp_ls_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
+	if (!pdev->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = sharp_ls_probe_of(pdev);
+	if (r)
+		return r;
 
 	ddata->videomode = sharp_ls_timings;
 
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
index 4d657f3ab679..bd8d85041926 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
@@ -29,7 +29,6 @@
 #include <linux/spi/spi.h>
 #include <linux/gpio.h>
 #include <video/omapdss.h>
-#include <video/omap-panel-data.h>
 
 struct panel_drv_data {
 	struct omap_dss_device dssdev;
@@ -365,31 +364,6 @@ static struct omap_dss_driver td028ttec1_ops = {
 	.check_timings	= td028ttec1_panel_check_timings,
 };
 
-static int td028ttec1_panel_probe_pdata(struct spi_device *spi)
-{
-	const struct panel_tpo_td028ttec1_platform_data *pdata;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&spi->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&spi->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int td028ttec1_probe_of(struct spi_device *spi)
 {
 	struct device_node *node = spi->dev.of_node;
@@ -432,17 +406,12 @@ static int td028ttec1_panel_probe(struct spi_device *spi)
 
 	ddata->spi_dev = spi;
 
-	if (dev_get_platdata(&spi->dev)) {
-		r = td028ttec1_panel_probe_pdata(spi);
-		if (r)
-			return r;
-	} else if (spi->dev.of_node) {
-		r = td028ttec1_probe_of(spi);
-		if (r)
-			return r;
-	} else {
+	if (!spi->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = td028ttec1_probe_of(spi);
+	if (r)
+		return r;
 
 	ddata->videomode = td028ttec1_panel_timings;
 
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
index 68e3b68a2920..03e2beb7b4f0 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
@@ -20,7 +20,6 @@
 #include <linux/of_gpio.h>
 
 #include <video/omapdss.h>
-#include <video/omap-panel-data.h>
 
 #define TPO_R02_MODE(x)		((x) & 7)
 #define TPO_R02_MODE_800x480	7
@@ -464,33 +463,6 @@ static struct omap_dss_driver tpo_td043_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-
-static int tpo_td043_probe_pdata(struct spi_device *spi)
-{
-	const struct panel_tpo_td043mtea1_platform_data *pdata;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&spi->dev);
-
-	ddata->nreset_gpio = pdata->nreset_gpio;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&spi->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int tpo_td043_probe_of(struct spi_device *spi)
 {
 	struct device_node *node = spi->dev.of_node;
@@ -541,17 +513,12 @@ static int tpo_td043_probe(struct spi_device *spi)
 
 	ddata->spi = spi;
 
-	if (dev_get_platdata(&spi->dev)) {
-		r = tpo_td043_probe_pdata(spi);
-		if (r)
-			return r;
-	} else if (spi->dev.of_node) {
-		r = tpo_td043_probe_of(spi);
-		if (r)
-			return r;
-	} else {
+	if (!spi->dev.of_node)
 		return -ENODEV;
-	}
+
+	r = tpo_td043_probe_of(spi);
+	if (r)
+		return r;
 
 	ddata->mode = TPO_R02_MODE_800x480;
 	memcpy(ddata->gamma, tpo_td043_def_gamma, sizeof(ddata->gamma));
diff --git a/drivers/gpu/drm/omapdrm/dss/Makefile b/drivers/gpu/drm/omapdrm/dss/Makefile
index b5136d3d4b77..b651ec9751e6 100644
--- a/drivers/gpu/drm/omapdrm/dss/Makefile
+++ b/drivers/gpu/drm/omapdrm/dss/Makefile
@@ -3,9 +3,6 @@ obj-$(CONFIG_OMAP2_DSS) += omapdss.o
 # Core DSS files
 omapdss-y := core.o dss.o dss_features.o dispc.o dispc_coefs.o display.o \
 	output.o dss-of.o pll.o video-pll.o
-# DSS compat layer files
-omapdss-y += manager.o manager-sysfs.o overlay.o overlay-sysfs.o apply.o \
-	dispc-compat.o display-sysfs.o
 omapdss-$(CONFIG_OMAP2_DSS_DPI) += dpi.o
 omapdss-$(CONFIG_OMAP2_DSS_RFBI) += rfbi.o
 omapdss-$(CONFIG_OMAP2_DSS_VENC) += venc.o
diff --git a/drivers/gpu/drm/omapdrm/dss/apply.c b/drivers/gpu/drm/omapdrm/dss/apply.c
deleted file mode 100644
index 663ccc3bf4e5..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/apply.c
+++ /dev/null
@@ -1,1702 +0,0 @@
-/*
- * Copyright (C) 2011 Texas Instruments
- * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "APPLY"
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/jiffies.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-#include "dispc-compat.h"
-
-/*
- * We have 4 levels of cache for the dispc settings. First two are in SW and
- * the latter two in HW.
- *
- *       set_info()
- *          v
- * +--------------------+
- * |     user_info      |
- * +--------------------+
- *          v
- *        apply()
- *          v
- * +--------------------+
- * |       info         |
- * +--------------------+
- *          v
- *      write_regs()
- *          v
- * +--------------------+
- * |  shadow registers  |
- * +--------------------+
- *          v
- * VFP or lcd/digit_enable
- *          v
- * +--------------------+
- * |      registers     |
- * +--------------------+
- */
-
-struct ovl_priv_data {
-
-	bool user_info_dirty;
-	struct omap_overlay_info user_info;
-
-	bool info_dirty;
-	struct omap_overlay_info info;
-
-	bool shadow_info_dirty;
-
-	bool extra_info_dirty;
-	bool shadow_extra_info_dirty;
-
-	bool enabled;
-	u32 fifo_low, fifo_high;
-
-	/*
-	 * True if overlay is to be enabled. Used to check and calculate configs
-	 * for the overlay before it is enabled in the HW.
-	 */
-	bool enabling;
-};
-
-struct mgr_priv_data {
-
-	bool user_info_dirty;
-	struct omap_overlay_manager_info user_info;
-
-	bool info_dirty;
-	struct omap_overlay_manager_info info;
-
-	bool shadow_info_dirty;
-
-	/* If true, GO bit is up and shadow registers cannot be written.
-	 * Never true for manual update displays */
-	bool busy;
-
-	/* If true, dispc output is enabled */
-	bool updating;
-
-	/* If true, a display is enabled using this manager */
-	bool enabled;
-
-	bool extra_info_dirty;
-	bool shadow_extra_info_dirty;
-
-	struct omap_video_timings timings;
-	struct dss_lcd_mgr_config lcd_config;
-
-	void (*framedone_handler)(void *);
-	void *framedone_handler_data;
-};
-
-static struct {
-	struct ovl_priv_data ovl_priv_data_array[MAX_DSS_OVERLAYS];
-	struct mgr_priv_data mgr_priv_data_array[MAX_DSS_MANAGERS];
-
-	bool irq_enabled;
-} dss_data;
-
-/* protects dss_data */
-static spinlock_t data_lock;
-/* lock for blocking functions */
-static DEFINE_MUTEX(apply_lock);
-static DECLARE_COMPLETION(extra_updated_completion);
-
-static void dss_register_vsync_isr(void);
-
-static struct ovl_priv_data *get_ovl_priv(struct omap_overlay *ovl)
-{
-	return &dss_data.ovl_priv_data_array[ovl->id];
-}
-
-static struct mgr_priv_data *get_mgr_priv(struct omap_overlay_manager *mgr)
-{
-	return &dss_data.mgr_priv_data_array[mgr->id];
-}
-
-static void apply_init_priv(void)
-{
-	const int num_ovls = dss_feat_get_num_ovls();
-	struct mgr_priv_data *mp;
-	int i;
-
-	spin_lock_init(&data_lock);
-
-	for (i = 0; i < num_ovls; ++i) {
-		struct ovl_priv_data *op;
-
-		op = &dss_data.ovl_priv_data_array[i];
-
-		op->info.color_mode = OMAP_DSS_COLOR_RGB16;
-		op->info.rotation_type = OMAP_DSS_ROT_DMA;
-
-		op->info.global_alpha = 255;
-
-		switch (i) {
-		case 0:
-			op->info.zorder = 0;
-			break;
-		case 1:
-			op->info.zorder =
-				dss_has_feature(FEAT_ALPHA_FREE_ZORDER) ? 3 : 0;
-			break;
-		case 2:
-			op->info.zorder =
-				dss_has_feature(FEAT_ALPHA_FREE_ZORDER) ? 2 : 0;
-			break;
-		case 3:
-			op->info.zorder =
-				dss_has_feature(FEAT_ALPHA_FREE_ZORDER) ? 1 : 0;
-			break;
-		}
-
-		op->user_info = op->info;
-	}
-
-	/*
-	 * Initialize some of the lcd_config fields for TV manager, this lets
-	 * us prevent checking if the manager is LCD or TV at some places
-	 */
-	mp = &dss_data.mgr_priv_data_array[OMAP_DSS_CHANNEL_DIGIT];
-
-	mp->lcd_config.video_port_width = 24;
-	mp->lcd_config.clock_info.lck_div = 1;
-	mp->lcd_config.clock_info.pck_div = 1;
-}
-
-/*
- * A LCD manager's stallmode decides whether it is in manual or auto update. TV
- * manager is always auto update, stallmode field for TV manager is false by
- * default
- */
-static bool ovl_manual_update(struct omap_overlay *ovl)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(ovl->manager);
-
-	return mp->lcd_config.stallmode;
-}
-
-static bool mgr_manual_update(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	return mp->lcd_config.stallmode;
-}
-
-static int dss_check_settings_low(struct omap_overlay_manager *mgr,
-		bool applying)
-{
-	struct omap_overlay_info *oi;
-	struct omap_overlay_manager_info *mi;
-	struct omap_overlay *ovl;
-	struct omap_overlay_info *ois[MAX_DSS_OVERLAYS];
-	struct ovl_priv_data *op;
-	struct mgr_priv_data *mp;
-
-	mp = get_mgr_priv(mgr);
-
-	if (!mp->enabled)
-		return 0;
-
-	if (applying && mp->user_info_dirty)
-		mi = &mp->user_info;
-	else
-		mi = &mp->info;
-
-	/* collect the infos to be tested into the array */
-	list_for_each_entry(ovl, &mgr->overlays, list) {
-		op = get_ovl_priv(ovl);
-
-		if (!op->enabled && !op->enabling)
-			oi = NULL;
-		else if (applying && op->user_info_dirty)
-			oi = &op->user_info;
-		else
-			oi = &op->info;
-
-		ois[ovl->id] = oi;
-	}
-
-	return dss_mgr_check(mgr, mi, &mp->timings, &mp->lcd_config, ois);
-}
-
-/*
- * check manager and overlay settings using overlay_info from data->info
- */
-static int dss_check_settings(struct omap_overlay_manager *mgr)
-{
-	return dss_check_settings_low(mgr, false);
-}
-
-/*
- * check manager and overlay settings using overlay_info from ovl->info if
- * dirty and from data->info otherwise
- */
-static int dss_check_settings_apply(struct omap_overlay_manager *mgr)
-{
-	return dss_check_settings_low(mgr, true);
-}
-
-static bool need_isr(void)
-{
-	const int num_mgrs = dss_feat_get_num_mgrs();
-	int i;
-
-	for (i = 0; i < num_mgrs; ++i) {
-		struct omap_overlay_manager *mgr;
-		struct mgr_priv_data *mp;
-		struct omap_overlay *ovl;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mp->enabled)
-			continue;
-
-		if (mgr_manual_update(mgr)) {
-			/* to catch FRAMEDONE */
-			if (mp->updating)
-				return true;
-		} else {
-			/* to catch GO bit going down */
-			if (mp->busy)
-				return true;
-
-			/* to write new values to registers */
-			if (mp->info_dirty)
-				return true;
-
-			/* to set GO bit */
-			if (mp->shadow_info_dirty)
-				return true;
-
-			/*
-			 * NOTE: we don't check extra_info flags for disabled
-			 * managers, once the manager is enabled, the extra_info
-			 * related manager changes will be taken in by HW.
-			 */
-
-			/* to write new values to registers */
-			if (mp->extra_info_dirty)
-				return true;
-
-			/* to set GO bit */
-			if (mp->shadow_extra_info_dirty)
-				return true;
-
-			list_for_each_entry(ovl, &mgr->overlays, list) {
-				struct ovl_priv_data *op;
-
-				op = get_ovl_priv(ovl);
-
-				/*
-				 * NOTE: we check extra_info flags even for
-				 * disabled overlays, as extra_infos need to be
-				 * always written.
-				 */
-
-				/* to write new values to registers */
-				if (op->extra_info_dirty)
-					return true;
-
-				/* to set GO bit */
-				if (op->shadow_extra_info_dirty)
-					return true;
-
-				if (!op->enabled)
-					continue;
-
-				/* to write new values to registers */
-				if (op->info_dirty)
-					return true;
-
-				/* to set GO bit */
-				if (op->shadow_info_dirty)
-					return true;
-			}
-		}
-	}
-
-	return false;
-}
-
-static bool need_go(struct omap_overlay_manager *mgr)
-{
-	struct omap_overlay *ovl;
-	struct mgr_priv_data *mp;
-	struct ovl_priv_data *op;
-
-	mp = get_mgr_priv(mgr);
-
-	if (mp->shadow_info_dirty || mp->shadow_extra_info_dirty)
-		return true;
-
-	list_for_each_entry(ovl, &mgr->overlays, list) {
-		op = get_ovl_priv(ovl);
-		if (op->shadow_info_dirty || op->shadow_extra_info_dirty)
-			return true;
-	}
-
-	return false;
-}
-
-/* returns true if an extra_info field is currently being updated */
-static bool extra_info_update_ongoing(void)
-{
-	const int num_mgrs = dss_feat_get_num_mgrs();
-	int i;
-
-	for (i = 0; i < num_mgrs; ++i) {
-		struct omap_overlay_manager *mgr;
-		struct omap_overlay *ovl;
-		struct mgr_priv_data *mp;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mp->enabled)
-			continue;
-
-		if (!mp->updating)
-			continue;
-
-		if (mp->extra_info_dirty || mp->shadow_extra_info_dirty)
-			return true;
-
-		list_for_each_entry(ovl, &mgr->overlays, list) {
-			struct ovl_priv_data *op = get_ovl_priv(ovl);
-
-			if (op->extra_info_dirty || op->shadow_extra_info_dirty)
-				return true;
-		}
-	}
-
-	return false;
-}
-
-/* wait until no extra_info updates are pending */
-static void wait_pending_extra_info_updates(void)
-{
-	bool updating;
-	unsigned long flags;
-	unsigned long t;
-	int r;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	updating = extra_info_update_ongoing();
-
-	if (!updating) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		return;
-	}
-
-	init_completion(&extra_updated_completion);
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	t = msecs_to_jiffies(500);
-	r = wait_for_completion_timeout(&extra_updated_completion, t);
-	if (r == 0)
-		DSSWARN("timeout in wait_pending_extra_info_updates\n");
-}
-
-static struct omap_dss_device *dss_mgr_get_device(struct omap_overlay_manager *mgr)
-{
-	struct omap_dss_device *dssdev;
-
-	dssdev = mgr->output;
-	if (dssdev == NULL)
-		return NULL;
-
-	while (dssdev->dst)
-		dssdev = dssdev->dst;
-
-	if (dssdev->driver)
-		return dssdev;
-	else
-		return NULL;
-}
-
-static struct omap_dss_device *dss_ovl_get_device(struct omap_overlay *ovl)
-{
-	return ovl->manager ? dss_mgr_get_device(ovl->manager) : NULL;
-}
-
-static int dss_mgr_wait_for_vsync(struct omap_overlay_manager *mgr)
-{
-	unsigned long timeout = msecs_to_jiffies(500);
-	u32 irq;
-	int r;
-
-	if (mgr->output == NULL)
-		return -ENODEV;
-
-	r = dispc_runtime_get();
-	if (r)
-		return r;
-
-	switch (mgr->output->id) {
-	case OMAP_DSS_OUTPUT_VENC:
-		irq = DISPC_IRQ_EVSYNC_ODD;
-		break;
-	case OMAP_DSS_OUTPUT_HDMI:
-		irq = DISPC_IRQ_EVSYNC_EVEN;
-		break;
-	default:
-		irq = dispc_mgr_get_vsync_irq(mgr->id);
-		break;
-	}
-
-	r = omap_dispc_wait_for_irq_interruptible_timeout(irq, timeout);
-
-	dispc_runtime_put();
-
-	return r;
-}
-
-static int dss_mgr_wait_for_go(struct omap_overlay_manager *mgr)
-{
-	unsigned long timeout = msecs_to_jiffies(500);
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	u32 irq;
-	unsigned long flags;
-	int r;
-	int i;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (mgr_manual_update(mgr)) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		return 0;
-	}
-
-	if (!mp->enabled) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		return 0;
-	}
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	r = dispc_runtime_get();
-	if (r)
-		return r;
-
-	irq = dispc_mgr_get_vsync_irq(mgr->id);
-
-	i = 0;
-	while (1) {
-		bool shadow_dirty, dirty;
-
-		spin_lock_irqsave(&data_lock, flags);
-		dirty = mp->info_dirty;
-		shadow_dirty = mp->shadow_info_dirty;
-		spin_unlock_irqrestore(&data_lock, flags);
-
-		if (!dirty && !shadow_dirty) {
-			r = 0;
-			break;
-		}
-
-		/* 4 iterations is the worst case:
-		 * 1 - initial iteration, dirty = true (between VFP and VSYNC)
-		 * 2 - first VSYNC, dirty = true
-		 * 3 - dirty = false, shadow_dirty = true
-		 * 4 - shadow_dirty = false */
-		if (i++ == 3) {
-			DSSERR("mgr(%d)->wait_for_go() not finishing\n",
-					mgr->id);
-			r = 0;
-			break;
-		}
-
-		r = omap_dispc_wait_for_irq_interruptible_timeout(irq, timeout);
-		if (r == -ERESTARTSYS)
-			break;
-
-		if (r) {
-			DSSERR("mgr(%d)->wait_for_go() timeout\n", mgr->id);
-			break;
-		}
-	}
-
-	dispc_runtime_put();
-
-	return r;
-}
-
-static int dss_mgr_wait_for_go_ovl(struct omap_overlay *ovl)
-{
-	unsigned long timeout = msecs_to_jiffies(500);
-	struct ovl_priv_data *op;
-	struct mgr_priv_data *mp;
-	u32 irq;
-	unsigned long flags;
-	int r;
-	int i;
-
-	if (!ovl->manager)
-		return 0;
-
-	mp = get_mgr_priv(ovl->manager);
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (ovl_manual_update(ovl)) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		return 0;
-	}
-
-	if (!mp->enabled) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		return 0;
-	}
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	r = dispc_runtime_get();
-	if (r)
-		return r;
-
-	irq = dispc_mgr_get_vsync_irq(ovl->manager->id);
-
-	op = get_ovl_priv(ovl);
-	i = 0;
-	while (1) {
-		bool shadow_dirty, dirty;
-
-		spin_lock_irqsave(&data_lock, flags);
-		dirty = op->info_dirty;
-		shadow_dirty = op->shadow_info_dirty;
-		spin_unlock_irqrestore(&data_lock, flags);
-
-		if (!dirty && !shadow_dirty) {
-			r = 0;
-			break;
-		}
-
-		/* 4 iterations is the worst case:
-		 * 1 - initial iteration, dirty = true (between VFP and VSYNC)
-		 * 2 - first VSYNC, dirty = true
-		 * 3 - dirty = false, shadow_dirty = true
-		 * 4 - shadow_dirty = false */
-		if (i++ == 3) {
-			DSSERR("ovl(%d)->wait_for_go() not finishing\n",
-					ovl->id);
-			r = 0;
-			break;
-		}
-
-		r = omap_dispc_wait_for_irq_interruptible_timeout(irq, timeout);
-		if (r == -ERESTARTSYS)
-			break;
-
-		if (r) {
-			DSSERR("ovl(%d)->wait_for_go() timeout\n", ovl->id);
-			break;
-		}
-	}
-
-	dispc_runtime_put();
-
-	return r;
-}
-
-static void dss_ovl_write_regs(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	struct omap_overlay_info *oi;
-	bool replication;
-	struct mgr_priv_data *mp;
-	int r;
-
-	DSSDBG("writing ovl %d regs\n", ovl->id);
-
-	if (!op->enabled || !op->info_dirty)
-		return;
-
-	oi = &op->info;
-
-	mp = get_mgr_priv(ovl->manager);
-
-	replication = dss_ovl_use_replication(mp->lcd_config, oi->color_mode);
-
-	r = dispc_ovl_setup(ovl->id, oi, replication, &mp->timings, false);
-	if (r) {
-		/*
-		 * We can't do much here, as this function can be called from
-		 * vsync interrupt.
-		 */
-		DSSERR("dispc_ovl_setup failed for ovl %d\n", ovl->id);
-
-		/* This will leave fifo configurations in a nonoptimal state */
-		op->enabled = false;
-		dispc_ovl_enable(ovl->id, false);
-		return;
-	}
-
-	op->info_dirty = false;
-	if (mp->updating)
-		op->shadow_info_dirty = true;
-}
-
-static void dss_ovl_write_regs_extra(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	struct mgr_priv_data *mp;
-
-	DSSDBG("writing ovl %d regs extra\n", ovl->id);
-
-	if (!op->extra_info_dirty)
-		return;
-
-	/* note: write also when op->enabled == false, so that the ovl gets
-	 * disabled */
-
-	dispc_ovl_enable(ovl->id, op->enabled);
-	dispc_ovl_set_fifo_threshold(ovl->id, op->fifo_low, op->fifo_high);
-
-	mp = get_mgr_priv(ovl->manager);
-
-	op->extra_info_dirty = false;
-	if (mp->updating)
-		op->shadow_extra_info_dirty = true;
-}
-
-static void dss_mgr_write_regs(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	struct omap_overlay *ovl;
-
-	DSSDBG("writing mgr %d regs\n", mgr->id);
-
-	if (!mp->enabled)
-		return;
-
-	WARN_ON(mp->busy);
-
-	/* Commit overlay settings */
-	list_for_each_entry(ovl, &mgr->overlays, list) {
-		dss_ovl_write_regs(ovl);
-		dss_ovl_write_regs_extra(ovl);
-	}
-
-	if (mp->info_dirty) {
-		dispc_mgr_setup(mgr->id, &mp->info);
-
-		mp->info_dirty = false;
-		if (mp->updating)
-			mp->shadow_info_dirty = true;
-	}
-}
-
-static void dss_mgr_write_regs_extra(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	DSSDBG("writing mgr %d regs extra\n", mgr->id);
-
-	if (!mp->extra_info_dirty)
-		return;
-
-	dispc_mgr_set_timings(mgr->id, &mp->timings);
-
-	/* lcd_config parameters */
-	if (dss_mgr_is_lcd(mgr->id))
-		dispc_mgr_set_lcd_config(mgr->id, &mp->lcd_config);
-
-	mp->extra_info_dirty = false;
-	if (mp->updating)
-		mp->shadow_extra_info_dirty = true;
-}
-
-static void dss_write_regs(void)
-{
-	const int num_mgrs = omap_dss_get_num_overlay_managers();
-	int i;
-
-	for (i = 0; i < num_mgrs; ++i) {
-		struct omap_overlay_manager *mgr;
-		struct mgr_priv_data *mp;
-		int r;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mp->enabled || mgr_manual_update(mgr) || mp->busy)
-			continue;
-
-		r = dss_check_settings(mgr);
-		if (r) {
-			DSSERR("cannot write registers for manager %s: "
-					"illegal configuration\n", mgr->name);
-			continue;
-		}
-
-		dss_mgr_write_regs(mgr);
-		dss_mgr_write_regs_extra(mgr);
-	}
-}
-
-static void dss_set_go_bits(void)
-{
-	const int num_mgrs = omap_dss_get_num_overlay_managers();
-	int i;
-
-	for (i = 0; i < num_mgrs; ++i) {
-		struct omap_overlay_manager *mgr;
-		struct mgr_priv_data *mp;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mp->enabled || mgr_manual_update(mgr) || mp->busy)
-			continue;
-
-		if (!need_go(mgr))
-			continue;
-
-		mp->busy = true;
-
-		if (!dss_data.irq_enabled && need_isr())
-			dss_register_vsync_isr();
-
-		dispc_mgr_go(mgr->id);
-	}
-
-}
-
-static void mgr_clear_shadow_dirty(struct omap_overlay_manager *mgr)
-{
-	struct omap_overlay *ovl;
-	struct mgr_priv_data *mp;
-	struct ovl_priv_data *op;
-
-	mp = get_mgr_priv(mgr);
-	mp->shadow_info_dirty = false;
-	mp->shadow_extra_info_dirty = false;
-
-	list_for_each_entry(ovl, &mgr->overlays, list) {
-		op = get_ovl_priv(ovl);
-		op->shadow_info_dirty = false;
-		op->shadow_extra_info_dirty = false;
-	}
-}
-
-static int dss_mgr_connect_compat(struct omap_overlay_manager *mgr,
-		struct omap_dss_device *dst)
-{
-	return mgr->set_output(mgr, dst);
-}
-
-static void dss_mgr_disconnect_compat(struct omap_overlay_manager *mgr,
-		struct omap_dss_device *dst)
-{
-	mgr->unset_output(mgr);
-}
-
-static void dss_mgr_start_update_compat(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-	int r;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	WARN_ON(mp->updating);
-
-	r = dss_check_settings(mgr);
-	if (r) {
-		DSSERR("cannot start manual update: illegal configuration\n");
-		spin_unlock_irqrestore(&data_lock, flags);
-		return;
-	}
-
-	dss_mgr_write_regs(mgr);
-	dss_mgr_write_regs_extra(mgr);
-
-	mp->updating = true;
-
-	if (!dss_data.irq_enabled && need_isr())
-		dss_register_vsync_isr();
-
-	dispc_mgr_enable_sync(mgr->id);
-
-	spin_unlock_irqrestore(&data_lock, flags);
-}
-
-static void dss_apply_irq_handler(void *data, u32 mask);
-
-static void dss_register_vsync_isr(void)
-{
-	const int num_mgrs = dss_feat_get_num_mgrs();
-	u32 mask;
-	int r, i;
-
-	mask = 0;
-	for (i = 0; i < num_mgrs; ++i)
-		mask |= dispc_mgr_get_vsync_irq(i);
-
-	for (i = 0; i < num_mgrs; ++i)
-		mask |= dispc_mgr_get_framedone_irq(i);
-
-	r = omap_dispc_register_isr(dss_apply_irq_handler, NULL, mask);
-	WARN_ON(r);
-
-	dss_data.irq_enabled = true;
-}
-
-static void dss_unregister_vsync_isr(void)
-{
-	const int num_mgrs = dss_feat_get_num_mgrs();
-	u32 mask;
-	int r, i;
-
-	mask = 0;
-	for (i = 0; i < num_mgrs; ++i)
-		mask |= dispc_mgr_get_vsync_irq(i);
-
-	for (i = 0; i < num_mgrs; ++i)
-		mask |= dispc_mgr_get_framedone_irq(i);
-
-	r = omap_dispc_unregister_isr(dss_apply_irq_handler, NULL, mask);
-	WARN_ON(r);
-
-	dss_data.irq_enabled = false;
-}
-
-static void dss_apply_irq_handler(void *data, u32 mask)
-{
-	const int num_mgrs = dss_feat_get_num_mgrs();
-	int i;
-	bool extra_updating;
-
-	spin_lock(&data_lock);
-
-	/* clear busy, updating flags, shadow_dirty flags */
-	for (i = 0; i < num_mgrs; i++) {
-		struct omap_overlay_manager *mgr;
-		struct mgr_priv_data *mp;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mp->enabled)
-			continue;
-
-		mp->updating = dispc_mgr_is_enabled(i);
-
-		if (!mgr_manual_update(mgr)) {
-			bool was_busy = mp->busy;
-			mp->busy = dispc_mgr_go_busy(i);
-
-			if (was_busy && !mp->busy)
-				mgr_clear_shadow_dirty(mgr);
-		}
-	}
-
-	dss_write_regs();
-	dss_set_go_bits();
-
-	extra_updating = extra_info_update_ongoing();
-	if (!extra_updating)
-		complete_all(&extra_updated_completion);
-
-	/* call framedone handlers for manual update displays */
-	for (i = 0; i < num_mgrs; i++) {
-		struct omap_overlay_manager *mgr;
-		struct mgr_priv_data *mp;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		mp = get_mgr_priv(mgr);
-
-		if (!mgr_manual_update(mgr) || !mp->framedone_handler)
-			continue;
-
-		if (mask & dispc_mgr_get_framedone_irq(i))
-			mp->framedone_handler(mp->framedone_handler_data);
-	}
-
-	if (!need_isr())
-		dss_unregister_vsync_isr();
-
-	spin_unlock(&data_lock);
-}
-
-static void omap_dss_mgr_apply_ovl(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op;
-
-	op = get_ovl_priv(ovl);
-
-	if (!op->user_info_dirty)
-		return;
-
-	op->user_info_dirty = false;
-	op->info_dirty = true;
-	op->info = op->user_info;
-}
-
-static void omap_dss_mgr_apply_mgr(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp;
-
-	mp = get_mgr_priv(mgr);
-
-	if (!mp->user_info_dirty)
-		return;
-
-	mp->user_info_dirty = false;
-	mp->info_dirty = true;
-	mp->info = mp->user_info;
-}
-
-static int omap_dss_mgr_apply(struct omap_overlay_manager *mgr)
-{
-	unsigned long flags;
-	struct omap_overlay *ovl;
-	int r;
-
-	DSSDBG("omap_dss_mgr_apply(%s)\n", mgr->name);
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	r = dss_check_settings_apply(mgr);
-	if (r) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		DSSERR("failed to apply settings: illegal configuration.\n");
-		return r;
-	}
-
-	/* Configure overlays */
-	list_for_each_entry(ovl, &mgr->overlays, list)
-		omap_dss_mgr_apply_ovl(ovl);
-
-	/* Configure manager */
-	omap_dss_mgr_apply_mgr(mgr);
-
-	dss_write_regs();
-	dss_set_go_bits();
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	return 0;
-}
-
-static void dss_apply_ovl_enable(struct omap_overlay *ovl, bool enable)
-{
-	struct ovl_priv_data *op;
-
-	op = get_ovl_priv(ovl);
-
-	if (op->enabled == enable)
-		return;
-
-	op->enabled = enable;
-	op->extra_info_dirty = true;
-}
-
-static void dss_apply_ovl_fifo_thresholds(struct omap_overlay *ovl,
-		u32 fifo_low, u32 fifo_high)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-
-	if (op->fifo_low == fifo_low && op->fifo_high == fifo_high)
-		return;
-
-	op->fifo_low = fifo_low;
-	op->fifo_high = fifo_high;
-	op->extra_info_dirty = true;
-}
-
-static void dss_ovl_setup_fifo(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	u32 fifo_low, fifo_high;
-	bool use_fifo_merge = false;
-
-	if (!op->enabled && !op->enabling)
-		return;
-
-	dispc_ovl_compute_fifo_thresholds(ovl->id, &fifo_low, &fifo_high,
-			use_fifo_merge, ovl_manual_update(ovl));
-
-	dss_apply_ovl_fifo_thresholds(ovl, fifo_low, fifo_high);
-}
-
-static void dss_mgr_setup_fifos(struct omap_overlay_manager *mgr)
-{
-	struct omap_overlay *ovl;
-	struct mgr_priv_data *mp;
-
-	mp = get_mgr_priv(mgr);
-
-	if (!mp->enabled)
-		return;
-
-	list_for_each_entry(ovl, &mgr->overlays, list)
-		dss_ovl_setup_fifo(ovl);
-}
-
-static void dss_setup_fifos(void)
-{
-	const int num_mgrs = omap_dss_get_num_overlay_managers();
-	struct omap_overlay_manager *mgr;
-	int i;
-
-	for (i = 0; i < num_mgrs; ++i) {
-		mgr = omap_dss_get_overlay_manager(i);
-		dss_mgr_setup_fifos(mgr);
-	}
-}
-
-static int dss_mgr_enable_compat(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-	int r;
-
-	mutex_lock(&apply_lock);
-
-	if (mp->enabled)
-		goto out;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	mp->enabled = true;
-
-	r = dss_check_settings(mgr);
-	if (r) {
-		DSSERR("failed to enable manager %d: check_settings failed\n",
-				mgr->id);
-		goto err;
-	}
-
-	dss_setup_fifos();
-
-	dss_write_regs();
-	dss_set_go_bits();
-
-	if (!mgr_manual_update(mgr))
-		mp->updating = true;
-
-	if (!dss_data.irq_enabled && need_isr())
-		dss_register_vsync_isr();
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	if (!mgr_manual_update(mgr))
-		dispc_mgr_enable_sync(mgr->id);
-
-out:
-	mutex_unlock(&apply_lock);
-
-	return 0;
-
-err:
-	mp->enabled = false;
-	spin_unlock_irqrestore(&data_lock, flags);
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static void dss_mgr_disable_compat(struct omap_overlay_manager *mgr)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-
-	mutex_lock(&apply_lock);
-
-	if (!mp->enabled)
-		goto out;
-
-	wait_pending_extra_info_updates();
-
-	if (!mgr_manual_update(mgr))
-		dispc_mgr_disable_sync(mgr->id);
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	mp->updating = false;
-	mp->enabled = false;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-out:
-	mutex_unlock(&apply_lock);
-}
-
-static int dss_mgr_set_info(struct omap_overlay_manager *mgr,
-		struct omap_overlay_manager_info *info)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-	int r;
-
-	r = dss_mgr_simple_check(mgr, info);
-	if (r)
-		return r;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	mp->user_info = *info;
-	mp->user_info_dirty = true;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	return 0;
-}
-
-static void dss_mgr_get_info(struct omap_overlay_manager *mgr,
-		struct omap_overlay_manager_info *info)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	*info = mp->user_info;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-}
-
-static int dss_mgr_set_output(struct omap_overlay_manager *mgr,
-		struct omap_dss_device *output)
-{
-	int r;
-
-	mutex_lock(&apply_lock);
-
-	if (mgr->output) {
-		DSSERR("manager %s is already connected to an output\n",
-			mgr->name);
-		r = -EINVAL;
-		goto err;
-	}
-
-	if ((mgr->supported_outputs & output->id) == 0) {
-		DSSERR("output does not support manager %s\n",
-			mgr->name);
-		r = -EINVAL;
-		goto err;
-	}
-
-	output->manager = mgr;
-	mgr->output = output;
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-err:
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static int dss_mgr_unset_output(struct omap_overlay_manager *mgr)
-{
-	int r;
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-	unsigned long flags;
-
-	mutex_lock(&apply_lock);
-
-	if (!mgr->output) {
-		DSSERR("failed to unset output, output not set\n");
-		r = -EINVAL;
-		goto err;
-	}
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (mp->enabled) {
-		DSSERR("output can't be unset when manager is enabled\n");
-		r = -EINVAL;
-		goto err1;
-	}
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	mgr->output->manager = NULL;
-	mgr->output = NULL;
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-err1:
-	spin_unlock_irqrestore(&data_lock, flags);
-err:
-	mutex_unlock(&apply_lock);
-
-	return r;
-}
-
-static void dss_apply_mgr_timings(struct omap_overlay_manager *mgr,
-		const struct omap_video_timings *timings)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	mp->timings = *timings;
-	mp->extra_info_dirty = true;
-}
-
-static void dss_mgr_set_timings_compat(struct omap_overlay_manager *mgr,
-		const struct omap_video_timings *timings)
-{
-	unsigned long flags;
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (mp->updating) {
-		DSSERR("cannot set timings for %s: manager needs to be disabled\n",
-			mgr->name);
-		goto out;
-	}
-
-	dss_apply_mgr_timings(mgr, timings);
-out:
-	spin_unlock_irqrestore(&data_lock, flags);
-}
-
-static void dss_apply_mgr_lcd_config(struct omap_overlay_manager *mgr,
-		const struct dss_lcd_mgr_config *config)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	mp->lcd_config = *config;
-	mp->extra_info_dirty = true;
-}
-
-static void dss_mgr_set_lcd_config_compat(struct omap_overlay_manager *mgr,
-		const struct dss_lcd_mgr_config *config)
-{
-	unsigned long flags;
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (mp->enabled) {
-		DSSERR("cannot apply lcd config for %s: manager needs to be disabled\n",
-			mgr->name);
-		goto out;
-	}
-
-	dss_apply_mgr_lcd_config(mgr, config);
-out:
-	spin_unlock_irqrestore(&data_lock, flags);
-}
-
-static int dss_ovl_set_info(struct omap_overlay *ovl,
-		struct omap_overlay_info *info)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	int r;
-
-	r = dss_ovl_simple_check(ovl, info);
-	if (r)
-		return r;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	op->user_info = *info;
-	op->user_info_dirty = true;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	return 0;
-}
-
-static void dss_ovl_get_info(struct omap_overlay *ovl,
-		struct omap_overlay_info *info)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	*info = op->user_info;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-}
-
-static int dss_ovl_set_manager(struct omap_overlay *ovl,
-		struct omap_overlay_manager *mgr)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	int r;
-
-	if (!mgr)
-		return -EINVAL;
-
-	mutex_lock(&apply_lock);
-
-	if (ovl->manager) {
-		DSSERR("overlay '%s' already has a manager '%s'\n",
-				ovl->name, ovl->manager->name);
-		r = -EINVAL;
-		goto err;
-	}
-
-	r = dispc_runtime_get();
-	if (r)
-		goto err;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (op->enabled) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		DSSERR("overlay has to be disabled to change the manager\n");
-		r = -EINVAL;
-		goto err1;
-	}
-
-	dispc_ovl_set_channel_out(ovl->id, mgr->id);
-
-	ovl->manager = mgr;
-	list_add_tail(&ovl->list, &mgr->overlays);
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	dispc_runtime_put();
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-
-err1:
-	dispc_runtime_put();
-err:
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static int dss_ovl_unset_manager(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	int r;
-
-	mutex_lock(&apply_lock);
-
-	if (!ovl->manager) {
-		DSSERR("failed to detach overlay: manager not set\n");
-		r = -EINVAL;
-		goto err;
-	}
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (op->enabled) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		DSSERR("overlay has to be disabled to unset the manager\n");
-		r = -EINVAL;
-		goto err;
-	}
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	/* wait for pending extra_info updates to ensure the ovl is disabled */
-	wait_pending_extra_info_updates();
-
-	/*
-	 * For a manual update display, there is no guarantee that the overlay
-	 * is really disabled in HW, we may need an extra update from this
-	 * manager before the configurations can go in. Return an error if the
-	 * overlay needed an update from the manager.
-	 *
-	 * TODO: Instead of returning an error, try to do a dummy manager update
-	 * here to disable the overlay in hardware. Use the *GATED fields in
-	 * the DISPC_CONFIG registers to do a dummy update.
-	 */
-	spin_lock_irqsave(&data_lock, flags);
-
-	if (ovl_manual_update(ovl) && op->extra_info_dirty) {
-		spin_unlock_irqrestore(&data_lock, flags);
-		DSSERR("need an update to change the manager\n");
-		r = -EINVAL;
-		goto err;
-	}
-
-	ovl->manager = NULL;
-	list_del(&ovl->list);
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-err:
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static bool dss_ovl_is_enabled(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	bool e;
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	e = op->enabled;
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	return e;
-}
-
-static int dss_ovl_enable(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	int r;
-
-	mutex_lock(&apply_lock);
-
-	if (op->enabled) {
-		r = 0;
-		goto err1;
-	}
-
-	if (ovl->manager == NULL || ovl->manager->output == NULL) {
-		r = -EINVAL;
-		goto err1;
-	}
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	op->enabling = true;
-
-	r = dss_check_settings(ovl->manager);
-	if (r) {
-		DSSERR("failed to enable overlay %d: check_settings failed\n",
-				ovl->id);
-		goto err2;
-	}
-
-	dss_setup_fifos();
-
-	op->enabling = false;
-	dss_apply_ovl_enable(ovl, true);
-
-	dss_write_regs();
-	dss_set_go_bits();
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-err2:
-	op->enabling = false;
-	spin_unlock_irqrestore(&data_lock, flags);
-err1:
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static int dss_ovl_disable(struct omap_overlay *ovl)
-{
-	struct ovl_priv_data *op = get_ovl_priv(ovl);
-	unsigned long flags;
-	int r;
-
-	mutex_lock(&apply_lock);
-
-	if (!op->enabled) {
-		r = 0;
-		goto err;
-	}
-
-	if (ovl->manager == NULL || ovl->manager->output == NULL) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	spin_lock_irqsave(&data_lock, flags);
-
-	dss_apply_ovl_enable(ovl, false);
-	dss_write_regs();
-	dss_set_go_bits();
-
-	spin_unlock_irqrestore(&data_lock, flags);
-
-	mutex_unlock(&apply_lock);
-
-	return 0;
-
-err:
-	mutex_unlock(&apply_lock);
-	return r;
-}
-
-static int dss_mgr_register_framedone_handler_compat(struct omap_overlay_manager *mgr,
-		void (*handler)(void *), void *data)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	if (mp->framedone_handler)
-		return -EBUSY;
-
-	mp->framedone_handler = handler;
-	mp->framedone_handler_data = data;
-
-	return 0;
-}
-
-static void dss_mgr_unregister_framedone_handler_compat(struct omap_overlay_manager *mgr,
-		void (*handler)(void *), void *data)
-{
-	struct mgr_priv_data *mp = get_mgr_priv(mgr);
-
-	WARN_ON(mp->framedone_handler != handler ||
-			mp->framedone_handler_data != data);
-
-	mp->framedone_handler = NULL;
-	mp->framedone_handler_data = NULL;
-}
-
-static const struct dss_mgr_ops apply_mgr_ops = {
-	.connect = dss_mgr_connect_compat,
-	.disconnect = dss_mgr_disconnect_compat,
-	.start_update = dss_mgr_start_update_compat,
-	.enable = dss_mgr_enable_compat,
-	.disable = dss_mgr_disable_compat,
-	.set_timings = dss_mgr_set_timings_compat,
-	.set_lcd_config = dss_mgr_set_lcd_config_compat,
-	.register_framedone_handler = dss_mgr_register_framedone_handler_compat,
-	.unregister_framedone_handler = dss_mgr_unregister_framedone_handler_compat,
-};
-
-static int compat_refcnt;
-static DEFINE_MUTEX(compat_init_lock);
-
-int omapdss_compat_init(void)
-{
-	struct platform_device *pdev = dss_get_core_pdev();
-	int i, r;
-
-	mutex_lock(&compat_init_lock);
-
-	if (compat_refcnt++ > 0)
-		goto out;
-
-	apply_init_priv();
-
-	dss_init_overlay_managers_sysfs(pdev);
-	dss_init_overlays(pdev);
-
-	for (i = 0; i < omap_dss_get_num_overlay_managers(); i++) {
-		struct omap_overlay_manager *mgr;
-
-		mgr = omap_dss_get_overlay_manager(i);
-
-		mgr->set_output = &dss_mgr_set_output;
-		mgr->unset_output = &dss_mgr_unset_output;
-		mgr->apply = &omap_dss_mgr_apply;
-		mgr->set_manager_info = &dss_mgr_set_info;
-		mgr->get_manager_info = &dss_mgr_get_info;
-		mgr->wait_for_go = &dss_mgr_wait_for_go;
-		mgr->wait_for_vsync = &dss_mgr_wait_for_vsync;
-		mgr->get_device = &dss_mgr_get_device;
-	}
-
-	for (i = 0; i < omap_dss_get_num_overlays(); i++) {
-		struct omap_overlay *ovl = omap_dss_get_overlay(i);
-
-		ovl->is_enabled = &dss_ovl_is_enabled;
-		ovl->enable = &dss_ovl_enable;
-		ovl->disable = &dss_ovl_disable;
-		ovl->set_manager = &dss_ovl_set_manager;
-		ovl->unset_manager = &dss_ovl_unset_manager;
-		ovl->set_overlay_info = &dss_ovl_set_info;
-		ovl->get_overlay_info = &dss_ovl_get_info;
-		ovl->wait_for_go = &dss_mgr_wait_for_go_ovl;
-		ovl->get_device = &dss_ovl_get_device;
-	}
-
-	r = dss_install_mgr_ops(&apply_mgr_ops);
-	if (r)
-		goto err_mgr_ops;
-
-	r = display_init_sysfs(pdev);
-	if (r)
-		goto err_disp_sysfs;
-
-	dispc_runtime_get();
-
-	r = dss_dispc_initialize_irq();
-	if (r)
-		goto err_init_irq;
-
-	dispc_runtime_put();
-
-out:
-	mutex_unlock(&compat_init_lock);
-
-	return 0;
-
-err_init_irq:
-	dispc_runtime_put();
-	display_uninit_sysfs(pdev);
-
-err_disp_sysfs:
-	dss_uninstall_mgr_ops();
-
-err_mgr_ops:
-	dss_uninit_overlay_managers_sysfs(pdev);
-	dss_uninit_overlays(pdev);
-
-	compat_refcnt--;
-
-	mutex_unlock(&compat_init_lock);
-
-	return r;
-}
-EXPORT_SYMBOL(omapdss_compat_init);
-
-void omapdss_compat_uninit(void)
-{
-	struct platform_device *pdev = dss_get_core_pdev();
-
-	mutex_lock(&compat_init_lock);
-
-	if (--compat_refcnt > 0)
-		goto out;
-
-	dss_dispc_uninitialize_irq();
-
-	display_uninit_sysfs(pdev);
-
-	dss_uninstall_mgr_ops();
-
-	dss_uninit_overlay_managers_sysfs(pdev);
-	dss_uninit_overlays(pdev);
-out:
-	mutex_unlock(&compat_init_lock);
-}
-EXPORT_SYMBOL(omapdss_compat_uninit);
diff --git a/drivers/gpu/drm/omapdrm/dss/core.c b/drivers/gpu/drm/omapdrm/dss/core.c
index 54eeb507f9b3..7e4e5bebabbe 100644
--- a/drivers/gpu/drm/omapdrm/dss/core.c
+++ b/drivers/gpu/drm/omapdrm/dss/core.c
@@ -165,32 +165,20 @@ int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *))
 #endif /* CONFIG_OMAP2_DSS_DEBUGFS */
 
 /* PLATFORM DEVICE */
-static int omap_dss_pm_notif(struct notifier_block *b, unsigned long v, void *d)
+
+static void dss_disable_all_devices(void)
 {
-	DSSDBG("pm notif %lu\n", v);
-
-	switch (v) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-	case PM_RESTORE_PREPARE:
-		DSSDBG("suspending displays\n");
-		return dss_suspend_all_devices();
-
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-	case PM_POST_RESTORE:
-		DSSDBG("resuming displays\n");
-		return dss_resume_all_devices();
-
-	default:
-		return 0;
+	struct omap_dss_device *dssdev = NULL;
+
+	for_each_dss_dev(dssdev) {
+		if (!dssdev->driver)
+			continue;
+
+		if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
+			dssdev->driver->disable(dssdev);
 	}
 }
 
-static struct notifier_block omap_dss_pm_notif_block = {
-	.notifier_call = omap_dss_pm_notif,
-};
-
 static int __init omap_dss_probe(struct platform_device *pdev)
 {
 	struct omap_dss_board_info *pdata = pdev->dev.platform_data;
@@ -211,8 +199,6 @@ static int __init omap_dss_probe(struct platform_device *pdev)
 	else if (pdata->default_device)
 		core.default_display_name = pdata->default_device->name;
 
-	register_pm_notifier(&omap_dss_pm_notif_block);
-
 	return 0;
 
 err_debugfs:
@@ -222,8 +208,6 @@ err_debugfs:
 
 static int omap_dss_remove(struct platform_device *pdev)
 {
-	unregister_pm_notifier(&omap_dss_pm_notif_block);
-
 	dss_uninitialize_debugfs();
 
 	return 0;
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc-compat.c b/drivers/gpu/drm/omapdrm/dss/dispc-compat.c
deleted file mode 100644
index 0918b3bfe82a..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/dispc-compat.c
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * Copyright (C) 2012 Texas Instruments
- * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "APPLY"
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/jiffies.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/seq_file.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-#include "dispc-compat.h"
-
-#define DISPC_IRQ_MASK_ERROR            (DISPC_IRQ_GFX_FIFO_UNDERFLOW | \
-					 DISPC_IRQ_OCP_ERR | \
-					 DISPC_IRQ_VID1_FIFO_UNDERFLOW | \
-					 DISPC_IRQ_VID2_FIFO_UNDERFLOW | \
-					 DISPC_IRQ_SYNC_LOST | \
-					 DISPC_IRQ_SYNC_LOST_DIGIT)
-
-#define DISPC_MAX_NR_ISRS		8
-
-struct omap_dispc_isr_data {
-	omap_dispc_isr_t	isr;
-	void			*arg;
-	u32			mask;
-};
-
-struct dispc_irq_stats {
-	unsigned long last_reset;
-	unsigned irq_count;
-	unsigned irqs[32];
-};
-
-static struct {
-	spinlock_t irq_lock;
-	u32 irq_error_mask;
-	struct omap_dispc_isr_data registered_isr[DISPC_MAX_NR_ISRS];
-	u32 error_irqs;
-	struct work_struct error_work;
-
-#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-	spinlock_t irq_stats_lock;
-	struct dispc_irq_stats irq_stats;
-#endif
-} dispc_compat;
-
-
-#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-static void dispc_dump_irqs(struct seq_file *s)
-{
-	unsigned long flags;
-	struct dispc_irq_stats stats;
-
-	spin_lock_irqsave(&dispc_compat.irq_stats_lock, flags);
-
-	stats = dispc_compat.irq_stats;
-	memset(&dispc_compat.irq_stats, 0, sizeof(dispc_compat.irq_stats));
-	dispc_compat.irq_stats.last_reset = jiffies;
-
-	spin_unlock_irqrestore(&dispc_compat.irq_stats_lock, flags);
-
-	seq_printf(s, "period %u ms\n",
-			jiffies_to_msecs(jiffies - stats.last_reset));
-
-	seq_printf(s, "irqs %d\n", stats.irq_count);
-#define PIS(x) \
-	seq_printf(s, "%-20s %10d\n", #x, stats.irqs[ffs(DISPC_IRQ_##x)-1]);
-
-	PIS(FRAMEDONE);
-	PIS(VSYNC);
-	PIS(EVSYNC_EVEN);
-	PIS(EVSYNC_ODD);
-	PIS(ACBIAS_COUNT_STAT);
-	PIS(PROG_LINE_NUM);
-	PIS(GFX_FIFO_UNDERFLOW);
-	PIS(GFX_END_WIN);
-	PIS(PAL_GAMMA_MASK);
-	PIS(OCP_ERR);
-	PIS(VID1_FIFO_UNDERFLOW);
-	PIS(VID1_END_WIN);
-	PIS(VID2_FIFO_UNDERFLOW);
-	PIS(VID2_END_WIN);
-	if (dss_feat_get_num_ovls() > 3) {
-		PIS(VID3_FIFO_UNDERFLOW);
-		PIS(VID3_END_WIN);
-	}
-	PIS(SYNC_LOST);
-	PIS(SYNC_LOST_DIGIT);
-	PIS(WAKEUP);
-	if (dss_has_feature(FEAT_MGR_LCD2)) {
-		PIS(FRAMEDONE2);
-		PIS(VSYNC2);
-		PIS(ACBIAS_COUNT_STAT2);
-		PIS(SYNC_LOST2);
-	}
-	if (dss_has_feature(FEAT_MGR_LCD3)) {
-		PIS(FRAMEDONE3);
-		PIS(VSYNC3);
-		PIS(ACBIAS_COUNT_STAT3);
-		PIS(SYNC_LOST3);
-	}
-#undef PIS
-}
-#endif
-
-/* dispc.irq_lock has to be locked by the caller */
-static void _omap_dispc_set_irqs(void)
-{
-	u32 mask;
-	int i;
-	struct omap_dispc_isr_data *isr_data;
-
-	mask = dispc_compat.irq_error_mask;
-
-	for (i = 0; i < DISPC_MAX_NR_ISRS; i++) {
-		isr_data = &dispc_compat.registered_isr[i];
-
-		if (isr_data->isr == NULL)
-			continue;
-
-		mask |= isr_data->mask;
-	}
-
-	dispc_write_irqenable(mask);
-}
-
-int omap_dispc_register_isr(omap_dispc_isr_t isr, void *arg, u32 mask)
-{
-	int i;
-	int ret;
-	unsigned long flags;
-	struct omap_dispc_isr_data *isr_data;
-
-	if (isr == NULL)
-		return -EINVAL;
-
-	spin_lock_irqsave(&dispc_compat.irq_lock, flags);
-
-	/* check for duplicate entry */
-	for (i = 0; i < DISPC_MAX_NR_ISRS; i++) {
-		isr_data = &dispc_compat.registered_isr[i];
-		if (isr_data->isr == isr && isr_data->arg == arg &&
-				isr_data->mask == mask) {
-			ret = -EINVAL;
-			goto err;
-		}
-	}
-
-	isr_data = NULL;
-	ret = -EBUSY;
-
-	for (i = 0; i < DISPC_MAX_NR_ISRS; i++) {
-		isr_data = &dispc_compat.registered_isr[i];
-
-		if (isr_data->isr != NULL)
-			continue;
-
-		isr_data->isr = isr;
-		isr_data->arg = arg;
-		isr_data->mask = mask;
-		ret = 0;
-
-		break;
-	}
-
-	if (ret)
-		goto err;
-
-	_omap_dispc_set_irqs();
-
-	spin_unlock_irqrestore(&dispc_compat.irq_lock, flags);
-
-	return 0;
-err:
-	spin_unlock_irqrestore(&dispc_compat.irq_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(omap_dispc_register_isr);
-
-int omap_dispc_unregister_isr(omap_dispc_isr_t isr, void *arg, u32 mask)
-{
-	int i;
-	unsigned long flags;
-	int ret = -EINVAL;
-	struct omap_dispc_isr_data *isr_data;
-
-	spin_lock_irqsave(&dispc_compat.irq_lock, flags);
-
-	for (i = 0; i < DISPC_MAX_NR_ISRS; i++) {
-		isr_data = &dispc_compat.registered_isr[i];
-		if (isr_data->isr != isr || isr_data->arg != arg ||
-				isr_data->mask != mask)
-			continue;
-
-		/* found the correct isr */
-
-		isr_data->isr = NULL;
-		isr_data->arg = NULL;
-		isr_data->mask = 0;
-
-		ret = 0;
-		break;
-	}
-
-	if (ret == 0)
-		_omap_dispc_set_irqs();
-
-	spin_unlock_irqrestore(&dispc_compat.irq_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(omap_dispc_unregister_isr);
-
-static void print_irq_status(u32 status)
-{
-	if ((status & dispc_compat.irq_error_mask) == 0)
-		return;
-
-#define PIS(x) (status & DISPC_IRQ_##x) ? (#x " ") : ""
-
-	pr_debug("DISPC IRQ: 0x%x: %s%s%s%s%s%s%s%s%s\n",
-		status,
-		PIS(OCP_ERR),
-		PIS(GFX_FIFO_UNDERFLOW),
-		PIS(VID1_FIFO_UNDERFLOW),
-		PIS(VID2_FIFO_UNDERFLOW),
-		dss_feat_get_num_ovls() > 3 ? PIS(VID3_FIFO_UNDERFLOW) : "",
-		PIS(SYNC_LOST),
-		PIS(SYNC_LOST_DIGIT),
-		dss_has_feature(FEAT_MGR_LCD2) ? PIS(SYNC_LOST2) : "",
-		dss_has_feature(FEAT_MGR_LCD3) ? PIS(SYNC_LOST3) : "");
-#undef PIS
-}
-
-/* Called from dss.c. Note that we don't touch clocks here,
- * but we presume they are on because we got an IRQ. However,
- * an irq handler may turn the clocks off, so we may not have
- * clock later in the function. */
-static irqreturn_t omap_dispc_irq_handler(int irq, void *arg)
-{
-	int i;
-	u32 irqstatus, irqenable;
-	u32 handledirqs = 0;
-	u32 unhandled_errors;
-	struct omap_dispc_isr_data *isr_data;
-	struct omap_dispc_isr_data registered_isr[DISPC_MAX_NR_ISRS];
-
-	spin_lock(&dispc_compat.irq_lock);
-
-	irqstatus = dispc_read_irqstatus();
-	irqenable = dispc_read_irqenable();
-
-	/* IRQ is not for us */
-	if (!(irqstatus & irqenable)) {
-		spin_unlock(&dispc_compat.irq_lock);
-		return IRQ_NONE;
-	}
-
-#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-	spin_lock(&dispc_compat.irq_stats_lock);
-	dispc_compat.irq_stats.irq_count++;
-	dss_collect_irq_stats(irqstatus, dispc_compat.irq_stats.irqs);
-	spin_unlock(&dispc_compat.irq_stats_lock);
-#endif
-
-	print_irq_status(irqstatus);
-
-	/* Ack the interrupt. Do it here before clocks are possibly turned
-	 * off */
-	dispc_clear_irqstatus(irqstatus);
-	/* flush posted write */
-	dispc_read_irqstatus();
-
-	/* make a copy and unlock, so that isrs can unregister
-	 * themselves */
-	memcpy(registered_isr, dispc_compat.registered_isr,
-			sizeof(registered_isr));
-
-	spin_unlock(&dispc_compat.irq_lock);
-
-	for (i = 0; i < DISPC_MAX_NR_ISRS; i++) {
-		isr_data = &registered_isr[i];
-
-		if (!isr_data->isr)
-			continue;
-
-		if (isr_data->mask & irqstatus) {
-			isr_data->isr(isr_data->arg, irqstatus);
-			handledirqs |= isr_data->mask;
-		}
-	}
-
-	spin_lock(&dispc_compat.irq_lock);
-
-	unhandled_errors = irqstatus & ~handledirqs & dispc_compat.irq_error_mask;
-
-	if (unhandled_errors) {
-		dispc_compat.error_irqs |= unhandled_errors;
-
-		dispc_compat.irq_error_mask &= ~unhandled_errors;
-		_omap_dispc_set_irqs();
-
-		schedule_work(&dispc_compat.error_work);
-	}
-
-	spin_unlock(&dispc_compat.irq_lock);
-
-	return IRQ_HANDLED;
-}
-
-static void dispc_error_worker(struct work_struct *work)
-{
-	int i;
-	u32 errors;
-	unsigned long flags;
-	static const unsigned fifo_underflow_bits[] = {
-		DISPC_IRQ_GFX_FIFO_UNDERFLOW,
-		DISPC_IRQ_VID1_FIFO_UNDERFLOW,
-		DISPC_IRQ_VID2_FIFO_UNDERFLOW,
-		DISPC_IRQ_VID3_FIFO_UNDERFLOW,
-	};
-
-	spin_lock_irqsave(&dispc_compat.irq_lock, flags);
-	errors = dispc_compat.error_irqs;
-	dispc_compat.error_irqs = 0;
-	spin_unlock_irqrestore(&dispc_compat.irq_lock, flags);
-
-	dispc_runtime_get();
-
-	for (i = 0; i < omap_dss_get_num_overlays(); ++i) {
-		struct omap_overlay *ovl;
-		unsigned bit;
-
-		ovl = omap_dss_get_overlay(i);
-		bit = fifo_underflow_bits[i];
-
-		if (bit & errors) {
-			DSSERR("FIFO UNDERFLOW on %s, disabling the overlay\n",
-					ovl->name);
-			ovl->disable(ovl);
-			msleep(50);
-		}
-	}
-
-	for (i = 0; i < omap_dss_get_num_overlay_managers(); ++i) {
-		struct omap_overlay_manager *mgr;
-		unsigned bit;
-
-		mgr = omap_dss_get_overlay_manager(i);
-		bit = dispc_mgr_get_sync_lost_irq(i);
-
-		if (bit & errors) {
-			int j;
-
-			DSSERR("SYNC_LOST on channel %s, restarting the output "
-					"with video overlays disabled\n",
-					mgr->name);
-
-			dss_mgr_disable(mgr);
-
-			for (j = 0; j < omap_dss_get_num_overlays(); ++j) {
-				struct omap_overlay *ovl;
-				ovl = omap_dss_get_overlay(j);
-
-				if (ovl->id != OMAP_DSS_GFX &&
-						ovl->manager == mgr)
-					ovl->disable(ovl);
-			}
-
-			dss_mgr_enable(mgr);
-		}
-	}
-
-	if (errors & DISPC_IRQ_OCP_ERR) {
-		DSSERR("OCP_ERR\n");
-		for (i = 0; i < omap_dss_get_num_overlay_managers(); ++i) {
-			struct omap_overlay_manager *mgr;
-
-			mgr = omap_dss_get_overlay_manager(i);
-			dss_mgr_disable(mgr);
-		}
-	}
-
-	spin_lock_irqsave(&dispc_compat.irq_lock, flags);
-	dispc_compat.irq_error_mask |= errors;
-	_omap_dispc_set_irqs();
-	spin_unlock_irqrestore(&dispc_compat.irq_lock, flags);
-
-	dispc_runtime_put();
-}
-
-int dss_dispc_initialize_irq(void)
-{
-	int r;
-
-#ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS
-	spin_lock_init(&dispc_compat.irq_stats_lock);
-	dispc_compat.irq_stats.last_reset = jiffies;
-	dss_debugfs_create_file("dispc_irq", dispc_dump_irqs);
-#endif
-
-	spin_lock_init(&dispc_compat.irq_lock);
-
-	memset(dispc_compat.registered_isr, 0,
-			sizeof(dispc_compat.registered_isr));
-
-	dispc_compat.irq_error_mask = DISPC_IRQ_MASK_ERROR;
-	if (dss_has_feature(FEAT_MGR_LCD2))
-		dispc_compat.irq_error_mask |= DISPC_IRQ_SYNC_LOST2;
-	if (dss_has_feature(FEAT_MGR_LCD3))
-		dispc_compat.irq_error_mask |= DISPC_IRQ_SYNC_LOST3;
-	if (dss_feat_get_num_ovls() > 3)
-		dispc_compat.irq_error_mask |= DISPC_IRQ_VID3_FIFO_UNDERFLOW;
-
-	/*
-	 * there's SYNC_LOST_DIGIT waiting after enabling the DSS,
-	 * so clear it
-	 */
-	dispc_clear_irqstatus(dispc_read_irqstatus());
-
-	INIT_WORK(&dispc_compat.error_work, dispc_error_worker);
-
-	_omap_dispc_set_irqs();
-
-	r = dispc_request_irq(omap_dispc_irq_handler, &dispc_compat);
-	if (r) {
-		DSSERR("dispc_request_irq failed\n");
-		return r;
-	}
-
-	return 0;
-}
-
-void dss_dispc_uninitialize_irq(void)
-{
-	dispc_free_irq(&dispc_compat);
-}
-
-static void dispc_mgr_disable_isr(void *data, u32 mask)
-{
-	struct completion *compl = data;
-	complete(compl);
-}
-
-static void dispc_mgr_enable_lcd_out(enum omap_channel channel)
-{
-	dispc_mgr_enable(channel, true);
-}
-
-static void dispc_mgr_disable_lcd_out(enum omap_channel channel)
-{
-	DECLARE_COMPLETION_ONSTACK(framedone_compl);
-	int r;
-	u32 irq;
-
-	if (!dispc_mgr_is_enabled(channel))
-		return;
-
-	/*
-	 * When we disable LCD output, we need to wait for FRAMEDONE to know
-	 * that DISPC has finished with the LCD output.
-	 */
-
-	irq = dispc_mgr_get_framedone_irq(channel);
-
-	r = omap_dispc_register_isr(dispc_mgr_disable_isr, &framedone_compl,
-			irq);
-	if (r)
-		DSSERR("failed to register FRAMEDONE isr\n");
-
-	dispc_mgr_enable(channel, false);
-
-	/* if we couldn't register for framedone, just sleep and exit */
-	if (r) {
-		msleep(100);
-		return;
-	}
-
-	if (!wait_for_completion_timeout(&framedone_compl,
-				msecs_to_jiffies(100)))
-		DSSERR("timeout waiting for FRAME DONE\n");
-
-	r = omap_dispc_unregister_isr(dispc_mgr_disable_isr, &framedone_compl,
-			irq);
-	if (r)
-		DSSERR("failed to unregister FRAMEDONE isr\n");
-}
-
-static void dispc_digit_out_enable_isr(void *data, u32 mask)
-{
-	struct completion *compl = data;
-
-	/* ignore any sync lost interrupts */
-	if (mask & (DISPC_IRQ_EVSYNC_EVEN | DISPC_IRQ_EVSYNC_ODD))
-		complete(compl);
-}
-
-static void dispc_mgr_enable_digit_out(void)
-{
-	DECLARE_COMPLETION_ONSTACK(vsync_compl);
-	int r;
-	u32 irq_mask;
-
-	if (dispc_mgr_is_enabled(OMAP_DSS_CHANNEL_DIGIT))
-		return;
-
-	/*
-	 * Digit output produces some sync lost interrupts during the first
-	 * frame when enabling. Those need to be ignored, so we register for the
-	 * sync lost irq to prevent the error handler from triggering.
-	 */
-
-	irq_mask = dispc_mgr_get_vsync_irq(OMAP_DSS_CHANNEL_DIGIT) |
-		dispc_mgr_get_sync_lost_irq(OMAP_DSS_CHANNEL_DIGIT);
-
-	r = omap_dispc_register_isr(dispc_digit_out_enable_isr, &vsync_compl,
-			irq_mask);
-	if (r) {
-		DSSERR("failed to register %x isr\n", irq_mask);
-		return;
-	}
-
-	dispc_mgr_enable(OMAP_DSS_CHANNEL_DIGIT, true);
-
-	/* wait for the first evsync */
-	if (!wait_for_completion_timeout(&vsync_compl, msecs_to_jiffies(100)))
-		DSSERR("timeout waiting for digit out to start\n");
-
-	r = omap_dispc_unregister_isr(dispc_digit_out_enable_isr, &vsync_compl,
-			irq_mask);
-	if (r)
-		DSSERR("failed to unregister %x isr\n", irq_mask);
-}
-
-static void dispc_mgr_disable_digit_out(void)
-{
-	DECLARE_COMPLETION_ONSTACK(framedone_compl);
-	int r, i;
-	u32 irq_mask;
-	int num_irqs;
-
-	if (!dispc_mgr_is_enabled(OMAP_DSS_CHANNEL_DIGIT))
-		return;
-
-	/*
-	 * When we disable the digit output, we need to wait for FRAMEDONE to
-	 * know that DISPC has finished with the output.
-	 */
-
-	irq_mask = dispc_mgr_get_framedone_irq(OMAP_DSS_CHANNEL_DIGIT);
-	num_irqs = 1;
-
-	if (!irq_mask) {
-		/*
-		 * omap 2/3 don't have framedone irq for TV, so we need to use
-		 * vsyncs for this.
-		 */
-
-		irq_mask = dispc_mgr_get_vsync_irq(OMAP_DSS_CHANNEL_DIGIT);
-		/*
-		 * We need to wait for both even and odd vsyncs. Note that this
-		 * is not totally reliable, as we could get a vsync interrupt
-		 * before we disable the output, which leads to timeout in the
-		 * wait_for_completion.
-		 */
-		num_irqs = 2;
-	}
-
-	r = omap_dispc_register_isr(dispc_mgr_disable_isr, &framedone_compl,
-			irq_mask);
-	if (r)
-		DSSERR("failed to register %x isr\n", irq_mask);
-
-	dispc_mgr_enable(OMAP_DSS_CHANNEL_DIGIT, false);
-
-	/* if we couldn't register the irq, just sleep and exit */
-	if (r) {
-		msleep(100);
-		return;
-	}
-
-	for (i = 0; i < num_irqs; ++i) {
-		if (!wait_for_completion_timeout(&framedone_compl,
-					msecs_to_jiffies(100)))
-			DSSERR("timeout waiting for digit out to stop\n");
-	}
-
-	r = omap_dispc_unregister_isr(dispc_mgr_disable_isr, &framedone_compl,
-			irq_mask);
-	if (r)
-		DSSERR("failed to unregister %x isr\n", irq_mask);
-}
-
-void dispc_mgr_enable_sync(enum omap_channel channel)
-{
-	if (dss_mgr_is_lcd(channel))
-		dispc_mgr_enable_lcd_out(channel);
-	else if (channel == OMAP_DSS_CHANNEL_DIGIT)
-		dispc_mgr_enable_digit_out();
-	else
-		WARN_ON(1);
-}
-
-void dispc_mgr_disable_sync(enum omap_channel channel)
-{
-	if (dss_mgr_is_lcd(channel))
-		dispc_mgr_disable_lcd_out(channel);
-	else if (channel == OMAP_DSS_CHANNEL_DIGIT)
-		dispc_mgr_disable_digit_out();
-	else
-		WARN_ON(1);
-}
-
-static inline void dispc_irq_wait_handler(void *data, u32 mask)
-{
-	complete((struct completion *)data);
-}
-
-int omap_dispc_wait_for_irq_interruptible_timeout(u32 irqmask,
-		unsigned long timeout)
-{
-
-	int r;
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	r = omap_dispc_register_isr(dispc_irq_wait_handler, &completion,
-			irqmask);
-
-	if (r)
-		return r;
-
-	timeout = wait_for_completion_interruptible_timeout(&completion,
-			timeout);
-
-	omap_dispc_unregister_isr(dispc_irq_wait_handler, &completion, irqmask);
-
-	if (timeout == 0)
-		return -ETIMEDOUT;
-
-	if (timeout == -ERESTARTSYS)
-		return -ERESTARTSYS;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc-compat.h b/drivers/gpu/drm/omapdrm/dss/dispc-compat.h
deleted file mode 100644
index 14a69b3d4fb0..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/dispc-compat.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2012 Texas Instruments
- * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __OMAP2_DSS_DISPC_COMPAT_H
-#define __OMAP2_DSS_DISPC_COMPAT_H
-
-void dispc_mgr_enable_sync(enum omap_channel channel);
-void dispc_mgr_disable_sync(enum omap_channel channel);
-
-int omap_dispc_wait_for_irq_interruptible_timeout(u32 irqmask,
-		unsigned long timeout);
-
-int dss_dispc_initialize_irq(void);
-void dss_dispc_uninitialize_irq(void);
-
-#endif
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 6b50476ec669..f83608b69e68 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -104,6 +104,15 @@ struct dispc_features {
 	bool supports_sync_align:1;
 
 	bool has_writeback:1;
+
+	bool supports_double_pixel:1;
+
+	/*
+	 * Field order for VENC is different than HDMI. We should handle this in
+	 * some intelligent manner, but as the SoCs have either HDMI or VENC,
+	 * never both, we can just use this flag for now.
+	 */
+	bool reverse_ilace_field_order:1;
 };
 
 #define DISPC_MAX_NR_FIFOS 5
@@ -2552,47 +2561,6 @@ static int dispc_ovl_calc_scaling(unsigned long pclk, unsigned long lclk,
 	return 0;
 }
 
-int dispc_ovl_check(enum omap_plane plane, enum omap_channel channel,
-		const struct omap_overlay_info *oi,
-		const struct omap_video_timings *timings,
-		int *x_predecim, int *y_predecim)
-{
-	enum omap_overlay_caps caps = dss_feat_get_overlay_caps(plane);
-	bool five_taps = true;
-	bool fieldmode = false;
-	u16 in_height = oi->height;
-	u16 in_width = oi->width;
-	bool ilace = timings->interlace;
-	u16 out_width, out_height;
-	int pos_x = oi->pos_x;
-	unsigned long pclk = dispc_mgr_pclk_rate(channel);
-	unsigned long lclk = dispc_mgr_lclk_rate(channel);
-
-	out_width = oi->out_width == 0 ? oi->width : oi->out_width;
-	out_height = oi->out_height == 0 ? oi->height : oi->out_height;
-
-	if (ilace && oi->height == out_height)
-		fieldmode = true;
-
-	if (ilace) {
-		if (fieldmode)
-			in_height /= 2;
-		out_height /= 2;
-
-		DSSDBG("adjusting for ilace: height %d, out_height %d\n",
-				in_height, out_height);
-	}
-
-	if (!dss_feat_color_mode_supported(plane, oi->color_mode))
-		return -EINVAL;
-
-	return dispc_ovl_calc_scaling(pclk, lclk, caps, timings, in_width,
-			in_height, out_width, out_height, oi->color_mode,
-			&five_taps, x_predecim, y_predecim, pos_x,
-			oi->rotation_type, false);
-}
-EXPORT_SYMBOL(dispc_ovl_check);
-
 static int dispc_ovl_setup_common(enum omap_plane plane,
 		enum omap_overlay_caps caps, u32 paddr, u32 p_uv_addr,
 		u16 screen_width, int pos_x, int pos_y, u16 width, u16 height,
@@ -2747,6 +2715,9 @@ static int dispc_ovl_setup_common(enum omap_plane plane,
 
 	dispc_ovl_configure_burst_type(plane, rotation_type);
 
+	if (dispc.feat->reverse_ilace_field_order)
+		swap(offset0, offset1);
+
 	dispc_ovl_set_ba0(plane, paddr + offset0);
 	dispc_ovl_set_ba1(plane, paddr + offset1);
 
@@ -2898,6 +2869,12 @@ bool dispc_ovl_enabled(enum omap_plane plane)
 }
 EXPORT_SYMBOL(dispc_ovl_enabled);
 
+enum omap_dss_output_id dispc_mgr_get_supported_outputs(enum omap_channel channel)
+{
+	return dss_feat_get_supported_outputs(channel);
+}
+EXPORT_SYMBOL(dispc_mgr_get_supported_outputs);
+
 void dispc_mgr_enable(enum omap_channel channel, bool enable)
 {
 	mgr_fld_write(channel, DISPC_MGR_FLD_ENABLE, enable);
@@ -3287,6 +3264,10 @@ void dispc_mgr_set_timings(enum omap_channel channel,
 	} else {
 		if (t.interlace)
 			t.y_res /= 2;
+
+		if (dispc.feat->supports_double_pixel)
+			REG_FLD_MOD(DISPC_CONTROL, t.double_pixel ? 1 : 0,
+				19, 17);
 	}
 
 	dispc_mgr_set_size(channel, t.x_res, t.y_res);
@@ -3951,6 +3932,8 @@ static const struct dispc_features omap44xx_dispc_feats = {
 	.set_max_preload	=	true,
 	.supports_sync_align	=	true,
 	.has_writeback		=	true,
+	.supports_double_pixel	=	true,
+	.reverse_ilace_field_order =	true,
 };
 
 static const struct dispc_features omap54xx_dispc_feats = {
@@ -3974,6 +3957,8 @@ static const struct dispc_features omap54xx_dispc_feats = {
 	.set_max_preload	=	true,
 	.supports_sync_align	=	true,
 	.has_writeback		=	true,
+	.supports_double_pixel	=	true,
+	.reverse_ilace_field_order =	true,
 };
 
 static int dispc_init_features(struct platform_device *pdev)
@@ -4129,8 +4114,6 @@ static int dispc_bind(struct device *dev, struct device *master, void *data)
 
 	dispc_runtime_put();
 
-	dss_init_overlay_managers();
-
 	dss_debugfs_create_file("dispc", dispc_dump_regs);
 
 	return 0;
@@ -4144,8 +4127,6 @@ static void dispc_unbind(struct device *dev, struct device *master,
 			       void *data)
 {
 	pm_runtime_disable(dev);
-
-	dss_uninit_overlay_managers();
 }
 
 static const struct component_ops dispc_component_ops = {
diff --git a/drivers/gpu/drm/omapdrm/dss/display-sysfs.c b/drivers/gpu/drm/omapdrm/dss/display-sysfs.c
deleted file mode 100644
index 6ad0991f8259..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/display-sysfs.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (C) 2009 Nokia Corporation
- * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
- *
- * Some code and ideas taken from drivers/video/omap/ driver
- * by Imre Deak.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "DISPLAY"
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/sysfs.h>
-
-#include <video/omapdss.h>
-#include "dss.h"
-
-static ssize_t display_name_show(struct omap_dss_device *dssdev, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%s\n",
-			dssdev->name ?
-			dssdev->name : "");
-}
-
-static ssize_t display_enabled_show(struct omap_dss_device *dssdev, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			omapdss_device_is_enabled(dssdev));
-}
-
-static ssize_t display_enabled_store(struct omap_dss_device *dssdev,
-		const char *buf, size_t size)
-{
-	int r;
-	bool enable;
-
-	r = strtobool(buf, &enable);
-	if (r)
-		return r;
-
-	if (enable == omapdss_device_is_enabled(dssdev))
-		return size;
-
-	if (omapdss_device_is_connected(dssdev) == false)
-		return -ENODEV;
-
-	if (enable) {
-		r = dssdev->driver->enable(dssdev);
-		if (r)
-			return r;
-	} else {
-		dssdev->driver->disable(dssdev);
-	}
-
-	return size;
-}
-
-static ssize_t display_tear_show(struct omap_dss_device *dssdev, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			dssdev->driver->get_te ?
-			dssdev->driver->get_te(dssdev) : 0);
-}
-
-static ssize_t display_tear_store(struct omap_dss_device *dssdev,
-	const char *buf, size_t size)
-{
-	int r;
-	bool te;
-
-	if (!dssdev->driver->enable_te || !dssdev->driver->get_te)
-		return -ENOENT;
-
-	r = strtobool(buf, &te);
-	if (r)
-		return r;
-
-	r = dssdev->driver->enable_te(dssdev, te);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t display_timings_show(struct omap_dss_device *dssdev, char *buf)
-{
-	struct omap_video_timings t;
-
-	if (!dssdev->driver->get_timings)
-		return -ENOENT;
-
-	dssdev->driver->get_timings(dssdev, &t);
-
-	return snprintf(buf, PAGE_SIZE, "%u,%u/%u/%u/%u,%u/%u/%u/%u\n",
-			t.pixelclock,
-			t.x_res, t.hfp, t.hbp, t.hsw,
-			t.y_res, t.vfp, t.vbp, t.vsw);
-}
-
-static ssize_t display_timings_store(struct omap_dss_device *dssdev,
-	const char *buf, size_t size)
-{
-	struct omap_video_timings t = dssdev->panel.timings;
-	int r, found;
-
-	if (!dssdev->driver->set_timings || !dssdev->driver->check_timings)
-		return -ENOENT;
-
-	found = 0;
-#ifdef CONFIG_OMAP2_DSS_VENC
-	if (strncmp("pal", buf, 3) == 0) {
-		t = omap_dss_pal_timings;
-		found = 1;
-	} else if (strncmp("ntsc", buf, 4) == 0) {
-		t = omap_dss_ntsc_timings;
-		found = 1;
-	}
-#endif
-	if (!found && sscanf(buf, "%u,%hu/%hu/%hu/%hu,%hu/%hu/%hu/%hu",
-				&t.pixelclock,
-				&t.x_res, &t.hfp, &t.hbp, &t.hsw,
-				&t.y_res, &t.vfp, &t.vbp, &t.vsw) != 9)
-		return -EINVAL;
-
-	r = dssdev->driver->check_timings(dssdev, &t);
-	if (r)
-		return r;
-
-	dssdev->driver->disable(dssdev);
-	dssdev->driver->set_timings(dssdev, &t);
-	r = dssdev->driver->enable(dssdev);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t display_rotate_show(struct omap_dss_device *dssdev, char *buf)
-{
-	int rotate;
-	if (!dssdev->driver->get_rotate)
-		return -ENOENT;
-	rotate = dssdev->driver->get_rotate(dssdev);
-	return snprintf(buf, PAGE_SIZE, "%u\n", rotate);
-}
-
-static ssize_t display_rotate_store(struct omap_dss_device *dssdev,
-	const char *buf, size_t size)
-{
-	int rot, r;
-
-	if (!dssdev->driver->set_rotate || !dssdev->driver->get_rotate)
-		return -ENOENT;
-
-	r = kstrtoint(buf, 0, &rot);
-	if (r)
-		return r;
-
-	r = dssdev->driver->set_rotate(dssdev, rot);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t display_mirror_show(struct omap_dss_device *dssdev, char *buf)
-{
-	int mirror;
-	if (!dssdev->driver->get_mirror)
-		return -ENOENT;
-	mirror = dssdev->driver->get_mirror(dssdev);
-	return snprintf(buf, PAGE_SIZE, "%u\n", mirror);
-}
-
-static ssize_t display_mirror_store(struct omap_dss_device *dssdev,
-	const char *buf, size_t size)
-{
-	int r;
-	bool mirror;
-
-	if (!dssdev->driver->set_mirror || !dssdev->driver->get_mirror)
-		return -ENOENT;
-
-	r = strtobool(buf, &mirror);
-	if (r)
-		return r;
-
-	r = dssdev->driver->set_mirror(dssdev, mirror);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t display_wss_show(struct omap_dss_device *dssdev, char *buf)
-{
-	unsigned int wss;
-
-	if (!dssdev->driver->get_wss)
-		return -ENOENT;
-
-	wss = dssdev->driver->get_wss(dssdev);
-
-	return snprintf(buf, PAGE_SIZE, "0x%05x\n", wss);
-}
-
-static ssize_t display_wss_store(struct omap_dss_device *dssdev,
-	const char *buf, size_t size)
-{
-	u32 wss;
-	int r;
-
-	if (!dssdev->driver->get_wss || !dssdev->driver->set_wss)
-		return -ENOENT;
-
-	r = kstrtou32(buf, 0, &wss);
-	if (r)
-		return r;
-
-	if (wss > 0xfffff)
-		return -EINVAL;
-
-	r = dssdev->driver->set_wss(dssdev, wss);
-	if (r)
-		return r;
-
-	return size;
-}
-
-struct display_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct omap_dss_device *, char *);
-	ssize_t	(*store)(struct omap_dss_device *, const char *, size_t);
-};
-
-#define DISPLAY_ATTR(_name, _mode, _show, _store) \
-	struct display_attribute display_attr_##_name = \
-	__ATTR(_name, _mode, _show, _store)
-
-static DISPLAY_ATTR(name, S_IRUGO, display_name_show, NULL);
-static DISPLAY_ATTR(display_name, S_IRUGO, display_name_show, NULL);
-static DISPLAY_ATTR(enabled, S_IRUGO|S_IWUSR,
-		display_enabled_show, display_enabled_store);
-static DISPLAY_ATTR(tear_elim, S_IRUGO|S_IWUSR,
-		display_tear_show, display_tear_store);
-static DISPLAY_ATTR(timings, S_IRUGO|S_IWUSR,
-		display_timings_show, display_timings_store);
-static DISPLAY_ATTR(rotate, S_IRUGO|S_IWUSR,
-		display_rotate_show, display_rotate_store);
-static DISPLAY_ATTR(mirror, S_IRUGO|S_IWUSR,
-		display_mirror_show, display_mirror_store);
-static DISPLAY_ATTR(wss, S_IRUGO|S_IWUSR,
-		display_wss_show, display_wss_store);
-
-static struct attribute *display_sysfs_attrs[] = {
-	&display_attr_name.attr,
-	&display_attr_display_name.attr,
-	&display_attr_enabled.attr,
-	&display_attr_tear_elim.attr,
-	&display_attr_timings.attr,
-	&display_attr_rotate.attr,
-	&display_attr_mirror.attr,
-	&display_attr_wss.attr,
-	NULL
-};
-
-static ssize_t display_attr_show(struct kobject *kobj, struct attribute *attr,
-		char *buf)
-{
-	struct omap_dss_device *dssdev;
-	struct display_attribute *display_attr;
-
-	dssdev = container_of(kobj, struct omap_dss_device, kobj);
-	display_attr = container_of(attr, struct display_attribute, attr);
-
-	if (!display_attr->show)
-		return -ENOENT;
-
-	return display_attr->show(dssdev, buf);
-}
-
-static ssize_t display_attr_store(struct kobject *kobj, struct attribute *attr,
-		const char *buf, size_t size)
-{
-	struct omap_dss_device *dssdev;
-	struct display_attribute *display_attr;
-
-	dssdev = container_of(kobj, struct omap_dss_device, kobj);
-	display_attr = container_of(attr, struct display_attribute, attr);
-
-	if (!display_attr->store)
-		return -ENOENT;
-
-	return display_attr->store(dssdev, buf, size);
-}
-
-static const struct sysfs_ops display_sysfs_ops = {
-	.show = display_attr_show,
-	.store = display_attr_store,
-};
-
-static struct kobj_type display_ktype = {
-	.sysfs_ops = &display_sysfs_ops,
-	.default_attrs = display_sysfs_attrs,
-};
-
-int display_init_sysfs(struct platform_device *pdev)
-{
-	struct omap_dss_device *dssdev = NULL;
-	int r;
-
-	for_each_dss_dev(dssdev) {
-		r = kobject_init_and_add(&dssdev->kobj, &display_ktype,
-			&pdev->dev.kobj, "%s", dssdev->alias);
-		if (r) {
-			DSSERR("failed to create sysfs files\n");
-			omap_dss_put_device(dssdev);
-			goto err;
-		}
-	}
-
-	return 0;
-
-err:
-	display_uninit_sysfs(pdev);
-
-	return r;
-}
-
-void display_uninit_sysfs(struct platform_device *pdev)
-{
-	struct omap_dss_device *dssdev = NULL;
-
-	for_each_dss_dev(dssdev) {
-		if (kobject_name(&dssdev->kobj) == NULL)
-			continue;
-
-		kobject_del(&dssdev->kobj);
-		kobject_put(&dssdev->kobj);
-
-		memset(&dssdev->kobj, 0, sizeof(dssdev->kobj));
-	}
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/display.c b/drivers/gpu/drm/omapdrm/dss/display.c
index ef5b9027985d..9f3dd09b0a6c 100644
--- a/drivers/gpu/drm/omapdrm/dss/display.c
+++ b/drivers/gpu/drm/omapdrm/dss/display.c
@@ -78,55 +78,6 @@ void omapdss_default_get_timings(struct omap_dss_device *dssdev,
 }
 EXPORT_SYMBOL(omapdss_default_get_timings);
 
-int dss_suspend_all_devices(void)
-{
-	struct omap_dss_device *dssdev = NULL;
-
-	for_each_dss_dev(dssdev) {
-		if (!dssdev->driver)
-			continue;
-
-		if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) {
-			dssdev->driver->disable(dssdev);
-			dssdev->activate_after_resume = true;
-		} else {
-			dssdev->activate_after_resume = false;
-		}
-	}
-
-	return 0;
-}
-
-int dss_resume_all_devices(void)
-{
-	struct omap_dss_device *dssdev = NULL;
-
-	for_each_dss_dev(dssdev) {
-		if (!dssdev->driver)
-			continue;
-
-		if (dssdev->activate_after_resume) {
-			dssdev->driver->enable(dssdev);
-			dssdev->activate_after_resume = false;
-		}
-	}
-
-	return 0;
-}
-
-void dss_disable_all_devices(void)
-{
-	struct omap_dss_device *dssdev = NULL;
-
-	for_each_dss_dev(dssdev) {
-		if (!dssdev->driver)
-			continue;
-
-		if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE)
-			dssdev->driver->disable(dssdev);
-	}
-}
-
 static LIST_HEAD(panel_list);
 static DEFINE_MUTEX(panel_list_mutex);
 static int disp_num_counter;
diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c
index 7953e6a52346..97ea60257884 100644
--- a/drivers/gpu/drm/omapdrm/dss/dpi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dpi.c
@@ -334,7 +334,7 @@ static int dpi_set_dispc_clk(struct dpi_data *dpi, unsigned long pck_req,
 static int dpi_set_mode(struct dpi_data *dpi)
 {
 	struct omap_dss_device *out = &dpi->output;
-	struct omap_overlay_manager *mgr = out->manager;
+	enum omap_channel channel = out->dispc_channel;
 	struct omap_video_timings *t = &dpi->timings;
 	int lck_div = 0, pck_div = 0;
 	unsigned long fck = 0;
@@ -342,7 +342,7 @@ static int dpi_set_mode(struct dpi_data *dpi)
 	int r = 0;
 
 	if (dpi->pll)
-		r = dpi_set_dsi_clk(dpi, mgr->id, t->pixelclock, &fck,
+		r = dpi_set_dsi_clk(dpi, channel, t->pixelclock, &fck,
 				&lck_div, &pck_div);
 	else
 		r = dpi_set_dispc_clk(dpi, t->pixelclock, &fck,
@@ -359,7 +359,7 @@ static int dpi_set_mode(struct dpi_data *dpi)
 		t->pixelclock = pck;
 	}
 
-	dss_mgr_set_timings(mgr, t);
+	dss_mgr_set_timings(channel, t);
 
 	return 0;
 }
@@ -367,7 +367,7 @@ static int dpi_set_mode(struct dpi_data *dpi)
 static void dpi_config_lcd_manager(struct dpi_data *dpi)
 {
 	struct omap_dss_device *out = &dpi->output;
-	struct omap_overlay_manager *mgr = out->manager;
+	enum omap_channel channel = out->dispc_channel;
 
 	dpi->mgr_config.io_pad_mode = DSS_IO_PAD_MODE_BYPASS;
 
@@ -378,13 +378,14 @@ static void dpi_config_lcd_manager(struct dpi_data *dpi)
 
 	dpi->mgr_config.lcden_sig_polarity = 0;
 
-	dss_mgr_set_lcd_config(mgr, &dpi->mgr_config);
+	dss_mgr_set_lcd_config(channel, &dpi->mgr_config);
 }
 
 static int dpi_display_enable(struct omap_dss_device *dssdev)
 {
 	struct dpi_data *dpi = dpi_get_data_from_dssdev(dssdev);
 	struct omap_dss_device *out = &dpi->output;
+	enum omap_channel channel = out->dispc_channel;
 	int r;
 
 	mutex_lock(&dpi->lock);
@@ -395,7 +396,7 @@ static int dpi_display_enable(struct omap_dss_device *dssdev)
 		goto err_no_reg;
 	}
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		r = -ENODEV;
 		goto err_no_out_mgr;
@@ -411,7 +412,7 @@ static int dpi_display_enable(struct omap_dss_device *dssdev)
 	if (r)
 		goto err_get_dispc;
 
-	r = dss_dpi_select_source(out->port_num, out->manager->id);
+	r = dss_dpi_select_source(out->port_num, channel);
 	if (r)
 		goto err_src_sel;
 
@@ -429,7 +430,7 @@ static int dpi_display_enable(struct omap_dss_device *dssdev)
 
 	mdelay(2);
 
-	r = dss_mgr_enable(out->manager);
+	r = dss_mgr_enable(channel);
 	if (r)
 		goto err_mgr_enable;
 
@@ -457,14 +458,14 @@ err_no_reg:
 static void dpi_display_disable(struct omap_dss_device *dssdev)
 {
 	struct dpi_data *dpi = dpi_get_data_from_dssdev(dssdev);
-	struct omap_overlay_manager *mgr = dpi->output.manager;
+	enum omap_channel channel = dpi->output.dispc_channel;
 
 	mutex_lock(&dpi->lock);
 
-	dss_mgr_disable(mgr);
+	dss_mgr_disable(channel);
 
 	if (dpi->pll) {
-		dss_select_lcd_clk_source(mgr->id, OMAP_DSS_CLK_SRC_FCK);
+		dss_select_lcd_clk_source(channel, OMAP_DSS_CLK_SRC_FCK);
 		dss_pll_disable(dpi->pll);
 	}
 
@@ -506,14 +507,17 @@ static int dpi_check_timings(struct omap_dss_device *dssdev,
 			struct omap_video_timings *timings)
 {
 	struct dpi_data *dpi = dpi_get_data_from_dssdev(dssdev);
-	struct omap_overlay_manager *mgr = dpi->output.manager;
+	enum omap_channel channel = dpi->output.dispc_channel;
 	int lck_div, pck_div;
 	unsigned long fck;
 	unsigned long pck;
 	struct dpi_clk_calc_ctx ctx;
 	bool ok;
 
-	if (mgr && !dispc_mgr_timings_ok(mgr->id, timings))
+	if (timings->x_res % 8 != 0)
+		return -EINVAL;
+
+	if (!dispc_mgr_timings_ok(channel, timings))
 		return -EINVAL;
 
 	if (timings->pixelclock == 0)
@@ -660,7 +664,7 @@ static int dpi_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
 	struct dpi_data *dpi = dpi_get_data_from_dssdev(dssdev);
-	struct omap_overlay_manager *mgr;
+	enum omap_channel channel = dpi->output.dispc_channel;
 	int r;
 
 	r = dpi_init_regulator(dpi);
@@ -669,11 +673,7 @@ static int dpi_connect(struct omap_dss_device *dssdev,
 
 	dpi_init_pll(dpi);
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(channel, dssdev);
 	if (r)
 		return r;
 
@@ -681,7 +681,7 @@ static int dpi_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dst->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(channel, dssdev);
 		return r;
 	}
 
@@ -691,6 +691,9 @@ static int dpi_connect(struct omap_dss_device *dssdev,
 static void dpi_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	struct dpi_data *dpi = dpi_get_data_from_dssdev(dssdev);
+	enum omap_channel channel = dpi->output.dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -698,8 +701,7 @@ static void dpi_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(channel, dssdev);
 }
 
 static const struct omapdss_dpi_ops dpi_ops = {
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 43be4b2a7b05..8730646a0cbb 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -214,9 +214,9 @@ struct dsi_reg { u16 module; u16 idx; };
 typedef void (*omap_dsi_isr_t) (void *arg, u32 mask);
 
 static int dsi_display_init_dispc(struct platform_device *dsidev,
-	struct omap_overlay_manager *mgr);
+	enum omap_channel channel);
 static void dsi_display_uninit_dispc(struct platform_device *dsidev,
-	struct omap_overlay_manager *mgr);
+	enum omap_channel channel);
 
 static int dsi_vc_send_null(struct omap_dss_device *dssdev, int channel);
 
@@ -3826,19 +3826,19 @@ static int dsi_enable_video_output(struct omap_dss_device *dssdev, int channel)
 {
 	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
-	struct omap_overlay_manager *mgr = dsi->output.manager;
+	enum omap_channel dispc_channel = dssdev->dispc_channel;
 	int bpp = dsi_get_pixel_size(dsi->pix_fmt);
 	struct omap_dss_device *out = &dsi->output;
 	u8 data_type;
 	u16 word_count;
 	int r;
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		return -ENODEV;
 	}
 
-	r = dsi_display_init_dispc(dsidev, mgr);
+	r = dsi_display_init_dispc(dsidev, dispc_channel);
 	if (r)
 		goto err_init_dispc;
 
@@ -3876,7 +3876,7 @@ static int dsi_enable_video_output(struct omap_dss_device *dssdev, int channel)
 		dsi_if_enable(dsidev, true);
 	}
 
-	r = dss_mgr_enable(mgr);
+	r = dss_mgr_enable(dispc_channel);
 	if (r)
 		goto err_mgr_enable;
 
@@ -3888,7 +3888,7 @@ err_mgr_enable:
 		dsi_vc_enable(dsidev, channel, false);
 	}
 err_pix_fmt:
-	dsi_display_uninit_dispc(dsidev, mgr);
+	dsi_display_uninit_dispc(dsidev, dispc_channel);
 err_init_dispc:
 	return r;
 }
@@ -3897,7 +3897,7 @@ static void dsi_disable_video_output(struct omap_dss_device *dssdev, int channel
 {
 	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
-	struct omap_overlay_manager *mgr = dsi->output.manager;
+	enum omap_channel dispc_channel = dssdev->dispc_channel;
 
 	if (dsi->mode == OMAP_DSS_DSI_VIDEO_MODE) {
 		dsi_if_enable(dsidev, false);
@@ -3910,15 +3910,15 @@ static void dsi_disable_video_output(struct omap_dss_device *dssdev, int channel
 		dsi_if_enable(dsidev, true);
 	}
 
-	dss_mgr_disable(mgr);
+	dss_mgr_disable(dispc_channel);
 
-	dsi_display_uninit_dispc(dsidev, mgr);
+	dsi_display_uninit_dispc(dsidev, dispc_channel);
 }
 
 static void dsi_update_screen_dispc(struct platform_device *dsidev)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
-	struct omap_overlay_manager *mgr = dsi->output.manager;
+	enum omap_channel dispc_channel = dsi->output.dispc_channel;
 	unsigned bytespp;
 	unsigned bytespl;
 	unsigned bytespf;
@@ -3980,9 +3980,9 @@ static void dsi_update_screen_dispc(struct platform_device *dsidev)
 		msecs_to_jiffies(250));
 	BUG_ON(r == 0);
 
-	dss_mgr_set_timings(mgr, &dsi->timings);
+	dss_mgr_set_timings(dispc_channel, &dsi->timings);
 
-	dss_mgr_start_update(mgr);
+	dss_mgr_start_update(dispc_channel);
 
 	if (dsi->te_enabled) {
 		/* disable LP_RX_TO, so that we can receive TE.  Time to wait
@@ -4105,17 +4105,17 @@ static int dsi_configure_dispc_clocks(struct platform_device *dsidev)
 }
 
 static int dsi_display_init_dispc(struct platform_device *dsidev,
-		struct omap_overlay_manager *mgr)
+		enum omap_channel channel)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	int r;
 
-	dss_select_lcd_clk_source(mgr->id, dsi->module_id == 0 ?
+	dss_select_lcd_clk_source(channel, dsi->module_id == 0 ?
 			OMAP_DSS_CLK_SRC_DSI_PLL_HSDIV_DISPC :
 			OMAP_DSS_CLK_SRC_DSI2_PLL_HSDIV_DISPC);
 
 	if (dsi->mode == OMAP_DSS_DSI_CMD_MODE) {
-		r = dss_mgr_register_framedone_handler(mgr,
+		r = dss_mgr_register_framedone_handler(channel,
 				dsi_framedone_irq_callback, dsidev);
 		if (r) {
 			DSSERR("can't register FRAMEDONE handler\n");
@@ -4140,7 +4140,7 @@ static int dsi_display_init_dispc(struct platform_device *dsidev,
 	dsi->timings.de_level = OMAPDSS_SIG_ACTIVE_HIGH;
 	dsi->timings.sync_pclk_edge = OMAPDSS_DRIVE_SIG_FALLING_EDGE;
 
-	dss_mgr_set_timings(mgr, &dsi->timings);
+	dss_mgr_set_timings(channel, &dsi->timings);
 
 	r = dsi_configure_dispc_clocks(dsidev);
 	if (r)
@@ -4151,28 +4151,28 @@ static int dsi_display_init_dispc(struct platform_device *dsidev,
 			dsi_get_pixel_size(dsi->pix_fmt);
 	dsi->mgr_config.lcden_sig_polarity = 0;
 
-	dss_mgr_set_lcd_config(mgr, &dsi->mgr_config);
+	dss_mgr_set_lcd_config(channel, &dsi->mgr_config);
 
 	return 0;
 err1:
 	if (dsi->mode == OMAP_DSS_DSI_CMD_MODE)
-		dss_mgr_unregister_framedone_handler(mgr,
+		dss_mgr_unregister_framedone_handler(channel,
 				dsi_framedone_irq_callback, dsidev);
 err:
-	dss_select_lcd_clk_source(mgr->id, OMAP_DSS_CLK_SRC_FCK);
+	dss_select_lcd_clk_source(channel, OMAP_DSS_CLK_SRC_FCK);
 	return r;
 }
 
 static void dsi_display_uninit_dispc(struct platform_device *dsidev,
-		struct omap_overlay_manager *mgr)
+		enum omap_channel channel)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 
 	if (dsi->mode == OMAP_DSS_DSI_CMD_MODE)
-		dss_mgr_unregister_framedone_handler(mgr,
+		dss_mgr_unregister_framedone_handler(channel,
 				dsi_framedone_irq_callback, dsidev);
 
-	dss_select_lcd_clk_source(mgr->id, OMAP_DSS_CLK_SRC_FCK);
+	dss_select_lcd_clk_source(channel, OMAP_DSS_CLK_SRC_FCK);
 }
 
 static int dsi_configure_dsi_clocks(struct platform_device *dsidev)
@@ -4983,18 +4983,14 @@ static int dsi_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
 	struct platform_device *dsidev = dsi_get_dsidev_from_dssdev(dssdev);
-	struct omap_overlay_manager *mgr;
+	enum omap_channel dispc_channel = dssdev->dispc_channel;
 	int r;
 
 	r = dsi_regulator_init(dsidev);
 	if (r)
 		return r;
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(dispc_channel, dssdev);
 	if (r)
 		return r;
 
@@ -5002,7 +4998,7 @@ static int dsi_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dssdev->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(dispc_channel, dssdev);
 		return r;
 	}
 
@@ -5012,6 +5008,8 @@ static int dsi_connect(struct omap_dss_device *dssdev,
 static void dsi_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	enum omap_channel dispc_channel = dssdev->dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -5019,8 +5017,7 @@ static void dsi_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(dispc_channel, dssdev);
 }
 
 static const struct omapdss_dsi_ops dsi_ops = {
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.h b/drivers/gpu/drm/omapdrm/dss/dss.h
index 9a6453235585..38e6ab50142d 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.h
+++ b/drivers/gpu/drm/omapdrm/dss/dss.h
@@ -25,6 +25,8 @@
 
 #include <linux/interrupt.h>
 
+#include "omapdss.h"
+
 #ifdef pr_fmt
 #undef pr_fmt
 #endif
@@ -205,29 +207,6 @@ void dss_dsi_disable_pads(int dsi_id, unsigned lane_mask);
 int dss_set_min_bus_tput(struct device *dev, unsigned long tput);
 int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *));
 
-/* display */
-int dss_suspend_all_devices(void);
-int dss_resume_all_devices(void);
-void dss_disable_all_devices(void);
-
-int display_init_sysfs(struct platform_device *pdev);
-void display_uninit_sysfs(struct platform_device *pdev);
-
-/* manager */
-int dss_init_overlay_managers(void);
-void dss_uninit_overlay_managers(void);
-int dss_init_overlay_managers_sysfs(struct platform_device *pdev);
-void dss_uninit_overlay_managers_sysfs(struct platform_device *pdev);
-int dss_mgr_simple_check(struct omap_overlay_manager *mgr,
-		const struct omap_overlay_manager_info *info);
-int dss_mgr_check_timings(struct omap_overlay_manager *mgr,
-		const struct omap_video_timings *timings);
-int dss_mgr_check(struct omap_overlay_manager *mgr,
-		struct omap_overlay_manager_info *info,
-		const struct omap_video_timings *mgr_timings,
-		const struct dss_lcd_mgr_config *config,
-		struct omap_overlay_info **overlay_infos);
-
 static inline bool dss_mgr_is_lcd(enum omap_channel id)
 {
 	if (id == OMAP_DSS_CHANNEL_LCD || id == OMAP_DSS_CHANNEL_LCD2 ||
@@ -237,24 +216,6 @@ static inline bool dss_mgr_is_lcd(enum omap_channel id)
 		return false;
 }
 
-int dss_manager_kobj_init(struct omap_overlay_manager *mgr,
-		struct platform_device *pdev);
-void dss_manager_kobj_uninit(struct omap_overlay_manager *mgr);
-
-/* overlay */
-void dss_init_overlays(struct platform_device *pdev);
-void dss_uninit_overlays(struct platform_device *pdev);
-void dss_overlay_setup_dispc_manager(struct omap_overlay_manager *mgr);
-int dss_ovl_simple_check(struct omap_overlay *ovl,
-		const struct omap_overlay_info *info);
-int dss_ovl_check(struct omap_overlay *ovl, struct omap_overlay_info *info,
-		const struct omap_video_timings *mgr_timings);
-bool dss_ovl_use_replication(struct dss_lcd_mgr_config config,
-		enum omap_color_mode mode);
-int dss_overlay_kobj_init(struct omap_overlay *ovl,
-		struct platform_device *pdev);
-void dss_overlay_kobj_uninit(struct omap_overlay *ovl);
-
 /* DSS */
 int dss_init_platform_driver(void) __init;
 void dss_uninit_platform_driver(void);
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index 7103c659a534..f892ae157ff3 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -165,9 +165,10 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 {
 	int r;
 	struct omap_video_timings *p;
-	struct omap_overlay_manager *mgr = hdmi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 	struct hdmi_wp_data *wp = &hdmi.wp;
 	struct dss_pll_clock_info hdmi_cinfo = { 0 };
+	unsigned pc;
 
 	r = hdmi_power_on_core(dssdev);
 	if (r)
@@ -181,7 +182,11 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 
 	DSSDBG("hdmi_power_on x_res= %d y_res = %d\n", p->x_res, p->y_res);
 
-	hdmi_pll_compute(&hdmi.pll, p->pixelclock, &hdmi_cinfo);
+	pc = p->pixelclock;
+	if (p->double_pixel)
+		pc *= 2;
+
+	hdmi_pll_compute(&hdmi.pll, pc, &hdmi_cinfo);
 
 	r = dss_pll_enable(&hdmi.pll.pll);
 	if (r) {
@@ -212,24 +217,24 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 	dispc_enable_gamma_table(0);
 
 	/* tv size */
-	dss_mgr_set_timings(mgr, p);
+	dss_mgr_set_timings(channel, p);
 
-	r = hdmi_wp_video_start(&hdmi.wp);
+	r = dss_mgr_enable(channel);
 	if (r)
-		goto err_vid_enable;
+		goto err_mgr_enable;
 
-	r = dss_mgr_enable(mgr);
+	r = hdmi_wp_video_start(&hdmi.wp);
 	if (r)
-		goto err_mgr_enable;
+		goto err_vid_enable;
 
 	hdmi_wp_set_irqenable(wp,
 		HDMI_IRQ_LINK_CONNECT | HDMI_IRQ_LINK_DISCONNECT);
 
 	return 0;
 
-err_mgr_enable:
-	hdmi_wp_video_stop(&hdmi.wp);
 err_vid_enable:
+	dss_mgr_disable(channel);
+err_mgr_enable:
 	hdmi_wp_set_phy_pwr(&hdmi.wp, HDMI_PHYPWRCMD_OFF);
 err_phy_pwr:
 err_phy_cfg:
@@ -242,14 +247,14 @@ err_pll_enable:
 
 static void hdmi_power_off_full(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = hdmi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
 	hdmi_wp_clear_irqenable(&hdmi.wp, 0xffffffff);
 
-	dss_mgr_disable(mgr);
-
 	hdmi_wp_video_stop(&hdmi.wp);
 
+	dss_mgr_disable(channel);
+
 	hdmi_wp_set_phy_pwr(&hdmi.wp, HDMI_PHYPWRCMD_OFF);
 
 	dss_pll_disable(&hdmi.pll.pll);
@@ -260,9 +265,7 @@ static void hdmi_power_off_full(struct omap_dss_device *dssdev)
 static int hdmi_display_check_timing(struct omap_dss_device *dssdev,
 					struct omap_video_timings *timings)
 {
-	struct omap_dss_device *out = &hdmi.output;
-
-	if (!dispc_mgr_timings_ok(out->dispc_channel, timings))
+	if (!dispc_mgr_timings_ok(dssdev->dispc_channel, timings))
 		return -EINVAL;
 
 	return 0;
@@ -343,7 +346,7 @@ static int hdmi_display_enable(struct omap_dss_device *dssdev)
 
 	mutex_lock(&hdmi.lock);
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		r = -ENODEV;
 		goto err0;
@@ -433,18 +436,14 @@ static void hdmi_core_disable(struct omap_dss_device *dssdev)
 static int hdmi_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
-	struct omap_overlay_manager *mgr;
+	enum omap_channel channel = dssdev->dispc_channel;
 	int r;
 
 	r = hdmi_init_regulator();
 	if (r)
 		return r;
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(channel, dssdev);
 	if (r)
 		return r;
 
@@ -452,7 +451,7 @@ static int hdmi_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dst->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(channel, dssdev);
 		return r;
 	}
 
@@ -462,6 +461,8 @@ static int hdmi_connect(struct omap_dss_device *dssdev,
 static void hdmi_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	enum omap_channel channel = dssdev->dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -469,8 +470,7 @@ static void hdmi_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(channel, dssdev);
 }
 
 static int hdmi_read_edid(struct omap_dss_device *dssdev,
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index a955a2c4c061..a43f7b10e113 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -182,8 +182,9 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 {
 	int r;
 	struct omap_video_timings *p;
-	struct omap_overlay_manager *mgr = hdmi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 	struct dss_pll_clock_info hdmi_cinfo = { 0 };
+	unsigned pc;
 
 	r = hdmi_power_on_core(dssdev);
 	if (r)
@@ -193,7 +194,11 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 
 	DSSDBG("hdmi_power_on x_res= %d y_res = %d\n", p->x_res, p->y_res);
 
-	hdmi_pll_compute(&hdmi.pll, p->pixelclock, &hdmi_cinfo);
+	pc = p->pixelclock;
+	if (p->double_pixel)
+		pc *= 2;
+
+	hdmi_pll_compute(&hdmi.pll, pc, &hdmi_cinfo);
 
 	/* disable and clear irqs */
 	hdmi_wp_clear_irqenable(&hdmi.wp, 0xffffffff);
@@ -229,24 +234,24 @@ static int hdmi_power_on_full(struct omap_dss_device *dssdev)
 	dispc_enable_gamma_table(0);
 
 	/* tv size */
-	dss_mgr_set_timings(mgr, p);
+	dss_mgr_set_timings(channel, p);
 
-	r = hdmi_wp_video_start(&hdmi.wp);
+	r = dss_mgr_enable(channel);
 	if (r)
-		goto err_vid_enable;
+		goto err_mgr_enable;
 
-	r = dss_mgr_enable(mgr);
+	r = hdmi_wp_video_start(&hdmi.wp);
 	if (r)
-		goto err_mgr_enable;
+		goto err_vid_enable;
 
 	hdmi_wp_set_irqenable(&hdmi.wp,
 			HDMI_IRQ_LINK_CONNECT | HDMI_IRQ_LINK_DISCONNECT);
 
 	return 0;
 
-err_mgr_enable:
-	hdmi_wp_video_stop(&hdmi.wp);
 err_vid_enable:
+	dss_mgr_disable(channel);
+err_mgr_enable:
 	hdmi_wp_set_phy_pwr(&hdmi.wp, HDMI_PHYPWRCMD_OFF);
 err_phy_pwr:
 err_phy_cfg:
@@ -259,14 +264,14 @@ err_pll_enable:
 
 static void hdmi_power_off_full(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = hdmi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
 	hdmi_wp_clear_irqenable(&hdmi.wp, 0xffffffff);
 
-	dss_mgr_disable(mgr);
-
 	hdmi_wp_video_stop(&hdmi.wp);
 
+	dss_mgr_disable(channel);
+
 	hdmi_wp_set_phy_pwr(&hdmi.wp, HDMI_PHYPWRCMD_OFF);
 
 	dss_pll_disable(&hdmi.pll.pll);
@@ -277,13 +282,7 @@ static void hdmi_power_off_full(struct omap_dss_device *dssdev)
 static int hdmi_display_check_timing(struct omap_dss_device *dssdev,
 					struct omap_video_timings *timings)
 {
-	struct omap_dss_device *out = &hdmi.output;
-
-	/* TODO: proper interlace support */
-	if (timings->interlace)
-		return -EINVAL;
-
-	if (!dispc_mgr_timings_ok(out->dispc_channel, timings))
+	if (!dispc_mgr_timings_ok(dssdev->dispc_channel, timings))
 		return -EINVAL;
 
 	return 0;
@@ -373,7 +372,7 @@ static int hdmi_display_enable(struct omap_dss_device *dssdev)
 
 	mutex_lock(&hdmi.lock);
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		r = -ENODEV;
 		goto err0;
@@ -463,18 +462,14 @@ static void hdmi_core_disable(struct omap_dss_device *dssdev)
 static int hdmi_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
-	struct omap_overlay_manager *mgr;
+	enum omap_channel channel = dssdev->dispc_channel;
 	int r;
 
 	r = hdmi_init_regulator();
 	if (r)
 		return r;
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(channel, dssdev);
 	if (r)
 		return r;
 
@@ -482,7 +477,7 @@ static int hdmi_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dst->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(channel, dssdev);
 		return r;
 	}
 
@@ -492,6 +487,8 @@ static int hdmi_connect(struct omap_dss_device *dssdev,
 static void hdmi_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	enum omap_channel channel = dssdev->dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -499,8 +496,7 @@ static void hdmi_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(channel, dssdev);
 }
 
 static int hdmi_read_edid(struct omap_dss_device *dssdev,
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
index 8ea531d2652c..6a397520cae5 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5_core.c
@@ -292,25 +292,36 @@ static void hdmi_core_init(struct hdmi_core_vid_config *video_cfg,
 {
 	DSSDBG("hdmi_core_init\n");
 
+	video_cfg->v_fc_config.timings = cfg->timings;
+
 	/* video core */
 	video_cfg->data_enable_pol = 1; /* It is always 1*/
-	video_cfg->v_fc_config.timings.hsync_level = cfg->timings.hsync_level;
-	video_cfg->v_fc_config.timings.x_res = cfg->timings.x_res;
-	video_cfg->v_fc_config.timings.hsw = cfg->timings.hsw - 1;
-	video_cfg->v_fc_config.timings.hbp = cfg->timings.hbp;
-	video_cfg->v_fc_config.timings.hfp = cfg->timings.hfp;
 	video_cfg->hblank = cfg->timings.hfp +
-				cfg->timings.hbp + cfg->timings.hsw - 1;
-	video_cfg->v_fc_config.timings.vsync_level = cfg->timings.vsync_level;
-	video_cfg->v_fc_config.timings.y_res = cfg->timings.y_res;
-	video_cfg->v_fc_config.timings.vsw = cfg->timings.vsw;
-	video_cfg->v_fc_config.timings.vfp = cfg->timings.vfp;
-	video_cfg->v_fc_config.timings.vbp = cfg->timings.vbp;
-	video_cfg->vblank_osc = 0; /* Always 0 - need to confirm */
+				cfg->timings.hbp + cfg->timings.hsw;
+	video_cfg->vblank_osc = 0;
 	video_cfg->vblank = cfg->timings.vsw +
 				cfg->timings.vfp + cfg->timings.vbp;
 	video_cfg->v_fc_config.hdmi_dvi_mode = cfg->hdmi_dvi_mode;
-	video_cfg->v_fc_config.timings.interlace = cfg->timings.interlace;
+
+	if (cfg->timings.interlace) {
+		/* set vblank_osc if vblank is fractional */
+		if (video_cfg->vblank % 2 != 0)
+			video_cfg->vblank_osc = 1;
+
+		video_cfg->v_fc_config.timings.y_res /= 2;
+		video_cfg->vblank /= 2;
+		video_cfg->v_fc_config.timings.vfp /= 2;
+		video_cfg->v_fc_config.timings.vsw /= 2;
+		video_cfg->v_fc_config.timings.vbp /= 2;
+	}
+
+	if (cfg->timings.double_pixel) {
+		video_cfg->v_fc_config.timings.x_res *= 2;
+		video_cfg->hblank *= 2;
+		video_cfg->v_fc_config.timings.hfp *= 2;
+		video_cfg->v_fc_config.timings.hsw *= 2;
+		video_cfg->v_fc_config.timings.hbp *= 2;
+	}
 }
 
 /* DSS_HDMI_CORE_VIDEO_CONFIG */
@@ -377,6 +388,11 @@ static void hdmi_core_video_config(struct hdmi_core_data *core,
 	/* select DVI mode */
 	REG_FLD_MOD(base, HDMI_CORE_FC_INVIDCONF,
 			cfg->v_fc_config.hdmi_dvi_mode, 3, 3);
+
+	if (cfg->v_fc_config.timings.double_pixel)
+		REG_FLD_MOD(base, HDMI_CORE_FC_PRCONF, 2, 7, 4);
+	else
+		REG_FLD_MOD(base, HDMI_CORE_FC_PRCONF, 1, 7, 4);
 }
 
 static void hdmi_core_config_video_packetizer(struct hdmi_core_data *core)
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c b/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
index 7c544bc56fb5..13442b9052d1 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi_wp.c
@@ -165,12 +165,24 @@ void hdmi_wp_video_config_timing(struct hdmi_wp_data *wp,
 {
 	u32 timing_h = 0;
 	u32 timing_v = 0;
+	unsigned hsw_offset = 1;
 
 	DSSDBG("Enter hdmi_wp_video_config_timing\n");
 
+	/*
+	 * On OMAP4 and OMAP5 ES1 the HSW field is programmed as is. On OMAP5
+	 * ES2+ (including DRA7/AM5 SoCs) HSW field is programmed to hsw-1.
+	 * However, we don't support OMAP5 ES1 at all, so we can just check for
+	 * OMAP4 here.
+	 */
+	if (omapdss_get_version() == OMAPDSS_VER_OMAP4430_ES1 ||
+	    omapdss_get_version() == OMAPDSS_VER_OMAP4430_ES2 ||
+	    omapdss_get_version() == OMAPDSS_VER_OMAP4)
+		hsw_offset = 0;
+
 	timing_h |= FLD_VAL(timings->hbp, 31, 20);
 	timing_h |= FLD_VAL(timings->hfp, 19, 8);
-	timing_h |= FLD_VAL(timings->hsw, 7, 0);
+	timing_h |= FLD_VAL(timings->hsw - hsw_offset, 7, 0);
 	hdmi_write_reg(wp->base, HDMI_WP_VIDEO_TIMING_H, timing_h);
 
 	timing_v |= FLD_VAL(timings->vbp, 31, 20);
@@ -187,8 +199,6 @@ void hdmi_wp_init_vid_fmt_timings(struct hdmi_video_format *video_fmt,
 	video_fmt->packing_mode = HDMI_PACK_10b_RGB_YUV444;
 	video_fmt->y_res = param->timings.y_res;
 	video_fmt->x_res = param->timings.x_res;
-	if (param->timings.interlace)
-		video_fmt->y_res /= 2;
 
 	timings->hbp = param->timings.hbp;
 	timings->hfp = param->timings.hfp;
@@ -196,9 +206,25 @@ void hdmi_wp_init_vid_fmt_timings(struct hdmi_video_format *video_fmt,
 	timings->vbp = param->timings.vbp;
 	timings->vfp = param->timings.vfp;
 	timings->vsw = param->timings.vsw;
+
 	timings->vsync_level = param->timings.vsync_level;
 	timings->hsync_level = param->timings.hsync_level;
 	timings->interlace = param->timings.interlace;
+	timings->double_pixel = param->timings.double_pixel;
+
+	if (param->timings.interlace) {
+		video_fmt->y_res /= 2;
+		timings->vbp /= 2;
+		timings->vfp /= 2;
+		timings->vsw /= 2;
+	}
+
+	if (param->timings.double_pixel) {
+		video_fmt->x_res *= 2;
+		timings->hfp *= 2;
+		timings->hsw *= 2;
+		timings->hbp *= 2;
+	}
 }
 
 void hdmi_wp_audio_config_format(struct hdmi_wp_data *wp,
diff --git a/drivers/gpu/drm/omapdrm/dss/manager-sysfs.c b/drivers/gpu/drm/omapdrm/dss/manager-sysfs.c
deleted file mode 100644
index a7414fb12830..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/manager-sysfs.c
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Copyright (C) 2009 Nokia Corporation
- * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
- *
- * Some code and ideas taken from drivers/video/omap/ driver
- * by Imre Deak.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "MANAGER"
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/jiffies.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-
-static ssize_t manager_name_show(struct omap_overlay_manager *mgr, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%s\n", mgr->name);
-}
-
-static ssize_t manager_display_show(struct omap_overlay_manager *mgr, char *buf)
-{
-	struct omap_dss_device *dssdev = mgr->get_device(mgr);
-
-	return snprintf(buf, PAGE_SIZE, "%s\n", dssdev ?
-			dssdev->name : "<none>");
-}
-
-static int manager_display_match(struct omap_dss_device *dssdev, void *data)
-{
-	const char *str = data;
-
-	return sysfs_streq(dssdev->name, str);
-}
-
-static ssize_t manager_display_store(struct omap_overlay_manager *mgr,
-		const char *buf, size_t size)
-{
-	int r = 0;
-	size_t len = size;
-	struct omap_dss_device *dssdev = NULL;
-	struct omap_dss_device *old_dssdev;
-
-	if (buf[size-1] == '\n')
-		--len;
-
-	if (len > 0)
-		dssdev = omap_dss_find_device((void *)buf,
-			manager_display_match);
-
-	if (len > 0 && dssdev == NULL)
-		return -EINVAL;
-
-	if (dssdev) {
-		DSSDBG("display %s found\n", dssdev->name);
-
-		if (omapdss_device_is_connected(dssdev)) {
-			DSSERR("new display is already connected\n");
-			r = -EINVAL;
-			goto put_device;
-		}
-
-		if (omapdss_device_is_enabled(dssdev)) {
-			DSSERR("new display is not disabled\n");
-			r = -EINVAL;
-			goto put_device;
-		}
-	}
-
-	old_dssdev = mgr->get_device(mgr);
-	if (old_dssdev) {
-		if (omapdss_device_is_enabled(old_dssdev)) {
-			DSSERR("old display is not disabled\n");
-			r = -EINVAL;
-			goto put_device;
-		}
-
-		old_dssdev->driver->disconnect(old_dssdev);
-	}
-
-	if (dssdev) {
-		r = dssdev->driver->connect(dssdev);
-		if (r) {
-			DSSERR("failed to connect new device\n");
-			goto put_device;
-		}
-
-		old_dssdev = mgr->get_device(mgr);
-		if (old_dssdev != dssdev) {
-			DSSERR("failed to connect device to this manager\n");
-			dssdev->driver->disconnect(dssdev);
-			goto put_device;
-		}
-
-		r = mgr->apply(mgr);
-		if (r) {
-			DSSERR("failed to apply dispc config\n");
-			goto put_device;
-		}
-	}
-
-put_device:
-	if (dssdev)
-		omap_dss_put_device(dssdev);
-
-	return r ? r : size;
-}
-
-static ssize_t manager_default_color_show(struct omap_overlay_manager *mgr,
-					  char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%#x\n", info.default_color);
-}
-
-static ssize_t manager_default_color_store(struct omap_overlay_manager *mgr,
-					   const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	u32 color;
-	int r;
-
-	r = kstrtouint(buf, 0, &color);
-	if (r)
-		return r;
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.default_color = color;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static const char *trans_key_type_str[] = {
-	"gfx-destination",
-	"video-source",
-};
-
-static ssize_t manager_trans_key_type_show(struct omap_overlay_manager *mgr,
-					   char *buf)
-{
-	enum omap_dss_trans_key_type key_type;
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	key_type = info.trans_key_type;
-	BUG_ON(key_type >= ARRAY_SIZE(trans_key_type_str));
-
-	return snprintf(buf, PAGE_SIZE, "%s\n", trans_key_type_str[key_type]);
-}
-
-static ssize_t manager_trans_key_type_store(struct omap_overlay_manager *mgr,
-					    const char *buf, size_t size)
-{
-	enum omap_dss_trans_key_type key_type;
-	struct omap_overlay_manager_info info;
-	int r;
-
-	for (key_type = OMAP_DSS_COLOR_KEY_GFX_DST;
-			key_type < ARRAY_SIZE(trans_key_type_str); key_type++) {
-		if (sysfs_streq(buf, trans_key_type_str[key_type]))
-			break;
-	}
-
-	if (key_type == ARRAY_SIZE(trans_key_type_str))
-		return -EINVAL;
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.trans_key_type = key_type;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t manager_trans_key_value_show(struct omap_overlay_manager *mgr,
-					    char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%#x\n", info.trans_key);
-}
-
-static ssize_t manager_trans_key_value_store(struct omap_overlay_manager *mgr,
-					     const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	u32 key_value;
-	int r;
-
-	r = kstrtouint(buf, 0, &key_value);
-	if (r)
-		return r;
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.trans_key = key_value;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t manager_trans_key_enabled_show(struct omap_overlay_manager *mgr,
-					      char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", info.trans_enabled);
-}
-
-static ssize_t manager_trans_key_enabled_store(struct omap_overlay_manager *mgr,
-					       const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	bool enable;
-	int r;
-
-	r = strtobool(buf, &enable);
-	if (r)
-		return r;
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.trans_enabled = enable;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t manager_alpha_blending_enabled_show(
-		struct omap_overlay_manager *mgr, char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	if(!dss_has_feature(FEAT_ALPHA_FIXED_ZORDER))
-		return -ENODEV;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-		info.partial_alpha_enabled);
-}
-
-static ssize_t manager_alpha_blending_enabled_store(
-		struct omap_overlay_manager *mgr,
-		const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	bool enable;
-	int r;
-
-	if(!dss_has_feature(FEAT_ALPHA_FIXED_ZORDER))
-		return -ENODEV;
-
-	r = strtobool(buf, &enable);
-	if (r)
-		return r;
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.partial_alpha_enabled = enable;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t manager_cpr_enable_show(struct omap_overlay_manager *mgr,
-		char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", info.cpr_enable);
-}
-
-static ssize_t manager_cpr_enable_store(struct omap_overlay_manager *mgr,
-		const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	int r;
-	bool enable;
-
-	if (!dss_has_feature(FEAT_CPR))
-		return -ENODEV;
-
-	r = strtobool(buf, &enable);
-	if (r)
-		return r;
-
-	mgr->get_manager_info(mgr, &info);
-
-	if (info.cpr_enable == enable)
-		return size;
-
-	info.cpr_enable = enable;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t manager_cpr_coef_show(struct omap_overlay_manager *mgr,
-		char *buf)
-{
-	struct omap_overlay_manager_info info;
-
-	mgr->get_manager_info(mgr, &info);
-
-	return snprintf(buf, PAGE_SIZE,
-			"%d %d %d %d %d %d %d %d %d\n",
-			info.cpr_coefs.rr,
-			info.cpr_coefs.rg,
-			info.cpr_coefs.rb,
-			info.cpr_coefs.gr,
-			info.cpr_coefs.gg,
-			info.cpr_coefs.gb,
-			info.cpr_coefs.br,
-			info.cpr_coefs.bg,
-			info.cpr_coefs.bb);
-}
-
-static ssize_t manager_cpr_coef_store(struct omap_overlay_manager *mgr,
-		const char *buf, size_t size)
-{
-	struct omap_overlay_manager_info info;
-	struct omap_dss_cpr_coefs coefs;
-	int r, i;
-	s16 *arr;
-
-	if (!dss_has_feature(FEAT_CPR))
-		return -ENODEV;
-
-	if (sscanf(buf, "%hd %hd %hd %hd %hd %hd %hd %hd %hd",
-				&coefs.rr, &coefs.rg, &coefs.rb,
-				&coefs.gr, &coefs.gg, &coefs.gb,
-				&coefs.br, &coefs.bg, &coefs.bb) != 9)
-		return -EINVAL;
-
-	arr = (s16[]){ coefs.rr, coefs.rg, coefs.rb,
-		coefs.gr, coefs.gg, coefs.gb,
-		coefs.br, coefs.bg, coefs.bb };
-
-	for (i = 0; i < 9; ++i) {
-		if (arr[i] < -512 || arr[i] > 511)
-			return -EINVAL;
-	}
-
-	mgr->get_manager_info(mgr, &info);
-
-	info.cpr_coefs = coefs;
-
-	r = mgr->set_manager_info(mgr, &info);
-	if (r)
-		return r;
-
-	r = mgr->apply(mgr);
-	if (r)
-		return r;
-
-	return size;
-}
-
-struct manager_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct omap_overlay_manager *, char *);
-	ssize_t	(*store)(struct omap_overlay_manager *, const char *, size_t);
-};
-
-#define MANAGER_ATTR(_name, _mode, _show, _store) \
-	struct manager_attribute manager_attr_##_name = \
-	__ATTR(_name, _mode, _show, _store)
-
-static MANAGER_ATTR(name, S_IRUGO, manager_name_show, NULL);
-static MANAGER_ATTR(display, S_IRUGO|S_IWUSR,
-		manager_display_show, manager_display_store);
-static MANAGER_ATTR(default_color, S_IRUGO|S_IWUSR,
-		manager_default_color_show, manager_default_color_store);
-static MANAGER_ATTR(trans_key_type, S_IRUGO|S_IWUSR,
-		manager_trans_key_type_show, manager_trans_key_type_store);
-static MANAGER_ATTR(trans_key_value, S_IRUGO|S_IWUSR,
-		manager_trans_key_value_show, manager_trans_key_value_store);
-static MANAGER_ATTR(trans_key_enabled, S_IRUGO|S_IWUSR,
-		manager_trans_key_enabled_show,
-		manager_trans_key_enabled_store);
-static MANAGER_ATTR(alpha_blending_enabled, S_IRUGO|S_IWUSR,
-		manager_alpha_blending_enabled_show,
-		manager_alpha_blending_enabled_store);
-static MANAGER_ATTR(cpr_enable, S_IRUGO|S_IWUSR,
-		manager_cpr_enable_show,
-		manager_cpr_enable_store);
-static MANAGER_ATTR(cpr_coef, S_IRUGO|S_IWUSR,
-		manager_cpr_coef_show,
-		manager_cpr_coef_store);
-
-
-static struct attribute *manager_sysfs_attrs[] = {
-	&manager_attr_name.attr,
-	&manager_attr_display.attr,
-	&manager_attr_default_color.attr,
-	&manager_attr_trans_key_type.attr,
-	&manager_attr_trans_key_value.attr,
-	&manager_attr_trans_key_enabled.attr,
-	&manager_attr_alpha_blending_enabled.attr,
-	&manager_attr_cpr_enable.attr,
-	&manager_attr_cpr_coef.attr,
-	NULL
-};
-
-static ssize_t manager_attr_show(struct kobject *kobj, struct attribute *attr,
-		char *buf)
-{
-	struct omap_overlay_manager *manager;
-	struct manager_attribute *manager_attr;
-
-	manager = container_of(kobj, struct omap_overlay_manager, kobj);
-	manager_attr = container_of(attr, struct manager_attribute, attr);
-
-	if (!manager_attr->show)
-		return -ENOENT;
-
-	return manager_attr->show(manager, buf);
-}
-
-static ssize_t manager_attr_store(struct kobject *kobj, struct attribute *attr,
-		const char *buf, size_t size)
-{
-	struct omap_overlay_manager *manager;
-	struct manager_attribute *manager_attr;
-
-	manager = container_of(kobj, struct omap_overlay_manager, kobj);
-	manager_attr = container_of(attr, struct manager_attribute, attr);
-
-	if (!manager_attr->store)
-		return -ENOENT;
-
-	return manager_attr->store(manager, buf, size);
-}
-
-static const struct sysfs_ops manager_sysfs_ops = {
-	.show = manager_attr_show,
-	.store = manager_attr_store,
-};
-
-static struct kobj_type manager_ktype = {
-	.sysfs_ops = &manager_sysfs_ops,
-	.default_attrs = manager_sysfs_attrs,
-};
-
-int dss_manager_kobj_init(struct omap_overlay_manager *mgr,
-		struct platform_device *pdev)
-{
-	return kobject_init_and_add(&mgr->kobj, &manager_ktype,
-			&pdev->dev.kobj, "manager%d", mgr->id);
-}
-
-void dss_manager_kobj_uninit(struct omap_overlay_manager *mgr)
-{
-	kobject_del(&mgr->kobj);
-	kobject_put(&mgr->kobj);
-
-	memset(&mgr->kobj, 0, sizeof(mgr->kobj));
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/manager.c b/drivers/gpu/drm/omapdrm/dss/manager.c
deleted file mode 100644
index 08a67f4f6a20..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/manager.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * linux/drivers/video/omap2/dss/manager.c
- *
- * Copyright (C) 2009 Nokia Corporation
- * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
- *
- * Some code and ideas taken from drivers/video/omap/ driver
- * by Imre Deak.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "MANAGER"
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/jiffies.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-
-static int num_managers;
-static struct omap_overlay_manager *managers;
-
-int dss_init_overlay_managers(void)
-{
-	int i;
-
-	num_managers = dss_feat_get_num_mgrs();
-
-	managers = kzalloc(sizeof(struct omap_overlay_manager) * num_managers,
-			GFP_KERNEL);
-
-	BUG_ON(managers == NULL);
-
-	for (i = 0; i < num_managers; ++i) {
-		struct omap_overlay_manager *mgr = &managers[i];
-
-		switch (i) {
-		case 0:
-			mgr->name = "lcd";
-			mgr->id = OMAP_DSS_CHANNEL_LCD;
-			break;
-		case 1:
-			mgr->name = "tv";
-			mgr->id = OMAP_DSS_CHANNEL_DIGIT;
-			break;
-		case 2:
-			mgr->name = "lcd2";
-			mgr->id = OMAP_DSS_CHANNEL_LCD2;
-			break;
-		case 3:
-			mgr->name = "lcd3";
-			mgr->id = OMAP_DSS_CHANNEL_LCD3;
-			break;
-		}
-
-		mgr->caps = 0;
-		mgr->supported_displays =
-			dss_feat_get_supported_displays(mgr->id);
-		mgr->supported_outputs =
-			dss_feat_get_supported_outputs(mgr->id);
-
-		INIT_LIST_HEAD(&mgr->overlays);
-	}
-
-	return 0;
-}
-
-int dss_init_overlay_managers_sysfs(struct platform_device *pdev)
-{
-	int i, r;
-
-	for (i = 0; i < num_managers; ++i) {
-		struct omap_overlay_manager *mgr = &managers[i];
-
-		r = dss_manager_kobj_init(mgr, pdev);
-		if (r)
-			DSSERR("failed to create sysfs file\n");
-	}
-
-	return 0;
-}
-
-void dss_uninit_overlay_managers(void)
-{
-	kfree(managers);
-	managers = NULL;
-	num_managers = 0;
-}
-
-void dss_uninit_overlay_managers_sysfs(struct platform_device *pdev)
-{
-	int i;
-
-	for (i = 0; i < num_managers; ++i) {
-		struct omap_overlay_manager *mgr = &managers[i];
-
-		dss_manager_kobj_uninit(mgr);
-	}
-}
-
-int omap_dss_get_num_overlay_managers(void)
-{
-	return num_managers;
-}
-EXPORT_SYMBOL(omap_dss_get_num_overlay_managers);
-
-struct omap_overlay_manager *omap_dss_get_overlay_manager(int num)
-{
-	if (num >= num_managers)
-		return NULL;
-
-	return &managers[num];
-}
-EXPORT_SYMBOL(omap_dss_get_overlay_manager);
-
-int dss_mgr_simple_check(struct omap_overlay_manager *mgr,
-		const struct omap_overlay_manager_info *info)
-{
-	if (dss_has_feature(FEAT_ALPHA_FIXED_ZORDER)) {
-		/*
-		 * OMAP3 supports only graphics source transparency color key
-		 * and alpha blending simultaneously. See TRM 15.4.2.4.2.2
-		 * Alpha Mode.
-		 */
-		if (info->partial_alpha_enabled && info->trans_enabled
-			&& info->trans_key_type != OMAP_DSS_COLOR_KEY_GFX_DST) {
-			DSSERR("check_manager: illegal transparency key\n");
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
-static int dss_mgr_check_zorder(struct omap_overlay_manager *mgr,
-		struct omap_overlay_info **overlay_infos)
-{
-	struct omap_overlay *ovl1, *ovl2;
-	struct omap_overlay_info *info1, *info2;
-
-	list_for_each_entry(ovl1, &mgr->overlays, list) {
-		info1 = overlay_infos[ovl1->id];
-
-		if (info1 == NULL)
-			continue;
-
-		list_for_each_entry(ovl2, &mgr->overlays, list) {
-			if (ovl1 == ovl2)
-				continue;
-
-			info2 = overlay_infos[ovl2->id];
-
-			if (info2 == NULL)
-				continue;
-
-			if (info1->zorder == info2->zorder) {
-				DSSERR("overlays %d and %d have the same "
-						"zorder %d\n",
-					ovl1->id, ovl2->id, info1->zorder);
-				return -EINVAL;
-			}
-		}
-	}
-
-	return 0;
-}
-
-int dss_mgr_check_timings(struct omap_overlay_manager *mgr,
-		const struct omap_video_timings *timings)
-{
-	if (!dispc_mgr_timings_ok(mgr->id, timings)) {
-		DSSERR("check_manager: invalid timings\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int dss_mgr_check_lcd_config(struct omap_overlay_manager *mgr,
-		const struct dss_lcd_mgr_config *config)
-{
-	struct dispc_clock_info cinfo = config->clock_info;
-	int dl = config->video_port_width;
-	bool stallmode = config->stallmode;
-	bool fifohandcheck = config->fifohandcheck;
-
-	if (cinfo.lck_div < 1 || cinfo.lck_div > 255)
-		return -EINVAL;
-
-	if (cinfo.pck_div < 1 || cinfo.pck_div > 255)
-		return -EINVAL;
-
-	if (dl != 12 && dl != 16 && dl != 18 && dl != 24)
-		return -EINVAL;
-
-	/* fifohandcheck should be used only with stallmode */
-	if (!stallmode && fifohandcheck)
-		return -EINVAL;
-
-	/*
-	 * io pad mode can be only checked by using dssdev connected to the
-	 * manager. Ignore checking these for now, add checks when manager
-	 * is capable of holding information related to the connected interface
-	 */
-
-	return 0;
-}
-
-int dss_mgr_check(struct omap_overlay_manager *mgr,
-		struct omap_overlay_manager_info *info,
-		const struct omap_video_timings *mgr_timings,
-		const struct dss_lcd_mgr_config *lcd_config,
-		struct omap_overlay_info **overlay_infos)
-{
-	struct omap_overlay *ovl;
-	int r;
-
-	if (dss_has_feature(FEAT_ALPHA_FREE_ZORDER)) {
-		r = dss_mgr_check_zorder(mgr, overlay_infos);
-		if (r)
-			return r;
-	}
-
-	r = dss_mgr_check_timings(mgr, mgr_timings);
-	if (r)
-		return r;
-
-	r = dss_mgr_check_lcd_config(mgr, lcd_config);
-	if (r)
-		return r;
-
-	list_for_each_entry(ovl, &mgr->overlays, list) {
-		struct omap_overlay_info *oi;
-		int r;
-
-		oi = overlay_infos[ovl->id];
-
-		if (oi == NULL)
-			continue;
-
-		r = dss_ovl_check(ovl, oi, mgr_timings);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
new file mode 100644
index 000000000000..d7e7c909bbc2
--- /dev/null
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2016 Texas Instruments
+ * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __OMAP_DRM_DSS_H
+#define __OMAP_DRM_DSS_H
+
+#include <video/omapdss.h>
+
+u32 dispc_read_irqstatus(void);
+void dispc_clear_irqstatus(u32 mask);
+u32 dispc_read_irqenable(void);
+void dispc_write_irqenable(u32 mask);
+
+int dispc_request_irq(irq_handler_t handler, void *dev_id);
+void dispc_free_irq(void *dev_id);
+
+int dispc_runtime_get(void);
+void dispc_runtime_put(void);
+
+void dispc_mgr_enable(enum omap_channel channel, bool enable);
+bool dispc_mgr_is_enabled(enum omap_channel channel);
+u32 dispc_mgr_get_vsync_irq(enum omap_channel channel);
+u32 dispc_mgr_get_framedone_irq(enum omap_channel channel);
+u32 dispc_mgr_get_sync_lost_irq(enum omap_channel channel);
+bool dispc_mgr_go_busy(enum omap_channel channel);
+void dispc_mgr_go(enum omap_channel channel);
+void dispc_mgr_set_lcd_config(enum omap_channel channel,
+		const struct dss_lcd_mgr_config *config);
+void dispc_mgr_set_timings(enum omap_channel channel,
+		const struct omap_video_timings *timings);
+void dispc_mgr_setup(enum omap_channel channel,
+		const struct omap_overlay_manager_info *info);
+
+int dispc_ovl_enable(enum omap_plane plane, bool enable);
+bool dispc_ovl_enabled(enum omap_plane plane);
+void dispc_ovl_set_channel_out(enum omap_plane plane,
+		enum omap_channel channel);
+int dispc_ovl_setup(enum omap_plane plane, const struct omap_overlay_info *oi,
+		bool replication, const struct omap_video_timings *mgr_timings,
+		bool mem_to_mem);
+
+enum omap_dss_output_id dispc_mgr_get_supported_outputs(enum omap_channel channel);
+
+struct dss_mgr_ops {
+	int (*connect)(enum omap_channel channel,
+		struct omap_dss_device *dst);
+	void (*disconnect)(enum omap_channel channel,
+		struct omap_dss_device *dst);
+
+	void (*start_update)(enum omap_channel channel);
+	int (*enable)(enum omap_channel channel);
+	void (*disable)(enum omap_channel channel);
+	void (*set_timings)(enum omap_channel channel,
+			const struct omap_video_timings *timings);
+	void (*set_lcd_config)(enum omap_channel channel,
+			const struct dss_lcd_mgr_config *config);
+	int (*register_framedone_handler)(enum omap_channel channel,
+			void (*handler)(void *), void *data);
+	void (*unregister_framedone_handler)(enum omap_channel channel,
+			void (*handler)(void *), void *data);
+};
+
+int dss_install_mgr_ops(const struct dss_mgr_ops *mgr_ops);
+void dss_uninstall_mgr_ops(void);
+
+int dss_mgr_connect(enum omap_channel channel,
+		struct omap_dss_device *dst);
+void dss_mgr_disconnect(enum omap_channel channel,
+		struct omap_dss_device *dst);
+void dss_mgr_set_timings(enum omap_channel channel,
+		const struct omap_video_timings *timings);
+void dss_mgr_set_lcd_config(enum omap_channel channel,
+		const struct dss_lcd_mgr_config *config);
+int dss_mgr_enable(enum omap_channel channel);
+void dss_mgr_disable(enum omap_channel channel);
+void dss_mgr_start_update(enum omap_channel channel);
+int dss_mgr_register_framedone_handler(enum omap_channel channel,
+		void (*handler)(void *), void *data);
+void dss_mgr_unregister_framedone_handler(enum omap_channel channel,
+		void (*handler)(void *), void *data);
+
+#endif /* __OMAP_DRM_DSS_H */
diff --git a/drivers/gpu/drm/omapdrm/dss/output.c b/drivers/gpu/drm/omapdrm/dss/output.c
index 16072159bd24..829232ad8c81 100644
--- a/drivers/gpu/drm/omapdrm/dss/output.c
+++ b/drivers/gpu/drm/omapdrm/dss/output.c
@@ -169,24 +169,6 @@ struct omap_dss_device *omapdss_find_output_from_display(struct omap_dss_device
 }
 EXPORT_SYMBOL(omapdss_find_output_from_display);
 
-struct omap_overlay_manager *omapdss_find_mgr_from_display(struct omap_dss_device *dssdev)
-{
-	struct omap_dss_device *out;
-	struct omap_overlay_manager *mgr;
-
-	out = omapdss_find_output_from_display(dssdev);
-
-	if (out == NULL)
-		return NULL;
-
-	mgr = out->manager;
-
-	omap_dss_put_device(out);
-
-	return mgr;
-}
-EXPORT_SYMBOL(omapdss_find_mgr_from_display);
-
 static const struct dss_mgr_ops *dss_mgr_ops;
 
 int dss_install_mgr_ops(const struct dss_mgr_ops *mgr_ops)
@@ -206,62 +188,62 @@ void dss_uninstall_mgr_ops(void)
 }
 EXPORT_SYMBOL(dss_uninstall_mgr_ops);
 
-int dss_mgr_connect(struct omap_overlay_manager *mgr,
+int dss_mgr_connect(enum omap_channel channel,
 		struct omap_dss_device *dst)
 {
-	return dss_mgr_ops->connect(mgr, dst);
+	return dss_mgr_ops->connect(channel, dst);
 }
 EXPORT_SYMBOL(dss_mgr_connect);
 
-void dss_mgr_disconnect(struct omap_overlay_manager *mgr,
+void dss_mgr_disconnect(enum omap_channel channel,
 		struct omap_dss_device *dst)
 {
-	dss_mgr_ops->disconnect(mgr, dst);
+	dss_mgr_ops->disconnect(channel, dst);
 }
 EXPORT_SYMBOL(dss_mgr_disconnect);
 
-void dss_mgr_set_timings(struct omap_overlay_manager *mgr,
+void dss_mgr_set_timings(enum omap_channel channel,
 		const struct omap_video_timings *timings)
 {
-	dss_mgr_ops->set_timings(mgr, timings);
+	dss_mgr_ops->set_timings(channel, timings);
 }
 EXPORT_SYMBOL(dss_mgr_set_timings);
 
-void dss_mgr_set_lcd_config(struct omap_overlay_manager *mgr,
+void dss_mgr_set_lcd_config(enum omap_channel channel,
 		const struct dss_lcd_mgr_config *config)
 {
-	dss_mgr_ops->set_lcd_config(mgr, config);
+	dss_mgr_ops->set_lcd_config(channel, config);
 }
 EXPORT_SYMBOL(dss_mgr_set_lcd_config);
 
-int dss_mgr_enable(struct omap_overlay_manager *mgr)
+int dss_mgr_enable(enum omap_channel channel)
 {
-	return dss_mgr_ops->enable(mgr);
+	return dss_mgr_ops->enable(channel);
 }
 EXPORT_SYMBOL(dss_mgr_enable);
 
-void dss_mgr_disable(struct omap_overlay_manager *mgr)
+void dss_mgr_disable(enum omap_channel channel)
 {
-	dss_mgr_ops->disable(mgr);
+	dss_mgr_ops->disable(channel);
 }
 EXPORT_SYMBOL(dss_mgr_disable);
 
-void dss_mgr_start_update(struct omap_overlay_manager *mgr)
+void dss_mgr_start_update(enum omap_channel channel)
 {
-	dss_mgr_ops->start_update(mgr);
+	dss_mgr_ops->start_update(channel);
 }
 EXPORT_SYMBOL(dss_mgr_start_update);
 
-int dss_mgr_register_framedone_handler(struct omap_overlay_manager *mgr,
+int dss_mgr_register_framedone_handler(enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
-	return dss_mgr_ops->register_framedone_handler(mgr, handler, data);
+	return dss_mgr_ops->register_framedone_handler(channel, handler, data);
 }
 EXPORT_SYMBOL(dss_mgr_register_framedone_handler);
 
-void dss_mgr_unregister_framedone_handler(struct omap_overlay_manager *mgr,
+void dss_mgr_unregister_framedone_handler(enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
-	dss_mgr_ops->unregister_framedone_handler(mgr, handler, data);
+	dss_mgr_ops->unregister_framedone_handler(channel, handler, data);
 }
 EXPORT_SYMBOL(dss_mgr_unregister_framedone_handler);
diff --git a/drivers/gpu/drm/omapdrm/dss/overlay-sysfs.c b/drivers/gpu/drm/omapdrm/dss/overlay-sysfs.c
deleted file mode 100644
index 4cc5ddebfb34..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/overlay-sysfs.c
+++ /dev/null
@@ -1,456 +0,0 @@
-/*
- * Copyright (C) 2009 Nokia Corporation
- * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
- *
- * Some code and ideas taken from drivers/video/omap/ driver
- * by Imre Deak.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "OVERLAY"
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/sysfs.h>
-#include <linux/kobject.h>
-#include <linux/platform_device.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-
-static ssize_t overlay_name_show(struct omap_overlay *ovl, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%s\n", ovl->name);
-}
-
-static ssize_t overlay_manager_show(struct omap_overlay *ovl, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%s\n",
-			ovl->manager ? ovl->manager->name : "<none>");
-}
-
-static ssize_t overlay_manager_store(struct omap_overlay *ovl, const char *buf,
-		size_t size)
-{
-	int i, r;
-	struct omap_overlay_manager *mgr = NULL;
-	struct omap_overlay_manager *old_mgr;
-	int len = size;
-
-	if (buf[size-1] == '\n')
-		--len;
-
-	if (len > 0) {
-		for (i = 0; i < omap_dss_get_num_overlay_managers(); ++i) {
-			mgr = omap_dss_get_overlay_manager(i);
-
-			if (sysfs_streq(buf, mgr->name))
-				break;
-
-			mgr = NULL;
-		}
-	}
-
-	if (len > 0 && mgr == NULL)
-		return -EINVAL;
-
-	if (mgr)
-		DSSDBG("manager %s found\n", mgr->name);
-
-	if (mgr == ovl->manager)
-		return size;
-
-	old_mgr = ovl->manager;
-
-	r = dispc_runtime_get();
-	if (r)
-		return r;
-
-	/* detach old manager */
-	if (old_mgr) {
-		r = ovl->unset_manager(ovl);
-		if (r) {
-			DSSERR("detach failed\n");
-			goto err;
-		}
-
-		r = old_mgr->apply(old_mgr);
-		if (r)
-			goto err;
-	}
-
-	if (mgr) {
-		r = ovl->set_manager(ovl, mgr);
-		if (r) {
-			DSSERR("Failed to attach overlay\n");
-			goto err;
-		}
-
-		r = mgr->apply(mgr);
-		if (r)
-			goto err;
-	}
-
-	dispc_runtime_put();
-
-	return size;
-
-err:
-	dispc_runtime_put();
-	return r;
-}
-
-static ssize_t overlay_input_size_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d,%d\n",
-			info.width, info.height);
-}
-
-static ssize_t overlay_screen_width_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", info.screen_width);
-}
-
-static ssize_t overlay_position_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d,%d\n",
-			info.pos_x, info.pos_y);
-}
-
-static ssize_t overlay_position_store(struct omap_overlay *ovl,
-		const char *buf, size_t size)
-{
-	int r;
-	char *last;
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	info.pos_x = simple_strtoul(buf, &last, 10);
-	++last;
-	if (last - buf >= size)
-		return -EINVAL;
-
-	info.pos_y = simple_strtoul(last, &last, 10);
-
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		return r;
-
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
-		if (r)
-			return r;
-	}
-
-	return size;
-}
-
-static ssize_t overlay_output_size_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d,%d\n",
-			info.out_width, info.out_height);
-}
-
-static ssize_t overlay_output_size_store(struct omap_overlay *ovl,
-		const char *buf, size_t size)
-{
-	int r;
-	char *last;
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	info.out_width = simple_strtoul(buf, &last, 10);
-	++last;
-	if (last - buf >= size)
-		return -EINVAL;
-
-	info.out_height = simple_strtoul(last, &last, 10);
-
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		return r;
-
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
-		if (r)
-			return r;
-	}
-
-	return size;
-}
-
-static ssize_t overlay_enabled_show(struct omap_overlay *ovl, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%d\n", ovl->is_enabled(ovl));
-}
-
-static ssize_t overlay_enabled_store(struct omap_overlay *ovl, const char *buf,
-		size_t size)
-{
-	int r;
-	bool enable;
-
-	r = strtobool(buf, &enable);
-	if (r)
-		return r;
-
-	if (enable)
-		r = ovl->enable(ovl);
-	else
-		r = ovl->disable(ovl);
-
-	if (r)
-		return r;
-
-	return size;
-}
-
-static ssize_t overlay_global_alpha_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			info.global_alpha);
-}
-
-static ssize_t overlay_global_alpha_store(struct omap_overlay *ovl,
-		const char *buf, size_t size)
-{
-	int r;
-	u8 alpha;
-	struct omap_overlay_info info;
-
-	if ((ovl->caps & OMAP_DSS_OVL_CAP_GLOBAL_ALPHA) == 0)
-		return -ENODEV;
-
-	r = kstrtou8(buf, 0, &alpha);
-	if (r)
-		return r;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	info.global_alpha = alpha;
-
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		return r;
-
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
-		if (r)
-			return r;
-	}
-
-	return size;
-}
-
-static ssize_t overlay_pre_mult_alpha_show(struct omap_overlay *ovl,
-		char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-			info.pre_mult_alpha);
-}
-
-static ssize_t overlay_pre_mult_alpha_store(struct omap_overlay *ovl,
-		const char *buf, size_t size)
-{
-	int r;
-	u8 alpha;
-	struct omap_overlay_info info;
-
-	if ((ovl->caps & OMAP_DSS_OVL_CAP_PRE_MULT_ALPHA) == 0)
-		return -ENODEV;
-
-	r = kstrtou8(buf, 0, &alpha);
-	if (r)
-		return r;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	info.pre_mult_alpha = alpha;
-
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		return r;
-
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
-		if (r)
-			return r;
-	}
-
-	return size;
-}
-
-static ssize_t overlay_zorder_show(struct omap_overlay *ovl, char *buf)
-{
-	struct omap_overlay_info info;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", info.zorder);
-}
-
-static ssize_t overlay_zorder_store(struct omap_overlay *ovl,
-		const char *buf, size_t size)
-{
-	int r;
-	u8 zorder;
-	struct omap_overlay_info info;
-
-	if ((ovl->caps & OMAP_DSS_OVL_CAP_ZORDER) == 0)
-		return -ENODEV;
-
-	r = kstrtou8(buf, 0, &zorder);
-	if (r)
-		return r;
-
-	ovl->get_overlay_info(ovl, &info);
-
-	info.zorder = zorder;
-
-	r = ovl->set_overlay_info(ovl, &info);
-	if (r)
-		return r;
-
-	if (ovl->manager) {
-		r = ovl->manager->apply(ovl->manager);
-		if (r)
-			return r;
-	}
-
-	return size;
-}
-
-struct overlay_attribute {
-	struct attribute attr;
-	ssize_t (*show)(struct omap_overlay *, char *);
-	ssize_t	(*store)(struct omap_overlay *, const char *, size_t);
-};
-
-#define OVERLAY_ATTR(_name, _mode, _show, _store) \
-	struct overlay_attribute overlay_attr_##_name = \
-	__ATTR(_name, _mode, _show, _store)
-
-static OVERLAY_ATTR(name, S_IRUGO, overlay_name_show, NULL);
-static OVERLAY_ATTR(manager, S_IRUGO|S_IWUSR,
-		overlay_manager_show, overlay_manager_store);
-static OVERLAY_ATTR(input_size, S_IRUGO, overlay_input_size_show, NULL);
-static OVERLAY_ATTR(screen_width, S_IRUGO, overlay_screen_width_show, NULL);
-static OVERLAY_ATTR(position, S_IRUGO|S_IWUSR,
-		overlay_position_show, overlay_position_store);
-static OVERLAY_ATTR(output_size, S_IRUGO|S_IWUSR,
-		overlay_output_size_show, overlay_output_size_store);
-static OVERLAY_ATTR(enabled, S_IRUGO|S_IWUSR,
-		overlay_enabled_show, overlay_enabled_store);
-static OVERLAY_ATTR(global_alpha, S_IRUGO|S_IWUSR,
-		overlay_global_alpha_show, overlay_global_alpha_store);
-static OVERLAY_ATTR(pre_mult_alpha, S_IRUGO|S_IWUSR,
-		overlay_pre_mult_alpha_show,
-		overlay_pre_mult_alpha_store);
-static OVERLAY_ATTR(zorder, S_IRUGO|S_IWUSR,
-		overlay_zorder_show, overlay_zorder_store);
-
-static struct attribute *overlay_sysfs_attrs[] = {
-	&overlay_attr_name.attr,
-	&overlay_attr_manager.attr,
-	&overlay_attr_input_size.attr,
-	&overlay_attr_screen_width.attr,
-	&overlay_attr_position.attr,
-	&overlay_attr_output_size.attr,
-	&overlay_attr_enabled.attr,
-	&overlay_attr_global_alpha.attr,
-	&overlay_attr_pre_mult_alpha.attr,
-	&overlay_attr_zorder.attr,
-	NULL
-};
-
-static ssize_t overlay_attr_show(struct kobject *kobj, struct attribute *attr,
-		char *buf)
-{
-	struct omap_overlay *overlay;
-	struct overlay_attribute *overlay_attr;
-
-	overlay = container_of(kobj, struct omap_overlay, kobj);
-	overlay_attr = container_of(attr, struct overlay_attribute, attr);
-
-	if (!overlay_attr->show)
-		return -ENOENT;
-
-	return overlay_attr->show(overlay, buf);
-}
-
-static ssize_t overlay_attr_store(struct kobject *kobj, struct attribute *attr,
-		const char *buf, size_t size)
-{
-	struct omap_overlay *overlay;
-	struct overlay_attribute *overlay_attr;
-
-	overlay = container_of(kobj, struct omap_overlay, kobj);
-	overlay_attr = container_of(attr, struct overlay_attribute, attr);
-
-	if (!overlay_attr->store)
-		return -ENOENT;
-
-	return overlay_attr->store(overlay, buf, size);
-}
-
-static const struct sysfs_ops overlay_sysfs_ops = {
-	.show = overlay_attr_show,
-	.store = overlay_attr_store,
-};
-
-static struct kobj_type overlay_ktype = {
-	.sysfs_ops = &overlay_sysfs_ops,
-	.default_attrs = overlay_sysfs_attrs,
-};
-
-int dss_overlay_kobj_init(struct omap_overlay *ovl,
-		struct platform_device *pdev)
-{
-	return kobject_init_and_add(&ovl->kobj, &overlay_ktype,
-			&pdev->dev.kobj, "overlay%d", ovl->id);
-}
-
-void dss_overlay_kobj_uninit(struct omap_overlay *ovl)
-{
-	kobject_del(&ovl->kobj);
-	kobject_put(&ovl->kobj);
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/overlay.c b/drivers/gpu/drm/omapdrm/dss/overlay.c
deleted file mode 100644
index 2f7cee985cdd..000000000000
--- a/drivers/gpu/drm/omapdrm/dss/overlay.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * linux/drivers/video/omap2/dss/overlay.c
- *
- * Copyright (C) 2009 Nokia Corporation
- * Author: Tomi Valkeinen <tomi.valkeinen@nokia.com>
- *
- * Some code and ideas taken from drivers/video/omap/ driver
- * by Imre Deak.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define DSS_SUBSYS_NAME "OVERLAY"
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/sysfs.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-
-#include <video/omapdss.h>
-
-#include "dss.h"
-#include "dss_features.h"
-
-static int num_overlays;
-static struct omap_overlay *overlays;
-
-int omap_dss_get_num_overlays(void)
-{
-	return num_overlays;
-}
-EXPORT_SYMBOL(omap_dss_get_num_overlays);
-
-struct omap_overlay *omap_dss_get_overlay(int num)
-{
-	if (num >= num_overlays)
-		return NULL;
-
-	return &overlays[num];
-}
-EXPORT_SYMBOL(omap_dss_get_overlay);
-
-void dss_init_overlays(struct platform_device *pdev)
-{
-	int i, r;
-
-	num_overlays = dss_feat_get_num_ovls();
-
-	overlays = kzalloc(sizeof(struct omap_overlay) * num_overlays,
-			GFP_KERNEL);
-
-	BUG_ON(overlays == NULL);
-
-	for (i = 0; i < num_overlays; ++i) {
-		struct omap_overlay *ovl = &overlays[i];
-
-		switch (i) {
-		case 0:
-			ovl->name = "gfx";
-			ovl->id = OMAP_DSS_GFX;
-			break;
-		case 1:
-			ovl->name = "vid1";
-			ovl->id = OMAP_DSS_VIDEO1;
-			break;
-		case 2:
-			ovl->name = "vid2";
-			ovl->id = OMAP_DSS_VIDEO2;
-			break;
-		case 3:
-			ovl->name = "vid3";
-			ovl->id = OMAP_DSS_VIDEO3;
-			break;
-		}
-
-		ovl->caps = dss_feat_get_overlay_caps(ovl->id);
-		ovl->supported_modes =
-			dss_feat_get_supported_color_modes(ovl->id);
-
-		r = dss_overlay_kobj_init(ovl, pdev);
-		if (r)
-			DSSERR("failed to create sysfs file\n");
-	}
-}
-
-void dss_uninit_overlays(struct platform_device *pdev)
-{
-	int i;
-
-	for (i = 0; i < num_overlays; ++i) {
-		struct omap_overlay *ovl = &overlays[i];
-		dss_overlay_kobj_uninit(ovl);
-	}
-
-	kfree(overlays);
-	overlays = NULL;
-	num_overlays = 0;
-}
-
-int dss_ovl_simple_check(struct omap_overlay *ovl,
-		const struct omap_overlay_info *info)
-{
-	if ((ovl->caps & OMAP_DSS_OVL_CAP_SCALE) == 0) {
-		if (info->out_width != 0 && info->width != info->out_width) {
-			DSSERR("check_overlay: overlay %d doesn't support "
-					"scaling\n", ovl->id);
-			return -EINVAL;
-		}
-
-		if (info->out_height != 0 && info->height != info->out_height) {
-			DSSERR("check_overlay: overlay %d doesn't support "
-					"scaling\n", ovl->id);
-			return -EINVAL;
-		}
-	}
-
-	if ((ovl->supported_modes & info->color_mode) == 0) {
-		DSSERR("check_overlay: overlay %d doesn't support mode %d\n",
-				ovl->id, info->color_mode);
-		return -EINVAL;
-	}
-
-	if (info->zorder >= omap_dss_get_num_overlays()) {
-		DSSERR("check_overlay: zorder %d too high\n", info->zorder);
-		return -EINVAL;
-	}
-
-	if (dss_feat_rotation_type_supported(info->rotation_type) == 0) {
-		DSSERR("check_overlay: rotation type %d not supported\n",
-				info->rotation_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-int dss_ovl_check(struct omap_overlay *ovl, struct omap_overlay_info *info,
-		const struct omap_video_timings *mgr_timings)
-{
-	u16 outw, outh;
-	u16 dw, dh;
-
-	dw = mgr_timings->x_res;
-	dh = mgr_timings->y_res;
-
-	if ((ovl->caps & OMAP_DSS_OVL_CAP_SCALE) == 0) {
-		outw = info->width;
-		outh = info->height;
-	} else {
-		if (info->out_width == 0)
-			outw = info->width;
-		else
-			outw = info->out_width;
-
-		if (info->out_height == 0)
-			outh = info->height;
-		else
-			outh = info->out_height;
-	}
-
-	if (dw < info->pos_x + outw) {
-		DSSERR("overlay %d horizontally not inside the display area "
-				"(%d + %d >= %d)\n",
-				ovl->id, info->pos_x, outw, dw);
-		return -EINVAL;
-	}
-
-	if (dh < info->pos_y + outh) {
-		DSSERR("overlay %d vertically not inside the display area "
-				"(%d + %d >= %d)\n",
-				ovl->id, info->pos_y, outh, dh);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * Checks if replication logic should be used. Only use when overlay is in
- * RGB12U or RGB16 mode, and video port width interface is 18bpp or 24bpp
- */
-bool dss_ovl_use_replication(struct dss_lcd_mgr_config config,
-		enum omap_color_mode mode)
-{
-	if (mode != OMAP_DSS_COLOR_RGB12U && mode != OMAP_DSS_COLOR_RGB16)
-		return false;
-
-	return config.video_port_width > 16;
-}
diff --git a/drivers/gpu/drm/omapdrm/dss/rfbi.c b/drivers/gpu/drm/omapdrm/dss/rfbi.c
index aea6a1d0fb20..3796576dfadf 100644
--- a/drivers/gpu/drm/omapdrm/dss/rfbi.c
+++ b/drivers/gpu/drm/omapdrm/dss/rfbi.c
@@ -880,7 +880,7 @@ static int rfbi_display_enable(struct omap_dss_device *dssdev)
 	struct omap_dss_device *out = &rfbi.output;
 	int r;
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		return -ENODEV;
 	}
diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index d747cc6b59e1..cd6d3bfb041d 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -114,7 +114,7 @@ static int sdi_calc_clock_div(unsigned long pclk,
 
 static void sdi_config_lcd_manager(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = sdi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
 	sdi.mgr_config.io_pad_mode = DSS_IO_PAD_MODE_BYPASS;
 
@@ -124,19 +124,20 @@ static void sdi_config_lcd_manager(struct omap_dss_device *dssdev)
 	sdi.mgr_config.video_port_width = 24;
 	sdi.mgr_config.lcden_sig_polarity = 1;
 
-	dss_mgr_set_lcd_config(mgr, &sdi.mgr_config);
+	dss_mgr_set_lcd_config(channel, &sdi.mgr_config);
 }
 
 static int sdi_display_enable(struct omap_dss_device *dssdev)
 {
 	struct omap_dss_device *out = &sdi.output;
+	enum omap_channel channel = dssdev->dispc_channel;
 	struct omap_video_timings *t = &sdi.timings;
 	unsigned long fck;
 	struct dispc_clock_info dispc_cinfo;
 	unsigned long pck;
 	int r;
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("failed to enable display: no output/manager\n");
 		return -ENODEV;
 	}
@@ -169,7 +170,7 @@ static int sdi_display_enable(struct omap_dss_device *dssdev)
 	}
 
 
-	dss_mgr_set_timings(out->manager, t);
+	dss_mgr_set_timings(channel, t);
 
 	r = dss_set_fck_rate(fck);
 	if (r)
@@ -188,7 +189,7 @@ static int sdi_display_enable(struct omap_dss_device *dssdev)
 	 * need to care about the shadow register mechanism for pck-free. The
 	 * exact reason for this is unknown.
 	 */
-	dispc_mgr_set_clock_div(out->manager->id, &sdi.mgr_config.clock_info);
+	dispc_mgr_set_clock_div(channel, &sdi.mgr_config.clock_info);
 
 	dss_sdi_init(sdi.datapairs);
 	r = dss_sdi_enable();
@@ -196,7 +197,7 @@ static int sdi_display_enable(struct omap_dss_device *dssdev)
 		goto err_sdi_enable;
 	mdelay(2);
 
-	r = dss_mgr_enable(out->manager);
+	r = dss_mgr_enable(channel);
 	if (r)
 		goto err_mgr_enable;
 
@@ -216,9 +217,9 @@ err_reg_enable:
 
 static void sdi_display_disable(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = sdi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
-	dss_mgr_disable(mgr);
+	dss_mgr_disable(channel);
 
 	dss_sdi_disable();
 
@@ -242,9 +243,9 @@ static void sdi_get_timings(struct omap_dss_device *dssdev,
 static int sdi_check_timings(struct omap_dss_device *dssdev,
 			struct omap_video_timings *timings)
 {
-	struct omap_overlay_manager *mgr = sdi.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
-	if (mgr && !dispc_mgr_timings_ok(mgr->id, timings))
+	if (!dispc_mgr_timings_ok(channel, timings))
 		return -EINVAL;
 
 	if (timings->pixelclock == 0)
@@ -280,18 +281,14 @@ static int sdi_init_regulator(void)
 static int sdi_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
-	struct omap_overlay_manager *mgr;
+	enum omap_channel channel = dssdev->dispc_channel;
 	int r;
 
 	r = sdi_init_regulator();
 	if (r)
 		return r;
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(channel, dssdev);
 	if (r)
 		return r;
 
@@ -299,7 +296,7 @@ static int sdi_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dst->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(channel, dssdev);
 		return r;
 	}
 
@@ -309,6 +306,8 @@ static int sdi_connect(struct omap_dss_device *dssdev,
 static void sdi_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	enum omap_channel channel = dssdev->dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -316,8 +315,7 @@ static void sdi_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(channel, dssdev);
 }
 
 static const struct omapdss_sdi_ops sdi_ops = {
diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c
index 08f9def76e27..08a2cc778ba9 100644
--- a/drivers/gpu/drm/omapdrm/dss/venc.c
+++ b/drivers/gpu/drm/omapdrm/dss/venc.c
@@ -443,7 +443,7 @@ static const struct venc_config *venc_timings_to_config(
 
 static int venc_power_on(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = venc.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 	u32 l;
 	int r;
 
@@ -469,13 +469,13 @@ static int venc_power_on(struct omap_dss_device *dssdev)
 
 	venc_write_reg(VENC_OUTPUT_CONTROL, l);
 
-	dss_mgr_set_timings(mgr, &venc.timings);
+	dss_mgr_set_timings(channel, &venc.timings);
 
 	r = regulator_enable(venc.vdda_dac_reg);
 	if (r)
 		goto err1;
 
-	r = dss_mgr_enable(mgr);
+	r = dss_mgr_enable(channel);
 	if (r)
 		goto err2;
 
@@ -494,12 +494,12 @@ err0:
 
 static void venc_power_off(struct omap_dss_device *dssdev)
 {
-	struct omap_overlay_manager *mgr = venc.output.manager;
+	enum omap_channel channel = dssdev->dispc_channel;
 
 	venc_write_reg(VENC_OUTPUT_CONTROL, 0);
 	dss_set_dac_pwrdn_bgz(0);
 
-	dss_mgr_disable(mgr);
+	dss_mgr_disable(channel);
 
 	regulator_disable(venc.vdda_dac_reg);
 
@@ -515,7 +515,7 @@ static int venc_display_enable(struct omap_dss_device *dssdev)
 
 	mutex_lock(&venc.venc_lock);
 
-	if (out->manager == NULL) {
+	if (!out->dispc_channel_connected) {
 		DSSERR("Failed to enable display: no output/manager\n");
 		r = -ENODEV;
 		goto err0;
@@ -742,18 +742,14 @@ static int venc_get_clocks(struct platform_device *pdev)
 static int venc_connect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
-	struct omap_overlay_manager *mgr;
+	enum omap_channel channel = dssdev->dispc_channel;
 	int r;
 
 	r = venc_init_regulator();
 	if (r)
 		return r;
 
-	mgr = omap_dss_get_overlay_manager(dssdev->dispc_channel);
-	if (!mgr)
-		return -ENODEV;
-
-	r = dss_mgr_connect(mgr, dssdev);
+	r = dss_mgr_connect(channel, dssdev);
 	if (r)
 		return r;
 
@@ -761,7 +757,7 @@ static int venc_connect(struct omap_dss_device *dssdev,
 	if (r) {
 		DSSERR("failed to connect output to new device: %s\n",
 				dst->name);
-		dss_mgr_disconnect(mgr, dssdev);
+		dss_mgr_disconnect(channel, dssdev);
 		return r;
 	}
 
@@ -771,6 +767,8 @@ static int venc_connect(struct omap_dss_device *dssdev,
 static void venc_disconnect(struct omap_dss_device *dssdev,
 		struct omap_dss_device *dst)
 {
+	enum omap_channel channel = dssdev->dispc_channel;
+
 	WARN_ON(dst != dssdev->dst);
 
 	if (dst != dssdev->dst)
@@ -778,8 +776,7 @@ static void venc_disconnect(struct omap_dss_device *dssdev,
 
 	omapdss_output_unset_device(dssdev);
 
-	if (dssdev->manager)
-		dss_mgr_disconnect(dssdev->manager, dssdev);
+	dss_mgr_disconnect(channel, dssdev);
 }
 
 static const struct omapdss_atv_ops venc_ops = {
diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c
index 83f2a9177c14..ce2d67b6a8c7 100644
--- a/drivers/gpu/drm/omapdrm/omap_connector.c
+++ b/drivers/gpu/drm/omapdrm/omap_connector.c
@@ -63,6 +63,9 @@ void copy_timings_omap_to_drm(struct drm_display_mode *mode,
 	if (timings->interlace)
 		mode->flags |= DRM_MODE_FLAG_INTERLACE;
 
+	if (timings->double_pixel)
+		mode->flags |= DRM_MODE_FLAG_DBLCLK;
+
 	if (timings->hsync_level == OMAPDSS_SIG_ACTIVE_HIGH)
 		mode->flags |= DRM_MODE_FLAG_PHSYNC;
 	else
@@ -90,6 +93,7 @@ void copy_timings_drm_to_omap(struct omap_video_timings *timings,
 	timings->vbp = mode->vtotal - mode->vsync_end;
 
 	timings->interlace = !!(mode->flags & DRM_MODE_FLAG_INTERLACE);
+	timings->double_pixel = !!(mode->flags & DRM_MODE_FLAG_DBLCLK);
 
 	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
 		timings->hsync_level = OMAPDSS_SIG_ACTIVE_HIGH;
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index 2ed0754ed19e..075f2bb44867 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -34,14 +34,6 @@ struct omap_crtc {
 	const char *name;
 	enum omap_channel channel;
 
-	/*
-	 * Temporary: eventually this will go away, but it is needed
-	 * for now to keep the output's happy.  (They only need
-	 * mgr->id.)  Eventually this will be replaced w/ something
-	 * more common-panel-framework-y
-	 */
-	struct omap_overlay_manager *mgr;
-
 	struct omap_video_timings timings;
 
 	struct omap_drm_irq vblank_irq;
@@ -80,9 +72,13 @@ int omap_crtc_wait_pending(struct drm_crtc *crtc)
 {
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
 
+	/*
+	 * Timeout is set to a "sufficiently" high value, which should cover
+	 * a single frame refresh even on slower displays.
+	 */
 	return wait_event_timeout(omap_crtc->pending_wait,
 				  !omap_crtc->pending,
-				  msecs_to_jiffies(50));
+				  msecs_to_jiffies(250));
 }
 
 /* -----------------------------------------------------------------------------
@@ -100,31 +96,32 @@ int omap_crtc_wait_pending(struct drm_crtc *crtc)
 
 /* ovl-mgr-id -> crtc */
 static struct omap_crtc *omap_crtcs[8];
+static struct omap_dss_device *omap_crtc_output[8];
 
 /* we can probably ignore these until we support command-mode panels: */
-static int omap_crtc_dss_connect(struct omap_overlay_manager *mgr,
+static int omap_crtc_dss_connect(enum omap_channel channel,
 		struct omap_dss_device *dst)
 {
-	if (mgr->output)
+	if (omap_crtc_output[channel])
 		return -EINVAL;
 
-	if ((mgr->supported_outputs & dst->id) == 0)
+	if ((dispc_mgr_get_supported_outputs(channel) & dst->id) == 0)
 		return -EINVAL;
 
-	dst->manager = mgr;
-	mgr->output = dst;
+	omap_crtc_output[channel] = dst;
+	dst->dispc_channel_connected = true;
 
 	return 0;
 }
 
-static void omap_crtc_dss_disconnect(struct omap_overlay_manager *mgr,
+static void omap_crtc_dss_disconnect(enum omap_channel channel,
 		struct omap_dss_device *dst)
 {
-	mgr->output->manager = NULL;
-	mgr->output = NULL;
+	omap_crtc_output[channel] = NULL;
+	dst->dispc_channel_connected = false;
 }
 
-static void omap_crtc_dss_start_update(struct omap_overlay_manager *mgr)
+static void omap_crtc_dss_start_update(enum omap_channel channel)
 {
 }
 
@@ -138,6 +135,11 @@ static void omap_crtc_set_enabled(struct drm_crtc *crtc, bool enable)
 	u32 framedone_irq, vsync_irq;
 	int ret;
 
+	if (omap_crtc_output[channel]->output_type == OMAP_DISPLAY_TYPE_HDMI) {
+		dispc_mgr_enable(channel, enable);
+		return;
+	}
+
 	if (dispc_mgr_is_enabled(channel) == enable)
 		return;
 
@@ -186,9 +188,9 @@ static void omap_crtc_set_enabled(struct drm_crtc *crtc, bool enable)
 }
 
 
-static int omap_crtc_dss_enable(struct omap_overlay_manager *mgr)
+static int omap_crtc_dss_enable(enum omap_channel channel)
 {
-	struct omap_crtc *omap_crtc = omap_crtcs[mgr->id];
+	struct omap_crtc *omap_crtc = omap_crtcs[channel];
 	struct omap_overlay_manager_info info;
 
 	memset(&info, 0, sizeof(info));
@@ -205,38 +207,38 @@ static int omap_crtc_dss_enable(struct omap_overlay_manager *mgr)
 	return 0;
 }
 
-static void omap_crtc_dss_disable(struct omap_overlay_manager *mgr)
+static void omap_crtc_dss_disable(enum omap_channel channel)
 {
-	struct omap_crtc *omap_crtc = omap_crtcs[mgr->id];
+	struct omap_crtc *omap_crtc = omap_crtcs[channel];
 
 	omap_crtc_set_enabled(&omap_crtc->base, false);
 }
 
-static void omap_crtc_dss_set_timings(struct omap_overlay_manager *mgr,
+static void omap_crtc_dss_set_timings(enum omap_channel channel,
 		const struct omap_video_timings *timings)
 {
-	struct omap_crtc *omap_crtc = omap_crtcs[mgr->id];
+	struct omap_crtc *omap_crtc = omap_crtcs[channel];
 	DBG("%s", omap_crtc->name);
 	omap_crtc->timings = *timings;
 }
 
-static void omap_crtc_dss_set_lcd_config(struct omap_overlay_manager *mgr,
+static void omap_crtc_dss_set_lcd_config(enum omap_channel channel,
 		const struct dss_lcd_mgr_config *config)
 {
-	struct omap_crtc *omap_crtc = omap_crtcs[mgr->id];
+	struct omap_crtc *omap_crtc = omap_crtcs[channel];
 	DBG("%s", omap_crtc->name);
 	dispc_mgr_set_lcd_config(omap_crtc->channel, config);
 }
 
 static int omap_crtc_dss_register_framedone(
-		struct omap_overlay_manager *mgr,
+		enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
 	return 0;
 }
 
 static void omap_crtc_dss_unregister_framedone(
-		struct omap_overlay_manager *mgr,
+		enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
 }
@@ -269,18 +271,7 @@ static void omap_crtc_complete_page_flip(struct drm_crtc *crtc)
 		return;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
-
-	list_del(&event->base.link);
-
-	/*
-	 * Queue the event for delivery if it's still linked to a file
-	 * handle, otherwise just destroy it.
-	 */
-	if (event->base.file_priv)
-		drm_crtc_send_vblank_event(crtc, event);
-	else
-		event->base.destroy(&event->base);
-
+	drm_crtc_send_vblank_event(crtc, event);
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
@@ -341,13 +332,6 @@ static void omap_crtc_destroy(struct drm_crtc *crtc)
 	kfree(omap_crtc);
 }
 
-static bool omap_crtc_mode_fixup(struct drm_crtc *crtc,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void omap_crtc_enable(struct drm_crtc *crtc)
 {
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
@@ -414,24 +398,40 @@ static void omap_crtc_atomic_flush(struct drm_crtc *crtc,
 	}
 }
 
+static bool omap_crtc_is_plane_prop(struct drm_device *dev,
+	struct drm_property *property)
+{
+	struct omap_drm_private *priv = dev->dev_private;
+
+	return property == priv->zorder_prop ||
+		property == dev->mode_config.rotation_property;
+}
+
 static int omap_crtc_atomic_set_property(struct drm_crtc *crtc,
 					 struct drm_crtc_state *state,
 					 struct drm_property *property,
 					 uint64_t val)
 {
-	struct drm_plane_state *plane_state;
-	struct drm_plane *plane = crtc->primary;
+	struct drm_device *dev = crtc->dev;
 
-	/*
-	 * Delegate property set to the primary plane. Get the plane state and
-	 * set the property directly.
-	 */
+	if (omap_crtc_is_plane_prop(dev, property)) {
+		struct drm_plane_state *plane_state;
+		struct drm_plane *plane = crtc->primary;
 
-	plane_state = drm_atomic_get_plane_state(state->state, plane);
-	if (!plane_state)
-		return -EINVAL;
+		/*
+		 * Delegate property set to the primary plane. Get the plane
+		 * state and set the property directly.
+		 */
+
+		plane_state = drm_atomic_get_plane_state(state->state, plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+
+		return drm_atomic_plane_set_property(plane, plane_state,
+				property, val);
+	}
 
-	return drm_atomic_plane_set_property(plane, plane_state, property, val);
+	return -EINVAL;
 }
 
 static int omap_crtc_atomic_get_property(struct drm_crtc *crtc,
@@ -439,14 +439,20 @@ static int omap_crtc_atomic_get_property(struct drm_crtc *crtc,
 					 struct drm_property *property,
 					 uint64_t *val)
 {
-	/*
-	 * Delegate property get to the primary plane. The
-	 * drm_atomic_plane_get_property() function isn't exported, but can be
-	 * called through drm_object_property_get_value() as that will call
-	 * drm_atomic_get_property() for atomic drivers.
-	 */
-	return drm_object_property_get_value(&crtc->primary->base, property,
-					     val);
+	struct drm_device *dev = crtc->dev;
+
+	if (omap_crtc_is_plane_prop(dev, property)) {
+		/*
+		 * Delegate property get to the primary plane. The
+		 * drm_atomic_plane_get_property() function isn't exported, but
+		 * can be called through drm_object_property_get_value() as that
+		 * will call drm_atomic_get_property() for atomic drivers.
+		 */
+		return drm_object_property_get_value(&crtc->primary->base,
+				property, val);
+	}
+
+	return -EINVAL;
 }
 
 static const struct drm_crtc_funcs omap_crtc_funcs = {
@@ -462,7 +468,6 @@ static const struct drm_crtc_funcs omap_crtc_funcs = {
 };
 
 static const struct drm_crtc_helper_funcs omap_crtc_helper_funcs = {
-	.mode_fixup = omap_crtc_mode_fixup,
 	.mode_set_nofb = omap_crtc_mode_set_nofb,
 	.disable = omap_crtc_disable,
 	.enable = omap_crtc_enable,
@@ -520,9 +525,6 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
 	omap_crtc->error_irq.irq = omap_crtc_error_irq;
 	omap_irq_register(dev, &omap_crtc->error_irq);
 
-	/* temporary: */
-	omap_crtc->mgr = omap_dss_get_overlay_manager(channel);
-
 	ret = drm_crtc_init_with_planes(dev, crtc, plane, NULL,
 					&omap_crtc_funcs, NULL);
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index 85dfe3674b41..de275a5be1db 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -79,6 +79,16 @@ static const uint32_t reg[][4] = {
 			DMM_PAT_DESCR__2, DMM_PAT_DESCR__3},
 };
 
+static u32 dmm_read(struct dmm *dmm, u32 reg)
+{
+	return readl(dmm->base + reg);
+}
+
+static void dmm_write(struct dmm *dmm, u32 val, u32 reg)
+{
+	writel(val, dmm->base + reg);
+}
+
 /* simple allocator to grab next 16 byte aligned memory from txn */
 static void *alloc_dma(struct dmm_txn *txn, size_t sz, dma_addr_t *pa)
 {
@@ -108,7 +118,7 @@ static int wait_status(struct refill_engine *engine, uint32_t wait_mask)
 
 	i = DMM_FIXED_RETRY_COUNT;
 	while (true) {
-		r = readl(dmm->base + reg[PAT_STATUS][engine->id]);
+		r = dmm_read(dmm, reg[PAT_STATUS][engine->id]);
 		err = r & DMM_PATSTATUS_ERR;
 		if (err)
 			return -EFAULT;
@@ -140,11 +150,11 @@ static void release_engine(struct refill_engine *engine)
 static irqreturn_t omap_dmm_irq_handler(int irq, void *arg)
 {
 	struct dmm *dmm = arg;
-	uint32_t status = readl(dmm->base + DMM_PAT_IRQSTATUS);
+	uint32_t status = dmm_read(dmm, DMM_PAT_IRQSTATUS);
 	int i;
 
 	/* ack IRQ */
-	writel(status, dmm->base + DMM_PAT_IRQSTATUS);
+	dmm_write(dmm, status, DMM_PAT_IRQSTATUS);
 
 	for (i = 0; i < dmm->num_engines; i++) {
 		if (status & DMM_IRQSTAT_LST) {
@@ -264,7 +274,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait)
 	txn->last_pat->next_pa = 0;
 
 	/* write to PAT_DESCR to clear out any pending transaction */
-	writel(0x0, dmm->base + reg[PAT_DESCR][engine->id]);
+	dmm_write(dmm, 0x0, reg[PAT_DESCR][engine->id]);
 
 	/* wait for engine ready: */
 	ret = wait_status(engine, DMM_PATSTATUS_READY);
@@ -280,8 +290,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait)
 	smp_mb();
 
 	/* kick reload */
-	writel(engine->refill_pa,
-		dmm->base + reg[PAT_DESCR][engine->id]);
+	dmm_write(dmm, engine->refill_pa, reg[PAT_DESCR][engine->id]);
 
 	if (wait) {
 		if (!wait_for_completion_timeout(&engine->compl,
@@ -309,6 +318,21 @@ static int fill(struct tcm_area *area, struct page **pages,
 	struct tcm_area slice, area_s;
 	struct dmm_txn *txn;
 
+	/*
+	 * FIXME
+	 *
+	 * Asynchronous fill does not work reliably, as the driver does not
+	 * handle errors in the async code paths. The fill operation may
+	 * silently fail, leading to leaking DMM engines, which may eventually
+	 * lead to deadlock if we run out of DMM engines.
+	 *
+	 * For now, always set 'wait' so that we only use sync fills. Async
+	 * fills should be fixed, or alternatively we could decide to only
+	 * support sync fills and so the whole async code path could be removed.
+	 */
+
+	wait = true;
+
 	txn = dmm_txn_init(omap_dmm, area->tcm);
 	if (IS_ERR_OR_NULL(txn))
 		return -ENOMEM;
@@ -641,7 +665,7 @@ static int omap_dmm_probe(struct platform_device *dev)
 
 	omap_dmm->dev = &dev->dev;
 
-	hwinfo = readl(omap_dmm->base + DMM_PAT_HWINFO);
+	hwinfo = dmm_read(omap_dmm, DMM_PAT_HWINFO);
 	omap_dmm->num_engines = (hwinfo >> 24) & 0x1F;
 	omap_dmm->num_lut = (hwinfo >> 16) & 0x1F;
 	omap_dmm->container_width = 256;
@@ -650,7 +674,7 @@ static int omap_dmm_probe(struct platform_device *dev)
 	atomic_set(&omap_dmm->engine_counter, omap_dmm->num_engines);
 
 	/* read out actual LUT width and height */
-	pat_geom = readl(omap_dmm->base + DMM_PAT_GEOMETRY);
+	pat_geom = dmm_read(omap_dmm, DMM_PAT_GEOMETRY);
 	omap_dmm->lut_width = ((pat_geom >> 16) & 0xF) << 5;
 	omap_dmm->lut_height = ((pat_geom >> 24) & 0xF) << 5;
 
@@ -660,12 +684,12 @@ static int omap_dmm_probe(struct platform_device *dev)
 		omap_dmm->num_lut++;
 
 	/* initialize DMM registers */
-	writel(0x88888888, omap_dmm->base + DMM_PAT_VIEW__0);
-	writel(0x88888888, omap_dmm->base + DMM_PAT_VIEW__1);
-	writel(0x80808080, omap_dmm->base + DMM_PAT_VIEW_MAP__0);
-	writel(0x80000000, omap_dmm->base + DMM_PAT_VIEW_MAP_BASE);
-	writel(0x88888888, omap_dmm->base + DMM_TILER_OR__0);
-	writel(0x88888888, omap_dmm->base + DMM_TILER_OR__1);
+	dmm_write(omap_dmm, 0x88888888, DMM_PAT_VIEW__0);
+	dmm_write(omap_dmm, 0x88888888, DMM_PAT_VIEW__1);
+	dmm_write(omap_dmm, 0x80808080, DMM_PAT_VIEW_MAP__0);
+	dmm_write(omap_dmm, 0x80000000, DMM_PAT_VIEW_MAP_BASE);
+	dmm_write(omap_dmm, 0x88888888, DMM_TILER_OR__0);
+	dmm_write(omap_dmm, 0x88888888, DMM_TILER_OR__1);
 
 	ret = request_irq(omap_dmm->irq, omap_dmm_irq_handler, IRQF_SHARED,
 				"omap_dmm_irq_handler", omap_dmm);
@@ -683,7 +707,7 @@ static int omap_dmm_probe(struct platform_device *dev)
 	 * buffers for accelerated pan/scroll) and FILL_DSC<n> which
 	 * we just generally don't care about.
 	 */
-	writel(0x7e7e7e7e, omap_dmm->base + DMM_PAT_IRQENABLE_SET);
+	dmm_write(omap_dmm, 0x7e7e7e7e, DMM_PAT_IRQENABLE_SET);
 
 	omap_dmm->dummy_page = alloc_page(GFP_KERNEL | __GFP_DMA32);
 	if (!omap_dmm->dummy_page) {
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index dfafdb602ad2..80398a684cae 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -142,7 +142,6 @@ static int omap_atomic_commit(struct drm_device *dev,
 {
 	struct omap_drm_private *priv = dev->dev_private;
 	struct omap_atomic_state_commit *commit;
-	unsigned long flags;
 	unsigned int i;
 	int ret;
 
@@ -175,17 +174,6 @@ static int omap_atomic_commit(struct drm_device *dev,
 	priv->commit.pending |= commit->crtcs;
 	spin_unlock(&priv->commit.lock);
 
-	/* Keep track of all CRTC events to unlink them in preclose(). */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	for (i = 0; i < dev->mode_config.num_crtc; ++i) {
-		struct drm_crtc_state *cstate = state->crtc_states[i];
-
-		if (cstate && cstate->event)
-			list_add_tail(&cstate->event->base.link,
-				      &priv->commit.events);
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-
 	/* Swap the state, this is the point of no return. */
 	drm_atomic_helper_swap_state(dev, state);
 
@@ -352,7 +340,7 @@ static int omap_modeset_init(struct drm_device *dev)
 		struct drm_connector *connector;
 		struct drm_encoder *encoder;
 		enum omap_channel channel;
-		struct omap_overlay_manager *mgr;
+		struct omap_dss_device *out;
 
 		if (!omapdss_device_is_connected(dssdev))
 			continue;
@@ -399,8 +387,10 @@ static int omap_modeset_init(struct drm_device *dev)
 		 * not considered.
 		 */
 
-		mgr = omapdss_find_mgr_from_display(dssdev);
-		channel = mgr->id;
+		out = omapdss_find_output_from_display(dssdev);
+		channel = out->dispc_channel;
+		omap_dss_put_device(out);
+
 		/*
 		 * if this channel hasn't already been taken by a previously
 		 * allocated crtc, we create a new crtc for it
@@ -673,7 +663,6 @@ static int dev_load(struct drm_device *dev, unsigned long flags)
 	priv->wq = alloc_ordered_workqueue("omapdrm", 0);
 	init_waitqueue_head(&priv->commit.wait);
 	spin_lock_init(&priv->commit.lock);
-	INIT_LIST_HEAD(&priv->commit.events);
 
 	spin_lock_init(&priv->list_lock);
 	INIT_LIST_HEAD(&priv->obj_list);
@@ -787,33 +776,6 @@ static void dev_lastclose(struct drm_device *dev)
 	}
 }
 
-static void dev_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct omap_drm_private *priv = dev->dev_private;
-	struct drm_pending_event *event;
-	unsigned long flags;
-
-	DBG("preclose: dev=%p", dev);
-
-	/*
-	 * Unlink all pending CRTC events to make sure they won't be queued up
-	 * by a pending asynchronous commit.
-	 */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	list_for_each_entry(event, &priv->commit.events, link) {
-		if (event->file_priv == file) {
-			file->event_space += event->event->length;
-			event->file_priv = NULL;
-		}
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
-static void dev_postclose(struct drm_device *dev, struct drm_file *file)
-{
-	DBG("postclose: dev=%p, file=%p", dev, file);
-}
-
 static const struct vm_operations_struct omap_gem_vm_ops = {
 	.fault = omap_gem_fault,
 	.open = drm_gem_vm_open,
@@ -838,8 +800,6 @@ static struct drm_driver omap_drm_driver = {
 	.unload = dev_unload,
 	.open = dev_open,
 	.lastclose = dev_lastclose,
-	.preclose = dev_preclose,
-	.postclose = dev_postclose,
 	.set_busid = drm_platform_set_busid,
 	.get_vblank_counter = drm_vblank_no_hw_counter,
 	.enable_vblank = omap_irq_enable_vblank,
@@ -900,12 +860,52 @@ static int pdev_remove(struct platform_device *device)
 }
 
 #ifdef CONFIG_PM_SLEEP
+static int omap_drm_suspend_all_displays(void)
+{
+	struct omap_dss_device *dssdev = NULL;
+
+	for_each_dss_dev(dssdev) {
+		if (!dssdev->driver)
+			continue;
+
+		if (dssdev->state == OMAP_DSS_DISPLAY_ACTIVE) {
+			dssdev->driver->disable(dssdev);
+			dssdev->activate_after_resume = true;
+		} else {
+			dssdev->activate_after_resume = false;
+		}
+	}
+
+	return 0;
+}
+
+static int omap_drm_resume_all_displays(void)
+{
+	struct omap_dss_device *dssdev = NULL;
+
+	for_each_dss_dev(dssdev) {
+		if (!dssdev->driver)
+			continue;
+
+		if (dssdev->activate_after_resume) {
+			dssdev->driver->enable(dssdev);
+			dssdev->activate_after_resume = false;
+		}
+	}
+
+	return 0;
+}
+
 static int omap_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
 	drm_kms_helper_poll_disable(drm_dev);
 
+	drm_modeset_lock_all(drm_dev);
+	omap_drm_suspend_all_displays();
+	drm_modeset_unlock_all(drm_dev);
+
 	return 0;
 }
 
@@ -913,6 +913,10 @@ static int omap_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
+	drm_modeset_lock_all(drm_dev);
+	omap_drm_resume_all_displays();
+	drm_modeset_unlock_all(drm_dev);
+
 	drm_kms_helper_poll_enable(drm_dev);
 
 	return omap_gem_resume(dev);
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 9e0030731c37..0fbe17d0ec6f 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -31,6 +31,8 @@
 #include <drm/drm_gem.h>
 #include <drm/omap_drm.h>
 
+#include "dss/omapdss.h"
+
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 #define VERB(fmt, ...) if (0) DRM_DEBUG(fmt, ##__VA_ARGS__) /* verbose debug */
 
@@ -106,7 +108,6 @@ struct omap_drm_private {
 
 	/* atomic commit */
 	struct {
-		struct list_head events;
 		wait_queue_head_t wait;
 		u32 pending;
 		spinlock_t lock;	/* Protects commit.pending */
@@ -189,12 +190,15 @@ void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
 		struct omap_drm_window *win, struct omap_overlay_info *info);
 struct drm_connector *omap_framebuffer_get_next_connector(
 		struct drm_framebuffer *fb, struct drm_connector *from);
+bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb);
 
 void omap_gem_init(struct drm_device *dev);
 void omap_gem_deinit(struct drm_device *dev);
 
 struct drm_gem_object *omap_gem_new(struct drm_device *dev,
 		union omap_gem_size gsize, uint32_t flags);
+struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
+		struct sg_table *sgt);
 int omap_gem_new_handle(struct drm_device *dev, struct drm_file *file,
 		union omap_gem_size gsize, uint32_t flags, uint32_t *handle);
 void omap_gem_free_object(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 61714e9670ae..0bbb9c59622e 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -139,11 +139,16 @@ static void omap_encoder_enable(struct drm_encoder *encoder)
 	struct omap_encoder *omap_encoder = to_omap_encoder(encoder);
 	struct omap_dss_device *dssdev = omap_encoder->dssdev;
 	struct omap_dss_driver *dssdrv = dssdev->driver;
+	int r;
 
 	omap_encoder_update(encoder, omap_crtc_channel(encoder->crtc),
 			    omap_crtc_timings(encoder->crtc));
 
-	dssdrv->enable(dssdev);
+	r = dssdrv->enable(dssdev);
+	if (r)
+		dev_err(encoder->dev->dev,
+			"Failed to enable display '%s': %d\n",
+			dssdev->name, r);
 }
 
 static int omap_encoder_atomic_check(struct drm_encoder *encoder,
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index ad202dfc1a49..610962396eb0 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -145,6 +145,14 @@ static uint32_t get_linear_addr(struct plane *plane,
 	return plane->paddr + offset;
 }
 
+bool omap_framebuffer_supports_rotation(struct drm_framebuffer *fb)
+{
+	struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb);
+	struct plane *plane = &omap_fb->planes[0];
+
+	return omap_gem_flags(plane->bo) & OMAP_BO_TILED;
+}
+
 /* update ovl info for scanout, handles cases of multi-planar fb's, etc.
  */
 void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
@@ -449,6 +457,14 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
 			goto fail;
 		}
 
+		if (i > 0 && pitch != mode_cmd->pitches[i - 1]) {
+			dev_err(dev->dev,
+				"pitches are not the same between framebuffer planes %d != %d\n",
+				pitch, mode_cmd->pitches[i - 1]);
+			ret = -EINVAL;
+			goto fail;
+		}
+
 		plane->bo     = bos[i];
 		plane->offset = mode_cmd->offsets[i];
 		plane->pitch  = pitch;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 359b0d7e8ef7..907154f5b67c 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -31,9 +31,9 @@
  */
 
 /* note: we use upper 8 bits of flags for driver-internal flags: */
-#define OMAP_BO_DMA		0x01000000	/* actually is physically contiguous */
-#define OMAP_BO_EXT_SYNC	0x02000000	/* externally allocated sync object */
-#define OMAP_BO_EXT_MEM		0x04000000	/* externally allocated memory */
+#define OMAP_BO_MEM_DMA_API	0x01000000	/* memory allocated with the dma_alloc_* API */
+#define OMAP_BO_MEM_SHMEM	0x02000000	/* memory allocated through shmem backing */
+#define OMAP_BO_MEM_DMABUF	0x08000000	/* memory imported from a dmabuf */
 
 struct omap_gem_object {
 	struct drm_gem_object base;
@@ -49,17 +49,25 @@ struct omap_gem_object {
 	uint32_t roll;
 
 	/**
-	 * If buffer is allocated physically contiguous, the OMAP_BO_DMA flag
-	 * is set and the paddr is valid.  Also if the buffer is remapped in
-	 * TILER and paddr_cnt > 0, then paddr is valid.  But if you are using
-	 * the physical address and OMAP_BO_DMA is not set, then you should
-	 * be going thru omap_gem_{get,put}_paddr() to ensure the mapping is
-	 * not removed from under your feet.
+	 * paddr contains the buffer DMA address. It is valid for
 	 *
-	 * Note that OMAP_BO_SCANOUT is a hint from userspace that DMA capable
-	 * buffer is requested, but doesn't mean that it is.  Use the
-	 * OMAP_BO_DMA flag to determine if the buffer has a DMA capable
-	 * physical address.
+	 * - buffers allocated through the DMA mapping API (with the
+	 *   OMAP_BO_MEM_DMA_API flag set)
+	 *
+	 * - buffers imported from dmabuf (with the OMAP_BO_MEM_DMABUF flag set)
+	 *   if they are physically contiguous (when sgt->orig_nents == 1)
+	 *
+	 * - buffers mapped through the TILER when paddr_cnt is not zero, in
+	 *   which case the DMA address points to the TILER aperture
+	 *
+	 * Physically contiguous buffers have their DMA address equal to the
+	 * physical address as we don't remap those buffers through the TILER.
+	 *
+	 * Buffers mapped to the TILER have their DMA address pointing to the
+	 * TILER aperture. As TILER mappings are refcounted (through paddr_cnt)
+	 * the DMA address must be accessed through omap_get_get_paddr() to
+	 * ensure that the mapping won't disappear unexpectedly. References must
+	 * be released with omap_gem_put_paddr().
 	 */
 	dma_addr_t paddr;
 
@@ -69,6 +77,12 @@ struct omap_gem_object {
 	uint32_t paddr_cnt;
 
 	/**
+	 * If the buffer has been imported from a dmabuf the OMAP_DB_DMABUF flag
+	 * is set and the sgt field is valid.
+	 */
+	struct sg_table *sgt;
+
+	/**
 	 * tiler block used when buffer is remapped in DMM/TILER.
 	 */
 	struct tiler_block *block;
@@ -91,17 +105,7 @@ struct omap_gem_object {
 	 * sync-object allocated on demand (if needed)
 	 *
 	 * Per-buffer sync-object for tracking pending and completed hw/dma
-	 * read and write operations.  The layout in memory is dictated by
-	 * the SGX firmware, which uses this information to stall the command
-	 * stream if a surface is not ready yet.
-	 *
-	 * Note that when buffer is used by SGX, the sync-object needs to be
-	 * allocated from a special heap of sync-objects.  This way many sync
-	 * objects can be packed in a page, and not waste GPU virtual address
-	 * space.  Because of this we have to have a omap_gem_set_sync_object()
-	 * API to allow replacement of the syncobj after it has (potentially)
-	 * already been allocated.  A bit ugly but I haven't thought of a
-	 * better alternative.
+	 * read and write operations.
 	 */
 	struct {
 		uint32_t write_pending;
@@ -166,16 +170,15 @@ static uint64_t mmap_offset(struct drm_gem_object *obj)
 	return drm_vma_node_offset_addr(&obj->vma_node);
 }
 
-/* GEM objects can either be allocated from contiguous memory (in which
- * case obj->filp==NULL), or w/ shmem backing (obj->filp!=NULL).  But non
- * contiguous buffers can be remapped in TILER/DMM if they need to be
- * contiguous... but we don't do this all the time to reduce pressure
- * on TILER/DMM space when we know at allocation time that the buffer
- * will need to be scanned out.
- */
-static inline bool is_shmem(struct drm_gem_object *obj)
+static bool is_contiguous(struct omap_gem_object *omap_obj)
 {
-	return obj->filp != NULL;
+	if (omap_obj->flags & OMAP_BO_MEM_DMA_API)
+		return true;
+
+	if ((omap_obj->flags & OMAP_BO_MEM_DMABUF) && omap_obj->sgt->nents == 1)
+		return true;
+
+	return false;
 }
 
 /* -----------------------------------------------------------------------------
@@ -264,6 +267,19 @@ static int omap_gem_attach_pages(struct drm_gem_object *obj)
 		for (i = 0; i < npages; i++) {
 			addrs[i] = dma_map_page(dev->dev, pages[i],
 					0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+			if (dma_mapping_error(dev->dev, addrs[i])) {
+				dev_warn(dev->dev,
+					"%s: failed to map page\n", __func__);
+
+				for (i = i - 1; i >= 0; --i) {
+					dma_unmap_page(dev->dev, addrs[i],
+						PAGE_SIZE, DMA_BIDIRECTIONAL);
+				}
+
+				ret = -ENOMEM;
+				goto free_addrs;
+			}
 		}
 	} else {
 		addrs = kzalloc(npages * sizeof(*addrs), GFP_KERNEL);
@@ -278,6 +294,8 @@ static int omap_gem_attach_pages(struct drm_gem_object *obj)
 
 	return 0;
 
+free_addrs:
+	kfree(addrs);
 free_pages:
 	drm_gem_put_pages(obj, pages, true, false);
 
@@ -292,7 +310,7 @@ static int get_pages(struct drm_gem_object *obj, struct page ***pages)
 	struct omap_gem_object *omap_obj = to_omap_bo(obj);
 	int ret = 0;
 
-	if (is_shmem(obj) && !omap_obj->pages) {
+	if ((omap_obj->flags & OMAP_BO_MEM_SHMEM) && !omap_obj->pages) {
 		ret = omap_gem_attach_pages(obj);
 		if (ret) {
 			dev_err(obj->dev->dev, "could not attach pages\n");
@@ -396,7 +414,7 @@ static int fault_1d(struct drm_gem_object *obj,
 		omap_gem_cpu_sync(obj, pgoff);
 		pfn = page_to_pfn(omap_obj->pages[pgoff]);
 	} else {
-		BUG_ON(!(omap_obj->flags & OMAP_BO_DMA));
+		BUG_ON(!is_contiguous(omap_obj));
 		pfn = (omap_obj->paddr >> PAGE_SHIFT) + pgoff;
 	}
 
@@ -560,6 +578,11 @@ fail:
 	case 0:
 	case -ERESTARTSYS:
 	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
 		return VM_FAULT_NOPAGE;
 	case -ENOMEM:
 		return VM_FAULT_OOM;
@@ -728,7 +751,8 @@ fail:
 static inline bool is_cached_coherent(struct drm_gem_object *obj)
 {
 	struct omap_gem_object *omap_obj = to_omap_bo(obj);
-	return is_shmem(obj) &&
+
+	return (omap_obj->flags & OMAP_BO_MEM_SHMEM) &&
 		((omap_obj->flags & OMAP_BO_CACHE_MASK) == OMAP_BO_CACHED);
 }
 
@@ -761,9 +785,20 @@ void omap_gem_dma_sync(struct drm_gem_object *obj,
 
 		for (i = 0; i < npages; i++) {
 			if (!omap_obj->addrs[i]) {
-				omap_obj->addrs[i] = dma_map_page(dev->dev, pages[i], 0,
+				dma_addr_t addr;
+
+				addr = dma_map_page(dev->dev, pages[i], 0,
 						PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+				if (dma_mapping_error(dev->dev, addr)) {
+					dev_warn(dev->dev,
+						"%s: failed to map page\n",
+						__func__);
+					break;
+				}
+
 				dirty = true;
+				omap_obj->addrs[i] = addr;
 			}
 		}
 
@@ -787,7 +822,7 @@ int omap_gem_get_paddr(struct drm_gem_object *obj,
 
 	mutex_lock(&obj->dev->struct_mutex);
 
-	if (remap && is_shmem(obj) && priv->has_dmm) {
+	if (!is_contiguous(omap_obj) && remap && priv->has_dmm) {
 		if (omap_obj->paddr_cnt == 0) {
 			struct page **pages;
 			uint32_t npages = obj->size >> PAGE_SHIFT;
@@ -834,7 +869,7 @@ int omap_gem_get_paddr(struct drm_gem_object *obj,
 		omap_obj->paddr_cnt++;
 
 		*paddr = omap_obj->paddr;
-	} else if (omap_obj->flags & OMAP_BO_DMA) {
+	} else if (is_contiguous(omap_obj)) {
 		*paddr = omap_obj->paddr;
 	} else {
 		ret = -EINVAL;
@@ -1138,20 +1173,6 @@ unlock:
 	return ret;
 }
 
-/* it is a bit lame to handle updates in this sort of polling way, but
- * in case of PVR, the GPU can directly update read/write complete
- * values, and not really tell us which ones it updated.. this also
- * means that sync_lock is not quite sufficient.  So we'll need to
- * do something a bit better when it comes time to add support for
- * separate 2d hw..
- */
-void omap_gem_op_update(void)
-{
-	spin_lock(&sync_lock);
-	sync_op_update();
-	spin_unlock(&sync_lock);
-}
-
 /* mark the start of read and/or write operation */
 int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op)
 {
@@ -1219,7 +1240,7 @@ int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op)
  * is currently blocked..  fxn() can be called from any context
  *
  * (TODO for now fxn is called back from whichever context calls
- * omap_gem_op_update().. but this could be better defined later
+ * omap_gem_op_finish().. but this could be better defined later
  * if needed)
  *
  * TODO more code in common w/ _sync()..
@@ -1261,50 +1282,10 @@ int omap_gem_op_async(struct drm_gem_object *obj, enum omap_gem_op op,
 	return 0;
 }
 
-/* special API so PVR can update the buffer to use a sync-object allocated
- * from it's sync-obj heap.  Only used for a newly allocated (from PVR's
- * perspective) sync-object, so we overwrite the new syncobj w/ values
- * from the already allocated syncobj (if there is one)
- */
-int omap_gem_set_sync_object(struct drm_gem_object *obj, void *syncobj)
-{
-	struct omap_gem_object *omap_obj = to_omap_bo(obj);
-	int ret = 0;
-
-	spin_lock(&sync_lock);
-
-	if ((omap_obj->flags & OMAP_BO_EXT_SYNC) && !syncobj) {
-		/* clearing a previously set syncobj */
-		syncobj = kmemdup(omap_obj->sync, sizeof(*omap_obj->sync),
-				  GFP_ATOMIC);
-		if (!syncobj) {
-			ret = -ENOMEM;
-			goto unlock;
-		}
-		omap_obj->flags &= ~OMAP_BO_EXT_SYNC;
-		omap_obj->sync = syncobj;
-	} else if (syncobj && !(omap_obj->flags & OMAP_BO_EXT_SYNC)) {
-		/* replacing an existing syncobj */
-		if (omap_obj->sync) {
-			memcpy(syncobj, omap_obj->sync, sizeof(*omap_obj->sync));
-			kfree(omap_obj->sync);
-		}
-		omap_obj->flags |= OMAP_BO_EXT_SYNC;
-		omap_obj->sync = syncobj;
-	}
-
-unlock:
-	spin_unlock(&sync_lock);
-	return ret;
-}
-
 /* -----------------------------------------------------------------------------
  * Constructor & Destructor
  */
 
-/* don't call directly.. called from GEM core when it is time to actually
- * free the object..
- */
 void omap_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
@@ -1324,22 +1305,23 @@ void omap_gem_free_object(struct drm_gem_object *obj)
 	 */
 	WARN_ON(omap_obj->paddr_cnt > 0);
 
-	/* don't free externally allocated backing memory */
-	if (!(omap_obj->flags & OMAP_BO_EXT_MEM)) {
-		if (omap_obj->pages)
+	if (omap_obj->pages) {
+		if (omap_obj->flags & OMAP_BO_MEM_DMABUF)
+			kfree(omap_obj->pages);
+		else
 			omap_gem_detach_pages(obj);
+	}
 
-		if (!is_shmem(obj)) {
-			dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
-				    omap_obj->paddr);
-		} else if (omap_obj->vaddr) {
-			vunmap(omap_obj->vaddr);
-		}
+	if (omap_obj->flags & OMAP_BO_MEM_DMA_API) {
+		dma_free_wc(dev->dev, obj->size, omap_obj->vaddr,
+			    omap_obj->paddr);
+	} else if (omap_obj->vaddr) {
+		vunmap(omap_obj->vaddr);
+	} else if (obj->import_attach) {
+		drm_prime_gem_destroy(obj, omap_obj->sgt);
 	}
 
-	/* don't free externally allocated syncobj */
-	if (!(omap_obj->flags & OMAP_BO_EXT_SYNC))
-		kfree(omap_obj->sync);
+	kfree(omap_obj->sync);
 
 	drm_gem_object_release(obj);
 
@@ -1357,84 +1339,160 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev,
 	size_t size;
 	int ret;
 
+	/* Validate the flags and compute the memory and cache flags. */
 	if (flags & OMAP_BO_TILED) {
 		if (!priv->usergart) {
 			dev_err(dev->dev, "Tiled buffers require DMM\n");
 			return NULL;
 		}
 
-		/* tiled buffers are always shmem paged backed.. when they are
-		 * scanned out, they are remapped into DMM/TILER
+		/*
+		 * Tiled buffers are always shmem paged backed. When they are
+		 * scanned out, they are remapped into DMM/TILER.
 		 */
 		flags &= ~OMAP_BO_SCANOUT;
+		flags |= OMAP_BO_MEM_SHMEM;
 
-		/* currently don't allow cached buffers.. there is some caching
-		 * stuff that needs to be handled better
+		/*
+		 * Currently don't allow cached buffers. There is some caching
+		 * stuff that needs to be handled better.
 		 */
 		flags &= ~(OMAP_BO_CACHED|OMAP_BO_WC|OMAP_BO_UNCACHED);
 		flags |= tiler_get_cpu_cache_flags();
-
-		/* align dimensions to slot boundaries... */
-		tiler_align(gem2fmt(flags),
-				&gsize.tiled.width, &gsize.tiled.height);
-
-		/* ...and calculate size based on aligned dimensions */
-		size = tiler_size(gem2fmt(flags),
-				gsize.tiled.width, gsize.tiled.height);
-	} else {
-		size = PAGE_ALIGN(gsize.bytes);
+	} else if ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm) {
+		/*
+		 * OMAP_BO_SCANOUT hints that the buffer doesn't need to be
+		 * tiled. However, to lower the pressure on memory allocation,
+		 * use contiguous memory only if no TILER is available.
+		 */
+		flags |= OMAP_BO_MEM_DMA_API;
+	} else if (!(flags & OMAP_BO_MEM_DMABUF)) {
+		/*
+		 * All other buffers not backed by dma_buf are shmem-backed.
+		 */
+		flags |= OMAP_BO_MEM_SHMEM;
 	}
 
+	/* Allocate the initialize the OMAP GEM object. */
 	omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL);
 	if (!omap_obj)
 		return NULL;
 
 	obj = &omap_obj->base;
+	omap_obj->flags = flags;
 
-	if ((flags & OMAP_BO_SCANOUT) && !priv->has_dmm) {
-		/* attempt to allocate contiguous memory if we don't
-		 * have DMM for remappign discontiguous buffers
+	if (flags & OMAP_BO_TILED) {
+		/*
+		 * For tiled buffers align dimensions to slot boundaries and
+		 * calculate size based on aligned dimensions.
 		 */
-		omap_obj->vaddr =  dma_alloc_wc(dev->dev, size,
-						&omap_obj->paddr, GFP_KERNEL);
-		if (!omap_obj->vaddr) {
-			kfree(omap_obj);
+		tiler_align(gem2fmt(flags), &gsize.tiled.width,
+			    &gsize.tiled.height);
 
-			return NULL;
-		}
-
-		flags |= OMAP_BO_DMA;
-	}
+		size = tiler_size(gem2fmt(flags), gsize.tiled.width,
+				  gsize.tiled.height);
 
-	spin_lock(&priv->list_lock);
-	list_add(&omap_obj->mm_list, &priv->obj_list);
-	spin_unlock(&priv->list_lock);
-
-	omap_obj->flags = flags;
-
-	if (flags & OMAP_BO_TILED) {
 		omap_obj->width = gsize.tiled.width;
 		omap_obj->height = gsize.tiled.height;
+	} else {
+		size = PAGE_ALIGN(gsize.bytes);
 	}
 
-	if (flags & (OMAP_BO_DMA|OMAP_BO_EXT_MEM)) {
+	/* Initialize the GEM object. */
+	if (!(flags & OMAP_BO_MEM_SHMEM)) {
 		drm_gem_private_object_init(dev, obj, size);
 	} else {
 		ret = drm_gem_object_init(dev, obj, size);
 		if (ret)
-			goto fail;
+			goto err_free;
 
 		mapping = file_inode(obj->filp)->i_mapping;
 		mapping_set_gfp_mask(mapping, GFP_USER | __GFP_DMA32);
 	}
 
+	/* Allocate memory if needed. */
+	if (flags & OMAP_BO_MEM_DMA_API) {
+		omap_obj->vaddr = dma_alloc_wc(dev->dev, size,
+					       &omap_obj->paddr,
+					       GFP_KERNEL);
+		if (!omap_obj->vaddr)
+			goto err_release;
+	}
+
+	spin_lock(&priv->list_lock);
+	list_add(&omap_obj->mm_list, &priv->obj_list);
+	spin_unlock(&priv->list_lock);
+
 	return obj;
 
-fail:
-	omap_gem_free_object(obj);
+err_release:
+	drm_gem_object_release(obj);
+err_free:
+	kfree(omap_obj);
 	return NULL;
 }
 
+struct drm_gem_object *omap_gem_new_dmabuf(struct drm_device *dev, size_t size,
+					   struct sg_table *sgt)
+{
+	struct omap_drm_private *priv = dev->dev_private;
+	struct omap_gem_object *omap_obj;
+	struct drm_gem_object *obj;
+	union omap_gem_size gsize;
+
+	/* Without a DMM only physically contiguous buffers can be supported. */
+	if (sgt->orig_nents != 1 && !priv->has_dmm)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&dev->struct_mutex);
+
+	gsize.bytes = PAGE_ALIGN(size);
+	obj = omap_gem_new(dev, gsize, OMAP_BO_MEM_DMABUF | OMAP_BO_WC);
+	if (!obj) {
+		obj = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	omap_obj = to_omap_bo(obj);
+	omap_obj->sgt = sgt;
+
+	if (sgt->orig_nents == 1) {
+		omap_obj->paddr = sg_dma_address(sgt->sgl);
+	} else {
+		/* Create pages list from sgt */
+		struct sg_page_iter iter;
+		struct page **pages;
+		unsigned int npages;
+		unsigned int i = 0;
+
+		npages = DIV_ROUND_UP(size, PAGE_SIZE);
+		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			omap_gem_free_object(obj);
+			obj = ERR_PTR(-ENOMEM);
+			goto done;
+		}
+
+		omap_obj->pages = pages;
+
+		for_each_sg_page(sgt->sgl, &iter, sgt->orig_nents, 0) {
+			pages[i++] = sg_page_iter_page(&iter);
+			if (i > npages)
+				break;
+		}
+
+		if (WARN_ON(i != npages)) {
+			omap_gem_free_object(obj);
+			obj = ERR_PTR(-ENOMEM);
+			goto done;
+		}
+	}
+
+done:
+	mutex_unlock(&dev->struct_mutex);
+	return obj;
+}
+
 /* convenience method to construct a GEM buffer object, and userspace handle */
 int omap_gem_new_handle(struct drm_device *dev, struct drm_file *file,
 		union omap_gem_size gsize, uint32_t flags, uint32_t *handle)
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 27c297672076..af267c35d813 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -21,6 +21,10 @@
 
 #include "omap_drv.h"
 
+/* -----------------------------------------------------------------------------
+ * DMABUF Export
+ */
+
 static struct sg_table *omap_gem_map_dma_buf(
 		struct dma_buf_attachment *attachment,
 		enum dma_data_direction dir)
@@ -79,7 +83,7 @@ static void omap_gem_dmabuf_release(struct dma_buf *buffer)
 
 
 static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer,
-		size_t start, size_t len, enum dma_data_direction dir)
+		enum dma_data_direction dir)
 {
 	struct drm_gem_object *obj = buffer->priv;
 	struct page **pages;
@@ -93,11 +97,12 @@ static int omap_gem_dmabuf_begin_cpu_access(struct dma_buf *buffer,
 	return omap_gem_get_pages(obj, &pages, true);
 }
 
-static void omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
-		size_t start, size_t len, enum dma_data_direction dir)
+static int omap_gem_dmabuf_end_cpu_access(struct dma_buf *buffer,
+					  enum dma_data_direction dir)
 {
 	struct drm_gem_object *obj = buffer->priv;
 	omap_gem_put_pages(obj);
+	return 0;
 }
 
 
@@ -178,15 +183,20 @@ struct dma_buf *omap_gem_prime_export(struct drm_device *dev,
 	return dma_buf_export(&exp_info);
 }
 
+/* -----------------------------------------------------------------------------
+ * DMABUF Import
+ */
+
 struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
-		struct dma_buf *buffer)
+					     struct dma_buf *dma_buf)
 {
+	struct dma_buf_attachment *attach;
 	struct drm_gem_object *obj;
+	struct sg_table *sgt;
+	int ret;
 
-	/* is this one of own objects? */
-	if (buffer->ops == &omap_dmabuf_ops) {
-		obj = buffer->priv;
-		/* is it from our device? */
+	if (dma_buf->ops == &omap_dmabuf_ops) {
+		obj = dma_buf->priv;
 		if (obj->dev == dev) {
 			/*
 			 * Importing dmabuf exported from out own gem increases
@@ -197,9 +207,33 @@ struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
 		}
 	}
 
-	/*
-	 * TODO add support for importing buffers from other devices..
-	 * for now we don't need this but would be nice to add eventually
-	 */
-	return ERR_PTR(-EINVAL);
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach))
+		return ERR_CAST(attach);
+
+	get_dma_buf(dma_buf);
+
+	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto fail_detach;
+	}
+
+	obj = omap_gem_new_dmabuf(dev, dma_buf->size, sgt);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto fail_unmap;
+	}
+
+	obj->import_attach = attach;
+
+	return obj;
+
+fail_unmap:
+	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+fail_detach:
+	dma_buf_detach(dma_buf, attach);
+	dma_buf_put(dma_buf);
+
+	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index d75b197eff46..93ee538a99f5 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -177,6 +177,12 @@ static int omap_plane_atomic_check(struct drm_plane *plane,
 	if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay)
 		return -EINVAL;
 
+	if (state->fb) {
+		if (state->rotation != BIT(DRM_ROTATE_0) &&
+		    !omap_framebuffer_supports_rotation(state->fb))
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index f88a631c43ab..ceb20486dacf 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -847,6 +847,7 @@ static const struct drm_display_mode innolux_g121x1_l03_mode = {
 	.vsync_end = 768 + 38 + 1,
 	.vtotal = 768 + 38 + 1 + 0,
 	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
 };
 
 static const struct panel_desc innolux_g121x1_l03 = {
@@ -982,6 +983,29 @@ static const struct panel_desc lg_lb070wv8 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode lg_lp120up1_mode = {
+	.clock = 162300,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 40,
+	.hsync_end = 1920 + 40 + 40,
+	.htotal = 1920 + 40 + 40+ 80,
+	.vdisplay = 1280,
+	.vsync_start = 1280 + 4,
+	.vsync_end = 1280 + 4 + 4,
+	.vtotal = 1280 + 4 + 4 + 12,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc lg_lp120up1 = {
+	.modes = &lg_lp120up1_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 267,
+		.height = 183,
+	},
+};
+
 static const struct drm_display_mode lg_lp129qe_mode = {
 	.clock = 285250,
 	.hdisplay = 2560,
@@ -1016,6 +1040,7 @@ static const struct drm_display_mode nec_nl4827hc19_05b_mode = {
 	.vsync_end = 272 + 2 + 4,
 	.vtotal = 272 + 2 + 4 + 2,
 	.vrefresh = 74,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
 };
 
 static const struct panel_desc nec_nl4827hc19_05b = {
@@ -1176,6 +1201,42 @@ static const struct panel_desc shelly_sca07010_bfn_lnn = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct display_timing urt_umsh_8596md_timing = {
+	.pixelclock = { 33260000, 33260000, 33260000 },
+	.hactive = { 800, 800, 800 },
+	.hfront_porch = { 41, 41, 41 },
+	.hback_porch = { 216 - 128, 216 - 128, 216 - 128 },
+	.hsync_len = { 71, 128, 128 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 10, 10, 10 },
+	.vback_porch = { 35 - 2, 35 - 2, 35 - 2 },
+	.vsync_len = { 2, 2, 2 },
+	.flags = DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_NEGEDGE |
+		DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW,
+};
+
+static const struct panel_desc urt_umsh_8596md_lvds = {
+	.timings = &urt_umsh_8596md_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 152,
+		.height = 91,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG,
+};
+
+static const struct panel_desc urt_umsh_8596md_parallel = {
+	.timings = &urt_umsh_8596md_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 152,
+		.height = 91,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+};
+
 static const struct of_device_id platform_of_match[] = {
 	{
 		.compatible = "ampire,am800480r3tmqwa1h",
@@ -1256,6 +1317,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "lg,lb070wv8",
 		.data = &lg_lb070wv8,
 	}, {
+		.compatible = "lg,lp120up1",
+		.data = &lg_lp120up1,
+	}, {
 		.compatible = "lg,lp129qe",
 		.data = &lg_lp129qe,
 	}, {
@@ -1280,6 +1344,24 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "shelly,sca07010-bfn-lnn",
 		.data = &shelly_sca07010_bfn_lnn,
 	}, {
+		.compatible = "urt,umsh-8596md-t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-1t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-7t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
+		.compatible = "urt,umsh-8596md-11t",
+		.data = &urt_umsh_8596md_lvds,
+	}, {
+		.compatible = "urt,umsh-8596md-19t",
+		.data = &urt_umsh_8596md_lvds,
+	}, {
+		.compatible = "urt,umsh-8596md-20t",
+		.data = &urt_umsh_8596md_parallel,
+	}, {
 		/* sentinel */
 	}
 };
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 86276519b2ef..43e5f503d1c5 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -734,14 +734,6 @@ static void qxl_enc_dpms(struct drm_encoder *encoder, int mode)
 	DRM_DEBUG("\n");
 }
 
-static bool qxl_enc_mode_fixup(struct drm_encoder *encoder,
-			       const struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
-{
-	DRM_DEBUG("\n");
-	return true;
-}
-
 static void qxl_enc_prepare(struct drm_encoder *encoder)
 {
 	DRM_DEBUG("\n");
@@ -864,7 +856,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector)
 
 static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = {
 	.dpms = qxl_enc_dpms,
-	.mode_fixup = qxl_enc_mode_fixup,
 	.prepare = qxl_enc_prepare,
 	.mode_set = qxl_enc_mode_set,
 	.commit = qxl_enc_commit,
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index ec1593a6a561..f66c33dd21a3 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -66,9 +66,10 @@ int atom_debug = 0;
 static int atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t * params);
 int atom_execute_table(struct atom_context *ctx, int index, uint32_t * params);
 
-static uint32_t atom_arg_mask[8] =
-    { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000,
-0xFF000000 };
+static uint32_t atom_arg_mask[8] = {
+	0xFFFFFFFF, 0x0000FFFF, 0x00FFFF00, 0xFFFF0000,
+	0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000
+};
 static int atom_arg_shift[8] = { 0, 0, 8, 16, 0, 8, 16, 24 };
 
 static int atom_dst_to_src[8][4] = {
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 801dd60ac192..cf61e0856f4a 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -275,13 +275,13 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode)
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
 			atombios_enable_crtc_memreq(crtc, ATOM_ENABLE);
 		atombios_blank_crtc(crtc, ATOM_DISABLE);
-		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
+		drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->enabled)
 			atombios_blank_crtc(crtc, ATOM_ENABLE);
 		if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
@@ -1665,11 +1665,11 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y,
 }
 
 int atombios_crtc_set_base_atomic(struct drm_crtc *crtc,
-                                  struct drm_framebuffer *fb,
+				  struct drm_framebuffer *fb,
 				  int x, int y, enum mode_set_atomic state)
 {
-       struct drm_device *dev = crtc->dev;
-       struct radeon_device *rdev = dev->dev_private;
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 
 	if (ASIC_IS_DCE4(rdev))
 		return dce4_crtc_do_set_base(crtc, fb, x, y, 1);
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 6af832545bc5..afa9db1dc0e3 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -37,10 +37,10 @@
 #define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
 
 static char *voltage_names[] = {
-        "0.4V", "0.6V", "0.8V", "1.2V"
+	"0.4V", "0.6V", "0.8V", "1.2V"
 };
 static char *pre_emph_names[] = {
-        "0dB", "3.5dB", "6dB", "9.5dB"
+	"0dB", "3.5dB", "6dB", "9.5dB"
 };
 
 /***** radeon AUX functions *****/
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 01b20e14a247..edd05cdb0cd8 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -892,8 +892,6 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 			else
 				args.v1.ucLaneNum = 4;
 
-			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
-				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				args.v1.ucConfig = ATOM_ENCODER_CONFIG_V2_TRANSMITTER1;
@@ -910,6 +908,10 @@ atombios_dig_encoder_setup2(struct drm_encoder *encoder, int action, int panel_m
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKB;
 			else
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_LINKA;
+
+			if (ENCODER_MODE_IS_DP(args.v1.ucEncoderMode) && (dp_clock == 270000))
+				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
+
 			break;
 		case 2:
 		case 3:
@@ -2623,16 +2625,8 @@ radeon_atom_ext_dpms(struct drm_encoder *encoder, int mode)
 
 }
 
-static bool radeon_atom_ext_mode_fixup(struct drm_encoder *encoder,
-				       const struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static const struct drm_encoder_helper_funcs radeon_atom_ext_helper_funcs = {
 	.dpms = radeon_atom_ext_dpms,
-	.mode_fixup = radeon_atom_ext_mode_fixup,
 	.prepare = radeon_atom_ext_prepare,
 	.mode_set = radeon_atom_ext_mode_set,
 	.commit = radeon_atom_ext_commit,
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 69556f5e247e..38e5123708e7 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -1163,12 +1163,11 @@ u32 btc_valid_sclk[40] =
 	155000, 160000, 165000, 170000, 175000, 180000, 185000, 190000, 195000, 200000
 };
 
-static const struct radeon_blacklist_clocks btc_blacklist_clocks[] =
-{
-        { 10000, 30000, RADEON_SCLK_UP },
-        { 15000, 30000, RADEON_SCLK_UP },
-        { 20000, 30000, RADEON_SCLK_UP },
-        { 25000, 30000, RADEON_SCLK_UP }
+static const struct radeon_blacklist_clocks btc_blacklist_clocks[] = {
+	{ 10000, 30000, RADEON_SCLK_UP },
+	{ 15000, 30000, RADEON_SCLK_UP },
+	{ 20000, 30000, RADEON_SCLK_UP },
+	{ 25000, 30000, RADEON_SCLK_UP }
 };
 
 void btc_get_max_clock_from_voltage_dependency_table(struct radeon_clock_voltage_dependency_table *table,
@@ -1637,14 +1636,14 @@ static int btc_init_smc_table(struct radeon_device *rdev,
 	cypress_populate_smc_voltage_tables(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_EVERGREEN:
-        case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
+	case THERMAL_TYPE_EVERGREEN:
+	case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        default:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
@@ -1860,37 +1859,37 @@ static bool btc_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	case MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 4a09947be244..35e0fc3ae8a7 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -192,9 +192,9 @@ static void ci_fan_ctrl_set_default_mode(struct radeon_device *rdev);
 
 static struct ci_power_info *ci_get_pi(struct radeon_device *rdev)
 {
-        struct ci_power_info *pi = rdev->pm.dpm.priv;
+	struct ci_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 static struct ci_ps *ci_get_ps(struct radeon_ps *rps)
@@ -1632,7 +1632,7 @@ static int ci_notify_hw_of_power_source(struct radeon_device *rdev,
 	else
 		power_limit = (u32)(cac_tdp_table->battery_power_limit * 256);
 
-        ci_set_power_limit(rdev, power_limit);
+	ci_set_power_limit(rdev, power_limit);
 
 	if (pi->caps_automatic_dc_transition) {
 		if (ac_power)
@@ -2017,9 +2017,9 @@ static void ci_enable_display_gap(struct radeon_device *rdev)
 {
 	u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL);
 
-        tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK);
-        tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
-                DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK));
+	tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK);
+	tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
+		DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK));
 
 	WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp);
 }
@@ -2938,8 +2938,8 @@ static int ci_populate_single_memory_level(struct radeon_device *rdev,
 
 	memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE);
 	memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases);
-        memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE);
-        memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE);
+	memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE);
+	memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE);
 
 	memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency);
 	memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel);
@@ -3152,7 +3152,7 @@ static int ci_calculate_sclk_params(struct radeon_device *rdev,
 
 	spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
 	spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
-        spll_func_cntl_3 |= SPLL_DITHEN;
+	spll_func_cntl_3 |= SPLL_DITHEN;
 
 	if (pi->caps_sclk_ss_support) {
 		struct radeon_atom_ss ss;
@@ -3229,7 +3229,7 @@ static int ci_populate_single_graphic_level(struct radeon_device *rdev,
 	graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	graphic_level->Flags = cpu_to_be32(graphic_level->Flags);
-        graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE);
+	graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE);
 	graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases);
 	graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency);
 	graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel);
@@ -4393,7 +4393,7 @@ static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 		break;
 	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
-            break;
+		break;
 	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
@@ -4625,7 +4625,7 @@ static int ci_initialize_mc_reg_table(struct radeon_device *rdev)
 	if (ret)
 		goto init_mc_done;
 
-        ret = ci_copy_vbios_mc_reg_table(table, ci_table);
+	ret = ci_copy_vbios_mc_reg_table(table, ci_table);
 	if (ret)
 		goto init_mc_done;
 
@@ -4916,7 +4916,7 @@ static int ci_set_private_data_variables_based_on_pptable(struct radeon_device *
 		allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk;
 	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc =
 		allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v;
-        rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci =
+	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci =
 		allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v;
 
 	return 0;
@@ -5517,7 +5517,7 @@ static int ci_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct ci_ps *ps;
@@ -5693,8 +5693,8 @@ int ci_dpm_init(struct radeon_device *rdev)
 		return ret;
 	}
 
-        pi->dll_default_on = false;
-        pi->sram_end = SMC_RAM_END;
+	pi->dll_default_on = false;
+	pi->sram_end = SMC_RAM_END;
 
 	pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT;
 	pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT;
@@ -5734,9 +5734,9 @@ int ci_dpm_init(struct radeon_device *rdev)
 	pi->caps_uvd_dpm = true;
 	pi->caps_vce_dpm = true;
 
-        ci_get_leakage_voltages(rdev);
-        ci_patch_dependency_tables_with_leakage(rdev);
-        ci_set_private_data_variables_based_on_pptable(rdev);
+	ci_get_leakage_voltages(rdev);
+	ci_patch_dependency_tables_with_leakage(rdev);
+	ci_set_private_data_variables_based_on_pptable(rdev);
 
 	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries =
 		kzalloc(4 * sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL);
@@ -5839,7 +5839,7 @@ int ci_dpm_init(struct radeon_device *rdev)
 			pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2;
 		else
 			rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL;
-        }
+	}
 
 	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) {
 		if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT))
@@ -5860,7 +5860,7 @@ int ci_dpm_init(struct radeon_device *rdev)
 #endif
 
 	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
+				   &frev, &crev, &data_offset)) {
 		pi->caps_sclk_ss_support = true;
 		pi->caps_mclk_ss_support = true;
 		pi->dynamic_ss = true;
diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c
index 35c6f648ba04..24760ee3063e 100644
--- a/drivers/gpu/drm/radeon/ci_smc.c
+++ b/drivers/gpu/drm/radeon/ci_smc.c
@@ -194,11 +194,11 @@ PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev)
 		return PPSMC_Result_OK;
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-                tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
-                if ((tmp & CKEN) == 0)
+		tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+		if ((tmp & CKEN) == 0)
 			break;
-                udelay(1);
-        }
+		udelay(1);
+	}
 
 	return PPSMC_Result_OK;
 }
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 4c30d8c65558..8ac82df2efde 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1712,7 +1712,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
  */
 u32 cik_get_xclk(struct radeon_device *rdev)
 {
-        u32 reference_clock = rdev->clock.spll.reference_freq;
+	u32 reference_clock = rdev->clock.spll.reference_freq;
 
 	if (rdev->flags & RADEON_IS_IGP) {
 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
@@ -2343,9 +2343,13 @@ out:
  */
 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
 {
-	const u32 num_tile_mode_states = 32;
-	const u32 num_secondary_tile_mode_states = 16;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	u32 *tile = rdev->config.cik.tile_mode_array;
+	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
+	const u32 num_tile_mode_states =
+			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
+	const u32 num_secondary_tile_mode_states =
+			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
+	u32 reg_offset, split_equal_to_row_size;
 	u32 num_pipe_configs;
 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
 		rdev->config.cik.max_shader_engines;
@@ -2367,1032 +2371,669 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
 	if (num_pipe_configs > 8)
 		num_pipe_configs = 16;
 
-	if (num_pipe_configs == 16) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 8) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_2_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 4) {
+	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+		tile[reg_offset] = 0;
+	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+		macrotile[reg_offset] = 0;
+
+	switch(num_pipe_configs) {
+	case 16:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			   NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+			   NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+			    NUM_BANKS(ADDR_SURF_2_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 8:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_2_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 4:
 		if (num_rbs == 4) {
-			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-				switch (reg_offset) {
-				case 0:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-					break;
-				case 1:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-					break;
-				case 2:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 3:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-					break;
-				case 4:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 5:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-					break;
-				case 6:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 7:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 8:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
-					break;
-				case 9:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-					break;
-				case 10:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 11:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 12:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 13:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-					break;
-				case 14:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 16:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 17:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 27:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-					break;
-				case 28:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 29:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 30:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				default:
-					gb_tile_moden = 0;
-					break;
-				}
-				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-			}
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
 		} else if (num_rbs < 4) {
-			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-				switch (reg_offset) {
-				case 0:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-					break;
-				case 1:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-					break;
-				case 2:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 3:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-					break;
-				case 4:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 5:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-					break;
-				case 6:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-					break;
-				case 7:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 TILE_SPLIT(split_equal_to_row_size));
-					break;
-				case 8:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
-					break;
-				case 9:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
-					break;
-				case 10:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 11:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 12:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 13:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-					break;
-				case 14:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 16:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 17:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 27:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
-					break;
-				case 28:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 29:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				case 30:
-					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-					break;
-				default:
-					gb_tile_moden = 0;
-					break;
-				}
-				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-			}
-		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
-						 NUM_BANKS(ADDR_SURF_4_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if (num_pipe_configs == 2) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
-				break;
-			case 1:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
-				break;
-			case 2:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 3:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
-				break;
-			case 4:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 5:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
-				break;
-			case 6:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
-				break;
-			case 7:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 TILE_SPLIT(split_equal_to_row_size));
-				break;
-			case 8:
-				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						PIPE_CONFIG(ADDR_SURF_P2);
-				break;
-			case 9:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 10:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 11:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 12:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 13:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
-				break;
-			case 14:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 16:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 17:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 27:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2));
-				break;
-			case 28:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 29:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			case 30:
-				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P2) |
-						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
 		}
-		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 1:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 2:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 3:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 4:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 5:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 6:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			case 8:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 9:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 10:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 11:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 12:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 13:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
-						 NUM_BANKS(ADDR_SURF_16_BANK));
-				break;
-			case 14:
-				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
-						 NUM_BANKS(ADDR_SURF_8_BANK));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+				NUM_BANKS(ADDR_SURF_4_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	case 2:
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
+		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
+		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2) |
+			   TILE_SPLIT(split_equal_to_row_size));
+		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   PIPE_CONFIG(ADDR_SURF_P2);
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P2));
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2));
+		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+			    PIPE_CONFIG(ADDR_SURF_P2) |
+			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+
+		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+				NUM_BANKS(ADDR_SURF_16_BANK));
+		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+				NUM_BANKS(ADDR_SURF_8_BANK));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
+		break;
+
+	default:
 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
+	}
 }
 
 /**
@@ -4219,13 +3860,20 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		radeon_scratch_free(rdev, scratch);
+		radeon_ib_free(rdev, &ib);
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
@@ -9702,13 +9350,13 @@ uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
 
 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
-                              u32 cntl_reg, u32 status_reg)
+			     u32 cntl_reg, u32 status_reg)
 {
 	int r, i;
 	struct atom_clock_dividers dividers;
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index d16f2eebd95e..9c351dc8a9e0 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -737,11 +737,16 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index ca058589ddef..a4edd0702718 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -1620,14 +1620,14 @@ static int cypress_init_smc_table(struct radeon_device *rdev,
 	cypress_populate_smc_voltage_tables(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_EVERGREEN:
-        case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
+	case THERMAL_TYPE_EVERGREEN:
+	case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        default:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 2ad462896896..76c4bdf21b20 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1140,7 +1140,7 @@ static int sumo_set_uvd_clock(struct radeon_device *rdev, u32 clock,
 	int r, i;
 	struct atom_clock_dividers dividers;
 
-        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
 					   clock, false, &dividers);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 46f87d4aaf31..9e93205eb9e4 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -1816,8 +1816,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (idx_value & 0xfffffff0) +
-		         ((u64)(tmp & 0xff) << 32);
+			 (idx_value & 0xfffffff0) +
+			 ((u64)(tmp & 0xff) << 32);
 
 		ib[idx + 0] = offset;
 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -1862,8 +1862,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1897,8 +1897,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1925,8 +1925,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         radeon_get_ib_value(p, idx+1) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 radeon_get_ib_value(p, idx+1) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset;
 		ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2098,8 +2098,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			}
 
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2239,8 +2239,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = offset & 0xfffffff8;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -2261,8 +2261,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -2283,8 +2283,8 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 3cf04a2f44bb..f766c967a284 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -206,7 +206,7 @@ void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder,
  * build a AVI Info Frame
  */
 void evergreen_set_avi_packet(struct radeon_device *rdev, u32 offset,
-    unsigned char *buffer, size_t size)
+			      unsigned char *buffer, size_t size)
 {
 	uint8_t *frame = buffer + 3;
 
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index 2d71da448487..d0240743a17c 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -2640,7 +2640,7 @@ static int kv_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct kv_ps *ps;
@@ -2738,7 +2738,7 @@ int kv_dpm_init(struct radeon_device *rdev)
 	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++)
 		pi->at[i] = TRINITY_AT_DFLT;
 
-        pi->sram_end = SMC_RAM_END;
+	pi->sram_end = SMC_RAM_END;
 
 	/* Enabling nb dpm on an asrock system prevents dpm from working */
 	if (rdev->pdev->subsystem_vendor == 0x1849)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 158872eb78e4..b88d63c9be99 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1257,7 +1257,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
 		tmp &= ~0x00380000;
 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
-                tmp = RREG32_CG(CG_CGTT_LOCAL_1);
+		tmp = RREG32_CG(CG_CGTT_LOCAL_1);
 		tmp &= ~0x0e000000;
 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
 	}
@@ -2634,7 +2634,7 @@ int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
 	struct atom_clock_dividers dividers;
 	int r, i;
 
-        r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
 					   ecclk, false, &dividers);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index c3d531a1114b..4a601f990562 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -725,9 +725,9 @@ extern int ni_mc_load_microcode(struct radeon_device *rdev);
 
 struct ni_power_info *ni_get_pi(struct radeon_device *rdev)
 {
-        struct ni_power_info *pi = rdev->pm.dpm.priv;
+	struct ni_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 struct ni_ps *ni_get_ps(struct radeon_ps *rps)
@@ -1096,9 +1096,9 @@ static void ni_stop_smc(struct radeon_device *rdev)
 
 static int ni_process_firmware_header(struct radeon_device *rdev)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	u32 tmp;
 	int ret;
 
@@ -1202,14 +1202,14 @@ static int ni_enter_ulp_state(struct radeon_device *rdev)
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 
 	if (pi->gfx_clock_gating) {
-                WREG32_P(SCLK_PWRMGT_CNTL, 0, ~DYN_GFX_CLK_OFF_EN);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~DYN_GFX_CLK_OFF_EN);
 		WREG32_P(SCLK_PWRMGT_CNTL, GFX_CLK_FORCE_ON, ~GFX_CLK_FORCE_ON);
-                WREG32_P(SCLK_PWRMGT_CNTL, 0, ~GFX_CLK_FORCE_ON);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~GFX_CLK_FORCE_ON);
 		RREG32(GB_ADDR_CONFIG);
-        }
+	}
 
 	WREG32_P(SMC_MSG, HOST_SMC_MSG(PPSMC_MSG_SwitchToMinimumPower),
-                 ~HOST_SMC_MSG_MASK);
+		 ~HOST_SMC_MSG_MASK);
 
 	udelay(25000);
 
@@ -1321,12 +1321,12 @@ static void ni_populate_mvdd_value(struct radeon_device *rdev,
 				   u32 mclk,
 				   NISLANDS_SMC_VOLTAGE_VALUE *voltage)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 
 	if (!pi->mvdd_control) {
 		voltage->index = eg_pi->mvdd_high_index;
-                voltage->value = cpu_to_be16(MVDD_HIGH_VALUE);
+		voltage->value = cpu_to_be16(MVDD_HIGH_VALUE);
 		return;
 	}
 
@@ -1510,47 +1510,47 @@ int ni_copy_and_switch_arb_sets(struct radeon_device *rdev,
 	u32 mc_cg_config;
 
 	switch (arb_freq_src) {
-        case MC_CG_ARB_FREQ_F0:
+	case MC_CG_ARB_FREQ_F0:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE0_MASK) >> STATE0_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F1:
+	case MC_CG_ARB_FREQ_F1:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_1);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_1);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE1_MASK) >> STATE1_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F2:
+	case MC_CG_ARB_FREQ_F2:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_2);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_2);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE2_MASK) >> STATE2_SHIFT;
 		break;
-        case MC_CG_ARB_FREQ_F3:
+	case MC_CG_ARB_FREQ_F3:
 		mc_arb_dram_timing  = RREG32(MC_ARB_DRAM_TIMING_3);
 		mc_arb_dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2_3);
 		burst_time = (RREG32(MC_ARB_BURST_TIME) & STATE3_MASK) >> STATE3_SHIFT;
 		break;
-        default:
+	default:
 		return -EINVAL;
 	}
 
 	switch (arb_freq_dest) {
-        case MC_CG_ARB_FREQ_F0:
+	case MC_CG_ARB_FREQ_F0:
 		WREG32(MC_ARB_DRAM_TIMING, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE0(burst_time), ~STATE0_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F1:
+	case MC_CG_ARB_FREQ_F1:
 		WREG32(MC_ARB_DRAM_TIMING_1, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_1, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE1(burst_time), ~STATE1_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F2:
+	case MC_CG_ARB_FREQ_F2:
 		WREG32(MC_ARB_DRAM_TIMING_2, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_2, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE2(burst_time), ~STATE2_MASK);
 		break;
-        case MC_CG_ARB_FREQ_F3:
+	case MC_CG_ARB_FREQ_F3:
 		WREG32(MC_ARB_DRAM_TIMING_3, mc_arb_dram_timing);
 		WREG32(MC_ARB_DRAM_TIMING2_3, mc_arb_dram_timing2);
 		WREG32_P(MC_ARB_BURST_TIME, STATE3(burst_time), ~STATE3_MASK);
@@ -1621,9 +1621,7 @@ static int ni_populate_memory_timing_parameters(struct radeon_device *rdev,
 		(u8)rv770_calculate_memory_refresh_rate(rdev, pl->sclk);
 
 
-	radeon_atom_set_engine_dram_timings(rdev,
-                                            pl->sclk,
-                                            pl->mclk);
+	radeon_atom_set_engine_dram_timings(rdev, pl->sclk, pl->mclk);
 
 	dram_timing = RREG32(MC_ARB_DRAM_TIMING);
 	dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
@@ -1867,9 +1865,9 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 
 	mpll_ad_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN;
 
-        if (pi->mem_gddr5)
-                mpll_dq_func_cntl &= ~PDNB;
-        mpll_dq_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN | BYPASS;
+	if (pi->mem_gddr5)
+		mpll_dq_func_cntl &= ~PDNB;
+	mpll_dq_func_cntl_2 |= BIAS_GEN_PDNB | RESET_EN | BYPASS;
 
 
 	mclk_pwrmgt_cntl |= (MRDCKA0_RESET |
@@ -1891,15 +1889,15 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 			      MRDCKD1_PDNB);
 
 	dll_cntl |= (MRDCKA0_BYPASS |
-                     MRDCKA1_BYPASS |
-                     MRDCKB0_BYPASS |
-                     MRDCKB1_BYPASS |
-                     MRDCKC0_BYPASS |
-                     MRDCKC1_BYPASS |
-                     MRDCKD0_BYPASS |
-                     MRDCKD1_BYPASS);
-
-        spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
+		     MRDCKA1_BYPASS |
+		     MRDCKB0_BYPASS |
+		     MRDCKB1_BYPASS |
+		     MRDCKC0_BYPASS |
+		     MRDCKC1_BYPASS |
+		     MRDCKD0_BYPASS |
+		     MRDCKD1_BYPASS);
+
+	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
 	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
@@ -2089,7 +2087,7 @@ static int ni_populate_sclk_value(struct radeon_device *rdev,
 
 static int ni_init_smc_spll_table(struct radeon_device *rdev)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	SMC_NISLANDS_SPLL_DIV_TABLE *spll_table;
 	NISLANDS_SMC_SCLK_VALUE sclk_params;
@@ -2311,8 +2309,8 @@ static int ni_convert_power_level_to_smc(struct radeon_device *rdev,
 					 NISLANDS_SMC_HW_PERFORMANCE_LEVEL *level)
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	int ret;
 	bool dll_state_on;
 	u16 std_vddc;
@@ -2391,8 +2389,8 @@ static int ni_populate_smc_t(struct radeon_device *rdev,
 			     struct radeon_ps *radeon_state,
 			     NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	u32 a_t;
 	u32 t_l, t_h;
@@ -2451,8 +2449,8 @@ static int ni_populate_power_containment_values(struct radeon_device *rdev,
 						struct radeon_ps *radeon_state,
 						NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	u32 prev_sclk;
@@ -2595,7 +2593,7 @@ static int ni_enable_power_containment(struct radeon_device *rdev,
 				       struct radeon_ps *radeon_new_state,
 				       bool enable)
 {
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	PPSMC_Result smc_result;
 	int ret = 0;
 
@@ -2625,7 +2623,7 @@ static int ni_convert_power_state_to_smc(struct radeon_device *rdev,
 					 struct radeon_ps *radeon_state,
 					 NISLANDS_SMC_SWSTATE *smc_state)
 {
-        struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *state = ni_get_ps(radeon_state);
 	int i, ret;
@@ -2770,46 +2768,46 @@ static bool ni_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	bool result = true;
 
 	switch (in_reg) {
-        case  MC_SEQ_RAS_TIMING >> 2:
+	case  MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        case MC_SEQ_PMG_TIMING >> 2:
+	case MC_SEQ_PMG_TIMING >> 2:
 		*out_reg = MC_SEQ_PMG_TIMING_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS2 >> 2:
+	case MC_PMG_CMD_MRS2 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
@@ -2876,9 +2874,9 @@ static int ni_initialize_mc_reg_table(struct radeon_device *rdev)
 	struct ni_mc_reg_table *ni_table = &ni_pi->mc_reg_table;
 	u8 module_index = rv770_get_memory_module_index(rdev);
 
-        table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL);
-        if (!table)
-                return -ENOMEM;
+	table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
 
 	WREG32(MC_SEQ_RAS_TIMING_LP, RREG32(MC_SEQ_RAS_TIMING));
 	WREG32(MC_SEQ_CAS_TIMING_LP, RREG32(MC_SEQ_CAS_TIMING));
@@ -2896,25 +2894,25 @@ static int ni_initialize_mc_reg_table(struct radeon_device *rdev)
 
 	ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ret = ni_copy_vbios_mc_reg_table(table, ni_table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ni_set_s0_mc_reg_index(ni_table);
 
 	ret = ni_set_mc_special_registers(rdev, ni_table);
 
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	ni_set_valid_flag(ni_table);
 
 init_mc_done:
-        kfree(table);
+	kfree(table);
 
 	return ret;
 }
@@ -2994,7 +2992,7 @@ static int ni_populate_mc_reg_table(struct radeon_device *rdev,
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *boot_state = ni_get_ps(radeon_boot_state);
 	SMC_NIslands_MCRegisters *mc_reg_table = &ni_pi->smc_mc_reg_table;
 
@@ -3025,7 +3023,7 @@ static int ni_upload_mc_reg_table(struct radeon_device *rdev,
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	struct ni_ps *ni_new_state = ni_get_ps(radeon_new_state);
 	SMC_NIslands_MCRegisters *mc_reg_table = &ni_pi->smc_mc_reg_table;
 	u16 address;
@@ -3142,7 +3140,7 @@ static int ni_initialize_smc_cac_tables(struct radeon_device *rdev)
 	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 	PP_NIslands_CACTABLES *cac_tables = NULL;
 	int i, ret;
-        u32 reg;
+	u32 reg;
 
 	if (ni_pi->enable_cac == false)
 		return 0;
@@ -3422,13 +3420,13 @@ static int ni_pcie_performance_request(struct radeon_device *rdev,
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 
 	if ((perf_req == PCIE_PERF_REQ_PECI_GEN1) ||
-            (perf_req == PCIE_PERF_REQ_PECI_GEN2)) {
+	    (perf_req == PCIE_PERF_REQ_PECI_GEN2)) {
 		if (eg_pi->pcie_performance_request_registered == false)
 			radeon_acpi_pcie_notify_device_ready(rdev);
 		eg_pi->pcie_performance_request_registered = true;
 		return radeon_acpi_pcie_performance_request(rdev, perf_req, advertise);
 	} else if ((perf_req == PCIE_PERF_REQ_REMOVE_REGISTRY) &&
-                   eg_pi->pcie_performance_request_registered) {
+		    eg_pi->pcie_performance_request_registered) {
 		eg_pi->pcie_performance_request_registered = false;
 		return radeon_acpi_pcie_performance_request(rdev, perf_req, advertise);
 	}
@@ -3441,12 +3439,12 @@ static int ni_advertise_gen2_capability(struct radeon_device *rdev)
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	u32 tmp;
 
-        tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 
-        if ((tmp & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
-            (tmp & LC_OTHER_SIDE_SUPPORTS_GEN2))
-                pi->pcie_gen2 = true;
-        else
+	if ((tmp & LC_OTHER_SIDE_EVER_SENT_GEN2) &&
+	    (tmp & LC_OTHER_SIDE_SUPPORTS_GEN2))
+		pi->pcie_gen2 = true;
+	else
 		pi->pcie_gen2 = false;
 
 	if (!pi->pcie_gen2)
@@ -3458,8 +3456,8 @@ static int ni_advertise_gen2_capability(struct radeon_device *rdev)
 static void ni_enable_bif_dynamic_pcie_gen2(struct radeon_device *rdev,
 					    bool enable)
 {
-        struct rv7xx_power_info *pi = rv770_get_pi(rdev);
-        u32 tmp, bif;
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	u32 tmp, bif;
 
 	tmp = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
 
@@ -3502,7 +3500,7 @@ static void ni_enable_dynamic_pcie_gen2(struct radeon_device *rdev,
 	if (enable)
 		WREG32_P(GENERAL_PWRMGT, ENABLE_GEN2PCIE, ~ENABLE_GEN2PCIE);
 	else
-                WREG32_P(GENERAL_PWRMGT, 0, ~ENABLE_GEN2PCIE);
+		WREG32_P(GENERAL_PWRMGT, 0, ~ENABLE_GEN2PCIE);
 }
 
 void ni_set_uvd_clock_before_set_eng_clock(struct radeon_device *rdev,
@@ -3563,7 +3561,7 @@ void ni_update_current_ps(struct radeon_device *rdev,
 {
 	struct ni_ps *new_ps = ni_get_ps(rps);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 
 	eg_pi->current_rps = *rps;
 	ni_pi->current_ps = *new_ps;
@@ -3575,7 +3573,7 @@ void ni_update_requested_ps(struct radeon_device *rdev,
 {
 	struct ni_ps *new_ps = ni_get_ps(rps);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
-        struct ni_power_info *ni_pi = ni_get_pi(rdev);
+	struct ni_power_info *ni_pi = ni_get_pi(rdev);
 
 	eg_pi->requested_rps = *rps;
 	ni_pi->requested_ps = *new_ps;
@@ -3591,8 +3589,8 @@ int ni_dpm_enable(struct radeon_device *rdev)
 
 	if (pi->gfx_clock_gating)
 		ni_cg_clockgating_default(rdev);
-        if (btc_dpm_enabled(rdev))
-                return -EINVAL;
+	if (btc_dpm_enabled(rdev))
+		return -EINVAL;
 	if (pi->mg_clock_gating)
 		ni_mg_clockgating_default(rdev);
 	if (eg_pi->ls_clock_gating)
@@ -3991,7 +3989,7 @@ static int ni_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct ni_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5eae0a88dd3e..6e478a248628 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3732,11 +3732,17 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto free_ib;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto free_ib;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF) {
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cc2fdf0be37a..f86ab695ee8f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -235,8 +235,8 @@ int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 		fb_div |= 1;
 
 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
-        if (r)
-                return r;
+	if (r)
+		return r;
 
 	/* assert PLL_RESET */
 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
@@ -1490,7 +1490,7 @@ static int r600_mc_init(struct radeon_device *rdev)
 					rdev->fastfb_working = true;
 				}
 			}
-  		}
+		}
 	}
 
 	radeon_update_bandwidth_info(rdev);
@@ -3381,11 +3381,17 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		goto free_ib;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto free_ib;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto free_ib;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
@@ -4568,7 +4574,7 @@ uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 2f36fa1576e0..b69c8de35bd3 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1671,8 +1671,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (idx_value & 0xfffffff0) +
-		         ((u64)(tmp & 0xff) << 32);
+			 (idx_value & 0xfffffff0) +
+			 ((u64)(tmp & 0xff) << 32);
 
 		ib[idx + 0] = offset;
 		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
@@ -1712,8 +1712,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         idx_value +
-		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+			 idx_value +
+			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
 
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
@@ -1764,8 +1764,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			}
 
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffff0);
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -1876,8 +1876,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}
 			offset = reloc->gpu_offset +
-			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
-			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+				 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
+				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 			ib[idx+1] = offset & 0xfffffff8;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
@@ -1898,8 +1898,8 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 		}
 
 		offset = reloc->gpu_offset +
-		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
-		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
+			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
+			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
 
 		ib[idx+1] = offset & 0xfffffffc;
 		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index d2dd29ab24fa..fb65e6fb5c4f 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -368,11 +368,16 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 		return r;
 	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		return r;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		return -ETIMEDOUT;
 	}
+	r = 0;
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index fa2154493cf1..6a4b020dd0b4 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -844,7 +844,7 @@ int r600_get_platform_caps(struct radeon_device *rdev)
 	struct radeon_mode_info *mode_info = &rdev->mode_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -874,7 +874,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 	union fan_info *fan_info;
 	ATOM_PPLIB_Clock_Voltage_Dependency_Table *dep_table;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	int ret, i;
 
@@ -1070,7 +1070,7 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 			ext_hdr->usVCETableOffset) {
 			VCEClockInfoArray *array = (VCEClockInfoArray *)
 				(mode_info->atom_context->bios + data_offset +
-                                 le16_to_cpu(ext_hdr->usVCETableOffset) + 1);
+				 le16_to_cpu(ext_hdr->usVCETableOffset) + 1);
 			ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *limits =
 				(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *)
 				(mode_info->atom_context->bios + data_offset +
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index e85894ade95c..e82a99cb2459 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -215,7 +215,7 @@ void r600_hdmi_update_acr(struct drm_encoder *encoder, long offset,
  * build a HDMI Video Info Frame
  */
 void r600_set_avi_packet(struct radeon_device *rdev, u32 offset,
-    unsigned char *buffer, size_t size)
+			 unsigned char *buffer, size_t size)
 {
 	uint8_t *frame = buffer + 3;
 
@@ -312,7 +312,7 @@ void r600_hdmi_audio_workaround(struct drm_encoder *encoder)
 }
 
 void r600_hdmi_audio_set_dto(struct radeon_device *rdev,
-    struct radeon_crtc *crtc, unsigned int clock)
+			     struct radeon_crtc *crtc, unsigned int clock)
 {
 	struct radeon_encoder *radeon_encoder;
 	struct radeon_encoder_atom_dig *dig;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 78a51b3eda10..007be29a0020 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -120,6 +120,7 @@ extern int radeon_mst;
  */
 #define RADEON_MAX_USEC_TIMEOUT			100000	/* 100 ms */
 #define RADEON_FENCE_JIFFIES_TIMEOUT		(HZ / 2)
+#define RADEON_USEC_IB_TEST_TIMEOUT		1000000 /* 1s */
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE			16
 #define RADEON_DEBUGFS_MAX_COMPONENTS		32
@@ -382,6 +383,7 @@ void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
+long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, long timeout);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.h b/drivers/gpu/drm/radeon/radeon_acpi.h
index be4af76f213d..cd872f7953c6 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.h
+++ b/drivers/gpu/drm/radeon/radeon_acpi.h
@@ -291,6 +291,8 @@ int radeon_atif_handler(struct radeon_device *rdev,
 #       define ATPX_FIXED_NOT_SUPPORTED                            (1 << 9)
 #       define ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED               (1 << 10)
 #       define ATPX_DGPU_REQ_POWER_FOR_DISPLAYS                    (1 << 11)
+#       define ATPX_DGPU_CAN_DRIVE_DISPLAYS                        (1 << 12)
+#       define ATPX_MS_HYBRID_GFX_SUPPORTED                        (1 << 14)
 #define ATPX_FUNCTION_POWER_CONTROL                                0x2
 /* ARG0: ATPX_FUNCTION_POWER_CONTROL
  * ARG1:
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index de9a2ffcf5f7..f8097a0e7a79 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -2095,7 +2095,7 @@ static int radeon_atombios_parse_power_table_1_3(struct radeon_device *rdev)
 	struct radeon_i2c_bus_rec i2c_bus;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -2575,7 +2575,7 @@ static int radeon_atombios_parse_power_table_4_5(struct radeon_device *rdev)
 	bool valid;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 
 	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
@@ -2666,7 +2666,7 @@ static int radeon_atombios_parse_power_table_6(struct radeon_device *rdev)
 	bool valid;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index c4b4f298a283..fd8c4d317e60 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -62,6 +62,10 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,10 +145,6 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	atpx->functions.power_cntl = true;
-
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
@@ -551,13 +551,14 @@ static bool radeon_atpx_detect(void)
 void radeon_register_atpx_handler(void)
 {
 	bool r;
+	enum vga_switcheroo_handler_flags_t handler_flags = 0;
 
 	/* detect if we have any ATPX + 2 VGA in the system */
 	r = radeon_atpx_detect();
 	if (!r)
 		return;
 
-	vga_switcheroo_register_handler(&radeon_atpx_handler);
+	vga_switcheroo_register_handler(&radeon_atpx_handler, handler_flags);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 340f3f549f29..cfcc099c537d 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -34,6 +34,7 @@
 #include "atom.h"
 
 #include <linux/pm_runtime.h>
+#include <linux/vga_switcheroo.h>
 
 static int radeon_dp_handle_hpd(struct drm_connector *connector)
 {
@@ -344,6 +345,11 @@ static void radeon_connector_get_edid(struct drm_connector *connector)
 		else if (radeon_connector->ddc_bus)
 			radeon_connector->edid = drm_get_edid(&radeon_connector->base,
 							      &radeon_connector->ddc_bus->adapter);
+	} else if (vga_switcheroo_handler_flags() & VGA_SWITCHEROO_CAN_SWITCH_DDC &&
+		   connector->connector_type == DRM_MODE_CONNECTOR_LVDS &&
+		   radeon_connector->ddc_bus) {
+		radeon_connector->edid = drm_get_edid_switcheroo(&radeon_connector->base,
+								 &radeon_connector->ddc_bus->adapter);
 	} else if (radeon_connector->ddc_bus) {
 		radeon_connector->edid = drm_get_edid(&radeon_connector->base,
 						      &radeon_connector->ddc_bus->adapter);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 4197ca1bb1e4..4fd1a961012d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -103,6 +103,12 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -1155,9 +1161,9 @@ static void radeon_check_arguments(struct radeon_device *rdev)
 		radeon_vm_size = 4;
 	}
 
-       /*
-        * Max GPUVM size for Cayman, SI and CI are 40 bits.
-        */
+	/*
+	 * Max GPUVM size for Cayman, SI and CI are 40 bits.
+	 */
 	if (radeon_vm_size > 1024) {
 		dev_warn(rdev->dev, "VM size (%d) too large, max is 1TB\n",
 			 radeon_vm_size);
@@ -1433,7 +1439,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 
-	if (rdev->flags & RADEON_IS_PX)
+	if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl())
 		runtime = true;
 	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime);
 	if (runtime)
@@ -1895,7 +1901,7 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
 		DRM_ERROR("Reached maximum number of debugfs components.\n");
 		DRM_ERROR("Report so we increase "
-		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
+			  "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
 	rdev->debugfs[rdev->debugfs_count].files = files;
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 2d9196a447fd..fcc7483d3f7b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -407,7 +407,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
 	unsigned repcnt = 4;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
 
-        down_read(&rdev->exclusive_lock);
+	down_read(&rdev->exclusive_lock);
 	if (work->fence) {
 		struct radeon_fence *fence;
 
@@ -919,7 +919,7 @@ static void avivo_reduce_ratio(unsigned *nom, unsigned *den,
 	*den /= tmp;
 
 	/* make sure nominator is large enough */
-        if (*nom < nom_min) {
+	if (*nom < nom_min) {
 		tmp = DIV_ROUND_UP(nom_min, *nom);
 		*nom *= tmp;
 		*den *= tmp;
@@ -959,7 +959,7 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div,
 	*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
 
 	/* limit fb divider to its maximum */
-        if (*fb_div > fb_div_max) {
+	if (*fb_div > fb_div_max) {
 		*ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
 		*fb_div = fb_div_max;
 	}
@@ -1683,10 +1683,8 @@ int radeon_modeset_init(struct radeon_device *rdev)
 	/* setup afmt */
 	radeon_afmt_init(rdev);
 
-	if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
-		radeon_fbdev_init(rdev);
-		drm_kms_helper_poll_init(rdev->ddev);
-	}
+	radeon_fbdev_init(rdev);
+	drm_kms_helper_poll_init(rdev->ddev);
 
 	/* do pm late init */
 	ret = radeon_pm_late_init(rdev);
@@ -1699,6 +1697,9 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 	radeon_fbdev_fini(rdev);
 	kfree(rdev->mode_info.bios_hardcoded_edid);
 
+	/* free i2c buses */
+	radeon_i2c_fini(rdev);
+
 	if (rdev->mode_info.mode_config_initialized) {
 		radeon_afmt_fini(rdev);
 		drm_kms_helper_poll_fini(rdev->ddev);
@@ -1706,8 +1707,6 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 		drm_mode_config_cleanup(rdev->ddev);
 		rdev->mode_info.mode_config_initialized = false;
 	}
-	/* free i2c buses */
-	radeon_i2c_fini(rdev);
 }
 
 static bool is_hdtv_mode(const struct drm_display_mode *mode)
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index df7a1719c841..43cffb526b0c 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -510,6 +510,7 @@ static bool radeon_mst_mode_fixup(struct drm_encoder *encoder,
 {
 	struct radeon_encoder_mst *mst_enc;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_connector_atom_dig *dig_connector;
 	int bpp = 24;
 
 	mst_enc = radeon_encoder->enc_priv;
@@ -523,22 +524,11 @@ static bool radeon_mst_mode_fixup(struct drm_encoder *encoder,
 
 
 	drm_mode_set_crtcinfo(adjusted_mode, 0);
-	{
-	  struct radeon_connector_atom_dig *dig_connector;
-	  int ret;
-
-	  dig_connector = mst_enc->connector->con_priv;
-	  ret = radeon_dp_get_dp_link_config(&mst_enc->connector->base,
-					     dig_connector->dpcd, adjusted_mode->clock,
-					     &dig_connector->dp_lane_count,
-					     &dig_connector->dp_clock);
-	  if (ret) {
-		  dig_connector->dp_lane_count = 0;
-		  dig_connector->dp_clock = 0;
-	  }
-	  DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
-			dig_connector->dp_lane_count, dig_connector->dp_clock);
-	}
+	dig_connector = mst_enc->connector->con_priv;
+	dig_connector->dp_lane_count = drm_dp_max_lane_count(dig_connector->dpcd);
+	dig_connector->dp_clock = drm_dp_max_link_rate(dig_connector->dpcd);
+	DRM_DEBUG_KMS("dig clock %p %d %d\n", dig_connector,
+		      dig_connector->dp_lane_count, dig_connector->dp_clock);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index e266ffc520d2..ccd4ad4ee592 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -34,9 +34,11 @@
 #include "radeon_drv.h"
 
 #include <drm/drm_pciids.h>
+#include <linux/apple-gmux.h>
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
+#include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 #include <drm/drm_gem.h>
 
@@ -319,6 +321,23 @@ static int radeon_pci_probe(struct pci_dev *pdev,
 {
 	int ret;
 
+	/*
+	 * Initialize amdkfd before starting radeon. If it was not loaded yet,
+	 * defer radeon probing
+	 */
+	ret = radeon_kfd_init();
+	if (ret == -EPROBE_DEFER)
+		return ret;
+
+	/*
+	 * apple-gmux is needed on dual GPU MacBook Pro
+	 * to probe the panel if we're the inactive GPU.
+	 */
+	if (IS_ENABLED(CONFIG_VGA_ARB) && IS_ENABLED(CONFIG_VGA_SWITCHEROO) &&
+	    apple_gmux_present() && pdev != vga_default_device() &&
+	    !vga_switcheroo_handler_flags())
+		return -EPROBE_DEFER;
+
 	/* Get rid of things like offb */
 	ret = radeon_kick_out_firmware_fb(pdev);
 	if (ret)
@@ -570,8 +589,6 @@ static int __init radeon_init(void)
 		return -EINVAL;
 	}
 
-	radeon_kfd_init();
-
 	/* let modprobe override vga console setting */
 	return drm_pci_init(driver, pdriver);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index d2e628eea53d..0e3143acb565 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -38,9 +38,9 @@
 #include <linux/vga_switcheroo.h>
 
 /* object hierarchy -
-   this contains a helper + a radeon fb
-   the helper contains a pointer to radeon framebuffer baseclass.
-*/
+ * this contains a helper + a radeon fb
+ * the helper contains a pointer to radeon framebuffer baseclass.
+ */
 struct radeon_fbdev {
 	struct drm_fb_helper helper;
 	struct radeon_framebuffer rfb;
@@ -292,7 +292,8 @@ out_unref:
 
 void radeon_fb_output_poll_changed(struct radeon_device *rdev)
 {
-	drm_fb_helper_hotplug_event(&rdev->mode_info.rfbdev->helper);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_hotplug_event(&rdev->mode_info.rfbdev->helper);
 }
 
 static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfbdev)
@@ -325,6 +326,10 @@ int radeon_fbdev_init(struct radeon_device *rdev)
 	int bpp_sel = 32;
 	int ret;
 
+	/* don't enable fbdev if no connectors */
+	if (list_empty(&rdev->ddev->mode_config.connector_list))
+		return 0;
+
 	/* select 8 bpp console on RN50 or 16MB cards */
 	if (ASIC_IS_RN50(rdev) || rdev->mc.real_vram_size <= (32*1024*1024))
 		bpp_sel = 8;
@@ -377,11 +382,15 @@ void radeon_fbdev_fini(struct radeon_device *rdev)
 
 void radeon_fbdev_set_suspend(struct radeon_device *rdev, int state)
 {
-	fb_set_suspend(rdev->mode_info.rfbdev->helper.fbdev, state);
+	if (rdev->mode_info.rfbdev)
+		fb_set_suspend(rdev->mode_info.rfbdev->helper.fbdev, state);
 }
 
 bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj)
 {
+	if (!rdev->mode_info.rfbdev)
+		return false;
+
 	if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->rfb.obj))
 		return true;
 	return false;
@@ -389,12 +398,14 @@ bool radeon_fbdev_robj_is_fb(struct radeon_device *rdev, struct radeon_bo *robj)
 
 void radeon_fb_add_connector(struct radeon_device *rdev, struct drm_connector *connector)
 {
-	drm_fb_helper_add_one_connector(&rdev->mode_info.rfbdev->helper, connector);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_add_one_connector(&rdev->mode_info.rfbdev->helper, connector);
 }
 
 void radeon_fb_remove_connector(struct radeon_device *rdev, struct drm_connector *connector)
 {
-	drm_fb_helper_remove_one_connector(&rdev->mode_info.rfbdev->helper, connector);
+	if (rdev->mode_info.rfbdev)
+		drm_fb_helper_remove_one_connector(&rdev->mode_info.rfbdev->helper, connector);
 }
 
 void radeon_fbdev_restore_mode(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 05815c47b246..7ef075acde9c 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -527,7 +527,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
 }
 
 /**
- * radeon_fence_wait - wait for a fence to signal
+ * radeon_fence_wait_timeout - wait for a fence to signal with timeout
  *
  * @fence: radeon fence object
  * @intr: use interruptible sleep
@@ -535,12 +535,15 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
  * Wait for the requested fence to signal (all asics).
  * @intr selects whether to use interruptable (true) or non-interruptable
  * (false) sleep when waiting for the fence.
- * Returns 0 if the fence has passed, error for all other cases.
+ * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
+ * Returns remaining time if the sequence number has passed, 0 when
+ * the wait timeout, or an error for all other cases.
  */
-int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
 	long r;
+	int r_sig;
 
 	/*
 	 * This function should not be called on !radeon fences.
@@ -552,15 +555,36 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 		return fence_wait(&fence->base, intr);
 
 	seq[fence->ring] = fence->seq;
-	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
-	if (r < 0) {
+	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout);
+	if (r <= 0) {
 		return r;
 	}
 
-	r = fence_signal(&fence->base);
-	if (!r)
+	r_sig = fence_signal(&fence->base);
+	if (!r_sig)
 		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
-	return 0;
+	return r;
+}
+
+/**
+ * radeon_fence_wait - wait for a fence to signal
+ *
+ * @fence: radeon fence object
+ * @intr: use interruptible sleep
+ *
+ * Wait for the requested fence to signal (all asics).
+ * @intr selects whether to use interruptable (true) or non-interruptable
+ * (false) sleep when waiting for the fence.
+ * Returns 0 if the fence has passed, error for all other cases.
+ */
+int radeon_fence_wait(struct radeon_fence *fence, bool intr)
+{
+	long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r > 0) {
+		return 0;
+	} else {
+		return r;
+	}
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index c39ce1f05703..92ce0e533bc0 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -274,7 +274,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 			if (i == RADEON_RING_TYPE_GFX_INDEX) {
 				/* oh, oh, that's really bad */
 				DRM_ERROR("radeon: failed testing IB on GFX ring (%d).\n", r);
-		                rdev->accel_working = false;
+				rdev->accel_working = false;
 				return r;
 
 			} else {
@@ -304,7 +304,7 @@ static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
 }
 
 static struct drm_info_list radeon_debugfs_sa_list[] = {
-        {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
+	{"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
 };
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 9a4d69e59401..87a9ebb5f58f 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -132,35 +132,34 @@ static const struct kfd2kgd_calls kfd2kgd = {
 
 static const struct kgd2kfd_calls *kgd2kfd;
 
-bool radeon_kfd_init(void)
+int radeon_kfd_init(void)
 {
+	int ret;
+
 #if defined(CONFIG_HSA_AMD_MODULE)
-	bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
 
 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);
 
 	if (kgd2kfd_init_p == NULL)
-		return false;
+		return -ENOENT;
 
-	if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
+	if (ret) {
 		symbol_put(kgd2kfd_init);
 		kgd2kfd = NULL;
-
-		return false;
 	}
 
-	return true;
 #elif defined(CONFIG_HSA_AMD)
-	if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
+	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
+	if (ret)
 		kgd2kfd = NULL;
 
-		return false;
-	}
-
-	return true;
 #else
-	return false;
+	ret = -ENOENT;
 #endif
+
+	return ret;
 }
 
 void radeon_kfd_fini(void)
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h
index 1103f9082f6b..9df1fea8e971 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.h
+++ b/drivers/gpu/drm/radeon/radeon_kfd.h
@@ -33,7 +33,7 @@
 
 struct radeon_device;
 
-bool radeon_kfd_init(void);
+int radeon_kfd_init(void);
 void radeon_kfd_fini(void);
 
 void radeon_kfd_suspend(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 32b338ff436b..24152dfef199 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -331,13 +331,13 @@ static void radeon_crtc_dpms(struct drm_crtc *crtc, int mode)
 									 RADEON_CRTC_DISP_REQ_EN_B));
 			WREG32_P(RADEON_CRTC_EXT_CNTL, crtc_ext_cntl, ~(mask | crtc_ext_cntl));
 		}
-		drm_vblank_post_modeset(dev, radeon_crtc->crtc_id);
+		drm_vblank_on(dev, radeon_crtc->crtc_id);
 		radeon_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_pre_modeset(dev, radeon_crtc->crtc_id);
+		drm_vblank_off(dev, radeon_crtc->crtc_id);
 		if (radeon_crtc->crtc_id)
 			WREG32_P(RADEON_CRTC2_GEN_CNTL, mask, ~(RADEON_CRTC2_EN | mask));
 		else {
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 88dc973fb209..868c3ba2efaa 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -818,52 +818,52 @@ static void radeon_legacy_tmds_int_mode_set(struct drm_encoder *encoder,
 	tmds_transmitter_cntl = RREG32(RADEON_TMDS_TRANSMITTER_CNTL) &
 		~(RADEON_TMDS_TRANSMITTER_PLLRST);
 
-    if (rdev->family == CHIP_R200 ||
-	rdev->family == CHIP_R100 ||
-	ASIC_IS_R300(rdev))
-	    tmds_transmitter_cntl &= ~(RADEON_TMDS_TRANSMITTER_PLLEN);
-    else /* RV chips got this bit reversed */
-	    tmds_transmitter_cntl |= RADEON_TMDS_TRANSMITTER_PLLEN;
-
-    fp_gen_cntl = (RREG32(RADEON_FP_GEN_CNTL) |
-		   (RADEON_FP_CRTC_DONT_SHADOW_VPAR |
-		    RADEON_FP_CRTC_DONT_SHADOW_HEND));
-
-    fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
-
-    fp_gen_cntl &= ~(RADEON_FP_RMX_HVSYNC_CONTROL_EN |
-		     RADEON_FP_DFP_SYNC_SEL |
-		     RADEON_FP_CRT_SYNC_SEL |
-		     RADEON_FP_CRTC_LOCK_8DOT |
-		     RADEON_FP_USE_SHADOW_EN |
-		     RADEON_FP_CRTC_USE_SHADOW_VEND |
-		     RADEON_FP_CRT_SYNC_ALT);
-
-    if (1) /*  FIXME rgbBits == 8 */
-	    fp_gen_cntl |= RADEON_FP_PANEL_FORMAT;  /* 24 bit format */
-    else
-	    fp_gen_cntl &= ~RADEON_FP_PANEL_FORMAT;/* 18 bit format */
-
-    if (radeon_crtc->crtc_id == 0) {
-	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
-		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
-		    if (radeon_encoder->rmx_type != RMX_OFF)
-			    fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
-		    else
-			    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
-	    } else
-		    fp_gen_cntl &= ~RADEON_FP_SEL_CRTC2;
-    } else {
-	    if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
-		    fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
-		    fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC2;
-	    } else
-		    fp_gen_cntl |= RADEON_FP_SEL_CRTC2;
-    }
-
-    WREG32(RADEON_TMDS_PLL_CNTL, tmds_pll_cntl);
-    WREG32(RADEON_TMDS_TRANSMITTER_CNTL, tmds_transmitter_cntl);
-    WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
+	if (rdev->family == CHIP_R200 ||
+	    rdev->family == CHIP_R100 ||
+	    ASIC_IS_R300(rdev))
+		tmds_transmitter_cntl &= ~(RADEON_TMDS_TRANSMITTER_PLLEN);
+	else /* RV chips got this bit reversed */
+		tmds_transmitter_cntl |= RADEON_TMDS_TRANSMITTER_PLLEN;
+
+	fp_gen_cntl = (RREG32(RADEON_FP_GEN_CNTL) |
+		      (RADEON_FP_CRTC_DONT_SHADOW_VPAR |
+		       RADEON_FP_CRTC_DONT_SHADOW_HEND));
+
+	fp_gen_cntl &= ~(RADEON_FP_FPON | RADEON_FP_TMDS_EN);
+
+	fp_gen_cntl &= ~(RADEON_FP_RMX_HVSYNC_CONTROL_EN |
+			 RADEON_FP_DFP_SYNC_SEL |
+			 RADEON_FP_CRT_SYNC_SEL |
+			 RADEON_FP_CRTC_LOCK_8DOT |
+			 RADEON_FP_USE_SHADOW_EN |
+			 RADEON_FP_CRTC_USE_SHADOW_VEND |
+			 RADEON_FP_CRT_SYNC_ALT);
+
+	if (1) /*  FIXME rgbBits == 8 */
+		fp_gen_cntl |= RADEON_FP_PANEL_FORMAT;  /* 24 bit format */
+	else
+		fp_gen_cntl &= ~RADEON_FP_PANEL_FORMAT;/* 18 bit format */
+
+	if (radeon_crtc->crtc_id == 0) {
+		if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+			fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+			if (radeon_encoder->rmx_type != RMX_OFF)
+				fp_gen_cntl |= R200_FP_SOURCE_SEL_RMX;
+			else
+				fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC1;
+		} else
+			fp_gen_cntl &= ~RADEON_FP_SEL_CRTC2;
+	} else {
+		if (ASIC_IS_R300(rdev) || rdev->family == CHIP_R200) {
+			fp_gen_cntl &= ~R200_FP_SOURCE_SEL_MASK;
+			fp_gen_cntl |= R200_FP_SOURCE_SEL_CRTC2;
+		} else
+			fp_gen_cntl |= RADEON_FP_SEL_CRTC2;
+	}
+
+	WREG32(RADEON_TMDS_PLL_CNTL, tmds_pll_cntl);
+	WREG32(RADEON_TMDS_TRANSMITTER_CNTL, tmds_transmitter_cntl);
+	WREG32(RADEON_FP_GEN_CNTL, fp_gen_cntl);
 
 	if (rdev->is_atom_bios)
 		radeon_atombios_encoder_crtc_scratch_regs(encoder, radeon_crtc->crtc_id);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index fb6ad143873f..dd46c38676db 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -214,8 +214,8 @@ int radeon_bo_create(struct radeon_device *rdev,
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
 	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
-	                               RADEON_GEM_DOMAIN_GTT |
-	                               RADEON_GEM_DOMAIN_CPU);
+				       RADEON_GEM_DOMAIN_GTT |
+				       RADEON_GEM_DOMAIN_CPU);
 
 	bo->flags = flags;
 	/* PCI GART is always snooped */
@@ -848,7 +848,7 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
  *
  */
 void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
-                     bool shared)
+		     bool shared)
 {
 	struct reservation_object *resv = bo->tbo.resv;
 
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 7a98823bacd1..38226d925a5b 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -79,7 +79,7 @@ void radeon_pm_acpi_event_handler(struct radeon_device *rdev)
 				radeon_dpm_enable_bapm(rdev, rdev->pm.dpm.ac_power);
 		}
 		mutex_unlock(&rdev->pm.mutex);
-        } else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
+	} else if (rdev->pm.pm_method == PM_METHOD_PROFILE) {
 		if (rdev->pm.profile == PM_PROFILE_AUTO) {
 			mutex_lock(&rdev->pm.mutex);
 			radeon_pm_update_profile(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index e6ad54cdfa62..b0eb28e8fb73 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -56,7 +56,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 }
 
 bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
-			          struct radeon_semaphore *semaphore)
+				  struct radeon_semaphore *semaphore)
 {
 	struct radeon_ring *ring = &rdev->ring[ridx];
 
@@ -73,7 +73,7 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
 }
 
 bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
-			        struct radeon_semaphore *semaphore)
+				struct radeon_semaphore *semaphore)
 {
 	struct radeon_ring *ring = &rdev->ring[ridx];
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e06ac546a90f..6d8c32377c6f 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -554,8 +554,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
 		struct page **pages = ttm->pages + pinned;
 
-		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
+		r = get_user_pages(userptr, num_pages, write, 0, pages, NULL);
 		if (r < 0)
 			goto release_pages;
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 6edcb5485092..6fe9e4e76284 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -722,9 +722,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
 	return r;
 }
 
-/* multiple fence commands without any stream commands in between can
-   crash the vcpu so just try to emmit a dummy create/destroy msg to
-   avoid this */
+/*
+ * multiple fence commands without any stream commands in between can
+ * crash the vcpu so just try to emmit a dummy create/destroy msg to
+ * avoid this
+ */
 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 			      uint32_t handle, struct radeon_fence **fence)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 7eb1ae758906..c1c619facb47 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -166,7 +166,7 @@ int radeon_vce_init(struct radeon_device *rdev)
 	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
 		atomic_set(&rdev->vce.handles[i], 0);
 		rdev->vce.filp[i] = NULL;
-        }
+	}
 
 	return 0;
 }
@@ -389,7 +389,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
-	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
 
 	if (fence)
@@ -446,7 +446,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
-	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 	}
 
 	if (fence)
@@ -769,18 +769,18 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-	        if (vce_v1_0_get_rptr(rdev, ring) != rptr)
-	                break;
-	        DRM_UDELAY(1);
+		if (vce_v1_0_get_rptr(rdev, ring) != rptr)
+			break;
+		DRM_UDELAY(1);
 	}
 
 	if (i < rdev->usec_timeout) {
-	        DRM_INFO("ring test on %d succeeded in %d usecs\n",
-	                 ring->idx, i);
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
 	} else {
-	        DRM_ERROR("radeon: ring %d test failed\n",
-	                  ring->idx);
-	        r = -ETIMEDOUT;
+		DRM_ERROR("radeon: ring %d test failed\n",
+			 ring->idx);
+		r = -ETIMEDOUT;
 	}
 
 	return r;
@@ -810,11 +810,16 @@ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		goto error;
 	}
 
-	r = radeon_fence_wait(fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
 	} else {
-	        DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	}
 error:
 	radeon_fence_unref(&fence);
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 3979632b9225..a1358748cea5 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -611,15 +611,16 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
  */
 static uint32_t radeon_vm_page_flags(uint32_t flags)
 {
-        uint32_t hw_flags = 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
-        if (flags & RADEON_VM_PAGE_SYSTEM) {
-                hw_flags |= R600_PTE_SYSTEM;
-                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
-        }
-        return hw_flags;
+	uint32_t hw_flags = 0;
+
+	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
+	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
+	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
+	if (flags & RADEON_VM_PAGE_SYSTEM) {
+		hw_flags |= R600_PTE_SYSTEM;
+		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
+	}
+	return hw_flags;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c
index cb0afe78abed..94b48fc1e266 100644
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@@ -795,7 +795,7 @@ static int rs780_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct igp_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index 97e5a6f1ce58..25e29303b119 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -209,7 +209,7 @@ static struct rv6xx_sclk_stepping rv6xx_next_vco_step(struct radeon_device *rdev
 
 static bool rv6xx_can_step_post_div(struct radeon_device *rdev,
 				    struct rv6xx_sclk_stepping *cur,
-                                    struct rv6xx_sclk_stepping *target)
+				    struct rv6xx_sclk_stepping *target)
 {
 	return (cur->post_divider > target->post_divider) &&
 		((cur->vco_frequency * target->post_divider) <=
@@ -239,7 +239,7 @@ static bool rv6xx_reached_stepping_target(struct radeon_device *rdev,
 
 static void rv6xx_generate_steps(struct radeon_device *rdev,
 				 u32 low, u32 high,
-                                 u32 start_index, u8 *end_index)
+				 u32 start_index, u8 *end_index)
 {
 	struct rv6xx_sclk_stepping cur;
 	struct rv6xx_sclk_stepping target;
@@ -1356,23 +1356,23 @@ static void rv6xx_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	enum radeon_dpm_event_src dpm_event_src;
 
 	switch (sources) {
-        case 0:
-        default:
+	case 0:
+	default:
 		want_thermal_protection = false;
 		break;
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
 		break;
 
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL;
 		break;
 
-        case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
+	case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
 	      (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)):
-		want_thermal_protection = true;
+			want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL;
 		break;
 	}
@@ -1879,7 +1879,7 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct rv6xx_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c
index c4c8da501da8..4b850824fe06 100644
--- a/drivers/gpu/drm/radeon/rv740_dpm.c
+++ b/drivers/gpu/drm/radeon/rv740_dpm.c
@@ -36,28 +36,28 @@ u32 rv740_get_decoded_reference_divider(u32 encoded_ref)
 	u32 ref = 0;
 
 	switch (encoded_ref) {
-        case 0:
+	case 0:
 		ref = 1;
 		break;
-        case 16:
+	case 16:
 		ref = 2;
 		break;
-        case 17:
+	case 17:
 		ref = 3;
 		break;
-        case 18:
+	case 18:
 		ref = 2;
 		break;
-        case 19:
+	case 19:
 		ref = 3;
 		break;
-        case 20:
+	case 20:
 		ref = 4;
 		break;
-        case 21:
+	case 21:
 		ref = 5;
 		break;
-        default:
+	default:
 		DRM_ERROR("Invalid encoded Reference Divider\n");
 		ref = 0;
 		break;
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index e830c8935db0..a010decf59af 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -345,27 +345,27 @@ static int rv770_encode_yclk_post_div(u32 postdiv, u32 *encoded_postdiv)
 	int ret = 0;
 
 	switch (postdiv) {
-        case 1:
+	case 1:
 		*encoded_postdiv = 0;
 		break;
-        case 2:
+	case 2:
 		*encoded_postdiv = 1;
 		break;
-        case 4:
+	case 4:
 		*encoded_postdiv = 2;
 		break;
-        case 8:
+	case 8:
 		*encoded_postdiv = 3;
 		break;
-        case 16:
+	case 16:
 		*encoded_postdiv = 4;
 		break;
-        default:
+	default:
 		ret = -EINVAL;
 		break;
 	}
 
-    return ret;
+	return ret;
 }
 
 u32 rv770_map_clkf_to_ibias(struct radeon_device *rdev, u32 clkf)
@@ -1175,15 +1175,15 @@ static int rv770_init_smc_table(struct radeon_device *rdev,
 	rv770_populate_smc_mvdd_table(rdev, table);
 
 	switch (rdev->pm.int_thermal_type) {
-        case THERMAL_TYPE_RV770:
-        case THERMAL_TYPE_ADT7473_WITH_INTERNAL:
+	case THERMAL_TYPE_RV770:
+	case THERMAL_TYPE_ADT7473_WITH_INTERNAL:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_INTERNAL;
 		break;
-        case THERMAL_TYPE_NONE:
+	case THERMAL_TYPE_NONE:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_NONE;
 		break;
-        case THERMAL_TYPE_EXTERNAL_GPIO:
-        default:
+	case THERMAL_TYPE_EXTERNAL_GPIO:
+	default:
 		table->thermalProtectType = PPSMC_THERMAL_PROTECT_TYPE_EXTERNAL;
 		break;
 	}
@@ -1567,18 +1567,18 @@ void rv770_reset_smio_status(struct radeon_device *rdev)
 	sw_smio_index =
 		(RREG32(GENERAL_PWRMGT) & SW_SMIO_INDEX_MASK) >> SW_SMIO_INDEX_SHIFT;
 	switch (sw_smio_index) {
-        case 3:
+	case 3:
 		vid_smio_cntl = RREG32(S3_VID_LOWER_SMIO_CNTL);
 		break;
-        case 2:
+	case 2:
 		vid_smio_cntl = RREG32(S2_VID_LOWER_SMIO_CNTL);
 		break;
-        case 1:
+	case 1:
 		vid_smio_cntl = RREG32(S1_VID_LOWER_SMIO_CNTL);
 		break;
-        case 0:
+	case 0:
 		return;
-        default:
+	default:
 		vid_smio_cntl = pi->s0_vid_lower_smio_cntl;
 		break;
 	}
@@ -1817,21 +1817,21 @@ static void rv770_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	enum radeon_dpm_event_src dpm_event_src;
 
 	switch (sources) {
-        case 0:
-        default:
+	case 0:
+	default:
 		want_thermal_protection = false;
 		break;
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
 		break;
 
-        case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL;
 		break;
 
-        case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
+	case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
 	      (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL;
@@ -2273,7 +2273,7 @@ int rv7xx_parse_power_table(struct radeon_device *rdev)
 	union pplib_clock_info *clock_info;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	struct rv7xx_ps *ps;
 
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index f878d6962da5..ae21550fe767 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1307,7 +1307,7 @@ int si_get_allowed_info_register(struct radeon_device *rdev,
  */
 u32 si_get_xclk(struct radeon_device *rdev)
 {
-        u32 reference_clock = rdev->clock.spll.reference_freq;
+	u32 reference_clock = rdev->clock.spll.reference_freq;
 	u32 tmp;
 
 	tmp = RREG32(CG_CLKPIN_CNTL_2);
@@ -2442,8 +2442,10 @@ void dce6_bandwidth_update(struct radeon_device *rdev)
  */
 static void si_tiling_mode_table_init(struct radeon_device *rdev)
 {
-	const u32 num_tile_mode_states = 32;
-	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+	u32 *tile = rdev->config.si.tile_mode_array;
+	const u32 num_tile_mode_states =
+			ARRAY_SIZE(rdev->config.si.tile_mode_array);
+	u32 reg_offset, split_equal_to_row_size;
 
 	switch (rdev->config.si.mem_row_size_in_kb) {
 	case 1:
@@ -2458,491 +2460,442 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev)
 		break;
 	}
 
-	if ((rdev->family == CHIP_TAHITI) ||
-	    (rdev->family == CHIP_PITCAIRN)) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:  /* non-AA compressed depth or any compressed stencil */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 1:  /* 2xAA/4xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 2:  /* 8xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 8:  /* 1D and 1D Array Surfaces */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 9:  /* Displayable maps. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 10:  /* Display 8bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 11:  /* Display 16bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 12:  /* Display 32bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 13:  /* Thin. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 14:  /* Thin 8 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 15:  /* Thin 16 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 16:  /* Thin 32 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 17:  /* Thin 64 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			case 21:  /* 8 bpp PRT. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 22:  /* 16 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 23:  /* 32 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 24:  /* 64 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 25:  /* 128 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else if ((rdev->family == CHIP_VERDE) ||
-		   (rdev->family == CHIP_OLAND) ||
-		   (rdev->family == CHIP_HAINAN)) {
-		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
-			switch (reg_offset) {
-			case 0:  /* non-AA compressed depth or any compressed stencil */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 1:  /* 2xAA/4xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 2:  /* 8xAA compressed depth only */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 8:  /* 1D and 1D Array Surfaces */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 9:  /* Displayable maps. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 10:  /* Display 8bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 11:  /* Display 16bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 12:  /* Display 32bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 13:  /* Thin. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 14:  /* Thin 8 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 15:  /* Thin 16 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 16:  /* Thin 32 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 17:  /* Thin 64 bpp. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
-						 TILE_SPLIT(split_equal_to_row_size) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 21:  /* 8 bpp PRT. */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 22:  /* 16 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
-				break;
-			case 23:  /* 32 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 24:  /* 64 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
-						 NUM_BANKS(ADDR_SURF_16_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
-				break;
-			case 25:  /* 128 bpp PRT */
-				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
-						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
-						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
-						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
-						 NUM_BANKS(ADDR_SURF_8_BANK) |
-						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
-						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
-						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
-				break;
-			default:
-				gb_tile_moden = 0;
-				break;
-			}
-			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
-			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
-		}
-	} else
+	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+		tile[reg_offset] = 0;
+
+	switch(rdev->family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+		/* non-AA compressed depth or any compressed stencil */
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 2xAA/4xAA compressed depth only */
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 8xAA compressed depth only */
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 1D and 1D Array Surfaces */
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Displayable maps. */
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 8bpp. */
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 16bpp. */
+		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 32bpp. */
+		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin. */
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 8 bpp. */
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 16 bpp. */
+		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 32 bpp. */
+		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* Thin 64 bpp. */
+		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+		/* 8 bpp PRT. */
+		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 16 bpp PRT */
+		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 32 bpp PRT */
+		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 64 bpp PRT */
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 128 bpp PRT */
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			   NUM_BANKS(ADDR_SURF_8_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		break;
+
+	case CHIP_VERDE:
+	case CHIP_OLAND:
+	case CHIP_HAINAN:
+		/* non-AA compressed depth or any compressed stencil */
+		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 2xAA/4xAA compressed depth only */
+		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 8xAA compressed depth only */
+		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
+		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
+		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
+		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
+		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
+		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 1D and 1D Array Surfaces */
+		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Displayable maps. */
+		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 8bpp. */
+		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* Display 16bpp. */
+		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Display 32bpp. */
+		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin. */
+		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 8 bpp. */
+		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 16 bpp. */
+		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 32 bpp. */
+		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* Thin 64 bpp. */
+		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+			   TILE_SPLIT(split_equal_to_row_size) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 8 bpp PRT. */
+		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 16 bpp PRT */
+		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
+		/* 32 bpp PRT */
+		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 64 bpp PRT */
+		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+			   NUM_BANKS(ADDR_SURF_16_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
+		/* 128 bpp PRT */
+		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+			   NUM_BANKS(ADDR_SURF_8_BANK) |
+			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
+
+		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
+		break;
+
+	default:
 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
+	}
 }
 
 static void si_select_se_sh(struct radeon_device *rdev,
@@ -7314,7 +7267,7 @@ uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
 	mutex_lock(&rdev->gpu_clock_mutex);
 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
-	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
 	mutex_unlock(&rdev->gpu_clock_mutex);
 	return clock;
 }
@@ -7775,33 +7728,33 @@ static void si_program_aspm(struct radeon_device *rdev)
 
 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
 {
-        unsigned i;
+	unsigned i;
 
-        /* make sure VCEPLL_CTLREQ is deasserted */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+	/* make sure VCEPLL_CTLREQ is deasserted */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
 
-        mdelay(10);
+	mdelay(10);
 
-        /* assert UPLL_CTLREQ */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+	/* assert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
 
-        /* wait for CTLACK and CTLACK2 to get asserted */
-        for (i = 0; i < 100; ++i) {
-                uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
-                if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
-                        break;
-                mdelay(10);
-        }
+	/* wait for CTLACK and CTLACK2 to get asserted */
+	for (i = 0; i < 100; ++i) {
+		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+			break;
+		mdelay(10);
+	}
 
-        /* deassert UPLL_CTLREQ */
-        WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+	/* deassert UPLL_CTLREQ */
+	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
 
-        if (i == 100) {
-                DRM_ERROR("Timeout setting UVD clocks!\n");
-                return -ETIMEDOUT;
-        }
+	if (i == 100) {
+		DRM_ERROR("Timeout setting UVD clocks!\n");
+		return -ETIMEDOUT;
+	}
 
-        return 0;
+	return 0;
 }
 
 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index a82b891ae1fe..cb75ab72098a 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -499,7 +499,7 @@ static const struct si_cac_config_reg lcac_pitcairn[] =
 
 static const struct si_cac_config_reg cac_override_pitcairn[] =
 {
-    { 0xFFFFFFFF }
+	{ 0xFFFFFFFF }
 };
 
 static const struct si_powertune_data powertune_data_pitcairn =
@@ -991,7 +991,7 @@ static const struct si_cac_config_reg lcac_cape_verde[] =
 
 static const struct si_cac_config_reg cac_override_cape_verde[] =
 {
-    { 0xFFFFFFFF }
+	{ 0xFFFFFFFF }
 };
 
 static const struct si_powertune_data powertune_data_cape_verde =
@@ -1762,9 +1762,9 @@ static void si_fan_ctrl_set_default_mode(struct radeon_device *rdev);
 
 static struct si_power_info *si_get_pi(struct radeon_device *rdev)
 {
-        struct si_power_info *pi = rdev->pm.dpm.priv;
+	struct si_power_info *pi = rdev->pm.dpm.priv;
 
-        return pi;
+	return pi;
 }
 
 static void si_calculate_leakage_for_v_and_t_formula(const struct ni_leakage_coeffients *coeff,
@@ -3150,9 +3150,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 	}
 
-        for (i = 0; i < ps->performance_level_count; i++)
-                btc_adjust_clock_combinations(rdev, max_limits,
-                                              &ps->performance_levels[i]);
+	for (i = 0; i < ps->performance_level_count; i++)
+		btc_adjust_clock_combinations(rdev, max_limits,
+					      &ps->performance_levels[i]);
 
 	for (i = 0; i < ps->performance_level_count; i++) {
 		if (ps->performance_levels[i].vddc < min_vce_voltage)
@@ -3291,7 +3291,7 @@ static void si_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
 	case 0:
 	default:
 		want_thermal_protection = false;
-                break;
+		break;
 	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
 		want_thermal_protection = true;
 		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
@@ -3493,7 +3493,7 @@ static int si_process_firmware_header(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-        si_pi->state_table_start = tmp;
+	si_pi->state_table_start = tmp;
 
 	ret = si_read_smc_sram_dword(rdev,
 				     SISLANDS_SMC_FIRMWARE_HEADER_LOCATION +
@@ -3652,7 +3652,7 @@ static void si_program_response_times(struct radeon_device *rdev)
 	si_write_smc_soft_register(rdev, SI_SMC_SOFT_REGISTER_mvdd_chg_time, 1);
 
 	voltage_response_time = (u32)rdev->pm.dpm.voltage_response_time;
-        backbias_response_time = (u32)rdev->pm.dpm.backbias_response_time;
+	backbias_response_time = (u32)rdev->pm.dpm.backbias_response_time;
 
 	if (voltage_response_time == 0)
 		voltage_response_time = 1000;
@@ -3760,7 +3760,7 @@ static void si_setup_bsp(struct radeon_device *rdev)
 			       &pi->pbsu);
 
 
-        pi->dsp = BSP(pi->bsp) | BSU(pi->bsu);
+	pi->dsp = BSP(pi->bsp) | BSU(pi->bsu);
 	pi->psp = BSP(pi->pbsp) | BSU(pi->pbsu);
 
 	WREG32(CG_BSP, pi->dsp);
@@ -4308,7 +4308,7 @@ static int si_populate_memory_timing_parameters(struct radeon_device *rdev,
 
 	radeon_atom_set_engine_dram_timings(rdev,
 					    pl->sclk,
-                                            pl->mclk);
+					    pl->mclk);
 
 	dram_timing  = RREG32(MC_ARB_DRAM_TIMING);
 	dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
@@ -4343,7 +4343,7 @@ static int si_do_program_memory_timing_parameters(struct radeon_device *rdev,
 					   si_pi->sram_end);
 		if (ret)
 			break;
-        }
+	}
 
 	return ret;
 }
@@ -4821,9 +4821,9 @@ static int si_calculate_sclk_params(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(2);
 
-        spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
-        spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
-        spll_func_cntl_3 |= SPLL_DITHEN;
+	spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
+	spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
+	spll_func_cntl_3 |= SPLL_DITHEN;
 
 	if (pi->sclk_ss) {
 		struct radeon_atom_ss ss;
@@ -4930,15 +4930,15 @@ static int si_populate_mclk_value(struct radeon_device *rdev,
 		tmp = freq_nom / reference_clock;
 		tmp = tmp * tmp;
 		if (radeon_atombios_get_asic_ss_info(rdev, &ss,
-                                                     ASIC_INTERNAL_MEMORY_SS, freq_nom)) {
+						     ASIC_INTERNAL_MEMORY_SS, freq_nom)) {
 			u32 clks = reference_clock * 5 / ss.rate;
 			u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom);
 
-                        mpll_ss1 &= ~CLKV_MASK;
-                        mpll_ss1 |= CLKV(clkv);
+			mpll_ss1 &= ~CLKV_MASK;
+			mpll_ss1 |= CLKV(clkv);
 
-                        mpll_ss2 &= ~CLKS_MASK;
-                        mpll_ss2 |= CLKS(clks);
+			mpll_ss2 &= ~CLKS_MASK;
+			mpll_ss2 |= CLKS(clks);
 		}
 	}
 
@@ -5265,7 +5265,7 @@ static int si_convert_power_state_to_smc(struct radeon_device *rdev,
 		ni_pi->enable_power_containment = false;
 
 	ret = si_populate_sq_ramping_values(rdev, radeon_state, smc_state);
-        if (ret)
+	if (ret)
 		ni_pi->enable_sq_ramping = false;
 
 	return si_populate_smc_t(rdev, radeon_state, smc_state);
@@ -5436,46 +5436,46 @@ static bool si_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
 	case  MC_SEQ_RAS_TIMING >> 2:
 		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_CAS_TIMING >> 2:
+	case MC_SEQ_CAS_TIMING >> 2:
 		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING >> 2:
+	case MC_SEQ_MISC_TIMING >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
 		break;
-        case MC_SEQ_MISC_TIMING2 >> 2:
+	case MC_SEQ_MISC_TIMING2 >> 2:
 		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D0 >> 2:
+	case MC_SEQ_RD_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_RD_CTL_D1 >> 2:
+	case MC_SEQ_RD_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D0 >> 2:
+	case MC_SEQ_WR_CTL_D0 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_D1 >> 2:
+	case MC_SEQ_WR_CTL_D1 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
 		break;
-        case MC_PMG_CMD_EMRS >> 2:
+	case MC_PMG_CMD_EMRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS >> 2:
+	case MC_PMG_CMD_MRS >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS1 >> 2:
+	case MC_PMG_CMD_MRS1 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
 		break;
-        case MC_SEQ_PMG_TIMING >> 2:
+	case MC_SEQ_PMG_TIMING >> 2:
 		*out_reg = MC_SEQ_PMG_TIMING_LP >> 2;
 		break;
-        case MC_PMG_CMD_MRS2 >> 2:
+	case MC_PMG_CMD_MRS2 >> 2:
 		*out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2;
 		break;
-        case MC_SEQ_WR_CTL_2 >> 2:
+	case MC_SEQ_WR_CTL_2 >> 2:
 		*out_reg = MC_SEQ_WR_CTL_2_LP >> 2;
 		break;
-        default:
+	default:
 		result = false;
 		break;
 	}
@@ -5562,19 +5562,19 @@ static int si_initialize_mc_reg_table(struct radeon_device *rdev)
 	WREG32(MC_SEQ_PMG_CMD_MRS2_LP, RREG32(MC_PMG_CMD_MRS2));
 	WREG32(MC_SEQ_WR_CTL_2_LP, RREG32(MC_SEQ_WR_CTL_2));
 
-        ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
-        if (ret)
-                goto init_mc_done;
+	ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
+	if (ret)
+		goto init_mc_done;
 
-        ret = si_copy_vbios_mc_reg_table(table, si_table);
-        if (ret)
-                goto init_mc_done;
+	ret = si_copy_vbios_mc_reg_table(table, si_table);
+	if (ret)
+		goto init_mc_done;
 
 	si_set_s0_mc_reg_index(si_table);
 
 	ret = si_set_mc_special_registers(rdev, si_table);
-        if (ret)
-                goto init_mc_done;
+	if (ret)
+		goto init_mc_done;
 
 	si_set_valid_flag(si_table);
 
@@ -5715,10 +5715,10 @@ static int si_upload_mc_reg_table(struct radeon_device *rdev,
 
 static void si_enable_voltage_control(struct radeon_device *rdev, bool enable)
 {
-        if (enable)
-                WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN);
-        else
-                WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN);
+	if (enable)
+		WREG32_P(GENERAL_PWRMGT, VOLT_PWRMGT_EN, ~VOLT_PWRMGT_EN);
+	else
+		WREG32_P(GENERAL_PWRMGT, 0, ~VOLT_PWRMGT_EN);
 }
 
 static enum radeon_pcie_gen si_get_maximum_link_speed(struct radeon_device *rdev,
@@ -6820,7 +6820,7 @@ static int si_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct ni_ps *ps;
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index cd0862809adf..f0d5c1724f55 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -787,8 +787,8 @@ static void sumo_program_acpi_power_level(struct radeon_device *rdev)
 	struct atom_clock_dividers dividers;
 	int ret;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             pi->acpi_pl.sclk,
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     pi->acpi_pl.sclk,
 					     false, &dividers);
 	if (ret)
 		return;
@@ -1462,7 +1462,7 @@ static int sumo_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct sumo_ps *ps;
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index d34bfcdab9be..6730367ac228 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -369,8 +369,8 @@ static void trinity_gfx_powergating_initialize(struct radeon_device *rdev)
 	int ret;
 	u32 hw_rev = (RREG32(HW_REV) & ATI_REV_ID_MASK) >> ATI_REV_ID_SHIFT;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             25000, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     25000, false, &dividers);
 	if (ret)
 		return;
 
@@ -587,8 +587,8 @@ static void trinity_set_divider_value(struct radeon_device *rdev,
 	u32 value;
 	u32 ix = index * TRINITY_SIZEOF_DPM_STATE_TABLE;
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             sclk, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     sclk, false, &dividers);
 	if (ret)
 		return;
 
@@ -597,8 +597,8 @@ static void trinity_set_divider_value(struct radeon_device *rdev,
 	value |= CLK_DIVIDER(dividers.post_div);
 	WREG32_SMC(SMU_SCLK_DPM_STATE_0_CNTL_0 + ix, value);
 
-        ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
-                                             sclk/2, false, &dividers);
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     sclk/2, false, &dividers);
 	if (ret)
 		return;
 
@@ -1045,14 +1045,14 @@ static int trinity_set_thermal_temperature_range(struct radeon_device *rdev,
 	int low_temp = 0 * 1000;
 	int high_temp = 255 * 1000;
 
-        if (low_temp < min_temp)
+	if (low_temp < min_temp)
 		low_temp = min_temp;
-        if (high_temp > max_temp)
+	if (high_temp > max_temp)
 		high_temp = max_temp;
-        if (high_temp < low_temp) {
+	if (high_temp < low_temp) {
 		DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp);
-                return -EINVAL;
-        }
+		return -EINVAL;
+	}
 
 	WREG32_P(CG_THERMAL_INT_CTRL, DIG_THERM_INTH(49 + (high_temp / 1000)), ~DIG_THERM_INTH_MASK);
 	WREG32_P(CG_THERMAL_INT_CTRL, DIG_THERM_INTL(49 + (low_temp / 1000)), ~DIG_THERM_INTL_MASK);
@@ -1737,7 +1737,7 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
 	struct _NonClockInfoArray *non_clock_info_array;
 	union power_info *power_info;
 	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
-        u16 data_offset;
+	u16 data_offset;
 	u8 frev, crev;
 	u8 *power_state_offset;
 	struct sumo_ps *ps;
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index c6b1cbca47fc..12ddcfa82e20 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -522,11 +522,17 @@ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		goto error;
 	}
 
-	r = radeon_fence_wait(fence, false);
-	if (r) {
+	r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies(
+		RADEON_USEC_IB_TEST_TIMEOUT));
+	if (r < 0) {
 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
 		goto error;
+	} else if (r == 0) {
+		DRM_ERROR("radeon: fence wait timed out.\n");
+		r = -ETIMEDOUT;
+		goto error;
 	}
+	r = 0;
 	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
 error:
 	radeon_fence_unref(&fence);
diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c
index cdeaab7c7b1e..fce214482e72 100644
--- a/drivers/gpu/drm/radeon/vce_v2_0.c
+++ b/drivers/gpu/drm/radeon/vce_v2_0.c
@@ -53,7 +53,7 @@ static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated)
 		WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
 
 		WREG32(VCE_CGTT_CLK_OVERRIDE, 0);
-    } else {
+	} else {
 		tmp = RREG32(VCE_CLOCK_GATING_B);
 		tmp |= 0xe7;
 		tmp &= ~0xe70000;
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 96dcd4a78951..1f10fa0928b4 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -1,6 +1,7 @@
 config DRM_RCAR_DU
 	tristate "DRM Support for R-Car Display Unit"
-	depends on DRM && ARM && OF
+	depends on DRM && OF
+	depends on ARM || ARM64
 	depends on ARCH_SHMOBILE || COMPILE_TEST
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
@@ -14,14 +15,18 @@ config DRM_RCAR_DU
 config DRM_RCAR_HDMI
 	bool "R-Car DU HDMI Encoder Support"
 	depends on DRM_RCAR_DU
-	depends on OF
 	help
 	  Enable support for external HDMI encoders.
 
 config DRM_RCAR_LVDS
 	bool "R-Car DU LVDS Encoder Support"
 	depends on DRM_RCAR_DU
-	depends on ARCH_R8A7790 || ARCH_R8A7791 || COMPILE_TEST
 	help
-	  Enable support for the R-Car Display Unit embedded LVDS encoders
-	  (currently only on R8A7790 and R8A7791).
+	  Enable support for the R-Car Display Unit embedded LVDS encoders.
+
+config DRM_RCAR_VSP
+	bool "R-Car DU VSP Compositor Support"
+	depends on DRM_RCAR_DU
+	depends on VIDEO_RENESAS_VSP1
+	help
+	  Enable support to expose the R-Car VSP Compositor as KMS planes.
diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile
index 05de1c4097af..827711e28226 100644
--- a/drivers/gpu/drm/rcar-du/Makefile
+++ b/drivers/gpu/drm/rcar-du/Makefile
@@ -11,4 +11,6 @@ rcar-du-drm-$(CONFIG_DRM_RCAR_HDMI)	+= rcar_du_hdmicon.o \
 					   rcar_du_hdmienc.o
 rcar-du-drm-$(CONFIG_DRM_RCAR_LVDS)	+= rcar_du_lvdsenc.o
 
+rcar-du-drm-$(CONFIG_DRM_RCAR_VSP)	+= rcar_du_vsp.o
+
 obj-$(CONFIG_DRM_RCAR_DU)		+= rcar-du-drm.o
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 88a4b706be16..d9f06cc361fa 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -1,7 +1,7 @@
 /*
  * rcar_du_crtc.c  --  R-Car Display Unit CRTCs
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -28,6 +28,7 @@
 #include "rcar_du_kms.h"
 #include "rcar_du_plane.h"
 #include "rcar_du_regs.h"
+#include "rcar_du_vsp.h"
 
 static u32 rcar_du_crtc_read(struct rcar_du_crtc *rcrtc, u32 reg)
 {
@@ -150,7 +151,7 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc)
 	/* Signal polarities */
 	value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : DSMR_VSL)
 	      | ((mode->flags & DRM_MODE_FLAG_PHSYNC) ? 0 : DSMR_HSL)
-	      | DSMR_DIPM_DE | DSMR_CSPM;
+	      | DSMR_DIPM_DISP | DSMR_CSPM;
 	rcar_du_crtc_write(rcrtc, DSMR, value);
 
 	/* Display timings */
@@ -207,6 +208,7 @@ plane_format(struct rcar_du_plane *plane)
 static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
 {
 	struct rcar_du_plane *planes[RCAR_DU_NUM_HW_PLANES];
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 	unsigned int num_planes = 0;
 	unsigned int dptsr_planes;
 	unsigned int hwplanes = 0;
@@ -250,6 +252,17 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
 		}
 	}
 
+	/* If VSP+DU integration is enabled the plane assignment is fixed. */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) {
+		if (rcdu->info->gen < 3) {
+			dspr = (rcrtc->index % 2) + 1;
+			hwplanes = 1 << (rcrtc->index % 2);
+		} else {
+			dspr = (rcrtc->index % 2) ? 3 : 1;
+			hwplanes = 1 << ((rcrtc->index % 2) ? 2 : 0);
+		}
+	}
+
 	/* Update the planes to display timing and dot clock generator
 	 * associations.
 	 *
@@ -272,6 +285,10 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
 			rcar_du_group_restart(rcrtc->group);
 	}
 
+	/* Restart the group if plane sources have changed. */
+	if (rcrtc->group->need_restart)
+		rcar_du_group_restart(rcrtc->group);
+
 	mutex_unlock(&rcrtc->group->lock);
 
 	rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR,
@@ -282,26 +299,6 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
  * Page Flip
  */
 
-void rcar_du_crtc_cancel_page_flip(struct rcar_du_crtc *rcrtc,
-				   struct drm_file *file)
-{
-	struct drm_pending_vblank_event *event;
-	struct drm_device *dev = rcrtc->crtc.dev;
-	unsigned long flags;
-
-	/* Destroy the pending vertical blanking event associated with the
-	 * pending page flip, if any, and disable vertical blanking interrupts.
-	 */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	event = rcrtc->event;
-	if (event && event->base.file_priv == file) {
-		rcrtc->event = NULL;
-		event->base.destroy(&event->base);
-		drm_crtc_vblank_put(&rcrtc->crtc);
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
 static void rcar_du_crtc_finish_page_flip(struct rcar_du_crtc *rcrtc)
 {
 	struct drm_pending_vblank_event *event;
@@ -385,6 +382,10 @@ static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc)
 
 	rcar_du_group_start_stop(rcrtc->group, true);
 
+	/* Enable the VSP compositor. */
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE))
+		rcar_du_vsp_enable(rcrtc);
+
 	/* Turn vertical blanking interrupt reporting back on. */
 	drm_crtc_vblank_on(crtc);
 
@@ -418,6 +419,10 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
 	rcar_du_crtc_wait_page_flip(rcrtc);
 	drm_crtc_vblank_off(crtc);
 
+	/* Disable the VSP compositor. */
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE))
+		rcar_du_vsp_disable(rcrtc);
+
 	/* Select switch sync mode. This stops display operation and configures
 	 * the HSYNC and VSYNC signals as inputs.
 	 */
@@ -430,6 +435,9 @@ static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
 
 void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc)
 {
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE))
+		rcar_du_vsp_disable(rcrtc);
+
 	rcar_du_crtc_stop(rcrtc);
 	rcar_du_crtc_put(rcrtc);
 }
@@ -438,20 +446,24 @@ void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc)
 {
 	unsigned int i;
 
-	if (!rcrtc->enabled)
+	if (!rcrtc->crtc.state->active)
 		return;
 
 	rcar_du_crtc_get(rcrtc);
 	rcar_du_crtc_start(rcrtc);
 
 	/* Commit the planes state. */
-	for (i = 0; i < rcrtc->group->num_planes; ++i) {
-		struct rcar_du_plane *plane = &rcrtc->group->planes[i];
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE)) {
+		rcar_du_vsp_enable(rcrtc);
+	} else {
+		for (i = 0; i < rcrtc->group->num_planes; ++i) {
+			struct rcar_du_plane *plane = &rcrtc->group->planes[i];
 
-		if (plane->plane.state->crtc != &rcrtc->crtc)
-			continue;
+			if (plane->plane.state->crtc != &rcrtc->crtc)
+				continue;
 
-		rcar_du_plane_setup(plane);
+			rcar_du_plane_setup(plane);
+		}
 	}
 
 	rcar_du_crtc_update_planes(rcrtc);
@@ -465,37 +477,20 @@ static void rcar_du_crtc_enable(struct drm_crtc *crtc)
 {
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
-	if (rcrtc->enabled)
-		return;
-
 	rcar_du_crtc_get(rcrtc);
 	rcar_du_crtc_start(rcrtc);
-
-	rcrtc->enabled = true;
 }
 
 static void rcar_du_crtc_disable(struct drm_crtc *crtc)
 {
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
-	if (!rcrtc->enabled)
-		return;
-
 	rcar_du_crtc_stop(rcrtc);
 	rcar_du_crtc_put(rcrtc);
 
-	rcrtc->enabled = false;
 	rcrtc->outputs = 0;
 }
 
-static bool rcar_du_crtc_mode_fixup(struct drm_crtc *crtc,
-				    const struct drm_display_mode *mode,
-				    struct drm_display_mode *adjusted_mode)
-{
-	/* TODO Fixup modes */
-	return true;
-}
-
 static void rcar_du_crtc_atomic_begin(struct drm_crtc *crtc,
 				      struct drm_crtc_state *old_crtc_state)
 {
@@ -511,6 +506,9 @@ static void rcar_du_crtc_atomic_begin(struct drm_crtc *crtc,
 		rcrtc->event = event;
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
+
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE))
+		rcar_du_vsp_atomic_begin(rcrtc);
 }
 
 static void rcar_du_crtc_atomic_flush(struct drm_crtc *crtc,
@@ -519,10 +517,12 @@ static void rcar_du_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
 	rcar_du_crtc_update_planes(rcrtc);
+
+	if (rcar_du_has(rcrtc->group->dev, RCAR_DU_FEATURE_VSP1_SOURCE))
+		rcar_du_vsp_atomic_flush(rcrtc);
 }
 
 static const struct drm_crtc_helper_funcs crtc_helper_funcs = {
-	.mode_fixup = rcar_du_crtc_mode_fixup,
 	.disable = rcar_du_crtc_disable,
 	.enable = rcar_du_crtc_enable,
 	.atomic_begin = rcar_du_crtc_atomic_begin,
@@ -567,13 +567,14 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
 int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 {
 	static const unsigned int mmio_offsets[] = {
-		DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET
+		DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET
 	};
 
 	struct rcar_du_device *rcdu = rgrp->dev;
 	struct platform_device *pdev = to_platform_device(rcdu->dev);
 	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
 	struct drm_crtc *crtc = &rcrtc->crtc;
+	struct drm_plane *primary;
 	unsigned int irqflags;
 	struct clk *clk;
 	char clk_name[9];
@@ -609,10 +610,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 	rcrtc->group = rgrp;
 	rcrtc->mmio_offset = mmio_offsets[index];
 	rcrtc->index = index;
-	rcrtc->enabled = false;
 
-	ret = drm_crtc_init_with_planes(rcdu->ddev, crtc,
-					&rgrp->planes[index % 2].plane,
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
+		primary = &rcrtc->vsp->planes[0].plane;
+	else
+		primary = &rgrp->planes[index % 2].plane;
+
+	ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary,
 					NULL, &crtc_funcs, NULL);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index 4b95d9d08c49..6f08b7e7db06 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -21,6 +21,7 @@
 #include <drm/drm_crtc.h>
 
 struct rcar_du_group;
+struct rcar_du_vsp;
 
 /**
  * struct rcar_du_crtc - the CRTC, representing a DU superposition processor
@@ -33,7 +34,6 @@ struct rcar_du_group;
  * @event: event to post when the pending page flip completes
  * @flip_wait: wait queue used to signal page flip completion
  * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC
- * @enabled: whether the CRTC is enabled, used to control system resume
  * @group: CRTC group this CRTC belongs to
  */
 struct rcar_du_crtc {
@@ -49,9 +49,9 @@ struct rcar_du_crtc {
 	wait_queue_head_t flip_wait;
 
 	unsigned int outputs;
-	bool enabled;
 
 	struct rcar_du_group *group;
+	struct rcar_du_vsp *vsp;
 };
 
 #define to_rcar_crtc(c)	container_of(c, struct rcar_du_crtc, crtc)
@@ -67,8 +67,6 @@ enum rcar_du_output {
 
 int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index);
 void rcar_du_crtc_enable_vblank(struct rcar_du_crtc *rcrtc, bool enable);
-void rcar_du_crtc_cancel_page_flip(struct rcar_du_crtc *rcrtc,
-				   struct drm_file *file);
 void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc);
 void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc);
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 40422f6b645e..ed6006bf6bd8 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -1,7 +1,7 @@
 /*
  * rcar_du_drv.c  --  R-Car Display Unit DRM driver
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -36,6 +36,7 @@
  */
 
 static const struct rcar_du_device_info rcar_du_r8a7779_info = {
+	.gen = 2,
 	.features = 0,
 	.num_crtcs = 2,
 	.routes = {
@@ -57,6 +58,7 @@ static const struct rcar_du_device_info rcar_du_r8a7779_info = {
 };
 
 static const struct rcar_du_device_info rcar_du_r8a7790_info = {
+	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
 	.quirks = RCAR_DU_QUIRK_ALIGN_128B | RCAR_DU_QUIRK_LVDS_LANES,
@@ -86,6 +88,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = {
 
 /* M2-W (r8a7791) and M2-N (r8a7793) are identical */
 static const struct rcar_du_device_info rcar_du_r8a7791_info = {
+	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
 	.num_crtcs = 2,
@@ -108,6 +111,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = {
 };
 
 static const struct rcar_du_device_info rcar_du_r8a7794_info = {
+	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
 	.num_crtcs = 2,
@@ -129,12 +133,37 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = {
 	.num_lvds = 0,
 };
 
+static const struct rcar_du_device_info rcar_du_r8a7795_info = {
+	.gen = 3,
+	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
+		  | RCAR_DU_FEATURE_EXT_CTRL_REGS
+		  | RCAR_DU_FEATURE_VSP1_SOURCE,
+	.num_crtcs = 4,
+	.routes = {
+		/* R8A7795 has one RGB output, one LVDS output and two
+		 * (currently unsupported) HDMI outputs.
+		 */
+		[RCAR_DU_OUTPUT_DPAD0] = {
+			.possible_crtcs = BIT(3),
+			.encoder_type = DRM_MODE_ENCODER_NONE,
+			.port = 0,
+		},
+		[RCAR_DU_OUTPUT_LVDS0] = {
+			.possible_crtcs = BIT(0),
+			.encoder_type = DRM_MODE_ENCODER_LVDS,
+			.port = 3,
+		},
+	},
+	.num_lvds = 1,
+};
+
 static const struct of_device_id rcar_du_of_table[] = {
 	{ .compatible = "renesas,du-r8a7779", .data = &rcar_du_r8a7779_info },
 	{ .compatible = "renesas,du-r8a7790", .data = &rcar_du_r8a7790_info },
 	{ .compatible = "renesas,du-r8a7791", .data = &rcar_du_r8a7791_info },
 	{ .compatible = "renesas,du-r8a7793", .data = &rcar_du_r8a7791_info },
 	{ .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info },
+	{ .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info },
 	{ }
 };
 
@@ -144,91 +173,6 @@ MODULE_DEVICE_TABLE(of, rcar_du_of_table);
  * DRM operations
  */
 
-static int rcar_du_unload(struct drm_device *dev)
-{
-	struct rcar_du_device *rcdu = dev->dev_private;
-
-	if (rcdu->fbdev)
-		drm_fbdev_cma_fini(rcdu->fbdev);
-
-	drm_kms_helper_poll_fini(dev);
-	drm_mode_config_cleanup(dev);
-	drm_vblank_cleanup(dev);
-
-	dev->irq_enabled = 0;
-	dev->dev_private = NULL;
-
-	return 0;
-}
-
-static int rcar_du_load(struct drm_device *dev, unsigned long flags)
-{
-	struct platform_device *pdev = dev->platformdev;
-	struct device_node *np = pdev->dev.of_node;
-	struct rcar_du_device *rcdu;
-	struct resource *mem;
-	int ret;
-
-	if (np == NULL) {
-		dev_err(dev->dev, "no platform data\n");
-		return -ENODEV;
-	}
-
-	rcdu = devm_kzalloc(&pdev->dev, sizeof(*rcdu), GFP_KERNEL);
-	if (rcdu == NULL) {
-		dev_err(dev->dev, "failed to allocate private data\n");
-		return -ENOMEM;
-	}
-
-	init_waitqueue_head(&rcdu->commit.wait);
-
-	rcdu->dev = &pdev->dev;
-	rcdu->info = of_match_device(rcar_du_of_table, rcdu->dev)->data;
-	rcdu->ddev = dev;
-	dev->dev_private = rcdu;
-
-	/* I/O resources */
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem);
-	if (IS_ERR(rcdu->mmio))
-		return PTR_ERR(rcdu->mmio);
-
-	/* Initialize vertical blanking interrupts handling. Start with vblank
-	 * disabled for all CRTCs.
-	 */
-	ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to initialize vblank\n");
-		goto done;
-	}
-
-	/* DRM/KMS objects */
-	ret = rcar_du_modeset_init(rcdu);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to initialize DRM/KMS (%d)\n", ret);
-		goto done;
-	}
-
-	dev->irq_enabled = 1;
-
-	platform_set_drvdata(pdev, rcdu);
-
-done:
-	if (ret)
-		rcar_du_unload(dev);
-
-	return ret;
-}
-
-static void rcar_du_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct rcar_du_device *rcdu = dev->dev_private;
-	unsigned int i;
-
-	for (i = 0; i < rcdu->num_crtcs; ++i)
-		rcar_du_crtc_cancel_page_flip(&rcdu->crtcs[i], file);
-}
-
 static void rcar_du_lastclose(struct drm_device *dev)
 {
 	struct rcar_du_device *rcdu = dev->dev_private;
@@ -269,11 +213,7 @@ static const struct file_operations rcar_du_fops = {
 static struct drm_driver rcar_du_driver = {
 	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME
 				| DRIVER_ATOMIC,
-	.load			= rcar_du_load,
-	.unload			= rcar_du_unload,
-	.preclose		= rcar_du_preclose,
 	.lastclose		= rcar_du_lastclose,
-	.set_busid		= drm_platform_set_busid,
 	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= rcar_du_enable_vblank,
 	.disable_vblank		= rcar_du_disable_vblank,
@@ -333,18 +273,116 @@ static const struct dev_pm_ops rcar_du_pm_ops = {
  * Platform driver
  */
 
-static int rcar_du_probe(struct platform_device *pdev)
+static int rcar_du_remove(struct platform_device *pdev)
 {
-	return drm_platform_init(&rcar_du_driver, pdev);
+	struct rcar_du_device *rcdu = platform_get_drvdata(pdev);
+	struct drm_device *ddev = rcdu->ddev;
+
+	mutex_lock(&ddev->mode_config.mutex);
+	drm_connector_unplug_all(ddev);
+	mutex_unlock(&ddev->mode_config.mutex);
+
+	drm_dev_unregister(ddev);
+
+	if (rcdu->fbdev)
+		drm_fbdev_cma_fini(rcdu->fbdev);
+
+	drm_kms_helper_poll_fini(ddev);
+	drm_mode_config_cleanup(ddev);
+	drm_vblank_cleanup(ddev);
+
+	drm_dev_unref(ddev);
+
+	return 0;
 }
 
-static int rcar_du_remove(struct platform_device *pdev)
+static int rcar_du_probe(struct platform_device *pdev)
 {
-	struct rcar_du_device *rcdu = platform_get_drvdata(pdev);
+	struct device_node *np = pdev->dev.of_node;
+	struct rcar_du_device *rcdu;
+	struct drm_connector *connector;
+	struct drm_device *ddev;
+	struct resource *mem;
+	int ret;
+
+	if (np == NULL) {
+		dev_err(&pdev->dev, "no device tree node\n");
+		return -ENODEV;
+	}
+
+	/* Allocate and initialize the DRM and R-Car device structures. */
+	rcdu = devm_kzalloc(&pdev->dev, sizeof(*rcdu), GFP_KERNEL);
+	if (rcdu == NULL)
+		return -ENOMEM;
+
+	init_waitqueue_head(&rcdu->commit.wait);
+
+	rcdu->dev = &pdev->dev;
+	rcdu->info = of_match_device(rcar_du_of_table, rcdu->dev)->data;
+
+	ddev = drm_dev_alloc(&rcar_du_driver, &pdev->dev);
+	if (!ddev)
+		return -ENOMEM;
+
+	drm_dev_set_unique(ddev, dev_name(&pdev->dev));
+
+	rcdu->ddev = ddev;
+	ddev->dev_private = rcdu;
+
+	platform_set_drvdata(pdev, rcdu);
+
+	/* I/O resources */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(rcdu->mmio)) {
+		ret = PTR_ERR(rcdu->mmio);
+		goto error;
+	}
 
-	drm_put_dev(rcdu->ddev);
+	/* Initialize vertical blanking interrupts handling. Start with vblank
+	 * disabled for all CRTCs.
+	 */
+	ret = drm_vblank_init(ddev, (1 << rcdu->info->num_crtcs) - 1);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to initialize vblank\n");
+		goto error;
+	}
+
+	/* DRM/KMS objects */
+	ret = rcar_du_modeset_init(rcdu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to initialize DRM/KMS (%d)\n", ret);
+		goto error;
+	}
+
+	ddev->irq_enabled = 1;
+
+	/* Register the DRM device with the core and the connectors with
+	 * sysfs.
+	 */
+	ret = drm_dev_register(ddev, 0);
+	if (ret)
+		goto error;
+
+	mutex_lock(&ddev->mode_config.mutex);
+	drm_for_each_connector(connector, ddev) {
+		ret = drm_connector_register(connector);
+		if (ret < 0)
+			break;
+	}
+	mutex_unlock(&ddev->mode_config.mutex);
+
+	if (ret < 0)
+		goto error;
+
+	DRM_INFO("Device %s probed\n", dev_name(&pdev->dev));
 
 	return 0;
+
+error:
+	rcar_du_remove(pdev);
+
+	return ret;
 }
 
 static struct platform_driver rcar_du_platform_driver = {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index 9f34fc86436a..ed35467d96cf 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -1,7 +1,7 @@
 /*
  * rcar_du_drv.h  --  R-Car Display Unit DRM driver
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -19,6 +19,7 @@
 
 #include "rcar_du_crtc.h"
 #include "rcar_du_group.h"
+#include "rcar_du_vsp.h"
 
 struct clk;
 struct device;
@@ -29,6 +30,7 @@ struct rcar_du_lvdsenc;
 
 #define RCAR_DU_FEATURE_CRTC_IRQ_CLOCK	(1 << 0)	/* Per-CRTC IRQ and clock */
 #define RCAR_DU_FEATURE_EXT_CTRL_REGS	(1 << 1)	/* Has extended control registers */
+#define RCAR_DU_FEATURE_VSP1_SOURCE	(1 << 2)	/* Has inputs from VSP1 */
 
 #define RCAR_DU_QUIRK_ALIGN_128B	(1 << 0)	/* Align pitches to 128 bytes */
 #define RCAR_DU_QUIRK_LVDS_LANES	(1 << 1)	/* LVDS lanes 1 and 3 inverted */
@@ -51,6 +53,7 @@ struct rcar_du_output_routing {
 
 /*
  * struct rcar_du_device_info - DU model-specific information
+ * @gen: device generation (2 or 3)
  * @features: device features (RCAR_DU_FEATURE_*)
  * @quirks: device quirks (RCAR_DU_QUIRK_*)
  * @num_crtcs: total number of CRTCs
@@ -58,6 +61,7 @@ struct rcar_du_output_routing {
  * @num_lvds: number of internal LVDS encoders
  */
 struct rcar_du_device_info {
+	unsigned int gen;
 	unsigned int features;
 	unsigned int quirks;
 	unsigned int num_crtcs;
@@ -65,9 +69,10 @@ struct rcar_du_device_info {
 	unsigned int num_lvds;
 };
 
-#define RCAR_DU_MAX_CRTCS		3
+#define RCAR_DU_MAX_CRTCS		4
 #define RCAR_DU_MAX_GROUPS		DIV_ROUND_UP(RCAR_DU_MAX_CRTCS, 2)
 #define RCAR_DU_MAX_LVDS		2
+#define RCAR_DU_MAX_VSPS		4
 
 struct rcar_du_device {
 	struct device *dev;
@@ -82,6 +87,7 @@ struct rcar_du_device {
 	unsigned int num_crtcs;
 
 	struct rcar_du_group groups[RCAR_DU_MAX_GROUPS];
+	struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS];
 
 	struct {
 		struct drm_property *alpha;
@@ -90,6 +96,8 @@ struct rcar_du_device {
 	} props;
 
 	unsigned int dpad0_source;
+	unsigned int vspd1_sink;
+
 	struct rcar_du_lvdsenc *lvds[RCAR_DU_MAX_LVDS];
 
 	struct {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index c08700757feb..4e939e41f030 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -89,12 +89,8 @@ static int rcar_du_encoder_atomic_check(struct drm_encoder *encoder,
 	/* The flat panel mode is fixed, just copy it to the adjusted mode. */
 	drm_mode_copy(adjusted_mode, panel_mode);
 
-	/* The internal LVDS encoder has a clock frequency operating range of
-	 * 30MHz to 150MHz. Clamp the clock accordingly.
-	 */
 	if (renc->lvds)
-		adjusted_mode->clock = clamp(adjusted_mode->clock,
-					     30000, 150000);
+		rcar_du_lvdsenc_atomic_check(renc->lvds, adjusted_mode);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index 8e2ffe025153..33b2fc53da3e 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -1,7 +1,7 @@
 /*
  * rcar_du_group.c  --  R-Car Display Unit Channels Pair
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -44,29 +44,64 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data)
 	rcar_du_write(rgrp->dev, rgrp->mmio_offset + reg, data);
 }
 
+static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp)
+{
+	u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP;
+
+	if (rgrp->num_crtcs > 1)
+		defr6 |= DEFR6_ODPM22_DISP;
+
+	rcar_du_group_write(rgrp, DEFR6, defr6);
+}
+
 static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp)
 {
-	u32 defr8 = DEFR8_CODE | DEFR8_DEFE8;
+	struct rcar_du_device *rcdu = rgrp->dev;
+	unsigned int possible_crtcs =
+		rcdu->info->routes[RCAR_DU_OUTPUT_DPAD0].possible_crtcs;
+	u32 defr8 = DEFR8_CODE;
 
-	/* The DEFR8 register for the first group also controls RGB output
-	 * routing to DPAD0 for DU instances that support it.
-	 */
-	if (rgrp->dev->info->routes[RCAR_DU_OUTPUT_DPAD0].possible_crtcs > 1 &&
-	    rgrp->index == 0)
-		defr8 |= DEFR8_DRGBS_DU(rgrp->dev->dpad0_source);
+	if (rcdu->info->gen < 3) {
+		defr8 |= DEFR8_DEFE8;
+
+		/* On Gen2 the DEFR8 register for the first group also controls
+		 * RGB output routing to DPAD0 and VSPD1 routing to DU0/1/2 for
+		 * DU instances that support it.
+		 */
+		if (rgrp->index == 0) {
+			if (possible_crtcs > 1)
+				defr8 |= DEFR8_DRGBS_DU(rcdu->dpad0_source);
+			if (rgrp->dev->vspd1_sink == 2)
+				defr8 |= DEFR8_VSCS;
+		}
+	} else {
+		/* On Gen3 VSPD routing can't be configured, but DPAD routing
+		 * needs to be set despite having a single option available.
+		 */
+		u32 crtc = ffs(possible_crtcs) - 1;
+
+		if (crtc / 2 == rgrp->index)
+			defr8 |= DEFR8_DRGBS_DU(crtc);
+	}
 
 	rcar_du_group_write(rgrp, DEFR8, defr8);
 }
 
 static void rcar_du_group_setup(struct rcar_du_group *rgrp)
 {
+	struct rcar_du_device *rcdu = rgrp->dev;
+
 	/* Enable extended features */
 	rcar_du_group_write(rgrp, DEFR, DEFR_CODE | DEFR_DEFE);
-	rcar_du_group_write(rgrp, DEFR2, DEFR2_CODE | DEFR2_DEFE2G);
-	rcar_du_group_write(rgrp, DEFR3, DEFR3_CODE | DEFR3_DEFE3);
-	rcar_du_group_write(rgrp, DEFR4, DEFR4_CODE);
+	if (rcdu->info->gen < 3) {
+		rcar_du_group_write(rgrp, DEFR2, DEFR2_CODE | DEFR2_DEFE2G);
+		rcar_du_group_write(rgrp, DEFR3, DEFR3_CODE | DEFR3_DEFE3);
+		rcar_du_group_write(rgrp, DEFR4, DEFR4_CODE);
+	}
 	rcar_du_group_write(rgrp, DEFR5, DEFR5_CODE | DEFR5_DEFE5);
 
+	rcar_du_group_setup_pins(rgrp);
+
 	if (rcar_du_has(rgrp->dev, RCAR_DU_FEATURE_EXT_CTRL_REGS)) {
 		rcar_du_group_setup_defr8(rgrp);
 
@@ -82,6 +117,9 @@ static void rcar_du_group_setup(struct rcar_du_group *rgrp)
 				    DIDSR_PDCS_CLK(0, 0));
 	}
 
+	if (rcdu->info->gen >= 3)
+		rcar_du_group_write(rgrp, DEFR10, DEFR10_CODE | DEFR10_DEFE10);
+
 	/* Use DS1PR and DS2PR to configure planes priorities and connects the
 	 * superposition 0 to DU0 pins. DU1 pins will be configured dynamically.
 	 */
@@ -158,21 +196,23 @@ void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
 
 void rcar_du_group_restart(struct rcar_du_group *rgrp)
 {
+	rgrp->need_restart = false;
+
 	__rcar_du_group_start_stop(rgrp, false);
 	__rcar_du_group_start_stop(rgrp, true);
 }
 
-static int rcar_du_set_dpad0_routing(struct rcar_du_device *rcdu)
+int rcar_du_set_dpad0_vsp1_routing(struct rcar_du_device *rcdu)
 {
 	int ret;
 
 	if (!rcar_du_has(rcdu, RCAR_DU_FEATURE_EXT_CTRL_REGS))
 		return 0;
 
-	/* RGB output routing to DPAD0 is configured in the DEFR8 register of
-	 * the first group. As this function can be called with the DU0 and DU1
-	 * CRTCs disabled, we need to enable the first group clock before
-	 * accessing the register.
+	/* RGB output routing to DPAD0 and VSP1D routing to DU0/1/2 are
+	 * configured in the DEFR8 register of the first group. As this function
+	 * can be called with the DU0 and DU1 CRTCs disabled, we need to enable
+	 * the first group clock before accessing the register.
 	 */
 	ret = clk_prepare_enable(rcdu->crtcs[0].clock);
 	if (ret < 0)
@@ -203,5 +243,5 @@ int rcar_du_group_set_routing(struct rcar_du_group *rgrp)
 
 	rcar_du_group_write(rgrp, DORCR, dorcr);
 
-	return rcar_du_set_dpad0_routing(rgrp->dev);
+	return rcar_du_set_dpad0_vsp1_routing(rgrp->dev);
 }
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
index d7318e1a6b00..5e3adc6b31b5 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -32,6 +32,7 @@ struct rcar_du_device;
  * @dptsr_planes: bitmask of planes driven by dot-clock and timing generator 1
  * @num_planes: number of planes in the group
  * @planes: planes handled by the group
+ * @need_restart: the group needs to be restarted due to a configuration change
  */
 struct rcar_du_group {
 	struct rcar_du_device *dev;
@@ -47,6 +48,7 @@ struct rcar_du_group {
 
 	unsigned int num_planes;
 	struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES];
+	bool need_restart;
 };
 
 u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg);
@@ -58,4 +60,6 @@ void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start);
 void rcar_du_group_restart(struct rcar_du_group *rgrp);
 int rcar_du_group_set_routing(struct rcar_du_group *rgrp);
 
+int rcar_du_set_dpad0_vsp1_routing(struct rcar_du_device *rcdu);
+
 #endif /* __RCAR_DU_GROUP_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
index a37b6e2fe51a..6c927144b5c9 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
@@ -55,12 +55,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 
-static void rcar_du_hdmi_connector_destroy(struct drm_connector *connector)
-{
-	drm_connector_unregister(connector);
-	drm_connector_cleanup(connector);
-}
-
 static enum drm_connector_status
 rcar_du_hdmi_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -79,7 +73,7 @@ static const struct drm_connector_funcs connector_funcs = {
 	.reset = drm_atomic_helper_connector_reset,
 	.detect = rcar_du_hdmi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = rcar_du_hdmi_connector_destroy,
+	.destroy = drm_connector_cleanup,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
@@ -108,9 +102,6 @@ int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
 		return ret;
 
 	drm_connector_helper_add(connector, &connector_helper_funcs);
-	ret = drm_connector_register(connector);
-	if (ret < 0)
-		return ret;
 
 	connector->dpms = DRM_MODE_DPMS_OFF;
 	drm_object_property_set_value(&connector->base,
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 2567efcbee36..461662d231e2 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -71,12 +71,9 @@ static int rcar_du_hdmienc_atomic_check(struct drm_encoder *encoder,
 	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
 	const struct drm_display_mode *mode = &crtc_state->mode;
 
-	/* The internal LVDS encoder has a clock frequency operating range of
-	 * 30MHz to 150MHz. Clamp the clock accordingly.
-	 */
 	if (hdmienc->renc->lvds)
-		adjusted_mode->clock = clamp(adjusted_mode->clock,
-					     30000, 150000);
+		rcar_du_lvdsenc_atomic_check(hdmienc->renc->lvds,
+					     adjusted_mode);
 
 	if (sfuncs->mode_fixup == NULL)
 		return 0;
@@ -134,12 +131,19 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 
 	/* Locate the slave I2C device and driver. */
 	i2c_slave = of_find_i2c_device_by_node(np);
-	if (!i2c_slave || !i2c_get_clientdata(i2c_slave))
+	if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) {
+		dev_dbg(rcdu->dev,
+			"can't get I2C slave for %s, deferring probe\n",
+			of_node_full_name(np));
 		return -EPROBE_DEFER;
+	}
 
 	hdmienc->dev = &i2c_slave->dev;
 
 	if (hdmienc->dev->driver == NULL) {
+		dev_dbg(rcdu->dev,
+			"I2C slave %s not probed yet, deferring probe\n",
+			dev_name(hdmienc->dev));
 		ret = -EPROBE_DEFER;
 		goto error;
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index 43bce69d8560..24725bf859b4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -1,7 +1,7 @@
 /*
  * rcar_du_kms.c  --  R-Car Display Unit Mode Setting
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -28,6 +28,7 @@
 #include "rcar_du_kms.h"
 #include "rcar_du_lvdsenc.h"
 #include "rcar_du_regs.h"
+#include "rcar_du_vsp.h"
 
 /* -----------------------------------------------------------------------------
  * Format helpers
@@ -89,13 +90,44 @@ static const struct rcar_du_format_info rcar_du_format_infos[] = {
 		.pnmr = PnMR_SPIM_TP_OFF | PnMR_DDDF_YC,
 		.edf = PnDDCR4_EDF_NONE,
 	}, {
-		/* In YUV 4:2:2, only NV16 is supported (NV61 isn't) */
 		.fourcc = DRM_FORMAT_NV16,
 		.bpp = 16,
 		.planes = 2,
 		.pnmr = PnMR_SPIM_TP_OFF | PnMR_DDDF_YC,
 		.edf = PnDDCR4_EDF_NONE,
 	},
+	/* The following formats are not supported on Gen2 and thus have no
+	 * associated .pnmr or .edf settings.
+	 */
+	{
+		.fourcc = DRM_FORMAT_NV61,
+		.bpp = 16,
+		.planes = 2,
+	}, {
+		.fourcc = DRM_FORMAT_YUV420,
+		.bpp = 12,
+		.planes = 3,
+	}, {
+		.fourcc = DRM_FORMAT_YVU420,
+		.bpp = 12,
+		.planes = 3,
+	}, {
+		.fourcc = DRM_FORMAT_YUV422,
+		.bpp = 16,
+		.planes = 3,
+	}, {
+		.fourcc = DRM_FORMAT_YVU422,
+		.bpp = 16,
+		.planes = 3,
+	}, {
+		.fourcc = DRM_FORMAT_YUV444,
+		.bpp = 24,
+		.planes = 3,
+	}, {
+		.fourcc = DRM_FORMAT_YVU444,
+		.bpp = 24,
+		.planes = 3,
+	},
 };
 
 const struct rcar_du_format_info *rcar_du_format_info(u32 fourcc)
@@ -143,6 +175,7 @@ rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 	unsigned int max_pitch;
 	unsigned int align;
 	unsigned int bpp;
+	unsigned int i;
 
 	format = rcar_du_format_info(mode_cmd->pixel_format);
 	if (format == NULL) {
@@ -155,7 +188,7 @@ rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 	 * The pitch and alignment constraints are expressed in pixels on the
 	 * hardware side and in bytes in the DRM API.
 	 */
-	bpp = format->planes == 2 ? 1 : format->bpp / 8;
+	bpp = format->planes == 1 ? format->bpp / 8 : 1;
 	max_pitch =  4096 * bpp;
 
 	if (rcar_du_needs(rcdu, RCAR_DU_QUIRK_ALIGN_128B))
@@ -170,8 +203,8 @@ rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (format->planes == 2) {
-		if (mode_cmd->pitches[1] != mode_cmd->pitches[0]) {
+	for (i = 1; i < format->planes; ++i) {
+		if (mode_cmd->pitches[i] != mode_cmd->pitches[0]) {
 			dev_dbg(dev->dev,
 				"luma and chroma pitches do not match\n");
 			return ERR_PTR(-EINVAL);
@@ -192,252 +225,20 @@ static void rcar_du_output_poll_changed(struct drm_device *dev)
  * Atomic Check and Update
  */
 
-/*
- * Atomic hardware plane allocator
- *
- * The hardware plane allocator is solely based on the atomic plane states
- * without keeping any external state to avoid races between .atomic_check()
- * and .atomic_commit().
- *
- * The core idea is to avoid using a free planes bitmask that would need to be
- * shared between check and commit handlers with a collective knowledge based on
- * the allocated hardware plane(s) for each KMS plane. The allocator then loops
- * over all plane states to compute the free planes bitmask, allocates hardware
- * planes based on that bitmask, and stores the result back in the plane states.
- *
- * For this to work we need to access the current state of planes not touched by
- * the atomic update. To ensure that it won't be modified, we need to lock all
- * planes using drm_atomic_get_plane_state(). This effectively serializes atomic
- * updates from .atomic_check() up to completion (when swapping the states if
- * the check step has succeeded) or rollback (when freeing the states if the
- * check step has failed).
- *
- * Allocation is performed in the .atomic_check() handler and applied
- * automatically when the core swaps the old and new states.
- */
-
-static bool rcar_du_plane_needs_realloc(struct rcar_du_plane *plane,
-					struct rcar_du_plane_state *state)
-{
-	const struct rcar_du_format_info *cur_format;
-
-	cur_format = to_rcar_plane_state(plane->plane.state)->format;
-
-	/* Lowering the number of planes doesn't strictly require reallocation
-	 * as the extra hardware plane will be freed when committing, but doing
-	 * so could lead to more fragmentation.
-	 */
-	return !cur_format || cur_format->planes != state->format->planes;
-}
-
-static unsigned int rcar_du_plane_hwmask(struct rcar_du_plane_state *state)
-{
-	unsigned int mask;
-
-	if (state->hwindex == -1)
-		return 0;
-
-	mask = 1 << state->hwindex;
-	if (state->format->planes == 2)
-		mask |= 1 << ((state->hwindex + 1) % 8);
-
-	return mask;
-}
-
-static int rcar_du_plane_hwalloc(unsigned int num_planes, unsigned int free)
-{
-	unsigned int i;
-
-	for (i = 0; i < RCAR_DU_NUM_HW_PLANES; ++i) {
-		if (!(free & (1 << i)))
-			continue;
-
-		if (num_planes == 1 || free & (1 << ((i + 1) % 8)))
-			break;
-	}
-
-	return i == RCAR_DU_NUM_HW_PLANES ? -EBUSY : i;
-}
-
 static int rcar_du_atomic_check(struct drm_device *dev,
 				struct drm_atomic_state *state)
 {
 	struct rcar_du_device *rcdu = dev->dev_private;
-	unsigned int group_freed_planes[RCAR_DU_MAX_GROUPS] = { 0, };
-	unsigned int group_free_planes[RCAR_DU_MAX_GROUPS] = { 0, };
-	bool needs_realloc = false;
-	unsigned int groups = 0;
-	unsigned int i;
 	int ret;
 
 	ret = drm_atomic_helper_check(dev, state);
 	if (ret < 0)
 		return ret;
 
-	/* Check if hardware planes need to be reallocated. */
-	for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
-		struct rcar_du_plane_state *plane_state;
-		struct rcar_du_plane *plane;
-		unsigned int index;
-
-		if (!state->planes[i])
-			continue;
-
-		plane = to_rcar_plane(state->planes[i]);
-		plane_state = to_rcar_plane_state(state->plane_states[i]);
-
-		dev_dbg(rcdu->dev, "%s: checking plane (%u,%u)\n", __func__,
-			plane->group->index, plane - plane->group->planes);
-
-		/* If the plane is being disabled we don't need to go through
-		 * the full reallocation procedure. Just mark the hardware
-		 * plane(s) as freed.
-		 */
-		if (!plane_state->format) {
-			dev_dbg(rcdu->dev, "%s: plane is being disabled\n",
-				__func__);
-			index = plane - plane->group->planes;
-			group_freed_planes[plane->group->index] |= 1 << index;
-			plane_state->hwindex = -1;
-			continue;
-		}
-
-		/* If the plane needs to be reallocated mark it as such, and
-		 * mark the hardware plane(s) as free.
-		 */
-		if (rcar_du_plane_needs_realloc(plane, plane_state)) {
-			dev_dbg(rcdu->dev, "%s: plane needs reallocation\n",
-				__func__);
-			groups |= 1 << plane->group->index;
-			needs_realloc = true;
-
-			index = plane - plane->group->planes;
-			group_freed_planes[plane->group->index] |= 1 << index;
-			plane_state->hwindex = -1;
-		}
-	}
-
-	if (!needs_realloc)
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
 		return 0;
 
-	/* Grab all plane states for the groups that need reallocation to ensure
-	 * locking and avoid racy updates. This serializes the update operation,
-	 * but there's not much we can do about it as that's the hardware
-	 * design.
-	 *
-	 * Compute the used planes mask for each group at the same time to avoid
-	 * looping over the planes separately later.
-	 */
-	while (groups) {
-		unsigned int index = ffs(groups) - 1;
-		struct rcar_du_group *group = &rcdu->groups[index];
-		unsigned int used_planes = 0;
-
-		dev_dbg(rcdu->dev, "%s: finding free planes for group %u\n",
-			__func__, index);
-
-		for (i = 0; i < group->num_planes; ++i) {
-			struct rcar_du_plane *plane = &group->planes[i];
-			struct rcar_du_plane_state *plane_state;
-			struct drm_plane_state *s;
-
-			s = drm_atomic_get_plane_state(state, &plane->plane);
-			if (IS_ERR(s))
-				return PTR_ERR(s);
-
-			/* If the plane has been freed in the above loop its
-			 * hardware planes must not be added to the used planes
-			 * bitmask. However, the current state doesn't reflect
-			 * the free state yet, as we've modified the new state
-			 * above. Use the local freed planes list to check for
-			 * that condition instead.
-			 */
-			if (group_freed_planes[index] & (1 << i)) {
-				dev_dbg(rcdu->dev,
-					"%s: plane (%u,%u) has been freed, skipping\n",
-					__func__, plane->group->index,
-					plane - plane->group->planes);
-				continue;
-			}
-
-			plane_state = to_rcar_plane_state(plane->plane.state);
-			used_planes |= rcar_du_plane_hwmask(plane_state);
-
-			dev_dbg(rcdu->dev,
-				"%s: plane (%u,%u) uses %u hwplanes (index %d)\n",
-				__func__, plane->group->index,
-				plane - plane->group->planes,
-				plane_state->format ?
-				plane_state->format->planes : 0,
-				plane_state->hwindex);
-		}
-
-		group_free_planes[index] = 0xff & ~used_planes;
-		groups &= ~(1 << index);
-
-		dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
-			__func__, index, group_free_planes[index]);
-	}
-
-	/* Reallocate hardware planes for each plane that needs it. */
-	for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
-		struct rcar_du_plane_state *plane_state;
-		struct rcar_du_plane *plane;
-		unsigned int crtc_planes;
-		unsigned int free;
-		int idx;
-
-		if (!state->planes[i])
-			continue;
-
-		plane = to_rcar_plane(state->planes[i]);
-		plane_state = to_rcar_plane_state(state->plane_states[i]);
-
-		dev_dbg(rcdu->dev, "%s: allocating plane (%u,%u)\n", __func__,
-			plane->group->index, plane - plane->group->planes);
-
-		/* Skip planes that are being disabled or don't need to be
-		 * reallocated.
-		 */
-		if (!plane_state->format ||
-		    !rcar_du_plane_needs_realloc(plane, plane_state))
-			continue;
-
-		/* Try to allocate the plane from the free planes currently
-		 * associated with the target CRTC to avoid restarting the CRTC
-		 * group and thus minimize flicker. If it fails fall back to
-		 * allocating from all free planes.
-		 */
-		crtc_planes = to_rcar_crtc(plane_state->state.crtc)->index % 2
-			    ? plane->group->dptsr_planes
-			    : ~plane->group->dptsr_planes;
-		free = group_free_planes[plane->group->index];
-
-		idx = rcar_du_plane_hwalloc(plane_state->format->planes,
-					    free & crtc_planes);
-		if (idx < 0)
-			idx = rcar_du_plane_hwalloc(plane_state->format->planes,
-						    free);
-		if (idx < 0) {
-			dev_dbg(rcdu->dev, "%s: no available hardware plane\n",
-				__func__);
-			return idx;
-		}
-
-		dev_dbg(rcdu->dev, "%s: allocated %u hwplanes (index %u)\n",
-			__func__, plane_state->format->planes, idx);
-
-		plane_state->hwindex = idx;
-
-		group_free_planes[plane->group->index] &=
-			~rcar_du_plane_hwmask(plane_state);
-
-		dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
-			__func__, plane->group->index,
-			group_free_planes[plane->group->index]);
-	}
-
-	return 0;
+	return rcar_du_atomic_check_planes(dev, state);
 }
 
 struct rcar_du_commit {
@@ -456,7 +257,7 @@ static void rcar_du_atomic_complete(struct rcar_du_commit *commit)
 	/* Apply the atomic update. */
 	drm_atomic_helper_commit_modeset_disables(dev, old_state);
 	drm_atomic_helper_commit_modeset_enables(dev, old_state);
-	drm_atomic_helper_commit_planes(dev, old_state, false);
+	drm_atomic_helper_commit_planes(dev, old_state, true);
 
 	drm_atomic_helper_wait_for_vblanks(dev, old_state);
 
@@ -775,14 +576,34 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 		rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
 
 		/* If we have more than one CRTCs in this group pre-associate
-		 * planes 0-3 with CRTC 0 and planes 4-7 with CRTC 1 to minimize
-		 * flicker occurring when the association is changed.
+		 * the low-order planes with CRTC 0 and the high-order planes
+		 * with CRTC 1 to minimize flicker occurring when the
+		 * association is changed.
 		 */
-		rgrp->dptsr_planes = rgrp->num_crtcs > 1 ? 0xf0 : 0;
+		rgrp->dptsr_planes = rgrp->num_crtcs > 1
+				   ? (rcdu->info->gen >= 3 ? 0x04 : 0xf0)
+				   : 0;
 
-		ret = rcar_du_planes_init(rgrp);
-		if (ret < 0)
-			return ret;
+		if (!rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) {
+			ret = rcar_du_planes_init(rgrp);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* Initialize the compositors. */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) {
+		for (i = 0; i < rcdu->num_crtcs; ++i) {
+			struct rcar_du_vsp *vsp = &rcdu->vsps[i];
+
+			vsp->index = i;
+			vsp->dev = rcdu;
+			rcdu->crtcs[i].vsp = vsp;
+
+			ret = rcar_du_vsp_init(vsp);
+			if (ret < 0)
+				return ret;
+		}
 	}
 
 	/* Create the CRTCs. */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
index 0c43032fc693..e905f5da7aaa 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
@@ -62,12 +62,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 
-static void rcar_du_lvds_connector_destroy(struct drm_connector *connector)
-{
-	drm_connector_unregister(connector);
-	drm_connector_cleanup(connector);
-}
-
 static enum drm_connector_status
 rcar_du_lvds_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -79,7 +73,7 @@ static const struct drm_connector_funcs connector_funcs = {
 	.reset = drm_atomic_helper_connector_reset,
 	.detect = rcar_du_lvds_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = rcar_du_lvds_connector_destroy,
+	.destroy = drm_connector_cleanup,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
@@ -117,9 +111,6 @@ int rcar_du_lvds_connector_init(struct rcar_du_device *rcdu,
 		return ret;
 
 	drm_connector_helper_add(connector, &connector_helper_funcs);
-	ret = drm_connector_register(connector);
-	if (ret < 0)
-		return ret;
 
 	connector->dpms = DRM_MODE_DPMS_OFF;
 	drm_object_property_set_value(&connector->base,
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c
index 85043c5bad03..ef3a50321ecc 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c
@@ -38,35 +38,106 @@ static void rcar_lvds_write(struct rcar_du_lvdsenc *lvds, u32 reg, u32 data)
 	iowrite32(data, lvds->mmio + reg);
 }
 
-static int rcar_du_lvdsenc_start(struct rcar_du_lvdsenc *lvds,
-				 struct rcar_du_crtc *rcrtc)
+static void rcar_du_lvdsenc_start_gen2(struct rcar_du_lvdsenc *lvds,
+				       struct rcar_du_crtc *rcrtc)
 {
 	const struct drm_display_mode *mode = &rcrtc->crtc.mode;
 	unsigned int freq = mode->clock;
 	u32 lvdcr0;
-	u32 lvdhcr;
 	u32 pllcr;
-	int ret;
-
-	if (lvds->enabled)
-		return 0;
-
-	ret = clk_prepare_enable(lvds->clock);
-	if (ret < 0)
-		return ret;
 
 	/* PLL clock configuration */
-	if (freq <= 38000)
+	if (freq < 39000)
 		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_38M;
-	else if (freq <= 60000)
+	else if (freq < 61000)
 		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_60M;
-	else if (freq <= 121000)
+	else if (freq < 121000)
 		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_121M;
 	else
 		pllcr = LVDPLLCR_PLLDLYCNT_150M;
 
 	rcar_lvds_write(lvds, LVDPLLCR, pllcr);
 
+	/* Select the input, hardcode mode 0, enable LVDS operation and turn
+	 * bias circuitry on.
+	 */
+	lvdcr0 = LVDCR0_BEN | LVDCR0_LVEN;
+	if (rcrtc->index == 2)
+		lvdcr0 |= LVDCR0_DUSEL;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	/* Turn all the channels on. */
+	rcar_lvds_write(lvds, LVDCR1,
+			LVDCR1_CHSTBY_GEN2(3) | LVDCR1_CHSTBY_GEN2(2) |
+			LVDCR1_CHSTBY_GEN2(1) | LVDCR1_CHSTBY_GEN2(0) |
+			LVDCR1_CLKSTBY_GEN2);
+
+	/* Turn the PLL on, wait for the startup delay, and turn the output
+	 * on.
+	 */
+	lvdcr0 |= LVDCR0_PLLON;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	usleep_range(100, 150);
+
+	lvdcr0 |= LVDCR0_LVRES;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+}
+
+static void rcar_du_lvdsenc_start_gen3(struct rcar_du_lvdsenc *lvds,
+				       struct rcar_du_crtc *rcrtc)
+{
+	const struct drm_display_mode *mode = &rcrtc->crtc.mode;
+	unsigned int freq = mode->clock;
+	u32 lvdcr0;
+	u32 pllcr;
+
+	/* PLL clock configuration */
+	if (freq < 42000)
+		pllcr = LVDPLLCR_PLLDIVCNT_42M;
+	else if (freq < 85000)
+		pllcr = LVDPLLCR_PLLDIVCNT_85M;
+	else if (freq < 128000)
+		pllcr = LVDPLLCR_PLLDIVCNT_128M;
+	else
+		pllcr = LVDPLLCR_PLLDIVCNT_148M;
+
+	rcar_lvds_write(lvds, LVDPLLCR, pllcr);
+
+	/* Turn the PLL on, set it to LVDS normal mode, wait for the startup
+	 * delay and turn the output on.
+	 */
+	lvdcr0 = LVDCR0_PLLON;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	lvdcr0 |= LVDCR0_PWD;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	usleep_range(100, 150);
+
+	lvdcr0 |= LVDCR0_LVRES;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	/* Turn all the channels on. */
+	rcar_lvds_write(lvds, LVDCR1,
+			LVDCR1_CHSTBY_GEN3(3) | LVDCR1_CHSTBY_GEN3(2) |
+			LVDCR1_CHSTBY_GEN3(1) | LVDCR1_CHSTBY_GEN3(0) |
+			LVDCR1_CLKSTBY_GEN3);
+}
+
+static int rcar_du_lvdsenc_start(struct rcar_du_lvdsenc *lvds,
+				 struct rcar_du_crtc *rcrtc)
+{
+	u32 lvdhcr;
+	int ret;
+
+	if (lvds->enabled)
+		return 0;
+
+	ret = clk_prepare_enable(lvds->clock);
+	if (ret < 0)
+		return ret;
+
 	/* Hardcode the channels and control signals routing for now.
 	 *
 	 * HSYNC -> CTRL0
@@ -87,30 +158,14 @@ static int rcar_du_lvdsenc_start(struct rcar_du_lvdsenc *lvds,
 
 	rcar_lvds_write(lvds, LVDCHCR, lvdhcr);
 
-	/* Select the input, hardcode mode 0, enable LVDS operation and turn
-	 * bias circuitry on.
-	 */
-	lvdcr0 = LVDCR0_BEN | LVDCR0_LVEN;
-	if (rcrtc->index == 2)
-		lvdcr0 |= LVDCR0_DUSEL;
-	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
-
-	/* Turn all the channels on. */
-	rcar_lvds_write(lvds, LVDCR1, LVDCR1_CHSTBY(3) | LVDCR1_CHSTBY(2) |
-			LVDCR1_CHSTBY(1) | LVDCR1_CHSTBY(0) | LVDCR1_CLKSTBY);
-
-	/* Turn the PLL on, wait for the startup delay, and turn the output
-	 * on.
-	 */
-	lvdcr0 |= LVDCR0_PLLEN;
-	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
-
-	usleep_range(100, 150);
-
-	lvdcr0 |= LVDCR0_LVRES;
-	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+	/* Perform generation-specific initialization. */
+	if (lvds->dev->info->gen < 3)
+		rcar_du_lvdsenc_start_gen2(lvds, rcrtc);
+	else
+		rcar_du_lvdsenc_start_gen3(lvds, rcrtc);
 
 	lvds->enabled = true;
+
 	return 0;
 }
 
@@ -140,6 +195,21 @@ int rcar_du_lvdsenc_enable(struct rcar_du_lvdsenc *lvds, struct drm_crtc *crtc,
 		return -EINVAL;
 }
 
+void rcar_du_lvdsenc_atomic_check(struct rcar_du_lvdsenc *lvds,
+				  struct drm_display_mode *mode)
+{
+	struct rcar_du_device *rcdu = lvds->dev;
+
+	/* The internal LVDS encoder has a restricted clock frequency operating
+	 * range (30MHz to 150MHz on Gen2, 25.175MHz to 148.5MHz on Gen3). Clamp
+	 * the clock accordingly.
+	 */
+	if (rcdu->info->gen < 3)
+		mode->clock = clamp(mode->clock, 30000, 150000);
+	else
+		mode->clock = clamp(mode->clock, 25175, 148500);
+}
+
 static int rcar_du_lvdsenc_get_resources(struct rcar_du_lvdsenc *lvds,
 					 struct platform_device *pdev)
 {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h
index 9a6001c07303..dfdba746edf4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h
@@ -30,6 +30,8 @@ enum rcar_lvds_input {
 int rcar_du_lvdsenc_init(struct rcar_du_device *rcdu);
 int rcar_du_lvdsenc_enable(struct rcar_du_lvdsenc *lvds,
 			   struct drm_crtc *crtc, bool enable);
+void rcar_du_lvdsenc_atomic_check(struct rcar_du_lvdsenc *lvds,
+				  struct drm_display_mode *mode);
 #else
 static inline int rcar_du_lvdsenc_init(struct rcar_du_device *rcdu)
 {
@@ -40,6 +42,10 @@ static inline int rcar_du_lvdsenc_enable(struct rcar_du_lvdsenc *lvds,
 {
 	return 0;
 }
+static inline void rcar_du_lvdsenc_atomic_check(struct rcar_du_lvdsenc *lvds,
+						struct drm_display_mode *mode)
+{
+}
 #endif
 
 #endif /* __RCAR_DU_LVDSENC_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index c3ed9522c0e1..8460ae1ffa4b 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -1,7 +1,7 @@
 /*
  * rcar_du_plane.c  --  R-Car Display Unit Planes
  *
- * Copyright (C) 2013-2014 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -12,6 +12,7 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
@@ -20,21 +21,300 @@
 #include <drm/drm_plane_helper.h>
 
 #include "rcar_du_drv.h"
+#include "rcar_du_group.h"
 #include "rcar_du_kms.h"
 #include "rcar_du_plane.h"
 #include "rcar_du_regs.h"
 
-#define RCAR_DU_COLORKEY_NONE		(0 << 24)
-#define RCAR_DU_COLORKEY_SOURCE		(1 << 24)
-#define RCAR_DU_COLORKEY_MASK		(1 << 24)
+/* -----------------------------------------------------------------------------
+ * Atomic hardware plane allocator
+ *
+ * The hardware plane allocator is solely based on the atomic plane states
+ * without keeping any external state to avoid races between .atomic_check()
+ * and .atomic_commit().
+ *
+ * The core idea is to avoid using a free planes bitmask that would need to be
+ * shared between check and commit handlers with a collective knowledge based on
+ * the allocated hardware plane(s) for each KMS plane. The allocator then loops
+ * over all plane states to compute the free planes bitmask, allocates hardware
+ * planes based on that bitmask, and stores the result back in the plane states.
+ *
+ * For this to work we need to access the current state of planes not touched by
+ * the atomic update. To ensure that it won't be modified, we need to lock all
+ * planes using drm_atomic_get_plane_state(). This effectively serializes atomic
+ * updates from .atomic_check() up to completion (when swapping the states if
+ * the check step has succeeded) or rollback (when freeing the states if the
+ * check step has failed).
+ *
+ * Allocation is performed in the .atomic_check() handler and applied
+ * automatically when the core swaps the old and new states.
+ */
+
+static bool rcar_du_plane_needs_realloc(struct rcar_du_plane *plane,
+					struct rcar_du_plane_state *new_state)
+{
+	struct rcar_du_plane_state *cur_state;
+
+	cur_state = to_rcar_plane_state(plane->plane.state);
+
+	/* Lowering the number of planes doesn't strictly require reallocation
+	 * as the extra hardware plane will be freed when committing, but doing
+	 * so could lead to more fragmentation.
+	 */
+	if (!cur_state->format ||
+	    cur_state->format->planes != new_state->format->planes)
+		return true;
+
+	/* Reallocate hardware planes if the source has changed. */
+	if (cur_state->source != new_state->source)
+		return true;
 
-static u32 rcar_du_plane_read(struct rcar_du_group *rgrp,
-			      unsigned int index, u32 reg)
+	return false;
+}
+
+static unsigned int rcar_du_plane_hwmask(struct rcar_du_plane_state *state)
+{
+	unsigned int mask;
+
+	if (state->hwindex == -1)
+		return 0;
+
+	mask = 1 << state->hwindex;
+	if (state->format->planes == 2)
+		mask |= 1 << ((state->hwindex + 1) % 8);
+
+	return mask;
+}
+
+/*
+ * The R8A7790 DU can source frames directly from the VSP1 devices VSPD0 and
+ * VSPD1. VSPD0 feeds DU0/1 plane 0, and VSPD1 feeds either DU2 plane 0 or
+ * DU0/1 plane 1.
+ *
+ * Allocate the correct fixed plane when sourcing frames from VSPD0 or VSPD1,
+ * and allocate planes in reverse index order otherwise to ensure maximum
+ * availability of planes 0 and 1.
+ *
+ * The caller is responsible for ensuring that the requested source is
+ * compatible with the DU revision.
+ */
+static int rcar_du_plane_hwalloc(struct rcar_du_plane *plane,
+				 struct rcar_du_plane_state *state,
+				 unsigned int free)
 {
-	return rcar_du_read(rgrp->dev,
-			    rgrp->mmio_offset + index * PLANE_OFF + reg);
+	unsigned int num_planes = state->format->planes;
+	int fixed = -1;
+	int i;
+
+	if (state->source == RCAR_DU_PLANE_VSPD0) {
+		/* VSPD0 feeds plane 0 on DU0/1. */
+		if (plane->group->index != 0)
+			return -EINVAL;
+
+		fixed = 0;
+	} else if (state->source == RCAR_DU_PLANE_VSPD1) {
+		/* VSPD1 feeds plane 1 on DU0/1 or plane 0 on DU2. */
+		fixed = plane->group->index == 0 ? 1 : 0;
+	}
+
+	if (fixed >= 0)
+		return free & (1 << fixed) ? fixed : -EBUSY;
+
+	for (i = RCAR_DU_NUM_HW_PLANES - 1; i >= 0; --i) {
+		if (!(free & (1 << i)))
+			continue;
+
+		if (num_planes == 1 || free & (1 << ((i + 1) % 8)))
+			break;
+	}
+
+	return i < 0 ? -EBUSY : i;
 }
 
+int rcar_du_atomic_check_planes(struct drm_device *dev,
+				struct drm_atomic_state *state)
+{
+	struct rcar_du_device *rcdu = dev->dev_private;
+	unsigned int group_freed_planes[RCAR_DU_MAX_GROUPS] = { 0, };
+	unsigned int group_free_planes[RCAR_DU_MAX_GROUPS] = { 0, };
+	bool needs_realloc = false;
+	unsigned int groups = 0;
+	unsigned int i;
+
+	/* Check if hardware planes need to be reallocated. */
+	for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
+		struct rcar_du_plane_state *plane_state;
+		struct rcar_du_plane *plane;
+		unsigned int index;
+
+		if (!state->planes[i])
+			continue;
+
+		plane = to_rcar_plane(state->planes[i]);
+		plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+		dev_dbg(rcdu->dev, "%s: checking plane (%u,%tu)\n", __func__,
+			plane->group->index, plane - plane->group->planes);
+
+		/* If the plane is being disabled we don't need to go through
+		 * the full reallocation procedure. Just mark the hardware
+		 * plane(s) as freed.
+		 */
+		if (!plane_state->format) {
+			dev_dbg(rcdu->dev, "%s: plane is being disabled\n",
+				__func__);
+			index = plane - plane->group->planes;
+			group_freed_planes[plane->group->index] |= 1 << index;
+			plane_state->hwindex = -1;
+			continue;
+		}
+
+		/* If the plane needs to be reallocated mark it as such, and
+		 * mark the hardware plane(s) as free.
+		 */
+		if (rcar_du_plane_needs_realloc(plane, plane_state)) {
+			dev_dbg(rcdu->dev, "%s: plane needs reallocation\n",
+				__func__);
+			groups |= 1 << plane->group->index;
+			needs_realloc = true;
+
+			index = plane - plane->group->planes;
+			group_freed_planes[plane->group->index] |= 1 << index;
+			plane_state->hwindex = -1;
+		}
+	}
+
+	if (!needs_realloc)
+		return 0;
+
+	/* Grab all plane states for the groups that need reallocation to ensure
+	 * locking and avoid racy updates. This serializes the update operation,
+	 * but there's not much we can do about it as that's the hardware
+	 * design.
+	 *
+	 * Compute the used planes mask for each group at the same time to avoid
+	 * looping over the planes separately later.
+	 */
+	while (groups) {
+		unsigned int index = ffs(groups) - 1;
+		struct rcar_du_group *group = &rcdu->groups[index];
+		unsigned int used_planes = 0;
+
+		dev_dbg(rcdu->dev, "%s: finding free planes for group %u\n",
+			__func__, index);
+
+		for (i = 0; i < group->num_planes; ++i) {
+			struct rcar_du_plane *plane = &group->planes[i];
+			struct rcar_du_plane_state *plane_state;
+			struct drm_plane_state *s;
+
+			s = drm_atomic_get_plane_state(state, &plane->plane);
+			if (IS_ERR(s))
+				return PTR_ERR(s);
+
+			/* If the plane has been freed in the above loop its
+			 * hardware planes must not be added to the used planes
+			 * bitmask. However, the current state doesn't reflect
+			 * the free state yet, as we've modified the new state
+			 * above. Use the local freed planes list to check for
+			 * that condition instead.
+			 */
+			if (group_freed_planes[index] & (1 << i)) {
+				dev_dbg(rcdu->dev,
+					"%s: plane (%u,%tu) has been freed, skipping\n",
+					__func__, plane->group->index,
+					plane - plane->group->planes);
+				continue;
+			}
+
+			plane_state = to_rcar_plane_state(plane->plane.state);
+			used_planes |= rcar_du_plane_hwmask(plane_state);
+
+			dev_dbg(rcdu->dev,
+				"%s: plane (%u,%tu) uses %u hwplanes (index %d)\n",
+				__func__, plane->group->index,
+				plane - plane->group->planes,
+				plane_state->format ?
+				plane_state->format->planes : 0,
+				plane_state->hwindex);
+		}
+
+		group_free_planes[index] = 0xff & ~used_planes;
+		groups &= ~(1 << index);
+
+		dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+			__func__, index, group_free_planes[index]);
+	}
+
+	/* Reallocate hardware planes for each plane that needs it. */
+	for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
+		struct rcar_du_plane_state *plane_state;
+		struct rcar_du_plane *plane;
+		unsigned int crtc_planes;
+		unsigned int free;
+		int idx;
+
+		if (!state->planes[i])
+			continue;
+
+		plane = to_rcar_plane(state->planes[i]);
+		plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+		dev_dbg(rcdu->dev, "%s: allocating plane (%u,%tu)\n", __func__,
+			plane->group->index, plane - plane->group->planes);
+
+		/* Skip planes that are being disabled or don't need to be
+		 * reallocated.
+		 */
+		if (!plane_state->format ||
+		    !rcar_du_plane_needs_realloc(plane, plane_state))
+			continue;
+
+		/* Try to allocate the plane from the free planes currently
+		 * associated with the target CRTC to avoid restarting the CRTC
+		 * group and thus minimize flicker. If it fails fall back to
+		 * allocating from all free planes.
+		 */
+		crtc_planes = to_rcar_crtc(plane_state->state.crtc)->index % 2
+			    ? plane->group->dptsr_planes
+			    : ~plane->group->dptsr_planes;
+		free = group_free_planes[plane->group->index];
+
+		idx = rcar_du_plane_hwalloc(plane, plane_state,
+					    free & crtc_planes);
+		if (idx < 0)
+			idx = rcar_du_plane_hwalloc(plane, plane_state,
+						    free);
+		if (idx < 0) {
+			dev_dbg(rcdu->dev, "%s: no available hardware plane\n",
+				__func__);
+			return idx;
+		}
+
+		dev_dbg(rcdu->dev, "%s: allocated %u hwplanes (index %u)\n",
+			__func__, plane_state->format->planes, idx);
+
+		plane_state->hwindex = idx;
+
+		group_free_planes[plane->group->index] &=
+			~rcar_du_plane_hwmask(plane_state);
+
+		dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+			__func__, plane->group->index,
+			group_free_planes[plane->group->index]);
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Plane Setup
+ */
+
+#define RCAR_DU_COLORKEY_NONE		(0 << 24)
+#define RCAR_DU_COLORKEY_SOURCE		(1 << 24)
+#define RCAR_DU_COLORKEY_MASK		(1 << 24)
+
 static void rcar_du_plane_write(struct rcar_du_group *rgrp,
 				unsigned int index, u32 reg, u32 data)
 {
@@ -42,34 +322,45 @@ static void rcar_du_plane_write(struct rcar_du_group *rgrp,
 		      data);
 }
 
-static void rcar_du_plane_setup_fb(struct rcar_du_plane *plane)
+static void rcar_du_plane_setup_scanout(struct rcar_du_group *rgrp,
+					const struct rcar_du_plane_state *state)
 {
-	struct rcar_du_plane_state *state =
-		to_rcar_plane_state(plane->plane.state);
-	struct drm_framebuffer *fb = plane->plane.state->fb;
-	struct rcar_du_group *rgrp = plane->group;
 	unsigned int src_x = state->state.src_x >> 16;
 	unsigned int src_y = state->state.src_y >> 16;
 	unsigned int index = state->hwindex;
-	struct drm_gem_cma_object *gem;
+	unsigned int pitch;
 	bool interlaced;
-	u32 mwr;
+	u32 dma[2];
 
 	interlaced = state->state.crtc->state->adjusted_mode.flags
 		   & DRM_MODE_FLAG_INTERLACE;
 
+	if (state->source == RCAR_DU_PLANE_MEMORY) {
+		struct drm_framebuffer *fb = state->state.fb;
+		struct drm_gem_cma_object *gem;
+		unsigned int i;
+
+		if (state->format->planes == 2)
+			pitch = fb->pitches[0];
+		else
+			pitch = fb->pitches[0] * 8 / state->format->bpp;
+
+		for (i = 0; i < state->format->planes; ++i) {
+			gem = drm_fb_cma_get_gem_obj(fb, i);
+			dma[i] = gem->paddr + fb->offsets[i];
+		}
+	} else {
+		pitch = state->state.src_w >> 16;
+		dma[0] = 0;
+		dma[1] = 0;
+	}
+
 	/* Memory pitch (expressed in pixels). Must be doubled for interlaced
 	 * operation with 32bpp formats.
 	 */
-	if (state->format->planes == 2)
-		mwr = fb->pitches[0];
-	else
-		mwr = fb->pitches[0] * 8 / state->format->bpp;
-
-	if (interlaced && state->format->bpp == 32)
-		mwr *= 2;
-
-	rcar_du_plane_write(rgrp, index, PnMWR, mwr);
+	rcar_du_plane_write(rgrp, index, PnMWR,
+			    (interlaced && state->format->bpp == 32) ?
+			    pitch * 2 : pitch);
 
 	/* The Y position is expressed in raster line units and must be doubled
 	 * for 32bpp formats, according to the R8A7790 datasheet. No mention of
@@ -87,30 +378,25 @@ static void rcar_du_plane_setup_fb(struct rcar_du_plane *plane)
 	rcar_du_plane_write(rgrp, index, PnSPYR, src_y *
 			    (!interlaced && state->format->bpp == 32 ? 2 : 1));
 
-	gem = drm_fb_cma_get_gem_obj(fb, 0);
-	rcar_du_plane_write(rgrp, index, PnDSA0R, gem->paddr + fb->offsets[0]);
+	rcar_du_plane_write(rgrp, index, PnDSA0R, dma[0]);
 
 	if (state->format->planes == 2) {
 		index = (index + 1) % 8;
 
-		rcar_du_plane_write(rgrp, index, PnMWR, fb->pitches[0]);
+		rcar_du_plane_write(rgrp, index, PnMWR, pitch);
 
 		rcar_du_plane_write(rgrp, index, PnSPXR, src_x);
 		rcar_du_plane_write(rgrp, index, PnSPYR, src_y *
 				    (state->format->bpp == 16 ? 2 : 1) / 2);
 
-		gem = drm_fb_cma_get_gem_obj(fb, 1);
-		rcar_du_plane_write(rgrp, index, PnDSA0R,
-				    gem->paddr + fb->offsets[1]);
+		rcar_du_plane_write(rgrp, index, PnDSA0R, dma[1]);
 	}
 }
 
-static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane,
-				     unsigned int index)
+static void rcar_du_plane_setup_mode(struct rcar_du_group *rgrp,
+				     unsigned int index,
+				     const struct rcar_du_plane_state *state)
 {
-	struct rcar_du_plane_state *state =
-		to_rcar_plane_state(plane->plane.state);
-	struct rcar_du_group *rgrp = plane->group;
 	u32 colorkey;
 	u32 pnmr;
 
@@ -168,12 +454,10 @@ static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane,
 	}
 }
 
-static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
-				  unsigned int index)
+static void rcar_du_plane_setup_format_gen2(struct rcar_du_group *rgrp,
+					    unsigned int index,
+					    const struct rcar_du_plane_state *state)
 {
-	struct rcar_du_plane_state *state =
-		to_rcar_plane_state(plane->plane.state);
-	struct rcar_du_group *rgrp = plane->group;
 	u32 ddcr2 = PnDDCR2_CODE;
 	u32 ddcr4;
 
@@ -182,11 +466,8 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
 	 * The data format is selected by the DDDF field in PnMR and the EDF
 	 * field in DDCR4.
 	 */
-	ddcr4 = rcar_du_plane_read(rgrp, index, PnDDCR4);
-	ddcr4 &= ~PnDDCR4_EDF_MASK;
-	ddcr4 |= state->format->edf | PnDDCR4_CODE;
 
-	rcar_du_plane_setup_mode(plane, index);
+	rcar_du_plane_setup_mode(rgrp, index, state);
 
 	if (state->format->planes == 2) {
 		if (state->hwindex != index) {
@@ -204,31 +485,72 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
 	}
 
 	rcar_du_plane_write(rgrp, index, PnDDCR2, ddcr2);
+
+	ddcr4 = state->format->edf | PnDDCR4_CODE;
+	if (state->source != RCAR_DU_PLANE_MEMORY)
+		ddcr4 |= PnDDCR4_VSPS;
+
 	rcar_du_plane_write(rgrp, index, PnDDCR4, ddcr4);
+}
+
+static void rcar_du_plane_setup_format_gen3(struct rcar_du_group *rgrp,
+					    unsigned int index,
+					    const struct rcar_du_plane_state *state)
+{
+	rcar_du_plane_write(rgrp, index, PnMR,
+			    PnMR_SPIM_TP_OFF | state->format->pnmr);
+
+	rcar_du_plane_write(rgrp, index, PnDDCR4,
+			    state->format->edf | PnDDCR4_CODE);
+}
+
+static void rcar_du_plane_setup_format(struct rcar_du_group *rgrp,
+				       unsigned int index,
+				       const struct rcar_du_plane_state *state)
+{
+	struct rcar_du_device *rcdu = rgrp->dev;
+
+	if (rcdu->info->gen < 3)
+		rcar_du_plane_setup_format_gen2(rgrp, index, state);
+	else
+		rcar_du_plane_setup_format_gen3(rgrp, index, state);
 
 	/* Destination position and size */
-	rcar_du_plane_write(rgrp, index, PnDSXR, plane->plane.state->crtc_w);
-	rcar_du_plane_write(rgrp, index, PnDSYR, plane->plane.state->crtc_h);
-	rcar_du_plane_write(rgrp, index, PnDPXR, plane->plane.state->crtc_x);
-	rcar_du_plane_write(rgrp, index, PnDPYR, plane->plane.state->crtc_y);
-
-	/* Wrap-around and blinking, disabled */
-	rcar_du_plane_write(rgrp, index, PnWASPR, 0);
-	rcar_du_plane_write(rgrp, index, PnWAMWR, 4095);
-	rcar_du_plane_write(rgrp, index, PnBTR, 0);
-	rcar_du_plane_write(rgrp, index, PnMLR, 0);
+	rcar_du_plane_write(rgrp, index, PnDSXR, state->state.crtc_w);
+	rcar_du_plane_write(rgrp, index, PnDSYR, state->state.crtc_h);
+	rcar_du_plane_write(rgrp, index, PnDPXR, state->state.crtc_x);
+	rcar_du_plane_write(rgrp, index, PnDPYR, state->state.crtc_y);
+
+	if (rcdu->info->gen < 3) {
+		/* Wrap-around and blinking, disabled */
+		rcar_du_plane_write(rgrp, index, PnWASPR, 0);
+		rcar_du_plane_write(rgrp, index, PnWAMWR, 4095);
+		rcar_du_plane_write(rgrp, index, PnBTR, 0);
+		rcar_du_plane_write(rgrp, index, PnMLR, 0);
+	}
 }
 
-void rcar_du_plane_setup(struct rcar_du_plane *plane)
+void __rcar_du_plane_setup(struct rcar_du_group *rgrp,
+			   const struct rcar_du_plane_state *state)
 {
-	struct rcar_du_plane_state *state =
-		to_rcar_plane_state(plane->plane.state);
+	struct rcar_du_device *rcdu = rgrp->dev;
 
-	__rcar_du_plane_setup(plane, state->hwindex);
+	rcar_du_plane_setup_format(rgrp, state->hwindex, state);
 	if (state->format->planes == 2)
-		__rcar_du_plane_setup(plane, (state->hwindex + 1) % 8);
+		rcar_du_plane_setup_format(rgrp, (state->hwindex + 1) % 8,
+					   state);
 
-	rcar_du_plane_setup_fb(plane);
+	if (rcdu->info->gen < 3)
+		rcar_du_plane_setup_scanout(rgrp, state);
+
+	if (state->source == RCAR_DU_PLANE_VSPD1) {
+		unsigned int vspd1_sink = rgrp->index ? 2 : 0;
+
+		if (rcdu->vspd1_sink != vspd1_sink) {
+			rcdu->vspd1_sink = vspd1_sink;
+			rcar_du_set_dpad0_vsp1_routing(rcdu);
+		}
+	}
 }
 
 static int rcar_du_plane_atomic_check(struct drm_plane *plane,
@@ -263,9 +585,27 @@ static void rcar_du_plane_atomic_update(struct drm_plane *plane,
 					struct drm_plane_state *old_state)
 {
 	struct rcar_du_plane *rplane = to_rcar_plane(plane);
+	struct rcar_du_plane_state *old_rstate;
+	struct rcar_du_plane_state *new_rstate;
+
+	if (!plane->state->crtc)
+		return;
+
+	rcar_du_plane_setup(rplane);
+
+	/* Check whether the source has changed from memory to live source or
+	 * from live source to memory. The source has been configured by the
+	 * VSPS bit in the PnDDCR4 register. Although the datasheet states that
+	 * the bit is updated during vertical blanking, it seems that updates
+	 * only occur when the DU group is held in reset through the DSYSR.DRES
+	 * bit. We thus need to restart the group if the source changes.
+	 */
+	old_rstate = to_rcar_plane_state(old_state);
+	new_rstate = to_rcar_plane_state(plane->state);
 
-	if (plane->state->crtc)
-		rcar_du_plane_setup(rplane);
+	if ((old_rstate->source == RCAR_DU_PLANE_MEMORY) !=
+	    (new_rstate->source == RCAR_DU_PLANE_MEMORY))
+		rplane->group->need_restart = true;
 }
 
 static const struct drm_plane_helper_funcs rcar_du_plane_helper_funcs = {
@@ -313,6 +653,7 @@ static void rcar_du_plane_reset(struct drm_plane *plane)
 		return;
 
 	state->hwindex = -1;
+	state->source = RCAR_DU_PLANE_MEMORY;
 	state->alpha = 255;
 	state->colorkey = RCAR_DU_COLORKEY_NONE;
 	state->zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
index 9732bff1911b..b18b7b25dbfa 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
@@ -28,6 +28,12 @@ struct rcar_du_group;
 #define RCAR_DU_NUM_KMS_PLANES		9
 #define RCAR_DU_NUM_HW_PLANES		8
 
+enum rcar_du_plane_source {
+	RCAR_DU_PLANE_MEMORY,
+	RCAR_DU_PLANE_VSPD0,
+	RCAR_DU_PLANE_VSPD1,
+};
+
 struct rcar_du_plane {
 	struct drm_plane plane;
 	struct rcar_du_group *group;
@@ -52,6 +58,7 @@ struct rcar_du_plane_state {
 
 	const struct rcar_du_format_info *format;
 	int hwindex;
+	enum rcar_du_plane_source source;
 
 	unsigned int alpha;
 	unsigned int colorkey;
@@ -64,8 +71,20 @@ to_rcar_plane_state(struct drm_plane_state *state)
 	return container_of(state, struct rcar_du_plane_state, state);
 }
 
+int rcar_du_atomic_check_planes(struct drm_device *dev,
+				struct drm_atomic_state *state);
+
 int rcar_du_planes_init(struct rcar_du_group *rgrp);
 
-void rcar_du_plane_setup(struct rcar_du_plane *plane);
+void __rcar_du_plane_setup(struct rcar_du_group *rgrp,
+			   const struct rcar_du_plane_state *state);
+
+static inline void rcar_du_plane_setup(struct rcar_du_plane *plane)
+{
+	struct rcar_du_plane_state *state =
+		to_rcar_plane_state(plane->plane.state);
+
+	return __rcar_du_plane_setup(plane->group, state);
+}
 
 #endif /* __RCAR_DU_PLANE_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
index 70fcbc471ebd..d2f66068e52c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
@@ -1,7 +1,7 @@
 /*
  * rcar_du_regs.h  --  R-Car Display Unit Registers Definitions
  *
- * Copyright (C) 2013 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -16,6 +16,7 @@
 #define DU0_REG_OFFSET		0x00000
 #define DU1_REG_OFFSET		0x30000
 #define DU2_REG_OFFSET		0x40000
+#define DU3_REG_OFFSET		0x70000
 
 /* -----------------------------------------------------------------------------
  * Display Control Registers
@@ -186,7 +187,7 @@
 
 #define DEFR6			0x000e8
 #define DEFR6_CODE		(0x7778 << 16)
-#define DEFR6_ODPM22_D2SMR	(0 << 10)
+#define DEFR6_ODPM22_DSMR	(0 << 10)
 #define DEFR6_ODPM22_DISP	(2 << 10)
 #define DEFR6_ODPM22_CDE	(3 << 10)
 #define DEFR6_ODPM22_MASK	(3 << 10)
@@ -260,6 +261,21 @@
 #define DIDSR_PDCS_CLK(n, clk)	(clk << ((n) * 2))
 #define DIDSR_PDCS_MASK(n)	(3 << ((n) * 2))
 
+#define DEFR10			0x20038
+#define DEFR10_CODE		(0x7795 << 16)
+#define DEFR10_VSPF1_RGB	(0 << 14)
+#define DEFR10_VSPF1_YC		(1 << 14)
+#define DEFR10_DOCF1_RGB	(0 << 12)
+#define DEFR10_DOCF1_YC		(1 << 12)
+#define DEFR10_YCDF0_YCBCR444	(0 << 11)
+#define DEFR10_YCDF0_YCBCR422	(1 << 11)
+#define DEFR10_VSPF0_RGB	(0 << 10)
+#define DEFR10_VSPF0_YC		(1 << 10)
+#define DEFR10_DOCF0_RGB	(0 << 8)
+#define DEFR10_DOCF0_YC		(1 << 8)
+#define DEFR10_TSEL_H3_TCON1	(0 << 1) /* DEFR102 register only (DU2/DU3) */
+#define DEFR10_DEFE10		(1 << 0)
+
 /* -----------------------------------------------------------------------------
  * Display Timing Generation Registers
  */
@@ -389,6 +405,7 @@
 
 #define PnDDCR4			0x00190
 #define PnDDCR4_CODE		(0x7766 << 16)
+#define PnDDCR4_VSPS		(1 << 13)
 #define PnDDCR4_SDFS_RGB	(0 << 4)
 #define PnDDCR4_SDFS_YC		(5 << 4)
 #define PnDDCR4_SDFS_MASK	(7 << 4)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
index e0a5d8f93963..9d7e5c99caf6 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
@@ -31,12 +31,6 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.best_encoder = rcar_du_connector_best_encoder,
 };
 
-static void rcar_du_vga_connector_destroy(struct drm_connector *connector)
-{
-	drm_connector_unregister(connector);
-	drm_connector_cleanup(connector);
-}
-
 static enum drm_connector_status
 rcar_du_vga_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -48,7 +42,7 @@ static const struct drm_connector_funcs connector_funcs = {
 	.reset = drm_atomic_helper_connector_reset,
 	.detect = rcar_du_vga_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = rcar_du_vga_connector_destroy,
+	.destroy = drm_connector_cleanup,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
@@ -76,9 +70,6 @@ int rcar_du_vga_connector_init(struct rcar_du_device *rcdu,
 		return ret;
 
 	drm_connector_helper_add(connector, &connector_helper_funcs);
-	ret = drm_connector_register(connector);
-	if (ret < 0)
-		return ret;
 
 	connector->dpms = DRM_MODE_DPMS_OFF;
 	drm_object_property_set_value(&connector->base,
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
new file mode 100644
index 000000000000..de7ef041182b
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -0,0 +1,384 @@
+/*
+ * rcar_du_vsp.h  --  R-Car Display Unit VSP-Based Compositor
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include <linux/of_platform.h>
+#include <linux/videodev2.h>
+
+#include <media/vsp1.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_kms.h"
+#include "rcar_du_vsp.h"
+
+void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
+{
+	const struct drm_display_mode *mode = &crtc->crtc.state->adjusted_mode;
+	struct rcar_du_device *rcdu = crtc->group->dev;
+	struct rcar_du_plane_state state = {
+		.state = {
+			.crtc = &crtc->crtc,
+			.crtc_x = 0,
+			.crtc_y = 0,
+			.crtc_w = mode->hdisplay,
+			.crtc_h = mode->vdisplay,
+			.src_x = 0,
+			.src_y = 0,
+			.src_w = mode->hdisplay << 16,
+			.src_h = mode->vdisplay << 16,
+		},
+		.format = rcar_du_format_info(DRM_FORMAT_ARGB8888),
+		.source = RCAR_DU_PLANE_VSPD1,
+		.alpha = 255,
+		.colorkey = 0,
+		.zpos = 0,
+	};
+
+	if (rcdu->info->gen >= 3)
+		state.hwindex = (crtc->index % 2) ? 2 : 0;
+	else
+		state.hwindex = crtc->index % 2;
+
+	__rcar_du_plane_setup(crtc->group, &state);
+
+	/* Ensure that the plane source configuration takes effect by requesting
+	 * a restart of the group. See rcar_du_plane_atomic_update() for a more
+	 * detailed explanation.
+	 *
+	 * TODO: Check whether this is still needed on Gen3.
+	 */
+	crtc->group->need_restart = true;
+
+	vsp1_du_setup_lif(crtc->vsp->vsp, mode->hdisplay, mode->vdisplay);
+}
+
+void rcar_du_vsp_disable(struct rcar_du_crtc *crtc)
+{
+	vsp1_du_setup_lif(crtc->vsp->vsp, 0, 0);
+}
+
+void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc)
+{
+	vsp1_du_atomic_begin(crtc->vsp->vsp);
+}
+
+void rcar_du_vsp_atomic_flush(struct rcar_du_crtc *crtc)
+{
+	vsp1_du_atomic_flush(crtc->vsp->vsp);
+}
+
+/* Keep the two tables in sync. */
+static const u32 formats_kms[] = {
+	DRM_FORMAT_RGB332,
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_XRGB4444,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_BGR888,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_VYUY,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YVYU,
+	DRM_FORMAT_NV12,
+	DRM_FORMAT_NV21,
+	DRM_FORMAT_NV16,
+	DRM_FORMAT_NV61,
+	DRM_FORMAT_YUV420,
+	DRM_FORMAT_YVU420,
+	DRM_FORMAT_YUV422,
+	DRM_FORMAT_YVU422,
+	DRM_FORMAT_YUV444,
+	DRM_FORMAT_YVU444,
+};
+
+static const u32 formats_v4l2[] = {
+	V4L2_PIX_FMT_RGB332,
+	V4L2_PIX_FMT_ARGB444,
+	V4L2_PIX_FMT_XRGB444,
+	V4L2_PIX_FMT_ARGB555,
+	V4L2_PIX_FMT_XRGB555,
+	V4L2_PIX_FMT_RGB565,
+	V4L2_PIX_FMT_RGB24,
+	V4L2_PIX_FMT_BGR24,
+	V4L2_PIX_FMT_ARGB32,
+	V4L2_PIX_FMT_XRGB32,
+	V4L2_PIX_FMT_ABGR32,
+	V4L2_PIX_FMT_XBGR32,
+	V4L2_PIX_FMT_UYVY,
+	V4L2_PIX_FMT_VYUY,
+	V4L2_PIX_FMT_YUYV,
+	V4L2_PIX_FMT_YVYU,
+	V4L2_PIX_FMT_NV12M,
+	V4L2_PIX_FMT_NV21M,
+	V4L2_PIX_FMT_NV16M,
+	V4L2_PIX_FMT_NV61M,
+	V4L2_PIX_FMT_YUV420M,
+	V4L2_PIX_FMT_YVU420M,
+	V4L2_PIX_FMT_YUV422M,
+	V4L2_PIX_FMT_YVU422M,
+	V4L2_PIX_FMT_YUV444M,
+	V4L2_PIX_FMT_YVU444M,
+};
+
+static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane)
+{
+	struct rcar_du_vsp_plane_state *state =
+		to_rcar_vsp_plane_state(plane->plane.state);
+	struct drm_framebuffer *fb = plane->plane.state->fb;
+	struct v4l2_rect src;
+	struct v4l2_rect dst;
+	dma_addr_t paddr[2] = { 0, };
+	u32 pixelformat = 0;
+	unsigned int i;
+
+	src.left = state->state.src_x >> 16;
+	src.top = state->state.src_y >> 16;
+	src.width = state->state.src_w >> 16;
+	src.height = state->state.src_h >> 16;
+
+	dst.left = state->state.crtc_x;
+	dst.top = state->state.crtc_y;
+	dst.width = state->state.crtc_w;
+	dst.height = state->state.crtc_h;
+
+	for (i = 0; i < state->format->planes; ++i) {
+		struct drm_gem_cma_object *gem;
+
+		gem = drm_fb_cma_get_gem_obj(fb, i);
+		paddr[i] = gem->paddr + fb->offsets[i];
+	}
+
+	for (i = 0; i < ARRAY_SIZE(formats_kms); ++i) {
+		if (formats_kms[i] == state->format->fourcc) {
+			pixelformat = formats_v4l2[i];
+			break;
+		}
+	}
+
+	WARN_ON(!pixelformat);
+
+	vsp1_du_atomic_update(plane->vsp->vsp, plane->index, pixelformat,
+			      fb->pitches[0], paddr, &src, &dst);
+}
+
+static int rcar_du_vsp_plane_atomic_check(struct drm_plane *plane,
+					  struct drm_plane_state *state)
+{
+	struct rcar_du_vsp_plane_state *rstate = to_rcar_vsp_plane_state(state);
+	struct rcar_du_vsp_plane *rplane = to_rcar_vsp_plane(plane);
+	struct rcar_du_device *rcdu = rplane->vsp->dev;
+
+	if (!state->fb || !state->crtc) {
+		rstate->format = NULL;
+		return 0;
+	}
+
+	if (state->src_w >> 16 != state->crtc_w ||
+	    state->src_h >> 16 != state->crtc_h) {
+		dev_dbg(rcdu->dev, "%s: scaling not supported\n", __func__);
+		return -EINVAL;
+	}
+
+	rstate->format = rcar_du_format_info(state->fb->pixel_format);
+	if (rstate->format == NULL) {
+		dev_dbg(rcdu->dev, "%s: unsupported format %08x\n", __func__,
+			state->fb->pixel_format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void rcar_du_vsp_plane_atomic_update(struct drm_plane *plane,
+					struct drm_plane_state *old_state)
+{
+	struct rcar_du_vsp_plane *rplane = to_rcar_vsp_plane(plane);
+
+	if (plane->state->crtc)
+		rcar_du_vsp_plane_setup(rplane);
+	else
+		vsp1_du_atomic_update(rplane->vsp->vsp, rplane->index, 0, 0, 0,
+				      NULL, NULL);
+}
+
+static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = {
+	.atomic_check = rcar_du_vsp_plane_atomic_check,
+	.atomic_update = rcar_du_vsp_plane_atomic_update,
+};
+
+static struct drm_plane_state *
+rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane)
+{
+	struct rcar_du_vsp_plane_state *state;
+	struct rcar_du_vsp_plane_state *copy;
+
+	if (WARN_ON(!plane->state))
+		return NULL;
+
+	state = to_rcar_vsp_plane_state(plane->state);
+	copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+	if (copy == NULL)
+		return NULL;
+
+	__drm_atomic_helper_plane_duplicate_state(plane, &copy->state);
+
+	return &copy->state;
+}
+
+static void rcar_du_vsp_plane_atomic_destroy_state(struct drm_plane *plane,
+						   struct drm_plane_state *state)
+{
+	__drm_atomic_helper_plane_destroy_state(plane, state);
+	kfree(to_rcar_vsp_plane_state(state));
+}
+
+static void rcar_du_vsp_plane_reset(struct drm_plane *plane)
+{
+	struct rcar_du_vsp_plane_state *state;
+
+	if (plane->state) {
+		rcar_du_vsp_plane_atomic_destroy_state(plane, plane->state);
+		plane->state = NULL;
+	}
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state == NULL)
+		return;
+
+	state->alpha = 255;
+
+	plane->state = &state->state;
+	plane->state->plane = plane;
+}
+
+static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane,
+	struct drm_plane_state *state, struct drm_property *property,
+	uint64_t val)
+{
+	struct rcar_du_vsp_plane_state *rstate = to_rcar_vsp_plane_state(state);
+	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
+
+	if (property == rcdu->props.alpha)
+		rstate->alpha = val;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane,
+	const struct drm_plane_state *state, struct drm_property *property,
+	uint64_t *val)
+{
+	const struct rcar_du_vsp_plane_state *rstate =
+		container_of(state, const struct rcar_du_vsp_plane_state, state);
+	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
+
+	if (property == rcdu->props.alpha)
+		*val = rstate->alpha;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.reset = rcar_du_vsp_plane_reset,
+	.set_property = drm_atomic_helper_plane_set_property,
+	.destroy = drm_plane_cleanup,
+	.atomic_duplicate_state = rcar_du_vsp_plane_atomic_duplicate_state,
+	.atomic_destroy_state = rcar_du_vsp_plane_atomic_destroy_state,
+	.atomic_set_property = rcar_du_vsp_plane_atomic_set_property,
+	.atomic_get_property = rcar_du_vsp_plane_atomic_get_property,
+};
+
+int rcar_du_vsp_init(struct rcar_du_vsp *vsp)
+{
+	struct rcar_du_device *rcdu = vsp->dev;
+	struct platform_device *pdev;
+	struct device_node *np;
+	unsigned int i;
+	int ret;
+
+	/* Find the VSP device and initialize it. */
+	np = of_parse_phandle(rcdu->dev->of_node, "vsps", vsp->index);
+	if (!np) {
+		dev_err(rcdu->dev, "vsps node not found\n");
+		return -ENXIO;
+	}
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+	if (!pdev)
+		return -ENXIO;
+
+	vsp->vsp = &pdev->dev;
+
+	ret = vsp1_du_init(vsp->vsp);
+	if (ret < 0)
+		return ret;
+
+	 /* The VSP2D (Gen3) has 5 RPFs, but the VSP1D (Gen2) is limited to
+	  * 4 RPFs.
+	  */
+	vsp->num_planes = rcdu->info->gen >= 3 ? 5 : 4;
+
+	vsp->planes = devm_kcalloc(rcdu->dev, vsp->num_planes,
+				   sizeof(*vsp->planes), GFP_KERNEL);
+	if (!vsp->planes)
+		return -ENOMEM;
+
+	for (i = 0; i < vsp->num_planes; ++i) {
+		enum drm_plane_type type = i ? DRM_PLANE_TYPE_OVERLAY
+					 : DRM_PLANE_TYPE_PRIMARY;
+		struct rcar_du_vsp_plane *plane = &vsp->planes[i];
+
+		plane->vsp = vsp;
+		plane->index = i;
+
+		ret = drm_universal_plane_init(rcdu->ddev, &plane->plane,
+					       1 << vsp->index,
+					       &rcar_du_vsp_plane_funcs,
+					       formats_kms,
+					       ARRAY_SIZE(formats_kms), type,
+					       NULL);
+		if (ret < 0)
+			return ret;
+
+		drm_plane_helper_add(&plane->plane,
+				     &rcar_du_vsp_plane_helper_funcs);
+
+		if (type == DRM_PLANE_TYPE_PRIMARY)
+			continue;
+
+		drm_object_attach_property(&plane->plane.base,
+					   rcdu->props.alpha, 255);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
new file mode 100644
index 000000000000..df3bf3805c69
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
@@ -0,0 +1,76 @@
+/*
+ * rcar_du_vsp.h  --  R-Car Display Unit VSP-Based Compositor
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_VSP_H__
+#define __RCAR_DU_VSP_H__
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+
+struct rcar_du_format_info;
+struct rcar_du_vsp;
+
+struct rcar_du_vsp_plane {
+	struct drm_plane plane;
+	struct rcar_du_vsp *vsp;
+	unsigned int index;
+};
+
+struct rcar_du_vsp {
+	unsigned int index;
+	struct device *vsp;
+	struct rcar_du_device *dev;
+	struct rcar_du_vsp_plane *planes;
+	unsigned int num_planes;
+};
+
+static inline struct rcar_du_vsp_plane *to_rcar_vsp_plane(struct drm_plane *p)
+{
+	return container_of(p, struct rcar_du_vsp_plane, plane);
+}
+
+/**
+ * struct rcar_du_vsp_plane_state - Driver-specific plane state
+ * @state: base DRM plane state
+ * @format: information about the pixel format used by the plane
+ * @alpha: value of the plane alpha property
+ */
+struct rcar_du_vsp_plane_state {
+	struct drm_plane_state state;
+
+	const struct rcar_du_format_info *format;
+
+	unsigned int alpha;
+};
+
+static inline struct rcar_du_vsp_plane_state *
+to_rcar_vsp_plane_state(struct drm_plane_state *state)
+{
+	return container_of(state, struct rcar_du_vsp_plane_state, state);
+}
+
+#ifdef CONFIG_DRM_RCAR_VSP
+int rcar_du_vsp_init(struct rcar_du_vsp *vsp);
+void rcar_du_vsp_enable(struct rcar_du_crtc *crtc);
+void rcar_du_vsp_disable(struct rcar_du_crtc *crtc);
+void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc);
+void rcar_du_vsp_atomic_flush(struct rcar_du_crtc *crtc);
+#else
+static inline int rcar_du_vsp_init(struct rcar_du_vsp *vsp) { return 0; };
+static inline void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) { };
+static inline void rcar_du_vsp_disable(struct rcar_du_crtc *crtc) { };
+static inline void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc) { };
+static inline void rcar_du_vsp_atomic_flush(struct rcar_du_crtc *crtc) { };
+#endif
+
+#endif /* __RCAR_DU_VSP_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h b/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h
index 77cf9289ab65..d7d294ba2dbe 100644
--- a/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h
@@ -1,7 +1,7 @@
 /*
  * rcar_lvds_regs.h  --  R-Car LVDS Interface Registers Definitions
  *
- * Copyright (C) 2013 Renesas Electronics Corporation
+ * Copyright (C) 2013-2015 Renesas Electronics Corporation
  *
  * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
  *
@@ -15,28 +15,38 @@
 
 #define LVDCR0				0x0000
 #define LVDCR0_DUSEL			(1 << 15)
-#define LVDCR0_DMD			(1 << 12)
+#define LVDCR0_DMD			(1 << 12)		/* Gen2 only */
 #define LVDCR0_LVMD_MASK		(0xf << 8)
 #define LVDCR0_LVMD_SHIFT		8
-#define LVDCR0_PLLEN			(1 << 4)
-#define LVDCR0_BEN			(1 << 2)
-#define LVDCR0_LVEN			(1 << 1)
+#define LVDCR0_PLLON			(1 << 4)
+#define LVDCR0_PWD			(1 << 2)		/* Gen3 only */
+#define LVDCR0_BEN			(1 << 2)		/* Gen2 only */
+#define LVDCR0_LVEN			(1 << 1)		/* Gen2 only */
 #define LVDCR0_LVRES			(1 << 0)
 
 #define LVDCR1				0x0004
-#define LVDCR1_CKSEL			(1 << 15)
-#define LVDCR1_CHSTBY(n)		(3 << (2 + (n) * 2))
-#define LVDCR1_CLKSTBY			(3 << 0)
+#define LVDCR1_CKSEL			(1 << 15)		/* Gen2 only */
+#define LVDCR1_CHSTBY_GEN2(n)		(3 << (2 + (n) * 2))	/* Gen2 only */
+#define LVDCR1_CHSTBY_GEN3(n)		(1 << (2 + (n) * 2))	/* Gen3 only */
+#define LVDCR1_CLKSTBY_GEN2		(3 << 0)		/* Gen2 only */
+#define LVDCR1_CLKSTBY_GEN3		(1 << 0)		/* Gen3 only */
 
 #define LVDPLLCR			0x0008
 #define LVDPLLCR_CEEN			(1 << 14)
 #define LVDPLLCR_FBEN			(1 << 13)
 #define LVDPLLCR_COSEL			(1 << 12)
+/* Gen2 */
 #define LVDPLLCR_PLLDLYCNT_150M		(0x1bf << 0)
 #define LVDPLLCR_PLLDLYCNT_121M		(0x22c << 0)
 #define LVDPLLCR_PLLDLYCNT_60M		(0x77b << 0)
 #define LVDPLLCR_PLLDLYCNT_38M		(0x69a << 0)
 #define LVDPLLCR_PLLDLYCNT_MASK		(0x7ff << 0)
+/* Gen3 */
+#define LVDPLLCR_PLLDIVCNT_42M		(0x014cb << 0)
+#define LVDPLLCR_PLLDIVCNT_85M		(0x00a45 << 0)
+#define LVDPLLCR_PLLDIVCNT_128M		(0x006c3 << 0)
+#define LVDPLLCR_PLLDIVCNT_148M		(0x046c1 << 0)
+#define LVDPLLCR_PLLDIVCNT_MASK		(0x7ffff << 0)
 
 #define LVDCTRCR			0x000c
 #define LVDCTRCR_CTR3SEL_ZERO		(0 << 12)
diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index 85739859dffc..76b3362c5e59 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -35,3 +35,11 @@ config ROCKCHIP_DW_MIPI_DSI
 	 for the Synopsys DesignWare HDMI driver. If you want to
 	 enable MIPI DSI on RK3288 based SoC, you should selet this
 	 option.
+
+config ROCKCHIP_INNO_HDMI
+	tristate "Rockchip specific extensions for Innosilicon HDMI"
+	depends on DRM_ROCKCHIP
+	help
+	  This selects support for Rockchip SoC specific extensions
+	  for the Innosilicon HDMI driver. If you want to enable
+	  HDMI on RK3036 based SoC, you should select this option.
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index f6a809afceec..df8fbef17791 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -8,5 +8,6 @@ rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
+obj-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o
 
 obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index f8f8f29fb7c3..7975158064e8 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -875,17 +875,10 @@ static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
 	clk_disable_unprepare(dsi->pclk);
 }
 
-static bool dw_mipi_dsi_encoder_mode_fixup(struct drm_encoder *encoder,
-					const struct drm_display_mode *mode,
-					struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void dw_mipi_dsi_encoder_commit(struct drm_encoder *encoder)
 {
 	struct dw_mipi_dsi *dsi = encoder_to_dsi(encoder);
-	int mux  = rockchip_drm_encoder_get_mux_id(dsi->dev->of_node, encoder);
+	int mux = drm_of_encoder_active_endpoint_id(dsi->dev->of_node, encoder);
 	u32 interface_pix_fmt;
 	u32 val;
 
@@ -931,7 +924,6 @@ static void dw_mipi_dsi_encoder_commit(struct drm_encoder *encoder)
 
 static struct drm_encoder_helper_funcs
 dw_mipi_dsi_encoder_helper_funcs = {
-	.mode_fixup = dw_mipi_dsi_encoder_mode_fixup,
 	.commit = dw_mipi_dsi_encoder_commit,
 	.mode_set = dw_mipi_dsi_encoder_mode_set,
 	.disable = dw_mipi_dsi_encoder_disable,
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index c65ce8cb30d3..3d3cf2f8891e 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -204,7 +204,7 @@ static void dw_hdmi_rockchip_encoder_enable(struct drm_encoder *encoder)
 	rockchip_drm_crtc_mode_config(encoder->crtc, DRM_MODE_CONNECTOR_HDMIA,
 				      ROCKCHIP_OUT_MODE_AAAA);
 
-	mux = rockchip_drm_encoder_get_mux_id(hdmi->dev->of_node, encoder);
+	mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
 	if (mux)
 		val = HDMI_SEL_VOP_LIT | (HDMI_SEL_VOP_LIT << 16);
 	else
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
new file mode 100644
index 000000000000..10d62fff22f1
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ *    Zheng Yang <zhengyang@rock-chips.com>
+ *    Yakir Yang <ykk@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irq.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hdmi.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+
+#include <drm/drm_of.h>
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_edid.h>
+
+#include "rockchip_drm_drv.h"
+#include "rockchip_drm_vop.h"
+
+#include "inno_hdmi.h"
+
+#define to_inno_hdmi(x)	container_of(x, struct inno_hdmi, x)
+
+struct hdmi_data_info {
+	int vic;
+	bool sink_is_hdmi;
+	bool sink_has_audio;
+	unsigned int enc_in_format;
+	unsigned int enc_out_format;
+	unsigned int colorimetry;
+};
+
+struct inno_hdmi_i2c {
+	struct i2c_adapter adap;
+
+	u8 ddc_addr;
+	u8 segment_addr;
+
+	struct mutex lock;
+	struct completion cmp;
+};
+
+struct inno_hdmi {
+	struct device *dev;
+	struct drm_device *drm_dev;
+
+	int irq;
+	struct clk *pclk;
+	void __iomem *regs;
+
+	struct drm_connector	connector;
+	struct drm_encoder	encoder;
+
+	struct inno_hdmi_i2c *i2c;
+	struct i2c_adapter *ddc;
+
+	unsigned int tmds_rate;
+
+	struct hdmi_data_info	hdmi_data;
+	struct drm_display_mode previous_mode;
+};
+
+enum {
+	CSC_ITU601_16_235_TO_RGB_0_255_8BIT,
+	CSC_ITU601_0_255_TO_RGB_0_255_8BIT,
+	CSC_ITU709_16_235_TO_RGB_0_255_8BIT,
+	CSC_RGB_0_255_TO_ITU601_16_235_8BIT,
+	CSC_RGB_0_255_TO_ITU709_16_235_8BIT,
+	CSC_RGB_0_255_TO_RGB_16_235_8BIT,
+};
+
+static const char coeff_csc[][24] = {
+	/*
+	 * YUV2RGB:601 SD mode(Y[16:235], UV[16:240], RGB[0:255]):
+	 *   R = 1.164*Y + 1.596*V - 204
+	 *   G = 1.164*Y - 0.391*U - 0.813*V + 154
+	 *   B = 1.164*Y + 2.018*U - 258
+	 */
+	{
+		0x04, 0xa7, 0x00, 0x00, 0x06, 0x62, 0x02, 0xcc,
+		0x04, 0xa7, 0x11, 0x90, 0x13, 0x40, 0x00, 0x9a,
+		0x04, 0xa7, 0x08, 0x12, 0x00, 0x00, 0x03, 0x02
+	},
+	/*
+	 * YUV2RGB:601 SD mode(YUV[0:255],RGB[0:255]):
+	 *   R = Y + 1.402*V - 248
+	 *   G = Y - 0.344*U - 0.714*V + 135
+	 *   B = Y + 1.772*U - 227
+	 */
+	{
+		0x04, 0x00, 0x00, 0x00, 0x05, 0x9b, 0x02, 0xf8,
+		0x04, 0x00, 0x11, 0x60, 0x12, 0xdb, 0x00, 0x87,
+		0x04, 0x00, 0x07, 0x16, 0x00, 0x00, 0x02, 0xe3
+	},
+	/*
+	 * YUV2RGB:709 HD mode(Y[16:235],UV[16:240],RGB[0:255]):
+	 *   R = 1.164*Y + 1.793*V - 248
+	 *   G = 1.164*Y - 0.213*U - 0.534*V + 77
+	 *   B = 1.164*Y + 2.115*U - 289
+	 */
+	{
+		0x04, 0xa7, 0x00, 0x00, 0x07, 0x2c, 0x02, 0xf8,
+		0x04, 0xa7, 0x10, 0xda, 0x12, 0x22, 0x00, 0x4d,
+		0x04, 0xa7, 0x08, 0x74, 0x00, 0x00, 0x03, 0x21
+	},
+
+	/*
+	 * RGB2YUV:601 SD mode:
+	 *   Cb = -0.291G - 0.148R + 0.439B + 128
+	 *   Y  = 0.504G  + 0.257R + 0.098B + 16
+	 *   Cr = -0.368G + 0.439R - 0.071B + 128
+	 */
+	{
+		0x11, 0x5f, 0x01, 0x82, 0x10, 0x23, 0x00, 0x80,
+		0x02, 0x1c, 0x00, 0xa1, 0x00, 0x36, 0x00, 0x1e,
+		0x11, 0x29, 0x10, 0x59, 0x01, 0x82, 0x00, 0x80
+	},
+	/*
+	 * RGB2YUV:709 HD mode:
+	 *   Cb = - 0.338G - 0.101R + 0.439B + 128
+	 *   Y  = 0.614G   + 0.183R + 0.062B + 16
+	 *   Cr = - 0.399G + 0.439R - 0.040B + 128
+	 */
+	{
+		0x11, 0x98, 0x01, 0xc1, 0x10, 0x28, 0x00, 0x80,
+		0x02, 0x74, 0x00, 0xbb, 0x00, 0x3f, 0x00, 0x10,
+		0x11, 0x5a, 0x10, 0x67, 0x01, 0xc1, 0x00, 0x80
+	},
+	/*
+	 * RGB[0:255]2RGB[16:235]:
+	 *   R' = R x (235-16)/255 + 16;
+	 *   G' = G x (235-16)/255 + 16;
+	 *   B' = B x (235-16)/255 + 16;
+	 */
+	{
+		0x00, 0x00, 0x03, 0x6F, 0x00, 0x00, 0x00, 0x10,
+		0x03, 0x6F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+		0x00, 0x00, 0x00, 0x00, 0x03, 0x6F, 0x00, 0x10
+	},
+};
+
+static inline u8 hdmi_readb(struct inno_hdmi *hdmi, u16 offset)
+{
+	return readl_relaxed(hdmi->regs + (offset) * 0x04);
+}
+
+static inline void hdmi_writeb(struct inno_hdmi *hdmi, u16 offset, u32 val)
+{
+	writel_relaxed(val, hdmi->regs + (offset) * 0x04);
+}
+
+static inline void hdmi_modb(struct inno_hdmi *hdmi, u16 offset,
+			     u32 msk, u32 val)
+{
+	u8 temp = hdmi_readb(hdmi, offset) & ~msk;
+
+	temp |= val & msk;
+	hdmi_writeb(hdmi, offset, temp);
+}
+
+static void inno_hdmi_i2c_init(struct inno_hdmi *hdmi)
+{
+	int ddc_bus_freq;
+
+	ddc_bus_freq = (hdmi->tmds_rate >> 2) / HDMI_SCL_RATE;
+
+	hdmi_writeb(hdmi, DDC_BUS_FREQ_L, ddc_bus_freq & 0xFF);
+	hdmi_writeb(hdmi, DDC_BUS_FREQ_H, (ddc_bus_freq >> 8) & 0xFF);
+
+	/* Clear the EDID interrupt flag and mute the interrupt */
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_MASK1, 0);
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_STATUS1, m_INT_EDID_READY);
+}
+
+static void inno_hdmi_sys_power(struct inno_hdmi *hdmi, bool enable)
+{
+	if (enable)
+		hdmi_modb(hdmi, HDMI_SYS_CTRL, m_POWER, v_PWR_ON);
+	else
+		hdmi_modb(hdmi, HDMI_SYS_CTRL, m_POWER, v_PWR_OFF);
+}
+
+static void inno_hdmi_set_pwr_mode(struct inno_hdmi *hdmi, int mode)
+{
+	switch (mode) {
+	case NORMAL:
+		inno_hdmi_sys_power(hdmi, false);
+
+		hdmi_writeb(hdmi, HDMI_PHY_PRE_EMPHASIS, 0x6f);
+		hdmi_writeb(hdmi, HDMI_PHY_DRIVER, 0xbb);
+
+		hdmi_writeb(hdmi, HDMI_PHY_SYS_CTL, 0x15);
+		hdmi_writeb(hdmi, HDMI_PHY_SYS_CTL, 0x14);
+		hdmi_writeb(hdmi, HDMI_PHY_SYS_CTL, 0x10);
+		hdmi_writeb(hdmi, HDMI_PHY_CHG_PWR, 0x0f);
+		hdmi_writeb(hdmi, HDMI_PHY_SYNC, 0x00);
+		hdmi_writeb(hdmi, HDMI_PHY_SYNC, 0x01);
+
+		inno_hdmi_sys_power(hdmi, true);
+		break;
+
+	case LOWER_PWR:
+		inno_hdmi_sys_power(hdmi, false);
+		hdmi_writeb(hdmi, HDMI_PHY_DRIVER, 0x00);
+		hdmi_writeb(hdmi, HDMI_PHY_PRE_EMPHASIS, 0x00);
+		hdmi_writeb(hdmi, HDMI_PHY_CHG_PWR, 0x00);
+		hdmi_writeb(hdmi, HDMI_PHY_SYS_CTL, 0x15);
+
+		break;
+
+	default:
+		dev_err(hdmi->dev, "Unknown power mode %d\n", mode);
+	}
+}
+
+static void inno_hdmi_reset(struct inno_hdmi *hdmi)
+{
+	u32 val;
+	u32 msk;
+
+	hdmi_modb(hdmi, HDMI_SYS_CTRL, m_RST_DIGITAL, v_NOT_RST_DIGITAL);
+	udelay(100);
+
+	hdmi_modb(hdmi, HDMI_SYS_CTRL, m_RST_ANALOG, v_NOT_RST_ANALOG);
+	udelay(100);
+
+	msk = m_REG_CLK_INV | m_REG_CLK_SOURCE | m_POWER | m_INT_POL;
+	val = v_REG_CLK_INV | v_REG_CLK_SOURCE_SYS | v_PWR_ON | v_INT_POL_HIGH;
+	hdmi_modb(hdmi, HDMI_SYS_CTRL, msk, val);
+
+	inno_hdmi_set_pwr_mode(hdmi, NORMAL);
+}
+
+static int inno_hdmi_upload_frame(struct inno_hdmi *hdmi, int setup_rc,
+				  union hdmi_infoframe *frame, u32 frame_index,
+				  u32 mask, u32 disable, u32 enable)
+{
+	if (mask)
+		hdmi_modb(hdmi, HDMI_PACKET_SEND_AUTO, mask, disable);
+
+	hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_BUF_INDEX, frame_index);
+
+	if (setup_rc >= 0) {
+		u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE];
+		ssize_t rc, i;
+
+		rc = hdmi_infoframe_pack(frame, packed_frame,
+					 sizeof(packed_frame));
+		if (rc < 0)
+			return rc;
+
+		for (i = 0; i < rc; i++)
+			hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i,
+				    packed_frame[i]);
+
+		if (mask)
+			hdmi_modb(hdmi, HDMI_PACKET_SEND_AUTO, mask, enable);
+	}
+
+	return setup_rc;
+}
+
+static int inno_hdmi_config_video_vsi(struct inno_hdmi *hdmi,
+				      struct drm_display_mode *mode)
+{
+	union hdmi_infoframe frame;
+	int rc;
+
+	rc = drm_hdmi_vendor_infoframe_from_display_mode(&frame.vendor.hdmi,
+							 mode);
+
+	return inno_hdmi_upload_frame(hdmi, rc, &frame, INFOFRAME_VSI,
+		m_PACKET_VSI_EN, v_PACKET_VSI_EN(0), v_PACKET_VSI_EN(1));
+}
+
+static int inno_hdmi_config_video_avi(struct inno_hdmi *hdmi,
+				      struct drm_display_mode *mode)
+{
+	union hdmi_infoframe frame;
+	int rc;
+
+	rc = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi, mode);
+
+	if (hdmi->hdmi_data.enc_out_format == HDMI_COLORSPACE_YUV444)
+		frame.avi.colorspace = HDMI_COLORSPACE_YUV444;
+	else if (hdmi->hdmi_data.enc_out_format == HDMI_COLORSPACE_YUV422)
+		frame.avi.colorspace = HDMI_COLORSPACE_YUV422;
+	else
+		frame.avi.colorspace = HDMI_COLORSPACE_RGB;
+
+	return inno_hdmi_upload_frame(hdmi, rc, &frame, INFOFRAME_AVI, 0, 0, 0);
+}
+
+static int inno_hdmi_config_video_csc(struct inno_hdmi *hdmi)
+{
+	struct hdmi_data_info *data = &hdmi->hdmi_data;
+	int c0_c2_change = 0;
+	int csc_enable = 0;
+	int csc_mode = 0;
+	int auto_csc = 0;
+	int value;
+	int i;
+
+	/* Input video mode is SDR RGB24bit, data enable signal from external */
+	hdmi_writeb(hdmi, HDMI_VIDEO_CONTRL1, v_DE_EXTERNAL |
+		    v_VIDEO_INPUT_FORMAT(VIDEO_INPUT_SDR_RGB444));
+
+	/* Input color hardcode to RGB, and output color hardcode to RGB888 */
+	value = v_VIDEO_INPUT_BITS(VIDEO_INPUT_8BITS) |
+		v_VIDEO_OUTPUT_COLOR(0) |
+		v_VIDEO_INPUT_CSP(0);
+	hdmi_writeb(hdmi, HDMI_VIDEO_CONTRL2, value);
+
+	if (data->enc_in_format == data->enc_out_format) {
+		if ((data->enc_in_format == HDMI_COLORSPACE_RGB) ||
+		    (data->enc_in_format >= HDMI_COLORSPACE_YUV444)) {
+			value = v_SOF_DISABLE | v_COLOR_DEPTH_NOT_INDICATED(1);
+			hdmi_writeb(hdmi, HDMI_VIDEO_CONTRL3, value);
+
+			hdmi_modb(hdmi, HDMI_VIDEO_CONTRL,
+				  m_VIDEO_AUTO_CSC | m_VIDEO_C0_C2_SWAP,
+				  v_VIDEO_AUTO_CSC(AUTO_CSC_DISABLE) |
+				  v_VIDEO_C0_C2_SWAP(C0_C2_CHANGE_DISABLE));
+			return 0;
+		}
+	}
+
+	if (data->colorimetry == HDMI_COLORIMETRY_ITU_601) {
+		if ((data->enc_in_format == HDMI_COLORSPACE_RGB) &&
+		    (data->enc_out_format == HDMI_COLORSPACE_YUV444)) {
+			csc_mode = CSC_RGB_0_255_TO_ITU601_16_235_8BIT;
+			auto_csc = AUTO_CSC_DISABLE;
+			c0_c2_change = C0_C2_CHANGE_DISABLE;
+			csc_enable = v_CSC_ENABLE;
+		} else if ((data->enc_in_format == HDMI_COLORSPACE_YUV444) &&
+			   (data->enc_out_format == HDMI_COLORSPACE_RGB)) {
+			csc_mode = CSC_ITU601_16_235_TO_RGB_0_255_8BIT;
+			auto_csc = AUTO_CSC_ENABLE;
+			c0_c2_change = C0_C2_CHANGE_DISABLE;
+			csc_enable = v_CSC_DISABLE;
+		}
+	} else {
+		if ((data->enc_in_format == HDMI_COLORSPACE_RGB) &&
+		    (data->enc_out_format == HDMI_COLORSPACE_YUV444)) {
+			csc_mode = CSC_RGB_0_255_TO_ITU709_16_235_8BIT;
+			auto_csc = AUTO_CSC_DISABLE;
+			c0_c2_change = C0_C2_CHANGE_DISABLE;
+			csc_enable = v_CSC_ENABLE;
+		} else if ((data->enc_in_format == HDMI_COLORSPACE_YUV444) &&
+			   (data->enc_out_format == HDMI_COLORSPACE_RGB)) {
+			csc_mode = CSC_ITU709_16_235_TO_RGB_0_255_8BIT;
+			auto_csc = AUTO_CSC_ENABLE;
+			c0_c2_change = C0_C2_CHANGE_DISABLE;
+			csc_enable = v_CSC_DISABLE;
+		}
+	}
+
+	for (i = 0; i < 24; i++)
+		hdmi_writeb(hdmi, HDMI_VIDEO_CSC_COEF + i,
+			    coeff_csc[csc_mode][i]);
+
+	value = v_SOF_DISABLE | csc_enable | v_COLOR_DEPTH_NOT_INDICATED(1);
+	hdmi_writeb(hdmi, HDMI_VIDEO_CONTRL3, value);
+	hdmi_modb(hdmi, HDMI_VIDEO_CONTRL, m_VIDEO_AUTO_CSC |
+		  m_VIDEO_C0_C2_SWAP, v_VIDEO_AUTO_CSC(auto_csc) |
+		  v_VIDEO_C0_C2_SWAP(c0_c2_change));
+
+	return 0;
+}
+
+static int inno_hdmi_config_video_timing(struct inno_hdmi *hdmi,
+					 struct drm_display_mode *mode)
+{
+	int value;
+
+	/* Set detail external video timing polarity and interlace mode */
+	value = v_EXTERANL_VIDEO(1);
+	value |= mode->flags & DRM_MODE_FLAG_PHSYNC ?
+		 v_HSYNC_POLARITY(1) : v_HSYNC_POLARITY(0);
+	value |= mode->flags & DRM_MODE_FLAG_PVSYNC ?
+		 v_VSYNC_POLARITY(1) : v_VSYNC_POLARITY(0);
+	value |= mode->flags & DRM_MODE_FLAG_INTERLACE ?
+		 v_INETLACE(1) : v_INETLACE(0);
+	hdmi_writeb(hdmi, HDMI_VIDEO_TIMING_CTL, value);
+
+	/* Set detail external video timing */
+	value = mode->htotal;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HTOTAL_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HTOTAL_H, (value >> 8) & 0xFF);
+
+	value = mode->htotal - mode->hdisplay;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HBLANK_H, (value >> 8) & 0xFF);
+
+	value = mode->hsync_start - mode->hdisplay;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDELAY_H, (value >> 8) & 0xFF);
+
+	value = mode->hsync_end - mode->hsync_start;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDURATION_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_HDURATION_H, (value >> 8) & 0xFF);
+
+	value = mode->vtotal;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VTOTAL_L, value & 0xFF);
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VTOTAL_H, (value >> 8) & 0xFF);
+
+	value = mode->vtotal - mode->vdisplay;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VBLANK, value & 0xFF);
+
+	value = mode->vsync_start - mode->vdisplay;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VDELAY, value & 0xFF);
+
+	value = mode->vsync_end - mode->vsync_start;
+	hdmi_writeb(hdmi, HDMI_VIDEO_EXT_VDURATION, value & 0xFF);
+
+	hdmi_writeb(hdmi, HDMI_PHY_PRE_DIV_RATIO, 0x1e);
+	hdmi_writeb(hdmi, HDMI_PHY_FEEDBACK_DIV_RATIO_LOW, 0x2c);
+	hdmi_writeb(hdmi, HDMI_PHY_FEEDBACK_DIV_RATIO_HIGH, 0x01);
+
+	return 0;
+}
+
+static int inno_hdmi_setup(struct inno_hdmi *hdmi,
+			   struct drm_display_mode *mode)
+{
+	hdmi->hdmi_data.vic = drm_match_cea_mode(mode);
+
+	hdmi->hdmi_data.enc_in_format = HDMI_COLORSPACE_RGB;
+	hdmi->hdmi_data.enc_out_format = HDMI_COLORSPACE_RGB;
+
+	if ((hdmi->hdmi_data.vic == 6) || (hdmi->hdmi_data.vic == 7) ||
+	    (hdmi->hdmi_data.vic == 21) || (hdmi->hdmi_data.vic == 22) ||
+	    (hdmi->hdmi_data.vic == 2) || (hdmi->hdmi_data.vic == 3) ||
+	    (hdmi->hdmi_data.vic == 17) || (hdmi->hdmi_data.vic == 18))
+		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_601;
+	else
+		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_709;
+
+	/* Mute video and audio output */
+	hdmi_modb(hdmi, HDMI_AV_MUTE, m_AUDIO_MUTE | m_VIDEO_BLACK,
+		  v_AUDIO_MUTE(1) | v_VIDEO_MUTE(1));
+
+	/* Set HDMI Mode */
+	hdmi_writeb(hdmi, HDMI_HDCP_CTRL,
+		    v_HDMI_DVI(hdmi->hdmi_data.sink_is_hdmi));
+
+	inno_hdmi_config_video_timing(hdmi, mode);
+
+	inno_hdmi_config_video_csc(hdmi);
+
+	if (hdmi->hdmi_data.sink_is_hdmi) {
+		inno_hdmi_config_video_avi(hdmi, mode);
+		inno_hdmi_config_video_vsi(hdmi, mode);
+	}
+
+	/*
+	 * When IP controller have configured to an accurate video
+	 * timing, then the TMDS clock source would be switched to
+	 * DCLK_LCDC, so we need to init the TMDS rate to mode pixel
+	 * clock rate, and reconfigure the DDC clock.
+	 */
+	hdmi->tmds_rate = mode->clock * 1000;
+	inno_hdmi_i2c_init(hdmi);
+
+	/* Unmute video and audio output */
+	hdmi_modb(hdmi, HDMI_AV_MUTE, m_AUDIO_MUTE | m_VIDEO_BLACK,
+		  v_AUDIO_MUTE(0) | v_VIDEO_MUTE(0));
+
+	return 0;
+}
+
+static void inno_hdmi_encoder_mode_set(struct drm_encoder *encoder,
+				       struct drm_display_mode *mode,
+				       struct drm_display_mode *adj_mode)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(encoder);
+
+	inno_hdmi_setup(hdmi, adj_mode);
+
+	/* Store the display mode for plugin/DPMS poweron events */
+	memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
+}
+
+static void inno_hdmi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(encoder);
+
+	rockchip_drm_crtc_mode_config(encoder->crtc, DRM_MODE_CONNECTOR_HDMIA,
+				      ROCKCHIP_OUT_MODE_P888);
+
+	inno_hdmi_set_pwr_mode(hdmi, NORMAL);
+}
+
+static void inno_hdmi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(encoder);
+
+	inno_hdmi_set_pwr_mode(hdmi, LOWER_PWR);
+}
+
+static bool inno_hdmi_encoder_mode_fixup(struct drm_encoder *encoder,
+					 const struct drm_display_mode *mode,
+					 struct drm_display_mode *adj_mode)
+{
+	return true;
+}
+
+static struct drm_encoder_helper_funcs inno_hdmi_encoder_helper_funcs = {
+	.enable     = inno_hdmi_encoder_enable,
+	.disable    = inno_hdmi_encoder_disable,
+	.mode_fixup = inno_hdmi_encoder_mode_fixup,
+	.mode_set   = inno_hdmi_encoder_mode_set,
+};
+
+static struct drm_encoder_funcs inno_hdmi_encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static enum drm_connector_status
+inno_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(connector);
+
+	return (hdmi_readb(hdmi, HDMI_STATUS) & m_HOTPLUG) ?
+		connector_status_connected : connector_status_disconnected;
+}
+
+static int inno_hdmi_connector_get_modes(struct drm_connector *connector)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(connector);
+	struct edid *edid;
+	int ret = 0;
+
+	if (!hdmi->ddc)
+		return 0;
+
+	edid = drm_get_edid(connector, hdmi->ddc);
+	if (edid) {
+		hdmi->hdmi_data.sink_is_hdmi = drm_detect_hdmi_monitor(edid);
+		hdmi->hdmi_data.sink_has_audio = drm_detect_monitor_audio(edid);
+		drm_mode_connector_update_edid_property(connector, edid);
+		ret = drm_add_edid_modes(connector, edid);
+		kfree(edid);
+	}
+
+	return ret;
+}
+
+static enum drm_mode_status
+inno_hdmi_connector_mode_valid(struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static struct drm_encoder *
+inno_hdmi_connector_best_encoder(struct drm_connector *connector)
+{
+	struct inno_hdmi *hdmi = to_inno_hdmi(connector);
+
+	return &hdmi->encoder;
+}
+
+static int
+inno_hdmi_probe_single_connector_modes(struct drm_connector *connector,
+				       uint32_t maxX, uint32_t maxY)
+{
+	return drm_helper_probe_single_connector_modes(connector, 1920, 1080);
+}
+
+static void inno_hdmi_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static struct drm_connector_funcs inno_hdmi_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.fill_modes = inno_hdmi_probe_single_connector_modes,
+	.detect = inno_hdmi_connector_detect,
+	.destroy = inno_hdmi_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static struct drm_connector_helper_funcs inno_hdmi_connector_helper_funcs = {
+	.get_modes = inno_hdmi_connector_get_modes,
+	.mode_valid = inno_hdmi_connector_mode_valid,
+	.best_encoder = inno_hdmi_connector_best_encoder,
+};
+
+static int inno_hdmi_register(struct drm_device *drm, struct inno_hdmi *hdmi)
+{
+	struct drm_encoder *encoder = &hdmi->encoder;
+	struct device *dev = hdmi->dev;
+
+	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
+
+	/*
+	 * If we failed to find the CRTC(s) which this encoder is
+	 * supposed to be connected to, it's because the CRTC has
+	 * not been registered yet.  Defer probing, and hope that
+	 * the required CRTC is added later.
+	 */
+	if (encoder->possible_crtcs == 0)
+		return -EPROBE_DEFER;
+
+	drm_encoder_helper_add(encoder, &inno_hdmi_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &inno_hdmi_encoder_funcs,
+			 DRM_MODE_ENCODER_TMDS, NULL);
+
+	hdmi->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+	drm_connector_helper_add(&hdmi->connector,
+				 &inno_hdmi_connector_helper_funcs);
+	drm_connector_init(drm, &hdmi->connector, &inno_hdmi_connector_funcs,
+			   DRM_MODE_CONNECTOR_HDMIA);
+
+	drm_mode_connector_attach_encoder(&hdmi->connector, encoder);
+
+	return 0;
+}
+
+static irqreturn_t inno_hdmi_i2c_irq(struct inno_hdmi *hdmi)
+{
+	struct inno_hdmi_i2c *i2c = hdmi->i2c;
+	u8 stat;
+
+	stat = hdmi_readb(hdmi, HDMI_INTERRUPT_STATUS1);
+	if (!(stat & m_INT_EDID_READY))
+		return IRQ_NONE;
+
+	/* Clear HDMI EDID interrupt flag */
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_STATUS1, m_INT_EDID_READY);
+
+	complete(&i2c->cmp);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t inno_hdmi_hardirq(int irq, void *dev_id)
+{
+	struct inno_hdmi *hdmi = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	u8 interrupt;
+
+	if (hdmi->i2c)
+		ret = inno_hdmi_i2c_irq(hdmi);
+
+	interrupt = hdmi_readb(hdmi, HDMI_STATUS);
+	if (interrupt & m_INT_HOTPLUG) {
+		hdmi_modb(hdmi, HDMI_STATUS, m_INT_HOTPLUG, m_INT_HOTPLUG);
+		ret = IRQ_WAKE_THREAD;
+	}
+
+	return ret;
+}
+
+static irqreturn_t inno_hdmi_irq(int irq, void *dev_id)
+{
+	struct inno_hdmi *hdmi = dev_id;
+
+	drm_helper_hpd_irq_event(hdmi->connector.dev);
+
+	return IRQ_HANDLED;
+}
+
+static int inno_hdmi_i2c_read(struct inno_hdmi *hdmi, struct i2c_msg *msgs)
+{
+	int length = msgs->len;
+	u8 *buf = msgs->buf;
+	int ret;
+
+	ret = wait_for_completion_timeout(&hdmi->i2c->cmp, HZ / 10);
+	if (!ret)
+		return -EAGAIN;
+
+	while (length--)
+		*buf++ = hdmi_readb(hdmi, HDMI_EDID_FIFO_ADDR);
+
+	return 0;
+}
+
+static int inno_hdmi_i2c_write(struct inno_hdmi *hdmi, struct i2c_msg *msgs)
+{
+	/*
+	 * The DDC module only support read EDID message, so
+	 * we assume that each word write to this i2c adapter
+	 * should be the offset of EDID word address.
+	 */
+	if ((msgs->len != 1) ||
+	    ((msgs->addr != DDC_ADDR) && (msgs->addr != DDC_SEGMENT_ADDR)))
+		return -EINVAL;
+
+	reinit_completion(&hdmi->i2c->cmp);
+
+	if (msgs->addr == DDC_SEGMENT_ADDR)
+		hdmi->i2c->segment_addr = msgs->buf[0];
+	if (msgs->addr == DDC_ADDR)
+		hdmi->i2c->ddc_addr = msgs->buf[0];
+
+	/* Set edid fifo first addr */
+	hdmi_writeb(hdmi, HDMI_EDID_FIFO_OFFSET, 0x00);
+
+	/* Set edid word address 0x00/0x80 */
+	hdmi_writeb(hdmi, HDMI_EDID_WORD_ADDR, hdmi->i2c->ddc_addr);
+
+	/* Set edid segment pointer */
+	hdmi_writeb(hdmi, HDMI_EDID_SEGMENT_POINTER, hdmi->i2c->segment_addr);
+
+	return 0;
+}
+
+static int inno_hdmi_i2c_xfer(struct i2c_adapter *adap,
+			      struct i2c_msg *msgs, int num)
+{
+	struct inno_hdmi *hdmi = i2c_get_adapdata(adap);
+	struct inno_hdmi_i2c *i2c = hdmi->i2c;
+	int i, ret = 0;
+
+	mutex_lock(&i2c->lock);
+
+	/* Clear the EDID interrupt flag and unmute the interrupt */
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_MASK1, m_INT_EDID_READY);
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_STATUS1, m_INT_EDID_READY);
+
+	for (i = 0; i < num; i++) {
+		dev_dbg(hdmi->dev, "xfer: num: %d/%d, len: %d, flags: %#x\n",
+			i + 1, num, msgs[i].len, msgs[i].flags);
+
+		if (msgs[i].flags & I2C_M_RD)
+			ret = inno_hdmi_i2c_read(hdmi, &msgs[i]);
+		else
+			ret = inno_hdmi_i2c_write(hdmi, &msgs[i]);
+
+		if (ret < 0)
+			break;
+	}
+
+	if (!ret)
+		ret = num;
+
+	/* Mute HDMI EDID interrupt */
+	hdmi_writeb(hdmi, HDMI_INTERRUPT_MASK1, 0);
+
+	mutex_unlock(&i2c->lock);
+
+	return ret;
+}
+
+static u32 inno_hdmi_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm inno_hdmi_algorithm = {
+	.master_xfer	= inno_hdmi_i2c_xfer,
+	.functionality	= inno_hdmi_i2c_func,
+};
+
+static struct i2c_adapter *inno_hdmi_i2c_adapter(struct inno_hdmi *hdmi)
+{
+	struct i2c_adapter *adap;
+	struct inno_hdmi_i2c *i2c;
+	int ret;
+
+	i2c = devm_kzalloc(hdmi->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&i2c->lock);
+	init_completion(&i2c->cmp);
+
+	adap = &i2c->adap;
+	adap->class = I2C_CLASS_DDC;
+	adap->owner = THIS_MODULE;
+	adap->dev.parent = hdmi->dev;
+	adap->dev.of_node = hdmi->dev->of_node;
+	adap->algo = &inno_hdmi_algorithm;
+	strlcpy(adap->name, "Inno HDMI", sizeof(adap->name));
+	i2c_set_adapdata(adap, hdmi);
+
+	ret = i2c_add_adapter(adap);
+	if (ret) {
+		dev_warn(hdmi->dev, "cannot add %s I2C adapter\n", adap->name);
+		devm_kfree(hdmi->dev, i2c);
+		return ERR_PTR(ret);
+	}
+
+	hdmi->i2c = i2c;
+
+	dev_info(hdmi->dev, "registered %s I2C bus driver\n", adap->name);
+
+	return adap;
+}
+
+static int inno_hdmi_bind(struct device *dev, struct device *master,
+				 void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm = data;
+	struct inno_hdmi *hdmi;
+	struct resource *iores;
+	int irq;
+	int ret;
+
+	hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return -ENOMEM;
+
+	hdmi->dev = dev;
+	hdmi->drm_dev = drm;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -ENXIO;
+
+	hdmi->regs = devm_ioremap_resource(dev, iores);
+	if (IS_ERR(hdmi->regs))
+		return PTR_ERR(hdmi->regs);
+
+	hdmi->pclk = devm_clk_get(hdmi->dev, "pclk");
+	if (IS_ERR(hdmi->pclk)) {
+		dev_err(hdmi->dev, "Unable to get HDMI pclk clk\n");
+		return PTR_ERR(hdmi->pclk);
+	}
+
+	ret = clk_prepare_enable(hdmi->pclk);
+	if (ret) {
+		dev_err(hdmi->dev, "Cannot enable HDMI pclk clock: %d\n", ret);
+		return ret;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	inno_hdmi_reset(hdmi);
+
+	hdmi->ddc = inno_hdmi_i2c_adapter(hdmi);
+	if (IS_ERR(hdmi->ddc)) {
+		hdmi->ddc = NULL;
+		return PTR_ERR(hdmi->ddc);
+	}
+
+	/*
+	 * When IP controller haven't configured to an accurate video
+	 * timing, then the TMDS clock source would be switched to
+	 * PCLK_HDMI, so we need to init the TMDS rate to PCLK rate,
+	 * and reconfigure the DDC clock.
+	 */
+	hdmi->tmds_rate = clk_get_rate(hdmi->pclk);
+	inno_hdmi_i2c_init(hdmi);
+
+	ret = inno_hdmi_register(drm, hdmi);
+	if (ret)
+		return ret;
+
+	dev_set_drvdata(dev, hdmi);
+
+	/* Unmute hotplug interrupt */
+	hdmi_modb(hdmi, HDMI_STATUS, m_MASK_INT_HOTPLUG, v_MASK_INT_HOTPLUG(1));
+
+	ret = devm_request_threaded_irq(dev, irq, inno_hdmi_hardirq,
+					inno_hdmi_irq, IRQF_SHARED,
+					dev_name(dev), hdmi);
+
+	return ret;
+}
+
+static void inno_hdmi_unbind(struct device *dev, struct device *master,
+			     void *data)
+{
+	struct inno_hdmi *hdmi = dev_get_drvdata(dev);
+
+	hdmi->connector.funcs->destroy(&hdmi->connector);
+	hdmi->encoder.funcs->destroy(&hdmi->encoder);
+
+	clk_disable_unprepare(hdmi->pclk);
+	i2c_put_adapter(hdmi->ddc);
+}
+
+static const struct component_ops inno_hdmi_ops = {
+	.bind	= inno_hdmi_bind,
+	.unbind	= inno_hdmi_unbind,
+};
+
+static int inno_hdmi_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &inno_hdmi_ops);
+}
+
+static int inno_hdmi_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &inno_hdmi_ops);
+
+	return 0;
+}
+
+static const struct of_device_id inno_hdmi_dt_ids[] = {
+	{ .compatible = "rockchip,rk3036-inno-hdmi",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, inno_hdmi_dt_ids);
+
+static struct platform_driver inno_hdmi_driver = {
+	.probe  = inno_hdmi_probe,
+	.remove = inno_hdmi_remove,
+	.driver = {
+		.name = "innohdmi-rockchip",
+		.of_match_table = inno_hdmi_dt_ids,
+	},
+};
+
+module_platform_driver(inno_hdmi_driver);
+
+MODULE_AUTHOR("Zheng Yang <zhengyang@rock-chips.com>");
+MODULE_AUTHOR("Yakir Yang <ykk@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip Specific INNO-HDMI Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:innohdmi-rockchip");
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.h b/drivers/gpu/drm/rockchip/inno_hdmi.h
new file mode 100644
index 000000000000..aa7c415f8cc1
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.h
@@ -0,0 +1,362 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ *    Zheng Yang <zhengyang@rock-chips.com>
+ *    Yakir Yang <ykk@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __INNO_HDMI_H__
+#define __INNO_HDMI_H__
+
+#define DDC_SEGMENT_ADDR		0x30
+
+enum PWR_MODE {
+	NORMAL,
+	LOWER_PWR,
+};
+
+#define HDMI_SCL_RATE			(100*1000)
+#define DDC_BUS_FREQ_L			0x4b
+#define DDC_BUS_FREQ_H			0x4c
+
+#define HDMI_SYS_CTRL			0x00
+#define m_RST_ANALOG			(1 << 6)
+#define v_RST_ANALOG			(0 << 6)
+#define v_NOT_RST_ANALOG		(1 << 6)
+#define m_RST_DIGITAL			(1 << 5)
+#define v_RST_DIGITAL			(0 << 5)
+#define v_NOT_RST_DIGITAL		(1 << 5)
+#define m_REG_CLK_INV			(1 << 4)
+#define v_REG_CLK_NOT_INV		(0 << 4)
+#define v_REG_CLK_INV			(1 << 4)
+#define m_VCLK_INV			(1 << 3)
+#define v_VCLK_NOT_INV			(0 << 3)
+#define v_VCLK_INV			(1 << 3)
+#define m_REG_CLK_SOURCE		(1 << 2)
+#define v_REG_CLK_SOURCE_TMDS		(0 << 2)
+#define v_REG_CLK_SOURCE_SYS		(1 << 2)
+#define m_POWER				(1 << 1)
+#define v_PWR_ON			(0 << 1)
+#define v_PWR_OFF			(1 << 1)
+#define m_INT_POL			(1 << 0)
+#define v_INT_POL_HIGH			1
+#define v_INT_POL_LOW			0
+
+#define HDMI_VIDEO_CONTRL1		0x01
+#define m_VIDEO_INPUT_FORMAT		(7 << 1)
+#define m_DE_SOURCE			(1 << 0)
+#define v_VIDEO_INPUT_FORMAT(n)		(n << 1)
+#define v_DE_EXTERNAL			1
+#define v_DE_INTERNAL			0
+enum {
+	VIDEO_INPUT_SDR_RGB444 = 0,
+	VIDEO_INPUT_DDR_RGB444 = 5,
+	VIDEO_INPUT_DDR_YCBCR422 = 6
+};
+
+#define HDMI_VIDEO_CONTRL2		0x02
+#define m_VIDEO_OUTPUT_COLOR		(3 << 6)
+#define m_VIDEO_INPUT_BITS		(3 << 4)
+#define m_VIDEO_INPUT_CSP		(1 << 0)
+#define v_VIDEO_OUTPUT_COLOR(n)		(((n) & 0x3) << 6)
+#define v_VIDEO_INPUT_BITS(n)		(n << 4)
+#define v_VIDEO_INPUT_CSP(n)		(n << 0)
+enum {
+	VIDEO_INPUT_12BITS = 0,
+	VIDEO_INPUT_10BITS = 1,
+	VIDEO_INPUT_REVERT = 2,
+	VIDEO_INPUT_8BITS = 3,
+};
+
+#define HDMI_VIDEO_CONTRL		0x03
+#define m_VIDEO_AUTO_CSC		(1 << 7)
+#define v_VIDEO_AUTO_CSC(n)		(n << 7)
+#define m_VIDEO_C0_C2_SWAP		(1 << 0)
+#define v_VIDEO_C0_C2_SWAP(n)		(n << 0)
+enum {
+	C0_C2_CHANGE_ENABLE = 0,
+	C0_C2_CHANGE_DISABLE = 1,
+	AUTO_CSC_DISABLE = 0,
+	AUTO_CSC_ENABLE = 1,
+};
+
+#define HDMI_VIDEO_CONTRL3		0x04
+#define m_COLOR_DEPTH_NOT_INDICATED	(1 << 4)
+#define m_SOF				(1 << 3)
+#define m_COLOR_RANGE			(1 << 2)
+#define m_CSC				(1 << 0)
+#define v_COLOR_DEPTH_NOT_INDICATED(n)	((n) << 4)
+#define v_SOF_ENABLE			(0 << 3)
+#define v_SOF_DISABLE			(1 << 3)
+#define v_COLOR_RANGE_FULL		(1 << 2)
+#define v_COLOR_RANGE_LIMITED		(0 << 2)
+#define v_CSC_ENABLE			1
+#define v_CSC_DISABLE			0
+
+#define HDMI_AV_MUTE			0x05
+#define m_AVMUTE_CLEAR			(1 << 7)
+#define m_AVMUTE_ENABLE			(1 << 6)
+#define m_AUDIO_MUTE			(1 << 1)
+#define m_VIDEO_BLACK			(1 << 0)
+#define v_AVMUTE_CLEAR(n)		(n << 7)
+#define v_AVMUTE_ENABLE(n)		(n << 6)
+#define v_AUDIO_MUTE(n)			(n << 1)
+#define v_VIDEO_MUTE(n)			(n << 0)
+
+#define HDMI_VIDEO_TIMING_CTL		0x08
+#define v_HSYNC_POLARITY(n)		(n << 3)
+#define v_VSYNC_POLARITY(n)		(n << 2)
+#define v_INETLACE(n)			(n << 1)
+#define v_EXTERANL_VIDEO(n)		(n << 0)
+
+#define HDMI_VIDEO_EXT_HTOTAL_L		0x09
+#define HDMI_VIDEO_EXT_HTOTAL_H		0x0a
+#define HDMI_VIDEO_EXT_HBLANK_L		0x0b
+#define HDMI_VIDEO_EXT_HBLANK_H		0x0c
+#define HDMI_VIDEO_EXT_HDELAY_L		0x0d
+#define HDMI_VIDEO_EXT_HDELAY_H		0x0e
+#define HDMI_VIDEO_EXT_HDURATION_L	0x0f
+#define HDMI_VIDEO_EXT_HDURATION_H	0x10
+#define HDMI_VIDEO_EXT_VTOTAL_L		0x11
+#define HDMI_VIDEO_EXT_VTOTAL_H		0x12
+#define HDMI_VIDEO_EXT_VBLANK		0x13
+#define HDMI_VIDEO_EXT_VDELAY		0x14
+#define HDMI_VIDEO_EXT_VDURATION	0x15
+
+#define HDMI_VIDEO_CSC_COEF		0x18
+
+#define HDMI_AUDIO_CTRL1		0x35
+enum {
+	CTS_SOURCE_INTERNAL = 0,
+	CTS_SOURCE_EXTERNAL = 1,
+};
+#define v_CTS_SOURCE(n)			(n << 7)
+
+enum {
+	DOWNSAMPLE_DISABLE = 0,
+	DOWNSAMPLE_1_2 = 1,
+	DOWNSAMPLE_1_4 = 2,
+};
+#define v_DOWN_SAMPLE(n)		(n << 5)
+
+enum {
+	AUDIO_SOURCE_IIS = 0,
+	AUDIO_SOURCE_SPDIF = 1,
+};
+#define v_AUDIO_SOURCE(n)		(n << 3)
+
+#define v_MCLK_ENABLE(n)		(n << 2)
+enum {
+	MCLK_128FS = 0,
+	MCLK_256FS = 1,
+	MCLK_384FS = 2,
+	MCLK_512FS = 3,
+};
+#define v_MCLK_RATIO(n)			(n)
+
+#define AUDIO_SAMPLE_RATE		0x37
+enum {
+	AUDIO_32K = 0x3,
+	AUDIO_441K = 0x0,
+	AUDIO_48K = 0x2,
+	AUDIO_882K = 0x8,
+	AUDIO_96K = 0xa,
+	AUDIO_1764K = 0xc,
+	AUDIO_192K = 0xe,
+};
+
+#define AUDIO_I2S_MODE			0x38
+enum {
+	I2S_CHANNEL_1_2 = 1,
+	I2S_CHANNEL_3_4 = 3,
+	I2S_CHANNEL_5_6 = 7,
+	I2S_CHANNEL_7_8 = 0xf
+};
+#define v_I2S_CHANNEL(n)		((n) << 2)
+enum {
+	I2S_STANDARD = 0,
+	I2S_LEFT_JUSTIFIED = 1,
+	I2S_RIGHT_JUSTIFIED = 2,
+};
+#define v_I2S_MODE(n)			(n)
+
+#define AUDIO_I2S_MAP			0x39
+#define AUDIO_I2S_SWAPS_SPDIF		0x3a
+#define v_SPIDF_FREQ(n)			(n)
+
+#define N_32K				0x1000
+#define N_441K				0x1880
+#define N_882K				0x3100
+#define N_1764K				0x6200
+#define N_48K				0x1800
+#define N_96K				0x3000
+#define N_192K				0x6000
+
+#define HDMI_AUDIO_CHANNEL_STATUS	0x3e
+#define m_AUDIO_STATUS_NLPCM		(1 << 7)
+#define m_AUDIO_STATUS_USE		(1 << 6)
+#define m_AUDIO_STATUS_COPYRIGHT	(1 << 5)
+#define m_AUDIO_STATUS_ADDITION		(3 << 2)
+#define m_AUDIO_STATUS_CLK_ACCURACY	(2 << 0)
+#define v_AUDIO_STATUS_NLPCM(n)		((n & 1) << 7)
+#define AUDIO_N_H			0x3f
+#define AUDIO_N_M			0x40
+#define AUDIO_N_L			0x41
+
+#define HDMI_AUDIO_CTS_H		0x45
+#define HDMI_AUDIO_CTS_M		0x46
+#define HDMI_AUDIO_CTS_L		0x47
+
+#define HDMI_DDC_CLK_L			0x4b
+#define HDMI_DDC_CLK_H			0x4c
+
+#define HDMI_EDID_SEGMENT_POINTER	0x4d
+#define HDMI_EDID_WORD_ADDR		0x4e
+#define HDMI_EDID_FIFO_OFFSET		0x4f
+#define HDMI_EDID_FIFO_ADDR		0x50
+
+#define HDMI_PACKET_SEND_MANUAL		0x9c
+#define HDMI_PACKET_SEND_AUTO		0x9d
+#define m_PACKET_GCP_EN			(1 << 7)
+#define m_PACKET_MSI_EN			(1 << 6)
+#define m_PACKET_SDI_EN			(1 << 5)
+#define m_PACKET_VSI_EN			(1 << 4)
+#define v_PACKET_GCP_EN(n)		((n & 1) << 7)
+#define v_PACKET_MSI_EN(n)		((n & 1) << 6)
+#define v_PACKET_SDI_EN(n)		((n & 1) << 5)
+#define v_PACKET_VSI_EN(n)		((n & 1) << 4)
+
+#define HDMI_CONTROL_PACKET_BUF_INDEX	0x9f
+enum {
+	INFOFRAME_VSI = 0x05,
+	INFOFRAME_AVI = 0x06,
+	INFOFRAME_AAI = 0x08,
+};
+
+#define HDMI_CONTROL_PACKET_ADDR	0xa0
+#define HDMI_MAXIMUM_INFO_FRAME_SIZE	0x11
+enum {
+	AVI_COLOR_MODE_RGB = 0,
+	AVI_COLOR_MODE_YCBCR422 = 1,
+	AVI_COLOR_MODE_YCBCR444 = 2,
+	AVI_COLORIMETRY_NO_DATA = 0,
+
+	AVI_COLORIMETRY_SMPTE_170M = 1,
+	AVI_COLORIMETRY_ITU709 = 2,
+	AVI_COLORIMETRY_EXTENDED = 3,
+
+	AVI_CODED_FRAME_ASPECT_NO_DATA = 0,
+	AVI_CODED_FRAME_ASPECT_4_3 = 1,
+	AVI_CODED_FRAME_ASPECT_16_9 = 2,
+
+	ACTIVE_ASPECT_RATE_SAME_AS_CODED_FRAME = 0x08,
+	ACTIVE_ASPECT_RATE_4_3 = 0x09,
+	ACTIVE_ASPECT_RATE_16_9 = 0x0A,
+	ACTIVE_ASPECT_RATE_14_9 = 0x0B,
+};
+
+#define HDMI_HDCP_CTRL			0x52
+#define m_HDMI_DVI			(1 << 1)
+#define v_HDMI_DVI(n)			(n << 1)
+
+#define HDMI_INTERRUPT_MASK1		0xc0
+#define HDMI_INTERRUPT_STATUS1		0xc1
+#define	m_INT_ACTIVE_VSYNC		(1 << 5)
+#define m_INT_EDID_READY		(1 << 2)
+
+#define HDMI_INTERRUPT_MASK2		0xc2
+#define HDMI_INTERRUPT_STATUS2		0xc3
+#define m_INT_HDCP_ERR			(1 << 7)
+#define m_INT_BKSV_FLAG			(1 << 6)
+#define m_INT_HDCP_OK			(1 << 4)
+
+#define HDMI_STATUS			0xc8
+#define m_HOTPLUG			(1 << 7)
+#define m_MASK_INT_HOTPLUG		(1 << 5)
+#define m_INT_HOTPLUG			(1 << 1)
+#define v_MASK_INT_HOTPLUG(n)		((n & 0x1) << 5)
+
+#define HDMI_COLORBAR                   0xc9
+
+#define HDMI_PHY_SYNC			0xce
+#define HDMI_PHY_SYS_CTL		0xe0
+#define m_TMDS_CLK_SOURCE		(1 << 5)
+#define v_TMDS_FROM_PLL			(0 << 5)
+#define v_TMDS_FROM_GEN			(1 << 5)
+#define m_PHASE_CLK			(1 << 4)
+#define v_DEFAULT_PHASE			(0 << 4)
+#define v_SYNC_PHASE			(1 << 4)
+#define m_TMDS_CURRENT_PWR		(1 << 3)
+#define v_TURN_ON_CURRENT		(0 << 3)
+#define v_CAT_OFF_CURRENT		(1 << 3)
+#define m_BANDGAP_PWR			(1 << 2)
+#define v_BANDGAP_PWR_UP		(0 << 2)
+#define v_BANDGAP_PWR_DOWN		(1 << 2)
+#define m_PLL_PWR			(1 << 1)
+#define v_PLL_PWR_UP			(0 << 1)
+#define v_PLL_PWR_DOWN			(1 << 1)
+#define m_TMDS_CHG_PWR			(1 << 0)
+#define v_TMDS_CHG_PWR_UP		(0 << 0)
+#define v_TMDS_CHG_PWR_DOWN		(1 << 0)
+
+#define HDMI_PHY_CHG_PWR		0xe1
+#define v_CLK_CHG_PWR(n)		((n & 1) << 3)
+#define v_DATA_CHG_PWR(n)		((n & 7) << 0)
+
+#define HDMI_PHY_DRIVER			0xe2
+#define v_CLK_MAIN_DRIVER(n)		(n << 4)
+#define v_DATA_MAIN_DRIVER(n)		(n << 0)
+
+#define HDMI_PHY_PRE_EMPHASIS		0xe3
+#define v_PRE_EMPHASIS(n)		((n & 7) << 4)
+#define v_CLK_PRE_DRIVER(n)		((n & 3) << 2)
+#define v_DATA_PRE_DRIVER(n)		((n & 3) << 0)
+
+#define HDMI_PHY_FEEDBACK_DIV_RATIO_LOW		0xe7
+#define v_FEEDBACK_DIV_LOW(n)			(n & 0xff)
+#define HDMI_PHY_FEEDBACK_DIV_RATIO_HIGH	0xe8
+#define v_FEEDBACK_DIV_HIGH(n)			(n & 1)
+
+#define HDMI_PHY_PRE_DIV_RATIO		0xed
+#define v_PRE_DIV_RATIO(n)		(n & 0x1f)
+
+#define HDMI_CEC_CTRL			0xd0
+#define m_ADJUST_FOR_HISENSE		(1 << 6)
+#define m_REJECT_RX_BROADCAST		(1 << 5)
+#define m_BUSFREETIME_ENABLE		(1 << 2)
+#define m_REJECT_RX			(1 << 1)
+#define m_START_TX			(1 << 0)
+
+#define HDMI_CEC_DATA			0xd1
+#define HDMI_CEC_TX_OFFSET		0xd2
+#define HDMI_CEC_RX_OFFSET		0xd3
+#define HDMI_CEC_CLK_H			0xd4
+#define HDMI_CEC_CLK_L			0xd5
+#define HDMI_CEC_TX_LENGTH		0xd6
+#define HDMI_CEC_RX_LENGTH		0xd7
+#define HDMI_CEC_TX_INT_MASK		0xd8
+#define m_TX_DONE			(1 << 3)
+#define m_TX_NOACK			(1 << 2)
+#define m_TX_BROADCAST_REJ		(1 << 1)
+#define m_TX_BUSNOTFREE			(1 << 0)
+
+#define HDMI_CEC_RX_INT_MASK		0xd9
+#define m_RX_LA_ERR			(1 << 4)
+#define m_RX_GLITCH			(1 << 3)
+#define m_RX_DONE			(1 << 0)
+
+#define HDMI_CEC_TX_INT			0xda
+#define HDMI_CEC_RX_INT			0xdb
+#define HDMI_CEC_BUSFREETIME_L		0xdc
+#define HDMI_CEC_BUSFREETIME_H		0xdd
+#define HDMI_CEC_LOGICADDR		0xde
+
+#endif /* __INNO_HDMI_H__ */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index a0d51ccb6ea4..896da09e49ee 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -384,36 +384,6 @@ static const struct dev_pm_ops rockchip_drm_pm_ops = {
 				rockchip_drm_sys_resume)
 };
 
-/*
- * @node: device tree node containing encoder input ports
- * @encoder: drm_encoder
- */
-int rockchip_drm_encoder_get_mux_id(struct device_node *node,
-				    struct drm_encoder *encoder)
-{
-	struct device_node *ep;
-	struct drm_crtc *crtc = encoder->crtc;
-	struct of_endpoint endpoint;
-	struct device_node *port;
-	int ret;
-
-	if (!node || !crtc)
-		return -EINVAL;
-
-	for_each_endpoint_of_node(node, ep) {
-		port = of_graph_get_remote_port(ep);
-		of_node_put(port);
-		if (port == crtc->port) {
-			ret = of_graph_parse_endpoint(ep, &endpoint);
-			of_node_put(ep);
-			return ret ?: endpoint.id;
-		}
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(rockchip_drm_encoder_get_mux_id);
-
 static int compare_of(struct device *dev, void *data)
 {
 	struct device_node *np = data;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index bb8b076f1dbb..3529f692edb8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -67,8 +67,6 @@ void rockchip_drm_atomic_work(struct work_struct *work);
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 				 const struct rockchip_crtc_funcs *crtc_funcs);
 void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc);
-int rockchip_drm_encoder_get_mux_id(struct device_node *node,
-				    struct drm_encoder *encoder);
 int rockchip_drm_crtc_mode_config(struct drm_crtc *crtc, int connector_type,
 				  int out_mode);
 int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index db0763794edc..88643ab160bf 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -359,13 +359,6 @@ static void shmob_drm_crtc_dpms(struct drm_crtc *crtc, int mode)
 	scrtc->dpms = mode;
 }
 
-static bool shmob_drm_crtc_mode_fixup(struct drm_crtc *crtc,
-				      const struct drm_display_mode *mode,
-				      struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void shmob_drm_crtc_mode_prepare(struct drm_crtc *crtc)
 {
 	shmob_drm_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
@@ -431,33 +424,12 @@ static int shmob_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 
 static const struct drm_crtc_helper_funcs crtc_helper_funcs = {
 	.dpms = shmob_drm_crtc_dpms,
-	.mode_fixup = shmob_drm_crtc_mode_fixup,
 	.prepare = shmob_drm_crtc_mode_prepare,
 	.commit = shmob_drm_crtc_mode_commit,
 	.mode_set = shmob_drm_crtc_mode_set,
 	.mode_set_base = shmob_drm_crtc_mode_set_base,
 };
 
-void shmob_drm_crtc_cancel_page_flip(struct shmob_drm_crtc *scrtc,
-				     struct drm_file *file)
-{
-	struct drm_pending_vblank_event *event;
-	struct drm_device *dev = scrtc->crtc.dev;
-	unsigned long flags;
-
-	/* Destroy the pending vertical blanking event associated with the
-	 * pending page flip, if any, and disable vertical blanking interrupts.
-	 */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	event = scrtc->event;
-	if (event && event->base.file_priv == file) {
-		scrtc->event = NULL;
-		event->base.destroy(&event->base);
-		drm_vblank_put(dev, 0);
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
 void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc)
 {
 	struct drm_pending_vblank_event *event;
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
index eddad6dcc88a..38ed4ff8aaf2 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
@@ -47,8 +47,6 @@ struct shmob_drm_connector {
 
 int shmob_drm_crtc_create(struct shmob_drm_device *sdev);
 void shmob_drm_crtc_enable_vblank(struct shmob_drm_device *sdev, bool enable);
-void shmob_drm_crtc_cancel_page_flip(struct shmob_drm_crtc *scrtc,
-				     struct drm_file *file);
 void shmob_drm_crtc_finish_page_flip(struct shmob_drm_crtc *scrtc);
 void shmob_drm_crtc_suspend(struct shmob_drm_crtc *scrtc);
 void shmob_drm_crtc_resume(struct shmob_drm_crtc *scrtc);
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index 04e66e3751b4..7700ff172079 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -200,13 +200,6 @@ done:
 	return ret;
 }
 
-static void shmob_drm_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct shmob_drm_device *sdev = dev->dev_private;
-
-	shmob_drm_crtc_cancel_page_flip(&sdev->crtc, file);
-}
-
 static irqreturn_t shmob_drm_irq(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -266,7 +259,6 @@ static struct drm_driver shmob_drm_driver = {
 				| DRIVER_PRIME,
 	.load			= shmob_drm_load,
 	.unload			= shmob_drm_unload,
-	.preclose		= shmob_drm_preclose,
 	.set_busid		= drm_platform_set_busid,
 	.irq_handler		= shmob_drm_irq,
 	.get_vblank_counter	= drm_vblank_no_hw_counter,
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index 00d0698be9d3..a516eb869f6f 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -7,6 +7,7 @@
 #include "sti_awg_utils.h"
 
 #define AWG_OPCODE_OFFSET 10
+#define AWG_MAX_ARG       0x3ff
 
 enum opcode {
 	SET,
@@ -34,6 +35,8 @@ static int awg_generate_instr(enum opcode opcode,
 	/* skip, repeat and replay arg should not exceed 1023.
 	 * If user wants to exceed this value, the instruction should be
 	 * duplicate and arg should be adjust for each duplicated instruction.
+	 *
+	 * mux_sel is used in case of SAV/EAV synchronization.
 	 */
 
 	while (arg_tmp > 0) {
@@ -65,7 +68,7 @@ static int awg_generate_instr(enum opcode opcode,
 
 			mux = 0;
 			data_enable = 0;
-			arg &= (0x3ff);
+			arg &= AWG_MAX_ARG;
 			break;
 		case REPEAT:
 		case REPLAY:
@@ -76,13 +79,13 @@ static int awg_generate_instr(enum opcode opcode,
 
 			mux = 0;
 			data_enable = 0;
-			arg &= (0x3ff);
+			arg &= AWG_MAX_ARG;
 			break;
 		case JUMP:
 			mux = 0;
 			data_enable = 0;
 			arg |= 0x40; /* for jump instruction 7th bit is 1 */
-			arg &= 0x3ff;
+			arg &= AWG_MAX_ARG;
 			break;
 		case STOP:
 			arg = 0;
@@ -110,68 +113,75 @@ static int awg_generate_instr(enum opcode opcode,
 	return 0;
 }
 
-int sti_awg_generate_code_data_enable_mode(
+static int awg_generate_line_signal(
 		struct awg_code_generation_params *fwparams,
 		struct awg_timing *timing)
 {
 	long int val;
-	long int data_en;
 	int ret = 0;
 
-	if (timing->trailing_lines > 0) {
-		/* skip trailing lines */
-		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
-
-		val = timing->trailing_lines - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
-	}
-
 	if (timing->trailing_pixels > 0) {
 		/* skip trailing pixel */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
 		val = timing->trailing_pixels - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(SKIP, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SKIP, val, 0, 0, fwparams);
 	}
 
 	/* set DE signal high */
 	val = timing->blanking_level;
-	data_en = 1;
 	ret |= awg_generate_instr((timing->trailing_pixels > 0) ? SET : RPLSET,
-			val, 0, data_en, fwparams);
+			val, 0, 1, fwparams);
 
 	if (timing->blanking_pixels > 0) {
 		/* skip the number of active pixel */
 		val = timing->active_pixels - 1;
-		data_en = 1;
-		ret |= awg_generate_instr(SKIP, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SKIP, val, 0, 1, fwparams);
 
 		/* set DE signal low */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(SET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(SET, val, 0, 0, fwparams);
+	}
+
+	return ret;
+}
+
+int sti_awg_generate_code_data_enable_mode(
+		struct awg_code_generation_params *fwparams,
+		struct awg_timing *timing)
+{
+	long int val, tmp_val;
+	int ret = 0;
+
+	if (timing->trailing_lines > 0) {
+		/* skip trailing lines */
+		val = timing->blanking_level;
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
+
+		val = timing->trailing_lines - 1;
+		ret |= awg_generate_instr(REPLAY, val, 0, 0, fwparams);
 	}
 
-	/* replay the sequence as many active lines defined */
-	val = timing->active_lines - 1;
-	data_en = 0;
-	ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
+	tmp_val = timing->active_lines - 1;
+
+	while (tmp_val > 0) {
+		/* generate DE signal for each line */
+		ret |= awg_generate_line_signal(fwparams, timing);
+		/* replay the sequence as many active lines defined */
+		ret |= awg_generate_instr(REPLAY,
+					  min_t(int, AWG_MAX_ARG, tmp_val),
+					  0, 0, fwparams);
+		tmp_val -= AWG_MAX_ARG;
+	}
 
 	if (timing->blanking_lines > 0) {
 		/* skip blanking lines */
 		val = timing->blanking_level;
-		data_en = 0;
-		ret |= awg_generate_instr(RPLSET, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
 		val = timing->blanking_lines - 1;
-		data_en = 0;
-		ret |= awg_generate_instr(REPLAY, val, 0, data_en, fwparams);
+		ret |= awg_generate_instr(REPLAY, val, 0, 0, fwparams);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index afed2171beb9..3d2fa3ab33df 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -75,13 +75,13 @@ static int sti_compositor_bind(struct device *dev,
 		switch (desc[i].type) {
 		case STI_VID_SUBDEV:
 			compo->vid[vid_id++] =
-			    sti_vid_create(compo->dev, desc[i].id,
+			    sti_vid_create(compo->dev, drm_dev, desc[i].id,
 					   compo->regs + desc[i].offset);
 			break;
 		case STI_MIXER_MAIN_SUBDEV:
 		case STI_MIXER_AUX_SUBDEV:
 			compo->mixer[mixer_id++] =
-			    sti_mixer_create(compo->dev, desc[i].id,
+			    sti_mixer_create(compo->dev, drm_dev, desc[i].id,
 					     compo->regs + desc[i].offset);
 			break;
 		case STI_GPD_SUBDEV:
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index de11c7cfb02f..505620c7c2c8 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -56,6 +56,7 @@ static bool sti_crtc_mode_fixup(struct drm_crtc *crtc,
 				struct drm_display_mode *adjusted_mode)
 {
 	/* accept the provided drm_display_mode, do not fix it up */
+	drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index bd736ace3f81..3abb400151ac 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -5,12 +5,10 @@
  *          for STMicroelectronics.
  * License terms:  GNU General Public License (GPL), version 2
  */
-#include <drm/drmP.h>
 
-#include <drm/drm_atomic_helper.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
-#include <drm/drm_plane_helper.h>
 
 #include "sti_compositor.h"
 #include "sti_cursor.h"
@@ -74,6 +72,82 @@ static const uint32_t cursor_supported_formats[] = {
 
 #define to_sti_cursor(x) container_of(x, struct sti_cursor, plane)
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(cursor->regs + reg))
+
+static void cursor_dbg_vpo(struct seq_file *s, u32 val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void cursor_dbg_size(struct seq_file *s, u32 val)
+{
+	seq_printf(s, "\t%d x %d", val & 0x07FF, (val >> 16) & 0x07FF);
+}
+
+static void cursor_dbg_pml(struct seq_file *s,
+			   struct sti_cursor *cursor, u32 val)
+{
+	if (cursor->pixmap.paddr == val)
+		seq_printf(s, "\tVirt @: %p", cursor->pixmap.base);
+}
+
+static void cursor_dbg_cml(struct seq_file *s,
+			   struct sti_cursor *cursor, u32 val)
+{
+	if (cursor->clut_paddr == val)
+		seq_printf(s, "\tVirt @: %p", cursor->clut);
+}
+
+static int cursor_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_cursor *cursor = (struct sti_cursor *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&cursor->plane), cursor->regs);
+
+	DBGFS_DUMP(CUR_CTL);
+	DBGFS_DUMP(CUR_VPO);
+	cursor_dbg_vpo(s, readl(cursor->regs + CUR_VPO));
+	DBGFS_DUMP(CUR_PML);
+	cursor_dbg_pml(s, cursor, readl(cursor->regs + CUR_PML));
+	DBGFS_DUMP(CUR_PMP);
+	DBGFS_DUMP(CUR_SIZE);
+	cursor_dbg_size(s, readl(cursor->regs + CUR_SIZE));
+	DBGFS_DUMP(CUR_CML);
+	cursor_dbg_cml(s, cursor, readl(cursor->regs + CUR_CML));
+	DBGFS_DUMP(CUR_AWS);
+	DBGFS_DUMP(CUR_AWE);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list cursor_debugfs_files[] = {
+	{ "cursor", cursor_dbg_show, 0, NULL },
+};
+
+static int cursor_debugfs_init(struct sti_cursor *cursor,
+			       struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(cursor_debugfs_files); i++)
+		cursor_debugfs_files[i].data = cursor;
+
+	return drm_debugfs_create_files(cursor_debugfs_files,
+					ARRAY_SIZE(cursor_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_cursor_argb8888_to_clut8(struct sti_cursor *cursor, u32 *src)
 {
 	u8  *dst = cursor->pixmap.base;
@@ -110,35 +184,31 @@ static void sti_cursor_init(struct sti_cursor *cursor)
 						  (b * 5);
 }
 
-static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
-				     struct drm_plane_state *oldstate)
+static int sti_cursor_atomic_check(struct drm_plane *drm_plane,
+				   struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_cursor *cursor = to_sti_cursor(plane);
 	struct drm_crtc *crtc = state->crtc;
-	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &crtc->mode;
-	int dst_x = state->crtc_x;
-	int dst_y = state->crtc_y;
-	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
-	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	struct drm_crtc_state *crtc_state;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_w, src_h;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
 	/* src_x are in 16.16 format */
-	int src_w = state->src_w >> 16;
-	int src_h = state->src_h >> 16;
-	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
-	struct drm_gem_cma_object *cma_obj;
-	u32 y, x;
-	u32 val;
-
-	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
-		      drm_plane->base.id, sti_plane_to_str(plane));
-	DRM_DEBUG_KMS("(%dx%d)@(%d,%d)\n", dst_w, dst_h, dst_x, dst_y);
-
-	dev_dbg(cursor->dev, "%s %s\n", __func__,
-		sti_plane_to_str(plane));
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
 
 	if (src_w < STI_CURS_MIN_SIZE ||
 	    src_h < STI_CURS_MIN_SIZE ||
@@ -146,7 +216,7 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 	    src_h > STI_CURS_MAX_SIZE) {
 		DRM_ERROR("Invalid cursor size (%dx%d)\n",
 				src_w, src_h);
-		return;
+		return -EINVAL;
 	}
 
 	/* If the cursor size has changed, re-allocated the pixmap */
@@ -168,16 +238,46 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 						   GFP_KERNEL | GFP_DMA);
 		if (!cursor->pixmap.base) {
 			DRM_ERROR("Failed to allocate memory for pixmap\n");
-			return;
+			return -EINVAL;
 		}
 	}
 
-	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
 		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
+		return -EINVAL;
 	}
 
+	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
+		      crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)),
+		      drm_plane->base.id, sti_plane_to_str(plane));
+	DRM_DEBUG_KMS("(%dx%d)@(%d,%d)\n", dst_w, dst_h, dst_x, dst_y);
+
+	return 0;
+}
+
+static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
+				     struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_cursor *cursor = to_sti_cursor(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y;
+	struct drm_gem_cma_object *cma_obj;
+	u32 y, x;
+	u32 val;
+
+	if (!crtc || !fb)
+		return;
+
+	mode = &crtc->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+
 	/* Convert ARGB8888 to CLUT8 */
 	sti_cursor_argb8888_to_clut8(cursor, (u32 *)cma_obj->vaddr);
 
@@ -191,21 +291,21 @@ static void sti_cursor_atomic_update(struct drm_plane *drm_plane,
 	val = y << 16 | x;
 	writel(val, cursor->regs + CUR_AWE);
 
-	if (first_prepare) {
-		/* Set and fetch CLUT */
-		writel(cursor->clut_paddr, cursor->regs + CUR_CML);
-		writel(CUR_CTL_CLUT_UPDATE, cursor->regs + CUR_CTL);
-	}
-
 	/* Set memory location, size, and position */
 	writel(cursor->pixmap.paddr, cursor->regs + CUR_PML);
 	writel(cursor->width, cursor->regs + CUR_PMP);
 	writel(cursor->height << 16 | cursor->width, cursor->regs + CUR_SIZE);
 
 	y = sti_vtg_get_line_number(*mode, dst_y);
-	x = sti_vtg_get_pixel_number(*mode, dst_y);
+	x = sti_vtg_get_pixel_number(*mode, dst_x);
 	writel((y << 16) | x, cursor->regs + CUR_VPO);
 
+	/* Set and fetch CLUT */
+	writel(cursor->clut_paddr, cursor->regs + CUR_CML);
+	writel(CUR_CTL_CLUT_UPDATE, cursor->regs + CUR_CTL);
+
+	sti_plane_update_fps(plane, true, false);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -213,7 +313,6 @@ static void sti_cursor_atomic_disable(struct drm_plane *drm_plane,
 				      struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -222,13 +321,15 @@ static void sti_cursor_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
 static const struct drm_plane_helper_funcs sti_cursor_helpers_funcs = {
+	.atomic_check = sti_cursor_atomic_check,
 	.atomic_update = sti_cursor_atomic_update,
 	.atomic_disable = sti_cursor_atomic_disable,
 };
@@ -281,6 +382,9 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&cursor->plane, DRM_PLANE_TYPE_CURSOR);
 
+	if (cursor_debugfs_init(cursor, drm_dev->primary))
+		DRM_ERROR("CURSOR debugfs setup failed\n");
+
 	return &cursor->plane.drm_plane;
 
 err_plane:
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 506b5626f3ed..6bd6abaa5a70 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -20,6 +20,7 @@
 
 #include "sti_crtc.h"
 #include "sti_drv.h"
+#include "sti_plane.h"
 
 #define DRIVER_NAME	"sti"
 #define DRIVER_DESC	"STMicroelectronics SoC DRM"
@@ -30,6 +31,130 @@
 #define STI_MAX_FB_HEIGHT	4096
 #define STI_MAX_FB_WIDTH	4096
 
+static int sti_drm_fps_get(void *data, u64 *val)
+{
+	struct drm_device *drm_dev = data;
+	struct drm_plane *p;
+	unsigned int i = 0;
+
+	*val = 0;
+	list_for_each_entry(p, &drm_dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+
+		*val |= plane->fps_info.output << i;
+		i++;
+	}
+
+	return 0;
+}
+
+static int sti_drm_fps_set(void *data, u64 val)
+{
+	struct drm_device *drm_dev = data;
+	struct drm_plane *p;
+	unsigned int i = 0;
+
+	list_for_each_entry(p, &drm_dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+
+		plane->fps_info.output = (val >> i) & 1;
+		i++;
+	}
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(sti_drm_fps_fops,
+			sti_drm_fps_get, sti_drm_fps_set, "%llu\n");
+
+static int sti_drm_fps_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_plane *p;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(p, &dev->mode_config.plane_list, head) {
+		struct sti_plane *plane = to_sti_plane(p);
+
+		seq_printf(s, "%s%s\n",
+			   plane->fps_info.fps_str,
+			   plane->fps_info.fips_str);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list sti_drm_dbg_list[] = {
+	{"fps_get", sti_drm_fps_dbg_show, 0},
+};
+
+static int sti_drm_debugfs_create(struct dentry *root,
+				  struct drm_minor *minor,
+				  const char *name,
+				  const struct file_operations *fops)
+{
+	struct drm_device *dev = minor->dev;
+	struct drm_info_node *node;
+	struct dentry *ent;
+
+	ent = debugfs_create_file(name, S_IRUGO | S_IWUSR, root, dev, fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (!node) {
+		debugfs_remove(ent);
+		return -ENOMEM;
+	}
+
+	node->minor = minor;
+	node->dent = ent;
+	node->info_ent = (void *)fops;
+
+	mutex_lock(&minor->debugfs_lock);
+	list_add(&node->list, &minor->debugfs_list);
+	mutex_unlock(&minor->debugfs_lock);
+
+	return 0;
+}
+
+static int sti_drm_dbg_init(struct drm_minor *minor)
+{
+	int ret;
+
+	ret = drm_debugfs_create_files(sti_drm_dbg_list,
+				       ARRAY_SIZE(sti_drm_dbg_list),
+				       minor->debugfs_root, minor);
+	if (ret)
+		goto err;
+
+	ret = sti_drm_debugfs_create(minor->debugfs_root, minor, "fps_show",
+				     &sti_drm_fps_fops);
+	if (ret)
+		goto err;
+
+	DRM_INFO("%s: debugfs installed\n", DRIVER_NAME);
+	return 0;
+err:
+	DRM_ERROR("%s: cannot install debugfs\n", DRIVER_NAME);
+	return ret;
+}
+
+void sti_drm_dbg_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(sti_drm_dbg_list,
+				 ARRAY_SIZE(sti_drm_dbg_list), minor);
+
+	drm_debugfs_remove_files((struct drm_info_list *)&sti_drm_fps_fops,
+				 1, minor);
+}
+
 static void sti_atomic_schedule(struct sti_private *private,
 				struct drm_atomic_state *state)
 {
@@ -181,18 +306,9 @@ static const struct file_operations sti_driver_fops = {
 	.release = drm_release,
 };
 
-static struct dma_buf *sti_gem_prime_export(struct drm_device *dev,
-					    struct drm_gem_object *obj,
-					    int flags)
-{
-	/* we want to be able to write in mmapped buffer */
-	flags |= O_RDWR;
-	return drm_gem_prime_export(dev, obj, flags);
-}
-
 static struct drm_driver sti_driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_MODESET |
-	    DRIVER_GEM | DRIVER_PRIME,
+	    DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC,
 	.load = sti_load,
 	.gem_free_object = drm_gem_cma_free_object,
 	.gem_vm_ops = &drm_gem_cma_vm_ops,
@@ -207,7 +323,7 @@ static struct drm_driver sti_driver = {
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
-	.gem_prime_export = sti_gem_prime_export,
+	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
 	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
@@ -215,6 +331,9 @@ static struct drm_driver sti_driver = {
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
 	.gem_prime_mmap = drm_gem_cma_prime_mmap,
 
+	.debugfs_init = sti_drm_dbg_init,
+	.debugfs_cleanup = sti_drm_dbg_cleanup,
+
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
 	.date = DRIVER_DATE,
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index 45cbe2bf7dd6..25f76632002c 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -6,6 +6,7 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
@@ -156,6 +157,69 @@ static void dvo_awg_configure(struct sti_dvo *dvo, u32 *awg_ram_code, int nb)
 	writel(DVO_AWG_CTRL_EN, dvo->regs + DVO_AWG_DIGSYNC_CTRL);
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(dvo->regs + reg))
+
+static void dvo_dbg_awg_microcode(struct seq_file *s, void __iomem *reg)
+{
+	unsigned int i;
+
+	seq_puts(s, "\n\n");
+	seq_puts(s, "  DVO AWG microcode:");
+	for (i = 0; i < AWG_MAX_INST; i++) {
+		if (i % 8 == 0)
+			seq_printf(s, "\n  %04X:", i);
+		seq_printf(s, " %04X", readl(reg + i * 4));
+	}
+}
+
+static int dvo_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_dvo *dvo = (struct sti_dvo *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "DVO: (vaddr = 0x%p)", dvo->regs);
+	DBGFS_DUMP(DVO_AWG_DIGSYNC_CTRL);
+	DBGFS_DUMP(DVO_DOF_CFG);
+	DBGFS_DUMP(DVO_LUT_PROG_LOW);
+	DBGFS_DUMP(DVO_LUT_PROG_MID);
+	DBGFS_DUMP(DVO_LUT_PROG_HIGH);
+	dvo_dbg_awg_microcode(s, dvo->regs + DVO_DIGSYNC_INSTR_I);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list dvo_debugfs_files[] = {
+	{ "dvo", dvo_dbg_show, 0, NULL },
+};
+
+static void dvo_debugfs_exit(struct sti_dvo *dvo, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(dvo_debugfs_files,
+				 ARRAY_SIZE(dvo_debugfs_files),
+				 minor);
+}
+
+static int dvo_debugfs_init(struct sti_dvo *dvo, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dvo_debugfs_files); i++)
+		dvo_debugfs_files[i].data = dvo;
+
+	return drm_debugfs_create_files(dvo_debugfs_files,
+					ARRAY_SIZE(dvo_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_dvo_disable(struct drm_bridge *bridge)
 {
 	struct sti_dvo *dvo = bridge->driver_private;
@@ -345,12 +409,14 @@ sti_dvo_connector_detect(struct drm_connector *connector, bool force)
 
 	DRM_DEBUG_DRIVER("\n");
 
-	if (!dvo->panel)
+	if (!dvo->panel) {
 		dvo->panel = of_drm_find_panel(dvo->panel_node);
+		if (dvo->panel)
+			drm_panel_attach(dvo->panel, connector);
+	}
 
 	if (dvo->panel)
-		if (!drm_panel_attach(dvo->panel, connector))
-			return connector_status_connected;
+		return connector_status_connected;
 
 	return connector_status_disconnected;
 }
@@ -453,6 +519,9 @@ static int sti_dvo_bind(struct device *dev, struct device *master, void *data)
 		goto err_sysfs;
 	}
 
+	if (dvo_debugfs_init(dvo, drm_dev->primary))
+		DRM_ERROR("DVO debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -467,6 +536,9 @@ static void sti_dvo_unbind(struct device *dev,
 			   struct device *master, void *data)
 {
 	struct sti_dvo *dvo = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	dvo_debugfs_exit(dvo, drm_dev->primary);
 
 	drm_bridge_remove(dvo->bridge);
 }
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index 514551c857bb..ff3d3e7e7704 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -6,9 +6,7 @@
  * License terms:  GNU General Public License (GPL), version 2
  */
 
-#include <linux/clk.h>
-#include <linux/dma-mapping.h>
-
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
@@ -32,10 +30,23 @@
 #define GDP_ABGR8888    (GDP_ARGB8888 | BIGNOTLITTLE | ALPHASWITCH)
 #define GDP_ARGB1555    0x06
 #define GDP_ARGB4444    0x07
-#define GDP_CLUT8       0x0B
-#define GDP_YCBR888     0x10
-#define GDP_YCBR422R    0x12
-#define GDP_AYCBR8888   0x15
+
+#define GDP2STR(fmt) { GDP_ ## fmt, #fmt }
+
+static struct gdp_format_to_str {
+	int format;
+	char name[20];
+} gdp_format_to_str[] = {
+		GDP2STR(RGB565),
+		GDP2STR(RGB888),
+		GDP2STR(RGB888_32),
+		GDP2STR(XBGR8888),
+		GDP2STR(ARGB8565),
+		GDP2STR(ARGB8888),
+		GDP2STR(ABGR8888),
+		GDP2STR(ARGB1555),
+		GDP2STR(ARGB4444)
+		};
 
 #define GAM_GDP_CTL_OFFSET      0x00
 #define GAM_GDP_AGC_OFFSET      0x04
@@ -97,6 +108,7 @@ struct sti_gdp_node_list {
  * @vtg_field_nb:       callback for VTG FIELD (top or bottom) notification
  * @is_curr_top:        true if the current node processed is the top field
  * @node_list:          array of node list
+ * @vtg:                registered vtg
  */
 struct sti_gdp {
 	struct sti_plane plane;
@@ -108,6 +120,7 @@ struct sti_gdp {
 	struct notifier_block vtg_field_nb;
 	bool is_curr_top;
 	struct sti_gdp_node_list node_list[GDP_NODE_NB_BANK];
+	struct sti_vtg *vtg;
 };
 
 #define to_sti_gdp(x) container_of(x, struct sti_gdp, plane)
@@ -121,12 +134,224 @@ static const uint32_t gdp_supported_formats[] = {
 	DRM_FORMAT_ARGB1555,
 	DRM_FORMAT_RGB565,
 	DRM_FORMAT_RGB888,
-	DRM_FORMAT_AYUV,
-	DRM_FORMAT_YUV444,
-	DRM_FORMAT_VYUY,
-	DRM_FORMAT_C8,
 };
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(gdp->regs + reg ## _OFFSET))
+
+static void gdp_dbg_ctl(struct seq_file *s, int val)
+{
+	int i;
+
+	seq_puts(s, "\tColor:");
+	for (i = 0; i < ARRAY_SIZE(gdp_format_to_str); i++) {
+		if (gdp_format_to_str[i].format == (val & 0x1F)) {
+			seq_printf(s, gdp_format_to_str[i].name);
+			break;
+		}
+	}
+	if (i == ARRAY_SIZE(gdp_format_to_str))
+		seq_puts(s, "<UNKNOWN>");
+
+	seq_printf(s, "\tWaitNextVsync:%d", val & WAIT_NEXT_VSYNC ? 1 : 0);
+}
+
+static void gdp_dbg_vpo(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_vps(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txds:%4d\tyds:%4d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_size(struct seq_file *s, int val)
+{
+	seq_printf(s, "\t%d x %d", val & 0xFFFF, (val >> 16) & 0xFFFF);
+}
+
+static void gdp_dbg_nvn(struct seq_file *s, struct sti_gdp *gdp, int val)
+{
+	void *base = NULL;
+	unsigned int i;
+
+	for (i = 0; i < GDP_NODE_NB_BANK; i++) {
+		if (gdp->node_list[i].top_field_paddr == val) {
+			base = gdp->node_list[i].top_field;
+			break;
+		}
+		if (gdp->node_list[i].btm_field_paddr == val) {
+			base = gdp->node_list[i].btm_field;
+			break;
+		}
+	}
+
+	if (base)
+		seq_printf(s, "\tVirt @: %p", base);
+}
+
+static void gdp_dbg_ppt(struct seq_file *s, int val)
+{
+	if (val & GAM_GDP_PPT_IGNORE)
+		seq_puts(s, "\tNot displayed on mixer!");
+}
+
+static void gdp_dbg_mst(struct seq_file *s, int val)
+{
+	if (val & 1)
+		seq_puts(s, "\tBUFFER UNDERFLOW!");
+}
+
+static int gdp_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_gdp *gdp = (struct sti_gdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_plane *drm_plane = &gdp->plane.drm_plane;
+	struct drm_crtc *crtc = drm_plane->crtc;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&gdp->plane), gdp->regs);
+
+	DBGFS_DUMP(GAM_GDP_CTL);
+	gdp_dbg_ctl(s, readl(gdp->regs + GAM_GDP_CTL_OFFSET));
+	DBGFS_DUMP(GAM_GDP_AGC);
+	DBGFS_DUMP(GAM_GDP_VPO);
+	gdp_dbg_vpo(s, readl(gdp->regs + GAM_GDP_VPO_OFFSET));
+	DBGFS_DUMP(GAM_GDP_VPS);
+	gdp_dbg_vps(s, readl(gdp->regs + GAM_GDP_VPS_OFFSET));
+	DBGFS_DUMP(GAM_GDP_PML);
+	DBGFS_DUMP(GAM_GDP_PMP);
+	DBGFS_DUMP(GAM_GDP_SIZE);
+	gdp_dbg_size(s, readl(gdp->regs + GAM_GDP_SIZE_OFFSET));
+	DBGFS_DUMP(GAM_GDP_NVN);
+	gdp_dbg_nvn(s, gdp, readl(gdp->regs + GAM_GDP_NVN_OFFSET));
+	DBGFS_DUMP(GAM_GDP_KEY1);
+	DBGFS_DUMP(GAM_GDP_KEY2);
+	DBGFS_DUMP(GAM_GDP_PPT);
+	gdp_dbg_ppt(s, readl(gdp->regs + GAM_GDP_PPT_OFFSET));
+	DBGFS_DUMP(GAM_GDP_CML);
+	DBGFS_DUMP(GAM_GDP_MST);
+	gdp_dbg_mst(s, readl(gdp->regs + GAM_GDP_MST_OFFSET));
+
+	seq_puts(s, "\n\n");
+	if (!crtc)
+		seq_puts(s, "  Not connected to any DRM CRTC\n");
+	else
+		seq_printf(s, "  Connected to DRM CRTC #%d (%s)\n",
+			   crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)));
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static void gdp_node_dump_node(struct seq_file *s, struct sti_gdp_node *node)
+{
+	seq_printf(s, "\t@:0x%p", node);
+	seq_printf(s, "\n\tCTL  0x%08X", node->gam_gdp_ctl);
+	gdp_dbg_ctl(s, node->gam_gdp_ctl);
+	seq_printf(s, "\n\tAGC  0x%08X", node->gam_gdp_agc);
+	seq_printf(s, "\n\tVPO  0x%08X", node->gam_gdp_vpo);
+	gdp_dbg_vpo(s, node->gam_gdp_vpo);
+	seq_printf(s, "\n\tVPS  0x%08X", node->gam_gdp_vps);
+	gdp_dbg_vps(s, node->gam_gdp_vps);
+	seq_printf(s, "\n\tPML  0x%08X", node->gam_gdp_pml);
+	seq_printf(s, "\n\tPMP  0x%08X", node->gam_gdp_pmp);
+	seq_printf(s, "\n\tSIZE 0x%08X", node->gam_gdp_size);
+	gdp_dbg_size(s, node->gam_gdp_size);
+	seq_printf(s, "\n\tNVN  0x%08X", node->gam_gdp_nvn);
+	seq_printf(s, "\n\tKEY1 0x%08X", node->gam_gdp_key1);
+	seq_printf(s, "\n\tKEY2 0x%08X", node->gam_gdp_key2);
+	seq_printf(s, "\n\tPPT  0x%08X", node->gam_gdp_ppt);
+	gdp_dbg_ppt(s, node->gam_gdp_ppt);
+	seq_printf(s, "\n\tCML  0x%08X", node->gam_gdp_cml);
+	seq_puts(s, "\n");
+}
+
+static int gdp_node_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_gdp *gdp = (struct sti_gdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	unsigned int b;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	for (b = 0; b < GDP_NODE_NB_BANK; b++) {
+		seq_printf(s, "\n%s[%d].top", sti_plane_to_str(&gdp->plane), b);
+		gdp_node_dump_node(s, gdp->node_list[b].top_field);
+		seq_printf(s, "\n%s[%d].btm", sti_plane_to_str(&gdp->plane), b);
+		gdp_node_dump_node(s, gdp->node_list[b].btm_field);
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list gdp0_debugfs_files[] = {
+	{ "gdp0", gdp_dbg_show, 0, NULL },
+	{ "gdp0_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp1_debugfs_files[] = {
+	{ "gdp1", gdp_dbg_show, 0, NULL },
+	{ "gdp1_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp2_debugfs_files[] = {
+	{ "gdp2", gdp_dbg_show, 0, NULL },
+	{ "gdp2_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list gdp3_debugfs_files[] = {
+	{ "gdp3", gdp_dbg_show, 0, NULL },
+	{ "gdp3_node", gdp_node_dbg_show, 0, NULL },
+};
+
+static int gdp_debugfs_init(struct sti_gdp *gdp, struct drm_minor *minor)
+{
+	unsigned int i;
+	struct drm_info_list *gdp_debugfs_files;
+	int nb_files;
+
+	switch (gdp->plane.desc) {
+	case STI_GDP_0:
+		gdp_debugfs_files = gdp0_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp0_debugfs_files);
+		break;
+	case STI_GDP_1:
+		gdp_debugfs_files = gdp1_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp1_debugfs_files);
+		break;
+	case STI_GDP_2:
+		gdp_debugfs_files = gdp2_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp2_debugfs_files);
+		break;
+	case STI_GDP_3:
+		gdp_debugfs_files = gdp3_debugfs_files;
+		nb_files = ARRAY_SIZE(gdp3_debugfs_files);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < nb_files; i++)
+		gdp_debugfs_files[i].data = gdp;
+
+	return drm_debugfs_create_files(gdp_debugfs_files,
+					nb_files,
+					minor->debugfs_root, minor);
+}
+
 static int sti_gdp_fourcc2format(int fourcc)
 {
 	switch (fourcc) {
@@ -146,14 +371,6 @@ static int sti_gdp_fourcc2format(int fourcc)
 		return GDP_RGB565;
 	case DRM_FORMAT_RGB888:
 		return GDP_RGB888;
-	case DRM_FORMAT_AYUV:
-		return GDP_AYCBR8888;
-	case DRM_FORMAT_YUV444:
-		return GDP_YCBR888;
-	case DRM_FORMAT_VYUY:
-		return GDP_YCBR422R;
-	case DRM_FORMAT_C8:
-		return GDP_CLUT8;
 	}
 	return -1;
 }
@@ -163,7 +380,6 @@ static int sti_gdp_get_alpharange(int format)
 	switch (format) {
 	case GDP_ARGB8565:
 	case GDP_ARGB8888:
-	case GDP_AYCBR8888:
 	case GDP_ABGR8888:
 		return GAM_GDP_ALPHARANGE_255;
 	}
@@ -240,9 +456,6 @@ end:
  */
 static void sti_gdp_disable(struct sti_gdp *gdp)
 {
-	struct drm_plane *drm_plane = &gdp->plane.drm_plane;
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
-	struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
 	unsigned int i;
 
 	DRM_DEBUG_DRIVER("%s\n", sti_plane_to_str(&gdp->plane));
@@ -253,8 +466,7 @@ static void sti_gdp_disable(struct sti_gdp *gdp)
 		gdp->node_list[i].btm_field->gam_gdp_ppt |= GAM_GDP_PPT_IGNORE;
 	}
 
-	if (sti_vtg_unregister_client(mixer->id == STI_MIXER_MAIN ?
-			compo->vtg_main : compo->vtg_aux, &gdp->vtg_field_nb))
+	if (sti_vtg_unregister_client(gdp->vtg, &gdp->vtg_field_nb))
 		DRM_DEBUG_DRIVER("Warning: cannot unregister VTG notifier\n");
 
 	if (gdp->clk_pix)
@@ -379,20 +591,140 @@ static void sti_gdp_init(struct sti_gdp *gdp)
 	}
 }
 
-static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
-				  struct drm_plane_state *oldstate)
+/**
+ * sti_gdp_get_dst
+ * @dev: device
+ * @dst: requested destination size
+ * @src: source size
+ *
+ * Return the cropped / clamped destination size
+ *
+ * RETURNS:
+ * cropped / clamped destination size
+ */
+static int sti_gdp_get_dst(struct device *dev, int dst, int src)
+{
+	if (dst == src)
+		return dst;
+
+	if (dst < src) {
+		dev_dbg(dev, "WARNING: GDP scale not supported, will crop\n");
+		return dst;
+	}
+
+	dev_dbg(dev, "WARNING: GDP scale not supported, will clamp\n");
+	return src;
+}
+
+static int sti_gdp_atomic_check(struct drm_plane *drm_plane,
+				struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_gdp *gdp = to_sti_gdp(plane);
 	struct drm_crtc *crtc = state->crtc;
 	struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
 	struct drm_framebuffer *fb =  state->fb;
 	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
+	struct drm_crtc_state *crtc_state;
 	struct sti_mixer *mixer;
 	struct drm_display_mode *mode;
 	int dst_x, dst_y, dst_w, dst_h;
 	int src_x, src_y, src_w, src_h;
+	int format;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	mixer = to_sti_mixer(crtc);
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
+
+	format = sti_gdp_fourcc2format(fb->pixel_format);
+	if (format == -1) {
+		DRM_ERROR("Format not supported by GDP %.4s\n",
+			  (char *)&fb->pixel_format);
+		return -EINVAL;
+	}
+
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
+		DRM_ERROR("Can't get CMA GEM object for fb\n");
+		return -EINVAL;
+	}
+
+	if (first_prepare) {
+		/* Register gdp callback */
+		gdp->vtg = mixer->id == STI_MIXER_MAIN ?
+					compo->vtg_main : compo->vtg_aux;
+		if (sti_vtg_register_client(gdp->vtg,
+					    &gdp->vtg_field_nb, crtc)) {
+			DRM_ERROR("Cannot register VTG notifier\n");
+			return -EINVAL;
+		}
+
+		/* Set and enable gdp clock */
+		if (gdp->clk_pix) {
+			struct clk *clkp;
+			int rate = mode->clock * 1000;
+			int res;
+
+			/*
+			 * According to the mixer used, the gdp pixel clock
+			 * should have a different parent clock.
+			 */
+			if (mixer->id == STI_MIXER_MAIN)
+				clkp = gdp->clk_main_parent;
+			else
+				clkp = gdp->clk_aux_parent;
+
+			if (clkp)
+				clk_set_parent(gdp->clk_pix, clkp);
+
+			res = clk_set_rate(gdp->clk_pix, rate);
+			if (res < 0) {
+				DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
+					  rate);
+				return -EINVAL;
+			}
+
+			if (clk_prepare_enable(gdp->clk_pix)) {
+				DRM_ERROR("Failed to prepare/enable gdp\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
+		      crtc->base.id, sti_mixer_to_str(mixer),
+		      drm_plane->base.id, sti_plane_to_str(plane));
+	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
+		      sti_plane_to_str(plane),
+		      dst_w, dst_h, dst_x, dst_y,
+		      src_w, src_h, src_x, src_y);
+
+	return 0;
+}
+
+static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
+				  struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_gdp *gdp = to_sti_gdp(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb =  state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
 	struct drm_gem_cma_object *cma_obj;
 	struct sti_gdp_node_list *list;
 	struct sti_gdp_node_list *curr_list;
@@ -402,13 +734,10 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	int format;
 	unsigned int depth, bpp;
 	u32 ydo, xdo, yds, xds;
-	int res;
 
-	/* Manage the case where crtc is null (disabled) */
-	if (!crtc)
+	if (!crtc || !fb)
 		return;
 
-	mixer = to_sti_mixer(crtc);
 	mode = &crtc->mode;
 	dst_x = state->crtc_x;
 	dst_y = state->crtc_y;
@@ -417,16 +746,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	/* src_x are in 16.16 format */
 	src_x = state->src_x >> 16;
 	src_y = state->src_y >> 16;
-	src_w = state->src_w >> 16;
-	src_h = state->src_h >> 16;
-
-	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
-		      drm_plane->base.id, sti_plane_to_str(plane));
-	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
-		      sti_plane_to_str(plane),
-		      dst_w, dst_h, dst_x, dst_y,
-		      src_w, src_h, src_x, src_y);
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
 
 	list = sti_gdp_get_free_nodes(gdp);
 	top_field = list->top_field;
@@ -439,20 +760,11 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_agc = GAM_GDP_AGC_FULL_RANGE;
 	top_field->gam_gdp_ctl = WAIT_NEXT_VSYNC;
 	format = sti_gdp_fourcc2format(fb->pixel_format);
-	if (format == -1) {
-		DRM_ERROR("Format not supported by GDP %.4s\n",
-			  (char *)&fb->pixel_format);
-		return;
-	}
 	top_field->gam_gdp_ctl |= format;
 	top_field->gam_gdp_ctl |= sti_gdp_get_alpharange(format);
 	top_field->gam_gdp_ppt &= ~GAM_GDP_PPT_IGNORE;
 
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
-	}
 
 	DRM_DEBUG_DRIVER("drm FB:%d format:%.4s phys@:0x%lx\n", fb->base.id,
 			 (char *)&fb->pixel_format,
@@ -464,12 +776,9 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_pml += src_x * (bpp >> 3);
 	top_field->gam_gdp_pml += src_y * fb->pitches[0];
 
-	/* input parameters */
-	top_field->gam_gdp_pmp = fb->pitches[0];
-	top_field->gam_gdp_size = clamp_val(src_h, 0, GAM_GDP_SIZE_MAX) << 16 |
-				  clamp_val(src_w, 0, GAM_GDP_SIZE_MAX);
-
-	/* output parameters */
+	/* output parameters (clamped / cropped) */
+	dst_w = sti_gdp_get_dst(gdp->dev, dst_w, src_w);
+	dst_h = sti_gdp_get_dst(gdp->dev, dst_h, src_h);
 	ydo = sti_vtg_get_line_number(*mode, dst_y);
 	yds = sti_vtg_get_line_number(*mode, dst_y + dst_h - 1);
 	xdo = sti_vtg_get_pixel_number(*mode, dst_x);
@@ -477,6 +786,11 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	top_field->gam_gdp_vpo = (ydo << 16) | xdo;
 	top_field->gam_gdp_vps = (yds << 16) | xds;
 
+	/* input parameters */
+	src_w = dst_w;
+	top_field->gam_gdp_pmp = fb->pitches[0];
+	top_field->gam_gdp_size = src_h << 16 | src_w;
+
 	/* Same content and chained together */
 	memcpy(btm_field, top_field, sizeof(*btm_field));
 	top_field->gam_gdp_nvn = list->btm_field_paddr;
@@ -487,44 +801,6 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 		btm_field->gam_gdp_pml = top_field->gam_gdp_pml +
 					 fb->pitches[0];
 
-	if (first_prepare) {
-		/* Register gdp callback */
-		if (sti_vtg_register_client(mixer->id == STI_MIXER_MAIN ?
-				compo->vtg_main : compo->vtg_aux,
-				&gdp->vtg_field_nb, crtc)) {
-			DRM_ERROR("Cannot register VTG notifier\n");
-			return;
-		}
-
-		/* Set and enable gdp clock */
-		if (gdp->clk_pix) {
-			struct clk *clkp;
-			int rate = mode->clock * 1000;
-
-			/* According to the mixer used, the gdp pixel clock
-			 * should have a different parent clock. */
-			if (mixer->id == STI_MIXER_MAIN)
-				clkp = gdp->clk_main_parent;
-			else
-				clkp = gdp->clk_aux_parent;
-
-			if (clkp)
-				clk_set_parent(gdp->clk_pix, clkp);
-
-			res = clk_set_rate(gdp->clk_pix, rate);
-			if (res < 0) {
-				DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
-					  rate);
-				return;
-			}
-
-			if (clk_prepare_enable(gdp->clk_pix)) {
-				DRM_ERROR("Failed to prepare/enable gdp\n");
-				return;
-			}
-		}
-	}
-
 	/* Update the NVN field of the 'right' field of the current GDP node
 	 * (being used by the HW) with the address of the updated ('free') top
 	 * field GDP node.
@@ -573,6 +849,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	}
 
 end:
+	sti_plane_update_fps(plane, true, false);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -580,7 +858,6 @@ static void sti_gdp_atomic_disable(struct drm_plane *drm_plane,
 				   struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -589,13 +866,15 @@ static void sti_gdp_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
 static const struct drm_plane_helper_funcs sti_gdp_helpers_funcs = {
+	.atomic_check = sti_gdp_atomic_check,
 	.atomic_update = sti_gdp_atomic_update,
 	.atomic_disable = sti_gdp_atomic_disable,
 };
@@ -639,6 +918,9 @@ struct drm_plane *sti_gdp_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&gdp->plane, type);
 
+	if (gdp_debugfs_init(gdp, drm_dev->primary))
+		DRM_ERROR("GDP debugfs setup failed\n");
+
 	return &gdp->plane.drm_plane;
 
 err:
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index 49cce833f2c8..ec0d017eaf1a 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -326,6 +326,103 @@ static void hda_enable_hd_dacs(struct sti_hda *hda, bool enable)
 	}
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(hda->regs + reg))
+
+static void hda_dbg_cfg(struct seq_file *s, int val)
+{
+	seq_puts(s, "\tAWG ");
+	seq_puts(s, val & CFG_AWG_ASYNC_EN ? "enabled" : "disabled");
+}
+
+static void hda_dbg_awg_microcode(struct seq_file *s, void __iomem *reg)
+{
+	unsigned int i;
+
+	seq_puts(s, "\n\n");
+	seq_puts(s, "  HDA AWG microcode:");
+	for (i = 0; i < AWG_MAX_INST; i++) {
+		if (i % 8 == 0)
+			seq_printf(s, "\n  %04X:", i);
+		seq_printf(s, " %04X", readl(reg + i * 4));
+	}
+}
+
+static void hda_dbg_video_dacs_ctrl(struct seq_file *s, void __iomem *reg)
+{
+	u32 val = readl(reg);
+	u32 mask;
+
+	switch ((u32)reg & VIDEO_DACS_CONTROL_MASK) {
+	case VIDEO_DACS_CONTROL_SYSCFG2535:
+		mask = DAC_CFG_HD_OFF_MASK;
+		break;
+	case VIDEO_DACS_CONTROL_SYSCFG5072:
+		mask = DAC_CFG_HD_HZUVW_OFF_MASK;
+		break;
+	default:
+		DRM_DEBUG_DRIVER("Warning: DACS ctrl register not supported!");
+		return;
+	}
+
+	seq_puts(s, "\n");
+	seq_printf(s, "\n  %-25s 0x%08X", "VIDEO_DACS_CONTROL", val);
+	seq_puts(s, "\tHD DACs ");
+	seq_puts(s, val & mask ? "disabled" : "enabled");
+}
+
+static int hda_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hda *hda = (struct sti_hda *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "HD Analog: (vaddr = 0x%p)", hda->regs);
+	DBGFS_DUMP(HDA_ANA_CFG);
+	hda_dbg_cfg(s, readl(hda->regs + HDA_ANA_CFG));
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_Y);
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_CB);
+	DBGFS_DUMP(HDA_ANA_SCALE_CTRL_CR);
+	DBGFS_DUMP(HDA_ANA_ANC_CTRL);
+	DBGFS_DUMP(HDA_ANA_SRC_Y_CFG);
+	DBGFS_DUMP(HDA_ANA_SRC_C_CFG);
+	hda_dbg_awg_microcode(s, hda->regs + HDA_SYNC_AWGI);
+	if (hda->video_dacs_ctrl)
+		hda_dbg_video_dacs_ctrl(s, hda->video_dacs_ctrl);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list hda_debugfs_files[] = {
+	{ "hda", hda_dbg_show, 0, NULL },
+};
+
+static void hda_debugfs_exit(struct sti_hda *hda, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(hda_debugfs_files,
+				 ARRAY_SIZE(hda_debugfs_files),
+				 minor);
+}
+
+static int hda_debugfs_init(struct sti_hda *hda, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(hda_debugfs_files); i++)
+		hda_debugfs_files[i].data = hda;
+
+	return drm_debugfs_create_files(hda_debugfs_files,
+					ARRAY_SIZE(hda_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 /**
  * Configure AWG, writing instructions
  *
@@ -685,6 +782,12 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data)
 		goto err_sysfs;
 	}
 
+	/* force to disable hd dacs at startup */
+	hda_enable_hd_dacs(hda, false);
+
+	if (hda_debugfs_init(hda, drm_dev->primary))
+		DRM_ERROR("HDA debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -697,7 +800,10 @@ err_connector:
 static void sti_hda_unbind(struct device *dev,
 		struct device *master, void *data)
 {
-	/* do nothing */
+	struct sti_hda *hda = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	hda_debugfs_exit(hda, drm_dev->primary);
 }
 
 static const struct component_ops sti_hda_ops = {
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index cd501563c0cc..6ef0715bd5b9 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -6,6 +6,7 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/debugfs.h>
 #include <linux/hdmi.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
@@ -51,9 +52,18 @@
 #define HDMI_SW_DI_2_PKT_WORD4          0x0614
 #define HDMI_SW_DI_2_PKT_WORD5          0x0618
 #define HDMI_SW_DI_2_PKT_WORD6          0x061C
+#define HDMI_SW_DI_3_HEAD_WORD          0x0620
+#define HDMI_SW_DI_3_PKT_WORD0          0x0624
+#define HDMI_SW_DI_3_PKT_WORD1          0x0628
+#define HDMI_SW_DI_3_PKT_WORD2          0x062C
+#define HDMI_SW_DI_3_PKT_WORD3          0x0630
+#define HDMI_SW_DI_3_PKT_WORD4          0x0634
+#define HDMI_SW_DI_3_PKT_WORD5          0x0638
+#define HDMI_SW_DI_3_PKT_WORD6          0x063C
 
 #define HDMI_IFRAME_SLOT_AVI            1
 #define HDMI_IFRAME_SLOT_AUDIO          2
+#define HDMI_IFRAME_SLOT_VENDOR         3
 
 #define  XCAT(prefix, x, suffix)        prefix ## x ## suffix
 #define  HDMI_SW_DI_N_HEAD_WORD(x)      XCAT(HDMI_SW_DI_, x, _HEAD_WORD)
@@ -65,6 +75,8 @@
 #define  HDMI_SW_DI_N_PKT_WORD5(x)      XCAT(HDMI_SW_DI_, x, _PKT_WORD5)
 #define  HDMI_SW_DI_N_PKT_WORD6(x)      XCAT(HDMI_SW_DI_, x, _PKT_WORD6)
 
+#define HDMI_SW_DI_MAX_WORD             7
+
 #define HDMI_IFRAME_DISABLED            0x0
 #define HDMI_IFRAME_SINGLE_SHOT         0x1
 #define HDMI_IFRAME_FIELD               0x2
@@ -117,6 +129,8 @@ struct sti_hdmi_connector {
 	struct drm_connector drm_connector;
 	struct drm_encoder *encoder;
 	struct sti_hdmi *hdmi;
+	struct drm_property *colorspace_property;
+	struct drm_property *hdmi_mode_property;
 };
 
 #define to_sti_hdmi_connector(x) \
@@ -217,8 +231,10 @@ static void hdmi_config(struct sti_hdmi *hdmi)
 	/* Clear overrun and underrun fifo */
 	conf = HDMI_CFG_FIFO_OVERRUN_CLR | HDMI_CFG_FIFO_UNDERRUN_CLR;
 
-	/* Enable HDMI mode not DVI */
-	conf |= HDMI_CFG_HDMI_NOT_DVI | HDMI_CFG_ESS_NOT_OESS;
+	/* Select encryption type and the framing mode */
+	conf |= HDMI_CFG_ESS_NOT_OESS;
+	if (hdmi->hdmi_mode == HDMI_MODE_HDMI)
+		conf |= HDMI_CFG_HDMI_NOT_DVI;
 
 	/* Enable sink term detection */
 	conf |= HDMI_CFG_SINK_TERM_DET_EN;
@@ -241,6 +257,47 @@ static void hdmi_config(struct sti_hdmi *hdmi)
 	hdmi_write(hdmi, conf, HDMI_CFG);
 }
 
+/*
+ * Helper to reset info frame
+ *
+ * @hdmi: pointer on the hdmi internal structure
+ * @slot: infoframe to reset
+ */
+static void hdmi_infoframe_reset(struct sti_hdmi *hdmi,
+				 u32 slot)
+{
+	u32 val, i;
+	u32 head_offset, pack_offset;
+
+	switch (slot) {
+	case HDMI_IFRAME_SLOT_AVI:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AVI);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AVI);
+		break;
+	case HDMI_IFRAME_SLOT_AUDIO:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AUDIO);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AUDIO);
+		break;
+	case HDMI_IFRAME_SLOT_VENDOR:
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_VENDOR);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_VENDOR);
+		break;
+	default:
+		DRM_ERROR("unsupported infoframe slot: %#x\n", slot);
+		return;
+	}
+
+	/* Disable transmission for the selected slot */
+	val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
+	val &= ~HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, slot);
+	hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
+
+	/* Reset info frame registers */
+	hdmi_write(hdmi, 0x0, head_offset);
+	for (i = 0; i < HDMI_SW_DI_MAX_WORD; i += sizeof(u32))
+		hdmi_write(hdmi, 0x0, pack_offset + i);
+}
+
 /**
  * Helper to concatenate infoframe in 32 bits word
  *
@@ -266,12 +323,13 @@ static inline unsigned int hdmi_infoframe_subpack(const u8 *ptr, size_t size)
  * @data: infoframe to write
  * @size: size to write
  */
-static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
+static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi,
+					  const u8 *data,
+					  size_t size)
 {
 	const u8 *ptr = data;
 	u32 val, slot, mode, i;
 	u32 head_offset, pack_offset;
-	size_t size;
 
 	switch (*ptr) {
 	case HDMI_INFOFRAME_TYPE_AVI:
@@ -279,17 +337,19 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 		mode = HDMI_IFRAME_FIELD;
 		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AVI);
 		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AVI);
-		size = HDMI_AVI_INFOFRAME_SIZE;
 		break;
-
 	case HDMI_INFOFRAME_TYPE_AUDIO:
 		slot = HDMI_IFRAME_SLOT_AUDIO;
 		mode = HDMI_IFRAME_FRAME;
 		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_AUDIO);
 		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_AUDIO);
-		size = HDMI_AUDIO_INFOFRAME_SIZE;
 		break;
-
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		slot = HDMI_IFRAME_SLOT_VENDOR;
+		mode = HDMI_IFRAME_FRAME;
+		head_offset = HDMI_SW_DI_N_HEAD_WORD(HDMI_IFRAME_SLOT_VENDOR);
+		pack_offset = HDMI_SW_DI_N_PKT_WORD0(HDMI_IFRAME_SLOT_VENDOR);
+		break;
 	default:
 		DRM_ERROR("unsupported infoframe type: %#x\n", *ptr);
 		return;
@@ -308,8 +368,9 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 	/*
 	 * Each subpack contains 4 bytes
 	 * The First Bytes of the first subpacket must contain the checksum
-	 * Packet size in increase by one.
+	 * Packet size is increase by one.
 	 */
+	size = size - HDMI_INFOFRAME_HEADER_SIZE + 1;
 	for (i = 0; i < size; i += sizeof(u32)) {
 		size_t num;
 
@@ -321,7 +382,7 @@ static void hdmi_infoframe_write_infopack(struct sti_hdmi *hdmi, const u8 *data)
 
 	/* Enable transmission slot for updated infoframe */
 	val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
-	val |= HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_FIELD, slot);
+	val |= HDMI_IFRAME_CFG_DI_N(mode, slot);
 	hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
 }
 
@@ -352,7 +413,7 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
 	}
 
 	/* fixed infoframe configuration not linked to the mode */
-	infoframe.colorspace = HDMI_COLORSPACE_RGB;
+	infoframe.colorspace = hdmi->colorspace;
 	infoframe.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
 	infoframe.colorimetry = HDMI_COLORIMETRY_NONE;
 
@@ -362,7 +423,7 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
 		return ret;
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer);
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
 
 	return 0;
 }
@@ -398,7 +459,49 @@ static int hdmi_audio_infoframe_config(struct sti_hdmi *hdmi)
 		return ret;
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer);
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
+
+	return 0;
+}
+
+/*
+ * Prepare and configure the VS infoframe
+ *
+ * Vendor Specific infoframe are transmitted once per frame and
+ * contains vendor specific information.
+ *
+ * @hdmi: pointer on the hdmi internal structure
+ *
+ * Return negative value if error occurs
+ */
+#define HDMI_VENDOR_INFOFRAME_MAX_SIZE 6
+static int hdmi_vendor_infoframe_config(struct sti_hdmi *hdmi)
+{
+	struct drm_display_mode *mode = &hdmi->mode;
+	struct hdmi_vendor_infoframe infoframe;
+	u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_VENDOR_INFOFRAME_MAX_SIZE];
+	int ret;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	ret = drm_hdmi_vendor_infoframe_from_display_mode(&infoframe, mode);
+	if (ret < 0) {
+		/*
+		 * Going into that statement does not means vendor infoframe
+		 * fails. It just informed us that vendor infoframe is not
+		 * needed for the selected mode. Only  4k or stereoscopic 3D
+		 * mode requires vendor infoframe. So just simply return 0.
+		 */
+		return 0;
+	}
+
+	ret = hdmi_vendor_infoframe_pack(&infoframe, buffer, sizeof(buffer));
+	if (ret < 0) {
+		DRM_ERROR("failed to pack VS infoframe: %d\n", ret);
+		return ret;
+	}
+
+	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
 
 	return 0;
 }
@@ -448,6 +551,172 @@ static void hdmi_swreset(struct sti_hdmi *hdmi)
 	clk_disable_unprepare(hdmi->clk_audio);
 }
 
+#define DBGFS_PRINT_STR(str1, str2) seq_printf(s, "%-24s %s\n", str1, str2)
+#define DBGFS_PRINT_INT(str1, int2) seq_printf(s, "%-24s %d\n", str1, int2)
+#define DBGFS_DUMP(str, reg) seq_printf(s, "%s  %-25s 0x%08X", str, #reg, \
+					hdmi_read(hdmi, reg))
+#define DBGFS_DUMP_DI(reg, slot) DBGFS_DUMP("\n", reg(slot))
+
+static void hdmi_dbg_cfg(struct seq_file *s, int val)
+{
+	int tmp;
+
+	seq_puts(s, "\t");
+	tmp = val & HDMI_CFG_HDMI_NOT_DVI;
+	DBGFS_PRINT_STR("mode:", tmp ? "HDMI" : "DVI");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_HDCP_EN;
+	DBGFS_PRINT_STR("HDCP:", tmp ? "enable" : "disable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_ESS_NOT_OESS;
+	DBGFS_PRINT_STR("HDCP mode:", tmp ? "ESS enable" : "OESS enable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_SINK_TERM_DET_EN;
+	DBGFS_PRINT_STR("Sink term detection:", tmp ? "enable" : "disable");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_H_SYNC_POL_NEG;
+	DBGFS_PRINT_STR("Hsync polarity:", tmp ? "inverted" : "normal");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_V_SYNC_POL_NEG;
+	DBGFS_PRINT_STR("Vsync polarity:", tmp ? "inverted" : "normal");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = val & HDMI_CFG_422_EN;
+	DBGFS_PRINT_STR("YUV422 format:", tmp ? "enable" : "disable");
+}
+
+static void hdmi_dbg_sta(struct seq_file *s, int val)
+{
+	int tmp;
+
+	seq_puts(s, "\t");
+	tmp = (val & HDMI_STA_DLL_LCK);
+	DBGFS_PRINT_STR("pll:", tmp ? "locked" : "not locked");
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_STA_HOT_PLUG);
+	DBGFS_PRINT_STR("hdmi cable:", tmp ? "connected" : "not connected");
+}
+
+static void hdmi_dbg_sw_di_cfg(struct seq_file *s, int val)
+{
+	int tmp;
+	char *const en_di[] = {"no transmission",
+			       "single transmission",
+			       "once every field",
+			       "once every frame"};
+
+	seq_puts(s, "\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 1));
+	DBGFS_PRINT_STR("Data island 1:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 2)) >> 4;
+	DBGFS_PRINT_STR("Data island 2:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 3)) >> 8;
+	DBGFS_PRINT_STR("Data island 3:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 4)) >> 12;
+	DBGFS_PRINT_STR("Data island 4:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 5)) >> 16;
+	DBGFS_PRINT_STR("Data island 5:", en_di[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK, 6)) >> 20;
+	DBGFS_PRINT_STR("Data island 6:", en_di[tmp]);
+}
+
+static int hdmi_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hdmi *hdmi = (struct sti_hdmi *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "HDMI: (vaddr = 0x%p)", hdmi->regs);
+	DBGFS_DUMP("\n", HDMI_CFG);
+	hdmi_dbg_cfg(s, hdmi_read(hdmi, HDMI_CFG));
+	DBGFS_DUMP("", HDMI_INT_EN);
+	DBGFS_DUMP("\n", HDMI_STA);
+	hdmi_dbg_sta(s, hdmi_read(hdmi, HDMI_STA));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_XMIN);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Xmin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMIN));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_XMAX);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Xmax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_XMAX));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_YMIN);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Ymin:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMIN));
+	DBGFS_DUMP("", HDMI_ACTIVE_VID_YMAX);
+	seq_puts(s, "\t");
+	DBGFS_PRINT_INT("Ymax:", hdmi_read(hdmi, HDMI_ACTIVE_VID_YMAX));
+	DBGFS_DUMP("", HDMI_SW_DI_CFG);
+	hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG));
+
+	seq_printf(s, "\n AVI Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AVI);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AVI);
+	seq_puts(s, "\n");
+	seq_printf(s, "\n AUDIO Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_AUDIO);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_AUDIO);
+	seq_puts(s, "\n");
+	seq_printf(s, "\n VENDOR SPECIFIC Infoframe (Data Island slot N=%d):",
+		   HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD0, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD1, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD2, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD3, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD4, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD5, HDMI_IFRAME_SLOT_VENDOR);
+	DBGFS_DUMP_DI(HDMI_SW_DI_N_PKT_WORD6, HDMI_IFRAME_SLOT_VENDOR);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list hdmi_debugfs_files[] = {
+	{ "hdmi", hdmi_dbg_show, 0, NULL },
+};
+
+static void hdmi_debugfs_exit(struct sti_hdmi *hdmi, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(hdmi_debugfs_files,
+				 ARRAY_SIZE(hdmi_debugfs_files),
+				 minor);
+}
+
+static int hdmi_debugfs_init(struct sti_hdmi *hdmi, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(hdmi_debugfs_files); i++)
+		hdmi_debugfs_files[i].data = hdmi;
+
+	return drm_debugfs_create_files(hdmi_debugfs_files,
+					ARRAY_SIZE(hdmi_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
 static void sti_hdmi_disable(struct drm_bridge *bridge)
 {
 	struct sti_hdmi *hdmi = bridge->driver_private;
@@ -468,6 +737,11 @@ static void sti_hdmi_disable(struct drm_bridge *bridge)
 	/* Stop the phy */
 	hdmi->phy_ops->stop(hdmi);
 
+	/* Reset info frame transmission */
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_AVI);
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_AUDIO);
+	hdmi_infoframe_reset(hdmi, HDMI_IFRAME_SLOT_VENDOR);
+
 	/* Set the default channel data to be a dark red */
 	hdmi_write(hdmi, 0x0000, HDMI_DFLT_CHL0_DAT);
 	hdmi_write(hdmi, 0x0000, HDMI_DFLT_CHL1_DAT);
@@ -523,6 +797,10 @@ static void sti_hdmi_pre_enable(struct drm_bridge *bridge)
 	if (hdmi_audio_infoframe_config(hdmi))
 		DRM_ERROR("Unable to configure AUDIO infoframe\n");
 
+	/* Program VS infoframe */
+	if (hdmi_vendor_infoframe_config(hdmi))
+		DRM_ERROR("Unable to configure VS infoframe\n");
+
 	/* Sw reset */
 	hdmi_swreset(hdmi);
 }
@@ -664,12 +942,97 @@ static void sti_hdmi_connector_destroy(struct drm_connector *connector)
 	kfree(hdmi_connector);
 }
 
+static void sti_hdmi_connector_init_property(struct drm_device *drm_dev,
+					     struct drm_connector *connector)
+{
+	struct sti_hdmi_connector *hdmi_connector
+		= to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = hdmi_connector->hdmi;
+	struct drm_property *prop;
+
+	/* colorspace property */
+	hdmi->colorspace = DEFAULT_COLORSPACE_MODE;
+	prop = drm_property_create_enum(drm_dev, 0, "colorspace",
+					colorspace_mode_names,
+					ARRAY_SIZE(colorspace_mode_names));
+	if (!prop) {
+		DRM_ERROR("fails to create colorspace property\n");
+		return;
+	}
+	hdmi_connector->colorspace_property = prop;
+	drm_object_attach_property(&connector->base, prop, hdmi->colorspace);
+
+	/* hdmi_mode property */
+	hdmi->hdmi_mode = DEFAULT_HDMI_MODE;
+	prop = drm_property_create_enum(drm_dev, 0, "hdmi_mode",
+					hdmi_mode_names,
+					ARRAY_SIZE(hdmi_mode_names));
+	if (!prop) {
+		DRM_ERROR("fails to create colorspace property\n");
+		return;
+	}
+	hdmi_connector->hdmi_mode_property = prop;
+	drm_object_attach_property(&connector->base, prop, hdmi->hdmi_mode);
+
+}
+
+static int
+sti_hdmi_connector_set_property(struct drm_connector *connector,
+				struct drm_connector_state *state,
+				struct drm_property *property,
+				uint64_t val)
+{
+	struct sti_hdmi_connector *hdmi_connector
+		= to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = hdmi_connector->hdmi;
+
+	if (property == hdmi_connector->colorspace_property) {
+		hdmi->colorspace = val;
+		return 0;
+	}
+
+	if (property == hdmi_connector->hdmi_mode_property) {
+		hdmi->hdmi_mode = val;
+		return 0;
+	}
+
+	DRM_ERROR("failed to set hdmi connector property\n");
+	return -EINVAL;
+}
+
+static int
+sti_hdmi_connector_get_property(struct drm_connector *connector,
+				const struct drm_connector_state *state,
+				struct drm_property *property,
+				uint64_t *val)
+{
+	struct sti_hdmi_connector *hdmi_connector
+		= to_sti_hdmi_connector(connector);
+	struct sti_hdmi *hdmi = hdmi_connector->hdmi;
+
+	if (property == hdmi_connector->colorspace_property) {
+		*val = hdmi->colorspace;
+		return 0;
+	}
+
+	if (property == hdmi_connector->hdmi_mode_property) {
+		*val = hdmi->hdmi_mode;
+		return 0;
+	}
+
+	DRM_ERROR("failed to get hdmi connector property\n");
+	return -EINVAL;
+}
+
 static const struct drm_connector_funcs sti_hdmi_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = sti_hdmi_connector_detect,
 	.destroy = sti_hdmi_connector_destroy,
 	.reset = drm_atomic_helper_connector_reset,
+	.set_property = drm_atomic_helper_connector_set_property,
+	.atomic_set_property = sti_hdmi_connector_set_property,
+	.atomic_get_property = sti_hdmi_connector_get_property,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
@@ -729,6 +1092,9 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	drm_connector_helper_add(drm_connector,
 			&sti_hdmi_connector_helper_funcs);
 
+	/* initialise property */
+	sti_hdmi_connector_init_property(drm_dev, drm_connector);
+
 	err = drm_connector_register(drm_connector);
 	if (err)
 		goto err_connector;
@@ -742,6 +1108,9 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	/* Enable default interrupts */
 	hdmi_write(hdmi, HDMI_DEFAULT_INT, HDMI_INT_EN);
 
+	if (hdmi_debugfs_init(hdmi, drm_dev->primary))
+		DRM_ERROR("HDMI debugfs setup failed\n");
+
 	return 0;
 
 err_sysfs:
@@ -755,7 +1124,10 @@ err_connector:
 static void sti_hdmi_unbind(struct device *dev,
 		struct device *master, void *data)
 {
-	/* do nothing */
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+
+	hdmi_debugfs_exit(hdmi, drm_dev->primary);
 }
 
 static const struct component_ops sti_hdmi_ops = {
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index 3d22390e1f3b..ef3a94583bbd 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -7,15 +7,14 @@
 #ifndef _STI_HDMI_H_
 #define _STI_HDMI_H_
 
+#include <linux/hdmi.h>
 #include <linux/platform_device.h>
 
 #include <drm/drmP.h>
 
 #define HDMI_STA           0x0010
 #define HDMI_STA_DLL_LCK   BIT(5)
-
-#define HDMI_STA_HOT_PLUG_SHIFT 4
-#define HDMI_STA_HOT_PLUG	(1 << HDMI_STA_HOT_PLUG_SHIFT)
+#define HDMI_STA_HOT_PLUG  BIT(4)
 
 struct sti_hdmi;
 
@@ -24,6 +23,27 @@ struct hdmi_phy_ops {
 	void (*stop)(struct sti_hdmi *hdmi);
 };
 
+/* values for the framing mode property */
+enum sti_hdmi_modes {
+	HDMI_MODE_HDMI,
+	HDMI_MODE_DVI,
+};
+
+static const struct drm_prop_enum_list hdmi_mode_names[] = {
+	{ HDMI_MODE_HDMI, "hdmi" },
+	{ HDMI_MODE_DVI, "dvi" },
+};
+
+#define DEFAULT_HDMI_MODE HDMI_MODE_HDMI
+
+static const struct drm_prop_enum_list colorspace_mode_names[] = {
+	{ HDMI_COLORSPACE_RGB, "rgb" },
+	{ HDMI_COLORSPACE_YUV422, "yuv422" },
+	{ HDMI_COLORSPACE_YUV444, "yuv444" },
+};
+
+#define DEFAULT_COLORSPACE_MODE HDMI_COLORSPACE_RGB
+
 /**
  * STI hdmi structure
  *
@@ -44,6 +64,9 @@ struct hdmi_phy_ops {
  * @wait_event: wait event
  * @event_received: wait event status
  * @reset: reset control of the hdmi phy
+ * @ddc_adapt: i2c ddc adapter
+ * @colorspace: current colorspace selected
+ * @hdmi_mode: select framing for HDMI or DVI
  */
 struct sti_hdmi {
 	struct device dev;
@@ -64,6 +87,8 @@ struct sti_hdmi {
 	bool event_received;
 	struct reset_control *reset;
 	struct i2c_adapter *ddc_adapt;
+	enum hdmi_colorspace colorspace;
+	enum sti_hdmi_modes hdmi_mode;
 };
 
 u32 hdmi_read(struct sti_hdmi *hdmi, int offset);
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 1d3c3d029603..e05b0dc523ff 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -4,14 +4,11 @@
  * License terms:  GNU General Public License (GPL), version 2
  */
 
-#include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/firmware.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
 #include <linux/reset.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
@@ -329,8 +326,6 @@ struct sti_hqvdp_cmd {
  * @reset:             reset control
  * @vtg_nb:            notifier to handle VTG Vsync
  * @btm_field_pending: is there any bottom field (interlaced frame) to display
- * @curr_field_count:  number of field updates
- * @last_field_count:  number of field updates since last fps measure
  * @hqvdp_cmd:         buffer of commands
  * @hqvdp_cmd_paddr:   physical address of hqvdp_cmd
  * @vtg:               vtg for main data path
@@ -346,10 +341,8 @@ struct sti_hqvdp {
 	struct reset_control *reset;
 	struct notifier_block vtg_nb;
 	bool btm_field_pending;
-	unsigned int curr_field_count;
-	unsigned int last_field_count;
 	void *hqvdp_cmd;
-	dma_addr_t hqvdp_cmd_paddr;
+	u32 hqvdp_cmd_paddr;
 	struct sti_vtg *vtg;
 	bool xp70_initialized;
 };
@@ -372,8 +365,8 @@ static const uint32_t hqvdp_supported_formats[] = {
  */
 static int sti_hqvdp_get_free_cmd(struct sti_hqvdp *hqvdp)
 {
-	int curr_cmd, next_cmd;
-	dma_addr_t cmd = hqvdp->hqvdp_cmd_paddr;
+	u32 curr_cmd, next_cmd;
+	u32 cmd = hqvdp->hqvdp_cmd_paddr;
 	int i;
 
 	curr_cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
@@ -400,8 +393,8 @@ static int sti_hqvdp_get_free_cmd(struct sti_hqvdp *hqvdp)
  */
 static int sti_hqvdp_get_curr_cmd(struct sti_hqvdp *hqvdp)
 {
-	int curr_cmd;
-	dma_addr_t cmd = hqvdp->hqvdp_cmd_paddr;
+	u32 curr_cmd;
+	u32 cmd = hqvdp->hqvdp_cmd_paddr;
 	unsigned int i;
 
 	curr_cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
@@ -417,6 +410,246 @@ static int sti_hqvdp_get_curr_cmd(struct sti_hqvdp *hqvdp)
 }
 
 /**
+ * sti_hqvdp_get_next_cmd
+ * @hqvdp: hqvdp structure
+ *
+ * Look for the next hqvdp_cmd that will be used by the FW.
+ *
+ * RETURNS:
+ *  the offset of the next command that will be used.
+ * -1 in error cases
+ */
+static int sti_hqvdp_get_next_cmd(struct sti_hqvdp *hqvdp)
+{
+	int next_cmd;
+	dma_addr_t cmd = hqvdp->hqvdp_cmd_paddr;
+	unsigned int i;
+
+	next_cmd = readl(hqvdp->regs + HQVDP_MBX_NEXT_CMD);
+
+	for (i = 0; i < NB_VDP_CMD; i++) {
+		if (cmd == next_cmd)
+			return i * sizeof(struct sti_hqvdp_cmd);
+
+		cmd += sizeof(struct sti_hqvdp_cmd);
+	}
+
+	return -1;
+}
+
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(hqvdp->regs + reg))
+
+static const char *hqvdp_dbg_get_lut(u32 *coef)
+{
+	if (!memcmp(coef, coef_lut_a_legacy, 16))
+		return "LUT A";
+	if (!memcmp(coef, coef_lut_b, 16))
+		return "LUT B";
+	if (!memcmp(coef, coef_lut_c_y_legacy, 16))
+		return "LUT C Y";
+	if (!memcmp(coef, coef_lut_c_c_legacy, 16))
+		return "LUT C C";
+	if (!memcmp(coef, coef_lut_d_y_legacy, 16))
+		return "LUT D Y";
+	if (!memcmp(coef, coef_lut_d_c_legacy, 16))
+		return "LUT D C";
+	if (!memcmp(coef, coef_lut_e_y_legacy, 16))
+		return "LUT E Y";
+	if (!memcmp(coef, coef_lut_e_c_legacy, 16))
+		return "LUT E C";
+	if (!memcmp(coef, coef_lut_f_y_legacy, 16))
+		return "LUT F Y";
+	if (!memcmp(coef, coef_lut_f_c_legacy, 16))
+		return "LUT F C";
+	return "<UNKNOWN>";
+}
+
+static void hqvdp_dbg_dump_cmd(struct seq_file *s, struct sti_hqvdp_cmd *c)
+{
+	int src_w, src_h, dst_w, dst_h;
+
+	seq_puts(s, "\n\tTOP:");
+	seq_printf(s, "\n\t %-20s 0x%08X", "Config", c->top.config);
+	switch (c->top.config) {
+	case TOP_CONFIG_PROGRESSIVE:
+		seq_puts(s, "\tProgressive");
+		break;
+	case TOP_CONFIG_INTER_TOP:
+		seq_puts(s, "\tInterlaced, top field");
+		break;
+	case TOP_CONFIG_INTER_BTM:
+		seq_puts(s, "\tInterlaced, bottom field");
+		break;
+	default:
+		seq_puts(s, "\t<UNKNOWN>");
+		break;
+	}
+
+	seq_printf(s, "\n\t %-20s 0x%08X", "MemFormat", c->top.mem_format);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CurrentY", c->top.current_luma);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CurrentC", c->top.current_chroma);
+	seq_printf(s, "\n\t %-20s 0x%08X", "YSrcPitch", c->top.luma_src_pitch);
+	seq_printf(s, "\n\t %-20s 0x%08X", "CSrcPitch",
+		   c->top.chroma_src_pitch);
+	seq_printf(s, "\n\t %-20s 0x%08X", "InputFrameSize",
+		   c->top.input_frame_size);
+	seq_printf(s, "\t%dx%d",
+		   c->top.input_frame_size & 0x0000FFFF,
+		   c->top.input_frame_size >> 16);
+	seq_printf(s, "\n\t %-20s 0x%08X", "InputViewportSize",
+		   c->top.input_viewport_size);
+	src_w = c->top.input_viewport_size & 0x0000FFFF;
+	src_h = c->top.input_viewport_size >> 16;
+	seq_printf(s, "\t%dx%d", src_w, src_h);
+
+	seq_puts(s, "\n\tHVSRC:");
+	seq_printf(s, "\n\t %-20s 0x%08X", "OutputPictureSize",
+		   c->hvsrc.output_picture_size);
+	dst_w = c->hvsrc.output_picture_size & 0x0000FFFF;
+	dst_h = c->hvsrc.output_picture_size >> 16;
+	seq_printf(s, "\t%dx%d", dst_w, dst_h);
+	seq_printf(s, "\n\t %-20s 0x%08X", "ParamCtrl", c->hvsrc.param_ctrl);
+
+	seq_printf(s, "\n\t %-20s %s", "yh_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.yh_coef));
+	seq_printf(s, "\n\t %-20s %s", "ch_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.ch_coef));
+	seq_printf(s, "\n\t %-20s %s", "yv_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.yv_coef));
+	seq_printf(s, "\n\t %-20s %s", "cv_coef",
+		   hqvdp_dbg_get_lut(c->hvsrc.cv_coef));
+
+	seq_printf(s, "\n\t %-20s", "ScaleH");
+	if (dst_w > src_w)
+		seq_printf(s, " %d/1", dst_w / src_w);
+	else
+		seq_printf(s, " 1/%d", src_w / dst_w);
+
+	seq_printf(s, "\n\t %-20s", "tScaleV");
+	if (dst_h > src_h)
+		seq_printf(s, " %d/1", dst_h / src_h);
+	else
+		seq_printf(s, " 1/%d", src_h / dst_h);
+
+	seq_puts(s, "\n\tCSDI:");
+	seq_printf(s, "\n\t %-20s 0x%08X\t", "Config", c->csdi.config);
+	switch (c->csdi.config) {
+	case CSDI_CONFIG_PROG:
+		seq_puts(s, "Bypass");
+		break;
+	case CSDI_CONFIG_INTER_DIR:
+		seq_puts(s, "Deinterlace, directional");
+		break;
+	default:
+		seq_puts(s, "<UNKNOWN>");
+		break;
+	}
+
+	seq_printf(s, "\n\t %-20s 0x%08X", "Config2", c->csdi.config2);
+	seq_printf(s, "\n\t %-20s 0x%08X", "DcdiConfig", c->csdi.dcdi_config);
+}
+
+static int hqvdp_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_hqvdp *hqvdp = (struct sti_hqvdp *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int cmd, cmd_offset, infoxp70;
+	void *virt;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_plane_to_str(&hqvdp->plane), hqvdp->regs);
+
+	DBGFS_DUMP(HQVDP_MBX_IRQ_TO_XP70);
+	DBGFS_DUMP(HQVDP_MBX_INFO_HOST);
+	DBGFS_DUMP(HQVDP_MBX_IRQ_TO_HOST);
+	DBGFS_DUMP(HQVDP_MBX_INFO_XP70);
+	infoxp70 = readl(hqvdp->regs + HQVDP_MBX_INFO_XP70);
+	seq_puts(s, "\tFirmware state: ");
+	if (infoxp70 & INFO_XP70_FW_READY)
+		seq_puts(s, "idle and ready");
+	else if (infoxp70 & INFO_XP70_FW_PROCESSING)
+		seq_puts(s, "processing a picture");
+	else if (infoxp70 & INFO_XP70_FW_INITQUEUES)
+		seq_puts(s, "programming queues");
+	else
+		seq_puts(s, "NOT READY");
+
+	DBGFS_DUMP(HQVDP_MBX_SW_RESET_CTRL);
+	DBGFS_DUMP(HQVDP_MBX_STARTUP_CTRL1);
+	if (readl(hqvdp->regs + HQVDP_MBX_STARTUP_CTRL1)
+					& STARTUP_CTRL1_RST_DONE)
+		seq_puts(s, "\tReset is done");
+	else
+		seq_puts(s, "\tReset is NOT done");
+	DBGFS_DUMP(HQVDP_MBX_STARTUP_CTRL2);
+	if (readl(hqvdp->regs + HQVDP_MBX_STARTUP_CTRL2)
+					& STARTUP_CTRL2_FETCH_EN)
+		seq_puts(s, "\tFetch is enabled");
+	else
+		seq_puts(s, "\tFetch is NOT enabled");
+	DBGFS_DUMP(HQVDP_MBX_GP_STATUS);
+	DBGFS_DUMP(HQVDP_MBX_NEXT_CMD);
+	DBGFS_DUMP(HQVDP_MBX_CURRENT_CMD);
+	DBGFS_DUMP(HQVDP_MBX_SOFT_VSYNC);
+	if (!(readl(hqvdp->regs + HQVDP_MBX_SOFT_VSYNC) & 3))
+		seq_puts(s, "\tHW Vsync");
+	else
+		seq_puts(s, "\tSW Vsync ?!?!");
+
+	/* Last command */
+	cmd = readl(hqvdp->regs + HQVDP_MBX_CURRENT_CMD);
+	cmd_offset = sti_hqvdp_get_curr_cmd(hqvdp);
+	if (cmd_offset == -1) {
+		seq_puts(s, "\n\n  Last command: unknown");
+	} else {
+		virt = hqvdp->hqvdp_cmd + cmd_offset;
+		seq_printf(s, "\n\n  Last command: address @ 0x%x (0x%p)",
+			   cmd, virt);
+		hqvdp_dbg_dump_cmd(s, (struct sti_hqvdp_cmd *)virt);
+	}
+
+	/* Next command */
+	cmd = readl(hqvdp->regs + HQVDP_MBX_NEXT_CMD);
+	cmd_offset = sti_hqvdp_get_next_cmd(hqvdp);
+	if (cmd_offset == -1) {
+		seq_puts(s, "\n\n  Next command: unknown");
+	} else {
+		virt = hqvdp->hqvdp_cmd + cmd_offset;
+		seq_printf(s, "\n\n  Next command address: @ 0x%x (0x%p)",
+			   cmd, virt);
+		hqvdp_dbg_dump_cmd(s, (struct sti_hqvdp_cmd *)virt);
+	}
+
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list hqvdp_debugfs_files[] = {
+	{ "hqvdp", hqvdp_dbg_show, 0, NULL },
+};
+
+static int hqvdp_debugfs_init(struct sti_hqvdp *hqvdp, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(hqvdp_debugfs_files); i++)
+		hqvdp_debugfs_files[i].data = hqvdp;
+
+	return drm_debugfs_create_files(hqvdp_debugfs_files,
+					ARRAY_SIZE(hqvdp_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
+/**
  * sti_hqvdp_update_hvsrc
  * @orient: horizontal or vertical
  * @scale:  scaling/zoom factor
@@ -580,7 +813,7 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
 		btm_cmd_offset = sti_hqvdp_get_free_cmd(hqvdp);
 		top_cmd_offest = sti_hqvdp_get_curr_cmd(hqvdp);
 		if ((btm_cmd_offset == -1) || (top_cmd_offest == -1)) {
-			DRM_ERROR("Cannot get cmds, skip btm field\n");
+			DRM_DEBUG_DRIVER("Warning: no cmd, will skip field\n");
 			return -EBUSY;
 		}
 
@@ -599,11 +832,12 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
 		writel(hqvdp->hqvdp_cmd_paddr + btm_cmd_offset,
 				hqvdp->regs + HQVDP_MBX_NEXT_CMD);
 
-		hqvdp->curr_field_count++;
 		hqvdp->btm_field_pending = false;
 
 		dev_dbg(hqvdp->dev, "%s Posted command:0x%x\n",
 				__func__, hqvdp->hqvdp_cmd_paddr);
+
+		sti_plane_update_fps(&hqvdp->plane, false, true);
 	}
 
 	return 0;
@@ -612,19 +846,21 @@ int sti_hqvdp_vtg_cb(struct notifier_block *nb, unsigned long evt, void *data)
 static void sti_hqvdp_init(struct sti_hqvdp *hqvdp)
 {
 	int size;
+	dma_addr_t dma_addr;
 
 	hqvdp->vtg_nb.notifier_call = sti_hqvdp_vtg_cb;
 
 	/* Allocate memory for the VDP commands */
 	size = NB_VDP_CMD * sizeof(struct sti_hqvdp_cmd);
 	hqvdp->hqvdp_cmd = dma_alloc_wc(hqvdp->dev, size,
-					&hqvdp->hqvdp_cmd_paddr,
+					&dma_addr,
 					GFP_KERNEL | GFP_DMA);
 	if (!hqvdp->hqvdp_cmd) {
 		DRM_ERROR("Failed to allocate memory for VDP cmd\n");
 		return;
 	}
 
+	hqvdp->hqvdp_cmd_paddr = (u32)dma_addr;
 	memset(hqvdp->hqvdp_cmd, 0, size);
 }
 
@@ -670,7 +906,7 @@ static void sti_hqvdp_start_xp70(struct sti_hqvdp *hqvdp)
 	DRM_DEBUG_DRIVER("\n");
 
 	if (hqvdp->xp70_initialized) {
-		DRM_INFO("HQVDP XP70 already initialized\n");
+		DRM_DEBUG_DRIVER("HQVDP XP70 already initialized\n");
 		return;
 	}
 
@@ -775,53 +1011,131 @@ out:
 	release_firmware(firmware);
 }
 
-static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
-				    struct drm_plane_state *oldstate)
+static int sti_hqvdp_atomic_check(struct drm_plane *drm_plane,
+				  struct drm_plane_state *state)
 {
-	struct drm_plane_state *state = drm_plane->state;
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_hqvdp *hqvdp = to_sti_hqvdp(plane);
 	struct drm_crtc *crtc = state->crtc;
-	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_display_mode *mode = &crtc->mode;
-	int dst_x = state->crtc_x;
-	int dst_y = state->crtc_y;
-	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
-	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
-	/* src_x are in 16.16 format */
-	int src_x = state->src_x >> 16;
-	int src_y = state->src_y >> 16;
-	int src_w = state->src_w >> 16;
-	int src_h = state->src_h >> 16;
 	bool first_prepare = plane->status == STI_PLANE_DISABLED ? true : false;
-	struct drm_gem_cma_object *cma_obj;
-	struct sti_hqvdp_cmd *cmd;
-	int scale_h, scale_v;
-	int cmd_offset;
+	struct drm_crtc_state *crtc_state;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
+
+	/* no need for further checks if the plane is being disabled */
+	if (!crtc || !fb)
+		return 0;
+
+	crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+	mode = &crtc_state->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+
+	if (!sti_hqvdp_check_hw_scaling(hqvdp, mode,
+					src_w, src_h,
+					dst_w, dst_h)) {
+		DRM_ERROR("Scaling beyond HW capabilities\n");
+		return -EINVAL;
+	}
+
+	if (!drm_fb_cma_get_gem_obj(fb, 0)) {
+		DRM_ERROR("Can't get CMA GEM object for fb\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Input / output size
+	 * Align to upper even value
+	 */
+	dst_w = ALIGN(dst_w, 2);
+	dst_h = ALIGN(dst_h, 2);
+
+	if ((src_w > MAX_WIDTH) || (src_w < MIN_WIDTH) ||
+	    (src_h > MAX_HEIGHT) || (src_h < MIN_HEIGHT) ||
+	    (dst_w > MAX_WIDTH) || (dst_w < MIN_WIDTH) ||
+	    (dst_h > MAX_HEIGHT) || (dst_h < MIN_HEIGHT)) {
+		DRM_ERROR("Invalid in/out size %dx%d -> %dx%d\n",
+			  src_w, src_h,
+			  dst_w, dst_h);
+		return -EINVAL;
+	}
+
+	if (first_prepare) {
+		/* Start HQVDP XP70 coprocessor */
+		sti_hqvdp_start_xp70(hqvdp);
+
+		/* Prevent VTG shutdown */
+		if (clk_prepare_enable(hqvdp->clk_pix_main)) {
+			DRM_ERROR("Failed to prepare/enable pix main clk\n");
+			return -EINVAL;
+		}
+
+		/* Register VTG Vsync callback to handle bottom fields */
+		if (sti_vtg_register_client(hqvdp->vtg,
+					    &hqvdp->vtg_nb,
+					    crtc)) {
+			DRM_ERROR("Cannot register VTG notifier\n");
+			return -EINVAL;
+		}
+	}
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
-		      crtc->base.id, sti_mixer_to_str(mixer),
+		      crtc->base.id, sti_mixer_to_str(to_sti_mixer(crtc)),
 		      drm_plane->base.id, sti_plane_to_str(plane));
 	DRM_DEBUG_KMS("%s dst=(%dx%d)@(%d,%d) - src=(%dx%d)@(%d,%d)\n",
 		      sti_plane_to_str(plane),
 		      dst_w, dst_h, dst_x, dst_y,
 		      src_w, src_h, src_x, src_y);
 
+	return 0;
+}
+
+static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
+				    struct drm_plane_state *oldstate)
+{
+	struct drm_plane_state *state = drm_plane->state;
+	struct sti_plane *plane = to_sti_plane(drm_plane);
+	struct sti_hqvdp *hqvdp = to_sti_hqvdp(plane);
+	struct drm_crtc *crtc = state->crtc;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_display_mode *mode;
+	int dst_x, dst_y, dst_w, dst_h;
+	int src_x, src_y, src_w, src_h;
+	struct drm_gem_cma_object *cma_obj;
+	struct sti_hqvdp_cmd *cmd;
+	int scale_h, scale_v;
+	int cmd_offset;
+
+	if (!crtc || !fb)
+		return;
+
+	mode = &crtc->mode;
+	dst_x = state->crtc_x;
+	dst_y = state->crtc_y;
+	dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
+	dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	/* src_x are in 16.16 format */
+	src_x = state->src_x >> 16;
+	src_y = state->src_y >> 16;
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+
 	cmd_offset = sti_hqvdp_get_free_cmd(hqvdp);
 	if (cmd_offset == -1) {
-		DRM_ERROR("No available hqvdp_cmd now\n");
+		DRM_DEBUG_DRIVER("Warning: no cmd, will skip frame\n");
 		return;
 	}
 	cmd = hqvdp->hqvdp_cmd + cmd_offset;
 
-	if (!sti_hqvdp_check_hw_scaling(hqvdp, mode,
-					src_w, src_h,
-					dst_w, dst_h)) {
-		DRM_ERROR("Scaling beyond HW capabilities\n");
-		return;
-	}
-
 	/* Static parameters, defaulting to progressive mode */
 	cmd->top.config = TOP_CONFIG_PROGRESSIVE;
 	cmd->top.mem_format = TOP_MEM_FORMAT_DFLT;
@@ -836,10 +1150,6 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	cmd->iqi.pxf_conf = IQI_PXF_CONF_DFLT;
 
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
-	if (!cma_obj) {
-		DRM_ERROR("Can't get CMA GEM object for fb\n");
-		return;
-	}
 
 	DRM_DEBUG_DRIVER("drm FB:%d format:%.4s phys@:0x%lx\n", fb->base.id,
 			 (char *)&fb->pixel_format,
@@ -860,16 +1170,6 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	dst_w = ALIGN(dst_w, 2);
 	dst_h = ALIGN(dst_h, 2);
 
-	if ((src_w > MAX_WIDTH) || (src_w < MIN_WIDTH) ||
-	    (src_h > MAX_HEIGHT) || (src_h < MIN_HEIGHT) ||
-	    (dst_w > MAX_WIDTH) || (dst_w < MIN_WIDTH) ||
-	    (dst_h > MAX_HEIGHT) || (dst_h < MIN_HEIGHT)) {
-		DRM_ERROR("Invalid in/out size %dx%d -> %dx%d\n",
-			  src_w, src_h,
-			  dst_w, dst_h);
-		return;
-	}
-
 	cmd->top.input_viewport_size = src_h << 16 | src_w;
 	cmd->top.input_frame_size = src_h << 16 | src_w;
 	cmd->hvsrc.output_picture_size = dst_h << 16 | dst_w;
@@ -900,30 +1200,9 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	scale_v = SCALE_FACTOR * dst_h / src_h;
 	sti_hqvdp_update_hvsrc(HVSRC_VERT, scale_v, &cmd->hvsrc);
 
-	if (first_prepare) {
-		/* Start HQVDP XP70 coprocessor */
-		sti_hqvdp_start_xp70(hqvdp);
-
-		/* Prevent VTG shutdown */
-		if (clk_prepare_enable(hqvdp->clk_pix_main)) {
-			DRM_ERROR("Failed to prepare/enable pix main clk\n");
-			return;
-		}
-
-		/* Register VTG Vsync callback to handle bottom fields */
-		if (sti_vtg_register_client(hqvdp->vtg,
-					    &hqvdp->vtg_nb,
-					    crtc)) {
-			DRM_ERROR("Cannot register VTG notifier\n");
-			return;
-		}
-	}
-
 	writel(hqvdp->hqvdp_cmd_paddr + cmd_offset,
 	       hqvdp->regs + HQVDP_MBX_NEXT_CMD);
 
-	hqvdp->curr_field_count++;
-
 	/* Interlaced : get ready to display the bottom field at next Vsync */
 	if (fb->flags & DRM_MODE_FB_INTERLACED)
 		hqvdp->btm_field_pending = true;
@@ -931,6 +1210,8 @@ static void sti_hqvdp_atomic_update(struct drm_plane *drm_plane,
 	dev_dbg(hqvdp->dev, "%s Posted command:0x%x\n",
 		__func__, hqvdp->hqvdp_cmd_paddr + cmd_offset);
 
+	sti_plane_update_fps(plane, true, true);
+
 	plane->status = STI_PLANE_UPDATED;
 }
 
@@ -938,7 +1219,6 @@ static void sti_hqvdp_atomic_disable(struct drm_plane *drm_plane,
 				     struct drm_plane_state *oldstate)
 {
 	struct sti_plane *plane = to_sti_plane(drm_plane);
-	struct sti_mixer *mixer = to_sti_mixer(drm_plane->crtc);
 
 	if (!drm_plane->crtc) {
 		DRM_DEBUG_DRIVER("drm plane:%d not enabled\n",
@@ -947,13 +1227,15 @@ static void sti_hqvdp_atomic_disable(struct drm_plane *drm_plane,
 	}
 
 	DRM_DEBUG_DRIVER("CRTC:%d (%s) drm plane:%d (%s)\n",
-			 drm_plane->crtc->base.id, sti_mixer_to_str(mixer),
+			 drm_plane->crtc->base.id,
+			 sti_mixer_to_str(to_sti_mixer(drm_plane->crtc)),
 			 drm_plane->base.id, sti_plane_to_str(plane));
 
 	plane->status = STI_PLANE_DISABLING;
 }
 
 static const struct drm_plane_helper_funcs sti_hqvdp_helpers_funcs = {
+	.atomic_check = sti_hqvdp_atomic_check,
 	.atomic_update = sti_hqvdp_atomic_update,
 	.atomic_disable = sti_hqvdp_atomic_disable,
 };
@@ -983,6 +1265,9 @@ static struct drm_plane *sti_hqvdp_create(struct drm_device *drm_dev,
 
 	sti_plane_init_property(&hqvdp->plane, DRM_PLANE_TYPE_OVERLAY);
 
+	if (hqvdp_debugfs_init(hqvdp, drm_dev->primary))
+		DRM_ERROR("HQVDP debugfs setup failed\n");
+
 	return &hqvdp->plane.drm_plane;
 }
 
diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c
index 49db835dce03..e7425c38fc93 100644
--- a/drivers/gpu/drm/sti/sti_mixer.c
+++ b/drivers/gpu/drm/sti/sti_mixer.c
@@ -75,6 +75,145 @@ static inline void sti_mixer_reg_write(struct sti_mixer *mixer,
 	writel(val, mixer->regs + reg_id);
 }
 
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   sti_mixer_reg_read(mixer, reg))
+
+static void mixer_dbg_ctl(struct seq_file *s, int val)
+{
+	unsigned int i;
+	int count = 0;
+	char *const disp_layer[] = {"BKG", "VID0", "VID1", "GDP0",
+				    "GDP1", "GDP2", "GDP3"};
+
+	seq_puts(s, "\tEnabled: ");
+	for (i = 0; i < 7; i++) {
+		if (val & 1) {
+			seq_printf(s, "%s ", disp_layer[i]);
+			count++;
+		}
+		val = val >> 1;
+	}
+
+	val = val >> 2;
+	if (val & 1) {
+		seq_puts(s, "CURS ");
+		count++;
+	}
+	if (!count)
+		seq_puts(s, "Nothing");
+}
+
+static void mixer_dbg_crb(struct seq_file *s, int val)
+{
+	int i;
+
+	seq_puts(s, "\tDepth: ");
+	for (i = 0; i < GAM_MIXER_NB_DEPTH_LEVEL; i++) {
+		switch (val & GAM_DEPTH_MASK_ID) {
+		case GAM_DEPTH_VID0_ID:
+			seq_puts(s, "VID0");
+			break;
+		case GAM_DEPTH_VID1_ID:
+			seq_puts(s, "VID1");
+			break;
+		case GAM_DEPTH_GDP0_ID:
+			seq_puts(s, "GDP0");
+			break;
+		case GAM_DEPTH_GDP1_ID:
+			seq_puts(s, "GDP1");
+			break;
+		case GAM_DEPTH_GDP2_ID:
+			seq_puts(s, "GDP2");
+			break;
+		case GAM_DEPTH_GDP3_ID:
+			seq_puts(s, "GDP3");
+			break;
+		default:
+			seq_puts(s, "---");
+		}
+
+		if (i < GAM_MIXER_NB_DEPTH_LEVEL - 1)
+			seq_puts(s, " < ");
+		val = val >> 3;
+	}
+}
+
+static void mixer_dbg_mxn(struct seq_file *s, void *addr)
+{
+	int i;
+
+	for (i = 1; i < 8; i++)
+		seq_printf(s, "-0x%08X", (int)readl(addr + i * 4));
+}
+
+static int mixer_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_mixer *mixer = (struct sti_mixer *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "%s: (vaddr = 0x%p)",
+		   sti_mixer_to_str(mixer), mixer->regs);
+
+	DBGFS_DUMP(GAM_MIXER_CTL);
+	mixer_dbg_ctl(s, sti_mixer_reg_read(mixer, GAM_MIXER_CTL));
+	DBGFS_DUMP(GAM_MIXER_BKC);
+	DBGFS_DUMP(GAM_MIXER_BCO);
+	DBGFS_DUMP(GAM_MIXER_BCS);
+	DBGFS_DUMP(GAM_MIXER_AVO);
+	DBGFS_DUMP(GAM_MIXER_AVS);
+	DBGFS_DUMP(GAM_MIXER_CRB);
+	mixer_dbg_crb(s, sti_mixer_reg_read(mixer, GAM_MIXER_CRB));
+	DBGFS_DUMP(GAM_MIXER_ACT);
+	DBGFS_DUMP(GAM_MIXER_MBP);
+	DBGFS_DUMP(GAM_MIXER_MX0);
+	mixer_dbg_mxn(s, mixer->regs + GAM_MIXER_MX0);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list mixer0_debugfs_files[] = {
+	{ "mixer_main", mixer_dbg_show, 0, NULL },
+};
+
+static struct drm_info_list mixer1_debugfs_files[] = {
+	{ "mixer_aux", mixer_dbg_show, 0, NULL },
+};
+
+static int mixer_debugfs_init(struct sti_mixer *mixer, struct drm_minor *minor)
+{
+	unsigned int i;
+	struct drm_info_list *mixer_debugfs_files;
+	int nb_files;
+
+	switch (mixer->id) {
+	case STI_MIXER_MAIN:
+		mixer_debugfs_files = mixer0_debugfs_files;
+		nb_files = ARRAY_SIZE(mixer0_debugfs_files);
+		break;
+	case STI_MIXER_AUX:
+		mixer_debugfs_files = mixer1_debugfs_files;
+		nb_files = ARRAY_SIZE(mixer1_debugfs_files);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < nb_files; i++)
+		mixer_debugfs_files[i].data = mixer;
+
+	return drm_debugfs_create_files(mixer_debugfs_files,
+					nb_files,
+					minor->debugfs_root, minor);
+}
+
 void sti_mixer_set_background_status(struct sti_mixer *mixer, bool enable)
 {
 	u32 val = sti_mixer_reg_read(mixer, GAM_MIXER_CTL);
@@ -237,7 +376,9 @@ void sti_mixer_set_matrix(struct sti_mixer *mixer)
 				    mixerColorSpaceMatIdentity[i]);
 }
 
-struct sti_mixer *sti_mixer_create(struct device *dev, int id,
+struct sti_mixer *sti_mixer_create(struct device *dev,
+				   struct drm_device *drm_dev,
+				   int id,
 				   void __iomem *baseaddr)
 {
 	struct sti_mixer *mixer = devm_kzalloc(dev, sizeof(*mixer), GFP_KERNEL);
@@ -258,5 +399,8 @@ struct sti_mixer *sti_mixer_create(struct device *dev, int id,
 	DRM_DEBUG_DRIVER("%s created. Regs=%p\n",
 			 sti_mixer_to_str(mixer), mixer->regs);
 
+	if (mixer_debugfs_init(mixer, drm_dev->primary))
+		DRM_ERROR("MIXER debugfs setup failed\n");
+
 	return mixer;
 }
diff --git a/drivers/gpu/drm/sti/sti_mixer.h b/drivers/gpu/drm/sti/sti_mixer.h
index efb1a9a5ba86..6f35fc086873 100644
--- a/drivers/gpu/drm/sti/sti_mixer.h
+++ b/drivers/gpu/drm/sti/sti_mixer.h
@@ -42,7 +42,9 @@ struct sti_mixer {
 
 const char *sti_mixer_to_str(struct sti_mixer *mixer);
 
-struct sti_mixer *sti_mixer_create(struct device *dev, int id,
+struct sti_mixer *sti_mixer_create(struct device *dev,
+				   struct drm_device *drm_dev,
+				   int id,
 				   void __iomem *baseaddr);
 
 int sti_mixer_set_plane_status(struct sti_mixer *mixer,
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index 2e5c751910c5..f10c98d3f012 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -43,6 +43,69 @@ const char *sti_plane_to_str(struct sti_plane *plane)
 	}
 }
 
+#define STI_FPS_INTERVAL_MS     3000
+
+static int sti_plane_timespec_ms_diff(struct timespec lhs, struct timespec rhs)
+{
+	struct timespec tmp_ts = timespec_sub(lhs, rhs);
+	u64 tmp_ns = (u64)timespec_to_ns(&tmp_ts);
+
+	do_div(tmp_ns, NSEC_PER_MSEC);
+
+	return (u32)tmp_ns;
+}
+
+void sti_plane_update_fps(struct sti_plane *plane,
+			  bool new_frame,
+			  bool new_field)
+{
+	struct timespec now;
+	struct sti_fps_info *fps;
+	int fpks, fipks, ms_since_last, num_frames, num_fields;
+
+	getrawmonotonic(&now);
+
+	/* Compute number of frame updates */
+	fps = &plane->fps_info;
+
+	if (new_field)
+		fps->curr_field_counter++;
+
+	/* do not perform fps calcul if new_frame is false */
+	if (!new_frame)
+		return;
+
+	fps->curr_frame_counter++;
+	ms_since_last = sti_plane_timespec_ms_diff(now, fps->last_timestamp);
+	num_frames = fps->curr_frame_counter - fps->last_frame_counter;
+
+	if (num_frames <= 0  || ms_since_last < STI_FPS_INTERVAL_MS)
+		return;
+
+	fps->last_timestamp = now;
+	fps->last_frame_counter = fps->curr_frame_counter;
+	fpks = (num_frames * 1000000) / ms_since_last;
+	snprintf(plane->fps_info.fps_str, FPS_LENGTH, "%-6s @ %d.%.3d fps",
+		 sti_plane_to_str(plane), fpks / 1000, fpks % 1000);
+
+	if (fps->curr_field_counter) {
+		/* Compute number of field updates */
+		num_fields = fps->curr_field_counter - fps->last_field_counter;
+		fps->last_field_counter = fps->curr_field_counter;
+		fipks = (num_fields * 1000000) / ms_since_last;
+		snprintf(plane->fps_info.fips_str,
+			 FPS_LENGTH, " - %d.%.3d field/sec",
+			 fipks / 1000, fipks % 1000);
+	} else {
+		plane->fps_info.fips_str[0] = '\0';
+	}
+
+	if (fps->output)
+		DRM_INFO("%s%s\n",
+			 plane->fps_info.fps_str,
+			 plane->fps_info.fips_str);
+}
+
 static void sti_plane_destroy(struct drm_plane *drm_plane)
 {
 	DRM_DEBUG_DRIVER("\n");
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index 86f1e6fc81b9..c50a3b9f5d37 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -50,6 +50,18 @@ enum sti_plane_status {
 	STI_PLANE_DISABLED,
 };
 
+#define FPS_LENGTH 64
+struct sti_fps_info {
+	bool output;
+	unsigned int curr_frame_counter;
+	unsigned int last_frame_counter;
+	unsigned int curr_field_counter;
+	unsigned int last_field_counter;
+	struct timespec last_timestamp;
+	char fps_str[FPS_LENGTH];
+	char fips_str[FPS_LENGTH];
+};
+
 /**
  * STI plane structure
  *
@@ -57,15 +69,20 @@ enum sti_plane_status {
  * @desc:               plane type & id
  * @status:             to know the status of the plane
  * @zorder:             plane z-order
+ * @fps_info:           frame per second info
  */
 struct sti_plane {
 	struct drm_plane drm_plane;
 	enum sti_plane_desc desc;
 	enum sti_plane_status status;
 	int zorder;
+	struct sti_fps_info fps_info;
 };
 
 const char *sti_plane_to_str(struct sti_plane *plane);
+void sti_plane_update_fps(struct sti_plane *plane,
+			  bool new_frame,
+			  bool new_field);
 void sti_plane_init_property(struct sti_plane *plane,
 			     enum drm_plane_type type);
 #endif
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index f2afcf5438b8..2c99016443e5 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -17,6 +17,7 @@
 #include <drm/drm_crtc_helper.h>
 
 #include "sti_crtc.h"
+#include "sti_vtg.h"
 
 /* glue registers */
 #define TVO_CSC_MAIN_M0                  0x000
@@ -85,19 +86,7 @@
 #define TVO_VIP_SEL_INPUT_BYPASSED       1
 
 #define TVO_SYNC_MAIN_VTG_SET_REF        0x00
-#define TVO_SYNC_MAIN_VTG_SET_1          0x01
-#define TVO_SYNC_MAIN_VTG_SET_2          0x02
-#define TVO_SYNC_MAIN_VTG_SET_3          0x03
-#define TVO_SYNC_MAIN_VTG_SET_4          0x04
-#define TVO_SYNC_MAIN_VTG_SET_5          0x05
-#define TVO_SYNC_MAIN_VTG_SET_6          0x06
 #define TVO_SYNC_AUX_VTG_SET_REF         0x10
-#define TVO_SYNC_AUX_VTG_SET_1           0x11
-#define TVO_SYNC_AUX_VTG_SET_2           0x12
-#define TVO_SYNC_AUX_VTG_SET_3           0x13
-#define TVO_SYNC_AUX_VTG_SET_4           0x14
-#define TVO_SYNC_AUX_VTG_SET_5           0x15
-#define TVO_SYNC_AUX_VTG_SET_6           0x16
 
 #define TVO_SYNC_HD_DCS_SHIFT            8
 
@@ -106,6 +95,8 @@
 
 #define ENCODER_CRTC_MASK                (BIT(0) | BIT(1))
 
+#define TVO_MIN_HD_HEIGHT                720
+
 /* enum listing the supported output data format */
 enum sti_tvout_video_out_type {
 	STI_TVOUT_VIDEO_OUT_RGB,
@@ -269,6 +260,31 @@ static void tvout_vip_set_in_vid_fmt(struct sti_tvout *tvout,
 }
 
 /**
+ * Set preformatter matrix
+ *
+ * @tvout: tvout structure
+ * @mode: display mode structure
+ */
+static void tvout_preformatter_set_matrix(struct sti_tvout *tvout,
+					  struct drm_display_mode *mode)
+{
+	unsigned int i;
+	const u32 *pf_matrix;
+
+	if (mode->vdisplay >= TVO_MIN_HD_HEIGHT)
+		pf_matrix = rgb_to_ycbcr_709;
+	else
+		pf_matrix = rgb_to_ycbcr_601;
+
+	for (i = 0; i < 8; i++) {
+		tvout_write(tvout, *(pf_matrix + i),
+			    TVO_CSC_MAIN_M0 + (i * 4));
+		tvout_write(tvout, *(pf_matrix + i),
+			    TVO_CSC_AUX_M0 + (i * 4));
+	}
+}
+
+/**
  * Start VIP block for DVO output
  *
  * @tvout: pointer on tvout structure
@@ -280,24 +296,26 @@ static void tvout_dvo_start(struct sti_tvout *tvout, bool main_path)
 	struct device_node *node = tvout->dev->of_node;
 	bool sel_input_logic_inverted = false;
 	u32 tvo_in_vid_format;
-	int val;
+	int val, tmp;
 
 	dev_dbg(tvout->dev, "%s\n", __func__);
 
 	if (main_path) {
 		DRM_DEBUG_DRIVER("main vip for DVO\n");
-		/* Select the input sync for dvo = VTG set 4 */
-		val  = TVO_SYNC_MAIN_VTG_SET_4 << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_4 << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_4;
+		/* Select the input sync for dvo */
+		tmp = TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_DVO;
+		val  = tmp << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
+		val |= tmp << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
+		val |= tmp;
 		tvout_write(tvout, val, TVO_DVO_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
 		DRM_DEBUG_DRIVER("aux vip for DVO\n");
-		/* Select the input sync for dvo = VTG set 4 */
-		val  = TVO_SYNC_AUX_VTG_SET_4 << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_4 << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_4;
+		/* Select the input sync for dvo */
+		tmp = TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_DVO;
+		val  = tmp << TVO_SYNC_DVO_PAD_VSYNC_SHIFT;
+		val |= tmp << TVO_SYNC_DVO_PAD_HSYNC_SHIFT;
+		val |= tmp;
 		tvout_write(tvout, val, TVO_DVO_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
@@ -308,9 +326,8 @@ static void tvout_dvo_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* Set clipping mode (Limited range RGB/Y) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_DVO,
-				TVO_VIP_CLIP_LIMITED_RANGE_RGB_Y);
+	/* Set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_DVO, TVO_VIP_CLIP_DISABLED);
 
 	/* Set round mode (rounded to 8-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_DVO, TVO_VIP_RND_8BIT_ROUNDED);
@@ -345,13 +362,17 @@ static void tvout_hdmi_start(struct sti_tvout *tvout, bool main_path)
 
 	if (main_path) {
 		DRM_DEBUG_DRIVER("main vip for hdmi\n");
-		/* select the input sync for hdmi = VTG set 1 */
-		tvout_write(tvout, TVO_SYNC_MAIN_VTG_SET_1, TVO_HDMI_SYNC_SEL);
+		/* select the input sync for hdmi */
+		tvout_write(tvout,
+			    TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDMI,
+			    TVO_HDMI_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
 		DRM_DEBUG_DRIVER("aux vip for hdmi\n");
-		/* select the input sync for hdmi = VTG set 1 */
-		tvout_write(tvout, TVO_SYNC_AUX_VTG_SET_1, TVO_HDMI_SYNC_SEL);
+		/* select the input sync for hdmi */
+		tvout_write(tvout,
+			    TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDMI,
+			    TVO_HDMI_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
 
@@ -361,9 +382,8 @@ static void tvout_hdmi_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* set clipping mode (Limited range RGB/Y) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDMI,
-			TVO_VIP_CLIP_LIMITED_RANGE_RGB_Y);
+	/* set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDMI, TVO_VIP_CLIP_DISABLED);
 
 	/* set round mode (rounded to 8-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_HDMI, TVO_VIP_RND_8BIT_ROUNDED);
@@ -397,13 +417,19 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 	dev_dbg(tvout->dev, "%s\n", __func__);
 
 	if (main_path) {
-		val = TVO_SYNC_MAIN_VTG_SET_2 << TVO_SYNC_HD_DCS_SHIFT;
-		val |= TVO_SYNC_MAIN_VTG_SET_3;
+		DRM_DEBUG_DRIVER("main vip for HDF\n");
+		/* Select the input sync for HD analog and HD DCS */
+		val  = TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDDCS;
+		val  = val << TVO_SYNC_HD_DCS_SHIFT;
+		val |= TVO_SYNC_MAIN_VTG_SET_REF | VTG_SYNC_ID_HDF;
 		tvout_write(tvout, val, TVO_HD_SYNC_SEL);
 		tvo_in_vid_format = TVO_MAIN_IN_VID_FORMAT;
 	} else {
-		val = TVO_SYNC_AUX_VTG_SET_2 << TVO_SYNC_HD_DCS_SHIFT;
-		val |= TVO_SYNC_AUX_VTG_SET_3;
+		DRM_DEBUG_DRIVER("aux vip for HDF\n");
+		/* Select the input sync for HD analog and HD DCS */
+		val  = TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDDCS;
+		val  = val << TVO_SYNC_HD_DCS_SHIFT;
+		val |= TVO_SYNC_AUX_VTG_SET_REF | VTG_SYNC_ID_HDF;
 		tvout_write(tvout, val, TVO_HD_SYNC_SEL);
 		tvo_in_vid_format = TVO_AUX_IN_VID_FORMAT;
 	}
@@ -414,8 +440,8 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 				  TVO_VIP_REORDER_Y_G_SEL,
 				  TVO_VIP_REORDER_CB_B_SEL);
 
-	/* set clipping mode (EAV/SAV clipping) */
-	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDF, TVO_VIP_CLIP_EAV_SAV);
+	/* set clipping mode */
+	tvout_vip_set_clip_mode(tvout, TVO_VIP_HDF, TVO_VIP_CLIP_DISABLED);
 
 	/* set round mode (rounded to 10-bit per component) */
 	tvout_vip_set_rnd(tvout, TVO_VIP_HDF, TVO_VIP_RND_10BIT_ROUNDED);
@@ -436,24 +462,164 @@ static void tvout_hda_start(struct sti_tvout *tvout, bool main_path)
 	tvout_write(tvout, 0, TVO_HD_DAC_CFG_OFF);
 }
 
-static void sti_tvout_encoder_dpms(struct drm_encoder *encoder, int mode)
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(tvout->regs + reg))
+
+static void tvout_dbg_vip(struct seq_file *s, int val)
 {
+	int r, g, b, tmp, mask;
+	char *const reorder[] = {"Y_G", "Cb_B", "Cr_R"};
+	char *const clipping[] = {"No", "EAV/SAV", "Limited range RGB/Y",
+				  "Limited range Cb/Cr", "decided by register"};
+	char *const round[] = {"8-bit", "10-bit", "12-bit"};
+	char *const input_sel[] = {"Main (color matrix enabled)",
+				   "Main (color matrix by-passed)",
+				   "", "", "", "", "", "",
+				   "Aux (color matrix enabled)",
+				   "Aux (color matrix by-passed)",
+				   "", "", "", "", "", "Force value"};
+
+	seq_puts(s, "\t");
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_R_SHIFT;
+	r = (val & mask) >> TVO_VIP_REORDER_R_SHIFT;
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_G_SHIFT;
+	g = (val & mask) >> TVO_VIP_REORDER_G_SHIFT;
+	mask = TVO_VIP_REORDER_MASK << TVO_VIP_REORDER_B_SHIFT;
+	b = (val & mask) >> TVO_VIP_REORDER_B_SHIFT;
+	seq_printf(s, "%-24s %s->%s %s->%s %s->%s\n", "Reorder:",
+		   reorder[r], reorder[TVO_VIP_REORDER_CR_R_SEL],
+		   reorder[g], reorder[TVO_VIP_REORDER_Y_G_SEL],
+		   reorder[b], reorder[TVO_VIP_REORDER_CB_B_SEL]);
+	seq_puts(s, "\t\t\t\t\t");
+	mask = TVO_VIP_CLIP_MASK << TVO_VIP_CLIP_SHIFT;
+	tmp = (val & mask) >> TVO_VIP_CLIP_SHIFT;
+	seq_printf(s, "%-24s %s\n", "Clipping:", clipping[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	mask = TVO_VIP_RND_MASK << TVO_VIP_RND_SHIFT;
+	tmp = (val & mask) >> TVO_VIP_RND_SHIFT;
+	seq_printf(s, "%-24s input data rounded to %s per component\n",
+		   "Round:", round[tmp]);
+	seq_puts(s, "\t\t\t\t\t");
+	tmp = (val & TVO_VIP_SEL_INPUT_MASK);
+	seq_printf(s, "%-24s %s", "Input selection:", input_sel[tmp]);
 }
 
-static bool sti_tvout_encoder_mode_fixup(struct drm_encoder *encoder,
-				       const struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
+static void tvout_dbg_hd_dac_cfg(struct seq_file *s, int val)
 {
-	return true;
+	seq_printf(s, "\t%-24s %s", "HD DAC:",
+		   val & 1 ? "disabled" : "enabled");
 }
 
-static void sti_tvout_encoder_mode_set(struct drm_encoder *encoder,
-				       struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
+static int tvout_dbg_show(struct seq_file *s, void *data)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_tvout *tvout = (struct sti_tvout *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_crtc *crtc;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "TVOUT: (vaddr = 0x%p)", tvout->regs);
+
+	seq_puts(s, "\n\n  HDMI encoder: ");
+	crtc = tvout->hdmi->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_HDMI_SYNC_SEL);
+		DBGFS_DUMP(TVO_VIP_HDMI);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_HDMI));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  DVO encoder: ");
+	crtc = tvout->dvo->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_DVO_SYNC_SEL);
+		DBGFS_DUMP(TVO_DVO_CONFIG);
+		DBGFS_DUMP(TVO_VIP_DVO);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_DVO));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  HDA encoder: ");
+	crtc = tvout->hda->crtc;
+	if (crtc) {
+		seq_printf(s, "connected to %s path",
+			   sti_crtc_is_main(crtc) ? "main" : "aux");
+		DBGFS_DUMP(TVO_HD_SYNC_SEL);
+		DBGFS_DUMP(TVO_HD_DAC_CFG_OFF);
+		tvout_dbg_hd_dac_cfg(s,
+				     readl(tvout->regs + TVO_HD_DAC_CFG_OFF));
+		DBGFS_DUMP(TVO_VIP_HDF);
+		tvout_dbg_vip(s, readl(tvout->regs + TVO_VIP_HDF));
+	} else {
+		seq_puts(s, "disabled");
+	}
+
+	seq_puts(s, "\n\n  main path configuration");
+	DBGFS_DUMP(TVO_CSC_MAIN_M0);
+	DBGFS_DUMP(TVO_CSC_MAIN_M1);
+	DBGFS_DUMP(TVO_CSC_MAIN_M2);
+	DBGFS_DUMP(TVO_CSC_MAIN_M3);
+	DBGFS_DUMP(TVO_CSC_MAIN_M4);
+	DBGFS_DUMP(TVO_CSC_MAIN_M5);
+	DBGFS_DUMP(TVO_CSC_MAIN_M6);
+	DBGFS_DUMP(TVO_CSC_MAIN_M7);
+	DBGFS_DUMP(TVO_MAIN_IN_VID_FORMAT);
+
+	seq_puts(s, "\n\n  auxiliary path configuration");
+	DBGFS_DUMP(TVO_CSC_AUX_M0);
+	DBGFS_DUMP(TVO_CSC_AUX_M2);
+	DBGFS_DUMP(TVO_CSC_AUX_M3);
+	DBGFS_DUMP(TVO_CSC_AUX_M4);
+	DBGFS_DUMP(TVO_CSC_AUX_M5);
+	DBGFS_DUMP(TVO_CSC_AUX_M6);
+	DBGFS_DUMP(TVO_CSC_AUX_M7);
+	DBGFS_DUMP(TVO_AUX_IN_VID_FORMAT);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list tvout_debugfs_files[] = {
+	{ "tvout", tvout_dbg_show, 0, NULL },
+};
+
+static void tvout_debugfs_exit(struct sti_tvout *tvout, struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(tvout_debugfs_files,
+				 ARRAY_SIZE(tvout_debugfs_files),
+				 minor);
+}
+
+static int tvout_debugfs_init(struct sti_tvout *tvout, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tvout_debugfs_files); i++)
+		tvout_debugfs_files[i].data = tvout;
+
+	return drm_debugfs_create_files(tvout_debugfs_files,
+					ARRAY_SIZE(tvout_debugfs_files),
+					minor->debugfs_root, minor);
+}
+
+static void sti_tvout_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
 }
 
-static void sti_tvout_encoder_prepare(struct drm_encoder *encoder)
+static void sti_tvout_encoder_mode_set(struct drm_encoder *encoder,
+				       struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)
 {
 }
 
@@ -469,10 +635,12 @@ static const struct drm_encoder_funcs sti_tvout_encoder_funcs = {
 	.destroy = sti_tvout_encoder_destroy,
 };
 
-static void sti_dvo_encoder_commit(struct drm_encoder *encoder)
+static void sti_dvo_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_dvo_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -486,10 +654,8 @@ static void sti_dvo_encoder_disable(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs sti_dvo_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
-	.mode_fixup = sti_tvout_encoder_mode_fixup,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_dvo_encoder_commit,
+	.enable = sti_dvo_encoder_enable,
 	.disable = sti_dvo_encoder_disable,
 };
 
@@ -520,10 +686,12 @@ sti_tvout_create_dvo_encoder(struct drm_device *dev,
 	return drm_encoder;
 }
 
-static void sti_hda_encoder_commit(struct drm_encoder *encoder)
+static void sti_hda_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_hda_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -540,10 +708,8 @@ static void sti_hda_encoder_disable(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs sti_hda_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
-	.mode_fixup = sti_tvout_encoder_mode_fixup,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_hda_encoder_commit,
+	.commit = sti_hda_encoder_enable,
 	.disable = sti_hda_encoder_disable,
 };
 
@@ -572,10 +738,12 @@ static struct drm_encoder *sti_tvout_create_hda_encoder(struct drm_device *dev,
 	return drm_encoder;
 }
 
-static void sti_hdmi_encoder_commit(struct drm_encoder *encoder)
+static void sti_hdmi_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sti_tvout *tvout = to_sti_tvout(encoder);
 
+	tvout_preformatter_set_matrix(tvout, &encoder->crtc->mode);
+
 	tvout_hdmi_start(tvout, sti_crtc_is_main(encoder->crtc));
 }
 
@@ -589,10 +757,8 @@ static void sti_hdmi_encoder_disable(struct drm_encoder *encoder)
 
 static const struct drm_encoder_helper_funcs sti_hdmi_encoder_helper_funcs = {
 	.dpms = sti_tvout_encoder_dpms,
-	.mode_fixup = sti_tvout_encoder_mode_fixup,
 	.mode_set = sti_tvout_encoder_mode_set,
-	.prepare = sti_tvout_encoder_prepare,
-	.commit = sti_hdmi_encoder_commit,
+	.commit = sti_hdmi_encoder_enable,
 	.disable = sti_hdmi_encoder_disable,
 };
 
@@ -638,26 +804,24 @@ static void sti_tvout_destroy_encoders(struct sti_tvout *tvout)
 	if (tvout->hda)
 		drm_encoder_cleanup(tvout->hda);
 	tvout->hda = NULL;
+
+	if (tvout->dvo)
+		drm_encoder_cleanup(tvout->dvo);
+	tvout->dvo = NULL;
 }
 
 static int sti_tvout_bind(struct device *dev, struct device *master, void *data)
 {
 	struct sti_tvout *tvout = dev_get_drvdata(dev);
 	struct drm_device *drm_dev = data;
-	unsigned int i;
 
 	tvout->drm_dev = drm_dev;
 
-	/* set preformatter matrix */
-	for (i = 0; i < 8; i++) {
-		tvout_write(tvout, rgb_to_ycbcr_601[i],
-			TVO_CSC_MAIN_M0 + (i * 4));
-		tvout_write(tvout, rgb_to_ycbcr_601[i],
-			TVO_CSC_AUX_M0 + (i * 4));
-	}
-
 	sti_tvout_create_encoders(drm_dev, tvout);
 
+	if (tvout_debugfs_init(tvout, drm_dev->primary))
+		DRM_ERROR("TVOUT debugfs setup failed\n");
+
 	return 0;
 }
 
@@ -665,8 +829,11 @@ static void sti_tvout_unbind(struct device *dev, struct device *master,
 	void *data)
 {
 	struct sti_tvout *tvout = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
 
 	sti_tvout_destroy_encoders(tvout);
+
+	tvout_debugfs_exit(tvout, drm_dev->primary);
 }
 
 static const struct component_ops sti_tvout_ops = {
diff --git a/drivers/gpu/drm/sti/sti_vid.c b/drivers/gpu/drm/sti/sti_vid.c
index a8254cc362a1..5a2c5dc3687b 100644
--- a/drivers/gpu/drm/sti/sti_vid.c
+++ b/drivers/gpu/drm/sti/sti_vid.c
@@ -42,6 +42,104 @@
 #define VID_MPR1_BT709          0x0AC50000
 #define VID_MPR2_BT709          0x07150545
 #define VID_MPR3_BT709          0x00000AE8
+/* YCbCr to RGB BT709:
+ * R = Y+1.3711Cr
+ * G = Y-0.6992Cr-0.3359Cb
+ * B = Y+1.7344Cb
+ */
+#define VID_MPR0_BT601          0x0A800000
+#define VID_MPR1_BT601          0x0AAF0000
+#define VID_MPR2_BT601          0x094E0754
+#define VID_MPR3_BT601          0x00000ADD
+
+#define VID_MIN_HD_HEIGHT       720
+
+#define DBGFS_DUMP(reg) seq_printf(s, "\n  %-25s 0x%08X", #reg, \
+				   readl(vid->regs + reg))
+
+static void vid_dbg_ctl(struct seq_file *s, int val)
+{
+	val = val >> 30;
+	seq_puts(s, "\t");
+
+	if (!(val & 1))
+		seq_puts(s, "NOT ");
+	seq_puts(s, "ignored on main mixer - ");
+
+	if (!(val & 2))
+		seq_puts(s, "NOT ");
+	seq_puts(s, "ignored on aux mixer");
+}
+
+static void vid_dbg_vpo(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txdo:%4d\tydo:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void vid_dbg_vps(struct seq_file *s, int val)
+{
+	seq_printf(s, "\txds:%4d\tyds:%4d", val & 0x0FFF, (val >> 16) & 0x0FFF);
+}
+
+static void vid_dbg_mst(struct seq_file *s, int val)
+{
+	if (val & 1)
+		seq_puts(s, "\tBUFFER UNDERFLOW!");
+}
+
+static int vid_dbg_show(struct seq_file *s, void *arg)
+{
+	struct drm_info_node *node = s->private;
+	struct sti_vid *vid = (struct sti_vid *)node->info_ent->data;
+	struct drm_device *dev = node->minor->dev;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(s, "VID: (vaddr= 0x%p)", vid->regs);
+
+	DBGFS_DUMP(VID_CTL);
+	vid_dbg_ctl(s, readl(vid->regs + VID_CTL));
+	DBGFS_DUMP(VID_ALP);
+	DBGFS_DUMP(VID_CLF);
+	DBGFS_DUMP(VID_VPO);
+	vid_dbg_vpo(s, readl(vid->regs + VID_VPO));
+	DBGFS_DUMP(VID_VPS);
+	vid_dbg_vps(s, readl(vid->regs + VID_VPS));
+	DBGFS_DUMP(VID_KEY1);
+	DBGFS_DUMP(VID_KEY2);
+	DBGFS_DUMP(VID_MPR0);
+	DBGFS_DUMP(VID_MPR1);
+	DBGFS_DUMP(VID_MPR2);
+	DBGFS_DUMP(VID_MPR3);
+	DBGFS_DUMP(VID_MST);
+	vid_dbg_mst(s, readl(vid->regs + VID_MST));
+	DBGFS_DUMP(VID_BC);
+	DBGFS_DUMP(VID_TINT);
+	DBGFS_DUMP(VID_CSAT);
+	seq_puts(s, "\n");
+
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
+
+static struct drm_info_list vid_debugfs_files[] = {
+	{ "vid", vid_dbg_show, 0, NULL },
+};
+
+static int vid_debugfs_init(struct sti_vid *vid, struct drm_minor *minor)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(vid_debugfs_files); i++)
+		vid_debugfs_files[i].data = vid;
+
+	return drm_debugfs_create_files(vid_debugfs_files,
+					ARRAY_SIZE(vid_debugfs_files),
+					minor->debugfs_root, minor);
+}
 
 void sti_vid_commit(struct sti_vid *vid,
 		    struct drm_plane_state *state)
@@ -52,6 +150,7 @@ void sti_vid_commit(struct sti_vid *vid,
 	int dst_y = state->crtc_y;
 	int dst_w = clamp_val(state->crtc_w, 0, mode->crtc_hdisplay - dst_x);
 	int dst_h = clamp_val(state->crtc_h, 0, mode->crtc_vdisplay - dst_y);
+	int src_h = state->src_h >> 16;
 	u32 val, ydo, xdo, yds, xds;
 
 	/* Input / output size
@@ -71,6 +170,19 @@ void sti_vid_commit(struct sti_vid *vid,
 
 	writel((ydo << 16) | xdo, vid->regs + VID_VPO);
 	writel((yds << 16) | xds, vid->regs + VID_VPS);
+
+	/* Color conversion parameters */
+	if (src_h >= VID_MIN_HD_HEIGHT) {
+		writel(VID_MPR0_BT709, vid->regs + VID_MPR0);
+		writel(VID_MPR1_BT709, vid->regs + VID_MPR1);
+		writel(VID_MPR2_BT709, vid->regs + VID_MPR2);
+		writel(VID_MPR3_BT709, vid->regs + VID_MPR3);
+	} else {
+		writel(VID_MPR0_BT601, vid->regs + VID_MPR0);
+		writel(VID_MPR1_BT601, vid->regs + VID_MPR1);
+		writel(VID_MPR2_BT601, vid->regs + VID_MPR2);
+		writel(VID_MPR3_BT601, vid->regs + VID_MPR3);
+	}
 }
 
 void sti_vid_disable(struct sti_vid *vid)
@@ -91,20 +203,14 @@ static void sti_vid_init(struct sti_vid *vid)
 	/* Opaque */
 	writel(VID_ALP_OPAQUE, vid->regs + VID_ALP);
 
-	/* Color conversion parameters */
-	writel(VID_MPR0_BT709, vid->regs + VID_MPR0);
-	writel(VID_MPR1_BT709, vid->regs + VID_MPR1);
-	writel(VID_MPR2_BT709, vid->regs + VID_MPR2);
-	writel(VID_MPR3_BT709, vid->regs + VID_MPR3);
-
 	/* Brightness, contrast, tint, saturation */
 	writel(VID_BC_DFLT, vid->regs + VID_BC);
 	writel(VID_TINT_DFLT, vid->regs + VID_TINT);
 	writel(VID_CSAT_DFLT, vid->regs + VID_CSAT);
 }
 
-struct sti_vid *sti_vid_create(struct device *dev, int id,
-			       void __iomem *baseaddr)
+struct sti_vid *sti_vid_create(struct device *dev, struct drm_device *drm_dev,
+			       int id, void __iomem *baseaddr)
 {
 	struct sti_vid *vid;
 
@@ -120,5 +226,8 @@ struct sti_vid *sti_vid_create(struct device *dev, int id,
 
 	sti_vid_init(vid);
 
+	if (vid_debugfs_init(vid, drm_dev->primary))
+		DRM_ERROR("VID debugfs setup failed\n");
+
 	return vid;
 }
diff --git a/drivers/gpu/drm/sti/sti_vid.h b/drivers/gpu/drm/sti/sti_vid.h
index 5dea4791f1d6..6c842344f3d8 100644
--- a/drivers/gpu/drm/sti/sti_vid.h
+++ b/drivers/gpu/drm/sti/sti_vid.h
@@ -23,7 +23,7 @@ struct sti_vid {
 void sti_vid_commit(struct sti_vid *vid,
 		    struct drm_plane_state *state);
 void sti_vid_disable(struct sti_vid *vid);
-struct sti_vid *sti_vid_create(struct device *dev, int id,
-			       void __iomem *baseaddr);
+struct sti_vid *sti_vid_create(struct device *dev, struct drm_device *drm_dev,
+			       int id, void __iomem *baseaddr);
 
 #endif
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index d56630c60039..32c7986b63ab 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -15,8 +15,8 @@
 
 #include "sti_vtg.h"
 
-#define VTG_TYPE_MASTER         0
-#define VTG_TYPE_SLAVE_BY_EXT0  1
+#define VTG_MODE_MASTER         0
+#define VTG_MODE_SLAVE_BY_EXT0  1
 
 /* registers offset */
 #define VTG_MODE            0x0000
@@ -64,6 +64,9 @@
 /* Delay introduced by the HDMI in nb of pixel */
 #define HDMI_DELAY          (5)
 
+/* Delay introduced by the DVO in nb of pixel */
+#define DVO_DELAY           (2)
+
 /* delay introduced by the Arbitrary Waveform Generator in nb of pixels */
 #define AWG_DELAY_HD        (-9)
 #define AWG_DELAY_ED        (-8)
@@ -71,13 +74,61 @@
 
 LIST_HEAD(vtg_lookup);
 
+/*
+ * STI VTG register offset structure
+ *
+ *@h_hd:     stores the VTG_H_HD_x     register offset
+ *@top_v_vd: stores the VTG_TOP_V_VD_x register offset
+ *@bot_v_vd: stores the VTG_BOT_V_VD_x register offset
+ *@top_v_hd: stores the VTG_TOP_V_HD_x register offset
+ *@bot_v_hd: stores the VTG_BOT_V_HD_x register offset
+ */
+struct sti_vtg_regs_offs {
+	u32 h_hd;
+	u32 top_v_vd;
+	u32 bot_v_vd;
+	u32 top_v_hd;
+	u32 bot_v_hd;
+};
+
+#define VTG_MAX_SYNC_OUTPUT 4
+static const struct sti_vtg_regs_offs vtg_regs_offs[VTG_MAX_SYNC_OUTPUT] = {
+	{ VTG_H_HD_1,
+	  VTG_TOP_V_VD_1, VTG_BOT_V_VD_1, VTG_TOP_V_HD_1, VTG_BOT_V_HD_1 },
+	{ VTG_H_HD_2,
+	  VTG_TOP_V_VD_2, VTG_BOT_V_VD_2, VTG_TOP_V_HD_2, VTG_BOT_V_HD_2 },
+	{ VTG_H_HD_3,
+	  VTG_TOP_V_VD_3, VTG_BOT_V_VD_3, VTG_TOP_V_HD_3, VTG_BOT_V_HD_3 },
+	{ VTG_H_HD_4,
+	  VTG_TOP_V_VD_4, VTG_BOT_V_VD_4, VTG_TOP_V_HD_4, VTG_BOT_V_HD_4 }
+};
+
+/*
+ * STI VTG synchronisation parameters structure
+ *
+ *@hsync: sample number falling and rising edge
+ *@vsync_line_top: vertical top field line number falling and rising edge
+ *@vsync_line_bot: vertical bottom field line number falling and rising edge
+ *@vsync_off_top: vertical top field sample number rising and falling edge
+ *@vsync_off_bot: vertical bottom field sample number rising and falling edge
+ */
+struct sti_vtg_sync_params {
+	u32 hsync;
+	u32 vsync_line_top;
+	u32 vsync_line_bot;
+	u32 vsync_off_top;
+	u32 vsync_off_bot;
+};
+
 /**
  * STI VTG structure
  *
  * @dev: pointer to device driver
- * @data: data associated to the device
+ * @np: device node
+ * @regs: register mapping
+ * @sync_params: synchronisation parameters used to generate timings
  * @irq: VTG irq
- * @type: VTG type (main or aux)
+ * @irq_status: store the IRQ status value
  * @notifier_list: notifier callback
  * @crtc: the CRTC for vblank event
  * @slave: slave vtg
@@ -87,6 +138,7 @@ struct sti_vtg {
 	struct device *dev;
 	struct device_node *np;
 	void __iomem *regs;
+	struct sti_vtg_sync_params sync_params[VTG_MAX_SYNC_OUTPUT];
 	int irq;
 	u32 irq_status;
 	struct raw_notifier_head notifier_list;
@@ -146,13 +198,69 @@ static void vtg_set_output_window(void __iomem *regs,
 	writel(video_bottom_field_stop, regs + VTG_VID_BFS);
 }
 
+static void vtg_set_hsync_vsync_pos(struct sti_vtg_sync_params *sync,
+				    int delay,
+				    const struct drm_display_mode *mode)
+{
+	long clocksperline, start, stop;
+	u32 risesync_top, fallsync_top;
+	u32 risesync_offs_top, fallsync_offs_top;
+
+	clocksperline = mode->htotal;
+
+	/* Get the hsync position */
+	start = 0;
+	stop = mode->hsync_end - mode->hsync_start;
+
+	start += delay;
+	stop  += delay;
+
+	if (start < 0)
+		start += clocksperline;
+	else if (start >= clocksperline)
+		start -= clocksperline;
+
+	if (stop < 0)
+		stop += clocksperline;
+	else if (stop >= clocksperline)
+		stop -= clocksperline;
+
+	sync->hsync = (stop << 16) | start;
+
+	/* Get the vsync position */
+	if (delay >= 0) {
+		risesync_top = 1;
+		fallsync_top = risesync_top;
+		fallsync_top += mode->vsync_end - mode->vsync_start;
+
+		fallsync_offs_top = (u32)delay;
+		risesync_offs_top = (u32)delay;
+	} else {
+		risesync_top = mode->vtotal;
+		fallsync_top = mode->vsync_end - mode->vsync_start;
+
+		fallsync_offs_top = clocksperline + delay;
+		risesync_offs_top = clocksperline + delay;
+	}
+
+	sync->vsync_line_top = (fallsync_top << 16) | risesync_top;
+	sync->vsync_off_top = (fallsync_offs_top << 16) | risesync_offs_top;
+
+	/* Only progressive supported for now */
+	sync->vsync_line_bot = sync->vsync_line_top;
+	sync->vsync_off_bot = sync->vsync_off_top;
+}
+
 static void vtg_set_mode(struct sti_vtg *vtg,
-			 int type, const struct drm_display_mode *mode)
+			 int type,
+			 struct sti_vtg_sync_params *sync,
+			 const struct drm_display_mode *mode)
 {
-	u32 tmp;
+	unsigned int i;
 
 	if (vtg->slave)
-		vtg_set_mode(vtg->slave, VTG_TYPE_SLAVE_BY_EXT0, mode);
+		vtg_set_mode(vtg->slave, VTG_MODE_SLAVE_BY_EXT0,
+			     vtg->sync_params, mode);
 
 	/* Set the number of clock cycles per line */
 	writel(mode->htotal, vtg->regs + VTG_CLKLN);
@@ -163,57 +271,31 @@ static void vtg_set_mode(struct sti_vtg *vtg,
 	/* Program output window */
 	vtg_set_output_window(vtg->regs, mode);
 
-	/* prepare VTG set 1 for HDMI */
-	tmp = (mode->hsync_end - mode->hsync_start + HDMI_DELAY) << 16;
-	tmp |= HDMI_DELAY;
-	writel(tmp, vtg->regs + VTG_H_HD_1);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_1);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_1);
-
-	tmp = HDMI_DELAY << 16;
-	tmp |= HDMI_DELAY;
-	writel(tmp, vtg->regs + VTG_TOP_V_HD_1);
-	writel(tmp, vtg->regs + VTG_BOT_V_HD_1);
-
-	/* prepare VTG set 2 for for HD DCS */
-	tmp = (mode->hsync_end - mode->hsync_start) << 16;
-	writel(tmp, vtg->regs + VTG_H_HD_2);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_2);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_2);
-	writel(0, vtg->regs + VTG_TOP_V_HD_2);
-	writel(0, vtg->regs + VTG_BOT_V_HD_2);
-
-	/* prepare VTG set 3 for HD Analog in HD mode */
-	tmp = (mode->hsync_end - mode->hsync_start + AWG_DELAY_HD) << 16;
-	tmp |= mode->htotal + AWG_DELAY_HD;
-	writel(tmp, vtg->regs + VTG_H_HD_3);
-
-	tmp = (mode->vsync_end - mode->vsync_start) << 16;
-	tmp |= mode->vtotal;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_3);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_3);
-
-	tmp = (mode->htotal + AWG_DELAY_HD) << 16;
-	tmp |= mode->htotal + AWG_DELAY_HD;
-	writel(tmp, vtg->regs + VTG_TOP_V_HD_3);
-	writel(tmp, vtg->regs + VTG_BOT_V_HD_3);
-
-	/* Prepare VTG set 4 for DVO */
-	tmp = (mode->hsync_end - mode->hsync_start) << 16;
-	writel(tmp, vtg->regs + VTG_H_HD_4);
-
-	tmp = (mode->vsync_end - mode->vsync_start + 1) << 16;
-	tmp |= 1;
-	writel(tmp, vtg->regs + VTG_TOP_V_VD_4);
-	writel(tmp, vtg->regs + VTG_BOT_V_VD_4);
-	writel(0, vtg->regs + VTG_TOP_V_HD_4);
-	writel(0, vtg->regs + VTG_BOT_V_HD_4);
+	/* Set hsync and vsync position for HDMI */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDMI - 1], HDMI_DELAY, mode);
+
+	/* Set hsync and vsync position for HD DCS */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDDCS - 1], 0, mode);
+
+	/* Set hsync and vsync position for HDF */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_HDF - 1], AWG_DELAY_HD, mode);
+
+	/* Set hsync and vsync position for DVO */
+	vtg_set_hsync_vsync_pos(&sync[VTG_SYNC_ID_DVO - 1], DVO_DELAY, mode);
+
+	/* Progam the syncs outputs */
+	for (i = 0; i < VTG_MAX_SYNC_OUTPUT ; i++) {
+		writel(sync[i].hsync,
+		       vtg->regs + vtg_regs_offs[i].h_hd);
+		writel(sync[i].vsync_line_top,
+		       vtg->regs + vtg_regs_offs[i].top_v_vd);
+		writel(sync[i].vsync_line_bot,
+		       vtg->regs + vtg_regs_offs[i].bot_v_vd);
+		writel(sync[i].vsync_off_top,
+		       vtg->regs + vtg_regs_offs[i].top_v_hd);
+		writel(sync[i].vsync_off_bot,
+		       vtg->regs + vtg_regs_offs[i].bot_v_hd);
+	}
 
 	/* mode */
 	writel(type, vtg->regs + VTG_MODE);
@@ -231,7 +313,7 @@ void sti_vtg_set_config(struct sti_vtg *vtg,
 		const struct drm_display_mode *mode)
 {
 	/* write configuration */
-	vtg_set_mode(vtg, VTG_TYPE_MASTER, mode);
+	vtg_set_mode(vtg, VTG_MODE_MASTER, vtg->sync_params, mode);
 
 	vtg_reset(vtg);
 
diff --git a/drivers/gpu/drm/sti/sti_vtg.h b/drivers/gpu/drm/sti/sti_vtg.h
index cd2439f89d05..f1dcdf9c2342 100644
--- a/drivers/gpu/drm/sti/sti_vtg.h
+++ b/drivers/gpu/drm/sti/sti_vtg.h
@@ -10,6 +10,11 @@
 #define VTG_TOP_FIELD_EVENT     1
 #define VTG_BOTTOM_FIELD_EVENT  2
 
+#define VTG_SYNC_ID_HDMI        1
+#define VTG_SYNC_ID_HDDCS       2
+#define VTG_SYNC_ID_HDF         3
+#define VTG_SYNC_ID_DVO         4
+
 struct sti_vtg;
 struct drm_display_mode;
 struct notifier_block;
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index dde6f208c347..fb2b4b0271a2 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -988,23 +988,6 @@ static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
 	spin_unlock_irqrestore(&drm->event_lock, flags);
 }
 
-void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&drm->event_lock, flags);
-
-	if (dc->event && dc->event->base.file_priv == file) {
-		dc->event->base.destroy(&dc->event->base);
-		drm_crtc_vblank_put(crtc);
-		dc->event = NULL;
-	}
-
-	spin_unlock_irqrestore(&drm->event_lock, flags);
-}
-
 static void tegra_dc_destroy(struct drm_crtc *crtc)
 {
 	drm_crtc_cleanup(crtc);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index c5c856a0879d..8e6b18caa706 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -858,10 +858,6 @@ static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file)
 {
 	struct tegra_drm_file *fpriv = file->driver_priv;
 	struct tegra_drm_context *context, *tmp;
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
-		tegra_dc_cancel_page_flip(crtc, file);
 
 	list_for_each_entry_safe(context, tmp, &fpriv->contexts, list)
 		tegra_drm_context_free(context);
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index c088f2f67eda..8a10f5b7d9dc 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -195,7 +195,6 @@ struct tegra_dc_window {
 u32 tegra_dc_get_vblank_counter(struct tegra_dc *dc);
 void tegra_dc_enable_vblank(struct tegra_dc *dc);
 void tegra_dc_disable_vblank(struct tegra_dc *dc);
-void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
 void tegra_dc_commit(struct tegra_dc *dc);
 int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 			       struct drm_crtc_state *crtc_state,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 7d07733bdc86..051e5e1b7ad6 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -21,25 +21,31 @@
 #include "tilcdc_drv.h"
 #include "tilcdc_regs.h"
 
+#define TILCDC_VBLANK_SAFETY_THRESHOLD_US 1000
+
 struct tilcdc_crtc {
 	struct drm_crtc base;
 
 	const struct tilcdc_panel_info *info;
-	uint32_t dirty;
-	dma_addr_t start, end;
 	struct drm_pending_vblank_event *event;
 	int dpms;
 	wait_queue_head_t frame_done_wq;
 	bool frame_done;
+	spinlock_t irq_lock;
 
-	/* fb currently set to scanout 0/1: */
-	struct drm_framebuffer *scanout[2];
+	ktime_t last_vblank;
+
+	struct drm_framebuffer *curr_fb;
+	struct drm_framebuffer *next_fb;
 
 	/* for deferred fb unref's: */
 	struct drm_flip_work unref_work;
 
 	/* Only set if an external encoder is connected */
 	bool simulate_vesa_sync;
+
+	int sync_lost_count;
+	bool frame_intact;
 };
 #define to_tilcdc_crtc(x) container_of(x, struct tilcdc_crtc, base)
 
@@ -54,79 +60,53 @@ static void unref_worker(struct drm_flip_work *work, void *val)
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
-static void set_scanout(struct drm_crtc *crtc, int n)
-{
-	static const uint32_t base_reg[] = {
-			LCDC_DMA_FB_BASE_ADDR_0_REG,
-			LCDC_DMA_FB_BASE_ADDR_1_REG,
-	};
-	static const uint32_t ceil_reg[] = {
-			LCDC_DMA_FB_CEILING_ADDR_0_REG,
-			LCDC_DMA_FB_CEILING_ADDR_1_REG,
-	};
-	static const uint32_t stat[] = {
-			LCDC_END_OF_FRAME0, LCDC_END_OF_FRAME1,
-	};
-	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	struct tilcdc_drm_private *priv = dev->dev_private;
-
-	pm_runtime_get_sync(dev->dev);
-	tilcdc_write(dev, base_reg[n], tilcdc_crtc->start);
-	tilcdc_write(dev, ceil_reg[n], tilcdc_crtc->end);
-	if (tilcdc_crtc->scanout[n]) {
-		drm_flip_work_queue(&tilcdc_crtc->unref_work, tilcdc_crtc->scanout[n]);
-		drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
-	}
-	tilcdc_crtc->scanout[n] = crtc->primary->fb;
-	drm_framebuffer_reference(tilcdc_crtc->scanout[n]);
-	tilcdc_crtc->dirty &= ~stat[n];
-	pm_runtime_put_sync(dev->dev);
-}
-
-static void update_scanout(struct drm_crtc *crtc)
+static void set_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
-	struct drm_framebuffer *fb = crtc->primary->fb;
 	struct drm_gem_cma_object *gem;
 	unsigned int depth, bpp;
+	dma_addr_t start, end;
 
 	drm_fb_get_bpp_depth(fb->pixel_format, &depth, &bpp);
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	tilcdc_crtc->start = gem->paddr + fb->offsets[0] +
-			(crtc->y * fb->pitches[0]) + (crtc->x * bpp/8);
+	start = gem->paddr + fb->offsets[0] +
+		crtc->y * fb->pitches[0] +
+		crtc->x * bpp / 8;
 
-	tilcdc_crtc->end = tilcdc_crtc->start +
-			(crtc->mode.vdisplay * fb->pitches[0]);
+	end = start + (crtc->mode.vdisplay * fb->pitches[0]);
 
-	if (tilcdc_crtc->dpms == DRM_MODE_DPMS_ON) {
-		/* already enabled, so just mark the frames that need
-		 * updating and they will be updated on vblank:
-		 */
-		tilcdc_crtc->dirty |= LCDC_END_OF_FRAME0 | LCDC_END_OF_FRAME1;
-		drm_vblank_get(dev, 0);
-	} else {
-		/* not enabled yet, so update registers immediately: */
-		set_scanout(crtc, 0);
-		set_scanout(crtc, 1);
-	}
+	tilcdc_write(dev, LCDC_DMA_FB_BASE_ADDR_0_REG, start);
+	tilcdc_write(dev, LCDC_DMA_FB_CEILING_ADDR_0_REG, end);
+
+	if (tilcdc_crtc->curr_fb)
+		drm_flip_work_queue(&tilcdc_crtc->unref_work,
+			tilcdc_crtc->curr_fb);
+
+	tilcdc_crtc->curr_fb = fb;
 }
 
-static void start(struct drm_crtc *crtc)
+static void reset(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_drm_private *priv = dev->dev_private;
 
-	if (priv->rev == 2) {
-		tilcdc_set(dev, LCDC_CLK_RESET_REG, LCDC_CLK_MAIN_RESET);
-		msleep(1);
-		tilcdc_clear(dev, LCDC_CLK_RESET_REG, LCDC_CLK_MAIN_RESET);
-		msleep(1);
-	}
+	if (priv->rev != 2)
+		return;
 
-	tilcdc_set(dev, LCDC_DMA_CTRL_REG, LCDC_DUAL_FRAME_BUFFER_ENABLE);
+	tilcdc_set(dev, LCDC_CLK_RESET_REG, LCDC_CLK_MAIN_RESET);
+	usleep_range(250, 1000);
+	tilcdc_clear(dev, LCDC_CLK_RESET_REG, LCDC_CLK_MAIN_RESET);
+}
+
+static void start(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+
+	reset(crtc);
+
+	tilcdc_clear(dev, LCDC_DMA_CTRL_REG, LCDC_DUAL_FRAME_BUFFER_ENABLE);
 	tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY));
 	tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
 }
@@ -138,17 +118,31 @@ static void stop(struct drm_crtc *crtc)
 	tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE);
 }
 
-static void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode);
 static void tilcdc_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 
 	tilcdc_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 
+	of_node_put(crtc->port);
 	drm_crtc_cleanup(crtc);
 	drm_flip_work_cleanup(&tilcdc_crtc->unref_work);
+}
+
+static int tilcdc_verify_fb(struct drm_crtc *crtc, struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = crtc->dev;
+	unsigned int depth, bpp;
+
+	drm_fb_get_bpp_depth(fb->pixel_format, &depth, &bpp);
 
-	kfree(tilcdc_crtc);
+	if (fb->pitches[0] != crtc->mode.hdisplay * bpp / 8) {
+		dev_err(dev->dev,
+			"Invalid pitch: fb and crtc widths must be the same");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static int tilcdc_crtc_page_flip(struct drm_crtc *crtc,
@@ -158,20 +152,48 @@ static int tilcdc_crtc_page_flip(struct drm_crtc *crtc,
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
+	int r;
+	unsigned long flags;
+	s64 tdiff;
+	ktime_t next_vblank;
+
+	r = tilcdc_verify_fb(crtc, fb);
+	if (r)
+		return r;
 
 	if (tilcdc_crtc->event) {
 		dev_err(dev->dev, "already pending page flip!\n");
 		return -EBUSY;
 	}
 
+	drm_framebuffer_reference(fb);
+
 	crtc->primary->fb = fb;
+
+	pm_runtime_get_sync(dev->dev);
+
+	spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+
+	next_vblank = ktime_add_us(tilcdc_crtc->last_vblank,
+		1000000 / crtc->hwmode.vrefresh);
+
+	tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get()));
+
+	if (tdiff >= TILCDC_VBLANK_SAFETY_THRESHOLD_US)
+		set_scanout(crtc, fb);
+	else
+		tilcdc_crtc->next_fb = fb;
+
 	tilcdc_crtc->event = event;
-	update_scanout(crtc);
+
+	spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
+
+	pm_runtime_put_sync(dev->dev);
 
 	return 0;
 }
 
-static void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode)
+void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
@@ -186,10 +208,8 @@ static void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode)
 
 	tilcdc_crtc->dpms = mode;
 
-	pm_runtime_get_sync(dev->dev);
-
 	if (mode == DRM_MODE_DPMS_ON) {
-		pm_runtime_forbid(dev->dev);
+		pm_runtime_get_sync(dev->dev);
 		start(crtc);
 	} else {
 		tilcdc_crtc->frame_done = false;
@@ -207,10 +227,23 @@ static void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode)
 			if (ret == 0)
 				dev_err(dev->dev, "timeout waiting for framedone\n");
 		}
-		pm_runtime_allow(dev->dev);
-	}
 
-	pm_runtime_put_sync(dev->dev);
+		pm_runtime_put_sync(dev->dev);
+
+		if (tilcdc_crtc->next_fb) {
+			drm_flip_work_queue(&tilcdc_crtc->unref_work,
+					    tilcdc_crtc->next_fb);
+			tilcdc_crtc->next_fb = NULL;
+		}
+
+		if (tilcdc_crtc->curr_fb) {
+			drm_flip_work_queue(&tilcdc_crtc->unref_work,
+					    tilcdc_crtc->curr_fb);
+			tilcdc_crtc->curr_fb = NULL;
+		}
+
+		drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
+	}
 }
 
 static bool tilcdc_crtc_mode_fixup(struct drm_crtc *crtc,
@@ -272,6 +305,10 @@ static int tilcdc_crtc_mode_set(struct drm_crtc *crtc,
 	if (WARN_ON(!info))
 		return -EINVAL;
 
+	ret = tilcdc_verify_fb(crtc, crtc->primary->fb);
+	if (ret)
+		return ret;
+
 	pm_runtime_get_sync(dev->dev);
 
 	/* Configure the Burst Size and fifo threshold of DMA: */
@@ -419,8 +456,10 @@ static int tilcdc_crtc_mode_set(struct drm_crtc *crtc,
 	else
 		tilcdc_clear(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ORDER);
 
+	drm_framebuffer_reference(crtc->primary->fb);
+
+	set_scanout(crtc, crtc->primary->fb);
 
-	update_scanout(crtc);
 	tilcdc_crtc_update_clk(crtc);
 
 	pm_runtime_put_sync(dev->dev);
@@ -431,7 +470,21 @@ static int tilcdc_crtc_mode_set(struct drm_crtc *crtc,
 static int tilcdc_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 		struct drm_framebuffer *old_fb)
 {
-	update_scanout(crtc);
+	struct drm_device *dev = crtc->dev;
+	int r;
+
+	r = tilcdc_verify_fb(crtc, crtc->primary->fb);
+	if (r)
+		return r;
+
+	drm_framebuffer_reference(crtc->primary->fb);
+
+	pm_runtime_get_sync(dev->dev);
+
+	set_scanout(crtc, crtc->primary->fb);
+
+	pm_runtime_put_sync(dev->dev);
+
 	return 0;
 }
 
@@ -573,7 +626,8 @@ void tilcdc_crtc_update_clk(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_drm_private *priv = dev->dev_private;
 	int dpms = tilcdc_crtc->dpms;
-	unsigned int lcd_clk, div;
+	unsigned long lcd_clk;
+	const unsigned clkdiv = 2; /* using a fixed divider of 2 */
 	int ret;
 
 	pm_runtime_get_sync(dev->dev);
@@ -581,22 +635,21 @@ void tilcdc_crtc_update_clk(struct drm_crtc *crtc)
 	if (dpms == DRM_MODE_DPMS_ON)
 		tilcdc_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 
-	/* in raster mode, minimum divisor is 2: */
-	ret = clk_set_rate(priv->disp_clk, crtc->mode.clock * 1000 * 2);
-	if (ret) {
+	/* mode.clock is in KHz, set_rate wants parameter in Hz */
+	ret = clk_set_rate(priv->clk, crtc->mode.clock * 1000 * clkdiv);
+	if (ret < 0) {
 		dev_err(dev->dev, "failed to set display clock rate to: %d\n",
 				crtc->mode.clock);
 		goto out;
 	}
 
 	lcd_clk = clk_get_rate(priv->clk);
-	div = lcd_clk / (crtc->mode.clock * 1000);
 
-	DBG("lcd_clk=%u, mode clock=%d, div=%u", lcd_clk, crtc->mode.clock, div);
-	DBG("fck=%lu, dpll_disp_ck=%lu", clk_get_rate(priv->clk), clk_get_rate(priv->disp_clk));
+	DBG("lcd_clk=%lu, mode clock=%d, div=%u",
+		lcd_clk, crtc->mode.clock, clkdiv);
 
 	/* Configure the LCD clock divisor. */
-	tilcdc_write(dev, LCDC_CTRL_REG, LCDC_CLK_DIVISOR(div) |
+	tilcdc_write(dev, LCDC_CTRL_REG, LCDC_CLK_DIVISOR(clkdiv) |
 			LCDC_RASTER_MODE);
 
 	if (priv->rev == 2)
@@ -611,44 +664,58 @@ out:
 	pm_runtime_put_sync(dev->dev);
 }
 
+#define SYNC_LOST_COUNT_LIMIT 50
+
 irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct tilcdc_drm_private *priv = dev->dev_private;
-	uint32_t stat = tilcdc_read_irqstatus(dev);
+	uint32_t stat;
 
-	if ((stat & LCDC_SYNC_LOST) && (stat & LCDC_FIFO_UNDERFLOW)) {
-		stop(crtc);
-		dev_err(dev->dev, "error: %08x\n", stat);
-		tilcdc_clear_irqstatus(dev, stat);
-		start(crtc);
-	} else if (stat & LCDC_PL_LOAD_DONE) {
-		tilcdc_clear_irqstatus(dev, stat);
-	} else {
-		struct drm_pending_vblank_event *event;
+	stat = tilcdc_read_irqstatus(dev);
+	tilcdc_clear_irqstatus(dev, stat);
+
+	if (stat & LCDC_END_OF_FRAME0) {
 		unsigned long flags;
-		uint32_t dirty = tilcdc_crtc->dirty & stat;
+		bool skip_event = false;
+		ktime_t now;
 
-		tilcdc_clear_irqstatus(dev, stat);
+		now = ktime_get();
 
-		if (dirty & LCDC_END_OF_FRAME0)
-			set_scanout(crtc, 0);
+		drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
 
-		if (dirty & LCDC_END_OF_FRAME1)
-			set_scanout(crtc, 1);
+		spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags);
+
+		tilcdc_crtc->last_vblank = now;
+
+		if (tilcdc_crtc->next_fb) {
+			set_scanout(crtc, tilcdc_crtc->next_fb);
+			tilcdc_crtc->next_fb = NULL;
+			skip_event = true;
+		}
+
+		spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
 
 		drm_handle_vblank(dev, 0);
 
-		spin_lock_irqsave(&dev->event_lock, flags);
-		event = tilcdc_crtc->event;
-		tilcdc_crtc->event = NULL;
-		if (event)
-			drm_send_vblank_event(dev, 0, event);
-		spin_unlock_irqrestore(&dev->event_lock, flags);
+		if (!skip_event) {
+			struct drm_pending_vblank_event *event;
+
+			spin_lock_irqsave(&dev->event_lock, flags);
+
+			event = tilcdc_crtc->event;
+			tilcdc_crtc->event = NULL;
+			if (event)
+				drm_send_vblank_event(dev, 0, event);
+
+			spin_unlock_irqrestore(&dev->event_lock, flags);
+		}
 
-		if (dirty && !tilcdc_crtc->dirty)
-			drm_vblank_put(dev, 0);
+		if (tilcdc_crtc->frame_intact)
+			tilcdc_crtc->sync_lost_count = 0;
+		else
+			tilcdc_crtc->frame_intact = true;
 	}
 
 	if (priv->rev == 2) {
@@ -659,36 +726,34 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 		tilcdc_write(dev, LCDC_END_OF_INT_IND_REG, 0);
 	}
 
-	return IRQ_HANDLED;
-}
+	if (stat & LCDC_SYNC_LOST) {
+		dev_err_ratelimited(dev->dev, "%s(0x%08x): Sync lost",
+				    __func__, stat);
+		tilcdc_crtc->frame_intact = false;
+		if (tilcdc_crtc->sync_lost_count++ > SYNC_LOST_COUNT_LIMIT) {
+			dev_err(dev->dev,
+				"%s(0x%08x): Sync lost flood detected, disabling the interrupt",
+				__func__, stat);
+			tilcdc_write(dev, LCDC_INT_ENABLE_CLR_REG,
+				     LCDC_SYNC_LOST);
+		}
+	}
 
-void tilcdc_crtc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
-	struct drm_pending_vblank_event *event;
-	struct drm_device *dev = crtc->dev;
-	unsigned long flags;
+	if (stat & LCDC_FIFO_UNDERFLOW)
+		dev_err_ratelimited(dev->dev, "%s(0x%08x): FIFO underfow",
+				    __func__, stat);
 
-	/* Destroy the pending vertical blanking event associated with the
-	 * pending page flip, if any, and disable vertical blanking interrupts.
-	 */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	event = tilcdc_crtc->event;
-	if (event && event->base.file_priv == file) {
-		tilcdc_crtc->event = NULL;
-		event->base.destroy(&event->base);
-		drm_vblank_put(dev, 0);
-	}
-	spin_unlock_irqrestore(&dev->event_lock, flags);
+	return IRQ_HANDLED;
 }
 
 struct drm_crtc *tilcdc_crtc_create(struct drm_device *dev)
 {
+	struct tilcdc_drm_private *priv = dev->dev_private;
 	struct tilcdc_crtc *tilcdc_crtc;
 	struct drm_crtc *crtc;
 	int ret;
 
-	tilcdc_crtc = kzalloc(sizeof(*tilcdc_crtc), GFP_KERNEL);
+	tilcdc_crtc = devm_kzalloc(dev->dev, sizeof(*tilcdc_crtc), GFP_KERNEL);
 	if (!tilcdc_crtc) {
 		dev_err(dev->dev, "allocation failed\n");
 		return NULL;
@@ -702,12 +767,32 @@ struct drm_crtc *tilcdc_crtc_create(struct drm_device *dev)
 	drm_flip_work_init(&tilcdc_crtc->unref_work,
 			"unref", unref_worker);
 
+	spin_lock_init(&tilcdc_crtc->irq_lock);
+
 	ret = drm_crtc_init(dev, crtc, &tilcdc_crtc_funcs);
 	if (ret < 0)
 		goto fail;
 
 	drm_crtc_helper_add(crtc, &tilcdc_crtc_helper_funcs);
 
+	if (priv->is_componentized) {
+		struct device_node *ports =
+			of_get_child_by_name(dev->dev->of_node, "ports");
+
+		if (ports) {
+			crtc->port = of_get_child_by_name(ports, "port");
+			of_node_put(ports);
+		} else {
+			crtc->port =
+				of_get_child_by_name(dev->dev->of_node, "port");
+		}
+		if (!crtc->port) { /* This should never happen */
+			dev_err(dev->dev, "Port node not found in %s\n",
+				dev->dev->of_node->full_name);
+			goto fail;
+		}
+	}
+
 	return crtc;
 
 fail:
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index d7f5b897c6c5..709bc903524d 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -18,6 +18,8 @@
 /* LCDC DRM driver, based on da8xx-fb */
 
 #include <linux/component.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/suspend.h>
 
 #include "tilcdc_drv.h"
 #include "tilcdc_regs.h"
@@ -110,6 +112,8 @@ static int tilcdc_unload(struct drm_device *dev)
 {
 	struct tilcdc_drm_private *priv = dev->dev_private;
 
+	tilcdc_crtc_dpms(priv->crtc, DRM_MODE_DPMS_OFF);
+
 	tilcdc_remove_external_encoders(dev);
 
 	drm_fbdev_cma_fini(priv->fbdev);
@@ -139,11 +143,11 @@ static int tilcdc_unload(struct drm_device *dev)
 
 	pm_runtime_disable(dev->dev);
 
-	kfree(priv);
-
 	return 0;
 }
 
+static size_t tilcdc_num_regs(void);
+
 static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 {
 	struct platform_device *pdev = dev->platformdev;
@@ -154,8 +158,12 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	u32 bpp = 0;
 	int ret;
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
+	priv = devm_kzalloc(dev->dev, sizeof(*priv), GFP_KERNEL);
+	if (priv)
+		priv->saved_register =
+			devm_kcalloc(dev->dev, tilcdc_num_regs(),
+				     sizeof(*priv->saved_register), GFP_KERNEL);
+	if (!priv || !priv->saved_register) {
 		dev_err(dev->dev, "failed to allocate private data\n");
 		return -ENOMEM;
 	}
@@ -168,7 +176,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	priv->wq = alloc_ordered_workqueue("tilcdc", 0);
 	if (!priv->wq) {
 		ret = -ENOMEM;
-		goto fail_free_priv;
+		goto fail_unset_priv;
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -192,13 +200,6 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 		goto fail_iounmap;
 	}
 
-	priv->disp_clk = clk_get(dev->dev, "dpll_disp_ck");
-	if (IS_ERR(priv->clk)) {
-		dev_err(dev->dev, "failed to get display clock\n");
-		ret = -ENODEV;
-		goto fail_put_clk;
-	}
-
 #ifdef CONFIG_CPU_FREQ
 	priv->lcd_fck_rate = clk_get_rate(priv->clk);
 	priv->freq_transition.notifier_call = cpufreq_transition;
@@ -206,7 +207,7 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 			CPUFREQ_TRANSITION_NOTIFIER);
 	if (ret) {
 		dev_err(dev->dev, "failed to register cpufreq notifier\n");
-		goto fail_put_disp_clk;
+		goto fail_put_clk;
 	}
 #endif
 
@@ -227,7 +228,6 @@ static int tilcdc_load(struct drm_device *dev, unsigned long flags)
 	DBG("Maximum Pixel Clock Value %dKHz", priv->max_pixelclock);
 
 	pm_runtime_enable(dev->dev);
-	pm_runtime_irq_safe(dev->dev);
 
 	/* Determine LCD IP Version */
 	pm_runtime_get_sync(dev->dev);
@@ -330,11 +330,9 @@ fail_cpufreq_unregister:
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_unregister_notifier(&priv->freq_transition,
 			CPUFREQ_TRANSITION_NOTIFIER);
-fail_put_disp_clk:
-	clk_put(priv->disp_clk);
-#endif
 
 fail_put_clk:
+#endif
 	clk_put(priv->clk);
 
 fail_iounmap:
@@ -344,17 +342,10 @@ fail_free_wq:
 	flush_workqueue(priv->wq);
 	destroy_workqueue(priv->wq);
 
-fail_free_priv:
+fail_unset_priv:
 	dev->dev_private = NULL;
-	kfree(priv);
-	return ret;
-}
 
-static void tilcdc_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct tilcdc_drm_private *priv = dev->dev_private;
-
-	tilcdc_crtc_cancel_page_flip(priv->crtc, file);
+	return ret;
 }
 
 static void tilcdc_lastclose(struct drm_device *dev)
@@ -380,10 +371,14 @@ static int tilcdc_irq_postinstall(struct drm_device *dev)
 	struct tilcdc_drm_private *priv = dev->dev_private;
 
 	/* enable FIFO underflow irq: */
-	if (priv->rev == 1)
+	if (priv->rev == 1) {
 		tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_V1_UNDERFLOW_INT_ENA);
-	else
-		tilcdc_set(dev, LCDC_INT_ENABLE_SET_REG, LCDC_V2_UNDERFLOW_INT_ENA);
+	} else {
+		tilcdc_write(dev, LCDC_INT_ENABLE_SET_REG,
+			   LCDC_V2_UNDERFLOW_INT_ENA |
+			   LCDC_V2_END_OF_FRAME0_INT_ENA |
+			   LCDC_FRAME_DONE | LCDC_SYNC_LOST);
+	}
 
 	return 0;
 }
@@ -398,43 +393,21 @@ static void tilcdc_irq_uninstall(struct drm_device *dev)
 				LCDC_V1_UNDERFLOW_INT_ENA | LCDC_V1_PL_INT_ENA);
 		tilcdc_clear(dev, LCDC_DMA_CTRL_REG, LCDC_V1_END_OF_FRAME_INT_ENA);
 	} else {
-		tilcdc_clear(dev, LCDC_INT_ENABLE_SET_REG,
+		tilcdc_write(dev, LCDC_INT_ENABLE_CLR_REG,
 			LCDC_V2_UNDERFLOW_INT_ENA | LCDC_V2_PL_INT_ENA |
-			LCDC_V2_END_OF_FRAME0_INT_ENA | LCDC_V2_END_OF_FRAME1_INT_ENA |
-			LCDC_FRAME_DONE);
+			LCDC_V2_END_OF_FRAME0_INT_ENA |
+			LCDC_FRAME_DONE | LCDC_SYNC_LOST);
 	}
-
-}
-
-static void enable_vblank(struct drm_device *dev, bool enable)
-{
-	struct tilcdc_drm_private *priv = dev->dev_private;
-	u32 reg, mask;
-
-	if (priv->rev == 1) {
-		reg = LCDC_DMA_CTRL_REG;
-		mask = LCDC_V1_END_OF_FRAME_INT_ENA;
-	} else {
-		reg = LCDC_INT_ENABLE_SET_REG;
-		mask = LCDC_V2_END_OF_FRAME0_INT_ENA |
-			LCDC_V2_END_OF_FRAME1_INT_ENA | LCDC_FRAME_DONE;
-	}
-
-	if (enable)
-		tilcdc_set(dev, reg, mask);
-	else
-		tilcdc_clear(dev, reg, mask);
 }
 
 static int tilcdc_enable_vblank(struct drm_device *dev, unsigned int pipe)
 {
-	enable_vblank(dev, true);
 	return 0;
 }
 
 static void tilcdc_disable_vblank(struct drm_device *dev, unsigned int pipe)
 {
-	enable_vblank(dev, false);
+	return;
 }
 
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_PM_SLEEP)
@@ -461,13 +434,22 @@ static const struct {
 		/* new in revision 2: */
 		REG(2, false, LCDC_RAW_STAT_REG),
 		REG(2, false, LCDC_MASKED_STAT_REG),
-		REG(2, false, LCDC_INT_ENABLE_SET_REG),
+		REG(2, true, LCDC_INT_ENABLE_SET_REG),
 		REG(2, false, LCDC_INT_ENABLE_CLR_REG),
 		REG(2, false, LCDC_END_OF_INT_IND_REG),
 		REG(2, true,  LCDC_CLK_ENABLE_REG),
-		REG(2, true,  LCDC_INT_ENABLE_SET_REG),
 #undef REG
 };
+
+static size_t tilcdc_num_regs(void)
+{
+	return ARRAY_SIZE(registers);
+}
+#else
+static size_t tilcdc_num_regs(void)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_DEBUG_FS
@@ -554,10 +536,10 @@ static const struct file_operations fops = {
 };
 
 static struct drm_driver tilcdc_driver = {
-	.driver_features    = DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET,
+	.driver_features    = (DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET |
+			       DRIVER_PRIME),
 	.load               = tilcdc_load,
 	.unload             = tilcdc_unload,
-	.preclose           = tilcdc_preclose,
 	.lastclose          = tilcdc_lastclose,
 	.set_busid          = drm_platform_set_busid,
 	.irq_handler        = tilcdc_irq,
@@ -572,6 +554,16 @@ static struct drm_driver tilcdc_driver = {
 	.dumb_create        = drm_gem_cma_dumb_create,
 	.dumb_map_offset    = drm_gem_cma_dumb_map_offset,
 	.dumb_destroy       = drm_gem_dumb_destroy,
+
+	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
+	.gem_prime_import	= drm_gem_prime_import,
+	.gem_prime_export	= drm_gem_prime_export,
+	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_vmap		= drm_gem_cma_prime_vmap,
+	.gem_prime_vunmap	= drm_gem_cma_prime_vunmap,
+	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
 #ifdef CONFIG_DEBUG_FS
 	.debugfs_init       = tilcdc_debugfs_init,
 	.debugfs_cleanup    = tilcdc_debugfs_cleanup,
@@ -597,11 +589,24 @@ static int tilcdc_pm_suspend(struct device *dev)
 
 	drm_kms_helper_poll_disable(ddev);
 
+	/* Select sleep pin state */
+	pinctrl_pm_select_sleep_state(dev);
+
+	if (pm_runtime_suspended(dev)) {
+		priv->ctx_valid = false;
+		return 0;
+	}
+
+	/* Disable the LCDC controller, to avoid locking up the PRCM */
+	tilcdc_crtc_dpms(priv->crtc, DRM_MODE_DPMS_OFF);
+
 	/* Save register state: */
 	for (i = 0; i < ARRAY_SIZE(registers); i++)
 		if (registers[i].save && (priv->rev >= registers[i].rev))
 			priv->saved_register[n++] = tilcdc_read(ddev, registers[i].reg);
 
+	priv->ctx_valid = true;
+
 	return 0;
 }
 
@@ -611,10 +616,17 @@ static int tilcdc_pm_resume(struct device *dev)
 	struct tilcdc_drm_private *priv = ddev->dev_private;
 	unsigned i, n = 0;
 
-	/* Restore register state: */
-	for (i = 0; i < ARRAY_SIZE(registers); i++)
-		if (registers[i].save && (priv->rev >= registers[i].rev))
-			tilcdc_write(ddev, registers[i].reg, priv->saved_register[n++]);
+	/* Select default pin state */
+	pinctrl_pm_select_default_state(dev);
+
+	if (priv->ctx_valid == true) {
+		/* Restore register state: */
+		for (i = 0; i < ARRAY_SIZE(registers); i++)
+			if (registers[i].save &&
+			    (priv->rev >= registers[i].rev))
+				tilcdc_write(ddev, registers[i].reg,
+					     priv->saved_register[n++]);
+	}
 
 	drm_kms_helper_poll_enable(ddev);
 
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.h b/drivers/gpu/drm/tilcdc/tilcdc_drv.h
index e863ad0d26fe..c1de18bae415 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.h
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.h
@@ -49,7 +49,6 @@
 struct tilcdc_drm_private {
 	void __iomem *mmio;
 
-	struct clk *disp_clk;    /* display dpll */
 	struct clk *clk;         /* functional clock */
 	int rev;                 /* IP revision */
 
@@ -67,7 +66,8 @@ struct tilcdc_drm_private {
 	uint32_t max_width;
 
 	/* register contents saved across suspend/resume: */
-	u32 saved_register[12];
+	u32 *saved_register;
+	bool ctx_valid;
 
 #ifdef CONFIG_CPU_FREQ
 	struct notifier_block freq_transition;
@@ -163,7 +163,6 @@ struct tilcdc_panel_info {
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 
 struct drm_crtc *tilcdc_crtc_create(struct drm_device *dev);
-void tilcdc_crtc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
 irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc);
 void tilcdc_crtc_update_clk(struct drm_crtc *crtc);
 void tilcdc_crtc_set_panel_info(struct drm_crtc *crtc,
@@ -172,5 +171,6 @@ void tilcdc_crtc_set_simulate_vesa_sync(struct drm_crtc *crtc,
 					bool simulate_vesa_sync);
 int tilcdc_crtc_mode_valid(struct drm_crtc *crtc, struct drm_display_mode *mode);
 int tilcdc_crtc_max_width(struct drm_crtc *crtc);
+void tilcdc_crtc_dpms(struct drm_crtc *crtc, int mode);
 
 #endif /* __TILCDC_DRV_H__ */
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
index 4dda6e2f464b..ff7774c17d7c 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c
@@ -45,14 +45,6 @@ struct panel_encoder {
 };
 #define to_panel_encoder(x) container_of(x, struct panel_encoder, base)
 
-
-static void panel_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct panel_encoder *panel_encoder = to_panel_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(panel_encoder);
-}
-
 static void panel_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct panel_encoder *panel_encoder = to_panel_encoder(encoder);
@@ -70,14 +62,6 @@ static void panel_encoder_dpms(struct drm_encoder *encoder, int mode)
 					 mode == DRM_MODE_DPMS_ON ? 1 : 0);
 }
 
-static bool panel_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	/* nothing needed */
-	return true;
-}
-
 static void panel_encoder_prepare(struct drm_encoder *encoder)
 {
 	struct panel_encoder *panel_encoder = to_panel_encoder(encoder);
@@ -98,12 +82,11 @@ static void panel_encoder_mode_set(struct drm_encoder *encoder,
 }
 
 static const struct drm_encoder_funcs panel_encoder_funcs = {
-		.destroy        = panel_encoder_destroy,
+		.destroy        = drm_encoder_cleanup,
 };
 
 static const struct drm_encoder_helper_funcs panel_encoder_helper_funcs = {
 		.dpms           = panel_encoder_dpms,
-		.mode_fixup     = panel_encoder_mode_fixup,
 		.prepare        = panel_encoder_prepare,
 		.commit         = panel_encoder_commit,
 		.mode_set       = panel_encoder_mode_set,
@@ -116,7 +99,8 @@ static struct drm_encoder *panel_encoder_create(struct drm_device *dev,
 	struct drm_encoder *encoder;
 	int ret;
 
-	panel_encoder = kzalloc(sizeof(*panel_encoder), GFP_KERNEL);
+	panel_encoder = devm_kzalloc(dev->dev, sizeof(*panel_encoder),
+				     GFP_KERNEL);
 	if (!panel_encoder) {
 		dev_err(dev->dev, "allocation failed\n");
 		return NULL;
@@ -137,7 +121,7 @@ static struct drm_encoder *panel_encoder_create(struct drm_device *dev,
 	return encoder;
 
 fail:
-	panel_encoder_destroy(encoder);
+	drm_encoder_cleanup(encoder);
 	return NULL;
 }
 
@@ -156,10 +140,8 @@ struct panel_connector {
 
 static void panel_connector_destroy(struct drm_connector *connector)
 {
-	struct panel_connector *panel_connector = to_panel_connector(connector);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
-	kfree(panel_connector);
 }
 
 static enum drm_connector_status panel_connector_detect(
@@ -232,7 +214,8 @@ static struct drm_connector *panel_connector_create(struct drm_device *dev,
 	struct drm_connector *connector;
 	int ret;
 
-	panel_connector = kzalloc(sizeof(*panel_connector), GFP_KERNEL);
+	panel_connector = devm_kzalloc(dev->dev, sizeof(*panel_connector),
+				       GFP_KERNEL);
 	if (!panel_connector) {
 		dev_err(dev->dev, "allocation failed\n");
 		return NULL;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
index 5052a8af7ecb..7716f42f8aab 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c
@@ -54,14 +54,6 @@ struct tfp410_encoder {
 };
 #define to_tfp410_encoder(x) container_of(x, struct tfp410_encoder, base)
 
-
-static void tfp410_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct tfp410_encoder *tfp410_encoder = to_tfp410_encoder(encoder);
-	drm_encoder_cleanup(encoder);
-	kfree(tfp410_encoder);
-}
-
 static void tfp410_encoder_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct tfp410_encoder *tfp410_encoder = to_tfp410_encoder(encoder);
@@ -80,14 +72,6 @@ static void tfp410_encoder_dpms(struct drm_encoder *encoder, int mode)
 	tfp410_encoder->dpms = mode;
 }
 
-static bool tfp410_encoder_mode_fixup(struct drm_encoder *encoder,
-		const struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
-{
-	/* nothing needed */
-	return true;
-}
-
 static void tfp410_encoder_prepare(struct drm_encoder *encoder)
 {
 	tfp410_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
@@ -107,12 +91,11 @@ static void tfp410_encoder_mode_set(struct drm_encoder *encoder,
 }
 
 static const struct drm_encoder_funcs tfp410_encoder_funcs = {
-		.destroy        = tfp410_encoder_destroy,
+		.destroy        = drm_encoder_cleanup,
 };
 
 static const struct drm_encoder_helper_funcs tfp410_encoder_helper_funcs = {
 		.dpms           = tfp410_encoder_dpms,
-		.mode_fixup     = tfp410_encoder_mode_fixup,
 		.prepare        = tfp410_encoder_prepare,
 		.commit         = tfp410_encoder_commit,
 		.mode_set       = tfp410_encoder_mode_set,
@@ -125,7 +108,8 @@ static struct drm_encoder *tfp410_encoder_create(struct drm_device *dev,
 	struct drm_encoder *encoder;
 	int ret;
 
-	tfp410_encoder = kzalloc(sizeof(*tfp410_encoder), GFP_KERNEL);
+	tfp410_encoder = devm_kzalloc(dev->dev, sizeof(*tfp410_encoder),
+				      GFP_KERNEL);
 	if (!tfp410_encoder) {
 		dev_err(dev->dev, "allocation failed\n");
 		return NULL;
@@ -147,7 +131,7 @@ static struct drm_encoder *tfp410_encoder_create(struct drm_device *dev,
 	return encoder;
 
 fail:
-	tfp410_encoder_destroy(encoder);
+	drm_encoder_cleanup(encoder);
 	return NULL;
 }
 
@@ -166,10 +150,8 @@ struct tfp410_connector {
 
 static void tfp410_connector_destroy(struct drm_connector *connector)
 {
-	struct tfp410_connector *tfp410_connector = to_tfp410_connector(connector);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
-	kfree(tfp410_connector);
 }
 
 static enum drm_connector_status tfp410_connector_detect(
@@ -237,7 +219,8 @@ static struct drm_connector *tfp410_connector_create(struct drm_device *dev,
 	struct drm_connector *connector;
 	int ret;
 
-	tfp410_connector = kzalloc(sizeof(*tfp410_connector), GFP_KERNEL);
+	tfp410_connector = devm_kzalloc(dev->dev, sizeof(*tfp410_connector),
+					GFP_KERNEL);
 	if (!tfp410_connector) {
 		dev_err(dev->dev, "allocation failed\n");
 		return NULL;
@@ -322,7 +305,7 @@ static int tfp410_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	tfp410_mod = kzalloc(sizeof(*tfp410_mod), GFP_KERNEL);
+	tfp410_mod = devm_kzalloc(&pdev->dev, sizeof(*tfp410_mod), GFP_KERNEL);
 	if (!tfp410_mod)
 		return -ENOMEM;
 
@@ -375,7 +358,6 @@ fail_adapter:
 	i2c_put_adapter(tfp410_mod->i2c);
 
 fail:
-	kfree(tfp410_mod);
 	tilcdc_module_cleanup(mod);
 	return ret;
 }
@@ -389,7 +371,6 @@ static int tfp410_remove(struct platform_device *pdev)
 	gpio_free(tfp410_mod->gpio);
 
 	tilcdc_module_cleanup(mod);
-	kfree(tfp410_mod);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index d5728ec85254..772ec9e1f590 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -125,17 +125,5 @@ static struct usb_driver udl_driver = {
 	.disconnect = udl_usb_disconnect,
 	.id_table = id_table,
 };
-
-static int __init udl_init(void)
-{
-	return usb_register(&udl_driver);
-}
-
-static void __exit udl_exit(void)
-{
-	usb_deregister(&udl_driver);
-}
-
-module_init(udl_init);
-module_exit(udl_exit);
+module_usb_driver(udl_driver);
 MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/udl/udl_encoder.c b/drivers/gpu/drm/udl/udl_encoder.c
index a181a647fcf9..59a4b34e87ed 100644
--- a/drivers/gpu/drm/udl/udl_encoder.c
+++ b/drivers/gpu/drm/udl/udl_encoder.c
@@ -26,13 +26,6 @@ static void udl_encoder_disable(struct drm_encoder *encoder)
 {
 }
 
-static bool udl_mode_fixup(struct drm_encoder *encoder,
-			   const struct drm_display_mode *mode,
-			   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void udl_encoder_prepare(struct drm_encoder *encoder)
 {
 }
@@ -54,7 +47,6 @@ udl_encoder_dpms(struct drm_encoder *encoder, int mode)
 
 static const struct drm_encoder_helper_funcs udl_helper_funcs = {
 	.dpms = udl_encoder_dpms,
-	.mode_fixup = udl_mode_fixup,
 	.prepare = udl_encoder_prepare,
 	.mode_set = udl_encoder_mode_set,
 	.commit = udl_encoder_commit,
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 200419d4d43c..33239a2b264a 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -409,7 +409,6 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb,
 
 	if (ufb->obj->base.import_attach) {
 		ret = dma_buf_begin_cpu_access(ufb->obj->base.import_attach->dmabuf,
-					       0, ufb->obj->base.size,
 					       DMA_FROM_DEVICE);
 		if (ret)
 			goto unlock;
@@ -424,9 +423,8 @@ static int udl_user_framebuffer_dirty(struct drm_framebuffer *fb,
 	}
 
 	if (ufb->obj->base.import_attach) {
-		dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
-				       0, ufb->obj->base.size,
-				       DMA_FROM_DEVICE);
+		ret = dma_buf_end_cpu_access(ufb->obj->base.import_attach->dmabuf,
+					     DMA_FROM_DEVICE);
 	}
 
  unlock:
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 160ef2a08b89..b87afee44995 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -279,14 +279,6 @@ static void udl_crtc_dpms(struct drm_crtc *crtc, int mode)
 
 }
 
-static bool udl_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-
-{
-	return true;
-}
-
 #if 0
 static int
 udl_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
@@ -402,7 +394,6 @@ static void udl_crtc_commit(struct drm_crtc *crtc)
 
 static const struct drm_crtc_helper_funcs udl_helper_funcs = {
 	.dpms = udl_crtc_dpms,
-	.mode_fixup = udl_crtc_mode_fixup,
 	.mode_set = udl_crtc_mode_set,
 	.prepare = udl_crtc_prepare,
 	.commit = udl_crtc_commit,
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 034ef2de9037..9807bc9d296e 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -498,11 +498,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
-	ret = copy_from_user(bo->base.vaddr,
+	if (copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
-			     args->size);
-	if (ret != 0)
+			     args->size)) {
+		ret = -EFAULT;
 		goto fail;
+	}
 	/* Clear the rest of the memory from allocating from the BO
 	 * cache.
 	 */
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 018145e0b87d..355ee4b091b3 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -49,22 +49,27 @@ struct vc4_crtc {
 	/* Which HVS channel we're using for our CRTC. */
 	int channel;
 
-	/* Pointer to the actual hardware display list memory for the
-	 * crtc.
-	 */
-	u32 __iomem *dlist;
-
-	u32 dlist_size; /* in dwords */
-
 	struct drm_pending_vblank_event *event;
 };
 
+struct vc4_crtc_state {
+	struct drm_crtc_state base;
+	/* Dlist area for this CRTC configuration. */
+	struct drm_mm_node mm;
+};
+
 static inline struct vc4_crtc *
 to_vc4_crtc(struct drm_crtc *crtc)
 {
 	return (struct vc4_crtc *)crtc;
 }
 
+static inline struct vc4_crtc_state *
+to_vc4_crtc_state(struct drm_crtc_state *crtc_state)
+{
+	return (struct vc4_crtc_state *)crtc_state;
+}
+
 struct vc4_crtc_data {
 	/* Which channel of the HVS this pixelvalve sources from. */
 	int hvs_channel;
@@ -83,7 +88,7 @@ static const struct {
 } crtc_regs[] = {
 	CRTC_REG(PV_CONTROL),
 	CRTC_REG(PV_V_CONTROL),
-	CRTC_REG(PV_VSYNCD),
+	CRTC_REG(PV_VSYNCD_EVEN),
 	CRTC_REG(PV_HORZA),
 	CRTC_REG(PV_HORZB),
 	CRTC_REG(PV_VERTA),
@@ -183,6 +188,8 @@ static int vc4_get_clock_select(struct drm_crtc *crtc)
 
 static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	struct drm_crtc_state *state = crtc->state;
 	struct drm_display_mode *mode = &state->adjusted_mode;
@@ -212,6 +219,16 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 				 PV_HORZB_HFP) |
 		   VC4_SET_FIELD(mode->hdisplay, PV_HORZB_HACTIVE));
 
+	CRTC_WRITE(PV_VERTA,
+		   VC4_SET_FIELD(mode->vtotal - mode->vsync_end,
+				 PV_VERTA_VBP) |
+		   VC4_SET_FIELD(mode->vsync_end - mode->vsync_start,
+				 PV_VERTA_VSYNC));
+	CRTC_WRITE(PV_VERTB,
+		   VC4_SET_FIELD(mode->vsync_start - mode->vdisplay,
+				 PV_VERTB_VFP) |
+		   VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE));
+
 	if (interlace) {
 		CRTC_WRITE(PV_VERTA_EVEN,
 			   VC4_SET_FIELD(mode->vtotal - mode->vsync_end - 1,
@@ -241,6 +258,10 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 		   PV_CONTROL_FIFO_CLR |
 		   PV_CONTROL_EN);
 
+	HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
+		  SCALER_DISPBKGND_AUTOHS |
+		  (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
+
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc));
 		vc4_crtc_dump_regs(vc4_crtc);
@@ -319,11 +340,13 @@ static void vc4_crtc_enable(struct drm_crtc *crtc)
 static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
 				 struct drm_crtc_state *state)
 {
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_plane *plane;
-	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	unsigned long flags;
 	u32 dlist_count = 0;
+	int ret;
 
 	/* The pixelvalve can only feed one encoder (and encoders are
 	 * 1:1 with connectors.)
@@ -346,18 +369,12 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
 
 	dlist_count++; /* Account for SCALER_CTL0_END. */
 
-	if (!vc4_crtc->dlist || dlist_count > vc4_crtc->dlist_size) {
-		vc4_crtc->dlist = ((u32 __iomem *)vc4->hvs->dlist +
-				   HVS_BOOTLOADER_DLIST_END);
-		vc4_crtc->dlist_size = ((SCALER_DLIST_SIZE >> 2) -
-					HVS_BOOTLOADER_DLIST_END);
-
-		if (dlist_count > vc4_crtc->dlist_size) {
-			DRM_DEBUG_KMS("dlist too large for CRTC (%d > %d).\n",
-				      dlist_count, vc4_crtc->dlist_size);
-			return -EINVAL;
-		}
-	}
+	spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+	ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
+				 dlist_count, 1, 0);
+	spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -368,47 +385,29 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
 	struct drm_plane *plane;
 	bool debug_dump_regs = false;
-	u32 __iomem *dlist_next = vc4_crtc->dlist;
+	u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
+	u32 __iomem *dlist_next = dlist_start;
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
 		vc4_hvs_dump_state(dev);
 	}
 
-	/* Copy all the active planes' dlist contents to the hardware dlist.
-	 *
-	 * XXX: If the new display list was large enough that it
-	 * overlapped a currently-read display list, we need to do
-	 * something like disable scanout before putting in the new
-	 * list.  For now, we're safe because we only have the two
-	 * planes.
-	 */
+	/* Copy all the active planes' dlist contents to the hardware dlist. */
 	drm_atomic_crtc_for_each_plane(plane, crtc) {
 		dlist_next += vc4_plane_write_dlist(plane, dlist_next);
 	}
 
-	if (dlist_next == vc4_crtc->dlist) {
-		/* If no planes were enabled, use the SCALER_CTL0_END
-		 * at the start of the display list memory (in the
-		 * bootloader section).  We'll rewrite that
-		 * SCALER_CTL0_END, just in case, though.
-		 */
-		writel(SCALER_CTL0_END, vc4->hvs->dlist);
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 0);
-	} else {
-		writel(SCALER_CTL0_END, dlist_next);
-		dlist_next++;
-
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-			  (u32 __iomem *)vc4_crtc->dlist -
-			  (u32 __iomem *)vc4->hvs->dlist);
-
-		/* Make the next display list start after ours. */
-		vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist);
-		vc4_crtc->dlist = dlist_next;
-	}
+	writel(SCALER_CTL0_END, dlist_next);
+	dlist_next++;
+
+	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
+
+	HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+		  vc4_state->mm.start);
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
@@ -544,6 +543,7 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 	/* Make sure all other async modesetes have landed. */
 	ret = down_interruptible(&vc4->async_modeset);
 	if (ret) {
+		drm_framebuffer_unreference(fb);
 		kfree(flip_state);
 		return ret;
 	}
@@ -573,6 +573,36 @@ static int vc4_page_flip(struct drm_crtc *crtc,
 		return drm_atomic_helper_page_flip(crtc, fb, event, flags);
 }
 
+static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+	struct vc4_crtc_state *vc4_state;
+
+	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
+	if (!vc4_state)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base);
+	return &vc4_state->base;
+}
+
+static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+
+	if (vc4_state->mm.allocated) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+		drm_mm_remove_node(&vc4_state->mm);
+		spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+
+	}
+
+	__drm_atomic_helper_crtc_destroy_state(crtc, state);
+}
+
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = vc4_crtc_destroy,
@@ -581,8 +611,8 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
 	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.atomic_duplicate_state = vc4_crtc_duplicate_state,
+	.atomic_destroy_state = vc4_crtc_destroy_state,
 };
 
 static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
@@ -593,26 +623,6 @@ static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
 	.atomic_flush = vc4_crtc_atomic_flush,
 };
 
-/* Frees the page flip event when the DRM device is closed with the
- * event still outstanding.
- */
-void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev->event_lock, flags);
-
-	if (vc4_crtc->event && vc4_crtc->event->base.file_priv == file) {
-		vc4_crtc->event->base.destroy(&vc4_crtc->event->base);
-		drm_crtc_vblank_put(crtc);
-		vc4_crtc->event = NULL;
-	}
-
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-}
-
 static const struct vc4_crtc_data pv0_data = {
 	.hvs_channel = 0,
 	.encoder0_type = VC4_ENCODER_TYPE_DSI0,
@@ -664,9 +674,9 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_crtc *vc4_crtc;
 	struct drm_crtc *crtc;
-	struct drm_plane *primary_plane, *cursor_plane;
+	struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp;
 	const struct of_device_id *match;
-	int ret;
+	int ret, i;
 
 	vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL);
 	if (!vc4_crtc)
@@ -695,27 +705,49 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 		goto err;
 	}
 
-	cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
-	if (IS_ERR(cursor_plane)) {
-		dev_err(dev, "failed to construct cursor plane\n");
-		ret = PTR_ERR(cursor_plane);
-		goto err_primary;
-	}
-
-	drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane,
+	drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL,
 				  &vc4_crtc_funcs, NULL);
 	drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs);
 	primary_plane->crtc = crtc;
-	cursor_plane->crtc = crtc;
 	vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc;
 	vc4_crtc->channel = vc4_crtc->data->hvs_channel;
 
+	/* Set up some arbitrary number of planes.  We're not limited
+	 * by a set number of physical registers, just the space in
+	 * the HVS (16k) and how small an plane can be (28 bytes).
+	 * However, each plane we set up takes up some memory, and
+	 * increases the cost of looping over planes, which atomic
+	 * modesetting does quite a bit.  As a result, we pick a
+	 * modest number of planes to expose, that should hopefully
+	 * still cover any sane usecase.
+	 */
+	for (i = 0; i < 8; i++) {
+		struct drm_plane *plane =
+			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);
+
+		if (IS_ERR(plane))
+			continue;
+
+		plane->possible_crtcs = 1 << drm_crtc_index(crtc);
+	}
+
+	/* Set up the legacy cursor after overlay initialization,
+	 * since we overlay planes on the CRTC in the order they were
+	 * initialized.
+	 */
+	cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
+	if (!IS_ERR(cursor_plane)) {
+		cursor_plane->possible_crtcs = 1 << drm_crtc_index(crtc);
+		cursor_plane->crtc = crtc;
+		crtc->cursor = cursor_plane;
+	}
+
 	CRTC_WRITE(PV_INTEN, 0);
 	CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
 	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
 			       vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc);
 	if (ret)
-		goto err_cursor;
+		goto err_destroy_planes;
 
 	vc4_set_crtc_possible_masks(drm, crtc);
 
@@ -723,10 +755,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 
 	return 0;
 
-err_cursor:
-	cursor_plane->funcs->destroy(cursor_plane);
-err_primary:
-	primary_plane->funcs->destroy(primary_plane);
+err_destroy_planes:
+	list_for_each_entry_safe(destroy_plane, temp,
+				 &drm->mode_config.plane_list, head) {
+		if (destroy_plane->possible_crtcs == 1 << drm_crtc_index(crtc))
+		    destroy_plane->funcs->destroy(destroy_plane);
+	}
 err:
 	return ret;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index f1655fff8425..b7d2ff0e6e1f 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -43,14 +43,6 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
 	return map;
 }
 
-static void vc4_drm_preclose(struct drm_device *dev, struct drm_file *file)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		vc4_cancel_page_flip(crtc, file);
-}
-
 static void vc4_lastclose(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
@@ -91,8 +83,6 @@ static struct drm_driver vc4_drm_driver = {
 			    DRIVER_HAVE_IRQ |
 			    DRIVER_PRIME),
 	.lastclose = vc4_lastclose,
-	.preclose = vc4_drm_preclose,
-
 	.irq_handler = vc4_irq,
 	.irq_preinstall = vc4_irq_preinstall,
 	.irq_postinstall = vc4_irq_postinstall,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 51a63330d4f8..fa2ad15d4f62 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -52,7 +52,7 @@ struct vc4_dev {
 	/* Protects bo_cache and the BO stats. */
 	struct mutex bo_lock;
 
-	/* Sequence number for the last job queued in job_list.
+	/* Sequence number for the last job queued in bin_job_list.
 	 * Starts at 0 (no jobs emitted).
 	 */
 	uint64_t emit_seqno;
@@ -62,11 +62,19 @@ struct vc4_dev {
 	 */
 	uint64_t finished_seqno;
 
-	/* List of all struct vc4_exec_info for jobs to be executed.
-	 * The first job in the list is the one currently programmed
-	 * into ct0ca/ct1ca for execution.
+	/* List of all struct vc4_exec_info for jobs to be executed in
+	 * the binner.  The first job in the list is the one currently
+	 * programmed into ct0ca for execution.
 	 */
-	struct list_head job_list;
+	struct list_head bin_job_list;
+
+	/* List of all struct vc4_exec_info for jobs that have
+	 * completed binning and are ready for rendering.  The first
+	 * job in the list is the one currently programmed into ct1ca
+	 * for execution.
+	 */
+	struct list_head render_job_list;
+
 	/* List of the finished vc4_exec_infos waiting to be freed by
 	 * job_done_work.
 	 */
@@ -154,7 +162,17 @@ struct vc4_v3d {
 struct vc4_hvs {
 	struct platform_device *pdev;
 	void __iomem *regs;
-	void __iomem *dlist;
+	u32 __iomem *dlist;
+
+	/* Memory manager for CRTCs to allocate space in the display
+	 * list.  Units are dwords.
+	 */
+	struct drm_mm dlist_mm;
+	/* Memory manager for the LBM memory used by HVS scaling. */
+	struct drm_mm lbm_mm;
+	spinlock_t mm_lock;
+
+	struct drm_mm_node mitchell_netravali_filter;
 };
 
 struct vc4_plane {
@@ -286,11 +304,20 @@ struct vc4_exec_info {
 };
 
 static inline struct vc4_exec_info *
-vc4_first_job(struct vc4_dev *vc4)
+vc4_first_bin_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->bin_job_list))
+		return NULL;
+	return list_first_entry(&vc4->bin_job_list, struct vc4_exec_info, head);
+}
+
+static inline struct vc4_exec_info *
+vc4_first_render_job(struct vc4_dev *vc4)
 {
-	if (list_empty(&vc4->job_list))
+	if (list_empty(&vc4->render_job_list))
 		return NULL;
-	return list_first_entry(&vc4->job_list, struct vc4_exec_info, head);
+	return list_first_entry(&vc4->render_job_list,
+				struct vc4_exec_info, head);
 }
 
 /**
@@ -386,7 +413,6 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
 extern struct platform_driver vc4_crtc_driver;
 int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id);
 void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
-void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
 
 /* vc4_debugfs.c */
@@ -405,7 +431,9 @@ int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
-void vc4_submit_next_job(struct drm_device *dev);
+void vc4_submit_next_bin_job(struct drm_device *dev);
+void vc4_submit_next_render_job(struct drm_device *dev);
+void vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec);
 int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno,
 		       uint64_t timeout_ns, bool interruptible);
 void vc4_job_handle_completed(struct vc4_dev *vc4);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 202aa1544acc..8d4384f8b78d 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -141,10 +141,10 @@ vc4_save_hang_state(struct drm_device *dev)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_get_hang_state *state;
 	struct vc4_hang_state *kernel_state;
-	struct vc4_exec_info *exec;
+	struct vc4_exec_info *exec[2];
 	struct vc4_bo *bo;
 	unsigned long irqflags;
-	unsigned int i, unref_list_count;
+	unsigned int i, j, unref_list_count, prev_idx;
 
 	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
 	if (!kernel_state)
@@ -153,37 +153,55 @@ vc4_save_hang_state(struct drm_device *dev)
 	state = &kernel_state->user_state;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
-	exec = vc4_first_job(vc4);
-	if (!exec) {
+	exec[0] = vc4_first_bin_job(vc4);
+	exec[1] = vc4_first_render_job(vc4);
+	if (!exec[0] && !exec[1]) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		return;
 	}
 
-	unref_list_count = 0;
-	list_for_each_entry(bo, &exec->unref_list, unref_head)
-		unref_list_count++;
+	/* Get the bos from both binner and renderer into hang state. */
+	state->bo_count = 0;
+	for (i = 0; i < 2; i++) {
+		if (!exec[i])
+			continue;
+
+		unref_list_count = 0;
+		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
+			unref_list_count++;
+		state->bo_count += exec[i]->bo_count + unref_list_count;
+	}
+
+	kernel_state->bo = kcalloc(state->bo_count,
+				   sizeof(*kernel_state->bo), GFP_ATOMIC);
 
-	state->bo_count = exec->bo_count + unref_list_count;
-	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
-				   GFP_ATOMIC);
 	if (!kernel_state->bo) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		return;
 	}
 
-	for (i = 0; i < exec->bo_count; i++) {
-		drm_gem_object_reference(&exec->bo[i]->base);
-		kernel_state->bo[i] = &exec->bo[i]->base;
-	}
+	prev_idx = 0;
+	for (i = 0; i < 2; i++) {
+		if (!exec[i])
+			continue;
 
-	list_for_each_entry(bo, &exec->unref_list, unref_head) {
-		drm_gem_object_reference(&bo->base.base);
-		kernel_state->bo[i] = &bo->base.base;
-		i++;
+		for (j = 0; j < exec[i]->bo_count; j++) {
+			drm_gem_object_reference(&exec[i]->bo[j]->base);
+			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
+		}
+
+		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
+			drm_gem_object_reference(&bo->base.base);
+			kernel_state->bo[j + prev_idx] = &bo->base.base;
+			j++;
+		}
+		prev_idx = j + 1;
 	}
 
-	state->start_bin = exec->ct0ca;
-	state->start_render = exec->ct1ca;
+	if (exec[0])
+		state->start_bin = exec[0]->ct0ca;
+	if (exec[1])
+		state->start_render = exec[1]->ct1ca;
 
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 
@@ -267,13 +285,15 @@ vc4_hangcheck_elapsed(unsigned long data)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint32_t ct0ca, ct1ca;
 	unsigned long irqflags;
-	struct vc4_exec_info *exec;
+	struct vc4_exec_info *bin_exec, *render_exec;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
-	exec = vc4_first_job(vc4);
+
+	bin_exec = vc4_first_bin_job(vc4);
+	render_exec = vc4_first_render_job(vc4);
 
 	/* If idle, we can stop watching for hangs. */
-	if (!exec) {
+	if (!bin_exec && !render_exec) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		return;
 	}
@@ -284,9 +304,12 @@ vc4_hangcheck_elapsed(unsigned long data)
 	/* If we've made any progress in execution, rearm the timer
 	 * and wait.
 	 */
-	if (ct0ca != exec->last_ct0ca || ct1ca != exec->last_ct1ca) {
-		exec->last_ct0ca = ct0ca;
-		exec->last_ct1ca = ct1ca;
+	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
+	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
+		if (bin_exec)
+			bin_exec->last_ct0ca = ct0ca;
+		if (render_exec)
+			render_exec->last_ct1ca = ct1ca;
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 		vc4_queue_hangcheck(dev);
 		return;
@@ -386,11 +409,13 @@ vc4_flush_caches(struct drm_device *dev)
  * The job_lock should be held during this.
  */
 void
-vc4_submit_next_job(struct drm_device *dev)
+vc4_submit_next_bin_job(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_exec_info *exec = vc4_first_job(vc4);
+	struct vc4_exec_info *exec;
 
+again:
+	exec = vc4_first_bin_job(vc4);
 	if (!exec)
 		return;
 
@@ -400,11 +425,40 @@ vc4_submit_next_job(struct drm_device *dev)
 	V3D_WRITE(V3D_BPOA, 0);
 	V3D_WRITE(V3D_BPOS, 0);
 
-	if (exec->ct0ca != exec->ct0ea)
+	/* Either put the job in the binner if it uses the binner, or
+	 * immediately move it to the to-be-rendered queue.
+	 */
+	if (exec->ct0ca != exec->ct0ea) {
 		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
+	} else {
+		vc4_move_job_to_render(dev, exec);
+		goto again;
+	}
+}
+
+void
+vc4_submit_next_render_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
+
+	if (!exec)
+		return;
+
 	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
 }
 
+void
+vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	bool was_empty = list_empty(&vc4->render_job_list);
+
+	list_move_tail(&exec->head, &vc4->render_job_list);
+	if (was_empty)
+		vc4_submit_next_render_job(dev);
+}
+
 static void
 vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 {
@@ -443,14 +497,14 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 	exec->seqno = seqno;
 	vc4_update_bo_seqnos(exec, seqno);
 
-	list_add_tail(&exec->head, &vc4->job_list);
+	list_add_tail(&exec->head, &vc4->bin_job_list);
 
 	/* If no job was executing, kick ours off.  Otherwise, it'll
-	 * get started when the previous job's frame done interrupt
+	 * get started when the previous job's flush done interrupt
 	 * occurs.
 	 */
-	if (vc4_first_job(vc4) == exec) {
-		vc4_submit_next_job(dev);
+	if (vc4_first_bin_job(vc4) == exec) {
+		vc4_submit_next_bin_job(dev);
 		vc4_queue_hangcheck(dev);
 	}
 
@@ -859,7 +913,8 @@ vc4_gem_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	INIT_LIST_HEAD(&vc4->job_list);
+	INIT_LIST_HEAD(&vc4->bin_job_list);
+	INIT_LIST_HEAD(&vc4->render_job_list);
 	INIT_LIST_HEAD(&vc4->job_done_list);
 	INIT_LIST_HEAD(&vc4->seqno_cb_list);
 	spin_lock_init(&vc4->job_lock);
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index c69c0460196b..d8b864925fd3 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -47,6 +47,7 @@ struct vc4_hdmi {
 	void __iomem *hdmicore_regs;
 	void __iomem *hd_regs;
 	int hpd_gpio;
+	bool hpd_active_low;
 
 	struct clk *pixel_clock;
 	struct clk *hsm_clock;
@@ -95,6 +96,7 @@ static const struct {
 	HDMI_REG(VC4_HDMI_SW_RESET_CONTROL),
 	HDMI_REG(VC4_HDMI_HOTPLUG_INT),
 	HDMI_REG(VC4_HDMI_HOTPLUG),
+	HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG),
 	HDMI_REG(VC4_HDMI_HORZA),
 	HDMI_REG(VC4_HDMI_HORZB),
 	HDMI_REG(VC4_HDMI_FIFO_CTL),
@@ -165,7 +167,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
 	if (vc4->hdmi->hpd_gpio) {
-		if (gpio_get_value(vc4->hdmi->hpd_gpio))
+		if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^
+		    vc4->hdmi->hpd_active_low)
 			return connector_status_connected;
 		else
 			return connector_status_disconnected;
@@ -495,6 +498,16 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		goto err_put_i2c;
 	}
 
+	/* This is the rate that is set by the firmware.  The number
+	 * needs to be a bit higher than the pixel clock rate
+	 * (generally 148.5Mhz).
+	 */
+	ret = clk_set_rate(hdmi->hsm_clock, 163682864);
+	if (ret) {
+		DRM_ERROR("Failed to set HSM clock rate: %d\n", ret);
+		goto err_unprepare_pix;
+	}
+
 	ret = clk_prepare_enable(hdmi->hsm_clock);
 	if (ret) {
 		DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n",
@@ -506,17 +519,40 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	 * we'll use the HDMI core's register.
 	 */
 	if (of_find_property(dev->of_node, "hpd-gpios", &value)) {
-		hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0);
+		enum of_gpio_flags hpd_gpio_flags;
+
+		hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node,
+							 "hpd-gpios", 0,
+							 &hpd_gpio_flags);
 		if (hdmi->hpd_gpio < 0) {
 			ret = hdmi->hpd_gpio;
 			goto err_unprepare_hsm;
 		}
+
+		hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
 	}
 
 	vc4->hdmi = hdmi;
 
 	/* HDMI core must be enabled. */
-	WARN_ON_ONCE((HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE) == 0);
+	if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) {
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
+		udelay(1);
+		HD_WRITE(VC4_HD_M_CTL, 0);
+
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE);
+
+		HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL,
+			   VC4_HDMI_SW_RESET_HDMI |
+			   VC4_HDMI_SW_RESET_FORMAT_DETECT);
+
+		HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0);
+
+		/* PHY should be in reset, like
+		 * vc4_hdmi_encoder_disable() does.
+		 */
+		HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16);
+	}
 
 	drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 8098c5b21ba4..6fbab1c82cb1 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused)
 }
 #endif
 
+/* The filter kernel is composed of dwords each containing 3 9-bit
+ * signed integers packed next to each other.
+ */
+#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff)
+#define VC4_PPF_FILTER_WORD(c0, c1, c2)				\
+	((((c0) & 0x1ff) << 0) |				\
+	 (((c1) & 0x1ff) << 9) |				\
+	 (((c2) & 0x1ff) << 18))
+
+/* The whole filter kernel is arranged as the coefficients 0-16 going
+ * up, then a pad, then 17-31 going down and reversed within the
+ * dwords.  This means that a linear phase kernel (where it's
+ * symmetrical at the boundary between 15 and 16) has the last 5
+ * dwords matching the first 5, but reversed.
+ */
+#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,	\
+				c9, c10, c11, c12, c13, c14, c15)	\
+	{VC4_PPF_FILTER_WORD(c0, c1, c2),				\
+	 VC4_PPF_FILTER_WORD(c3, c4, c5),				\
+	 VC4_PPF_FILTER_WORD(c6, c7, c8),				\
+	 VC4_PPF_FILTER_WORD(c9, c10, c11),				\
+	 VC4_PPF_FILTER_WORD(c12, c13, c14),				\
+	 VC4_PPF_FILTER_WORD(c15, c15, 0)}
+
+#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
+#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
+
+/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
+ * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
+ */
+static const u32 mitchell_netravali_1_3_1_3_kernel[] =
+	VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
+				50, 82, 119, 155, 187, 213, 227);
+
+static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
+					struct drm_mm_node *space,
+					const u32 *kernel)
+{
+	int ret, i;
+	u32 __iomem *dst_kernel;
+
+	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1,
+				 0);
+	if (ret) {
+		DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
+			  ret);
+		return ret;
+	}
+
+	dst_kernel = hvs->dlist + space->start;
+
+	for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
+		if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS)
+			writel(kernel[i], &dst_kernel[i]);
+		else {
+			writel(kernel[VC4_KERNEL_DWORDS - i - 1],
+			       &dst_kernel[i]);
+		}
+	}
+
+	return 0;
+}
+
 static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 	struct vc4_hvs *hvs = NULL;
+	int ret;
 
 	hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
 	if (!hvs)
@@ -119,6 +183,33 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 
 	hvs->dlist = hvs->regs + SCALER_DLIST_START;
 
+	spin_lock_init(&hvs->mm_lock);
+
+	/* Set up the HVS display list memory manager.  We never
+	 * overwrite the setup from the bootloader (just 128b out of
+	 * our 16K), since we don't want to scramble the screen when
+	 * transitioning from the firmware's boot setup to runtime.
+	 */
+	drm_mm_init(&hvs->dlist_mm,
+		    HVS_BOOTLOADER_DLIST_END,
+		    (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
+
+	/* Set up the HVS LBM memory manager.  We could have some more
+	 * complicated data structure that allowed reuse of LBM areas
+	 * between planes when they don't overlap on the screen, but
+	 * for now we just allocate globally.
+	 */
+	drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
+
+	/* Upload filter kernels.  We only have the one for now, so we
+	 * keep it around for the lifetime of the driver.
+	 */
+	ret = vc4_hvs_upload_linear_kernel(hvs,
+					   &hvs->mitchell_netravali_filter,
+					   mitchell_netravali_1_3_1_3_kernel);
+	if (ret)
+		return ret;
+
 	vc4->hvs = hvs;
 	return 0;
 }
@@ -129,6 +220,12 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 
+	if (vc4->hvs->mitchell_netravali_filter.allocated)
+		drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
+
+	drm_mm_takedown(&vc4->hvs->dlist_mm);
+	drm_mm_takedown(&vc4->hvs->lbm_mm);
+
 	vc4->hvs = NULL;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index 78a21357fb2d..b0104a346a74 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -30,6 +30,10 @@
  * disables that specific interrupt, and 0s written are ignored
  * (reading either one returns the set of enabled interrupts).
  *
+ * When we take a binning flush done interrupt, we need to submit the
+ * next frame for binning and move the finished frame to the render
+ * thread.
+ *
  * When we take a render frame interrupt, we need to wake the
  * processes waiting for some frame to be done, and get the next frame
  * submitted ASAP (so the hardware doesn't sit idle when there's work
@@ -44,6 +48,7 @@
 #include "vc4_regs.h"
 
 #define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \
+			 V3D_INT_FLDONE | \
 			 V3D_INT_FRDONE)
 
 DECLARE_WAIT_QUEUE_HEAD(render_wait);
@@ -77,7 +82,7 @@ vc4_overflow_mem_work(struct work_struct *work)
 		unsigned long irqflags;
 
 		spin_lock_irqsave(&vc4->job_lock, irqflags);
-		current_exec = vc4_first_job(vc4);
+		current_exec = vc4_first_bin_job(vc4);
 		if (current_exec) {
 			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
 			list_add_tail(&vc4->overflow_mem->unref_head,
@@ -98,17 +103,43 @@ vc4_overflow_mem_work(struct work_struct *work)
 }
 
 static void
-vc4_irq_finish_job(struct drm_device *dev)
+vc4_irq_finish_bin_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
+
+	if (!exec)
+		return;
+
+	vc4_move_job_to_render(dev, exec);
+	vc4_submit_next_bin_job(dev);
+}
+
+static void
+vc4_cancel_bin_job(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_exec_info *exec = vc4_first_bin_job(vc4);
+
+	if (!exec)
+		return;
+
+	list_move_tail(&exec->head, &vc4->bin_job_list);
+	vc4_submit_next_bin_job(dev);
+}
+
+static void
+vc4_irq_finish_render_job(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
-	struct vc4_exec_info *exec = vc4_first_job(vc4);
+	struct vc4_exec_info *exec = vc4_first_render_job(vc4);
 
 	if (!exec)
 		return;
 
 	vc4->finished_seqno++;
 	list_move_tail(&exec->head, &vc4->job_done_list);
-	vc4_submit_next_job(dev);
+	vc4_submit_next_render_job(dev);
 
 	wake_up_all(&vc4->job_wait_queue);
 	schedule_work(&vc4->job_done_work);
@@ -125,9 +156,10 @@ vc4_irq(int irq, void *arg)
 	barrier();
 	intctl = V3D_READ(V3D_INTCTL);
 
-	/* Acknowledge the interrupts we're handling here. The render
-	 * frame done interrupt will be cleared, while OUTOMEM will
-	 * stay high until the underlying cause is cleared.
+	/* Acknowledge the interrupts we're handling here. The binner
+	 * last flush / render frame done interrupt will be cleared,
+	 * while OUTOMEM will stay high until the underlying cause is
+	 * cleared.
 	 */
 	V3D_WRITE(V3D_INTCTL, intctl);
 
@@ -138,9 +170,16 @@ vc4_irq(int irq, void *arg)
 		status = IRQ_HANDLED;
 	}
 
+	if (intctl & V3D_INT_FLDONE) {
+		spin_lock(&vc4->job_lock);
+		vc4_irq_finish_bin_job(dev);
+		spin_unlock(&vc4->job_lock);
+		status = IRQ_HANDLED;
+	}
+
 	if (intctl & V3D_INT_FRDONE) {
 		spin_lock(&vc4->job_lock);
-		vc4_irq_finish_job(dev);
+		vc4_irq_finish_render_job(dev);
 		spin_unlock(&vc4->job_lock);
 		status = IRQ_HANDLED;
 	}
@@ -205,6 +244,7 @@ void vc4_irq_reset(struct drm_device *dev)
 	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
-	vc4_irq_finish_job(dev);
+	vc4_cancel_bin_job(dev);
+	vc4_irq_finish_render_job(dev);
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 }
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index f95f2df5f8d1..4718ae5176cc 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -49,6 +49,15 @@ vc4_atomic_complete_commit(struct vc4_commit *c)
 
 	drm_atomic_helper_commit_modeset_enables(dev, state);
 
+	/* Make sure that drm_atomic_helper_wait_for_vblanks()
+	 * actually waits for vblank.  If we're doing a full atomic
+	 * modeset (as opposed to a vc4_update_plane() short circuit),
+	 * then we need to wait for scanout to be done with our
+	 * display lists before we free it and potentially reallocate
+	 * and overwrite the dlist memory with a new modeset.
+	 */
+	state->legacy_cursor_update = false;
+
 	drm_atomic_helper_wait_for_vblanks(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 0addbad15832..7b0c72ae02a0 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -24,19 +24,52 @@
 #include "drm_fb_cma_helper.h"
 #include "drm_plane_helper.h"
 
+enum vc4_scaling_mode {
+	VC4_SCALING_NONE,
+	VC4_SCALING_TPZ,
+	VC4_SCALING_PPF,
+};
+
 struct vc4_plane_state {
 	struct drm_plane_state base;
+	/* System memory copy of the display list for this element, computed
+	 * at atomic_check time.
+	 */
 	u32 *dlist;
-	u32 dlist_size; /* Number of dwords in allocated for the display list */
+	u32 dlist_size; /* Number of dwords allocated for the display list */
 	u32 dlist_count; /* Number of used dwords in the display list. */
 
-	/* Offset in the dlist to pointer word 0. */
-	u32 pw0_offset;
+	/* Offset in the dlist to various words, for pageflip or
+	 * cursor updates.
+	 */
+	u32 pos0_offset;
+	u32 pos2_offset;
+	u32 ptr0_offset;
 
 	/* Offset where the plane's dlist was last stored in the
-	   hardware at vc4_crtc_atomic_flush() time.
-	*/
-	u32 *hw_dlist;
+	 * hardware at vc4_crtc_atomic_flush() time.
+	 */
+	u32 __iomem *hw_dlist;
+
+	/* Clipped coordinates of the plane on the display. */
+	int crtc_x, crtc_y, crtc_w, crtc_h;
+	/* Clipped area being scanned from in the FB. */
+	u32 src_x, src_y;
+
+	u32 src_w[2], src_h[2];
+
+	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
+	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
+	bool is_unity;
+	bool is_yuv;
+
+	/* Offset to start scanning out from the start of the plane's
+	 * BO.
+	 */
+	u32 offsets[3];
+
+	/* Our allocation in LBM for temporary storage during scaling. */
+	struct drm_mm_node lbm;
 };
 
 static inline struct vc4_plane_state *
@@ -50,6 +83,7 @@ static const struct hvs_format {
 	u32 hvs; /* HVS_FORMAT_* */
 	u32 pixel_order;
 	bool has_alpha;
+	bool flip_cbcr;
 } hvs_formats[] = {
 	{
 		.drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
@@ -59,6 +93,48 @@ static const struct hvs_format {
 		.drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
 		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
 	},
+	{
+		.drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+		.pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
+	},
+	{
+		.drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+		.pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false,
+	},
+	{
+		.drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
+	},
+	{
+		.drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
+	},
+	{
+		.drm = DRM_FORMAT_YUV422,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_YVU422,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+		.flip_cbcr = true,
+	},
+	{
+		.drm = DRM_FORMAT_YUV420,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_YVU420,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+		.flip_cbcr = true,
+	},
+	{
+		.drm = DRM_FORMAT_NV12,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_NV16,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
+	},
 };
 
 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
@@ -73,6 +149,16 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
 	return NULL;
 }
 
+static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
+{
+	if (dst > src)
+		return VC4_SCALING_PPF;
+	else if (dst < src)
+		return VC4_SCALING_TPZ;
+	else
+		return VC4_SCALING_NONE;
+}
+
 static bool plane_enabled(struct drm_plane_state *state)
 {
 	return state->fb && state->crtc;
@@ -89,6 +175,8 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
 	if (!vc4_state)
 		return NULL;
 
+	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
+
 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
 
 	if (vc4_state->dlist) {
@@ -108,8 +196,17 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
 static void vc4_plane_destroy_state(struct drm_plane *plane,
 				    struct drm_plane_state *state)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
+	if (vc4_state->lbm.allocated) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+		drm_mm_remove_node(&vc4_state->lbm);
+		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+	}
+
 	kfree(vc4_state->dlist);
 	__drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base);
 	kfree(state);
@@ -148,84 +245,400 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
 	vc4_state->dlist[vc4_state->dlist_count++] = val;
 }
 
+/* Returns the scl0/scl1 field based on whether the dimensions need to
+ * be up/down/non-scaled.
+ *
+ * This is a replication of a table from the spec.
+ */
+static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_PPF_V_PPF;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
+		return SCALER_CTL0_SCL_H_PPF_V_NONE;
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_NONE_V_PPF;
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
+		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
+	default:
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
+		/* The unity case is independently handled by
+		 * SCALER_CTL0_UNITY.
+		 */
+		return 0;
+	}
+}
+
+static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
+{
+	struct drm_plane *plane = state->plane;
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+	u32 subpixel_src_mask = (1 << 16) - 1;
+	u32 format = fb->pixel_format;
+	int num_planes = drm_format_num_planes(format);
+	u32 h_subsample = 1;
+	u32 v_subsample = 1;
+	int i;
+
+	for (i = 0; i < num_planes; i++)
+		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
+
+	/* We don't support subpixel source positioning for scaling. */
+	if ((state->src_x & subpixel_src_mask) ||
+	    (state->src_y & subpixel_src_mask) ||
+	    (state->src_w & subpixel_src_mask) ||
+	    (state->src_h & subpixel_src_mask)) {
+		return -EINVAL;
+	}
+
+	vc4_state->src_x = state->src_x >> 16;
+	vc4_state->src_y = state->src_y >> 16;
+	vc4_state->src_w[0] = state->src_w >> 16;
+	vc4_state->src_h[0] = state->src_h >> 16;
+
+	vc4_state->crtc_x = state->crtc_x;
+	vc4_state->crtc_y = state->crtc_y;
+	vc4_state->crtc_w = state->crtc_w;
+	vc4_state->crtc_h = state->crtc_h;
+
+	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
+						       vc4_state->crtc_w);
+	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
+						       vc4_state->crtc_h);
+
+	if (num_planes > 1) {
+		vc4_state->is_yuv = true;
+
+		h_subsample = drm_format_horz_chroma_subsampling(format);
+		v_subsample = drm_format_vert_chroma_subsampling(format);
+		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
+		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
+
+		vc4_state->x_scaling[1] =
+			vc4_get_scaling_mode(vc4_state->src_w[1],
+					     vc4_state->crtc_w);
+		vc4_state->y_scaling[1] =
+			vc4_get_scaling_mode(vc4_state->src_h[1],
+					     vc4_state->crtc_h);
+
+		/* YUV conversion requires that scaling be enabled,
+		 * even on a plane that's otherwise 1:1.  Choose TPZ
+		 * for simplicity.
+		 */
+		if (vc4_state->x_scaling[0] == VC4_SCALING_NONE)
+			vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
+		if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
+			vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
+	}
+
+	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
+			       vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
+			       vc4_state->x_scaling[1] == VC4_SCALING_NONE &&
+			       vc4_state->y_scaling[1] == VC4_SCALING_NONE);
+
+	/* No configuring scaling on the cursor plane, since it gets
+	   non-vblank-synced updates, and scaling requires requires
+	   LBM changes which have to be vblank-synced.
+	 */
+	if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
+		return -EINVAL;
+
+	/* Clamp the on-screen start x/y to 0.  The hardware doesn't
+	 * support negative y, and negative x wastes bandwidth.
+	 */
+	if (vc4_state->crtc_x < 0) {
+		for (i = 0; i < num_planes; i++) {
+			u32 cpp = drm_format_plane_cpp(fb->pixel_format, i);
+			u32 subs = ((i == 0) ? 1 : h_subsample);
+
+			vc4_state->offsets[i] += (cpp *
+						  (-vc4_state->crtc_x) / subs);
+		}
+		vc4_state->src_w[0] += vc4_state->crtc_x;
+		vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
+		vc4_state->crtc_x = 0;
+	}
+
+	if (vc4_state->crtc_y < 0) {
+		for (i = 0; i < num_planes; i++) {
+			u32 subs = ((i == 0) ? 1 : v_subsample);
+
+			vc4_state->offsets[i] += (fb->pitches[i] *
+						  (-vc4_state->crtc_y) / subs);
+		}
+		vc4_state->src_h[0] += vc4_state->crtc_y;
+		vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
+		vc4_state->crtc_y = 0;
+	}
+
+	return 0;
+}
+
+static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+	u32 scale, recip;
+
+	scale = (1 << 16) * src / dst;
+
+	/* The specs note that while the reciprocal would be defined
+	 * as (1<<32)/scale, ~0 is close enough.
+	 */
+	recip = ~0 / scale;
+
+	vc4_dlist_write(vc4_state,
+			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
+			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
+	vc4_dlist_write(vc4_state,
+			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
+}
+
+static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+	u32 scale = (1 << 16) * src / dst;
+
+	vc4_dlist_write(vc4_state,
+			SCALER_PPF_AGC |
+			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
+			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
+}
+
+static u32 vc4_lbm_size(struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+	/* This is the worst case number.  One of the two sizes will
+	 * be used depending on the scaling configuration.
+	 */
+	u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
+	u32 lbm;
+
+	if (!vc4_state->is_yuv) {
+		if (vc4_state->is_unity)
+			return 0;
+		else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
+			lbm = pix_per_line * 8;
+		else {
+			/* In special cases, this multiplier might be 12. */
+			lbm = pix_per_line * 16;
+		}
+	} else {
+		/* There are cases for this going down to a multiplier
+		 * of 2, but according to the firmware source, the
+		 * table in the docs is somewhat wrong.
+		 */
+		lbm = pix_per_line * 16;
+	}
+
+	lbm = roundup(lbm, 32);
+
+	return lbm;
+}
+
+static void vc4_write_scaling_parameters(struct drm_plane_state *state,
+					 int channel)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+	/* Ch0 H-PPF Word 0: Scaling Parameters */
+	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
+		vc4_write_ppf(vc4_state,
+			      vc4_state->src_w[channel], vc4_state->crtc_w);
+	}
+
+	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
+	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
+		vc4_write_ppf(vc4_state,
+			      vc4_state->src_h[channel], vc4_state->crtc_h);
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	}
+
+	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
+	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
+		vc4_write_tpz(vc4_state,
+			      vc4_state->src_w[channel], vc4_state->crtc_w);
+	}
+
+	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
+	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
+		vc4_write_tpz(vc4_state,
+			      vc4_state->src_h[channel], vc4_state->crtc_h);
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	}
+}
+
 /* Writes out a full display list for an active plane to the plane's
  * private dlist state.
  */
 static int vc4_plane_mode_set(struct drm_plane *plane,
 			      struct drm_plane_state *state)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
 	u32 ctl0_offset = vc4_state->dlist_count;
 	const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
-	uint32_t offset = fb->offsets[0];
-	int crtc_x = state->crtc_x;
-	int crtc_y = state->crtc_y;
-	int crtc_w = state->crtc_w;
-	int crtc_h = state->crtc_h;
-
-	if (state->crtc_w << 16 != state->src_w ||
-	    state->crtc_h << 16 != state->src_h) {
-		/* We don't support scaling yet, which involves
-		 * allocating the LBM memory for scaling temporary
-		 * storage, and putting filter kernels in the HVS
-		 * context.
-		 */
-		return -EINVAL;
+	int num_planes = drm_format_num_planes(format->drm);
+	u32 scl0, scl1;
+	u32 lbm_size;
+	unsigned long irqflags;
+	int ret, i;
+
+	ret = vc4_plane_setup_clipping_and_scaling(state);
+	if (ret)
+		return ret;
+
+	/* Allocate the LBM memory that the HVS will use for temporary
+	 * storage due to our scaling/format conversion.
+	 */
+	lbm_size = vc4_lbm_size(state);
+	if (lbm_size) {
+		if (!vc4_state->lbm.allocated) {
+			spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+			ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
+						 &vc4_state->lbm,
+						 lbm_size, 32, 0);
+			spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+		} else {
+			WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
+		}
 	}
 
-	if (crtc_x < 0) {
-		offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x;
-		crtc_w += crtc_x;
-		crtc_x = 0;
-	}
+	if (ret)
+		return ret;
 
-	if (crtc_y < 0) {
-		offset += fb->pitches[0] * -crtc_y;
-		crtc_h += crtc_y;
-		crtc_y = 0;
+	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
+	 * and 4:4:4, scl1 should be set to scl0 so both channels of
+	 * the scaler do the same thing.  For YUV, the Y plane needs
+	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
+	 * the scl fields here.
+	 */
+	if (num_planes == 1) {
+		scl0 = vc4_get_scl_field(state, 1);
+		scl1 = scl0;
+	} else {
+		scl0 = vc4_get_scl_field(state, 1);
+		scl1 = vc4_get_scl_field(state, 0);
 	}
 
+	/* Control word */
 	vc4_dlist_write(vc4_state,
 			SCALER_CTL0_VALID |
 			(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
 			(format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
-			SCALER_CTL0_UNITY);
+			(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
+			VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
+			VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
 
 	/* Position Word 0: Image Positions and Alpha Value */
+	vc4_state->pos0_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) |
-			VC4_SET_FIELD(crtc_x, SCALER_POS0_START_X) |
-			VC4_SET_FIELD(crtc_y, SCALER_POS0_START_Y));
-
-	/* Position Word 1: Scaled Image Dimensions.
-	 * Skipped due to SCALER_CTL0_UNITY scaling.
-	 */
+			VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
+			VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
+
+	/* Position Word 1: Scaled Image Dimensions. */
+	if (!vc4_state->is_unity) {
+		vc4_dlist_write(vc4_state,
+				VC4_SET_FIELD(vc4_state->crtc_w,
+					      SCALER_POS1_SCL_WIDTH) |
+				VC4_SET_FIELD(vc4_state->crtc_h,
+					      SCALER_POS1_SCL_HEIGHT));
+	}
 
 	/* Position Word 2: Source Image Size, Alpha Mode */
+	vc4_state->pos2_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(format->has_alpha ?
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
 				      SCALER_POS2_ALPHA_MODE_FIXED,
 				      SCALER_POS2_ALPHA_MODE) |
-			VC4_SET_FIELD(crtc_w, SCALER_POS2_WIDTH) |
-			VC4_SET_FIELD(crtc_h, SCALER_POS2_HEIGHT));
+			VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
+			VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
 
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
-	vc4_state->pw0_offset = vc4_state->dlist_count;
 
-	/* Pointer Word 0: RGB / Y Pointer */
-	vc4_dlist_write(vc4_state, bo->paddr + offset);
+	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
+	 *
+	 * The pointers may be any byte address.
+	 */
+	vc4_state->ptr0_offset = vc4_state->dlist_count;
+	if (!format->flip_cbcr) {
+		for (i = 0; i < num_planes; i++)
+			vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
+	} else {
+		WARN_ON_ONCE(num_planes != 3);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[0]);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[2]);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[1]);
+	}
 
-	/* Pointer Context Word 0: Written by the HVS */
-	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	/* Pointer Context Word 0/1/2: Written by the HVS */
+	for (i = 0; i < num_planes; i++)
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
-	/* Pitch word 0: Pointer 0 Pitch */
-	vc4_dlist_write(vc4_state,
-			VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
+	/* Pitch word 0/1/2 */
+	for (i = 0; i < num_planes; i++) {
+		vc4_dlist_write(vc4_state,
+				VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
+	}
+
+	/* Colorspace conversion words */
+	if (vc4_state->is_yuv) {
+		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
+		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
+		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
+	}
+
+	if (!vc4_state->is_unity) {
+		/* LBM Base Address. */
+		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
+			vc4_dlist_write(vc4_state, vc4_state->lbm.start);
+		}
+
+		if (num_planes > 1) {
+			/* Emit Cb/Cr as channel 0 and Y as channel
+			 * 1. This matches how we set up scl0/scl1
+			 * above.
+			 */
+			vc4_write_scaling_parameters(state, 1);
+		}
+		vc4_write_scaling_parameters(state, 0);
+
+		/* If any PPF setup was done, then all the kernel
+		 * pointers get uploaded.
+		 */
+		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
+		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
+		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
+		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
+			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
+						   SCALER_PPF_KERNEL_OFFSET);
+
+			/* HPPF plane 0 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* VPPF plane 0 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* HPPF plane 1 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* VPPF plane 1 */
+			vc4_dlist_write(vc4_state, kernel);
+		}
+	}
 
 	vc4_state->dlist[ctl0_offset] |=
 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
@@ -303,13 +716,13 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
 	 * scanout will start from this address as soon as the FIFO
 	 * needs to refill with pixels.
 	 */
-	writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
+	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
 
 	/* Also update the CPU-side dlist copy, so that any later
 	 * atomic updates that don't do a new modeset on our plane
 	 * also use our updated address.
 	 */
-	vc4_state->dlist[vc4_state->pw0_offset] = addr;
+	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
 }
 
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
@@ -325,8 +738,83 @@ static void vc4_plane_destroy(struct drm_plane *plane)
 	drm_plane_cleanup(plane);
 }
 
+/* Implements immediate (non-vblank-synced) updates of the cursor
+ * position, or falls back to the atomic helper otherwise.
+ */
+static int
+vc4_update_plane(struct drm_plane *plane,
+		 struct drm_crtc *crtc,
+		 struct drm_framebuffer *fb,
+		 int crtc_x, int crtc_y,
+		 unsigned int crtc_w, unsigned int crtc_h,
+		 uint32_t src_x, uint32_t src_y,
+		 uint32_t src_w, uint32_t src_h)
+{
+	struct drm_plane_state *plane_state;
+	struct vc4_plane_state *vc4_state;
+
+	if (plane != crtc->cursor)
+		goto out;
+
+	plane_state = plane->state;
+	vc4_state = to_vc4_plane_state(plane_state);
+
+	if (!plane_state)
+		goto out;
+
+	/* If we're changing the cursor contents, do that in the
+	 * normal vblank-synced atomic path.
+	 */
+	if (fb != plane_state->fb)
+		goto out;
+
+	/* No configuring new scaling in the fast path. */
+	if (crtc_w != plane_state->crtc_w ||
+	    crtc_h != plane_state->crtc_h ||
+	    src_w != plane_state->src_w ||
+	    src_h != plane_state->src_h) {
+		goto out;
+	}
+
+	/* Set the cursor's position on the screen.  This is the
+	 * expected change from the drm_mode_cursor_universal()
+	 * helper.
+	 */
+	plane_state->crtc_x = crtc_x;
+	plane_state->crtc_y = crtc_y;
+
+	/* Allow changing the start position within the cursor BO, if
+	 * that matters.
+	 */
+	plane_state->src_x = src_x;
+	plane_state->src_y = src_y;
+
+	/* Update the display list based on the new crtc_x/y. */
+	vc4_plane_atomic_check(plane, plane_state);
+
+	/* Note that we can't just call vc4_plane_write_dlist()
+	 * because that would smash the context data that the HVS is
+	 * currently using.
+	 */
+	writel(vc4_state->dlist[vc4_state->pos0_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
+	writel(vc4_state->dlist[vc4_state->pos2_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
+	writel(vc4_state->dlist[vc4_state->ptr0_offset],
+	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
+
+	return 0;
+
+out:
+	return drm_atomic_helper_update_plane(plane, crtc, fb,
+					      crtc_x, crtc_y,
+					      crtc_w, crtc_h,
+					      src_x, src_y,
+					      src_w, src_h);
+}
+
 static const struct drm_plane_funcs vc4_plane_funcs = {
-	.update_plane = drm_atomic_helper_update_plane,
+	.update_plane = vc4_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = vc4_plane_destroy,
 	.set_property = NULL,
@@ -341,6 +829,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 	struct drm_plane *plane = NULL;
 	struct vc4_plane *vc4_plane;
 	u32 formats[ARRAY_SIZE(hvs_formats)];
+	u32 num_formats = 0;
 	int ret = 0;
 	unsigned i;
 
@@ -351,12 +840,20 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 		goto fail;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
-		formats[i] = hvs_formats[i].drm;
+	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
+		/* Don't allow YUV in cursor planes, since that means
+		 * tuning on the scaler, which we don't allow for the
+		 * cursor.
+		 */
+		if (type != DRM_PLANE_TYPE_CURSOR ||
+		    hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
+			formats[num_formats++] = hvs_formats[i].drm;
+		}
+	}
 	plane = &vc4_plane->base;
 	ret = drm_universal_plane_init(dev, plane, 0xff,
 				       &vc4_plane_funcs,
-				       formats, ARRAY_SIZE(formats),
+				       formats, num_formats,
 				       type, NULL);
 
 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 4e52a0a88551..bf42a8e87111 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -187,7 +187,7 @@
 # define PV_VCONTROL_CONTINUOUS			BIT(1)
 # define PV_VCONTROL_VIDEN			BIT(0)
 
-#define PV_VSYNCD				0x08
+#define PV_VSYNCD_EVEN				0x08
 
 #define PV_HORZA				0x0c
 # define PV_HORZA_HBP_MASK			VC4_MASK(31, 16)
@@ -350,6 +350,17 @@
 # define SCALER_DISPCTRLX_HEIGHT_SHIFT		0
 
 #define SCALER_DISPBKGND0                       0x00000044
+# define SCALER_DISPBKGND_AUTOHS		BIT(31)
+# define SCALER_DISPBKGND_INTERLACE		BIT(30)
+# define SCALER_DISPBKGND_GAMMA			BIT(29)
+# define SCALER_DISPBKGND_TESTMODE_MASK		VC4_MASK(28, 25)
+# define SCALER_DISPBKGND_TESTMODE_SHIFT	25
+/* Enables filling the scaler line with the RGB value in the low 24
+ * bits before compositing.  Costs cycles, so should be skipped if
+ * opaque display planes will cover everything.
+ */
+# define SCALER_DISPBKGND_FILL			BIT(24)
+
 #define SCALER_DISPSTAT0                        0x00000048
 #define SCALER_DISPBASE0                        0x0000004c
 # define SCALER_DISPSTATX_MODE_MASK		VC4_MASK(31, 30)
@@ -362,6 +373,9 @@
 # define SCALER_DISPSTATX_EMPTY			BIT(28)
 #define SCALER_DISPCTRL1                        0x00000050
 #define SCALER_DISPBKGND1                       0x00000054
+#define SCALER_DISPBKGNDX(x)			(SCALER_DISPBKGND0 +        \
+						 (x) * (SCALER_DISPBKGND1 - \
+							SCALER_DISPBKGND0))
 #define SCALER_DISPSTAT1                        0x00000058
 #define SCALER_DISPSTATX(x)			(SCALER_DISPSTAT0 +        \
 						 (x) * (SCALER_DISPSTAT1 - \
@@ -456,6 +470,8 @@
 #define VC4_HDMI_TX_PHY_RESET_CTL		0x2c0
 
 #define VC4_HD_M_CTL				0x00c
+# define VC4_HD_M_REGISTER_FILE_STANDBY		(3 << 6)
+# define VC4_HD_M_RAM_STANDBY			(3 << 4)
 # define VC4_HD_M_SW_RST			BIT(2)
 # define VC4_HD_M_ENABLE			BIT(0)
 
@@ -503,7 +519,12 @@ enum hvs_pixel_format {
 	HVS_PIXEL_FORMAT_RGB888 = 5,
 	HVS_PIXEL_FORMAT_RGBA6666 = 6,
 	/* 32bpp */
-	HVS_PIXEL_FORMAT_RGBA8888 = 7
+	HVS_PIXEL_FORMAT_RGBA8888 = 7,
+
+	HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8,
+	HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9,
+	HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10,
+	HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11,
 };
 
 /* Note: the LSB is the rightmost character shown.  Only valid for
@@ -536,6 +557,21 @@ enum hvs_pixel_format {
 #define SCALER_CTL0_ORDER_MASK			VC4_MASK(14, 13)
 #define SCALER_CTL0_ORDER_SHIFT			13
 
+#define SCALER_CTL0_SCL1_MASK			VC4_MASK(10, 8)
+#define SCALER_CTL0_SCL1_SHIFT			8
+
+#define SCALER_CTL0_SCL0_MASK			VC4_MASK(7, 5)
+#define SCALER_CTL0_SCL0_SHIFT			5
+
+#define SCALER_CTL0_SCL_H_PPF_V_PPF		0
+#define SCALER_CTL0_SCL_H_TPZ_V_PPF		1
+#define SCALER_CTL0_SCL_H_PPF_V_TPZ		2
+#define SCALER_CTL0_SCL_H_TPZ_V_TPZ		3
+#define SCALER_CTL0_SCL_H_PPF_V_NONE		4
+#define SCALER_CTL0_SCL_H_NONE_V_PPF		5
+#define SCALER_CTL0_SCL_H_NONE_V_TPZ		6
+#define SCALER_CTL0_SCL_H_TPZ_V_NONE		7
+
 /* Set to indicate no scaling. */
 #define SCALER_CTL0_UNITY			BIT(4)
 
@@ -551,6 +587,12 @@ enum hvs_pixel_format {
 #define SCALER_POS0_START_X_MASK		VC4_MASK(11, 0)
 #define SCALER_POS0_START_X_SHIFT		0
 
+#define SCALER_POS1_SCL_HEIGHT_MASK		VC4_MASK(27, 16)
+#define SCALER_POS1_SCL_HEIGHT_SHIFT		16
+
+#define SCALER_POS1_SCL_WIDTH_MASK		VC4_MASK(11, 0)
+#define SCALER_POS1_SCL_WIDTH_SHIFT		0
+
 #define SCALER_POS2_ALPHA_MODE_MASK		VC4_MASK(31, 30)
 #define SCALER_POS2_ALPHA_MODE_SHIFT		30
 #define SCALER_POS2_ALPHA_MODE_PIPELINE		0
@@ -564,6 +606,80 @@ enum hvs_pixel_format {
 #define SCALER_POS2_WIDTH_MASK			VC4_MASK(11, 0)
 #define SCALER_POS2_WIDTH_SHIFT			0
 
+/* Color Space Conversion words.  Some values are S2.8 signed
+ * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1,
+ * 0x2: 2, 0x3: -1}
+ */
+/* bottom 8 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC0_COEF_CR_BLU_MASK		VC4_MASK(31, 24)
+#define SCALER_CSC0_COEF_CR_BLU_SHIFT		24
+/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */
+#define SCALER_CSC0_COEF_YY_OFS_MASK		VC4_MASK(23, 16)
+#define SCALER_CSC0_COEF_YY_OFS_SHIFT		16
+/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */
+#define SCALER_CSC0_COEF_CB_OFS_MASK		VC4_MASK(15, 8)
+#define SCALER_CSC0_COEF_CB_OFS_SHIFT		8
+/* Signed offset to apply to CB before CSC (Cr' = Cr - 128 + CR_OFS). */
+#define SCALER_CSC0_COEF_CR_OFS_MASK		VC4_MASK(7, 0)
+#define SCALER_CSC0_COEF_CR_OFS_SHIFT		0
+#define SCALER_CSC0_ITR_R_601_5			0x00f00000
+#define SCALER_CSC0_ITR_R_709_3			0x00f00000
+#define SCALER_CSC0_JPEG_JFIF			0x00000000
+
+/* S2.8 contribution of Cb to Green */
+#define SCALER_CSC1_COEF_CB_GRN_MASK		VC4_MASK(31, 22)
+#define SCALER_CSC1_COEF_CB_GRN_SHIFT		22
+/* S2.8 contribution of Cr to Green */
+#define SCALER_CSC1_COEF_CR_GRN_MASK		VC4_MASK(21, 12)
+#define SCALER_CSC1_COEF_CR_GRN_SHIFT		12
+/* S2.8 contribution of Y to all of RGB */
+#define SCALER_CSC1_COEF_YY_ALL_MASK		VC4_MASK(11, 2)
+#define SCALER_CSC1_COEF_YY_ALL_SHIFT		2
+/* top 2 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC1_COEF_CR_BLU_MASK		VC4_MASK(1, 0)
+#define SCALER_CSC1_COEF_CR_BLU_SHIFT		0
+#define SCALER_CSC1_ITR_R_601_5			0xe73304a8
+#define SCALER_CSC1_ITR_R_709_3			0xf2b784a8
+#define SCALER_CSC1_JPEG_JFIF			0xea34a400
+
+/* S2.8 contribution of Cb to Red */
+#define SCALER_CSC2_COEF_CB_RED_MASK		VC4_MASK(29, 20)
+#define SCALER_CSC2_COEF_CB_RED_SHIFT		20
+/* S2.8 contribution of Cr to Red */
+#define SCALER_CSC2_COEF_CR_RED_MASK		VC4_MASK(19, 10)
+#define SCALER_CSC2_COEF_CR_RED_SHIFT		10
+/* S2.8 contribution of Cb to Blue */
+#define SCALER_CSC2_COEF_CB_BLU_MASK		VC4_MASK(19, 10)
+#define SCALER_CSC2_COEF_CB_BLU_SHIFT		10
+#define SCALER_CSC2_ITR_R_601_5			0x00066204
+#define SCALER_CSC2_ITR_R_709_3			0x00072a1c
+#define SCALER_CSC2_JPEG_JFIF			0x000599c5
+
+#define SCALER_TPZ0_VERT_RECALC			BIT(31)
+#define SCALER_TPZ0_SCALE_MASK			VC4_MASK(28, 8)
+#define SCALER_TPZ0_SCALE_SHIFT			8
+#define SCALER_TPZ0_IPHASE_MASK			VC4_MASK(7, 0)
+#define SCALER_TPZ0_IPHASE_SHIFT		0
+#define SCALER_TPZ1_RECIP_MASK			VC4_MASK(15, 0)
+#define SCALER_TPZ1_RECIP_SHIFT			0
+
+/* Skips interpolating coefficients to 64 phases, so just 8 are used.
+ * Required for nearest neighbor.
+ */
+#define SCALER_PPF_NOINTERP			BIT(31)
+/* Replaes the highest valued coefficient with one that makes all 4
+ * sum to unity.
+ */
+#define SCALER_PPF_AGC				BIT(30)
+#define SCALER_PPF_SCALE_MASK			VC4_MASK(24, 8)
+#define SCALER_PPF_SCALE_SHIFT			8
+#define SCALER_PPF_IPHASE_MASK			VC4_MASK(6, 0)
+#define SCALER_PPF_IPHASE_SHIFT			0
+
+#define SCALER_PPF_KERNEL_OFFSET_MASK		VC4_MASK(13, 0)
+#define SCALER_PPF_KERNEL_OFFSET_SHIFT		0
+#define SCALER_PPF_KERNEL_UNCACHED		BIT(31)
+
 #define SCALER_SRC_PITCH_MASK			VC4_MASK(15, 0)
 #define SCALER_SRC_PITCH_SHIFT			0
 
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 31de5d17bc85..e6d3c6028341 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -268,6 +268,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id vc4_v3d_dt_match[] = {
+	{ .compatible = "brcm,bcm2835-v3d" },
 	{ .compatible = "brcm,vc4-v3d" },
 	{}
 };
diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c
index d0cbd5ecd7f0..e797dfc07ae3 100644
--- a/drivers/gpu/drm/via/via_dmablit.c
+++ b/drivers/gpu/drm/via/via_dmablit.c
@@ -239,8 +239,7 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg,  drm_via_dmablit_t *xfer)
 	if (NULL == vsg->pages)
 		return -ENOMEM;
 	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(current, current->mm,
-			     (unsigned long)xfer->mem_addr,
+	ret = get_user_pages((unsigned long)xfer->mem_addr,
 			     vsg->num_pages,
 			     (vsg->direction == DMA_FROM_DEVICE),
 			     0, vsg->pages, NULL);
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index a165f03eaa79..4854dac87e24 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -237,13 +237,6 @@ virtio_gpu_framebuffer_init(struct drm_device *dev,
 	return 0;
 }
 
-static bool virtio_gpu_crtc_mode_fixup(struct drm_crtc *crtc,
-				       const struct drm_display_mode *mode,
-				       struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void virtio_gpu_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -277,18 +270,10 @@ static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc,
 static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = {
 	.enable        = virtio_gpu_crtc_enable,
 	.disable       = virtio_gpu_crtc_disable,
-	.mode_fixup    = virtio_gpu_crtc_mode_fixup,
 	.mode_set_nofb = virtio_gpu_crtc_mode_set_nofb,
 	.atomic_check  = virtio_gpu_crtc_atomic_check,
 };
 
-static bool virtio_gpu_enc_mode_fixup(struct drm_encoder *encoder,
-				      const struct drm_display_mode *mode,
-				      struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static void virtio_gpu_enc_mode_set(struct drm_encoder *encoder,
 				    struct drm_display_mode *mode,
 				    struct drm_display_mode *adjusted_mode)
@@ -362,7 +347,6 @@ virtio_gpu_best_encoder(struct drm_connector *connector)
 }
 
 static const struct drm_encoder_helper_funcs virtio_gpu_enc_helper_funcs = {
-	.mode_fixup = virtio_gpu_enc_mode_fixup,
 	.mode_set   = virtio_gpu_enc_mode_set,
 	.enable     = virtio_gpu_enc_enable,
 	.disable    = virtio_gpu_enc_disable,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index b40ed6061f05..7f898cfdc746 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -118,7 +118,7 @@ static const struct file_operations virtio_gpu_driver_fops = {
 
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER,
+	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_ATOMIC,
 	.set_busid = drm_virtio_set_busid,
 	.load = virtio_gpu_driver_load,
 	.unload = virtio_gpu_driver_unload,
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 572fb351feab..70b44a2345ab 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -68,10 +68,17 @@ static void virtio_gpu_plane_atomic_update(struct drm_plane *plane,
 	struct virtio_gpu_object *bo;
 	uint32_t handle;
 
-	if (plane->fb) {
-		vgfb = to_virtio_gpu_framebuffer(plane->fb);
+	if (plane->state->fb) {
+		vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
 		bo = gem_to_virtio_gpu_obj(vgfb->obj);
 		handle = bo->hw_res_handle;
+		if (bo->dumb) {
+			virtio_gpu_cmd_transfer_to_host_2d
+				(vgdev, handle, 0,
+				 cpu_to_le32(plane->state->crtc_w),
+				 cpu_to_le32(plane->state->crtc_h),
+				 plane->state->crtc_x, plane->state->crtc_y, NULL);
+		}
 	} else {
 		handle = 0;
 	}
@@ -84,6 +91,11 @@ static void virtio_gpu_plane_atomic_update(struct drm_plane *plane,
 				   plane->state->crtc_h,
 				   plane->state->crtc_x,
 				   plane->state->crtc_y);
+	virtio_gpu_cmd_resource_flush(vgdev, handle,
+				      plane->state->crtc_x,
+				      plane->state->crtc_y,
+				      plane->state->crtc_w,
+				      plane->state->crtc_h);
 }
 
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 24fb348a44e1..6cbb7d4bdd11 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -195,7 +195,7 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_MASTER | DRM_AUTH),
 	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT,
 		      vmw_kms_update_layout_ioctl,
-		      DRM_MASTER),
+		      DRM_MASTER | DRM_CONTROL_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_SHADER,
 		      vmw_shader_define_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
@@ -972,15 +972,6 @@ static int vmw_driver_unload(struct drm_device *dev)
 	return 0;
 }
 
-static void vmw_preclose(struct drm_device *dev,
-			 struct drm_file *file_priv)
-{
-	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
-	struct vmw_private *dev_priv = vmw_priv(dev);
-
-	vmw_event_fence_fpriv_gone(dev_priv->fman, &vmw_fp->fence_events);
-}
-
 static void vmw_postclose(struct drm_device *dev,
 			 struct drm_file *file_priv)
 {
@@ -1011,7 +1002,6 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 	if (unlikely(vmw_fp == NULL))
 		return ret;
 
-	INIT_LIST_HEAD(&vmw_fp->fence_events);
 	vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev, 10);
 	if (unlikely(vmw_fp->tfile == NULL))
 		goto out_no_tfile;
@@ -1214,6 +1204,7 @@ static int vmw_master_set(struct drm_device *dev,
 	}
 
 	dev_priv->active_master = vmaster;
+	drm_sysfs_hotplug_event(dev);
 
 	return 0;
 }
@@ -1501,7 +1492,6 @@ static struct drm_driver driver = {
 	.master_set = vmw_master_set,
 	.master_drop = vmw_master_drop,
 	.open = vmw_driver_open,
-	.preclose = vmw_preclose,
 	.postclose = vmw_postclose,
 	.set_busid = drm_pci_set_busid,
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 469cdd520615..019a6ca3e8e9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -40,9 +40,9 @@
 #include <drm/ttm/ttm_module.h>
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20150810"
+#define VMWGFX_DRIVER_DATE "20160210"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 9
+#define VMWGFX_DRIVER_MINOR 10
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
@@ -80,7 +80,6 @@
 struct vmw_fpriv {
 	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
-	struct list_head fence_events;
 	bool gb_aware;
 };
 
@@ -408,8 +407,11 @@ struct vmw_private {
 	void *fb_info;
 	enum vmw_display_unit_type active_display_unit;
 	struct vmw_legacy_display *ldu_priv;
-	struct vmw_screen_object_display *sou_priv;
 	struct vmw_overlay *overlay_priv;
+	struct drm_property *hotplug_mode_update_property;
+	struct drm_property *implicit_placement_property;
+	unsigned num_implicit;
+	struct vmw_framebuffer *implicit_fb;
 
 	/*
 	 * Context and surface management.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 5da5de0cb522..723ba16c6084 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3009,6 +3009,26 @@ out_unref:
 	return ret;
 }
 
+/**
+ * vmw_cmd_dx_genmips - Validate an SVGA_3D_CMD_DX_GENMIPS command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv,
+			      struct vmw_sw_context *sw_context,
+			      SVGA3dCmdHeader *header)
+{
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDXGenMips body;
+	} *cmd = container_of(header, typeof(*cmd), header);
+
+	return vmw_view_id_val_add(sw_context, vmw_view_sr,
+				   cmd->body.shaderResourceViewId);
+}
+
 static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
 				struct vmw_sw_context *sw_context,
 				void *buf, uint32_t *size)
@@ -3297,7 +3317,7 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = {
 		    &vmw_cmd_dx_clear_depthstencil_view, true, false, true),
 	VMW_CMD_DEF(SVGA_3D_CMD_DX_PRED_COPY, &vmw_cmd_invalid,
 		    true, false, true),
-	VMW_CMD_DEF(SVGA_3D_CMD_DX_GENMIPS, &vmw_cmd_invalid,
+	VMW_CMD_DEF(SVGA_3D_CMD_DX_GENMIPS, &vmw_cmd_dx_genmips,
 		    true, false, true),
 	VMW_CMD_DEF(SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE,
 		    &vmw_cmd_dx_check_subresource, true, false, true),
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 8e689b439890..e959df6ede83 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -71,7 +71,6 @@ struct vmw_user_fence {
  */
 struct vmw_event_fence_action {
 	struct vmw_fence_action action;
-	struct list_head fpriv_head;
 
 	struct drm_pending_event *event;
 	struct vmw_fence_obj *fence;
@@ -808,44 +807,6 @@ int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 }
 
 /**
- * vmw_event_fence_fpriv_gone - Remove references to struct drm_file objects
- *
- * @fman: Pointer to a struct vmw_fence_manager
- * @event_list: Pointer to linked list of struct vmw_event_fence_action objects
- * with pointers to a struct drm_file object about to be closed.
- *
- * This function removes all pending fence events with references to a
- * specific struct drm_file object about to be closed. The caller is required
- * to pass a list of all struct vmw_event_fence_action objects with such
- * events attached. This function is typically called before the
- * struct drm_file object's event management is taken down.
- */
-void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
-				struct list_head *event_list)
-{
-	struct vmw_event_fence_action *eaction;
-	struct drm_pending_event *event;
-	unsigned long irq_flags;
-
-	while (1) {
-		spin_lock_irqsave(&fman->lock, irq_flags);
-		if (list_empty(event_list))
-			goto out_unlock;
-		eaction = list_first_entry(event_list,
-					   struct vmw_event_fence_action,
-					   fpriv_head);
-		list_del_init(&eaction->fpriv_head);
-		event = eaction->event;
-		eaction->event = NULL;
-		spin_unlock_irqrestore(&fman->lock, irq_flags);
-		event->destroy(event);
-	}
-out_unlock:
-	spin_unlock_irqrestore(&fman->lock, irq_flags);
-}
-
-
-/**
  * vmw_event_fence_action_seq_passed
  *
  * @action: The struct vmw_fence_action embedded in a struct
@@ -879,10 +840,8 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
 		*eaction->tv_usec = tv.tv_usec;
 	}
 
-	list_del_init(&eaction->fpriv_head);
-	list_add_tail(&eaction->event->link, &file_priv->event_list);
+	drm_send_event_locked(dev, eaction->event);
 	eaction->event = NULL;
-	wake_up_all(&file_priv->event_wait);
 	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
 }
 
@@ -899,12 +858,6 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
 {
 	struct vmw_event_fence_action *eaction =
 		container_of(action, struct vmw_event_fence_action, action);
-	struct vmw_fence_manager *fman = fman_from_fence(eaction->fence);
-	unsigned long irq_flags;
-
-	spin_lock_irqsave(&fman->lock, irq_flags);
-	list_del(&eaction->fpriv_head);
-	spin_unlock_irqrestore(&fman->lock, irq_flags);
 
 	vmw_fence_obj_unreference(&eaction->fence);
 	kfree(eaction);
@@ -984,8 +937,6 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
 {
 	struct vmw_event_fence_action *eaction;
 	struct vmw_fence_manager *fman = fman_from_fence(fence);
-	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
-	unsigned long irq_flags;
 
 	eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
 	if (unlikely(eaction == NULL))
@@ -1002,10 +953,6 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
 	eaction->tv_sec = tv_sec;
 	eaction->tv_usec = tv_usec;
 
-	spin_lock_irqsave(&fman->lock, irq_flags);
-	list_add_tail(&eaction->fpriv_head, &vmw_fp->fence_events);
-	spin_unlock_irqrestore(&fman->lock, irq_flags);
-
 	vmw_fence_obj_add_action(fence, &eaction->action);
 
 	return 0;
@@ -1025,38 +972,26 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
 	struct vmw_event_fence_pending *event;
 	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	struct drm_device *dev = fman->dev_priv->dev;
-	unsigned long irq_flags;
 	int ret;
 
-	spin_lock_irqsave(&dev->event_lock, irq_flags);
-
-	ret = (file_priv->event_space < sizeof(event->event)) ? -EBUSY : 0;
-	if (likely(ret == 0))
-		file_priv->event_space -= sizeof(event->event);
-
-	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
-
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Failed to allocate event space for this file.\n");
-		goto out_no_space;
-	}
-
-
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (unlikely(event == NULL)) {
 		DRM_ERROR("Failed to allocate an event.\n");
 		ret = -ENOMEM;
-		goto out_no_event;
+		goto out_no_space;
 	}
 
 	event->event.base.type = DRM_VMW_EVENT_FENCE_SIGNALED;
 	event->event.base.length = sizeof(*event);
 	event->event.user_data = user_data;
 
-	event->base.event = &event->event.base;
-	event->base.file_priv = file_priv;
-	event->base.destroy = (void (*) (struct drm_pending_event *)) kfree;
+	ret = drm_event_reserve_init(dev, file_priv, &event->base, &event->event.base);
 
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate event space for this file.\n");
+		kfree(event);
+		goto out_no_space;
+	}
 
 	if (flags & DRM_VMW_FE_FLAG_REQ_TIME)
 		ret = vmw_event_fence_action_queue(file_priv, fence,
@@ -1076,11 +1011,7 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
 	return 0;
 
 out_no_queue:
-	event->base.destroy(&event->base);
-out_no_event:
-	spin_lock_irqsave(&dev->event_lock, irq_flags);
-	file_priv->event_space += sizeof(*event);
-	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+	drm_event_cancel_free(dev, &event->base);
 out_no_space:
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index 8be6c29f5eb5..83ae301ee141 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -116,8 +116,6 @@ extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file_priv);
 extern int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv);
-extern void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
-				       struct list_head *event_list);
 extern int vmw_event_fence_action_queue(struct drm_file *filee_priv,
 					struct vmw_fence_obj *fence,
 					struct drm_pending_event *event,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b221a8c40282..4742ec4ead27 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -236,8 +236,8 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
 	bool shown = du->cursor_surface || du->cursor_dmabuf ? true : false;
 
-	du->cursor_x = x + crtc->x;
-	du->cursor_y = y + crtc->y;
+	du->cursor_x = x + du->set_gui_x;
+	du->cursor_y = y + du->set_gui_y;
 
 	/*
 	 * FIXME: Unclear whether there's any global state touched by the
@@ -663,9 +663,8 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 		break;
 	case vmw_du_screen_object:
 		ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, &vfbd->base,
-						  clips, num_clips, increment,
-						  true,
-						  NULL);
+						  clips, NULL, num_clips,
+						  increment, true, NULL);
 		break;
 	case vmw_du_legacy:
 		ret = vmw_kms_ldu_do_dmabuf_dirty(dev_priv, &vfbd->base, 0, 0,
@@ -1109,6 +1108,22 @@ int vmw_kms_present(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static void
+vmw_kms_create_hotplug_mode_update_property(struct vmw_private *dev_priv)
+{
+	if (dev_priv->hotplug_mode_update_property)
+		return;
+
+	dev_priv->hotplug_mode_update_property =
+		drm_property_create_range(dev_priv->dev,
+					  DRM_MODE_PROP_IMMUTABLE,
+					  "hotplug_mode_update", 0, 1);
+
+	if (!dev_priv->hotplug_mode_update_property)
+		return;
+
+}
+
 int vmw_kms_init(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -1121,6 +1136,9 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 	dev->mode_config.max_width = dev_priv->texture_max_width;
 	dev->mode_config.max_height = dev_priv->texture_max_height;
 
+	drm_mode_create_suggested_offset_properties(dev);
+	vmw_kms_create_hotplug_mode_update_property(dev_priv);
+
 	ret = vmw_kms_stdu_init_display(dev_priv);
 	if (ret) {
 		ret = vmw_kms_sou_init_display(dev_priv);
@@ -1360,15 +1378,28 @@ static int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
 			du->pref_active = true;
 			du->gui_x = rects[du->unit].x;
 			du->gui_y = rects[du->unit].y;
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_x_property,
+			   du->gui_x);
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_y_property,
+			   du->gui_y);
 		} else {
 			du->pref_width = 800;
 			du->pref_height = 600;
 			du->pref_active = false;
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_x_property,
+			   0);
+			drm_object_property_set_value
+			  (&con->base, dev->mode_config.suggested_y_property,
+			   0);
 		}
 		con->status = vmw_du_connector_detect(con, true);
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
+	drm_sysfs_hotplug_event(dev);
 
 	return 0;
 }
@@ -1591,6 +1622,12 @@ int vmw_du_connector_set_property(struct drm_connector *connector,
 				  struct drm_property *property,
 				  uint64_t val)
 {
+	struct vmw_display_unit *du = vmw_connector_to_du(connector);
+	struct vmw_private *dev_priv = vmw_priv(connector->dev);
+
+	if (property == dev_priv->implicit_placement_property)
+		du->is_implicit = val;
+
 	return 0;
 }
 
@@ -2096,3 +2133,119 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 
 	return 0;
 }
+
+/**
+ * vmw_kms_del_active - unregister a crtc binding to the implicit framebuffer
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @du: The display unit of the crtc.
+ */
+void vmw_kms_del_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du)
+{
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (du->active_implicit) {
+		if (--(dev_priv->num_implicit) == 0)
+			dev_priv->implicit_fb = NULL;
+		du->active_implicit = false;
+	}
+}
+
+/**
+ * vmw_kms_add_active - register a crtc binding to an implicit framebuffer
+ *
+ * @vmw_priv: Pointer to a device private struct.
+ * @du: The display unit of the crtc.
+ * @vfb: The implicit framebuffer
+ *
+ * Registers a binding to an implicit framebuffer.
+ */
+void vmw_kms_add_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du,
+			struct vmw_framebuffer *vfb)
+{
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	WARN_ON_ONCE(!dev_priv->num_implicit && dev_priv->implicit_fb);
+
+	if (!du->active_implicit && du->is_implicit) {
+		dev_priv->implicit_fb = vfb;
+		du->active_implicit = true;
+		dev_priv->num_implicit++;
+	}
+}
+
+/**
+ * vmw_kms_screen_object_flippable - Check whether we can page-flip a crtc.
+ *
+ * @dev_priv: Pointer to device-private struct.
+ * @crtc: The crtc we want to flip.
+ *
+ * Returns true or false depending whether it's OK to flip this crtc
+ * based on the criterion that we must not have more than one implicit
+ * frame-buffer at any one time.
+ */
+bool vmw_kms_crtc_flippable(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc)
+{
+	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (!du->is_implicit)
+		return true;
+
+	if (dev_priv->num_implicit != 1)
+		return false;
+
+	return true;
+}
+
+/**
+ * vmw_kms_update_implicit_fb - Update the implicit fb.
+ *
+ * @dev_priv: Pointer to device-private struct.
+ * @crtc: The crtc the new implicit frame-buffer is bound to.
+ */
+void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv,
+				struct drm_crtc *crtc)
+{
+	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
+	struct vmw_framebuffer *vfb;
+
+	lockdep_assert_held_once(&dev_priv->dev->mode_config.mutex);
+
+	if (!du->is_implicit)
+		return;
+
+	vfb = vmw_framebuffer_to_vfb(crtc->primary->fb);
+	WARN_ON_ONCE(dev_priv->num_implicit != 1 &&
+		     dev_priv->implicit_fb != vfb);
+
+	dev_priv->implicit_fb = vfb;
+}
+
+/**
+ * vmw_kms_create_implicit_placement_proparty - Set up the implicit placement
+ * property.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @immutable: Whether the property is immutable.
+ *
+ * Sets up the implicit placement property unless it's already set up.
+ */
+void
+vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
+					   bool immutable)
+{
+	if (dev_priv->implicit_placement_property)
+		return;
+
+	dev_priv->implicit_placement_property =
+		drm_property_create_range(dev_priv->dev,
+					  immutable ?
+					  DRM_MODE_PROP_IMMUTABLE : 0,
+					  "implicit_placement", 0, 1);
+
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index edd81503516d..57203212c501 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -178,6 +178,9 @@ struct vmw_display_unit {
 	int gui_x;
 	int gui_y;
 	bool is_implicit;
+	bool active_implicit;
+	int set_gui_x;
+	int set_gui_y;
 };
 
 #define vmw_crtc_to_du(x) \
@@ -254,6 +257,18 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
 			    struct drm_crtc **p_crtc,
 			    struct drm_display_mode **p_mode);
 void vmw_guess_mode_timing(struct drm_display_mode *mode);
+void vmw_kms_del_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du);
+void vmw_kms_add_active(struct vmw_private *dev_priv,
+			struct vmw_display_unit *du,
+			struct vmw_framebuffer *vfb);
+bool vmw_kms_crtc_flippable(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc);
+void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv,
+				struct drm_crtc *crtc);
+void vmw_kms_create_implicit_placement_property(struct vmw_private *dev_priv,
+						bool immutable);
+
 
 /*
  * Legacy display unit functions - vmwgfx_ldu.c
@@ -287,6 +302,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
 int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 				struct vmw_framebuffer *framebuffer,
 				struct drm_clip_rect *clips,
+				struct drm_vmw_rect *vclips,
 				unsigned num_clips, int increment,
 				bool interruptible,
 				struct vmw_fence_obj **out_fence);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b6fa44fe8929..63ccd9871ec9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -288,6 +288,8 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 	crtc->y = set->y;
 	crtc->mode = *mode;
 	crtc->enabled = true;
+	ldu->base.set_gui_x = set->x;
+	ldu->base.set_gui_y = set->y;
 
 	vmw_ldu_add_active(dev_priv, ldu, vfb);
 
@@ -375,8 +377,19 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
 	drm_object_attach_property(&connector->base,
-				      dev->mode_config.dirty_info_property,
-				      1);
+				   dev->mode_config.dirty_info_property,
+				   1);
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 1);
 
 	return 0;
 }
@@ -412,6 +425,8 @@ int vmw_kms_ldu_init_display(struct vmw_private *dev_priv)
 	if (ret != 0)
 		goto err_vblank_cleanup;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, true);
+
 	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
 		for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 			vmw_ldu_init(dev_priv, i);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index c5a1a08b0449..0ea22fd112c9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -74,19 +74,6 @@ struct vmw_kms_sou_dirty_cmd {
 	SVGA3dCmdBlitSurfaceToScreen body;
 };
 
-
-/*
- * Other structs.
- */
-
-struct vmw_screen_object_display {
-	unsigned num_implicit;
-
-	struct vmw_framebuffer *implicit_fb;
-	SVGAFifoCmdDefineGMRFB cur;
-	struct vmw_dma_buffer *pinned_gmrfb;
-};
-
 /**
  * Display unit using screen objects.
  */
@@ -97,7 +84,6 @@ struct vmw_screen_object_unit {
 	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
 
 	bool defined;
-	bool active_implicit;
 };
 
 static void vmw_sou_destroy(struct vmw_screen_object_unit *sou)
@@ -116,33 +102,6 @@ static void vmw_sou_crtc_destroy(struct drm_crtc *crtc)
 	vmw_sou_destroy(vmw_crtc_to_sou(crtc));
 }
 
-static void vmw_sou_del_active(struct vmw_private *vmw_priv,
-			       struct vmw_screen_object_unit *sou)
-{
-	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
-
-	if (sou->active_implicit) {
-		if (--(ld->num_implicit) == 0)
-			ld->implicit_fb = NULL;
-		sou->active_implicit = false;
-	}
-}
-
-static void vmw_sou_add_active(struct vmw_private *vmw_priv,
-			       struct vmw_screen_object_unit *sou,
-			       struct vmw_framebuffer *vfb)
-{
-	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
-
-	BUG_ON(!ld->num_implicit && ld->implicit_fb);
-
-	if (!sou->active_implicit && sou->base.is_implicit) {
-		ld->implicit_fb = vfb;
-		sou->active_implicit = true;
-		ld->num_implicit++;
-	}
-}
-
 /**
  * Send the fifo command to create a screen.
  */
@@ -185,6 +144,8 @@ static int vmw_sou_fifo_create(struct vmw_private *dev_priv,
 		cmd->obj.root.x = sou->base.gui_x;
 		cmd->obj.root.y = sou->base.gui_y;
 	}
+	sou->base.set_gui_x = cmd->obj.root.x;
+	sou->base.set_gui_y = cmd->obj.root.y;
 
 	/* Ok to assume that buffer is pinned in vram */
 	vmw_bo_get_guest_ptr(&sou->buffer->base, &cmd->obj.backingStore.ptr);
@@ -323,13 +284,13 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		return -EINVAL;
 	}
 
-	/* sou only supports one fb active at the time */
+	/* Only one active implicit frame-buffer at a time. */
 	if (sou->base.is_implicit &&
-	    dev_priv->sou_priv->implicit_fb && vfb &&
-	    !(dev_priv->sou_priv->num_implicit == 1 &&
-	      sou->active_implicit) &&
-	    dev_priv->sou_priv->implicit_fb != vfb) {
-		DRM_ERROR("Multiple framebuffers not supported\n");
+	    dev_priv->implicit_fb && vfb &&
+	    !(dev_priv->num_implicit == 1 &&
+	      sou->base.active_implicit) &&
+	    dev_priv->implicit_fb != vfb) {
+		DRM_ERROR("Multiple implicit framebuffers not supported.\n");
 		return -EINVAL;
 	}
 
@@ -351,7 +312,7 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		crtc->y = 0;
 		crtc->enabled = false;
 
-		vmw_sou_del_active(dev_priv, sou);
+		vmw_kms_del_active(dev_priv, &sou->base);
 
 		vmw_sou_backing_free(dev_priv, sou);
 
@@ -415,7 +376,7 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 		return ret;
 	}
 
-	vmw_sou_add_active(dev_priv, sou, vfb);
+	vmw_kms_add_active(dev_priv, &sou->base, vfb);
 
 	connector->encoder = encoder;
 	encoder->crtc = crtc;
@@ -428,39 +389,6 @@ static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
 	return 0;
 }
 
-/**
- * Returns if this unit can be page flipped.
- * Must be called with the mode_config mutex held.
- */
-static bool vmw_sou_screen_object_flippable(struct vmw_private *dev_priv,
-					    struct drm_crtc *crtc)
-{
-	struct vmw_screen_object_unit *sou = vmw_crtc_to_sou(crtc);
-
-	if (!sou->base.is_implicit)
-		return true;
-
-	if (dev_priv->sou_priv->num_implicit != 1)
-		return false;
-
-	return true;
-}
-
-/**
- * Update the implicit fb to the current fb of this crtc.
- * Must be called with the mode_config mutex held.
- */
-static void vmw_sou_update_implicit_fb(struct vmw_private *dev_priv,
-				       struct drm_crtc *crtc)
-{
-	struct vmw_screen_object_unit *sou = vmw_crtc_to_sou(crtc);
-
-	BUG_ON(!sou->base.is_implicit);
-
-	dev_priv->sou_priv->implicit_fb =
-		vmw_framebuffer_to_vfb(sou->base.crtc.primary->fb);
-}
-
 static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 				  struct drm_framebuffer *fb,
 				  struct drm_pending_vblank_event *event,
@@ -470,30 +398,27 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 	struct drm_framebuffer *old_fb = crtc->primary->fb;
 	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(fb);
 	struct vmw_fence_obj *fence = NULL;
-	struct drm_clip_rect clips;
+	struct drm_vmw_rect vclips;
 	int ret;
 
-	/* require ScreenObject support for page flipping */
-	if (!dev_priv->sou_priv)
-		return -ENOSYS;
-
-	if (!vmw_sou_screen_object_flippable(dev_priv, crtc))
+	if (!vmw_kms_crtc_flippable(dev_priv, crtc))
 		return -EINVAL;
 
 	crtc->primary->fb = fb;
 
 	/* do a full screen dirty update */
-	clips.x1 = clips.y1 = 0;
-	clips.x2 = fb->width;
-	clips.y2 = fb->height;
+	vclips.x = crtc->x;
+	vclips.y = crtc->y;
+	vclips.w = crtc->mode.hdisplay;
+	vclips.h = crtc->mode.vdisplay;
 
 	if (vfb->dmabuf)
 		ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb,
-						  &clips, 1, 1,
+						  NULL, &vclips, 1, 1,
 						  true, &fence);
 	else
 		ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb,
-						   &clips, NULL, NULL,
+						   NULL, &vclips, NULL,
 						   0, 0, 1, 1, &fence);
 
 
@@ -521,7 +446,7 @@ static int vmw_sou_crtc_page_flip(struct drm_crtc *crtc,
 	vmw_fence_obj_unreference(&fence);
 
 	if (vmw_crtc_to_du(crtc)->is_implicit)
-		vmw_sou_update_implicit_fb(dev_priv, crtc);
+		vmw_kms_update_implicit_fb(dev_priv, crtc);
 
 	return ret;
 
@@ -586,13 +511,12 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
 	encoder = &sou->base.encoder;
 	connector = &sou->base.connector;
 
-	sou->active_implicit = false;
-
+	sou->base.active_implicit = false;
 	sou->base.pref_active = (unit == 0);
 	sou->base.pref_width = dev_priv->initial_width;
 	sou->base.pref_height = dev_priv->initial_height;
 	sou->base.pref_mode = NULL;
-	sou->base.is_implicit = true;
+	sou->base.is_implicit = false;
 
 	drm_connector_init(dev, connector, &vmw_sou_connector_funcs,
 			   DRM_MODE_CONNECTOR_VIRTUAL);
@@ -611,8 +535,19 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
 	drm_object_attach_property(&connector->base,
-				      dev->mode_config.dirty_info_property,
-				      1);
+				   dev->mode_config.dirty_info_property,
+				   1);
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 sou->base.is_implicit);
 
 	return 0;
 }
@@ -622,11 +557,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	struct drm_device *dev = dev_priv->dev;
 	int i, ret;
 
-	if (dev_priv->sou_priv) {
-		DRM_INFO("sou system already on\n");
-		return -EINVAL;
-	}
-
 	if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
 		DRM_INFO("Not using screen objects,"
 			 " missing cap SCREEN_OBJECT_2\n");
@@ -634,21 +564,19 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	}
 
 	ret = -ENOMEM;
-	dev_priv->sou_priv = kmalloc(sizeof(*dev_priv->sou_priv), GFP_KERNEL);
-	if (unlikely(!dev_priv->sou_priv))
-		goto err_no_mem;
-
-	dev_priv->sou_priv->num_implicit = 0;
-	dev_priv->sou_priv->implicit_fb = NULL;
+	dev_priv->num_implicit = 0;
+	dev_priv->implicit_fb = NULL;
 
 	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
 	if (unlikely(ret != 0))
-		goto err_free;
+		return ret;
 
 	ret = drm_mode_create_dirty_info_property(dev);
 	if (unlikely(ret != 0))
 		goto err_vblank_cleanup;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, false);
+
 	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 		vmw_sou_init(dev_priv, i);
 
@@ -660,10 +588,6 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 
 err_vblank_cleanup:
 	drm_vblank_cleanup(dev);
-err_free:
-	kfree(dev_priv->sou_priv);
-	dev_priv->sou_priv = NULL;
-err_no_mem:
 	return ret;
 }
 
@@ -671,13 +595,8 @@ int vmw_kms_sou_close_display(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	if (!dev_priv->sou_priv)
-		return -ENOSYS;
-
 	drm_vblank_cleanup(dev);
 
-	kfree(dev_priv->sou_priv);
-
 	return 0;
 }
 
@@ -738,6 +657,11 @@ static void vmw_sou_surface_fifo_commit(struct vmw_kms_dirty *dirty)
 	SVGASignedRect *blit = (SVGASignedRect *) &cmd[1];
 	int i;
 
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	cmd->header.id = SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN;
 	cmd->header.size = sizeof(cmd->body) + region_size;
 
@@ -875,6 +799,11 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
  */
 static void vmw_sou_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
 {
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	vmw_fifo_commit(dirty->dev_priv,
 			sizeof(struct vmw_kms_sou_dmabuf_blit) *
 			dirty->num_hits);
@@ -909,6 +838,8 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
  * @dev_priv: Pointer to the device private structure.
  * @framebuffer: Pointer to the dma-buffer backed framebuffer.
  * @clips: Array of clip rects.
+ * @vclips: Alternate array of clip rects. Either @clips or @vclips must
+ * be NULL.
  * @num_clips: Number of clip rects in @clips.
  * @increment: Increment to use when looping over @clips.
  * @interruptible: Whether to perform waits interruptible if possible.
@@ -922,6 +853,7 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
 int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 				struct vmw_framebuffer *framebuffer,
 				struct drm_clip_rect *clips,
+				struct drm_vmw_rect *vclips,
 				unsigned num_clips, int increment,
 				bool interruptible,
 				struct vmw_fence_obj **out_fence)
@@ -945,7 +877,7 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 	dirty.clip = vmw_sou_dmabuf_clip;
 	dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_dmabuf_blit) *
 		num_clips;
-	ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, NULL,
+	ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
 				   0, 0, num_clips, increment, &dirty);
 	vmw_kms_helper_buffer_finish(dev_priv, NULL, buf, out_fence, NULL);
 
@@ -967,6 +899,11 @@ out_revert:
  */
 static void vmw_sou_readback_fifo_commit(struct vmw_kms_dirty *dirty)
 {
+	if (!dirty->num_hits) {
+		vmw_fifo_commit(dirty->dev_priv, 0);
+		return;
+	}
+
 	vmw_fifo_commit(dirty->dev_priv,
 			sizeof(struct vmw_kms_sou_readback_blit) *
 			dirty->num_hits);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 4ef5ffd7189d..b949102ad864 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -96,7 +96,6 @@ struct vmw_stdu_surface_copy {
  *               content_vfbs dimensions, then this is a pointer into the
  *               corresponding field in content_vfbs.  If not, then this
  *               is a separate buffer to which content_vfbs will blit to.
- * @content_fb: holds the rendered content, can be a surface or DMA buffer
  * @content_type:  content_fb type
  * @defined:  true if the current display unit has been initialized
  */
@@ -104,8 +103,6 @@ struct vmw_screen_target_display_unit {
 	struct vmw_display_unit base;
 
 	struct vmw_surface     *display_srf;
-	struct drm_framebuffer *content_fb;
-
 	enum stdu_content_type content_fb_type;
 
 	bool defined;
@@ -122,22 +119,6 @@ static void vmw_stdu_destroy(struct vmw_screen_target_display_unit *stdu);
  *****************************************************************************/
 
 /**
- * vmw_stdu_pin_display - pins the resource associated with the display surface
- *
- * @stdu: contains the display surface
- *
- * Since the display surface can either be a private surface allocated by us,
- * or it can point to the content surface, we use this function to not pin the
- * same resource twice.
- */
-static int vmw_stdu_pin_display(struct vmw_screen_target_display_unit *stdu)
-{
-	return vmw_resource_pin(&stdu->display_srf->res, false);
-}
-
-
-
-/**
  * vmw_stdu_unpin_display - unpins the resource associated with display surface
  *
  * @stdu: contains the display surface
@@ -153,13 +134,7 @@ static void vmw_stdu_unpin_display(struct vmw_screen_target_display_unit *stdu)
 		struct vmw_resource *res = &stdu->display_srf->res;
 
 		vmw_resource_unpin(res);
-
-		if (stdu->content_fb_type != SAME_AS_DISPLAY) {
-			vmw_resource_unreference(&res);
-			stdu->content_fb_type = SAME_AS_DISPLAY;
-		}
-
-		stdu->display_srf = NULL;
+		vmw_surface_unreference(&stdu->display_srf);
 	}
 }
 
@@ -185,6 +160,9 @@ static void vmw_stdu_crtc_destroy(struct drm_crtc *crtc)
  *
  * @dev_priv:  VMW DRM device
  * @stdu: display unit to create a Screen Target for
+ * @mode: The mode to set.
+ * @crtc_x: X coordinate of screen target relative to framebuffer origin.
+ * @crtc_y: Y coordinate of screen target relative to framebuffer origin.
  *
  * Creates a STDU that we can used later.  This function is called whenever the
  * framebuffer size changes.
@@ -193,7 +171,9 @@ static void vmw_stdu_crtc_destroy(struct drm_crtc *crtc)
  * 0 on success, error code on failure
  */
 static int vmw_stdu_define_st(struct vmw_private *dev_priv,
-			      struct vmw_screen_target_display_unit *stdu)
+			      struct vmw_screen_target_display_unit *stdu,
+			      struct drm_display_mode *mode,
+			      int crtc_x, int crtc_y)
 {
 	struct {
 		SVGA3dCmdHeader header;
@@ -211,17 +191,19 @@ static int vmw_stdu_define_st(struct vmw_private *dev_priv,
 	cmd->header.size = sizeof(cmd->body);
 
 	cmd->body.stid   = stdu->base.unit;
-	cmd->body.width  = stdu->display_srf->base_size.width;
-	cmd->body.height = stdu->display_srf->base_size.height;
+	cmd->body.width  = mode->hdisplay;
+	cmd->body.height = mode->vdisplay;
 	cmd->body.flags  = (0 == cmd->body.stid) ? SVGA_STFLAG_PRIMARY : 0;
 	cmd->body.dpi    = 0;
-	cmd->body.xRoot  = stdu->base.crtc.x;
-	cmd->body.yRoot  = stdu->base.crtc.y;
-
-	if (!stdu->base.is_implicit) {
+	if (stdu->base.is_implicit) {
+		cmd->body.xRoot  = crtc_x;
+		cmd->body.yRoot  = crtc_y;
+	} else {
 		cmd->body.xRoot  = stdu->base.gui_x;
 		cmd->body.yRoot  = stdu->base.gui_y;
 	}
+	stdu->base.set_gui_x = cmd->body.xRoot;
+	stdu->base.set_gui_y = cmd->body.yRoot;
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
 
@@ -392,126 +374,43 @@ static int vmw_stdu_destroy_st(struct vmw_private *dev_priv,
 	return ret;
 }
 
-
-
 /**
- * vmw_stdu_crtc_set_config - Sets a mode
+ * vmw_stdu_bind_fb - Bind an fb to a defined screen target
  *
- * @set:  mode parameters
- *
- * This function is the device-specific portion of the DRM CRTC mode set.
- * For the SVGA device, we do this by defining a Screen Target, binding a
- * GB Surface to that target, and finally update the screen target.
+ * @dev_priv: Pointer to a device private struct.
+ * @crtc: The crtc holding the screen target.
+ * @mode: The mode currently used by the screen target. Must be non-NULL.
+ * @new_fb: The new framebuffer to bind. Must be non-NULL.
  *
  * RETURNS:
- * 0 on success, error code otherwise
+ * 0 on success, error code on failure.
  */
-static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
+static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
+			    struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_framebuffer *new_fb)
 {
-	struct vmw_private *dev_priv;
-	struct vmw_screen_target_display_unit *stdu;
-	struct vmw_framebuffer *vfb;
+	struct vmw_screen_target_display_unit *stdu = vmw_crtc_to_stdu(crtc);
+	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
+	struct vmw_surface *new_display_srf = NULL;
+	enum stdu_content_type new_content_type;
 	struct vmw_framebuffer_surface *new_vfbs;
-	struct drm_display_mode *mode;
-	struct drm_framebuffer  *new_fb;
-	struct drm_crtc      *crtc;
-	struct drm_encoder   *encoder;
-	struct drm_connector *connector;
-	int    ret;
-
-
-	if (!set || !set->crtc)
-		return -EINVAL;
-
-	crtc     = set->crtc;
-	crtc->x  = set->x;
-	crtc->y  = set->y;
-	stdu     = vmw_crtc_to_stdu(crtc);
-	mode     = set->mode;
-	new_fb   = set->fb;
-	dev_priv = vmw_priv(crtc->dev);
-
-
-	if (set->num_connectors > 1) {
-		DRM_ERROR("Too many connectors\n");
-		return -EINVAL;
-	}
-
-	if (set->num_connectors == 1 &&
-	    set->connectors[0] != &stdu->base.connector) {
-		DRM_ERROR("Connectors don't match %p %p\n",
-			set->connectors[0], &stdu->base.connector);
-		return -EINVAL;
-	}
-
-
-	/* Since they always map one to one these are safe */
-	connector = &stdu->base.connector;
-	encoder   = &stdu->base.encoder;
-
-
-	/*
-	 * After this point the CRTC will be considered off unless a new fb
-	 * is bound
-	 */
-	if (stdu->defined) {
-		/* Unbind current surface by binding an invalid one */
-		ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
-		if (unlikely(ret != 0))
-			return ret;
-
-		/* Update Screen Target, display will now be blank */
-		if (crtc->primary->fb) {
-			vmw_stdu_update_st(dev_priv, stdu);
-			if (unlikely(ret != 0))
-				return ret;
-		}
-
-		crtc->primary->fb  = NULL;
-		crtc->enabled      = false;
-		encoder->crtc      = NULL;
-		connector->encoder = NULL;
-
-		vmw_stdu_unpin_display(stdu);
-		stdu->content_fb      = NULL;
-		stdu->content_fb_type = SAME_AS_DISPLAY;
-
-		ret = vmw_stdu_destroy_st(dev_priv, stdu);
-		/* The hardware is hung, give up */
-		if (unlikely(ret != 0))
-			return ret;
-	}
-
-
-	/* Any of these conditions means the caller wants CRTC off */
-	if (set->num_connectors == 0 || !mode || !new_fb)
-		return 0;
-
-
-	if (set->x + mode->hdisplay > new_fb->width ||
-	    set->y + mode->vdisplay > new_fb->height) {
-		DRM_ERROR("Set outside of framebuffer\n");
-		return -EINVAL;
-	}
+	int ret;
 
-	stdu->content_fb = new_fb;
-	vfb = vmw_framebuffer_to_vfb(stdu->content_fb);
+	WARN_ON_ONCE(!stdu->defined);
 
-	if (vfb->dmabuf)
-		stdu->content_fb_type = SEPARATE_DMA;
+	if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
+	    new_fb->height == mode->vdisplay)
+		new_content_type = SAME_AS_DISPLAY;
+	else if (vfb->dmabuf)
+		new_content_type = SEPARATE_DMA;
+	else
+		new_content_type = SEPARATE_SURFACE;
 
-	/*
-	 * If the requested mode is different than the width and height
-	 * of the FB or if the content buffer is a DMA buf, then allocate
-	 * a display FB that matches the dimension of the mode
-	 */
-	if (mode->hdisplay != new_fb->width  ||
-	    mode->vdisplay != new_fb->height ||
-	    stdu->content_fb_type != SAME_AS_DISPLAY) {
+	if (new_content_type != SAME_AS_DISPLAY &&
+	    !stdu->display_srf) {
 		struct vmw_surface content_srf;
 		struct drm_vmw_size display_base_size = {0};
-		struct vmw_surface *display_srf;
-
 
 		display_base_size.width  = mode->hdisplay;
 		display_base_size.height = mode->vdisplay;
@@ -521,7 +420,7 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 		 * If content buffer is a DMA buf, then we have to construct
 		 * surface info
 		 */
-		if (stdu->content_fb_type == SEPARATE_DMA) {
+		if (new_content_type == SEPARATE_DMA) {
 
 			switch (new_fb->bits_per_pixel) {
 			case 32:
@@ -538,17 +437,13 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 
 			default:
 				DRM_ERROR("Invalid format\n");
-				ret = -EINVAL;
-				goto err_unref_content;
+				return -EINVAL;
 			}
 
 			content_srf.flags             = 0;
 			content_srf.mip_levels[0]     = 1;
 			content_srf.multisample_count = 0;
 		} else {
-
-			stdu->content_fb_type = SEPARATE_SURFACE;
-
 			new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 			content_srf = *new_vfbs->surface;
 		}
@@ -563,26 +458,136 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 				content_srf.multisample_count,
 				0,
 				display_base_size,
-				&display_srf);
+				&new_display_srf);
 		if (unlikely(ret != 0)) {
-			DRM_ERROR("Cannot allocate a display FB.\n");
-			goto err_unref_content;
+			DRM_ERROR("Could not allocate screen target surface.\n");
+			return ret;
 		}
-
-		stdu->display_srf = display_srf;
-	} else {
+	} else if (new_content_type == SAME_AS_DISPLAY) {
 		new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
-		stdu->display_srf = new_vfbs->surface;
+		new_display_srf = vmw_surface_reference(new_vfbs->surface);
 	}
 
+	if (new_display_srf) {
+		/* Pin new surface before flipping */
+		ret = vmw_resource_pin(&new_display_srf->res, false);
+		if (ret)
+			goto out_srf_unref;
+
+		ret = vmw_stdu_bind_st(dev_priv, stdu, &new_display_srf->res);
+		if (ret)
+			goto out_srf_unpin;
+
+		/* Unpin and unreference old surface */
+		vmw_stdu_unpin_display(stdu);
 
-	ret = vmw_stdu_pin_display(stdu);
-	if (unlikely(ret != 0)) {
-		stdu->display_srf = NULL;
-		goto err_unref_content;
+		/* Transfer the reference */
+		stdu->display_srf = new_display_srf;
+		new_display_srf = NULL;
 	}
 
-	vmw_svga_enable(dev_priv);
+	crtc->primary->fb = new_fb;
+	stdu->content_fb_type = new_content_type;
+	return 0;
+
+out_srf_unpin:
+	vmw_resource_unpin(&new_display_srf->res);
+out_srf_unref:
+	vmw_surface_unreference(&new_display_srf);
+	return ret;
+}
+
+/**
+ * vmw_stdu_crtc_set_config - Sets a mode
+ *
+ * @set:  mode parameters
+ *
+ * This function is the device-specific portion of the DRM CRTC mode set.
+ * For the SVGA device, we do this by defining a Screen Target, binding a
+ * GB Surface to that target, and finally update the screen target.
+ *
+ * RETURNS:
+ * 0 on success, error code otherwise
+ */
+static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
+{
+	struct vmw_private *dev_priv;
+	struct vmw_framebuffer *vfb;
+	struct vmw_screen_target_display_unit *stdu;
+	struct drm_display_mode *mode;
+	struct drm_framebuffer  *new_fb;
+	struct drm_crtc      *crtc;
+	struct drm_encoder   *encoder;
+	struct drm_connector *connector;
+	bool turning_off;
+	int    ret;
+
+
+	if (!set || !set->crtc)
+		return -EINVAL;
+
+	crtc     = set->crtc;
+	stdu     = vmw_crtc_to_stdu(crtc);
+	mode     = set->mode;
+	new_fb   = set->fb;
+	dev_priv = vmw_priv(crtc->dev);
+	turning_off = set->num_connectors == 0 || !mode || !new_fb;
+	vfb = (new_fb) ? vmw_framebuffer_to_vfb(new_fb) : NULL;
+
+	if (set->num_connectors > 1) {
+		DRM_ERROR("Too many connectors\n");
+		return -EINVAL;
+	}
+
+	if (set->num_connectors == 1 &&
+	    set->connectors[0] != &stdu->base.connector) {
+		DRM_ERROR("Connectors don't match %p %p\n",
+			set->connectors[0], &stdu->base.connector);
+		return -EINVAL;
+	}
+
+	if (!turning_off && (set->x + mode->hdisplay > new_fb->width ||
+			     set->y + mode->vdisplay > new_fb->height)) {
+		DRM_ERROR("Set outside of framebuffer\n");
+		return -EINVAL;
+	}
+
+	/* Only one active implicit frame-buffer at a time. */
+	if (!turning_off && stdu->base.is_implicit && dev_priv->implicit_fb &&
+	    !(dev_priv->num_implicit == 1 && stdu->base.active_implicit)
+	    && dev_priv->implicit_fb != vfb) {
+		DRM_ERROR("Multiple implicit framebuffers not supported.\n");
+		return -EINVAL;
+	}
+
+	/* Since they always map one to one these are safe */
+	connector = &stdu->base.connector;
+	encoder   = &stdu->base.encoder;
+
+	if (stdu->defined) {
+		ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
+		if (ret)
+			return ret;
+
+		vmw_stdu_unpin_display(stdu);
+		(void) vmw_stdu_update_st(dev_priv, stdu);
+		vmw_kms_del_active(dev_priv, &stdu->base);
+
+		ret = vmw_stdu_destroy_st(dev_priv, stdu);
+		if (ret)
+			return ret;
+
+		crtc->primary->fb = NULL;
+		crtc->enabled = false;
+		encoder->crtc = NULL;
+		connector->encoder = NULL;
+		stdu->content_fb_type = SAME_AS_DISPLAY;
+		crtc->x = set->x;
+		crtc->y = set->y;
+	}
+
+	if (turning_off)
+		return 0;
 
 	/*
 	 * Steps to displaying a surface, assume surface is already
@@ -592,35 +597,33 @@ static int vmw_stdu_crtc_set_config(struct drm_mode_set *set)
 	 *   3.  update that screen target (this is done later by
 	 *       vmw_kms_stdu_do_surface_dirty_or_present)
 	 */
-	ret = vmw_stdu_define_st(dev_priv, stdu);
-	if (unlikely(ret != 0))
-		goto err_unpin_display_and_content;
+	/*
+	 * Note on error handling: We can't really restore the crtc to
+	 * it's original state on error, but we at least update the
+	 * current state to what's submitted to hardware to enable
+	 * future recovery.
+	 */
+	vmw_svga_enable(dev_priv);
+	ret = vmw_stdu_define_st(dev_priv, stdu, mode, set->x, set->y);
+	if (ret)
+		return ret;
 
-	ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
-	if (unlikely(ret != 0))
-		goto err_unpin_destroy_st;
+	crtc->x = set->x;
+	crtc->y = set->y;
+	crtc->mode = *mode;
 
+	ret = vmw_stdu_bind_fb(dev_priv, crtc, mode, new_fb);
+	if (ret)
+		return ret;
 
+	vmw_kms_add_active(dev_priv, &stdu->base, vfb);
+	crtc->enabled = true;
 	connector->encoder = encoder;
 	encoder->crtc      = crtc;
 
-	crtc->mode    = *mode;
-	crtc->primary->fb = new_fb;
-	crtc->enabled = true;
-
-	return ret;
-
-err_unpin_destroy_st:
-	vmw_stdu_destroy_st(dev_priv, stdu);
-err_unpin_display_and_content:
-	vmw_stdu_unpin_display(stdu);
-err_unref_content:
-	stdu->content_fb = NULL;
-	return ret;
+	return 0;
 }
 
-
-
 /**
  * vmw_stdu_crtc_page_flip - Binds a buffer to a screen target
  *
@@ -648,59 +651,34 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc,
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	struct vmw_screen_target_display_unit *stdu;
+	struct drm_vmw_rect vclips;
+	struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(new_fb);
 	int ret;
 
-	if (crtc == NULL)
-		return -EINVAL;
-
 	dev_priv          = vmw_priv(crtc->dev);
 	stdu              = vmw_crtc_to_stdu(crtc);
-	crtc->primary->fb = new_fb;
-	stdu->content_fb  = new_fb;
-
-	if (stdu->display_srf) {
-		/*
-		 * If the display surface is the same as the content surface
-		 * then remove the reference
-		 */
-		if (stdu->content_fb_type == SAME_AS_DISPLAY) {
-			if (stdu->defined) {
-				/* Unbind the current surface */
-				ret = vmw_stdu_bind_st(dev_priv, stdu, NULL);
-				if (unlikely(ret != 0))
-					goto err_out;
-			}
-			vmw_stdu_unpin_display(stdu);
-			stdu->display_srf = NULL;
-		}
-	}
-
-
-	if (!new_fb) {
-		/* Blanks the display */
-		(void) vmw_stdu_update_st(dev_priv, stdu);
-
-		return 0;
-	}
 
+	if (!stdu->defined || !vmw_kms_crtc_flippable(dev_priv, crtc))
+		return -EINVAL;
 
-	if (stdu->content_fb_type == SAME_AS_DISPLAY) {
-		stdu->display_srf = vmw_framebuffer_to_vfbs(new_fb)->surface;
-		ret = vmw_stdu_pin_display(stdu);
-		if (ret) {
-			stdu->display_srf = NULL;
-			goto err_out;
-		}
+	ret = vmw_stdu_bind_fb(dev_priv, crtc, &crtc->mode, new_fb);
+	if (ret)
+		return ret;
 
-		/* Bind display surface */
-		ret = vmw_stdu_bind_st(dev_priv, stdu, &stdu->display_srf->res);
-		if (unlikely(ret != 0))
-			goto err_unpin_display_and_content;
-	}
+	if (stdu->base.is_implicit)
+		vmw_kms_update_implicit_fb(dev_priv, crtc);
 
-	/* Update display surface: after this point everything is bound */
-	ret = vmw_stdu_update_st(dev_priv, stdu);
-	if (unlikely(ret != 0))
+	vclips.x = crtc->x;
+	vclips.y = crtc->y;
+	vclips.w = crtc->mode.hdisplay;
+	vclips.h = crtc->mode.vdisplay;
+	if (vfb->dmabuf)
+		ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL, &vclips,
+				       1, 1, true, false);
+	else
+		ret = vmw_kms_stdu_surface_dirty(dev_priv, vfb, NULL, &vclips,
+						 NULL, 0, 0, 1, 1, NULL);
+	if (ret)
 		return ret;
 
 	if (event) {
@@ -721,14 +699,7 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc,
 		vmw_fifo_flush(dev_priv, false);
 	}
 
-	return ret;
-
-err_unpin_display_and_content:
-	vmw_stdu_unpin_display(stdu);
-err_out:
-	crtc->primary->fb = NULL;
-	stdu->content_fb = NULL;
-	return ret;
+	return 0;
 }
 
 
@@ -1138,7 +1109,7 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit)
 	stdu->base.pref_active = (unit == 0);
 	stdu->base.pref_width  = dev_priv->initial_width;
 	stdu->base.pref_height = dev_priv->initial_height;
-	stdu->base.is_implicit = true;
+	stdu->base.is_implicit = false;
 
 	drm_connector_init(dev, connector, &vmw_stdu_connector_funcs,
 			   DRM_MODE_CONNECTOR_VIRTUAL);
@@ -1159,7 +1130,17 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit)
 	drm_object_attach_property(&connector->base,
 				   dev->mode_config.dirty_info_property,
 				   1);
-
+	drm_object_attach_property(&connector->base,
+				   dev_priv->hotplug_mode_update_property, 1);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_x_property, 0);
+	drm_object_attach_property(&connector->base,
+				   dev->mode_config.suggested_y_property, 0);
+	if (dev_priv->implicit_placement_property)
+		drm_object_attach_property
+			(&connector->base,
+			 dev_priv->implicit_placement_property,
+			 stdu->base.is_implicit);
 	return 0;
 }
 
@@ -1224,6 +1205,8 @@ int vmw_kms_stdu_init_display(struct vmw_private *dev_priv)
 
 	dev_priv->active_display_unit = vmw_du_screen_target;
 
+	vmw_kms_create_implicit_placement_property(dev_priv, false);
+
 	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i) {
 		ret = vmw_stdu_init(dev_priv, i);
 
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index dd2dbb9746ce..c27858ae0552 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -83,8 +83,10 @@ static int host1x_device_parse_dt(struct host1x_device *device,
 		if (of_match_node(driver->subdevs, np) &&
 		    of_device_is_available(np)) {
 			err = host1x_subdev_add(device, np);
-			if (err < 0)
+			if (err < 0) {
+				of_node_put(np);
 				return err;
+			}
 		}
 	}
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index defa7995f213..b4515d544039 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -225,7 +225,7 @@ unpin:
 	return 0;
 }
 
-static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
+static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 {
 	int i = 0;
 	u32 last_page = ~0;
diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c
index d3ad5347342c..2f29780e7c68 100644
--- a/drivers/gpu/ipu-v3/ipu-dc.c
+++ b/drivers/gpu/ipu-v3/ipu-dc.c
@@ -171,6 +171,7 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
 		u32 bus_format, u32 width)
 {
 	struct ipu_dc_priv *priv = dc->priv;
+	int addr, sync;
 	u32 reg = 0;
 	int map;
 
@@ -182,41 +183,39 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
 		return map;
 	}
 
-	if (interlaced) {
-		int addr;
-
-		if (dc->di)
-			addr = 1;
-		else
-			addr = 0;
+	/*
+	 * In interlaced mode we need more counters to create the asymmetric
+	 * per-field VSYNC signals. The pixel active signal synchronising DC
+	 * to DI moves to signal generator #6 (see ipu-di.c). In progressive
+	 * mode counter #5 is used.
+	 */
+	sync = interlaced ? 6 : 5;
+
+	/* Reserve 5 microcode template words for each DI */
+	if (dc->di)
+		addr = 5;
+	else
+		addr = 0;
 
+	if (interlaced) {
 		dc_link_event(dc, DC_EVT_NL, addr, 3);
 		dc_link_event(dc, DC_EVT_EOL, addr, 2);
 		dc_link_event(dc, DC_EVT_NEW_DATA, addr, 1);
 
 		/* Init template microcode */
-		dc_write_tmpl(dc, addr, WROD(0), 0, map, SYNC_WAVE, 0, 6, 1);
+		dc_write_tmpl(dc, addr, WROD(0), 0, map, SYNC_WAVE, 0, sync, 1);
 	} else {
-		if (dc->di) {
-			dc_link_event(dc, DC_EVT_NL, 2, 3);
-			dc_link_event(dc, DC_EVT_EOL, 3, 2);
-			dc_link_event(dc, DC_EVT_NEW_DATA, 1, 1);
-			/* Init template microcode */
-			dc_write_tmpl(dc, 2, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
-			dc_write_tmpl(dc, 3, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
-			dc_write_tmpl(dc, 4, WRG, 0, map, NULL_WAVE, 0, 0, 1);
-			dc_write_tmpl(dc, 1, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
-		} else {
-			dc_link_event(dc, DC_EVT_NL, 5, 3);
-			dc_link_event(dc, DC_EVT_EOL, 6, 2);
-			dc_link_event(dc, DC_EVT_NEW_DATA, 8, 1);
-			/* Init template microcode */
-			dc_write_tmpl(dc, 5, WROD(0), 0, map, SYNC_WAVE, 8, 5, 1);
-			dc_write_tmpl(dc, 6, WROD(0), 0, map, SYNC_WAVE, 4, 5, 0);
-			dc_write_tmpl(dc, 7, WRG, 0, map, NULL_WAVE, 0, 0, 1);
-			dc_write_tmpl(dc, 8, WROD(0), 0, map, SYNC_WAVE, 0, 5, 1);
-		}
+		dc_link_event(dc, DC_EVT_NL, addr + 2, 3);
+		dc_link_event(dc, DC_EVT_EOL, addr + 3, 2);
+		dc_link_event(dc, DC_EVT_NEW_DATA, addr + 1, 1);
+
+		/* Init template microcode */
+		dc_write_tmpl(dc, addr + 2, WROD(0), 0, map, SYNC_WAVE, 8, sync, 1);
+		dc_write_tmpl(dc, addr + 3, WROD(0), 0, map, SYNC_WAVE, 4, sync, 0);
+		dc_write_tmpl(dc, addr + 4, WRG, 0, map, NULL_WAVE, 0, 0, 1);
+		dc_write_tmpl(dc, addr + 1, WROD(0), 0, map, SYNC_WAVE, 0, sync, 1);
 	}
+
 	dc_link_event(dc, DC_EVT_NF, 0, 0);
 	dc_link_event(dc, DC_EVT_NFIELD, 0, 0);
 	dc_link_event(dc, DC_EVT_EOF, 0, 0);
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 665ab9fd0e01..cbd7c986d926 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -74,9 +74,17 @@
  * there can thus be up to three clients: Two vga clients (GPUs) and one audio
  * client (on the discrete GPU). The code is mostly prepared to support
  * machines with more than two GPUs should they become available.
+ *
  * The GPU to which the outputs are currently switched is called the
  * active client in vga_switcheroo parlance. The GPU not in use is the
- * inactive client.
+ * inactive client. When the inactive client's DRM driver is loaded,
+ * it will be unable to probe the panel's EDID and hence depends on
+ * VBIOS to provide its display modes. If the VBIOS modes are bogus or
+ * if there is no VBIOS at all (which is common on the MacBook Pro),
+ * a client may alternatively request that the DDC lines are temporarily
+ * switched to it, provided that the handler supports this. Switching
+ * only the DDC lines and not the entire output avoids unnecessary
+ * flickering.
  */
 
 /**
@@ -126,6 +134,10 @@ static DEFINE_MUTEX(vgasr_mutex);
  * 	(counting only vga clients, not audio clients)
  * @clients: list of registered clients
  * @handler: registered handler
+ * @handler_flags: flags of registered handler
+ * @mux_hw_lock: protects mux state
+ *	(in particular while DDC lines are temporarily switched)
+ * @old_ddc_owner: client to which DDC lines will be switched back on unlock
  *
  * vga_switcheroo private data. Currently only one vga_switcheroo instance
  * per system is supported.
@@ -142,6 +154,9 @@ struct vgasr_priv {
 	struct list_head clients;
 
 	const struct vga_switcheroo_handler *handler;
+	enum vga_switcheroo_handler_flags_t handler_flags;
+	struct mutex mux_hw_lock;
+	int old_ddc_owner;
 };
 
 #define ID_BIT_AUDIO		0x100
@@ -156,6 +171,7 @@ static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv);
 /* only one switcheroo per system */
 static struct vgasr_priv vgasr_priv = {
 	.clients = LIST_HEAD_INIT(vgasr_priv.clients),
+	.mux_hw_lock = __MUTEX_INITIALIZER(vgasr_priv.mux_hw_lock),
 };
 
 static bool vga_switcheroo_ready(void)
@@ -190,13 +206,15 @@ static void vga_switcheroo_enable(void)
 /**
  * vga_switcheroo_register_handler() - register handler
  * @handler: handler callbacks
+ * @handler_flags: handler flags
  *
  * Register handler. Enable vga_switcheroo if two vga clients have already
  * registered.
  *
  * Return: 0 on success, -EINVAL if a handler was already registered.
  */
-int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler)
+int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler,
+				    enum vga_switcheroo_handler_flags_t handler_flags)
 {
 	mutex_lock(&vgasr_mutex);
 	if (vgasr_priv.handler) {
@@ -205,6 +223,7 @@ int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler
 	}
 
 	vgasr_priv.handler = handler;
+	vgasr_priv.handler_flags = handler_flags;
 	if (vga_switcheroo_ready()) {
 		pr_info("enabled\n");
 		vga_switcheroo_enable();
@@ -222,16 +241,33 @@ EXPORT_SYMBOL(vga_switcheroo_register_handler);
 void vga_switcheroo_unregister_handler(void)
 {
 	mutex_lock(&vgasr_mutex);
+	mutex_lock(&vgasr_priv.mux_hw_lock);
+	vgasr_priv.handler_flags = 0;
 	vgasr_priv.handler = NULL;
 	if (vgasr_priv.active) {
 		pr_info("disabled\n");
 		vga_switcheroo_debugfs_fini(&vgasr_priv);
 		vgasr_priv.active = false;
 	}
+	mutex_unlock(&vgasr_priv.mux_hw_lock);
 	mutex_unlock(&vgasr_mutex);
 }
 EXPORT_SYMBOL(vga_switcheroo_unregister_handler);
 
+/**
+ * vga_switcheroo_handler_flags() - obtain handler flags
+ *
+ * Helper for clients to obtain the handler flags bitmask.
+ *
+ * Return: Handler flags. A value of 0 means that no handler is registered
+ * or that the handler has no special capabilities.
+ */
+enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void)
+{
+	return vgasr_priv.handler_flags;
+}
+EXPORT_SYMBOL(vga_switcheroo_handler_flags);
+
 static int register_client(struct pci_dev *pdev,
 			   const struct vga_switcheroo_client_ops *ops,
 			   enum vga_switcheroo_client_id id, bool active,
@@ -413,6 +449,76 @@ void vga_switcheroo_client_fb_set(struct pci_dev *pdev,
 EXPORT_SYMBOL(vga_switcheroo_client_fb_set);
 
 /**
+ * vga_switcheroo_lock_ddc() - temporarily switch DDC lines to a given client
+ * @pdev: client pci device
+ *
+ * Temporarily switch DDC lines to the client identified by @pdev
+ * (but leave the outputs otherwise switched to where they are).
+ * This allows the inactive client to probe EDID. The DDC lines must
+ * afterwards be switched back by calling vga_switcheroo_unlock_ddc(),
+ * even if this function returns an error.
+ *
+ * Return: Previous DDC owner on success or a negative int on error.
+ * Specifically, %-ENODEV if no handler has registered or if the handler
+ * does not support switching the DDC lines. Also, a negative value
+ * returned by the handler is propagated back to the caller.
+ * The return value has merely an informational purpose for any caller
+ * which might be interested in it. It is acceptable to ignore the return
+ * value and simply rely on the result of the subsequent EDID probe,
+ * which will be %NULL if DDC switching failed.
+ */
+int vga_switcheroo_lock_ddc(struct pci_dev *pdev)
+{
+	enum vga_switcheroo_client_id id;
+
+	mutex_lock(&vgasr_priv.mux_hw_lock);
+	if (!vgasr_priv.handler || !vgasr_priv.handler->switch_ddc) {
+		vgasr_priv.old_ddc_owner = -ENODEV;
+		return -ENODEV;
+	}
+
+	id = vgasr_priv.handler->get_client_id(pdev);
+	vgasr_priv.old_ddc_owner = vgasr_priv.handler->switch_ddc(id);
+	return vgasr_priv.old_ddc_owner;
+}
+EXPORT_SYMBOL(vga_switcheroo_lock_ddc);
+
+/**
+ * vga_switcheroo_unlock_ddc() - switch DDC lines back to previous owner
+ * @pdev: client pci device
+ *
+ * Switch DDC lines back to the previous owner after calling
+ * vga_switcheroo_lock_ddc(). This must be called even if
+ * vga_switcheroo_lock_ddc() returned an error.
+ *
+ * Return: Previous DDC owner on success (i.e. the client identifier of @pdev)
+ * or a negative int on error.
+ * Specifically, %-ENODEV if no handler has registered or if the handler
+ * does not support switching the DDC lines. Also, a negative value
+ * returned by the handler is propagated back to the caller.
+ * Finally, invoking this function without calling vga_switcheroo_lock_ddc()
+ * first is not allowed and will result in %-EINVAL.
+ */
+int vga_switcheroo_unlock_ddc(struct pci_dev *pdev)
+{
+	enum vga_switcheroo_client_id id;
+	int ret = vgasr_priv.old_ddc_owner;
+
+	if (WARN_ON_ONCE(!mutex_is_locked(&vgasr_priv.mux_hw_lock)))
+		return -EINVAL;
+
+	if (vgasr_priv.old_ddc_owner >= 0) {
+		id = vgasr_priv.handler->get_client_id(pdev);
+		if (vgasr_priv.old_ddc_owner != id)
+			ret = vgasr_priv.handler->switch_ddc(
+						     vgasr_priv.old_ddc_owner);
+	}
+	mutex_unlock(&vgasr_priv.mux_hw_lock);
+	return ret;
+}
+EXPORT_SYMBOL(vga_switcheroo_unlock_ddc);
+
+/**
  * DOC: Manual switching and manual power control
  *
  * In this mode of use, the file /sys/kernel/debug/vgaswitcheroo/switch
@@ -549,7 +655,9 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client)
 		console_unlock();
 	}
 
+	mutex_lock(&vgasr_priv.mux_hw_lock);
 	ret = vgasr_priv.handler->switchto(new_client->id);
+	mutex_unlock(&vgasr_priv.mux_hw_lock);
 	if (ret)
 		return ret;
 
@@ -664,7 +772,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf,
 	vgasr_priv.delayed_switch_active = false;
 
 	if (just_mux) {
+		mutex_lock(&vgasr_priv.mux_hw_lock);
 		ret = vgasr_priv.handler->switchto(client_id);
+		mutex_unlock(&vgasr_priv.mux_hw_lock);
 		goto out;
 	}
 
@@ -876,8 +986,11 @@ static int vga_switcheroo_runtime_suspend(struct device *dev)
 	if (ret)
 		return ret;
 	mutex_lock(&vgasr_mutex);
-	if (vgasr_priv.handler->switchto)
+	if (vgasr_priv.handler->switchto) {
+		mutex_lock(&vgasr_priv.mux_hw_lock);
 		vgasr_priv.handler->switchto(VGA_SWITCHEROO_IGD);
+		mutex_unlock(&vgasr_priv.mux_hw_lock);
+	}
 	vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_OFF);
 	mutex_unlock(&vgasr_mutex);
 	return 0;
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index e094c572b86e..16b6f11a0700 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -384,7 +384,7 @@ struct uhid_create_req_compat {
 static int uhid_event_from_user(const char __user *buffer, size_t len,
 				struct uhid_event *event)
 {
-	if (is_compat_task()) {
+	if (in_compat_syscall()) {
 		u32 type;
 
 		if (get_user(type, buffer))
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 7e20567bc369..912b449c8303 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -52,7 +52,7 @@ static int scpi_read_temp(void *dev, int *temp)
 	struct scpi_sensors *scpi_sensors = zone->scpi_sensors;
 	struct scpi_ops *scpi_ops = scpi_sensors->scpi_ops;
 	struct sensor_data *sensor = &scpi_sensors->data[zone->sensor_id];
-	u32 value;
+	u64 value;
 	int ret;
 
 	ret = scpi_ops->sensor_get_value(sensor->info.sensor_id, &value);
@@ -70,7 +70,7 @@ scpi_show_sensor(struct device *dev, struct device_attribute *attr, char *buf)
 	struct scpi_sensors *scpi_sensors = dev_get_drvdata(dev);
 	struct scpi_ops *scpi_ops = scpi_sensors->scpi_ops;
 	struct sensor_data *sensor;
-	u32 value;
+	u64 value;
 	int ret;
 
 	sensor = container_of(attr, struct sensor_data, dev_attr_input);
@@ -79,7 +79,7 @@ scpi_show_sensor(struct device *dev, struct device_attribute *attr, char *buf)
 	if (ret)
 		return ret;
 
-	return sprintf(buf, "%u\n", value);
+	return sprintf(buf, "%llu\n", value);
 }
 
 static ssize_t
@@ -114,6 +114,7 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
 {
 	u16 nr_sensors, i;
 	int num_temp = 0, num_volt = 0, num_current = 0, num_power = 0;
+	int num_energy = 0;
 	struct scpi_ops *scpi_ops;
 	struct device *hwdev, *dev = &pdev->dev;
 	struct scpi_sensors *scpi_sensors;
@@ -182,6 +183,13 @@ static int scpi_hwmon_probe(struct platform_device *pdev)
 				 "power%d_label", num_power + 1);
 			num_power++;
 			break;
+		case ENERGY:
+			snprintf(sensor->input, sizeof(sensor->input),
+				 "energy%d_input", num_energy + 1);
+			snprintf(sensor->label, sizeof(sensor->input),
+				 "energy%d_label", num_energy + 1);
+			num_energy++;
+			break;
 		default:
 			continue;
 		}
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 0299dfa746a3..faa8e6821fea 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -789,7 +789,7 @@ config I2C_QUP
 
 config I2C_RIIC
 	tristate "Renesas RIIC adapter"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for the
 	  Renesas RIIC I2C interface.
@@ -833,7 +833,7 @@ config I2C_SH7760
 config I2C_SH_MOBILE
 	tristate "SuperH Mobile I2C Controller"
 	depends on HAS_DMA
-	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
+	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for the
 	  built-in I2C interface on the Renesas SH-Mobile processor.
@@ -908,7 +908,7 @@ config I2C_TEGRA
 
 config I2C_UNIPHIER
 	tristate "UniPhier FIFO-less I2C controller"
-	depends on ARCH_UNIPHIER
+	depends on ARCH_UNIPHIER || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for
 	  the UniPhier FIFO-less I2C interface embedded in PH1-LD4, PH1-sLD8,
@@ -916,7 +916,7 @@ config I2C_UNIPHIER
 
 config I2C_UNIPHIER_F
 	tristate "UniPhier FIFO-builtin I2C controller"
-	depends on ARCH_UNIPHIER
+	depends on ARCH_UNIPHIER || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for
 	  the UniPhier FIFO-builtin I2C interface embedded in PH1-Pro4,
@@ -985,7 +985,7 @@ config I2C_XLP9XX
 
 config I2C_RCAR
 	tristate "Renesas R-Car I2C Controller"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	select I2C_SLAVE
 	help
 	  If you say yes to this option, support will be included for the
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index 0419f5284609..b9f0fff4e723 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -58,11 +58,13 @@
 #define IE_M_RX_FIFO_FULL_SHIFT      31
 #define IE_M_RX_THLD_SHIFT           30
 #define IE_M_START_BUSY_SHIFT        28
+#define IE_M_TX_UNDERRUN_SHIFT       27
 
 #define IS_OFFSET                    0x3c
 #define IS_M_RX_FIFO_FULL_SHIFT      31
 #define IS_M_RX_THLD_SHIFT           30
 #define IS_M_START_BUSY_SHIFT        28
+#define IS_M_TX_UNDERRUN_SHIFT       27
 
 #define M_TX_OFFSET                  0x40
 #define M_TX_WR_STATUS_SHIFT         31
@@ -76,7 +78,7 @@
 #define M_RX_DATA_SHIFT              0
 #define M_RX_DATA_MASK               0xff
 
-#define I2C_TIMEOUT_MESC             100
+#define I2C_TIMEOUT_MSEC             50000
 #define M_TX_RX_FIFO_SIZE            64
 
 enum bus_speed_index {
@@ -95,12 +97,17 @@ struct bcm_iproc_i2c_dev {
 
 	struct completion done;
 	int xfer_is_done;
+
+	struct i2c_msg *msg;
+
+	/* bytes that have been transferred */
+	unsigned int tx_bytes;
 };
 
 /*
  * Can be expanded in the future if more interrupt status bits are utilized
  */
-#define ISR_MASK (1 << IS_M_START_BUSY_SHIFT)
+#define ISR_MASK (BIT(IS_M_START_BUSY_SHIFT) | BIT(IS_M_TX_UNDERRUN_SHIFT))
 
 static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data)
 {
@@ -112,13 +119,95 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data)
 	if (!status)
 		return IRQ_NONE;
 
+	/* TX FIFO is empty and we have more data to send */
+	if (status & BIT(IS_M_TX_UNDERRUN_SHIFT)) {
+		struct i2c_msg *msg = iproc_i2c->msg;
+		unsigned int tx_bytes = msg->len - iproc_i2c->tx_bytes;
+		unsigned int i;
+		u32 val;
+
+		/* can only fill up to the FIFO size */
+		tx_bytes = min_t(unsigned int, tx_bytes, M_TX_RX_FIFO_SIZE);
+		for (i = 0; i < tx_bytes; i++) {
+			/* start from where we left over */
+			unsigned int idx = iproc_i2c->tx_bytes + i;
+
+			val = msg->buf[idx];
+
+			/* mark the last byte */
+			if (idx == msg->len - 1) {
+				u32 tmp;
+
+				val |= BIT(M_TX_WR_STATUS_SHIFT);
+
+				/*
+				 * Since this is the last byte, we should
+				 * now disable TX FIFO underrun interrupt
+				 */
+				tmp = readl(iproc_i2c->base + IE_OFFSET);
+				tmp &= ~BIT(IE_M_TX_UNDERRUN_SHIFT);
+				writel(tmp, iproc_i2c->base + IE_OFFSET);
+			}
+
+			/* load data into TX FIFO */
+			writel(val, iproc_i2c->base + M_TX_OFFSET);
+		}
+		/* update number of transferred bytes */
+		iproc_i2c->tx_bytes += tx_bytes;
+	}
+
+	if (status & BIT(IS_M_START_BUSY_SHIFT)) {
+		iproc_i2c->xfer_is_done = 1;
+		complete_all(&iproc_i2c->done);
+	}
+
 	writel(status, iproc_i2c->base + IS_OFFSET);
-	iproc_i2c->xfer_is_done = 1;
-	complete_all(&iproc_i2c->done);
 
 	return IRQ_HANDLED;
 }
 
+static int bcm_iproc_i2c_init(struct bcm_iproc_i2c_dev *iproc_i2c)
+{
+	u32 val;
+
+	/* put controller in reset */
+	val = readl(iproc_i2c->base + CFG_OFFSET);
+	val |= 1 << CFG_RESET_SHIFT;
+	val &= ~(1 << CFG_EN_SHIFT);
+	writel(val, iproc_i2c->base + CFG_OFFSET);
+
+	/* wait 100 usec per spec */
+	udelay(100);
+
+	/* bring controller out of reset */
+	val &= ~(1 << CFG_RESET_SHIFT);
+	writel(val, iproc_i2c->base + CFG_OFFSET);
+
+	/* flush TX/RX FIFOs and set RX FIFO threshold to zero */
+	val = (1 << M_FIFO_RX_FLUSH_SHIFT) | (1 << M_FIFO_TX_FLUSH_SHIFT);
+	writel(val, iproc_i2c->base + M_FIFO_CTRL_OFFSET);
+	/* disable all interrupts */
+	writel(0, iproc_i2c->base + IE_OFFSET);
+
+	/* clear all pending interrupts */
+	writel(0xffffffff, iproc_i2c->base + IS_OFFSET);
+
+	return 0;
+}
+
+static void bcm_iproc_i2c_enable_disable(struct bcm_iproc_i2c_dev *iproc_i2c,
+					 bool enable)
+{
+	u32 val;
+
+	val = readl(iproc_i2c->base + CFG_OFFSET);
+	if (enable)
+		val |= BIT(CFG_EN_SHIFT);
+	else
+		val &= ~BIT(CFG_EN_SHIFT);
+	writel(val, iproc_i2c->base + CFG_OFFSET);
+}
+
 static int bcm_iproc_i2c_check_status(struct bcm_iproc_i2c_dev *iproc_i2c,
 				      struct i2c_msg *msg)
 {
@@ -149,6 +238,12 @@ static int bcm_iproc_i2c_check_status(struct bcm_iproc_i2c_dev *iproc_i2c,
 
 	default:
 		dev_dbg(iproc_i2c->device, "unknown error code=%d\n", val);
+
+		/* re-initialize i2c for recovery */
+		bcm_iproc_i2c_enable_disable(iproc_i2c, false);
+		bcm_iproc_i2c_init(iproc_i2c);
+		bcm_iproc_i2c_enable_disable(iproc_i2c, true);
+
 		return -EIO;
 	}
 }
@@ -159,7 +254,8 @@ static int bcm_iproc_i2c_xfer_single_msg(struct bcm_iproc_i2c_dev *iproc_i2c,
 	int ret, i;
 	u8 addr;
 	u32 val;
-	unsigned long time_left = msecs_to_jiffies(I2C_TIMEOUT_MESC);
+	unsigned int tx_bytes;
+	unsigned long time_left = msecs_to_jiffies(I2C_TIMEOUT_MSEC);
 
 	/* check if bus is busy */
 	if (!!(readl(iproc_i2c->base + M_CMD_OFFSET) &
@@ -168,13 +264,20 @@ static int bcm_iproc_i2c_xfer_single_msg(struct bcm_iproc_i2c_dev *iproc_i2c,
 		return -EBUSY;
 	}
 
+	iproc_i2c->msg = msg;
+
 	/* format and load slave address into the TX FIFO */
 	addr = msg->addr << 1 | (msg->flags & I2C_M_RD ? 1 : 0);
 	writel(addr, iproc_i2c->base + M_TX_OFFSET);
 
-	/* for a write transaction, load data into the TX FIFO */
+	/*
+	 * For a write transaction, load data into the TX FIFO. Only allow
+	 * loading up to TX FIFO size - 1 bytes of data since the first byte
+	 * has been used up by the slave address
+	 */
+	tx_bytes = min_t(unsigned int, msg->len, M_TX_RX_FIFO_SIZE - 1);
 	if (!(msg->flags & I2C_M_RD)) {
-		for (i = 0; i < msg->len; i++) {
+		for (i = 0; i < tx_bytes; i++) {
 			val = msg->buf[i];
 
 			/* mark the last byte */
@@ -183,6 +286,7 @@ static int bcm_iproc_i2c_xfer_single_msg(struct bcm_iproc_i2c_dev *iproc_i2c,
 
 			writel(val, iproc_i2c->base + M_TX_OFFSET);
 		}
+		iproc_i2c->tx_bytes = tx_bytes;
 	}
 
 	/* mark as incomplete before starting the transaction */
@@ -194,13 +298,24 @@ static int bcm_iproc_i2c_xfer_single_msg(struct bcm_iproc_i2c_dev *iproc_i2c,
 	 * transaction is done, i.e., the internal start_busy bit, transitions
 	 * from 1 to 0.
 	 */
-	writel(1 << IE_M_START_BUSY_SHIFT, iproc_i2c->base + IE_OFFSET);
+	val = BIT(IE_M_START_BUSY_SHIFT);
+
+	/*
+	 * If TX data size is larger than the TX FIFO, need to enable TX
+	 * underrun interrupt, which will be triggerred when the TX FIFO is
+	 * empty. When that happens we can then pump more data into the FIFO
+	 */
+	if (!(msg->flags & I2C_M_RD) &&
+	    msg->len > iproc_i2c->tx_bytes)
+		val |= BIT(IE_M_TX_UNDERRUN_SHIFT);
+
+	writel(val, iproc_i2c->base + IE_OFFSET);
 
 	/*
 	 * Now we can activate the transfer. For a read operation, specify the
 	 * number of bytes to read
 	 */
-	val = 1 << M_CMD_START_BUSY_SHIFT;
+	val = BIT(M_CMD_START_BUSY_SHIFT);
 	if (msg->flags & I2C_M_RD) {
 		val |= (M_CMD_PROTOCOL_BLK_RD << M_CMD_PROTOCOL_SHIFT) |
 		       (msg->len << M_CMD_RD_CNT_SHIFT);
@@ -283,7 +398,6 @@ static const struct i2c_algorithm bcm_iproc_algo = {
 static struct i2c_adapter_quirks bcm_iproc_i2c_quirks = {
 	/* need to reserve one byte in the FIFO for the slave address */
 	.max_read_len = M_TX_RX_FIFO_SIZE - 1,
-	.max_write_len = M_TX_RX_FIFO_SIZE - 1,
 };
 
 static int bcm_iproc_i2c_cfg_speed(struct bcm_iproc_i2c_dev *iproc_i2c)
@@ -321,49 +435,6 @@ static int bcm_iproc_i2c_cfg_speed(struct bcm_iproc_i2c_dev *iproc_i2c)
 	return 0;
 }
 
-static int bcm_iproc_i2c_init(struct bcm_iproc_i2c_dev *iproc_i2c)
-{
-	u32 val;
-
-	/* put controller in reset */
-	val = readl(iproc_i2c->base + CFG_OFFSET);
-	val |= 1 << CFG_RESET_SHIFT;
-	val &= ~(1 << CFG_EN_SHIFT);
-	writel(val, iproc_i2c->base + CFG_OFFSET);
-
-	/* wait 100 usec per spec */
-	udelay(100);
-
-	/* bring controller out of reset */
-	val &= ~(1 << CFG_RESET_SHIFT);
-	writel(val, iproc_i2c->base + CFG_OFFSET);
-
-	/* flush TX/RX FIFOs and set RX FIFO threshold to zero */
-	val = (1 << M_FIFO_RX_FLUSH_SHIFT) | (1 << M_FIFO_TX_FLUSH_SHIFT);
-	writel(val, iproc_i2c->base + M_FIFO_CTRL_OFFSET);
-
-	/* disable all interrupts */
-	writel(0, iproc_i2c->base + IE_OFFSET);
-
-	/* clear all pending interrupts */
-	writel(0xffffffff, iproc_i2c->base + IS_OFFSET);
-
-	return 0;
-}
-
-static void bcm_iproc_i2c_enable_disable(struct bcm_iproc_i2c_dev *iproc_i2c,
-					 bool enable)
-{
-	u32 val;
-
-	val = readl(iproc_i2c->base + CFG_OFFSET);
-	if (enable)
-		val |= BIT(CFG_EN_SHIFT);
-	else
-		val &= ~BIT(CFG_EN_SHIFT);
-	writel(val, iproc_i2c->base + CFG_OFFSET);
-}
-
 static int bcm_iproc_i2c_probe(struct platform_device *pdev)
 {
 	int irq, ret = 0;
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 6b08d1607b7a..90bbd9f9dd8f 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -124,6 +124,8 @@
 
 /**
  * struct cdns_i2c - I2C device private data structure
+ *
+ * @dev:		Pointer to device structure
  * @membase:		Base address of the I2C device
  * @adap:		I2C adapter instance
  * @p_msg:		Message pointer
@@ -171,7 +173,7 @@ struct cdns_platform_data {
 					     clk_rate_change_nb)
 
 /**
- * cdns_i2c_clear_bus_hold() - Clear bus hold bit
+ * cdns_i2c_clear_bus_hold - Clear bus hold bit
  * @id:	Pointer to driver data struct
  *
  * Helper to clear the controller's bus hold bit.
@@ -815,8 +817,8 @@ static int cdns_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
 }
 
 /**
- * cdns_i2c_suspend - Suspend method for the driver
- * @_dev:	Address of the platform_device structure
+ * cdns_i2c_runtime_suspend -  Runtime suspend method for the driver
+ * @dev:	Address of the platform_device structure
  *
  * Put the driver into low power mode.
  *
@@ -833,10 +835,10 @@ static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
 }
 
 /**
- * cdns_i2c_resume - Resume from suspend
- * @_dev:	Address of the platform_device structure
+ * cdns_i2c_runtime_resume - Runtime resume
+ * @dev:	Address of the platform_device structure
  *
- * Resume operation after suspend.
+ * Runtime resume callback.
  *
  * Return: 0 on success and error value on error
  */
diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c
index e38c2bbba940..1590ad0a8081 100644
--- a/drivers/i2c/busses/i2c-designware-baytrail.c
+++ b/drivers/i2c/busses/i2c-designware-baytrail.c
@@ -11,7 +11,6 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  */
-#include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/acpi.h>
@@ -151,7 +150,3 @@ int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev)
 
 	return 0;
 }
-
-MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>");
-MODULE_DESCRIPTION("Baytrail I2C Semaphore driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index 10fbd6d841e0..99b54be6ba73 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -634,7 +634,6 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 
 	dev_dbg(dev->dev, "%s: msgs: %d\n", __func__, num);
 
-	mutex_lock(&dev->lock);
 	pm_runtime_get_sync(dev->dev);
 
 	reinit_completion(&dev->cmd_complete);
@@ -673,11 +672,12 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	}
 
 	/*
-	 * We must disable the adapter before unlocking the &dev->lock mutex
-	 * below. Otherwise the hardware might continue generating interrupts
-	 * which in turn causes a race condition with the following transfer.
-	 * Needs some more investigation if the additional interrupts are
-	 * a hardware bug or this driver doesn't handle them correctly yet.
+	 * We must disable the adapter before returning and signaling the end
+	 * of the current transfer. Otherwise the hardware might continue
+	 * generating interrupts which in turn causes a race condition with
+	 * the following transfer.  Needs some more investigation if the
+	 * additional interrupts are a hardware bug or this driver doesn't
+	 * handle them correctly yet.
 	 */
 	__i2c_dw_enable(dev, false);
 
@@ -706,7 +706,6 @@ done:
 done_nolock:
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-	mutex_unlock(&dev->lock);
 
 	return ret;
 }
@@ -860,7 +859,6 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 	int r;
 
 	init_completion(&dev->cmd_complete);
-	mutex_init(&dev->lock);
 
 	r = i2c_dw_init(dev);
 	if (r)
@@ -883,9 +881,17 @@ int i2c_dw_probe(struct dw_i2c_dev *dev)
 		return r;
 	}
 
+	/*
+	 * Increment PM usage count during adapter registration in order to
+	 * avoid possible spurious runtime suspend when adapter device is
+	 * registered to the device core and immediate resume in case bus has
+	 * registered I2C slaves that do I2C transfers in their probe.
+	 */
+	pm_runtime_get_noresume(dev->dev);
 	r = i2c_add_numbered_adapter(adap);
 	if (r)
 		dev_err(dev->dev, "failure adding adapter: %d\n", r);
+	pm_runtime_put_noidle(dev->dev);
 
 	return r;
 }
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 9ffb63a60f95..cd409e7fbc71 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -36,7 +36,6 @@
  * @dev: driver model device node
  * @base: IO registers pointer
  * @cmd_complete: tx completion indicator
- * @lock: protect this struct and IO registers
  * @clk: input reference clock
  * @cmd_err: run time hadware error code
  * @msgs: points to an array of messages currently being transfered
@@ -73,7 +72,6 @@ struct dw_i2c_dev {
 	struct device		*dev;
 	void __iomem		*base;
 	struct completion	cmd_complete;
-	struct mutex		lock;
 	struct clk		*clk;
 	u32			(*get_clk_rate_khz) (struct dw_i2c_dev *dev);
 	struct dw_pci_controller *controller;
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 27fa0cb09538..585a3b7915bd 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -184,7 +184,7 @@
 
 /* Older devices have their ID defined in <linux/pci_ids.h> */
 #define PCI_DEVICE_ID_INTEL_BAYTRAIL_SMBUS		0x0f12
-#define PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS		0x2292
+#define PCI_DEVICE_ID_INTEL_DNV_SMBUS			0x19df
 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS		0x1c22
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS		0x1d22
 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */
@@ -193,9 +193,11 @@
 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2		0x1d72
 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_SMBUS		0x1e22
 #define PCI_DEVICE_ID_INTEL_AVOTON_SMBUS		0x1f3c
+#define PCI_DEVICE_ID_INTEL_BRASWELL_SMBUS		0x2292
 #define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS		0x2330
 #define PCI_DEVICE_ID_INTEL_COLETOCREEK_SMBUS		0x23b0
 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS		0x3b30
+#define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS		0x5ad4
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_SMBUS		0x8c22
 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_SMBUS		0x8ca2
 #define PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS		0x8d22
@@ -204,10 +206,8 @@
 #define PCI_DEVICE_ID_INTEL_WELLSBURG_SMBUS_MS2		0x8d7f
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_SMBUS		0x9c22
 #define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_SMBUS	0x9ca2
-#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS	0xa123
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS	0x9d23
-#define PCI_DEVICE_ID_INTEL_DNV_SMBUS			0x19df
-#define PCI_DEVICE_ID_INTEL_BROXTON_SMBUS		0x5ad4
+#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS	0xa123
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS		0xa1a3
 #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS	0xa223
 
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index a2b132cef717..1ca7ef2314f7 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -212,7 +212,7 @@ struct imx_i2c_struct {
 	struct imx_i2c_dma	*dma;
 };
 
-static const struct imx_i2c_hwdata imx1_i2c_hwdata  = {
+static const struct imx_i2c_hwdata imx1_i2c_hwdata = {
 	.devtype		= IMX1_I2C,
 	.regshift		= IMX_I2C_REGSHIFT,
 	.clk_div		= imx_i2c_clk_div,
@@ -222,7 +222,7 @@ static const struct imx_i2c_hwdata imx1_i2c_hwdata  = {
 
 };
 
-static const struct imx_i2c_hwdata imx21_i2c_hwdata  = {
+static const struct imx_i2c_hwdata imx21_i2c_hwdata = {
 	.devtype		= IMX21_I2C,
 	.regshift		= IMX_I2C_REGSHIFT,
 	.clk_div		= imx_i2c_clk_div,
@@ -871,7 +871,7 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, bo
 		if ((!i) && block_data)
 			msgs->buf[0] = len;
 		else
-			msgs->buf[i] =  imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
+			msgs->buf[i] = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2DR);
 		dev_dbg(&i2c_imx->adapter.dev,
 			"<%s> read byte: B%d=0x%X\n",
 			__func__, i, msgs->buf[i]);
@@ -916,7 +916,7 @@ static int i2c_imx_xfer(struct i2c_adapter *adapter,
 			temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2CR);
 			temp |= I2CR_RSTA;
 			imx_i2c_write_reg(temp, i2c_imx, IMX_I2C_I2CR);
-			result =  i2c_imx_bus_busy(i2c_imx, 1);
+			result = i2c_imx_bus_busy(i2c_imx, 1);
 			if (result)
 				goto fail0;
 		}
@@ -1192,7 +1192,7 @@ static int i2c_imx_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 static int i2c_imx_runtime_suspend(struct device *dev)
 {
-	struct imx_i2c_struct *i2c_imx  = dev_get_drvdata(dev);
+	struct imx_i2c_struct *i2c_imx = dev_get_drvdata(dev);
 
 	clk_disable_unprepare(i2c_imx->clk);
 
@@ -1201,7 +1201,7 @@ static int i2c_imx_runtime_suspend(struct device *dev)
 
 static int i2c_imx_runtime_resume(struct device *dev)
 {
-	struct imx_i2c_struct *i2c_imx  = dev_get_drvdata(dev);
+	struct imx_i2c_struct *i2c_imx = dev_get_drvdata(dev);
 	int ret;
 
 	ret = clk_prepare_enable(i2c_imx->clk);
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index aec8e6ce38a4..453358b4d9ca 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -60,6 +60,7 @@
 #define I2C_DMA_INT_FLAG_NONE		0x0000
 #define I2C_DMA_CLR_FLAG		0x0000
 #define I2C_DMA_HARD_RST		0x0002
+#define I2C_DMA_4G_MODE			0x0001
 
 #define I2C_DEFAULT_SPEED		100000	/* hz */
 #define MAX_FS_MODE_SPEED		400000
@@ -88,6 +89,8 @@ enum DMA_REGS_OFFSET {
 	OFFSET_RX_MEM_ADDR = 0x20,
 	OFFSET_TX_LEN = 0x24,
 	OFFSET_RX_LEN = 0x28,
+	OFFSET_TX_4G_MODE = 0x54,
+	OFFSET_RX_4G_MODE = 0x58,
 };
 
 enum i2c_trans_st_rs {
@@ -133,6 +136,7 @@ struct mtk_i2c_compatible {
 	unsigned char dcm: 1;
 	unsigned char auto_restart: 1;
 	unsigned char aux_len_reg: 1;
+	unsigned char support_33bits: 1;
 };
 
 struct mtk_i2c {
@@ -182,6 +186,7 @@ static const struct mtk_i2c_compatible mt6577_compat = {
 	.dcm = 1,
 	.auto_restart = 0,
 	.aux_len_reg = 0,
+	.support_33bits = 0,
 };
 
 static const struct mtk_i2c_compatible mt6589_compat = {
@@ -190,6 +195,7 @@ static const struct mtk_i2c_compatible mt6589_compat = {
 	.dcm = 0,
 	.auto_restart = 0,
 	.aux_len_reg = 0,
+	.support_33bits = 0,
 };
 
 static const struct mtk_i2c_compatible mt8173_compat = {
@@ -198,6 +204,7 @@ static const struct mtk_i2c_compatible mt8173_compat = {
 	.dcm = 1,
 	.auto_restart = 1,
 	.aux_len_reg = 1,
+	.support_33bits = 1,
 };
 
 static const struct of_device_id mtk_i2c_of_match[] = {
@@ -366,6 +373,11 @@ static int mtk_i2c_set_speed(struct mtk_i2c *i2c, unsigned int parent_clk,
 	return 0;
 }
 
+static inline u32 mtk_i2c_set_4g_mode(dma_addr_t addr)
+{
+	return (addr & BIT_ULL(32)) ? I2C_DMA_4G_MODE : I2C_DMA_CLR_FLAG;
+}
+
 static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
 			       int num, int left_num)
 {
@@ -373,6 +385,7 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
 	u16 start_reg;
 	u16 control_reg;
 	u16 restart_flag = 0;
+	u32 reg_4g_mode;
 	dma_addr_t rpaddr = 0;
 	dma_addr_t wpaddr = 0;
 	int ret;
@@ -439,6 +452,12 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
 					msgs->len, DMA_FROM_DEVICE);
 		if (dma_mapping_error(i2c->dev, rpaddr))
 			return -ENOMEM;
+
+		if (i2c->dev_comp->support_33bits) {
+			reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr);
+			writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE);
+		}
+
 		writel((u32)rpaddr, i2c->pdmabase + OFFSET_RX_MEM_ADDR);
 		writel(msgs->len, i2c->pdmabase + OFFSET_RX_LEN);
 	} else if (i2c->op == I2C_MASTER_WR) {
@@ -448,6 +467,12 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
 					msgs->len, DMA_TO_DEVICE);
 		if (dma_mapping_error(i2c->dev, wpaddr))
 			return -ENOMEM;
+
+		if (i2c->dev_comp->support_33bits) {
+			reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr);
+			writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE);
+		}
+
 		writel((u32)wpaddr, i2c->pdmabase + OFFSET_TX_MEM_ADDR);
 		writel(msgs->len, i2c->pdmabase + OFFSET_TX_LEN);
 	} else {
@@ -465,6 +490,15 @@ static int mtk_i2c_do_transfer(struct mtk_i2c *i2c, struct i2c_msg *msgs,
 					 msgs->len, DMA_TO_DEVICE);
 			return -ENOMEM;
 		}
+
+		if (i2c->dev_comp->support_33bits) {
+			reg_4g_mode = mtk_i2c_set_4g_mode(wpaddr);
+			writel(reg_4g_mode, i2c->pdmabase + OFFSET_TX_4G_MODE);
+
+			reg_4g_mode = mtk_i2c_set_4g_mode(rpaddr);
+			writel(reg_4g_mode, i2c->pdmabase + OFFSET_RX_4G_MODE);
+		}
+
 		writel((u32)wpaddr, i2c->pdmabase + OFFSET_TX_MEM_ADDR);
 		writel((u32)rpaddr, i2c->pdmabase + OFFSET_RX_MEM_ADDR);
 		writel(msgs->len, i2c->pdmabase + OFFSET_TX_LEN);
@@ -729,6 +763,14 @@ static int mtk_i2c_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	if (i2c->dev_comp->support_33bits) {
+		ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(33));
+		if (ret) {
+			dev_err(&pdev->dev, "dma_set_mask return error.\n");
+			return ret;
+		}
+	}
+
 	ret = mtk_i2c_clock_enable(i2c);
 	if (ret) {
 		dev_err(&pdev->dev, "clock enable failed!\n");
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index 32914ab42a19..46fb6c42934f 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -2,7 +2,7 @@
  * (C) Copyright 2009-2010
  * Nokia Siemens Networks, michael.lawnick.ext@nsn.com
  *
- * Portions Copyright (C) 2010, 2011 Cavium Networks, Inc.
+ * Portions Copyright (C) 2010 - 2016 Cavium, Inc.
  *
  * This is a driver for the i2c adapter in Cavium Networks' OCTEON processors.
  *
@@ -26,39 +26,48 @@
 
 #define DRV_NAME "i2c-octeon"
 
-/* The previous out-of-tree version was implicitly version 1.0. */
-#define DRV_VERSION	"2.0"
-
-/* register offsets */
-#define SW_TWSI	 0x00
-#define TWSI_INT 0x10
+/* Register offsets */
+#define SW_TWSI			0x00
+#define TWSI_INT		0x10
 
 /* Controller command patterns */
-#define SW_TWSI_V               0x8000000000000000ull
-#define SW_TWSI_EOP_TWSI_DATA   0x0C00000100000000ull
-#define SW_TWSI_EOP_TWSI_CTL    0x0C00000200000000ull
-#define SW_TWSI_EOP_TWSI_CLKCTL 0x0C00000300000000ull
-#define SW_TWSI_EOP_TWSI_STAT   0x0C00000300000000ull
-#define SW_TWSI_EOP_TWSI_RST    0x0C00000700000000ull
-#define SW_TWSI_OP_TWSI_CLK     0x0800000000000000ull
-#define SW_TWSI_R               0x0100000000000000ull
+#define SW_TWSI_V		BIT_ULL(63)	/* Valid bit */
+#define SW_TWSI_R		BIT_ULL(56)	/* Result or read bit */
+
+/* Controller opcode word (bits 60:57) */
+#define SW_TWSI_OP_SHIFT	57
+#define SW_TWSI_OP_TWSI_CLK	(4ULL << SW_TWSI_OP_SHIFT)
+#define SW_TWSI_OP_EOP		(6ULL << SW_TWSI_OP_SHIFT) /* Extended opcode */
+
+/* Controller extended opcode word (bits 34:32) */
+#define SW_TWSI_EOP_SHIFT	32
+#define SW_TWSI_EOP_TWSI_DATA	(SW_TWSI_OP_EOP | 1ULL << SW_TWSI_EOP_SHIFT)
+#define SW_TWSI_EOP_TWSI_CTL	(SW_TWSI_OP_EOP | 2ULL << SW_TWSI_EOP_SHIFT)
+#define SW_TWSI_EOP_TWSI_CLKCTL	(SW_TWSI_OP_EOP | 3ULL << SW_TWSI_EOP_SHIFT)
+#define SW_TWSI_EOP_TWSI_STAT	(SW_TWSI_OP_EOP | 3ULL << SW_TWSI_EOP_SHIFT)
+#define SW_TWSI_EOP_TWSI_RST	(SW_TWSI_OP_EOP | 7ULL << SW_TWSI_EOP_SHIFT)
 
 /* Controller command and status bits */
-#define TWSI_CTL_CE   0x80
-#define TWSI_CTL_ENAB 0x40
-#define TWSI_CTL_STA  0x20
-#define TWSI_CTL_STP  0x10
-#define TWSI_CTL_IFLG 0x08
-#define TWSI_CTL_AAK  0x04
+#define TWSI_CTL_CE		0x80
+#define TWSI_CTL_ENAB		0x40	/* Bus enable */
+#define TWSI_CTL_STA		0x20	/* Master-mode start, HW clears when done */
+#define TWSI_CTL_STP		0x10	/* Master-mode stop, HW clears when done */
+#define TWSI_CTL_IFLG		0x08	/* HW event, SW writes 0 to ACK */
+#define TWSI_CTL_AAK		0x04	/* Assert ACK */
 
 /* Some status values */
-#define STAT_START      0x08
-#define STAT_RSTART     0x10
-#define STAT_TXADDR_ACK 0x18
-#define STAT_TXDATA_ACK 0x28
-#define STAT_RXADDR_ACK 0x40
-#define STAT_RXDATA_ACK 0x50
-#define STAT_IDLE       0xF8
+#define STAT_START		0x08
+#define STAT_RSTART		0x10
+#define STAT_TXADDR_ACK		0x18
+#define STAT_TXDATA_ACK		0x28
+#define STAT_RXADDR_ACK		0x40
+#define STAT_RXDATA_ACK		0x50
+#define STAT_IDLE		0xF8
+
+/* TWSI_INT values */
+#define TWSI_INT_CORE_EN	BIT_ULL(6)
+#define TWSI_INT_SDA_OVR	BIT_ULL(8)
+#define TWSI_INT_SCL_OVR	BIT_ULL(9)
 
 struct octeon_i2c {
 	wait_queue_head_t queue;
@@ -66,23 +75,19 @@ struct octeon_i2c {
 	int irq;
 	u32 twsi_freq;
 	int sys_freq;
-	resource_size_t twsi_phys;
 	void __iomem *twsi_base;
-	resource_size_t regsize;
 	struct device *dev;
 };
 
 /**
- * octeon_i2c_write_sw - write an I2C core register.
- * @i2c: The struct octeon_i2c.
- * @eop_reg: Register selector.
- * @data: Value to be written.
+ * octeon_i2c_write_sw - write an I2C core register
+ * @i2c: The struct octeon_i2c
+ * @eop_reg: Register selector
+ * @data: Value to be written
  *
  * The I2C core registers are accessed indirectly via the SW_TWSI CSR.
  */
-static void octeon_i2c_write_sw(struct octeon_i2c *i2c,
-				u64 eop_reg,
-				u8 data)
+static void octeon_i2c_write_sw(struct octeon_i2c *i2c, u64 eop_reg, u8 data)
 {
 	u64 tmp;
 
@@ -93,9 +98,9 @@ static void octeon_i2c_write_sw(struct octeon_i2c *i2c,
 }
 
 /**
- * octeon_i2c_read_sw - write an I2C core register.
- * @i2c: The struct octeon_i2c.
- * @eop_reg: Register selector.
+ * octeon_i2c_read_sw - read lower bits of an I2C core register
+ * @i2c: The struct octeon_i2c
+ * @eop_reg: Register selector
  *
  * Returns the data.
  *
@@ -115,8 +120,8 @@ static u8 octeon_i2c_read_sw(struct octeon_i2c *i2c, u64 eop_reg)
 
 /**
  * octeon_i2c_write_int - write the TWSI_INT register
- * @i2c: The struct octeon_i2c.
- * @data: Value to be written.
+ * @i2c: The struct octeon_i2c
+ * @data: Value to be written
  */
 static void octeon_i2c_write_int(struct octeon_i2c *i2c, u64 data)
 {
@@ -125,57 +130,52 @@ static void octeon_i2c_write_int(struct octeon_i2c *i2c, u64 data)
 }
 
 /**
- * octeon_i2c_int_enable - enable the TS interrupt.
- * @i2c: The struct octeon_i2c.
+ * octeon_i2c_int_enable - enable the CORE interrupt
+ * @i2c: The struct octeon_i2c
  *
  * The interrupt will be asserted when there is non-STAT_IDLE state in
  * the SW_TWSI_EOP_TWSI_STAT register.
  */
 static void octeon_i2c_int_enable(struct octeon_i2c *i2c)
 {
-	octeon_i2c_write_int(i2c, 0x40);
+	octeon_i2c_write_int(i2c, TWSI_INT_CORE_EN);
 }
 
-/**
- * octeon_i2c_int_disable - disable the TS interrupt.
- * @i2c: The struct octeon_i2c.
- */
+/* disable the CORE interrupt */
 static void octeon_i2c_int_disable(struct octeon_i2c *i2c)
 {
+	/* clear TS/ST/IFLG events */
 	octeon_i2c_write_int(i2c, 0);
 }
 
 /**
- * octeon_i2c_unblock - unblock the bus.
- * @i2c: The struct octeon_i2c.
+ * octeon_i2c_unblock - unblock the bus
+ * @i2c: The struct octeon_i2c
  *
- * If there was a reset while a device was driving 0 to bus,
- * bus is blocked. We toggle it free manually by some clock
- * cycles and send a stop.
+ * If there was a reset while a device was driving 0 to bus, bus is blocked.
+ * We toggle it free manually by some clock cycles and send a stop.
  */
 static void octeon_i2c_unblock(struct octeon_i2c *i2c)
 {
 	int i;
 
 	dev_dbg(i2c->dev, "%s\n", __func__);
+
 	for (i = 0; i < 9; i++) {
-		octeon_i2c_write_int(i2c, 0x0);
+		octeon_i2c_write_int(i2c, 0);
 		udelay(5);
-		octeon_i2c_write_int(i2c, 0x200);
+		octeon_i2c_write_int(i2c, TWSI_INT_SCL_OVR);
 		udelay(5);
 	}
-	octeon_i2c_write_int(i2c, 0x300);
+	/* hand-crank a STOP */
+	octeon_i2c_write_int(i2c, TWSI_INT_SDA_OVR | TWSI_INT_SCL_OVR);
 	udelay(5);
-	octeon_i2c_write_int(i2c, 0x100);
+	octeon_i2c_write_int(i2c, TWSI_INT_SDA_OVR);
 	udelay(5);
-	octeon_i2c_write_int(i2c, 0x0);
+	octeon_i2c_write_int(i2c, 0);
 }
 
-/**
- * octeon_i2c_isr - the interrupt service routine.
- * @int: The irq, unused.
- * @dev_id: Our struct octeon_i2c.
- */
+/* interrupt service routine */
 static irqreturn_t octeon_i2c_isr(int irq, void *dev_id)
 {
 	struct octeon_i2c *i2c = dev_id;
@@ -193,24 +193,20 @@ static int octeon_i2c_test_iflg(struct octeon_i2c *i2c)
 }
 
 /**
- * octeon_i2c_wait - wait for the IFLG to be set.
- * @i2c: The struct octeon_i2c.
+ * octeon_i2c_wait - wait for the IFLG to be set
+ * @i2c: The struct octeon_i2c
  *
  * Returns 0 on success, otherwise a negative errno.
  */
 static int octeon_i2c_wait(struct octeon_i2c *i2c)
 {
-	long result;
+	long time_left;
 
 	octeon_i2c_int_enable(i2c);
-
-	result = wait_event_timeout(i2c->queue,
-					octeon_i2c_test_iflg(i2c),
-					i2c->adap.timeout);
-
+	time_left = wait_event_timeout(i2c->queue, octeon_i2c_test_iflg(i2c),
+				       i2c->adap.timeout);
 	octeon_i2c_int_disable(i2c);
-
-	if (result == 0) {
+	if (!time_left) {
 		dev_dbg(i2c->dev, "%s: timeout\n", __func__);
 		return -ETIMEDOUT;
 	}
@@ -219,18 +215,18 @@ static int octeon_i2c_wait(struct octeon_i2c *i2c)
 }
 
 /**
- * octeon_i2c_start - send START to the bus.
- * @i2c: The struct octeon_i2c.
+ * octeon_i2c_start - send START to the bus
+ * @i2c: The struct octeon_i2c
  *
  * Returns 0 on success, otherwise a negative errno.
  */
 static int octeon_i2c_start(struct octeon_i2c *i2c)
 {
-	u8 data;
 	int result;
+	u8 data;
 
 	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-				TWSI_CTL_ENAB | TWSI_CTL_STA);
+			    TWSI_CTL_ENAB | TWSI_CTL_STA);
 
 	result = octeon_i2c_wait(i2c);
 	if (result) {
@@ -243,7 +239,6 @@ static int octeon_i2c_start(struct octeon_i2c *i2c)
 			octeon_i2c_unblock(i2c);
 			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
 					    TWSI_CTL_ENAB | TWSI_CTL_STA);
-
 			result = octeon_i2c_wait(i2c);
 		}
 		if (result)
@@ -259,34 +254,19 @@ static int octeon_i2c_start(struct octeon_i2c *i2c)
 	return 0;
 }
 
-/**
- * octeon_i2c_stop - send STOP to the bus.
- * @i2c: The struct octeon_i2c.
- *
- * Returns 0 on success, otherwise a negative errno.
- */
-static int octeon_i2c_stop(struct octeon_i2c *i2c)
+/* send STOP to the bus */
+static void octeon_i2c_stop(struct octeon_i2c *i2c)
 {
-	u8 data;
-
 	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
 			    TWSI_CTL_ENAB | TWSI_CTL_STP);
-
-	data = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
-
-	if (data != STAT_IDLE) {
-		dev_err(i2c->dev, "%s: bad status(0x%x)\n", __func__, data);
-		return -EIO;
-	}
-	return 0;
 }
 
 /**
- * octeon_i2c_write - send data to the bus.
- * @i2c: The struct octeon_i2c.
- * @target: Target address.
- * @data: Pointer to the data to be sent.
- * @length: Length of the data.
+ * octeon_i2c_write - send data to the bus via low-level controller
+ * @i2c: The struct octeon_i2c
+ * @target: Target address
+ * @data: Pointer to the data to be sent
+ * @length: Length of the data
  *
  * The address is sent over the bus, then the data.
  *
@@ -311,6 +291,7 @@ static int octeon_i2c_write(struct octeon_i2c *i2c, int target,
 
 	for (i = 0; i < length; i++) {
 		tmp = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
+
 		if ((tmp != STAT_TXADDR_ACK) && (tmp != STAT_TXDATA_ACK)) {
 			dev_err(i2c->dev,
 				"%s: bad status before write (0x%x)\n",
@@ -330,20 +311,21 @@ static int octeon_i2c_write(struct octeon_i2c *i2c, int target,
 }
 
 /**
- * octeon_i2c_read - receive data from the bus.
- * @i2c: The struct octeon_i2c.
- * @target: Target address.
- * @data: Pointer to the location to store the datae .
- * @length: Length of the data.
+ * octeon_i2c_read - receive data from the bus via low-level controller
+ * @i2c: The struct octeon_i2c
+ * @target: Target address
+ * @data: Pointer to the location to store the data
+ * @rlength: Length of the data
+ * @recv_len: flag for length byte
  *
  * The address is sent over the bus, then the data is read.
  *
  * Returns 0 on success, otherwise a negative errno.
  */
 static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
-			   u8 *data, int length)
+			   u8 *data, u16 *rlength, bool recv_len)
 {
-	int i, result;
+	int i, result, length = *rlength;
 	u8 tmp;
 
 	if (length < 1)
@@ -353,7 +335,7 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 	if (result)
 		return result;
 
-	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_DATA, (target<<1) | 1);
+	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_DATA, (target << 1) | 1);
 	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL, TWSI_CTL_ENAB);
 
 	result = octeon_i2c_wait(i2c);
@@ -362,6 +344,7 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 
 	for (i = 0; i < length; i++) {
 		tmp = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_STAT);
+
 		if ((tmp != STAT_RXDATA_ACK) && (tmp != STAT_RXADDR_ACK)) {
 			dev_err(i2c->dev,
 				"%s: bad status before read (0x%x)\n",
@@ -369,52 +352,59 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 			return -EIO;
 		}
 
-		if (i+1 < length)
+		if (i + 1 < length)
 			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-						TWSI_CTL_ENAB | TWSI_CTL_AAK);
+					    TWSI_CTL_ENAB | TWSI_CTL_AAK);
 		else
 			octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CTL,
-						TWSI_CTL_ENAB);
+					    TWSI_CTL_ENAB);
 
 		result = octeon_i2c_wait(i2c);
 		if (result)
 			return result;
 
 		data[i] = octeon_i2c_read_sw(i2c, SW_TWSI_EOP_TWSI_DATA);
+		if (recv_len && i == 0) {
+			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) {
+				dev_err(i2c->dev,
+					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
+					__func__, data[i]);
+				return -EPROTO;
+			}
+			length += data[i];
+		}
 	}
+	*rlength = length;
 	return 0;
 }
 
 /**
- * octeon_i2c_xfer - The driver's master_xfer function.
- * @adap: Pointer to the i2c_adapter structure.
- * @msgs: Pointer to the messages to be processed.
- * @num: Length of the MSGS array.
+ * octeon_i2c_xfer - The driver's master_xfer function
+ * @adap: Pointer to the i2c_adapter structure
+ * @msgs: Pointer to the messages to be processed
+ * @num: Length of the MSGS array
  *
- * Returns the number of messages processed, or a negative errno on
- * failure.
+ * Returns the number of messages processed, or a negative errno on failure.
  */
-static int octeon_i2c_xfer(struct i2c_adapter *adap,
-			   struct i2c_msg *msgs,
+static int octeon_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
 			   int num)
 {
-	struct i2c_msg *pmsg;
-	int i;
-	int ret = 0;
 	struct octeon_i2c *i2c = i2c_get_adapdata(adap);
+	int i, ret = 0;
 
 	for (i = 0; ret == 0 && i < num; i++) {
-		pmsg = &msgs[i];
+		struct i2c_msg *pmsg = &msgs[i];
+
 		dev_dbg(i2c->dev,
 			"Doing %s %d byte(s) to/from 0x%02x - %d of %d messages\n",
 			 pmsg->flags & I2C_M_RD ? "read" : "write",
 			 pmsg->len, pmsg->addr, i + 1, num);
 		if (pmsg->flags & I2C_M_RD)
 			ret = octeon_i2c_read(i2c, pmsg->addr, pmsg->buf,
-						pmsg->len);
+					      &pmsg->len, pmsg->flags & I2C_M_RECV_LEN);
 		else
 			ret = octeon_i2c_write(i2c, pmsg->addr, pmsg->buf,
-						pmsg->len);
+					       pmsg->len);
 	}
 	octeon_i2c_stop(i2c);
 
@@ -423,7 +413,8 @@ static int octeon_i2c_xfer(struct i2c_adapter *adap,
 
 static u32 octeon_i2c_functionality(struct i2c_adapter *adap)
 {
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
+	       I2C_FUNC_SMBUS_READ_BLOCK_DATA | I2C_SMBUS_BLOCK_PROC_CALL;
 }
 
 static const struct i2c_algorithm octeon_i2c_algo = {
@@ -438,10 +429,8 @@ static struct i2c_adapter octeon_i2c_ops = {
 	.timeout = HZ / 50,
 };
 
-/**
- * octeon_i2c_setclock - Calculate and set clock divisors.
- */
-static int octeon_i2c_setclock(struct octeon_i2c *i2c)
+/* calculate and set clock divisors */
+static void octeon_i2c_set_clock(struct octeon_i2c *i2c)
 {
 	int tclk, thp_base, inc, thp_idx, mdiv_idx, ndiv_idx, foscl, diff;
 	int thp = 0x18, mdiv = 2, ndiv = 0, delta_hz = 1000000;
@@ -449,8 +438,7 @@ static int octeon_i2c_setclock(struct octeon_i2c *i2c)
 	for (ndiv_idx = 0; ndiv_idx < 8 && delta_hz != 0; ndiv_idx++) {
 		/*
 		 * An mdiv value of less than 2 seems to not work well
-		 * with ds1337 RTCs, so we constrain it to larger
-		 * values.
+		 * with ds1337 RTCs, so we constrain it to larger values.
 		 */
 		for (mdiv_idx = 15; mdiv_idx >= 2 && delta_hz != 0; mdiv_idx--) {
 			/*
@@ -460,6 +448,7 @@ static int octeon_i2c_setclock(struct octeon_i2c *i2c)
 			tclk = i2c->twsi_freq * (mdiv_idx + 1) * 10;
 			tclk *= (1 << ndiv_idx);
 			thp_base = (i2c->sys_freq / (tclk * 2)) - 1;
+
 			for (inc = 0; inc <= 1; inc++) {
 				thp_idx = thp_base + inc;
 				if (thp_idx < 5 || thp_idx > 0xff)
@@ -480,11 +469,9 @@ static int octeon_i2c_setclock(struct octeon_i2c *i2c)
 	}
 	octeon_i2c_write_sw(i2c, SW_TWSI_OP_TWSI_CLK, thp);
 	octeon_i2c_write_sw(i2c, SW_TWSI_EOP_TWSI_CLKCTL, (mdiv << 3) | ndiv);
-
-	return 0;
 }
 
-static int octeon_i2c_initlowlevel(struct octeon_i2c *i2c)
+static int octeon_i2c_init_lowlevel(struct octeon_i2c *i2c)
 {
 	u8 status;
 	int tries;
@@ -507,9 +494,10 @@ static int octeon_i2c_initlowlevel(struct octeon_i2c *i2c)
 
 static int octeon_i2c_probe(struct platform_device *pdev)
 {
-	int irq, result = 0;
-	struct octeon_i2c *i2c;
+	struct device_node *node = pdev->dev.of_node;
 	struct resource *res_mem;
+	struct octeon_i2c *i2c;
+	int irq, result = 0;
 
 	/* All adaptors have an irq.  */
 	irq = platform_get_irq(pdev, 0);
@@ -518,31 +506,25 @@ static int octeon_i2c_probe(struct platform_device *pdev)
 
 	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
 	if (!i2c) {
-		dev_err(&pdev->dev, "kzalloc failed\n");
 		result = -ENOMEM;
 		goto out;
 	}
 	i2c->dev = &pdev->dev;
 
 	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	if (res_mem == NULL) {
-		dev_err(i2c->dev, "found no memory resource\n");
-		result = -ENXIO;
+	i2c->twsi_base = devm_ioremap_resource(&pdev->dev, res_mem);
+	if (IS_ERR(i2c->twsi_base)) {
+		result = PTR_ERR(i2c->twsi_base);
 		goto out;
 	}
-	i2c->twsi_phys = res_mem->start;
-	i2c->regsize = resource_size(res_mem);
 
 	/*
 	 * "clock-rate" is a legacy binding, the official binding is
 	 * "clock-frequency".  Try the official one first and then
 	 * fall back if it doesn't exist.
 	 */
-	if (of_property_read_u32(pdev->dev.of_node,
-				 "clock-frequency", &i2c->twsi_freq) &&
-	    of_property_read_u32(pdev->dev.of_node,
-				 "clock-rate", &i2c->twsi_freq)) {
+	if (of_property_read_u32(node, "clock-frequency", &i2c->twsi_freq) &&
+	    of_property_read_u32(node, "clock-rate", &i2c->twsi_freq)) {
 		dev_err(i2c->dev,
 			"no I2C 'clock-rate' or 'clock-frequency' property\n");
 		result = -ENXIO;
@@ -551,13 +533,6 @@ static int octeon_i2c_probe(struct platform_device *pdev)
 
 	i2c->sys_freq = octeon_get_io_clock_rate();
 
-	if (!devm_request_mem_region(&pdev->dev, i2c->twsi_phys, i2c->regsize,
-				      res_mem->name)) {
-		dev_err(i2c->dev, "request_mem_region failed\n");
-		goto out;
-	}
-	i2c->twsi_base = devm_ioremap(&pdev->dev, i2c->twsi_phys, i2c->regsize);
-
 	init_waitqueue_head(&i2c->queue);
 
 	i2c->irq = irq;
@@ -569,21 +544,17 @@ static int octeon_i2c_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	result = octeon_i2c_initlowlevel(i2c);
+	result = octeon_i2c_init_lowlevel(i2c);
 	if (result) {
 		dev_err(i2c->dev, "init low level failed\n");
 		goto  out;
 	}
 
-	result = octeon_i2c_setclock(i2c);
-	if (result) {
-		dev_err(i2c->dev, "clock init failed\n");
-		goto  out;
-	}
+	octeon_i2c_set_clock(i2c);
 
 	i2c->adap = octeon_i2c_ops;
 	i2c->adap.dev.parent = &pdev->dev;
-	i2c->adap.dev.of_node = pdev->dev.of_node;
+	i2c->adap.dev.of_node = node;
 	i2c_set_adapdata(&i2c->adap, i2c);
 	platform_set_drvdata(pdev, i2c);
 
@@ -592,8 +563,7 @@ static int octeon_i2c_probe(struct platform_device *pdev)
 		dev_err(i2c->dev, "failed to add adapter\n");
 		goto out;
 	}
-	dev_info(i2c->dev, "version %s\n", DRV_VERSION);
-
+	dev_info(i2c->dev, "probed\n");
 	return 0;
 
 out:
@@ -608,10 +578,8 @@ static int octeon_i2c_remove(struct platform_device *pdev)
 	return 0;
 };
 
-static struct of_device_id octeon_i2c_match[] = {
-	{
-		.compatible = "cavium,octeon-3860-twsi",
-	},
+static const struct of_device_id octeon_i2c_match[] = {
+	{ .compatible = "cavium,octeon-3860-twsi", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, octeon_i2c_match);
@@ -630,4 +598,3 @@ module_platform_driver(octeon_i2c_driver);
 MODULE_AUTHOR("Michael Lawnick <michael.lawnick.ext@nsn.com>");
 MODULE_DESCRIPTION("I2C-Bus adapter for Cavium OCTEON processors");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 93f2895383ee..23d1c167b5d7 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -85,8 +85,14 @@
 /* SB800 constants */
 #define SB800_PIIX4_SMB_IDX		0xcd6
 
-/* SB800 port is selected by bits 2:1 of the smb_en register (0x2c) */
+/*
+ * SB800 port is selected by bits 2:1 of the smb_en register (0x2c)
+ * or the smb_sel register (0x2e), depending on bit 0 of register 0x2f.
+ * Hudson-2/Bolton port is always selected by bits 2:1 of register 0x2f.
+ */
 #define SB800_PIIX4_PORT_IDX		0x2c
+#define SB800_PIIX4_PORT_IDX_ALT	0x2e
+#define SB800_PIIX4_PORT_IDX_SEL	0x2f
 #define SB800_PIIX4_PORT_IDX_MASK	0x06
 
 /* insmod parameters */
@@ -136,8 +142,13 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 	{ },
 };
 
-/* SB800 globals */
+/*
+ * SB800 globals
+ * piix4_mutex_sb800 protects piix4_port_sel_sb800 and the pair
+ * of I/O ports at SB800_PIIX4_SMB_IDX.
+ */
 static DEFINE_MUTEX(piix4_mutex_sb800);
+static u8 piix4_port_sel_sb800;
 static const char *piix4_main_port_names_sb800[PIIX4_MAX_ADAPTERS] = {
 	" port 0", " port 2", " port 3", " port 4"
 };
@@ -148,7 +159,7 @@ struct i2c_piix4_adapdata {
 
 	/* SB800 */
 	bool sb800_main;
-	unsigned short port;
+	u8 port;		/* Port number, shifted */
 };
 
 static int piix4_setup(struct pci_dev *PIIX4_dev,
@@ -254,7 +265,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 			     const struct pci_device_id *id, u8 aux)
 {
 	unsigned short piix4_smba;
-	u8 smba_en_lo, smba_en_hi, smb_en, smb_en_status;
+	u8 smba_en_lo, smba_en_hi, smb_en, smb_en_status, port_sel;
 	u8 i2ccfg, i2ccfg_offset = 0x10;
 
 	/* SB800 and later SMBus does not support forcing address */
@@ -334,6 +345,23 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 		 "SMBus Host Controller at 0x%x, revision %d\n",
 		 piix4_smba, i2ccfg >> 4);
 
+	/* Find which register is used for port selection */
+	if (PIIX4_dev->vendor == PCI_VENDOR_ID_AMD) {
+		piix4_port_sel_sb800 = SB800_PIIX4_PORT_IDX_ALT;
+	} else {
+		mutex_lock(&piix4_mutex_sb800);
+		outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX);
+		port_sel = inb_p(SB800_PIIX4_SMB_IDX + 1);
+		piix4_port_sel_sb800 = (port_sel & 0x01) ?
+				       SB800_PIIX4_PORT_IDX_ALT :
+				       SB800_PIIX4_PORT_IDX;
+		mutex_unlock(&piix4_mutex_sb800);
+	}
+
+	dev_info(&PIIX4_dev->dev,
+		 "Using register 0x%02x for SMBus port selection\n",
+		 (unsigned int)piix4_port_sel_sb800);
+
 	return piix4_smba;
 }
 
@@ -563,12 +591,12 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 
 	mutex_lock(&piix4_mutex_sb800);
 
-	outb_p(SB800_PIIX4_PORT_IDX, SB800_PIIX4_SMB_IDX);
+	outb_p(piix4_port_sel_sb800, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
 
 	port = adapdata->port;
-	if ((smba_en_lo & SB800_PIIX4_PORT_IDX_MASK) != (port << 1))
-		outb_p((smba_en_lo & ~SB800_PIIX4_PORT_IDX_MASK) | (port << 1),
+	if ((smba_en_lo & SB800_PIIX4_PORT_IDX_MASK) != port)
+		outb_p((smba_en_lo & ~SB800_PIIX4_PORT_IDX_MASK) | port,
 		       SB800_PIIX4_SMB_IDX + 1);
 
 	retval = piix4_access(adap, addr, flags, read_write,
@@ -627,7 +655,7 @@ static struct i2c_adapter *piix4_main_adapters[PIIX4_MAX_ADAPTERS];
 static struct i2c_adapter *piix4_aux_adapter;
 
 static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
-			     bool sb800_main, unsigned short port,
+			     bool sb800_main, u8 port,
 			     const char *name, struct i2c_adapter **padap)
 {
 	struct i2c_adapter *adap;
@@ -654,7 +682,7 @@ static int piix4_add_adapter(struct pci_dev *dev, unsigned short smba,
 
 	adapdata->smba = smba;
 	adapdata->sb800_main = sb800_main;
-	adapdata->port = port;
+	adapdata->port = port << 1;
 
 	/* set up the sysfs linkage to our parent device */
 	adap->dev.parent = &dev->dev;
@@ -790,7 +818,7 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
 	if (adapdata->smba) {
 		i2c_del_adapter(adap);
-		if (adapdata->port == 0) {
+		if (adapdata->port == (0 << 1)) {
 			release_region(adapdata->smba, SMBIOSIZE);
 			if (adapdata->sb800_main)
 				release_region(SB800_PIIX4_SMB_IDX, 2);
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index fdcbdab808e9..23eaabb19f96 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -14,8 +14,12 @@
  *
  */
 
+#include <linux/atomic.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
@@ -24,6 +28,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
 
 /* QUP Registers */
 #define QUP_CONFIG		0x000
@@ -33,6 +38,7 @@
 #define QUP_OPERATIONAL		0x018
 #define QUP_ERROR_FLAGS		0x01c
 #define QUP_ERROR_FLAGS_EN	0x020
+#define QUP_OPERATIONAL_MASK	0x028
 #define QUP_HW_VERSION		0x030
 #define QUP_MX_OUTPUT_CNT	0x100
 #define QUP_OUT_FIFO_BASE	0x110
@@ -42,6 +48,7 @@
 #define QUP_IN_FIFO_BASE	0x218
 #define QUP_I2C_CLK_CTL		0x400
 #define QUP_I2C_STATUS		0x404
+#define QUP_I2C_MASTER_GEN	0x408
 
 /* QUP States and reset values */
 #define QUP_RESET_STATE		0
@@ -51,6 +58,7 @@
 
 #define QUP_STATE_VALID		BIT(2)
 #define QUP_I2C_MAST_GEN	BIT(4)
+#define QUP_I2C_FLUSH		BIT(6)
 
 #define QUP_OPERATIONAL_RESET	0x000ff0
 #define QUP_I2C_STATUS_RESET	0xfffffc
@@ -69,16 +77,22 @@
 #define QUP_CLOCK_AUTO_GATE	BIT(13)
 #define I2C_MINI_CORE		(2 << 8)
 #define I2C_N_VAL		15
+#define I2C_N_VAL_V2		7
+
 /* Most significant word offset in FIFO port */
 #define QUP_MSW_SHIFT		(I2C_N_VAL + 1)
 
 /* Packing/Unpacking words in FIFOs, and IO modes */
 #define QUP_OUTPUT_BLK_MODE	(1 << 10)
+#define QUP_OUTPUT_BAM_MODE	(3 << 10)
 #define QUP_INPUT_BLK_MODE	(1 << 12)
+#define QUP_INPUT_BAM_MODE	(3 << 12)
+#define QUP_BAM_MODE		(QUP_OUTPUT_BAM_MODE | QUP_INPUT_BAM_MODE)
 #define QUP_UNPACK_EN		BIT(14)
 #define QUP_PACK_EN		BIT(15)
 
 #define QUP_REPACK_EN		(QUP_UNPACK_EN | QUP_PACK_EN)
+#define QUP_V2_TAGS_EN		1
 
 #define QUP_OUTPUT_BLOCK_SIZE(x)(((x) >> 0) & 0x03)
 #define QUP_OUTPUT_FIFO_SIZE(x)	(((x) >> 2) & 0x07)
@@ -90,6 +104,15 @@
 #define QUP_TAG_DATA		(2 << 8)
 #define QUP_TAG_STOP		(3 << 8)
 #define QUP_TAG_REC		(4 << 8)
+#define QUP_BAM_INPUT_EOT		0x93
+#define QUP_BAM_FLUSH_STOP		0x96
+
+/* QUP v2 tags */
+#define QUP_TAG_V2_START               0x81
+#define QUP_TAG_V2_DATAWR              0x82
+#define QUP_TAG_V2_DATAWR_STOP         0x83
+#define QUP_TAG_V2_DATARD              0x85
+#define QUP_TAG_V2_DATARD_STOP         0x87
 
 /* Status, Error flags */
 #define I2C_STATUS_WR_BUFFER_FULL	BIT(0)
@@ -98,6 +121,36 @@
 #define QUP_STATUS_ERROR_FLAGS		0x7c
 
 #define QUP_READ_LIMIT			256
+#define SET_BIT				0x1
+#define RESET_BIT			0x0
+#define ONE_BYTE			0x1
+#define QUP_I2C_MX_CONFIG_DURING_RUN   BIT(31)
+
+#define MX_TX_RX_LEN			SZ_64K
+#define MX_BLOCKS			(MX_TX_RX_LEN / QUP_READ_LIMIT)
+
+/* Max timeout in ms for 32k bytes */
+#define TOUT_MAX			300
+
+struct qup_i2c_block {
+	int	count;
+	int	pos;
+	int	tx_tag_len;
+	int	rx_tag_len;
+	int	data_len;
+	u8	tags[6];
+};
+
+struct qup_i2c_tag {
+	u8 *start;
+	dma_addr_t addr;
+};
+
+struct qup_i2c_bam {
+	struct	qup_i2c_tag tag;
+	struct	dma_chan *dma;
+	struct	scatterlist *sg;
+};
 
 struct qup_i2c_dev {
 	struct device		*dev;
@@ -114,6 +167,7 @@ struct qup_i2c_dev {
 	int			in_blk_sz;
 
 	unsigned long		one_byte_t;
+	struct qup_i2c_block	blk;
 
 	struct i2c_msg		*msg;
 	/* Current posion in user message buffer */
@@ -123,6 +177,19 @@ struct qup_i2c_dev {
 	/* QUP core errors */
 	u32			qup_err;
 
+	/* To check if this is the last msg */
+	bool			is_last;
+
+	/* To configure when bus is in run state */
+	int			config_run;
+
+	/* dma parameters */
+	bool			is_dma;
+	struct			dma_pool *dpool;
+	struct			qup_i2c_tag start_tag;
+	struct			qup_i2c_bam brx;
+	struct			qup_i2c_bam btx;
+
 	struct completion	xfer;
 };
 
@@ -199,6 +266,14 @@ static int qup_i2c_poll_state(struct qup_i2c_dev *qup, u32 req_state)
 	return qup_i2c_poll_state_mask(qup, req_state, QUP_STATE_MASK);
 }
 
+static void qup_i2c_flush(struct qup_i2c_dev *qup)
+{
+	u32 val = readl(qup->base + QUP_STATE);
+
+	val |= QUP_I2C_FLUSH;
+	writel(val, qup->base + QUP_STATE);
+}
+
 static int qup_i2c_poll_state_valid(struct qup_i2c_dev *qup)
 {
 	return qup_i2c_poll_state_mask(qup, 0, 0);
@@ -221,26 +296,62 @@ static int qup_i2c_change_state(struct qup_i2c_dev *qup, u32 state)
 	return 0;
 }
 
-static int qup_i2c_wait_writeready(struct qup_i2c_dev *qup)
+/**
+ * qup_i2c_wait_ready - wait for a give number of bytes in tx/rx path
+ * @qup: The qup_i2c_dev device
+ * @op: The bit/event to wait on
+ * @val: value of the bit to wait on, 0 or 1
+ * @len: The length the bytes to be transferred
+ */
+static int qup_i2c_wait_ready(struct qup_i2c_dev *qup, int op, bool val,
+			      int len)
 {
 	unsigned long timeout;
 	u32 opflags;
 	u32 status;
+	u32 shift = __ffs(op);
 
-	timeout = jiffies + HZ;
+	len *= qup->one_byte_t;
+	/* timeout after a wait of twice the max time */
+	timeout = jiffies + len * 4;
 
 	for (;;) {
 		opflags = readl(qup->base + QUP_OPERATIONAL);
 		status = readl(qup->base + QUP_I2C_STATUS);
 
-		if (!(opflags & QUP_OUT_NOT_EMPTY) &&
-		    !(status & I2C_STATUS_BUS_ACTIVE))
-			return 0;
+		if (((opflags & op) >> shift) == val) {
+			if ((op == QUP_OUT_NOT_EMPTY) && qup->is_last) {
+				if (!(status & I2C_STATUS_BUS_ACTIVE))
+					return 0;
+			} else {
+				return 0;
+			}
+		}
 
 		if (time_after(jiffies, timeout))
 			return -ETIMEDOUT;
 
-		usleep_range(qup->one_byte_t, qup->one_byte_t * 2);
+		usleep_range(len, len * 2);
+	}
+}
+
+static void qup_i2c_set_write_mode_v2(struct qup_i2c_dev *qup,
+				      struct i2c_msg *msg)
+{
+	/* Number of entries to shift out, including the tags */
+	int total = msg->len + qup->blk.tx_tag_len;
+
+	total |= qup->config_run;
+
+	if (total < qup->out_fifo_sz) {
+		/* FIFO mode */
+		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
+		writel(total, qup->base + QUP_MX_WRITE_CNT);
+	} else {
+		/* BLOCK mode (transfer data on chunks) */
+		writel(QUP_OUTPUT_BLK_MODE | QUP_REPACK_EN,
+		       qup->base + QUP_IO_MODE);
+		writel(total, qup->base + QUP_MX_OUTPUT_CNT);
 	}
 }
 
@@ -261,13 +372,45 @@ static void qup_i2c_set_write_mode(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 	}
 }
 
-static void qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static int check_for_fifo_space(struct qup_i2c_dev *qup)
+{
+	int ret;
+
+	ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
+	if (ret)
+		goto out;
+
+	ret = qup_i2c_wait_ready(qup, QUP_OUT_FULL,
+				 RESET_BIT, 4 * ONE_BYTE);
+	if (ret) {
+		/* Fifo is full. Drain out the fifo */
+		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+		if (ret)
+			goto out;
+
+		ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY,
+					 RESET_BIT, 256 * ONE_BYTE);
+		if (ret) {
+			dev_err(qup->dev, "timeout for fifo out full");
+			goto out;
+		}
+
+		ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
+		if (ret)
+			goto out;
+	}
+
+out:
+	return ret;
+}
+
+static int qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 {
 	u32 addr = msg->addr << 1;
 	u32 qup_tag;
-	u32 opflags;
 	int idx;
 	u32 val;
+	int ret = 0;
 
 	if (qup->pos == 0) {
 		val = QUP_TAG_START | addr;
@@ -279,9 +422,9 @@ static void qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 
 	while (qup->pos < msg->len) {
 		/* Check that there's space in the FIFO for our pair */
-		opflags = readl(qup->base + QUP_OPERATIONAL);
-		if (opflags & QUP_OUT_FULL)
-			break;
+		ret = check_for_fifo_space(qup);
+		if (ret)
+			return ret;
 
 		if (qup->pos == msg->len - 1)
 			qup_tag = QUP_TAG_STOP;
@@ -300,11 +443,501 @@ static void qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 		qup->pos++;
 		idx++;
 	}
+
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+
+	return ret;
 }
 
-static int qup_i2c_write_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static void qup_i2c_set_blk_data(struct qup_i2c_dev *qup,
+				 struct i2c_msg *msg)
+{
+	memset(&qup->blk, 0, sizeof(qup->blk));
+
+	qup->blk.data_len = msg->len;
+	qup->blk.count = (msg->len + QUP_READ_LIMIT - 1) / QUP_READ_LIMIT;
+
+	/* 4 bytes for first block and 2 writes for rest */
+	qup->blk.tx_tag_len = 4 + (qup->blk.count - 1) * 2;
+
+	/* There are 2 tag bytes that are read in to fifo for every block */
+	if (msg->flags & I2C_M_RD)
+		qup->blk.rx_tag_len = qup->blk.count * 2;
+}
+
+static int qup_i2c_send_data(struct qup_i2c_dev *qup, int tlen, u8 *tbuf,
+			     int dlen, u8 *dbuf)
+{
+	u32 val = 0, idx = 0, pos = 0, i = 0, t;
+	int  len = tlen + dlen;
+	u8 *buf = tbuf;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = check_for_fifo_space(qup);
+		if (ret)
+			return ret;
+
+		t = (len >= 4) ? 4 : len;
+
+		while (idx < t) {
+			if (!i && (pos >= tlen)) {
+				buf = dbuf;
+				pos = 0;
+				i = 1;
+			}
+			val |= buf[pos++] << (idx++ * 8);
+		}
+
+		writel(val, qup->base + QUP_OUT_FIFO_BASE);
+		idx  = 0;
+		val = 0;
+		len -= 4;
+	}
+
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+
+	return ret;
+}
+
+static int qup_i2c_get_data_len(struct qup_i2c_dev *qup)
+{
+	int data_len;
+
+	if (qup->blk.data_len > QUP_READ_LIMIT)
+		data_len = QUP_READ_LIMIT;
+	else
+		data_len = qup->blk.data_len;
+
+	return data_len;
+}
+
+static int qup_i2c_set_tags(u8 *tags, struct qup_i2c_dev *qup,
+			    struct i2c_msg *msg,  int is_dma)
+{
+	u16 addr = (msg->addr << 1) | ((msg->flags & I2C_M_RD) == I2C_M_RD);
+	int len = 0;
+	int data_len;
+
+	int last = (qup->blk.pos == (qup->blk.count - 1)) && (qup->is_last);
+
+	if (qup->blk.pos == 0) {
+		tags[len++] = QUP_TAG_V2_START;
+		tags[len++] = addr & 0xff;
+
+		if (msg->flags & I2C_M_TEN)
+			tags[len++] = addr >> 8;
+	}
+
+	/* Send _STOP commands for the last block */
+	if (last) {
+		if (msg->flags & I2C_M_RD)
+			tags[len++] = QUP_TAG_V2_DATARD_STOP;
+		else
+			tags[len++] = QUP_TAG_V2_DATAWR_STOP;
+	} else {
+		if (msg->flags & I2C_M_RD)
+			tags[len++] = QUP_TAG_V2_DATARD;
+		else
+			tags[len++] = QUP_TAG_V2_DATAWR;
+	}
+
+	data_len = qup_i2c_get_data_len(qup);
+
+	/* 0 implies 256 bytes */
+	if (data_len == QUP_READ_LIMIT)
+		tags[len++] = 0;
+	else
+		tags[len++] = data_len;
+
+	if ((msg->flags & I2C_M_RD) && last && is_dma) {
+		tags[len++] = QUP_BAM_INPUT_EOT;
+		tags[len++] = QUP_BAM_FLUSH_STOP;
+	}
+
+	return len;
+}
+
+static int qup_i2c_issue_xfer_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+{
+	int data_len = 0, tag_len, index;
+	int ret;
+
+	tag_len = qup_i2c_set_tags(qup->blk.tags, qup, msg, 0);
+	index = msg->len - qup->blk.data_len;
+
+	/* only tags are written for read */
+	if (!(msg->flags & I2C_M_RD))
+		data_len = qup_i2c_get_data_len(qup);
+
+	ret = qup_i2c_send_data(qup, tag_len, qup->blk.tags,
+				data_len, &msg->buf[index]);
+	qup->blk.data_len -= data_len;
+
+	return ret;
+}
+
+static void qup_i2c_bam_cb(void *data)
+{
+	struct qup_i2c_dev *qup = data;
+
+	complete(&qup->xfer);
+}
+
+static int qup_sg_set_buf(struct scatterlist *sg, void *buf,
+			  struct qup_i2c_tag *tg, unsigned int buflen,
+			  struct qup_i2c_dev *qup, int map, int dir)
+{
+	int ret;
+
+	sg_set_buf(sg, buf, buflen);
+	ret = dma_map_sg(qup->dev, sg, 1, dir);
+	if (!ret)
+		return -EINVAL;
+
+	if (!map)
+		sg_dma_address(sg) = tg->addr + ((u8 *)buf - tg->start);
+
+	return 0;
+}
+
+static void qup_i2c_rel_dma(struct qup_i2c_dev *qup)
+{
+	if (qup->btx.dma)
+		dma_release_channel(qup->btx.dma);
+	if (qup->brx.dma)
+		dma_release_channel(qup->brx.dma);
+	qup->btx.dma = NULL;
+	qup->brx.dma = NULL;
+}
+
+static int qup_i2c_req_dma(struct qup_i2c_dev *qup)
+{
+	int err;
+
+	if (!qup->btx.dma) {
+		qup->btx.dma = dma_request_slave_channel_reason(qup->dev, "tx");
+		if (IS_ERR(qup->btx.dma)) {
+			err = PTR_ERR(qup->btx.dma);
+			qup->btx.dma = NULL;
+			dev_err(qup->dev, "\n tx channel not available");
+			return err;
+		}
+	}
+
+	if (!qup->brx.dma) {
+		qup->brx.dma = dma_request_slave_channel_reason(qup->dev, "rx");
+		if (IS_ERR(qup->brx.dma)) {
+			dev_err(qup->dev, "\n rx channel not available");
+			err = PTR_ERR(qup->brx.dma);
+			qup->brx.dma = NULL;
+			qup_i2c_rel_dma(qup);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int qup_i2c_bam_do_xfer(struct qup_i2c_dev *qup, struct i2c_msg *msg,
+			       int num)
+{
+	struct dma_async_tx_descriptor *txd, *rxd = NULL;
+	int ret = 0, idx = 0, limit = QUP_READ_LIMIT;
+	dma_cookie_t cookie_rx, cookie_tx;
+	u32 rx_nents = 0, tx_nents = 0, len, blocks, rem;
+	u32 i, tlen, tx_len, tx_buf = 0, rx_buf = 0, off = 0;
+	u8 *tags;
+
+	while (idx < num) {
+		blocks = (msg->len + limit) / limit;
+		rem = msg->len % limit;
+		tx_len = 0, len = 0, i = 0;
+
+		qup->is_last = (idx == (num - 1));
+
+		qup_i2c_set_blk_data(qup, msg);
+
+		if (msg->flags & I2C_M_RD) {
+			rx_nents += (blocks * 2) + 1;
+			tx_nents += 1;
+
+			while (qup->blk.pos < blocks) {
+				/* length set to '0' implies 256 bytes */
+				tlen = (i == (blocks - 1)) ? rem : 0;
+				tags = &qup->start_tag.start[off + len];
+				len += qup_i2c_set_tags(tags, qup, msg, 1);
+
+				/* scratch buf to read the start and len tags */
+				ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
+						     &qup->brx.tag.start[0],
+						     &qup->brx.tag,
+						     2, qup, 0, 0);
+
+				if (ret)
+					return ret;
+
+				ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
+						     &msg->buf[limit * i],
+						     NULL, tlen, qup,
+						     1, DMA_FROM_DEVICE);
+				if (ret)
+					return ret;
+
+				i++;
+				qup->blk.pos = i;
+			}
+			ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
+					     &qup->start_tag.start[off],
+					     &qup->start_tag, len, qup, 0, 0);
+			if (ret)
+				return ret;
+
+			off += len;
+			/* scratch buf to read the BAM EOT and FLUSH tags */
+			ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
+					     &qup->brx.tag.start[0],
+					     &qup->brx.tag, 2,
+					     qup, 0, 0);
+			if (ret)
+				return ret;
+		} else {
+			tx_nents += (blocks * 2);
+
+			while (qup->blk.pos < blocks) {
+				tlen = (i == (blocks - 1)) ? rem : 0;
+				tags = &qup->start_tag.start[off + tx_len];
+				len = qup_i2c_set_tags(tags, qup, msg, 1);
+
+				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
+						     tags,
+						     &qup->start_tag, len,
+						     qup, 0, 0);
+				if (ret)
+					return ret;
+
+				tx_len += len;
+				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
+						     &msg->buf[limit * i],
+						     NULL, tlen, qup, 1,
+						     DMA_TO_DEVICE);
+				if (ret)
+					return ret;
+				i++;
+				qup->blk.pos = i;
+			}
+			off += tx_len;
+
+			if (idx == (num - 1)) {
+				len = 1;
+				if (rx_nents) {
+					qup->btx.tag.start[0] =
+							QUP_BAM_INPUT_EOT;
+					len++;
+				}
+				qup->btx.tag.start[len - 1] =
+							QUP_BAM_FLUSH_STOP;
+				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
+						     &qup->btx.tag.start[0],
+						     &qup->btx.tag, len,
+						     qup, 0, 0);
+				if (ret)
+					return ret;
+				tx_nents += 1;
+			}
+		}
+		idx++;
+		msg++;
+	}
+
+	txd = dmaengine_prep_slave_sg(qup->btx.dma, qup->btx.sg, tx_nents,
+				      DMA_MEM_TO_DEV,
+				      DMA_PREP_INTERRUPT | DMA_PREP_FENCE);
+	if (!txd) {
+		dev_err(qup->dev, "failed to get tx desc\n");
+		ret = -EINVAL;
+		goto desc_err;
+	}
+
+	if (!rx_nents) {
+		txd->callback = qup_i2c_bam_cb;
+		txd->callback_param = qup;
+	}
+
+	cookie_tx = dmaengine_submit(txd);
+	if (dma_submit_error(cookie_tx)) {
+		ret = -EINVAL;
+		goto desc_err;
+	}
+
+	dma_async_issue_pending(qup->btx.dma);
+
+	if (rx_nents) {
+		rxd = dmaengine_prep_slave_sg(qup->brx.dma, qup->brx.sg,
+					      rx_nents, DMA_DEV_TO_MEM,
+					      DMA_PREP_INTERRUPT);
+		if (!rxd) {
+			dev_err(qup->dev, "failed to get rx desc\n");
+			ret = -EINVAL;
+
+			/* abort TX descriptors */
+			dmaengine_terminate_all(qup->btx.dma);
+			goto desc_err;
+		}
+
+		rxd->callback = qup_i2c_bam_cb;
+		rxd->callback_param = qup;
+		cookie_rx = dmaengine_submit(rxd);
+		if (dma_submit_error(cookie_rx)) {
+			ret = -EINVAL;
+			goto desc_err;
+		}
+
+		dma_async_issue_pending(qup->brx.dma);
+	}
+
+	if (!wait_for_completion_timeout(&qup->xfer, TOUT_MAX * HZ)) {
+		dev_err(qup->dev, "normal trans timed out\n");
+		ret = -ETIMEDOUT;
+	}
+
+	if (ret || qup->bus_err || qup->qup_err) {
+		if (qup->bus_err & QUP_I2C_NACK_FLAG) {
+			msg--;
+			dev_err(qup->dev, "NACK from %x\n", msg->addr);
+			ret = -EIO;
+
+			if (qup_i2c_change_state(qup, QUP_RUN_STATE)) {
+				dev_err(qup->dev, "change to run state timed out");
+				return ret;
+			}
+
+			if (rx_nents)
+				writel(QUP_BAM_INPUT_EOT,
+				       qup->base + QUP_OUT_FIFO_BASE);
+
+			writel(QUP_BAM_FLUSH_STOP,
+			       qup->base + QUP_OUT_FIFO_BASE);
+
+			qup_i2c_flush(qup);
+
+			/* wait for remaining interrupts to occur */
+			if (!wait_for_completion_timeout(&qup->xfer, HZ))
+				dev_err(qup->dev, "flush timed out\n");
+
+			qup_i2c_rel_dma(qup);
+		}
+	}
+
+	dma_unmap_sg(qup->dev, qup->btx.sg, tx_nents, DMA_TO_DEVICE);
+
+	if (rx_nents)
+		dma_unmap_sg(qup->dev, qup->brx.sg, rx_nents,
+			     DMA_FROM_DEVICE);
+desc_err:
+	return ret;
+}
+
+static int qup_i2c_bam_xfer(struct i2c_adapter *adap, struct i2c_msg *msg,
+			    int num)
+{
+	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
+	int ret = 0;
+
+	enable_irq(qup->irq);
+	ret = qup_i2c_req_dma(qup);
+
+	if (ret)
+		goto out;
+
+	qup->bus_err = 0;
+	qup->qup_err = 0;
+
+	writel(0, qup->base + QUP_MX_INPUT_CNT);
+	writel(0, qup->base + QUP_MX_OUTPUT_CNT);
+
+	/* set BAM mode */
+	writel(QUP_REPACK_EN | QUP_BAM_MODE, qup->base + QUP_IO_MODE);
+
+	/* mask fifo irqs */
+	writel((0x3 << 8), qup->base + QUP_OPERATIONAL_MASK);
+
+	/* set RUN STATE */
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+	if (ret)
+		goto out;
+
+	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+
+	qup->msg = msg;
+	ret = qup_i2c_bam_do_xfer(qup, qup->msg, num);
+out:
+	disable_irq(qup->irq);
+
+	qup->msg = NULL;
+	return ret;
+}
+
+static int qup_i2c_wait_for_complete(struct qup_i2c_dev *qup,
+				     struct i2c_msg *msg)
 {
 	unsigned long left;
+	int ret = 0;
+
+	left = wait_for_completion_timeout(&qup->xfer, HZ);
+	if (!left) {
+		writel(1, qup->base + QUP_SW_RESET);
+		ret = -ETIMEDOUT;
+	}
+
+	if (qup->bus_err || qup->qup_err) {
+		if (qup->bus_err & QUP_I2C_NACK_FLAG) {
+			dev_err(qup->dev, "NACK from %x\n", msg->addr);
+			ret = -EIO;
+		}
+	}
+
+	return ret;
+}
+
+static int qup_i2c_write_one_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+{
+	int ret = 0;
+
+	qup->msg = msg;
+	qup->pos = 0;
+	enable_irq(qup->irq);
+	qup_i2c_set_blk_data(qup, msg);
+	qup_i2c_set_write_mode_v2(qup, msg);
+
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+	if (ret)
+		goto err;
+
+	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+
+	do {
+		ret = qup_i2c_issue_xfer_v2(qup, msg);
+		if (ret)
+			goto err;
+
+		ret = qup_i2c_wait_for_complete(qup, msg);
+		if (ret)
+			goto err;
+
+		qup->blk.pos++;
+	} while (qup->blk.pos < qup->blk.count);
+
+	ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY, RESET_BIT, ONE_BYTE);
+
+err:
+	disable_irq(qup->irq);
+	qup->msg = NULL;
+
+	return ret;
+}
+
+static int qup_i2c_write_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+{
 	int ret;
 
 	qup->msg = msg;
@@ -325,30 +958,21 @@ static int qup_i2c_write_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 		if (ret)
 			goto err;
 
-		qup_i2c_issue_write(qup, msg);
-
-		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+		ret = qup_i2c_issue_write(qup, msg);
 		if (ret)
 			goto err;
 
-		left = wait_for_completion_timeout(&qup->xfer, HZ);
-		if (!left) {
-			writel(1, qup->base + QUP_SW_RESET);
-			ret = -ETIMEDOUT;
+		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+		if (ret)
 			goto err;
-		}
 
-		if (qup->bus_err || qup->qup_err) {
-			if (qup->bus_err & QUP_I2C_NACK_FLAG)
-				dev_err(qup->dev, "NACK from %x\n", msg->addr);
-			ret = -EIO;
+		ret = qup_i2c_wait_for_complete(qup, msg);
+		if (ret)
 			goto err;
-		}
 	} while (qup->pos < msg->len);
 
 	/* Wait for the outstanding data in the fifo to drain */
-	ret = qup_i2c_wait_writeready(qup);
-
+	ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY, RESET_BIT, ONE_BYTE);
 err:
 	disable_irq(qup->irq);
 	qup->msg = NULL;
@@ -370,6 +994,28 @@ static void qup_i2c_set_read_mode(struct qup_i2c_dev *qup, int len)
 	}
 }
 
+static void qup_i2c_set_read_mode_v2(struct qup_i2c_dev *qup, int len)
+{
+	int tx_len = qup->blk.tx_tag_len;
+
+	len += qup->blk.rx_tag_len;
+	len |= qup->config_run;
+	tx_len |= qup->config_run;
+
+	if (len < qup->in_fifo_sz) {
+		/* FIFO mode */
+		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
+		writel(tx_len, qup->base + QUP_MX_WRITE_CNT);
+		writel(len, qup->base + QUP_MX_READ_CNT);
+	} else {
+		/* BLOCK mode (transfer data on chunks) */
+		writel(QUP_INPUT_BLK_MODE | QUP_REPACK_EN,
+		       qup->base + QUP_IO_MODE);
+		writel(tx_len, qup->base + QUP_MX_OUTPUT_CNT);
+		writel(len, qup->base + QUP_MX_INPUT_CNT);
+	}
+}
+
 static void qup_i2c_issue_read(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 {
 	u32 addr, len, val;
@@ -384,18 +1030,19 @@ static void qup_i2c_issue_read(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 }
 
 
-static void qup_i2c_read_fifo(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static int qup_i2c_read_fifo(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 {
-	u32 opflags;
 	u32 val = 0;
 	int idx;
+	int ret = 0;
 
 	for (idx = 0; qup->pos < msg->len; idx++) {
 		if ((idx & 1) == 0) {
 			/* Check that FIFO have data */
-			opflags = readl(qup->base + QUP_OPERATIONAL);
-			if (!(opflags & QUP_IN_NOT_EMPTY))
-				break;
+			ret = qup_i2c_wait_ready(qup, QUP_IN_NOT_EMPTY,
+						 SET_BIT, 4 * ONE_BYTE);
+			if (ret)
+				return ret;
 
 			/* Reading 2 words at time */
 			val = readl(qup->base + QUP_IN_FIFO_BASE);
@@ -405,18 +1052,94 @@ static void qup_i2c_read_fifo(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 			msg->buf[qup->pos++] = val >> QUP_MSW_SHIFT;
 		}
 	}
+
+	return ret;
+}
+
+static int qup_i2c_read_fifo_v2(struct qup_i2c_dev *qup,
+				struct i2c_msg *msg)
+{
+	u32 val;
+	int idx, pos = 0, ret = 0, total;
+
+	total = qup_i2c_get_data_len(qup);
+
+	/* 2 extra bytes for read tags */
+	while (pos < (total + 2)) {
+		/* Check that FIFO have data */
+		ret = qup_i2c_wait_ready(qup, QUP_IN_NOT_EMPTY,
+					 SET_BIT, 4 * ONE_BYTE);
+		if (ret) {
+			dev_err(qup->dev, "timeout for fifo not empty");
+			return ret;
+		}
+		val = readl(qup->base + QUP_IN_FIFO_BASE);
+
+		for (idx = 0; idx < 4; idx++, val >>= 8, pos++) {
+			/* first 2 bytes are tag bytes */
+			if (pos < 2)
+				continue;
+
+			if (pos >= (total + 2))
+				goto out;
+
+			msg->buf[qup->pos++] = val & 0xff;
+		}
+	}
+
+out:
+	qup->blk.data_len -= total;
+
+	return ret;
+}
+
+static int qup_i2c_read_one_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+{
+	int ret = 0;
+
+	qup->msg = msg;
+	qup->pos  = 0;
+	enable_irq(qup->irq);
+	qup_i2c_set_blk_data(qup, msg);
+	qup_i2c_set_read_mode_v2(qup, msg->len);
+
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+	if (ret)
+		goto err;
+
+	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+
+	do {
+		ret = qup_i2c_issue_xfer_v2(qup, msg);
+		if (ret)
+			goto err;
+
+		ret = qup_i2c_wait_for_complete(qup, msg);
+		if (ret)
+			goto err;
+
+		ret = qup_i2c_read_fifo_v2(qup, msg);
+		if (ret)
+			goto err;
+
+		qup->blk.pos++;
+	} while (qup->blk.pos < qup->blk.count);
+
+err:
+	disable_irq(qup->irq);
+	qup->msg = NULL;
+
+	return ret;
 }
 
 static int qup_i2c_read_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 {
-	unsigned long left;
 	int ret;
 
 	qup->msg = msg;
 	qup->pos  = 0;
 
 	enable_irq(qup->irq);
-
 	qup_i2c_set_read_mode(qup, msg->len);
 
 	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
@@ -436,21 +1159,13 @@ static int qup_i2c_read_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 		goto err;
 
 	do {
-		left = wait_for_completion_timeout(&qup->xfer, HZ);
-		if (!left) {
-			writel(1, qup->base + QUP_SW_RESET);
-			ret = -ETIMEDOUT;
+		ret = qup_i2c_wait_for_complete(qup, msg);
+		if (ret)
 			goto err;
-		}
 
-		if (qup->bus_err || qup->qup_err) {
-			if (qup->bus_err & QUP_I2C_NACK_FLAG)
-				dev_err(qup->dev, "NACK from %x\n", msg->addr);
-			ret = -EIO;
+		ret = qup_i2c_read_fifo(qup, msg);
+		if (ret)
 			goto err;
-		}
-
-		qup_i2c_read_fifo(qup, msg);
 	} while (qup->pos < msg->len);
 
 err:
@@ -513,6 +1228,87 @@ out:
 	return ret;
 }
 
+static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
+			   struct i2c_msg msgs[],
+			   int num)
+{
+	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
+	int ret, len, idx = 0, use_dma = 0;
+
+	ret = pm_runtime_get_sync(qup->dev);
+	if (ret < 0)
+		goto out;
+
+	writel(1, qup->base + QUP_SW_RESET);
+	ret = qup_i2c_poll_state(qup, QUP_RESET_STATE);
+	if (ret)
+		goto out;
+
+	/* Configure QUP as I2C mini core */
+	writel(I2C_MINI_CORE | I2C_N_VAL_V2, qup->base + QUP_CONFIG);
+	writel(QUP_V2_TAGS_EN, qup->base + QUP_I2C_MASTER_GEN);
+
+	if ((qup->is_dma)) {
+		/* All i2c_msgs should be transferred using either dma or cpu */
+		for (idx = 0; idx < num; idx++) {
+			if (msgs[idx].len == 0) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			len = (msgs[idx].len > qup->out_fifo_sz) ||
+			      (msgs[idx].len > qup->in_fifo_sz);
+
+			if ((!is_vmalloc_addr(msgs[idx].buf)) && len) {
+				use_dma = 1;
+			 } else {
+				use_dma = 0;
+				break;
+			}
+		}
+	}
+
+	do {
+		if (msgs[idx].len == 0) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (qup_i2c_poll_state_i2c_master(qup)) {
+			ret = -EIO;
+			goto out;
+		}
+
+		qup->is_last = (idx == (num - 1));
+		if (idx)
+			qup->config_run = QUP_I2C_MX_CONFIG_DURING_RUN;
+		else
+			qup->config_run = 0;
+
+		reinit_completion(&qup->xfer);
+
+		if (use_dma) {
+			ret = qup_i2c_bam_xfer(adap, &msgs[idx], num);
+		} else {
+			if (msgs[idx].flags & I2C_M_RD)
+				ret = qup_i2c_read_one_v2(qup, &msgs[idx]);
+			else
+				ret = qup_i2c_write_one_v2(qup, &msgs[idx]);
+		}
+	} while ((idx++ < (num - 1)) && !use_dma && !ret);
+
+	if (!ret)
+		ret = qup_i2c_change_state(qup, QUP_RESET_STATE);
+
+	if (ret == 0)
+		ret = num;
+out:
+	pm_runtime_mark_last_busy(qup->dev);
+	pm_runtime_put_autosuspend(qup->dev);
+
+	return ret;
+}
+
 static u32 qup_i2c_func(struct i2c_adapter *adap)
 {
 	return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
@@ -523,6 +1319,11 @@ static const struct i2c_algorithm qup_i2c_algo = {
 	.functionality	= qup_i2c_func,
 };
 
+static const struct i2c_algorithm qup_i2c_algo_v2 = {
+	.master_xfer	= qup_i2c_xfer_v2,
+	.functionality	= qup_i2c_func,
+};
+
 /*
  * The QUP block will issue a NACK and STOP on the bus when reaching
  * the end of the read, the length of the read is specified as one byte
@@ -561,6 +1362,7 @@ static int qup_i2c_probe(struct platform_device *pdev)
 	int ret, fs_div, hs_div;
 	int src_clk_freq;
 	u32 clk_freq = 100000;
+	int blocks;
 
 	qup = devm_kzalloc(&pdev->dev, sizeof(*qup), GFP_KERNEL);
 	if (!qup)
@@ -572,6 +1374,68 @@ static int qup_i2c_probe(struct platform_device *pdev)
 
 	of_property_read_u32(node, "clock-frequency", &clk_freq);
 
+	if (of_device_is_compatible(pdev->dev.of_node, "qcom,i2c-qup-v1.1.1")) {
+		qup->adap.algo = &qup_i2c_algo;
+		qup->adap.quirks = &qup_i2c_quirks;
+	} else {
+		qup->adap.algo = &qup_i2c_algo_v2;
+		ret = qup_i2c_req_dma(qup);
+
+		if (ret == -EPROBE_DEFER)
+			goto fail_dma;
+		else if (ret != 0)
+			goto nodma;
+
+		blocks = (MX_BLOCKS << 1) + 1;
+		qup->btx.sg = devm_kzalloc(&pdev->dev,
+					   sizeof(*qup->btx.sg) * blocks,
+					   GFP_KERNEL);
+		if (!qup->btx.sg) {
+			ret = -ENOMEM;
+			goto fail_dma;
+		}
+		sg_init_table(qup->btx.sg, blocks);
+
+		qup->brx.sg = devm_kzalloc(&pdev->dev,
+					   sizeof(*qup->brx.sg) * blocks,
+					   GFP_KERNEL);
+		if (!qup->brx.sg) {
+			ret = -ENOMEM;
+			goto fail_dma;
+		}
+		sg_init_table(qup->brx.sg, blocks);
+
+		/* 2 tag bytes for each block + 5 for start, stop tags */
+		size = blocks * 2 + 5;
+		qup->dpool = dma_pool_create("qup_i2c-dma-pool", &pdev->dev,
+					     size, 4, 0);
+
+		qup->start_tag.start = dma_pool_alloc(qup->dpool, GFP_KERNEL,
+						      &qup->start_tag.addr);
+		if (!qup->start_tag.start) {
+			ret = -ENOMEM;
+			goto fail_dma;
+		}
+
+		qup->brx.tag.start = dma_pool_alloc(qup->dpool,
+						    GFP_KERNEL,
+						    &qup->brx.tag.addr);
+		if (!qup->brx.tag.start) {
+			ret = -ENOMEM;
+			goto fail_dma;
+		}
+
+		qup->btx.tag.start = dma_pool_alloc(qup->dpool,
+						    GFP_KERNEL,
+						    &qup->btx.tag.addr);
+		if (!qup->btx.tag.start) {
+			ret = -ENOMEM;
+			goto fail_dma;
+		}
+		qup->is_dma = true;
+	}
+
+nodma:
 	/* We support frequencies up to FAST Mode (400KHz) */
 	if (!clk_freq || clk_freq > 400000) {
 		dev_err(qup->dev, "clock frequency not supported %d\n",
@@ -667,10 +1531,10 @@ static int qup_i2c_probe(struct platform_device *pdev)
 		qup->out_blk_sz, qup->out_fifo_sz);
 
 	i2c_set_adapdata(&qup->adap, qup);
-	qup->adap.algo = &qup_i2c_algo;
-	qup->adap.quirks = &qup_i2c_quirks;
 	qup->adap.dev.parent = qup->dev;
 	qup->adap.dev.of_node = pdev->dev.of_node;
+	qup->is_last = true;
+
 	strlcpy(qup->adap.name, "QUP I2C adapter", sizeof(qup->adap.name));
 
 	pm_runtime_set_autosuspend_delay(qup->dev, MSEC_PER_SEC);
@@ -689,6 +1553,11 @@ fail_runtime:
 	pm_runtime_set_suspended(qup->dev);
 fail:
 	qup_i2c_disable_clocks(qup);
+fail_dma:
+	if (qup->btx.dma)
+		dma_release_channel(qup->btx.dma);
+	if (qup->brx.dma)
+		dma_release_channel(qup->brx.dma);
 	return ret;
 }
 
@@ -696,6 +1565,18 @@ static int qup_i2c_remove(struct platform_device *pdev)
 {
 	struct qup_i2c_dev *qup = platform_get_drvdata(pdev);
 
+	if (qup->is_dma) {
+		dma_pool_free(qup->dpool, qup->start_tag.start,
+			      qup->start_tag.addr);
+		dma_pool_free(qup->dpool, qup->brx.tag.start,
+			      qup->brx.tag.addr);
+		dma_pool_free(qup->dpool, qup->btx.tag.start,
+			      qup->btx.tag.addr);
+		dma_pool_destroy(qup->dpool);
+		dma_release_channel(qup->btx.dma);
+		dma_release_channel(qup->brx.dma);
+	}
+
 	disable_irq(qup->irq);
 	qup_i2c_disable_clocks(qup);
 	i2c_del_adapter(&qup->adap);
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 1abeadc8ab79..68ecb5630ad5 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -611,7 +611,7 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->io))
 		return PTR_ERR(priv->io);
 
-	priv->devtype = (enum rcar_i2c_type)of_match_device(rcar_i2c_dt_ids, dev)->data;
+	priv->devtype = (enum rcar_i2c_type)of_device_get_match_data(dev);
 	init_waitqueue_head(&priv->wait);
 
 	adap = &priv->adap;
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index a0522fcc4ff8..929185a7296c 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -805,9 +805,7 @@ static int tegra_i2c_probe(struct platform_device *pdev)
 	i2c_dev->hw = &tegra20_i2c_hw;
 
 	if (pdev->dev.of_node) {
-		const struct of_device_id *match;
-		match = of_match_device(tegra_i2c_of_match, &pdev->dev);
-		i2c_dev->hw = match->data;
+		i2c_dev->hw = of_device_get_match_data(&pdev->dev);
 		i2c_dev->is_dvc = of_device_is_compatible(pdev->dev.of_node,
 						"nvidia,tegra20-i2c-dvc");
 	} else if (pdev->id == 3) {
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 6efd20095d5d..74f54f2f471f 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -37,6 +37,8 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
 
 #define DRIVER_NAME "xiic-i2c"
 
@@ -66,6 +68,7 @@ enum xiic_endian {
  * @endianness: big/little-endian byte order
  */
 struct xiic_i2c {
+	struct device		*dev;
 	void __iomem		*base;
 	wait_queue_head_t	wait;
 	struct i2c_adapter	adap;
@@ -77,6 +80,7 @@ struct xiic_i2c {
 	struct i2c_msg		*rx_msg;
 	int			rx_pos;
 	enum xiic_endian	endianness;
+	struct clk *clk;
 };
 
 
@@ -164,6 +168,7 @@ struct xiic_i2c {
 
 #define XIIC_RESET_MASK             0xAUL
 
+#define XIIC_PM_TIMEOUT		1000	/* ms */
 /*
  * The following constant is used for the device global interrupt enable
  * register, to enable all interrupts for the device, this is the only bit
@@ -676,9 +681,13 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 	dev_dbg(adap->dev.parent, "%s entry SR: 0x%x\n", __func__,
 		xiic_getreg8(i2c, XIIC_SR_REG_OFFSET));
 
+	err = pm_runtime_get_sync(i2c->dev);
+	if (err < 0)
+		return err;
+
 	err = xiic_busy(i2c);
 	if (err)
-		return err;
+		goto out;
 
 	i2c->tx_msg = msgs;
 	i2c->nmsgs = num;
@@ -686,14 +695,20 @@ static int xiic_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 	xiic_start_xfer(i2c);
 
 	if (wait_event_timeout(i2c->wait, (i2c->state == STATE_ERROR) ||
-		(i2c->state == STATE_DONE), HZ))
-		return (i2c->state == STATE_DONE) ? num : -EIO;
-	else {
+		(i2c->state == STATE_DONE), HZ)) {
+		err = (i2c->state == STATE_DONE) ? num : -EIO;
+		goto out;
+	} else {
 		i2c->tx_msg = NULL;
 		i2c->rx_msg = NULL;
 		i2c->nmsgs = 0;
-		return -ETIMEDOUT;
+		err = -ETIMEDOUT;
+		goto out;
 	}
+out:
+	pm_runtime_mark_last_busy(i2c->dev);
+	pm_runtime_put_autosuspend(i2c->dev);
+	return err;
 }
 
 static u32 xiic_func(struct i2c_adapter *adap)
@@ -748,13 +763,28 @@ static int xiic_i2c_probe(struct platform_device *pdev)
 	mutex_init(&i2c->lock);
 	init_waitqueue_head(&i2c->wait);
 
+	i2c->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(i2c->clk)) {
+		dev_err(&pdev->dev, "input clock not found.\n");
+		return PTR_ERR(i2c->clk);
+	}
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
+	i2c->dev = &pdev->dev;
+	pm_runtime_enable(i2c->dev);
+	pm_runtime_set_autosuspend_delay(i2c->dev, XIIC_PM_TIMEOUT);
+	pm_runtime_use_autosuspend(i2c->dev);
+	pm_runtime_set_active(i2c->dev);
 	ret = devm_request_threaded_irq(&pdev->dev, irq, xiic_isr,
 					xiic_process, IRQF_ONESHOT,
 					pdev->name, i2c);
 
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Cannot claim IRQ\n");
-		return ret;
+		goto err_clk_dis;
 	}
 
 	/*
@@ -776,7 +806,7 @@ static int xiic_i2c_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to add adapter\n");
 		xiic_deinit(i2c);
-		return ret;
+		goto err_clk_dis;
 	}
 
 	if (pdata) {
@@ -786,16 +816,30 @@ static int xiic_i2c_probe(struct platform_device *pdev)
 	}
 
 	return 0;
+
+err_clk_dis:
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(i2c->clk);
+	return ret;
 }
 
 static int xiic_i2c_remove(struct platform_device *pdev)
 {
 	struct xiic_i2c *i2c = platform_get_drvdata(pdev);
+	int ret;
 
 	/* remove adapter & data */
 	i2c_del_adapter(&i2c->adap);
 
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
 	xiic_deinit(i2c);
+	clk_disable_unprepare(i2c->clk);
+	pm_runtime_disable(&pdev->dev);
 
 	return 0;
 }
@@ -808,12 +852,42 @@ static const struct of_device_id xiic_of_match[] = {
 MODULE_DEVICE_TABLE(of, xiic_of_match);
 #endif
 
+static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct xiic_i2c *i2c = platform_get_drvdata(pdev);
+
+	clk_disable(i2c->clk);
+
+	return 0;
+}
+
+static int __maybe_unused cdns_i2c_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct xiic_i2c *i2c = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = clk_enable(i2c->clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable clock.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops xiic_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(cdns_i2c_runtime_suspend,
+			   cdns_i2c_runtime_resume, NULL)
+};
 static struct platform_driver xiic_i2c_driver = {
 	.probe   = xiic_i2c_probe,
 	.remove  = xiic_i2c_remove,
 	.driver  = {
 		.name = DRIVER_NAME,
 		.of_match_table = of_match_ptr(xiic_of_match),
+		.pm = &xiic_dev_pm_ops,
 	},
 };
 
diff --git a/drivers/i2c/i2c-boardinfo.c b/drivers/i2c/i2c-boardinfo.c
index 90e322959303..e33022e2d459 100644
--- a/drivers/i2c/i2c-boardinfo.c
+++ b/drivers/i2c/i2c-boardinfo.c
@@ -12,11 +12,11 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/kernel.h>
-#include <linux/i2c.h>
-#include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
 #include <linux/rwsem.h>
+#include <linux/slab.h>
 
 #include "i2c-core.h"
 
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index ffe715d346d8..0f2f8484e8ec 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -28,32 +28,32 @@
  */
 
 #include <dt-bindings/i2c/i2c.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include <linux/acpi.h>
+#include <linux/clk/clk-conf.h>
+#include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio.h>
-#include <linux/slab.h>
+#include <linux/hardirq.h>
 #include <linux/i2c.h>
-#include <linux/init.h>
 #include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/clk/clk-conf.h>
-#include <linux/completion.h>
-#include <linux/hardirq.h>
-#include <linux/irqflags.h>
-#include <linux/rwsem.h>
-#include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
 #include <linux/pm_wakeirq.h>
-#include <linux/acpi.h>
-#include <linux/jump_label.h>
-#include <asm/uaccess.h>
-#include <linux/err.h>
 #include <linux/property.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
 
 #include "i2c-core.h"
 
@@ -73,6 +73,7 @@ static struct device_type i2c_client_type;
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
 static struct static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE;
+static bool is_registered;
 
 void i2c_transfer_trace_reg(void)
 {
@@ -1529,7 +1530,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 	int res = 0;
 
 	/* Can't register until after driver model init */
-	if (unlikely(WARN_ON(!i2c_bus_type.p))) {
+	if (WARN_ON(!is_registered)) {
 		res = -EAGAIN;
 		goto out_list;
 	}
@@ -1926,7 +1927,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 	int res;
 
 	/* Can't register until after driver model init */
-	if (unlikely(WARN_ON(!i2c_bus_type.p)))
+	if (WARN_ON(!is_registered))
 		return -EAGAIN;
 
 	/* add the driver to the list of i2c drivers in the driver core */
@@ -2104,6 +2105,9 @@ static int __init i2c_init(void)
 	retval = bus_register(&i2c_bus_type);
 	if (retval)
 		return retval;
+
+	is_registered = true;
+
 #ifdef CONFIG_I2C_COMPAT
 	i2c_adapter_compat_class = class_compat_register("i2c-adapter");
 	if (!i2c_adapter_compat_class) {
@@ -2125,6 +2129,7 @@ class_err:
 	class_compat_unregister(i2c_adapter_compat_class);
 bus_err:
 #endif
+	is_registered = false;
 	bus_unregister(&i2c_bus_type);
 	return retval;
 }
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 2413ec9f8207..0b1108d3c2f3 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -22,17 +22,17 @@
 
 /* The I2C_RDWR ioctl code is written by Kolja Waschk <waschk@telos.de> */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/device.h>
-#include <linux/notifier.h>
 #include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/i2c.h>
 #include <linux/i2c-dev.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
 #include <linux/uaccess.h>
 
 /*
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c
index 00fc5b1c7b66..d4022878b2f0 100644
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -19,13 +19,13 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/i2c-mux.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
-#include <linux/acpi.h>
+#include <linux/slab.h>
 
 /* multiplexer per channel data */
 struct i2c_mux_priv {
diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
index 94765a81970d..abb55d3e76f3 100644
--- a/drivers/i2c/i2c-smbus.c
+++ b/drivers/i2c/i2c-smbus.c
@@ -15,14 +15,14 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
 #include <linux/i2c.h>
 #include <linux/i2c-smbus.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/workqueue.h>
 
 struct i2c_smbus_alert {
 	unsigned int		alert_edge_triggered:1;
diff --git a/drivers/i2c/i2c-stub.c b/drivers/i2c/i2c-stub.c
index af2a94e1140b..06af583d5101 100644
--- a/drivers/i2c/i2c-stub.c
+++ b/drivers/i2c/i2c-stub.c
@@ -17,13 +17,13 @@
 
 #define DEBUG 1
 
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
 
 #define MAX_CHIPS 10
 
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index f06b0e24673b..e280c8ecc0b5 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -72,4 +72,13 @@ config I2C_MUX_REG
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-mux-reg.
 
+config I2C_DEMUX_PINCTRL
+	tristate "pinctrl-based I2C demultiplexer"
+	depends on PINCTRL && OF
+	select OF_DYNAMIC
+	help
+	  If you say yes to this option, support will be included for an I2C
+	  demultiplexer that uses the pinctrl subsystem. This is useful if you
+	  want to change the I2C master at run-time depending on features.
+
 endmenu
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index e89799b76a92..7c267c29b191 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -3,6 +3,8 @@
 
 obj-$(CONFIG_I2C_ARB_GPIO_CHALLENGE)	+= i2c-arb-gpio-challenge.o
 
+obj-$(CONFIG_I2C_DEMUX_PINCTRL)		+= i2c-demux-pinctrl.o
+
 obj-$(CONFIG_I2C_MUX_GPIO)	+= i2c-mux-gpio.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= i2c-mux-pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= i2c-mux-pca954x.o
diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
new file mode 100644
index 000000000000..7748a0a5ddb9
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
@@ -0,0 +1,272 @@
+/*
+ * Pinctrl based I2C DeMultiplexer
+ *
+ * Copyright (C) 2015-16 by Wolfram Sang, Sang Engineering <wsa@sang-engineering.com>
+ * Copyright (C) 2015-16 by Renesas Electronics Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * See the bindings doc for DTS setup and the sysfs doc for usage information.
+ * (look for filenames containing 'i2c-demux-pinctrl' in Documentation/)
+ */
+
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+struct i2c_demux_pinctrl_chan {
+	struct device_node *parent_np;
+	struct i2c_adapter *parent_adap;
+	struct of_changeset chgset;
+};
+
+struct i2c_demux_pinctrl_priv {
+	int cur_chan;
+	int num_chan;
+	struct device *dev;
+	const char *bus_name;
+	struct i2c_adapter cur_adap;
+	struct i2c_algorithm algo;
+	struct i2c_demux_pinctrl_chan chan[];
+};
+
+static struct property status_okay = { .name = "status", .length = 3, .value = "ok" };
+
+static int i2c_demux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
+{
+	struct i2c_demux_pinctrl_priv *priv = adap->algo_data;
+	struct i2c_adapter *parent = priv->chan[priv->cur_chan].parent_adap;
+
+	return __i2c_transfer(parent, msgs, num);
+}
+
+static u32 i2c_demux_functionality(struct i2c_adapter *adap)
+{
+	struct i2c_demux_pinctrl_priv *priv = adap->algo_data;
+	struct i2c_adapter *parent = priv->chan[priv->cur_chan].parent_adap;
+
+	return parent->algo->functionality(parent);
+}
+
+static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 new_chan)
+{
+	struct i2c_adapter *adap;
+	struct pinctrl *p;
+	int ret;
+
+	ret = of_changeset_apply(&priv->chan[new_chan].chgset);
+	if (ret)
+		goto err;
+
+	adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
+	if (!adap) {
+		ret = -ENODEV;
+		goto err;
+	}
+
+	p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
+	if (IS_ERR(p)) {
+		ret = PTR_ERR(p);
+		goto err_with_put;
+	}
+
+	priv->chan[new_chan].parent_adap = adap;
+	priv->cur_chan = new_chan;
+
+	/* Now fill out current adapter structure. cur_chan must be up to date */
+	priv->algo.master_xfer = i2c_demux_master_xfer;
+	priv->algo.functionality = i2c_demux_functionality;
+
+	snprintf(priv->cur_adap.name, sizeof(priv->cur_adap.name),
+		 "i2c-demux (master i2c-%d)", i2c_adapter_id(adap));
+	priv->cur_adap.owner = THIS_MODULE;
+	priv->cur_adap.algo = &priv->algo;
+	priv->cur_adap.algo_data = priv;
+	priv->cur_adap.dev.parent = priv->dev;
+	priv->cur_adap.class = adap->class;
+	priv->cur_adap.retries = adap->retries;
+	priv->cur_adap.timeout = adap->timeout;
+	priv->cur_adap.quirks = adap->quirks;
+	priv->cur_adap.dev.of_node = priv->dev->of_node;
+	ret = i2c_add_adapter(&priv->cur_adap);
+	if (ret < 0)
+		goto err_with_put;
+
+	return 0;
+
+ err_with_put:
+	i2c_put_adapter(adap);
+ err:
+	dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
+	return ret;
+}
+
+static int i2c_demux_deactivate_master(struct i2c_demux_pinctrl_priv *priv)
+{
+	int ret, cur = priv->cur_chan;
+
+	if (cur < 0)
+		return 0;
+
+	i2c_del_adapter(&priv->cur_adap);
+	i2c_put_adapter(priv->chan[cur].parent_adap);
+
+	ret = of_changeset_revert(&priv->chan[cur].chgset);
+
+	priv->chan[cur].parent_adap = NULL;
+	priv->cur_chan = -EINVAL;
+
+	return ret;
+}
+
+static int i2c_demux_change_master(struct i2c_demux_pinctrl_priv *priv, u32 new_chan)
+{
+	int ret;
+
+	if (new_chan == priv->cur_chan)
+		return 0;
+
+	ret = i2c_demux_deactivate_master(priv);
+	if (ret)
+		return ret;
+
+	return i2c_demux_activate_master(priv, new_chan);
+}
+
+static ssize_t cur_master_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct i2c_demux_pinctrl_priv *priv = dev_get_drvdata(dev);
+	int count = 0, i;
+
+	for (i = 0; i < priv->num_chan && count < PAGE_SIZE; i++)
+		count += scnprintf(buf + count, PAGE_SIZE - count, "%c %d - %s\n",
+				 i == priv->cur_chan ? '*' : ' ', i,
+				 priv->chan[i].parent_np->full_name);
+
+	return count;
+}
+
+static ssize_t cur_master_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct i2c_demux_pinctrl_priv *priv = dev_get_drvdata(dev);
+	unsigned int val;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	if (val >= priv->num_chan)
+		return -EINVAL;
+
+	ret = i2c_demux_change_master(priv, val);
+
+	return ret < 0 ? ret : count;
+}
+static DEVICE_ATTR_RW(cur_master);
+
+static int i2c_demux_pinctrl_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct i2c_demux_pinctrl_priv *priv;
+	int num_chan, i, j, err;
+
+	num_chan = of_count_phandle_with_args(np, "i2c-parent", NULL);
+	if (num_chan < 2) {
+		dev_err(&pdev->dev, "Need at least two I2C masters to switch\n");
+		return -EINVAL;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv)
+			   + num_chan * sizeof(struct i2c_demux_pinctrl_chan), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	err = of_property_read_string(np, "i2c-bus-name", &priv->bus_name);
+	if (err)
+		return err;
+
+	for (i = 0; i < num_chan; i++) {
+		struct device_node *adap_np;
+
+		adap_np = of_parse_phandle(np, "i2c-parent", i);
+		if (!adap_np) {
+			dev_err(&pdev->dev, "can't get phandle for parent %d\n", i);
+			err = -ENOENT;
+			goto err_rollback;
+		}
+		priv->chan[i].parent_np = adap_np;
+
+		of_changeset_init(&priv->chan[i].chgset);
+		of_changeset_update_property(&priv->chan[i].chgset, adap_np, &status_okay);
+	}
+
+	priv->num_chan = num_chan;
+	priv->dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, priv);
+
+	/* switch to first parent as active master */
+	i2c_demux_activate_master(priv, 0);
+
+	err = device_create_file(&pdev->dev, &dev_attr_cur_master);
+	if (err)
+		goto err_rollback;
+
+	return 0;
+
+err_rollback:
+	for (j = 0; j < i; j++) {
+		of_node_put(priv->chan[j].parent_np);
+		of_changeset_destroy(&priv->chan[j].chgset);
+	}
+
+	return err;
+}
+
+static int i2c_demux_pinctrl_remove(struct platform_device *pdev)
+{
+	struct i2c_demux_pinctrl_priv *priv = platform_get_drvdata(pdev);
+	int i;
+
+	device_remove_file(&pdev->dev, &dev_attr_cur_master);
+
+	i2c_demux_deactivate_master(priv);
+
+	for (i = 0; i < priv->num_chan; i++) {
+		of_node_put(priv->chan[i].parent_np);
+		of_changeset_destroy(&priv->chan[i].chgset);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id i2c_demux_pinctrl_of_match[] = {
+	{ .compatible = "i2c-demux-pinctrl", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_demux_pinctrl_of_match);
+
+static struct platform_driver i2c_demux_pinctrl_driver = {
+	.driver	= {
+		.name = "i2c-demux-pinctrl",
+		.of_match_table = i2c_demux_pinctrl_of_match,
+	},
+	.probe	= i2c_demux_pinctrl_probe,
+	.remove	= i2c_demux_pinctrl_remove,
+};
+module_platform_driver(i2c_demux_pinctrl_driver);
+
+MODULE_DESCRIPTION("pinctrl-based I2C demux driver");
+MODULE_AUTHOR("Wolfram Sang <wsa@sang-engineering.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:i2c-demux-pinctrl");
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cd4510a63375..ba947df5a8c7 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -65,7 +65,7 @@
 #include <asm/mwait.h>
 #include <asm/msr.h>
 
-#define INTEL_IDLE_VERSION "0.4"
+#define INTEL_IDLE_VERSION "0.4.1"
 #define PREFIX "intel_idle: "
 
 static struct cpuidle_driver intel_idle_driver = {
@@ -716,6 +716,26 @@ static struct cpuidle_state avn_cstates[] = {
 	{
 		.enter = NULL }
 };
+static struct cpuidle_state knl_cstates[] = {
+	{
+		.name = "C1-KNL",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 1,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze },
+	{
+		.name = "C6-KNL",
+		.desc = "MWAIT 0x10",
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 120,
+		.target_residency = 500,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze },
+	{
+		.enter = NULL }
+};
 
 /**
  * intel_idle
@@ -890,6 +910,10 @@ static const struct idle_cpu idle_cpu_avn = {
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_knl = {
+	.state_table = knl_cstates,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -921,6 +945,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	ICPU(0x56, idle_cpu_bdw),
 	ICPU(0x4e, idle_cpu_skl),
 	ICPU(0x5e, idle_cpu_skl),
+	ICPU(0x57, idle_cpu_knl),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -994,36 +1019,92 @@ static void intel_idle_cpuidle_devices_uninit(void)
 }
 
 /*
- * intel_idle_state_table_update()
+ * ivt_idle_state_table_update(void)
  *
- * Update the default state_table for this CPU-id
- *
- * Currently used to access tuned IVT multi-socket targets
+ * Tune IVT multi-socket targets
  * Assumption: num_sockets == (max_package_num + 1)
  */
-void intel_idle_state_table_update(void)
+static void ivt_idle_state_table_update(void)
 {
 	/* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
-	if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
-		int cpu, package_num, num_sockets = 1;
-
-		for_each_online_cpu(cpu) {
-			package_num = topology_physical_package_id(cpu);
-			if (package_num + 1 > num_sockets) {
-				num_sockets = package_num + 1;
-
-				if (num_sockets > 4) {
-					cpuidle_state_table = ivt_cstates_8s;
-					return;
-				}
+	int cpu, package_num, num_sockets = 1;
+
+	for_each_online_cpu(cpu) {
+		package_num = topology_physical_package_id(cpu);
+		if (package_num + 1 > num_sockets) {
+			num_sockets = package_num + 1;
+
+			if (num_sockets > 4) {
+				cpuidle_state_table = ivt_cstates_8s;
+				return;
 			}
 		}
+	}
+
+	if (num_sockets > 2)
+		cpuidle_state_table = ivt_cstates_4s;
 
-		if (num_sockets > 2)
-			cpuidle_state_table = ivt_cstates_4s;
-		/* else, 1 and 2 socket systems use default ivt_cstates */
+	/* else, 1 and 2 socket systems use default ivt_cstates */
+}
+/*
+ * sklh_idle_state_table_update(void)
+ *
+ * On SKL-H (model 0x5e) disable C8 and C9 if:
+ * C10 is enabled and SGX disabled
+ */
+static void sklh_idle_state_table_update(void)
+{
+	unsigned long long msr;
+	unsigned int eax, ebx, ecx, edx;
+
+
+	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
+	if (max_cstate <= 7)
+		return;
+
+	/* if PC10 not present in CPUID.MWAIT.EDX */
+	if ((mwait_substates & (0xF << 28)) == 0)
+		return;
+
+	rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
+
+	/* PC10 is not enabled in PKG C-state limit */
+	if ((msr & 0xF) != 8)
+		return;
+
+	ecx = 0;
+	cpuid(7, &eax, &ebx, &ecx, &edx);
+
+	/* if SGX is present */
+	if (ebx & (1 << 2)) {
+
+		rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+		/* if SGX is enabled */
+		if (msr & (1 << 18))
+			return;
+	}
+
+	skl_cstates[5].disabled = 1;	/* C8-SKL */
+	skl_cstates[6].disabled = 1;	/* C9-SKL */
+}
+/*
+ * intel_idle_state_table_update()
+ *
+ * Update the default state_table for this CPU-id
+ */
+
+static void intel_idle_state_table_update(void)
+{
+	switch (boot_cpu_data.x86_model) {
+
+	case 0x3e: /* IVT */
+		ivt_idle_state_table_update();
+		break;
+	case 0x5e: /* SKL-H */
+		sklh_idle_state_table_update();
+		break;
 	}
-	return;
 }
 
 /*
@@ -1063,6 +1144,14 @@ static int __init intel_idle_cpuidle_driver_init(void)
 		if (num_substates == 0)
 			continue;
 
+		/* if state marked as disabled, skip it */
+		if (cpuidle_state_table[cstate].disabled != 0) {
+			pr_debug(PREFIX "state %s is disabled",
+				cpuidle_state_table[cstate].name);
+			continue;
+		}
+
+
 		if (((mwait_cstate + 1) > 2) &&
 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			mark_tsc_unstable("TSC halts in idle"
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 8a8440c0eed1..6425c0e5d18a 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -68,6 +68,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
+source "drivers/infiniband/hw/i40iw/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
@@ -82,4 +83,6 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/sw/rdmavt/Kconfig"
+
 endif # INFINIBAND
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index dc21836b5a8d..fad0b44c356f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND)		+= hw/
 obj-$(CONFIG_INFINIBAND)		+= ulp/
+obj-$(CONFIG_INFINIBAND)		+= sw/
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 270c7ff6cba7..10979844026a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -650,10 +650,23 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
+	union ib_gid gid;
+	int err;
+
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
 
-	return device->query_port(device, port_num, port_attr);
+	memset(port_attr, 0, sizeof(*port_attr));
+	err = device->query_port(device, port_num, port_attr);
+	if (err || port_attr->subnet_prefix)
+		return err;
+
+	err = ib_query_gid(device, port_num, 0, &gid, NULL);
+	if (err)
+		return err;
+
+	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	return 0;
 }
 EXPORT_SYMBOL(ib_query_port);
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b5656a2298ee..8a09c0fb268d 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -885,6 +885,11 @@ static void update_sm_ah(struct work_struct *work)
 	ah_attr.dlid     = port_attr.sm_lid;
 	ah_attr.sl       = port_attr.sm_sl;
 	ah_attr.port_num = port->port_num;
+	if (port_attr.grh_required) {
+		ah_attr.ah_flags = IB_AH_GRH;
+		ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+		ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+	}
 
 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
 	if (IS_ERR(new_ah->ah)) {
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 38acb3cfc545..fe4d2e1a8b58 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -188,7 +188,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	sg_list_start = umem->sg_head.sgl;
 
 	while (npages) {
-		ret = get_user_pages(current, current->mm, cur_base,
+		ret = get_user_pages(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     1, !umem->writable, page_list, vma_list);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index e69bf266049d..75077a018675 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -572,10 +572,10 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 		 * complex (and doesn't gain us much performance in most use
 		 * cases).
 		 */
-		npages = get_user_pages(owning_process, owning_mm, user_virt,
-					gup_num_pages,
-					access_mask & ODP_WRITE_ALLOWED_BIT, 0,
-					local_page_list, NULL);
+		npages = get_user_pages_remote(owning_process, owning_mm,
+				user_virt, gup_num_pages,
+				access_mask & ODP_WRITE_ALLOWED_BIT,
+				0, local_page_list, NULL);
 		up_read(&owning_mm->mmap_sem);
 
 		if (npages < 0)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3638c787cb7c..6fdc7ecdaca0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -402,7 +402,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->hw_ver		= attr->hw_ver;
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
-	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
 	resp->max_sge		= attr->max_sge;
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
@@ -3600,9 +3600,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
-	struct ib_uverbs_ex_query_device_resp resp;
+	struct ib_uverbs_ex_query_device_resp resp = { {0} };
 	struct ib_uverbs_ex_query_device  cmd;
-	struct ib_device_attr attr;
+	struct ib_device_attr attr = {0};
 	int err;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3623,14 +3623,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	if (ucore->outlen < resp.response_length)
 		return -ENOSPC;
 
-	memset(&attr, 0, sizeof(attr));
-
 	err = ib_dev->query_device(ib_dev, &attr, uhw);
 	if (err)
 		return err;
 
 	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
-	resp.comp_mask = 0;
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
 		goto end;
@@ -3643,9 +3640,6 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 		attr.odp_caps.per_transport_caps.uc_odp_caps;
 	resp.odp_caps.per_transport_caps.ud_odp_caps =
 		attr.odp_caps.per_transport_caps.ud_odp_caps;
-	resp.odp_caps.reserved = 0;
-#else
-	memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
 #endif
 	resp.response_length += sizeof(resp.odp_caps);
 
@@ -3663,8 +3657,5 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
-	if (err)
-		return err;
-
-	return 0;
+	return err;
 }
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5cd1e3987f2b..15b8adbf39c0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1551,6 +1551,46 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 }
 EXPORT_SYMBOL(ib_check_mr_status);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state)
+{
+	if (!device->set_vf_link_state)
+		return -ENOSYS;
+
+	return device->set_vf_link_state(device, vf, port, state);
+}
+EXPORT_SYMBOL(ib_set_vf_link_state);
+
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info)
+{
+	if (!device->get_vf_config)
+		return -ENOSYS;
+
+	return device->get_vf_config(device, vf, port, info);
+}
+EXPORT_SYMBOL(ib_get_vf_config);
+
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats)
+{
+	if (!device->get_vf_stats)
+		return -ENOSYS;
+
+	return device->get_vf_stats(device, vf, port, stats);
+}
+EXPORT_SYMBOL(ib_get_vf_stats);
+
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type)
+{
+	if (!device->set_vf_guid)
+		return -ENOSYS;
+
+	return device->set_vf_guid(device, vf, port, guid, type);
+}
+EXPORT_SYMBOL(ib_set_vf_guid);
+
 /**
  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
  *     and set it the memory region.
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index aded2a5cc2d5..c7ad0a4c8b15 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA)		+= mthca/
 obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
+obj-$(CONFIG_INFINIBAND_I40IW)		+= i40iw/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= nes/
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 343e8daf2270..1e26669793c3 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -753,103 +753,4 @@ struct fw_ri_wr {
 #define FW_RI_WR_P2PTYPE_G(x)	\
 	(((x) >> FW_RI_WR_P2PTYPE_S) & FW_RI_WR_P2PTYPE_M)
 
-struct tcp_options {
-	__be16 mss;
-	__u8 wsf;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8:4;
-	__u8 unknown:1;
-	__u8:1;
-	__u8 sack:1;
-	__u8 tstamp:1;
-#else
-	__u8 tstamp:1;
-	__u8 sack:1;
-	__u8:1;
-	__u8 unknown:1;
-	__u8:4;
-#endif
-};
-
-struct cpl_pass_accept_req {
-	union opcode_tid ot;
-	__be16 rsvd;
-	__be16 len;
-	__be32 hdr_len;
-	__be16 vlan;
-	__be16 l2info;
-	__be32 tos_stid;
-	struct tcp_options tcpopt;
-};
-
-/* cpl_pass_accept_req.hdr_len fields */
-#define SYN_RX_CHAN_S    0
-#define SYN_RX_CHAN_M    0xF
-#define SYN_RX_CHAN_V(x) ((x) << SYN_RX_CHAN_S)
-#define SYN_RX_CHAN_G(x) (((x) >> SYN_RX_CHAN_S) & SYN_RX_CHAN_M)
-
-#define TCP_HDR_LEN_S    10
-#define TCP_HDR_LEN_M    0x3F
-#define TCP_HDR_LEN_V(x) ((x) << TCP_HDR_LEN_S)
-#define TCP_HDR_LEN_G(x) (((x) >> TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
-
-#define IP_HDR_LEN_S    16
-#define IP_HDR_LEN_M    0x3FF
-#define IP_HDR_LEN_V(x) ((x) << IP_HDR_LEN_S)
-#define IP_HDR_LEN_G(x) (((x) >> IP_HDR_LEN_S) & IP_HDR_LEN_M)
-
-#define ETH_HDR_LEN_S    26
-#define ETH_HDR_LEN_M    0x1F
-#define ETH_HDR_LEN_V(x) ((x) << ETH_HDR_LEN_S)
-#define ETH_HDR_LEN_G(x) (((x) >> ETH_HDR_LEN_S) & ETH_HDR_LEN_M)
-
-/* cpl_pass_accept_req.l2info fields */
-#define SYN_MAC_IDX_S    0
-#define SYN_MAC_IDX_M    0x1FF
-#define SYN_MAC_IDX_V(x) ((x) << SYN_MAC_IDX_S)
-#define SYN_MAC_IDX_G(x) (((x) >> SYN_MAC_IDX_S) & SYN_MAC_IDX_M)
-
-#define SYN_XACT_MATCH_S    9
-#define SYN_XACT_MATCH_V(x) ((x) << SYN_XACT_MATCH_S)
-#define SYN_XACT_MATCH_F    SYN_XACT_MATCH_V(1U)
-
-#define SYN_INTF_S    12
-#define SYN_INTF_M    0xF
-#define SYN_INTF_V(x) ((x) << SYN_INTF_S)
-#define SYN_INTF_G(x) (((x) >> SYN_INTF_S) & SYN_INTF_M)
-
-struct ulptx_idata {
-	__be32 cmd_more;
-	__be32 len;
-};
-
-#define ULPTX_NSGE_S    0
-#define ULPTX_NSGE_M    0xFFFF
-#define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
-
-#define RX_DACK_MODE_S    29
-#define RX_DACK_MODE_M    0x3
-#define RX_DACK_MODE_V(x) ((x) << RX_DACK_MODE_S)
-#define RX_DACK_MODE_G(x) (((x) >> RX_DACK_MODE_S) & RX_DACK_MODE_M)
-
-#define RX_DACK_CHANGE_S    31
-#define RX_DACK_CHANGE_V(x) ((x) << RX_DACK_CHANGE_S)
-#define RX_DACK_CHANGE_F    RX_DACK_CHANGE_V(1U)
-
-enum {                     /* TCP congestion control algorithms */
-	CONG_ALG_RENO,
-	CONG_ALG_TAHOE,
-	CONG_ALG_NEWRENO,
-	CONG_ALG_HIGHSPEED
-};
-
-#define CONG_CNTRL_S    14
-#define CONG_CNTRL_M    0x3
-#define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
-#define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
-
-#define T5_ISS_S    18
-#define T5_ISS_V(x) ((x) << T5_ISS_S)
-#define T5_ISS_F    T5_ISS_V(1U)
-
 #endif /* _T4FW_RI_API_H_ */
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
new file mode 100644
index 000000000000..6e7d27a14061
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_I40IW
+	tristate "Intel(R) Ethernet X722 iWARP Driver"
+	depends on INET && I40E
+	select GENERIC_ALLOCATOR
+	---help---
+	Intel(R) Ethernet X722 iWARP Driver
+	INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
new file mode 100644
index 000000000000..90068c03d217
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/Makefile
@@ -0,0 +1,9 @@
+ccflags-y :=  -Idrivers/net/ethernet/intel/i40e
+
+obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
+
+i40iw-objs :=\
+               i40iw_cm.o i40iw_ctrl.o \
+               i40iw_hmc.o i40iw_hw.o i40iw_main.o  \
+               i40iw_pble.o i40iw_puda.o i40iw_uk.o i40iw_utils.o \
+               i40iw_verbs.o i40iw_virtchnl.o i40iw_vf.o
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
new file mode 100644
index 000000000000..819767681445
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -0,0 +1,570 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_IW_H
+#define I40IW_IW_H
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/crc32c.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+#include <crypto/hash.h>
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_d.h"
+#include "i40iw_hmc.h"
+
+#include <i40e_client.h>
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_ucontext.h"
+#include "i40iw_pble.h"
+#include "i40iw_verbs.h"
+#include "i40iw_cm.h"
+#include "i40iw_user.h"
+#include "i40iw_puda.h"
+
+#define I40IW_FW_VERSION  2
+#define I40IW_HW_VERSION  2
+
+#define I40IW_ARP_ADD     1
+#define I40IW_ARP_DELETE  2
+#define I40IW_ARP_RESOLVE 3
+
+#define I40IW_MACIP_ADD     1
+#define I40IW_MACIP_DELETE  2
+
+#define IW_CCQ_SIZE         (I40IW_CQP_SW_SQSIZE_2048 + 1)
+#define IW_CEQ_SIZE         2048
+#define IW_AEQ_SIZE         2048
+
+#define RX_BUF_SIZE            (1536 + 8)
+#define IW_REG0_SIZE           (4 * 1024)
+#define IW_TX_TIMEOUT          (6 * HZ)
+#define IW_FIRST_QPN           1
+#define IW_SW_CONTEXT_ALIGN    1024
+
+#define MAX_DPC_ITERATIONS		128
+
+#define I40IW_EVENT_TIMEOUT		100000
+#define I40IW_VCHNL_EVENT_TIMEOUT	100000
+
+#define	I40IW_NO_VLAN			0xffff
+#define	I40IW_NO_QSET			0xffff
+
+/* access to mcast filter list */
+#define IW_ADD_MCAST false
+#define IW_DEL_MCAST true
+
+#define I40IW_DRV_OPT_ENABLE_MPA_VER_0     0x00000001
+#define I40IW_DRV_OPT_DISABLE_MPA_CRC      0x00000002
+#define I40IW_DRV_OPT_DISABLE_FIRST_WRITE  0x00000004
+#define I40IW_DRV_OPT_DISABLE_INTF         0x00000008
+#define I40IW_DRV_OPT_ENABLE_MSI           0x00000010
+#define I40IW_DRV_OPT_DUAL_LOGICAL_PORT    0x00000020
+#define I40IW_DRV_OPT_NO_INLINE_DATA       0x00000080
+#define I40IW_DRV_OPT_DISABLE_INT_MOD      0x00000100
+#define I40IW_DRV_OPT_DISABLE_VIRT_WQ      0x00000200
+#define I40IW_DRV_OPT_ENABLE_PAU           0x00000400
+#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP    0x00000800
+
+#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
+#define IW_CFG_FPM_QP_COUNT		32768
+
+#define I40IW_MTU_TO_MSS		40
+#define I40IW_DEFAULT_MSS		1460
+
+struct i40iw_cqp_compl_info {
+	u32 op_ret_val;
+	u16 maj_err_code;
+	u16 min_err_code;
+	bool error;
+	u8 op_code;
+};
+
+#define i40iw_pr_err(fmt, args ...) pr_err("%s: "fmt, __func__, ## args)
+
+#define i40iw_pr_info(fmt, args ...) pr_info("%s: " fmt, __func__, ## args)
+
+#define i40iw_pr_warn(fmt, args ...) pr_warn("%s: " fmt, __func__, ## args)
+
+struct i40iw_cqp_request {
+	struct cqp_commands_info info;
+	wait_queue_head_t waitq;
+	struct list_head list;
+	atomic_t refcount;
+	void (*callback_fcn)(struct i40iw_cqp_request*, u32);
+	void *param;
+	struct i40iw_cqp_compl_info compl_info;
+	bool waiting;
+	bool request_done;
+	bool dynamic;
+};
+
+struct i40iw_cqp {
+	struct i40iw_sc_cqp sc_cqp;
+	spinlock_t req_lock; /*cqp request list */
+	wait_queue_head_t waitq;
+	struct i40iw_dma_mem sq;
+	struct i40iw_dma_mem host_ctx;
+	u64 *scratch_array;
+	struct i40iw_cqp_request *cqp_requests;
+	struct list_head cqp_avail_reqs;
+	struct list_head cqp_pending_reqs;
+};
+
+struct i40iw_device;
+
+struct i40iw_ccq {
+	struct i40iw_sc_cq sc_cq;
+	spinlock_t lock; /* ccq control */
+	wait_queue_head_t waitq;
+	struct i40iw_dma_mem mem_cq;
+	struct i40iw_dma_mem shadow_area;
+};
+
+struct i40iw_ceq {
+	struct i40iw_sc_ceq sc_ceq;
+	struct i40iw_dma_mem mem;
+	u32 irq;
+	u32 msix_idx;
+	struct i40iw_device *iwdev;
+	struct tasklet_struct dpc_tasklet;
+};
+
+struct i40iw_aeq {
+	struct i40iw_sc_aeq sc_aeq;
+	struct i40iw_dma_mem mem;
+};
+
+struct i40iw_arp_entry {
+	u32 ip_addr[4];
+	u8 mac_addr[ETH_ALEN];
+};
+
+enum init_completion_state {
+	INVALID_STATE = 0,
+	INITIAL_STATE,
+	CQP_CREATED,
+	HMC_OBJS_CREATED,
+	PBLE_CHUNK_MEM,
+	CCQ_CREATED,
+	AEQ_CREATED,
+	CEQ_CREATED,
+	ILQ_CREATED,
+	IEQ_CREATED,
+	INET_NOTIFIER,
+	IP_ADDR_REGISTERED,
+	RDMA_DEV_REGISTERED
+};
+
+struct i40iw_msix_vector {
+	u32 idx;
+	u32 irq;
+	u32 cpu_affinity;
+	u32 ceq_id;
+};
+
+#define I40IW_MSIX_TABLE_SIZE   65
+
+struct virtchnl_work {
+	struct work_struct work;
+	union {
+		struct i40iw_cqp_request *cqp_request;
+		struct i40iw_virtchnl_work_info work_info;
+	};
+};
+
+struct i40e_qvlist_info;
+
+struct i40iw_device {
+	struct i40iw_ib_device *iwibdev;
+	struct net_device *netdev;
+	wait_queue_head_t vchnl_waitq;
+	struct i40iw_sc_dev sc_dev;
+	struct i40iw_handler *hdl;
+	struct i40e_info *ldev;
+	struct i40e_client *client;
+	struct i40iw_hw hw;
+	struct i40iw_cm_core cm_core;
+	unsigned long *mem_resources;
+	unsigned long *allocated_qps;
+	unsigned long *allocated_cqs;
+	unsigned long *allocated_mrs;
+	unsigned long *allocated_pds;
+	unsigned long *allocated_arps;
+	struct i40iw_qp **qp_table;
+	bool msix_shared;
+	u32 msix_count;
+	struct i40iw_msix_vector *iw_msixtbl;
+	struct i40e_qvlist_info *iw_qvlist;
+
+	struct i40iw_hmc_pble_rsrc *pble_rsrc;
+	struct i40iw_arp_entry *arp_table;
+	struct i40iw_cqp cqp;
+	struct i40iw_ccq ccq;
+	u32 ceqs_count;
+	struct i40iw_ceq *ceqlist;
+	struct i40iw_aeq aeq;
+	u32 arp_table_size;
+	u32 next_arp_index;
+	spinlock_t resource_lock; /* hw resource access */
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 of_device_registered;
+
+	u32 device_cap_flags;
+	unsigned long db_start;
+	u8 resource_profile;
+	u8 max_rdma_vfs;
+	u8 max_enabled_vfs;
+	u8 max_sge;
+	u8 iw_status;
+	u8 send_term_ok;
+	bool push_mode;		/* Initialized from parameter passed to driver */
+
+	/* x710 specific */
+	struct mutex pbl_mutex;
+	struct tasklet_struct dpc_tasklet;
+	struct workqueue_struct *virtchnl_wq;
+	struct virtchnl_work virtchnl_w[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	struct i40iw_dma_mem obj_mem;
+	struct i40iw_dma_mem obj_next;
+	u8 *hmc_info_mem;
+	u32 sd_type;
+	struct workqueue_struct *param_wq;
+	atomic_t params_busy;
+	u32 mss;
+	enum init_completion_state init_state;
+	u16 mac_ip_table_idx;
+	atomic_t vchnl_msgs;
+	u32 max_mr;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_pd;
+	u32 next_qp;
+	u32 next_cq;
+	u32 next_pd;
+	u32 max_mr_size;
+	u32 max_qp_wr;
+	u32 max_cqe;
+	u32 mr_stagmask;
+	u32 mpa_version;
+	bool dcb;
+};
+
+struct i40iw_ib_device {
+	struct ib_device ibdev;
+	struct i40iw_device *iwdev;
+};
+
+struct i40iw_handler {
+	struct list_head list;
+	struct i40e_client *client;
+	struct i40iw_device device;
+	struct i40e_info ldev;
+};
+
+/**
+ * to_iwdev - get device
+ * @ibdev: ib device
+ **/
+static inline struct i40iw_device *to_iwdev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct i40iw_ib_device, ibdev)->iwdev;
+}
+
+/**
+ * to_ucontext - get user context
+ * @ibucontext: ib user context
+ **/
+static inline struct i40iw_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct i40iw_ucontext, ibucontext);
+}
+
+/**
+ * to_iwpd - get protection domain
+ * @ibpd: ib pd
+ **/
+static inline struct i40iw_pd *to_iwpd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct i40iw_pd, ibpd);
+}
+
+/**
+ * to_iwmr - get device memory region
+ * @ibdev: ib memory region
+ **/
+static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct i40iw_mr, ibmr);
+}
+
+/**
+ * to_iwmr_from_ibfmr - get device memory region
+ * @ibfmr: ib fmr
+ **/
+static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct i40iw_mr, ibfmr);
+}
+
+/**
+ * to_iwmw - get device memory window
+ * @ibmw: ib memory window
+ **/
+static inline struct i40iw_mr *to_iwmw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct i40iw_mr, ibmw);
+}
+
+/**
+ * to_iwcq - get completion queue
+ * @ibcq: ib cqdevice
+ **/
+static inline struct i40iw_cq *to_iwcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct i40iw_cq, ibcq);
+}
+
+/**
+ * to_iwqp - get device qp
+ * @ibqp: ib qp
+ **/
+static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct i40iw_qp, ibqp);
+}
+
+/* i40iw.c */
+void i40iw_add_ref(struct ib_qp *);
+void i40iw_rem_ref(struct ib_qp *);
+struct ib_qp *i40iw_get_qp(struct ib_device *, int);
+
+void i40iw_flush_wqes(struct i40iw_device *iwdev,
+		      struct i40iw_qp *qp);
+
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+			    unsigned char *mac_addr,
+			    __be32 *ip_addr,
+			    bool ipv4,
+			    u32 action);
+
+int i40iw_manage_apbvt(struct i40iw_device *iwdev,
+		       u16 accel_local_port,
+		       bool add_port);
+
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait);
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
+
+/**
+ * i40iw_alloc_resource - allocate a resource
+ * @iwdev: device pointer
+ * @resource_array: resource bit array:
+ * @max_resources: maximum resource number
+ * @req_resources_num: Allocated resource number
+ * @next: next free id
+ **/
+static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
+				       unsigned long *resource_array,
+				       u32 max_resources,
+				       u32 *req_resource_num,
+				       u32 *next)
+{
+	u32 resource_num;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+	resource_num = find_next_zero_bit(resource_array, max_resources, *next);
+	if (resource_num >= max_resources) {
+		resource_num = find_first_zero_bit(resource_array, max_resources);
+		if (resource_num >= max_resources) {
+			spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+			return -EOVERFLOW;
+		}
+	}
+	set_bit(resource_num, resource_array);
+	*next = resource_num + 1;
+	if (*next == max_resources)
+		*next = 0;
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+	*req_resource_num = resource_num;
+
+	return 0;
+}
+
+/**
+ * i40iw_is_resource_allocated - detrmine if resource is
+ * allocated
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to check
+ **/
+static inline bool i40iw_is_resource_allocated(struct i40iw_device *iwdev,
+					       unsigned long *resource_array,
+					       u32 resource_num)
+{
+	bool bit_is_set;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+
+	bit_is_set = test_bit(resource_num, resource_array);
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+
+	return bit_is_set;
+}
+
+/**
+ * i40iw_free_resource - free a resource
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to free
+ **/
+static inline void i40iw_free_resource(struct i40iw_device *iwdev,
+				       unsigned long *resource_array,
+				       u32 resource_num)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->resource_lock, flags);
+	clear_bit(resource_num, resource_array);
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
+}
+
+/**
+ * to_iwhdl - Get the handler from the device pointer
+ * @iwdev: device pointer
+ **/
+static inline struct i40iw_handler *to_iwhdl(struct i40iw_device *iw_dev)
+{
+	return container_of(iw_dev, struct i40iw_handler, device);
+}
+
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev);
+
+/**
+ * iw_init_resources -
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
+
+int i40iw_register_rdma_device(struct i40iw_device *iwdev);
+void i40iw_port_ibevent(struct i40iw_device *iwdev);
+int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn_worker(void *);
+int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
+		     struct sk_buff *);
+
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+					   struct i40iw_cqp_request *cqp_request);
+enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
+					  u8 *mac_addr, u8 *mac_index);
+int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+			struct i40iw_modify_qp_info *info, bool wait);
+
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+					  struct i40iw_cm_info *cminfo,
+					  enum i40iw_quad_entry_type etype,
+					  enum i40iw_quad_hash_manage_type mtype,
+					  void *cmnode,
+					  bool wait);
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+			     struct i40iw_qp *iwqp,
+			     u32 qp_num);
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+					     struct i40iw_dma_mem *memptr,
+					     u32 size, u32 mask);
+
+void i40iw_request_reset(struct i40iw_device *iwdev);
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev);
+void i40iw_setup_cm_core(struct i40iw_device *iwdev);
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core);
+void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq);
+void i40iw_process_aeq(struct i40iw_device *);
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+			 u8 state, u8 del_hash,
+			 u8 term, u8 term_len);
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+				      u16 rem_port,
+				      u32 *rem_addr,
+				      u16 loc_port,
+				      u32 *loc_addr,
+				      bool add_refcnt);
+
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+					   struct i40iw_sc_qp *qp,
+					   struct i40iw_qp_flush_info *info,
+					   bool wait);
+
+void i40iw_copy_ip_ntohl(u32 *dst, u32 *src);
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
+				u64 addr,
+				u64 size,
+				int acc,
+				u64 *iova_start);
+
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+			 unsigned long event,
+			 void *ptr);
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr);
+int i40iw_net_event(struct notifier_block *notifier,
+		    unsigned long event,
+		    void *ptr);
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
new file mode 100644
index 000000000000..92745d755272
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -0,0 +1,4141 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/atomic.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/notifier.h>
+#include <linux/net.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/random.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/highmem.h>
+#include <net/arp.h>
+#include <net/ndisc.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip_fib.h>
+#include <net/tcp.h>
+#include <asm/checksum.h>
+
+#include "i40iw.h"
+
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *);
+static void i40iw_cm_post_event(struct i40iw_cm_event *event);
+static void i40iw_disconnect_worker(struct work_struct *work);
+
+/**
+ * i40iw_free_sqbuf - put back puda buffer if refcount = 0
+ * @dev: FPK device
+ * @buf: puda buffer to free
+ */
+void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+{
+	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
+	struct i40iw_puda_rsrc *ilq = dev->ilq;
+
+	if (!atomic_dec_return(&buf->refcount))
+		i40iw_puda_ret_bufpool(ilq, buf);
+}
+
+/**
+ * i40iw_derive_hw_ird_setting - Calculate IRD
+ *
+ * @cm_ird: IRD of connection's node
+ *
+ * The ird from the connection is rounded to a supported HW
+ * setting (2,8,32,64) and then encoded for ird_size field of
+ * qp_ctx
+ */
+static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
+{
+	u8 encoded_ird_size;
+	u8 pof2_cm_ird = 1;
+
+	/* round-off to next powerof2 */
+	while (pof2_cm_ird < cm_ird)
+		pof2_cm_ird *= 2;
+
+	/* ird_size field is encoded in qp_ctx */
+	switch (pof2_cm_ird) {
+	case I40IW_HW_IRD_SETTING_64:
+		encoded_ird_size = 3;
+		break;
+	case I40IW_HW_IRD_SETTING_32:
+	case I40IW_HW_IRD_SETTING_16:
+		encoded_ird_size = 2;
+		break;
+	case I40IW_HW_IRD_SETTING_8:
+	case I40IW_HW_IRD_SETTING_4:
+		encoded_ird_size = 1;
+		break;
+	case I40IW_HW_IRD_SETTING_2:
+	default:
+		encoded_ird_size = 0;
+		break;
+	}
+	return encoded_ird_size;
+}
+
+/**
+ * i40iw_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ */
+static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
+{
+	if (conn_ird > I40IW_MAX_IRD_SIZE)
+		conn_ird = I40IW_MAX_IRD_SIZE;
+
+	if (conn_ord > I40IW_MAX_ORD_SIZE)
+		conn_ord = I40IW_MAX_ORD_SIZE;
+
+	cm_node->ird_size = conn_ird;
+	cm_node->ord_size = conn_ord;
+}
+
+/**
+ * i40iw_copy_ip_ntohl - change network to host ip
+ * @dst: host ip
+ * @src: big endian
+ */
+void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+	*dst++ = ntohl(*src++);
+	*dst++ = ntohl(*src++);
+	*dst++ = ntohl(*src++);
+	*dst = ntohl(*src);
+}
+
+/**
+ * i40iw_copy_ip_htonl - change host addr to network ip
+ * @dst: host ip
+ * @src: little endian
+ */
+static inline void i40iw_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+	*dst++ = htonl(*src++);
+	*dst++ = htonl(*src++);
+	*dst++ = htonl(*src++);
+	*dst = htonl(*src);
+}
+
+/**
+ * i40iw_fill_sockaddr4 - get addr info for passive connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr4(struct i40iw_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+	laddr->sin_family = AF_INET;
+	raddr->sin_family = AF_INET;
+
+	laddr->sin_port = htons(cm_node->loc_port);
+	raddr->sin_port = htons(cm_node->rem_port);
+
+	laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+	raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * i40iw_fill_sockaddr6 - get ipv6 addr info for passive side
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_fill_sockaddr6(struct i40iw_cm_node *cm_node,
+					struct iw_cm_event *event)
+{
+	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
+
+	laddr6->sin6_family = AF_INET6;
+	raddr6->sin6_family = AF_INET6;
+
+	laddr6->sin6_port = htons(cm_node->loc_port);
+	raddr6->sin6_port = htons(cm_node->rem_port);
+
+	i40iw_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
+			    cm_node->loc_addr);
+	i40iw_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
+			    cm_node->rem_addr);
+}
+
+/**
+ * i40iw_get_addr_info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+*/
+static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
+				struct i40iw_cm_info *cm_info)
+{
+	cm_info->ipv4 = cm_node->ipv4;
+	cm_info->vlan_id = cm_node->vlan_id;
+	memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
+	memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
+	cm_info->loc_port = cm_node->loc_port;
+	cm_info->rem_port = cm_node->rem_port;
+}
+
+/**
+ * i40iw_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ */
+static inline void i40iw_get_cmevent_info(struct i40iw_cm_node *cm_node,
+					  struct iw_cm_id *cm_id,
+					  struct iw_cm_event *event)
+{
+	memcpy(&event->local_addr, &cm_id->m_local_addr,
+	       sizeof(event->local_addr));
+	memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+	       sizeof(event->remote_addr));
+	if (cm_node) {
+		event->private_data = (void *)cm_node->pdata_buf;
+		event->private_data_len = (u8)cm_node->pdata.size;
+		event->ird = cm_node->ird_size;
+		event->ord = cm_node->ord_size;
+	}
+}
+
+/**
+ * i40iw_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ */
+static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
+			       struct iw_cm_id *cm_id,
+			       enum iw_cm_event_type type,
+			       int status)
+{
+	struct iw_cm_event event;
+
+	memset(&event, 0, sizeof(event));
+	event.event = type;
+	event.status = status;
+	switch (type) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		if (cm_node->ipv4)
+			i40iw_fill_sockaddr4(cm_node, &event);
+		else
+			i40iw_fill_sockaddr6(cm_node, &event);
+		event.provider_data = (void *)cm_node;
+		event.private_data = (void *)cm_node->pdata_buf;
+		event.private_data_len = (u8)cm_node->pdata.size;
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		i40iw_get_cmevent_info(cm_node, cm_id, &event);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		event.ird = cm_node->ird_size;
+		event.ord = cm_node->ord_size;
+		break;
+	case IW_CM_EVENT_DISCONNECT:
+		break;
+	case IW_CM_EVENT_CLOSE:
+		break;
+	default:
+		i40iw_pr_err("event type received type = %d\n", type);
+		return -1;
+	}
+	return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * i40iw_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ */
+static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
+						 enum i40iw_cm_event_type type)
+{
+	struct i40iw_cm_event *event;
+
+	if (!cm_node->cm_id)
+		return NULL;
+
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+	if (!event)
+		return NULL;
+
+	event->type = type;
+	event->cm_node = cm_node;
+	memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr));
+	memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr));
+	event->cm_info.rem_port = cm_node->rem_port;
+	event->cm_info.loc_port = cm_node->loc_port;
+	event->cm_info.cm_id = cm_node->cm_id;
+
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
+		    cm_node,
+		    event,
+		    type,
+		    event->cm_info.loc_addr,
+		    event->cm_info.rem_addr);
+
+	i40iw_cm_post_event(event);
+	return event;
+}
+
+/**
+ * i40iw_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ */
+static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_timer_entry *send_entry;
+
+	send_entry = cm_node->send_entry;
+	if (send_entry) {
+		cm_node->send_entry = NULL;
+		i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+		kfree(send_entry);
+		atomic_dec(&cm_node->ref_count);
+	}
+}
+
+/**
+ * i40iw_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ */
+static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+	i40iw_free_retrans_entry(cm_node);
+	spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
+{
+	if ((cm_node->rem_mac[0] == 0x0) &&
+	    (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
+	     ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
+		return true;
+	return false;
+}
+
+/**
+ * i40iw_form_cm_frame - get a free packet and build frame
+ * @cm_node: connection's node ionfo to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer mpa header
+ * @pdata: pointer to private data
+ * @flags:  indicates FIN or ACK
+ */
+static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
+						  struct i40iw_kmem_info *options,
+						  struct i40iw_kmem_info *hdr,
+						  struct i40iw_kmem_info *pdata,
+						  u8 flags)
+{
+	struct i40iw_puda_buf *sqbuf;
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	u8 *buf;
+
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct ethhdr *ethh;
+	u16 packetsize;
+	u16 eth_hlen = ETH_HLEN;
+	u32 opts_len = 0;
+	u32 pd_len = 0;
+	u32 hdr_len = 0;
+
+	sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+	if (!sqbuf)
+		return NULL;
+	buf = sqbuf->mem.va;
+
+	if (options)
+		opts_len = (u32)options->size;
+
+	if (hdr)
+		hdr_len = hdr->size;
+
+	if (pdata) {
+		pd_len = pdata->size;
+		if (!is_remote_ne020_or_chelsio(cm_node))
+			pd_len += MPA_ZERO_PAD_LEN;
+	}
+
+	if (cm_node->vlan_id < VLAN_TAG_PRESENT)
+		eth_hlen += 4;
+
+	if (cm_node->ipv4)
+		packetsize = sizeof(*iph) + sizeof(*tcph);
+	else
+		packetsize = sizeof(*ip6h) + sizeof(*tcph);
+	packetsize += opts_len + hdr_len + pd_len;
+
+	memset(buf, 0x00, eth_hlen + packetsize);
+
+	sqbuf->totallen = packetsize + eth_hlen;
+	sqbuf->maclen = eth_hlen;
+	sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+	sqbuf->scratch = (void *)cm_node;
+
+	ethh = (struct ethhdr *)buf;
+	buf += eth_hlen;
+
+	if (cm_node->ipv4) {
+		sqbuf->ipv4 = true;
+
+		iph = (struct iphdr *)buf;
+		buf += sizeof(*iph);
+		tcph = (struct tcphdr *)buf;
+		buf += sizeof(*tcph);
+
+		ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+
+			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
+		} else {
+			ethh->h_proto = htons(ETH_P_IP);
+		}
+
+		iph->version = IPVERSION;
+		iph->ihl = 5;	/* 5 * 4Byte words, IP headr len */
+		iph->tos = 0;
+		iph->tot_len = htons(packetsize);
+		iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+		iph->frag_off = htons(0x4000);
+		iph->ttl = 0x40;
+		iph->protocol = IPPROTO_TCP;
+		iph->saddr = htonl(cm_node->loc_addr[0]);
+		iph->daddr = htonl(cm_node->rem_addr[0]);
+	} else {
+		sqbuf->ipv4 = false;
+		ip6h = (struct ipv6hdr *)buf;
+		buf += sizeof(*ip6h);
+		tcph = (struct tcphdr *)buf;
+		buf += sizeof(*tcph);
+
+		ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+		ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+		if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+			((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
+			((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+			((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
+		} else {
+			ethh->h_proto = htons(ETH_P_IPV6);
+		}
+		ip6h->version = 6;
+		ip6h->flow_lbl[0] = 0;
+		ip6h->flow_lbl[1] = 0;
+		ip6h->flow_lbl[2] = 0;
+		ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
+		ip6h->nexthdr = 6;
+		ip6h->hop_limit = 128;
+		i40iw_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+				    cm_node->loc_addr);
+		i40iw_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+				    cm_node->rem_addr);
+	}
+
+	tcph->source = htons(cm_node->loc_port);
+	tcph->dest = htons(cm_node->rem_port);
+
+	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+	if (flags & SET_ACK) {
+		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+		tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+		tcph->ack = 1;
+	} else {
+		tcph->ack_seq = 0;
+	}
+
+	if (flags & SET_SYN) {
+		cm_node->tcp_cntxt.loc_seq_num++;
+		tcph->syn = 1;
+	} else {
+		cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+	}
+
+	if (flags & SET_FIN) {
+		cm_node->tcp_cntxt.loc_seq_num++;
+		tcph->fin = 1;
+	}
+
+	if (flags & SET_RST)
+		tcph->rst = 1;
+
+	tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+	sqbuf->tcphlen = tcph->doff << 2;
+	tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+	tcph->urg_ptr = 0;
+
+	if (opts_len) {
+		memcpy(buf, options->addr, opts_len);
+		buf += opts_len;
+	}
+
+	if (hdr_len) {
+		memcpy(buf, hdr->addr, hdr_len);
+		buf += hdr_len;
+	}
+
+	if (pd_len)
+		memcpy(buf, pdata->addr, pd_len);
+
+	atomic_set(&sqbuf->refcount, 1);
+
+	return sqbuf;
+}
+
+/**
+ * i40iw_send_reset - Send RST packet
+ * @cm_node: connection's node
+ */
+static int i40iw_send_reset(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+	int flags = SET_RST | SET_ACK;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, flags);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 0, 1);
+}
+
+/**
+ * i40iw_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ */
+static void i40iw_active_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_connect_errs++;
+	if (reset) {
+		i40iw_debug(cm_node->dev,
+			    I40IW_DEBUG_CM,
+			    "%s cm_node=%p state=%d\n",
+			    __func__,
+			    cm_node,
+			    cm_node->state);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+	}
+
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+}
+
+/**
+ * i40iw_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ */
+static void i40iw_passive_open_err(struct i40iw_cm_node *cm_node, bool reset)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	cm_node->cm_core->stats_passive_errs++;
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "%s cm_node=%p state =%d\n",
+		    __func__,
+		    cm_node,
+		    cm_node->state);
+	if (reset)
+		i40iw_send_reset(cm_node);
+	else
+		i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ */
+static void i40iw_event_connect_error(struct i40iw_cm_event *event)
+{
+	struct i40iw_qp *iwqp;
+	struct iw_cm_id *cm_id;
+
+	cm_id = event->cm_node->cm_id;
+	if (!cm_id)
+		return;
+
+	iwqp = cm_id->provider_data;
+
+	if (!iwqp || !iwqp->iwdev)
+		return;
+
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+	i40iw_send_cm_event(event->cm_node, cm_id,
+			    IW_CM_EVENT_CONNECT_REPLY,
+			    -ECONNRESET);
+	cm_id->rem_ref(cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_process_options
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_packet: flag if syn packet
+ */
+static int i40iw_process_options(struct i40iw_cm_node *cm_node,
+				 u8 *optionsloc,
+				 u32 optionsize,
+				 u32 syn_packet)
+{
+	u32 tmp;
+	u32 offset = 0;
+	union all_known_options *all_options;
+	char got_mss_option = 0;
+
+	while (offset < optionsize) {
+		all_options = (union all_known_options *)(optionsloc + offset);
+		switch (all_options->as_base.optionnum) {
+		case OPTION_NUMBER_END:
+			offset = optionsize;
+			break;
+		case OPTION_NUMBER_NONE:
+			offset += 1;
+			continue;
+		case OPTION_NUMBER_MSS:
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: MSS Length: %d Offset: %d Size: %d\n",
+				    __func__,
+				    all_options->as_mss.length,
+				    offset,
+				    optionsize);
+			got_mss_option = 1;
+			if (all_options->as_mss.length != 4)
+				return -1;
+			tmp = ntohs(all_options->as_mss.mss);
+			if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
+				cm_node->tcp_cntxt.mss = tmp;
+			break;
+		case OPTION_NUMBER_WINDOW_SCALE:
+			cm_node->tcp_cntxt.snd_wscale =
+			    all_options->as_windowscale.shiftcount;
+			break;
+		default:
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "TCP Option not understood: %x\n",
+				    all_options->as_base.optionnum);
+			break;
+		}
+		offset += all_options->as_base.length;
+	}
+	if (!got_mss_option && syn_packet)
+		cm_node->tcp_cntxt.mss = I40IW_CM_DEFAULT_MSS;
+	return 0;
+}
+
+/**
+ * i40iw_handle_tcp_options -
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ */
+static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
+				    struct tcphdr *tcph,
+				    int optionsize,
+				    int passive)
+{
+	u8 *optionsloc = (u8 *)&tcph[1];
+
+	if (optionsize) {
+		if (i40iw_process_options(cm_node,
+					  optionsloc,
+					  optionsize,
+					  (u32)tcph->syn)) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: Node %p, Sending RESET\n",
+				    __func__,
+				    cm_node);
+			if (passive)
+				i40iw_passive_open_err(cm_node, true);
+			else
+				i40iw_active_open_err(cm_node, true);
+			return -1;
+		}
+	}
+
+	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
+	    cm_node->tcp_cntxt.snd_wscale;
+
+	if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+	return 0;
+}
+
+/**
+ * i40iw_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+		break;
+	case MPA_KEY_REPLY:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+		break;
+	default:
+		break;
+	}
+	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+	mpa_frame->rev = cm_node->mpa_frame_rev;
+	mpa_frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * i40iw_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: to do read0 or write0
+ */
+static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
+			       void *start_addr,
+			       u8 mpa_key)
+{
+	struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+	struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+
+	/* initialize the upper 5 bytes of the frame */
+	i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
+	mpa_frame->flags |= IETF_MPA_V2_FLAG;
+	mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+	/* initialize RTR msg */
+	if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+		rtr_msg->ctrl_ird = IETF_NO_IRD_ORD;
+		rtr_msg->ctrl_ord = IETF_NO_IRD_ORD;
+	} else {
+		rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+			IETF_NO_IRD_ORD : cm_node->ird_size;
+		rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+			IETF_NO_IRD_ORD : cm_node->ord_size;
+	}
+
+	rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
+	rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+		rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+		break;
+	case MPA_KEY_REPLY:
+		switch (cm_node->send_rdma0_op) {
+		case SEND_RDMA_WRITE_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+			break;
+		case SEND_RDMA_READ_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+	rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
+	rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+}
+
+/**
+ * i40iw_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa: data buffer
+ * @mpa_key: to do read0 or write0
+ */
+static int i40iw_cm_build_mpa_frame(struct i40iw_cm_node *cm_node,
+				    struct i40iw_kmem_info *mpa,
+				    u8 mpa_key)
+{
+	int hdr_len = 0;
+
+	switch (cm_node->mpa_frame_rev) {
+	case IETF_MPA_V1:
+		hdr_len = sizeof(struct ietf_mpa_v1);
+		i40iw_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+		break;
+	case IETF_MPA_V2:
+		hdr_len = sizeof(struct ietf_mpa_v2);
+		i40iw_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+		break;
+	default:
+		break;
+	}
+
+	return hdr_len;
+}
+
+/**
+ * i40iw_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ */
+static int i40iw_send_mpa_request(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	if (!cm_node) {
+		i40iw_pr_err("cm_node == NULL\n");
+		return -1;
+	}
+
+	cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+	cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+							 &cm_node->mpa_hdr,
+							 MPA_KEY_REQUEST);
+	if (!cm_node->mpa_hdr.size) {
+		i40iw_pr_err("mpa size = %d\n", cm_node->mpa_hdr.size);
+		return -1;
+	}
+
+	sqbuf = i40iw_form_cm_frame(cm_node,
+				    NULL,
+				    &cm_node->mpa_hdr,
+				    &cm_node->pdata,
+				    SET_ACK);
+	if (!sqbuf) {
+		i40iw_pr_err("sq_buf == NULL\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_mpa_reject -
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ */
+static int i40iw_send_mpa_reject(struct i40iw_cm_node *cm_node,
+				 const void *pdata,
+				 u8 plen)
+{
+	struct i40iw_puda_buf *sqbuf;
+	struct i40iw_kmem_info priv_info;
+
+	cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
+	cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
+							 &cm_node->mpa_hdr,
+							 MPA_KEY_REPLY);
+
+	cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+	priv_info.addr = (void *)pdata;
+	priv_info.size = plen;
+
+	sqbuf = i40iw_form_cm_frame(cm_node,
+				    NULL,
+				    &cm_node->mpa_hdr,
+				    &priv_info,
+				    SET_ACK | SET_FIN);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -ENOMEM;
+	}
+	cm_node->state = I40IW_CM_STATE_FIN_WAIT1;
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * recv_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buffer: Data pointer
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ */
+static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, u32 len)
+{
+	struct ietf_mpa_v1 *mpa_frame;
+	struct ietf_mpa_v2 *mpa_v2_frame;
+	struct ietf_rtr_msg *rtr_msg;
+	int mpa_hdr_len;
+	int priv_data_len;
+
+	*type = I40IW_MPA_REQUEST_ACCEPT;
+
+	if (len < sizeof(struct ietf_mpa_v1)) {
+		i40iw_pr_err("ietf buffer small (%x)\n", len);
+		return -1;
+	}
+
+	mpa_frame = (struct ietf_mpa_v1 *)buffer;
+	mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+	priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+	if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+		i40iw_pr_err("large pri_data %d\n", priv_data_len);
+		return -1;
+	}
+	if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+		i40iw_pr_err("unsupported mpa rev = %d\n", mpa_frame->rev);
+		return -1;
+	}
+	if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+		i40iw_pr_err("rev %d\n", mpa_frame->rev);
+		return -1;
+	}
+	cm_node->mpa_frame_rev = mpa_frame->rev;
+
+	if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+			i40iw_pr_err("Unexpected MPA Key received\n");
+			return -1;
+		}
+	} else {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+			i40iw_pr_err("Unexpected MPA Key received\n");
+			return -1;
+		}
+	}
+
+	if (priv_data_len + mpa_hdr_len > len) {
+		i40iw_pr_err("ietf buffer len(%x + %x != %x)\n",
+			     priv_data_len, mpa_hdr_len, len);
+		return -1;
+	}
+	if (len > MAX_CM_BUFFER) {
+		i40iw_pr_err("ietf buffer large len = %d\n", len);
+		return -1;
+	}
+
+	switch (mpa_frame->rev) {
+	case IETF_MPA_V2:{
+			u16 ird_size;
+			u16 ord_size;
+			u16 ctrl_ord;
+			u16 ctrl_ird;
+
+			mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+			mpa_hdr_len += IETF_RTR_MSG_SIZE;
+			rtr_msg = &mpa_v2_frame->rtr_msg;
+
+			/* parse rtr message */
+			ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+			ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+			ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+			ord_size = ctrl_ord & IETF_NO_IRD_ORD;
+
+			if (!(ctrl_ird & IETF_PEER_TO_PEER))
+				return -1;
+
+			if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+				cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+				goto negotiate_done;
+			}
+
+			if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
+				/* responder */
+				if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+					cm_node->ird_size = 1;
+				if (cm_node->ord_size > ird_size)
+					cm_node->ord_size = ird_size;
+			} else {
+				/* initiator */
+				if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+					return -1;
+				if (cm_node->ord_size > ird_size)
+					cm_node->ord_size = ird_size;
+
+				if (cm_node->ird_size < ord_size)
+					/* no resources available */
+					return -1;
+			}
+
+negotiate_done:
+			if (ctrl_ord & IETF_RDMA0_READ)
+				cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+			else if (ctrl_ord & IETF_RDMA0_WRITE)
+				cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+			else	/* Not supported RDMA0 operation */
+				return -1;
+			i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+				    "MPAV2: Negotiated ORD: %d, IRD: %d\n",
+				    cm_node->ord_size, cm_node->ird_size);
+			break;
+		}
+		break;
+	case IETF_MPA_V1:
+	default:
+		break;
+	}
+
+	memcpy(cm_node->pdata_buf, buffer + mpa_hdr_len, priv_data_len);
+	cm_node->pdata.size = priv_data_len;
+
+	if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+		*type = I40IW_MPA_REQUEST_REJECT;
+
+	if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+		cm_node->snd_mark_en = true;
+
+	return 0;
+}
+
+/**
+ * i40iw_schedule_cm_timer
+ * @@cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: if it es send ot close
+ * @send_retrans: if rexmits to be done
+ * @close_when_complete: is cm_node to be removed
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ *		i40iw_rem_ref_cm_node(cm_core, cm_node);
+ *		i40iw_schedule_cm_timer(...)
+ *		atomic_inc(&cm_node->ref_count);
+ */
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+			    struct i40iw_puda_buf *sqbuf,
+			    enum i40iw_timer_type type,
+			    int send_retrans,
+			    int close_when_complete)
+{
+	struct i40iw_sc_dev *dev = cm_node->dev;
+	struct i40iw_cm_core *cm_core = cm_node->cm_core;
+	struct i40iw_timer_entry *new_send;
+	int ret = 0;
+	u32 was_timer_set;
+	unsigned long flags;
+
+	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+	if (!new_send) {
+		i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+		return -ENOMEM;
+	}
+	new_send->retrycount = I40IW_DEFAULT_RETRYS;
+	new_send->retranscount = I40IW_DEFAULT_RETRANS;
+	new_send->sqbuf = sqbuf;
+	new_send->timetosend = jiffies;
+	new_send->type = type;
+	new_send->send_retrans = send_retrans;
+	new_send->close_when_complete = close_when_complete;
+
+	if (type == I40IW_TIMER_TYPE_CLOSE) {
+		new_send->timetosend += (HZ / 10);
+		if (cm_node->close_entry) {
+			kfree(new_send);
+			i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+			i40iw_pr_err("already close entry\n");
+			return -EINVAL;
+		}
+		cm_node->close_entry = new_send;
+	}
+
+	if (type == I40IW_TIMER_TYPE_SEND) {
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		cm_node->send_entry = new_send;
+		atomic_inc(&cm_node->ref_count);
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+		new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
+
+		atomic_inc(&sqbuf->refcount);
+		i40iw_puda_send_buf(dev->ilq, sqbuf);
+		if (!send_retrans) {
+			i40iw_cleanup_retrans_entry(cm_node);
+			if (close_when_complete)
+				i40iw_rem_ref_cm_node(cm_node);
+			return ret;
+		}
+	}
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+	if (!was_timer_set) {
+		cm_core->tcp_timer.expires = new_send->timetosend;
+		add_timer(&cm_core->tcp_timer);
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	return ret;
+}
+
+/**
+ * i40iw_retrans_expired - Could not rexmit the packet
+ * @cm_node: connection's node
+ */
+static void i40iw_retrans_expired(struct i40iw_cm_node *cm_node)
+{
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	enum i40iw_cm_node_state state = cm_node->state;
+
+	cm_node->state = I40IW_CM_STATE_CLOSED;
+	switch (state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_CLOSING:
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_LAST_ACK:
+		if (cm_node->cm_id)
+			cm_id->rem_ref(cm_id);
+		i40iw_send_reset(cm_node);
+		break;
+	default:
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_close_entry - for handling retry/timeouts
+ * @cm_node: connection's node
+ * @rem_node: flag for remove cm_node
+ */
+static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node)
+{
+	struct i40iw_timer_entry *close_entry = cm_node->close_entry;
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	struct i40iw_qp *iwqp;
+	unsigned long flags;
+
+	if (!close_entry)
+		return;
+	iwqp = (struct i40iw_qp *)close_entry->sqbuf;
+	if (iwqp) {
+		spin_lock_irqsave(&iwqp->lock, flags);
+		if (iwqp->cm_id) {
+			iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+			iwqp->hw_iwarp_state = I40IW_QP_STATE_ERROR;
+			iwqp->last_aeq = I40IW_AE_RESET_SENT;
+			iwqp->ibqp_state = IB_QPS_ERR;
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+			i40iw_cm_disconn(iwqp);
+		} else {
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+		}
+	} else if (rem_node) {
+		/* TIME_WAIT state */
+		i40iw_rem_ref_cm_node(cm_node);
+	}
+	if (cm_id)
+		cm_id->rem_ref(cm_id);
+	kfree(close_entry);
+	cm_node->close_entry = NULL;
+}
+
+/**
+ * i40iw_cm_timer_tick - system's timer expired callback
+ * @pass: Pointing to cm_core
+ */
+static void i40iw_cm_timer_tick(unsigned long pass)
+{
+	unsigned long nexttimeout = jiffies + I40IW_LONG_TIME;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_timer_entry *send_entry, *close_entry;
+	struct list_head *list_core_temp;
+	struct list_head *list_node;
+	struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
+	u32 settimer = 0;
+	unsigned long timetosend;
+	struct i40iw_sc_dev *dev;
+	unsigned long flags;
+
+	struct list_head timer_list;
+
+	INIT_LIST_HEAD(&timer_list);
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+		cm_node = container_of(list_node, struct i40iw_cm_node, list);
+		if (cm_node->close_entry || cm_node->send_entry) {
+			atomic_inc(&cm_node->ref_count);
+			list_add(&cm_node->timer_entry, &timer_list);
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &timer_list) {
+		cm_node = container_of(list_node,
+				       struct i40iw_cm_node,
+				       timer_entry);
+		close_entry = cm_node->close_entry;
+
+		if (close_entry) {
+			if (time_after(close_entry->timetosend, jiffies)) {
+				if (nexttimeout > close_entry->timetosend ||
+				    !settimer) {
+					nexttimeout = close_entry->timetosend;
+					settimer = 1;
+				}
+			} else {
+				i40iw_handle_close_entry(cm_node, 1);
+			}
+		}
+
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+		send_entry = cm_node->send_entry;
+		if (!send_entry)
+			goto done;
+		if (time_after(send_entry->timetosend, jiffies)) {
+			if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+				if ((nexttimeout > send_entry->timetosend) ||
+				    !settimer) {
+					nexttimeout = send_entry->timetosend;
+					settimer = 1;
+				}
+			} else {
+				i40iw_free_retrans_entry(cm_node);
+			}
+			goto done;
+		}
+
+		if ((cm_node->state == I40IW_CM_STATE_OFFLOADED) ||
+		    (cm_node->state == I40IW_CM_STATE_CLOSED)) {
+			i40iw_free_retrans_entry(cm_node);
+			goto done;
+		}
+
+		if (!send_entry->retranscount || !send_entry->retrycount) {
+			i40iw_free_retrans_entry(cm_node);
+
+			spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+			i40iw_retrans_expired(cm_node);
+			cm_node->state = I40IW_CM_STATE_CLOSED;
+			spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+			goto done;
+		}
+		cm_node->cm_core->stats_pkt_retrans++;
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+		dev = cm_node->dev;
+		atomic_inc(&send_entry->sqbuf->refcount);
+		i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+		if (send_entry->send_retrans) {
+			send_entry->retranscount--;
+			timetosend = (I40IW_RETRY_TIMEOUT <<
+				      (I40IW_DEFAULT_RETRANS -
+				       send_entry->retranscount));
+
+			send_entry->timetosend = jiffies +
+			    min(timetosend, I40IW_MAX_TIMEOUT);
+			if (nexttimeout > send_entry->timetosend || !settimer) {
+				nexttimeout = send_entry->timetosend;
+				settimer = 1;
+			}
+		} else {
+			int close_when_complete;
+
+			close_when_complete = send_entry->close_when_complete;
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p state=%d\n",
+				    cm_node,
+				    cm_node->state);
+			i40iw_free_retrans_entry(cm_node);
+			if (close_when_complete)
+				i40iw_rem_ref_cm_node(cm_node);
+		}
+done:
+		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+		i40iw_rem_ref_cm_node(cm_node);
+	}
+
+	if (settimer) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		if (!timer_pending(&cm_core->tcp_timer)) {
+			cm_core->tcp_timer.expires = nexttimeout;
+			add_timer(&cm_core->tcp_timer);
+		}
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+}
+
+/**
+ * i40iw_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ */
+int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
+{
+	struct i40iw_puda_buf *sqbuf;
+	int flags = SET_SYN;
+	char optionsbuffer[sizeof(struct option_mss) +
+			   sizeof(struct option_windowscale) +
+			   sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+	struct i40iw_kmem_info opts;
+
+	int optionssize = 0;
+	/* Sending MSS option */
+	union all_known_options *options;
+
+	opts.addr = optionsbuffer;
+	if (!cm_node) {
+		i40iw_pr_err("no cm_node\n");
+		return -EINVAL;
+	}
+
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_mss.optionnum = OPTION_NUMBER_MSS;
+	options->as_mss.length = sizeof(struct option_mss);
+	options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
+	optionssize += sizeof(struct option_mss);
+
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
+	options->as_windowscale.length = sizeof(struct option_windowscale);
+	options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+	optionssize += sizeof(struct option_windowscale);
+	options = (union all_known_options *)&optionsbuffer[optionssize];
+	options->as_end = OPTION_NUMBER_END;
+	optionssize += 1;
+
+	if (sendack)
+		flags |= SET_ACK;
+
+	opts.size = optionssize;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, &opts, NULL, NULL, flags);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ */
+static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
+	if (sqbuf)
+		i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+	else
+		i40iw_pr_err("no sqbuf\n");
+}
+
+/**
+ * i40iw_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ */
+static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_puda_buf *sqbuf;
+
+	sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN);
+	if (!sqbuf) {
+		i40iw_pr_err("no sqbuf\n");
+		return -1;
+	}
+	return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
+}
+
+/**
+ * i40iw_find_node - find a cm node that matches the reference cm node
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: loc ip addr
+ * @add_refcnt: flag to increment refcount of cm_node
+ */
+struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
+				      u16 rem_port,
+				      u32 *rem_addr,
+				      u16 loc_port,
+				      u32 *loc_addr,
+				      bool add_refcnt)
+{
+	struct list_head *hte;
+	struct i40iw_cm_node *cm_node;
+	unsigned long flags;
+
+	hte = &cm_core->connected_nodes;
+
+	/* walk list and find cm_node associated with this session ID */
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	list_for_each_entry(cm_node, hte, list) {
+		if (!memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
+		    (cm_node->loc_port == loc_port) &&
+		    !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr)) &&
+		    (cm_node->rem_port == rem_port)) {
+			if (add_refcnt)
+				atomic_inc(&cm_node->ref_count);
+			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+			return cm_node;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	/* no owner node */
+	return NULL;
+}
+
+/**
+ * i40iw_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_port: listener tcp port num
+ * @dst_addr: listener ip addr
+ * @listener_state: state to match with listen node's
+ */
+static struct i40iw_cm_listener *i40iw_find_listener(
+						     struct i40iw_cm_core *cm_core,
+						     u32 *dst_addr,
+						     u16 dst_port,
+						     u16 vlan_id,
+						     enum i40iw_cm_listener_state
+						     listener_state)
+{
+	struct i40iw_cm_listener *listen_node;
+	static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+	u32 listen_addr[4];
+	u16 listen_port;
+	unsigned long flags;
+
+	/* walk list and find cm_node associated with this session ID */
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+		listen_port = listen_node->loc_port;
+		/* compare node pair, return node handle if a match */
+		if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
+		     !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
+		     (listen_port == dst_port) &&
+		     (listener_state & listen_node->listener_state)) {
+			atomic_inc(&listen_node->ref_count);
+			spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+			return listen_node;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ */
+static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
+			       struct i40iw_cm_node *cm_node)
+{
+	struct list_head *hte;
+	unsigned long flags;
+
+	if (!cm_node || !cm_core) {
+		i40iw_pr_err("cm_node or cm_core == NULL\n");
+		return;
+	}
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+	/* get a handle on the hash table element (list head for this slot) */
+	hte = &cm_core->connected_nodes;
+	list_add_tail(&cm_node->list, hte);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+}
+
+/**
+ * listen_port_in_use - determine if port is in use
+ * @port: Listen port number
+ */
+static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
+{
+	struct i40iw_cm_listener *listen_node;
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+	list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+		if (listen_node->loc_port == port) {
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	return ret;
+}
+
+/**
+ * i40iw_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ */
+static enum i40iw_status_code i40iw_del_multiple_qhash(
+						       struct i40iw_device *iwdev,
+						       struct i40iw_cm_info *cm_info,
+						       struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct i40iw_cm_listener *child_listen_node;
+	enum i40iw_status_code ret = I40IW_ERR_CONFIG;
+	struct list_head *pos, *tpos;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+	list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) {
+		child_listen_node = list_entry(pos, struct i40iw_cm_listener, child_listen_list);
+		if (child_listen_node->ipv4)
+			i40iw_debug(&iwdev->sc_dev,
+				    I40IW_DEBUG_CM,
+				    "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+				    child_listen_node->loc_addr,
+				    child_listen_node->loc_port,
+				    child_listen_node->vlan_id);
+		else
+			i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+				    "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+				    child_listen_node->loc_addr,
+				    child_listen_node->loc_port,
+				    child_listen_node->vlan_id);
+		list_del(pos);
+		memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+		       sizeof(cm_info->loc_addr));
+		cm_info->vlan_id = child_listen_node->vlan_id;
+		ret = i40iw_manage_qhash(iwdev, cm_info,
+					 I40IW_QHASH_TYPE_TCP_SYN,
+					 I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+		kfree(child_listen_node);
+		cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+		i40iw_debug(&iwdev->sc_dev,
+			    I40IW_DEBUG_CM,
+			    "freed pointer = %p\n",
+			    child_listen_node);
+	}
+	spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+	return ret;
+}
+
+/**
+ * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the net_device of the IPv6 address and also sets the
+ * vlan id and mac for that address.
+ */
+static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+	struct net_device *ip_dev = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct in6_addr laddr6;
+
+	i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+	if (vlan_id)
+		*vlan_id = I40IW_NO_VLAN;
+	if (mac)
+		eth_zero_addr(mac);
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+			if (vlan_id)
+				*vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+			if (ip_dev->dev_addr && mac)
+				ether_addr_copy(mac, ip_dev->dev_addr);
+			break;
+		}
+	}
+	rcu_read_unlock();
+#endif
+	return ip_dev;
+}
+
+/**
+ * i40iw_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ */
+static u16 i40iw_get_vlan_ipv4(u32 *addr)
+{
+	struct net_device *netdev;
+	u16 vlan_id = I40IW_NO_VLAN;
+
+	netdev = ip_dev_find(&init_net, htonl(addr[0]));
+	if (netdev) {
+		vlan_id = rdma_vlan_dev_vlan_id(netdev);
+		dev_put(netdev);
+	}
+	return vlan_id;
+}
+
+/**
+ * i40iw_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
+					      struct i40iw_cm_info *cm_info,
+					      struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct net_device *ip_dev;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifp;
+	enum i40iw_status_code ret = 0;
+	struct i40iw_cm_listener *child_listen_node;
+	unsigned long flags;
+
+	rtnl_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
+		      (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+		     (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+			idev = __in6_dev_get(ip_dev);
+			if (!idev) {
+				i40iw_pr_err("idev == NULL\n");
+				break;
+			}
+			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "IP=%pI6, vlan_id=%d, MAC=%pM\n",
+					    &ifp->addr,
+					    rdma_vlan_dev_vlan_id(ip_dev),
+					    ip_dev->dev_addr);
+				child_listen_node =
+					kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child listener %p\n",
+					    child_listen_node);
+				if (!child_listen_node) {
+					i40iw_pr_err("listener memory allocation\n");
+					ret = I40IW_ERR_NO_MEMORY;
+					goto exit;
+				}
+				cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+				cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+
+				memcpy(child_listen_node, cm_parent_listen_node,
+				       sizeof(*child_listen_node));
+
+				i40iw_copy_ip_ntohl(child_listen_node->loc_addr,
+						    ifp->addr.in6_u.u6_addr32);
+				memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+				       sizeof(cm_info->loc_addr));
+
+				ret = i40iw_manage_qhash(iwdev, cm_info,
+							 I40IW_QHASH_TYPE_TCP_SYN,
+							 I40IW_QHASH_MANAGE_TYPE_ADD,
+							 NULL, true);
+				if (!ret) {
+					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+					list_add(&child_listen_node->child_listen_list,
+						 &cm_parent_listen_node->child_listen_list);
+					spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+					cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+				} else {
+					kfree(child_listen_node);
+				}
+			}
+		}
+	}
+exit:
+	rtnl_unlock();
+	return ret;
+}
+
+/**
+ * i40iw_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ */
+static enum i40iw_status_code i40iw_add_mqh_4(
+				struct i40iw_device *iwdev,
+				struct i40iw_cm_info *cm_info,
+				struct i40iw_cm_listener *cm_parent_listen_node)
+{
+	struct net_device *dev;
+	struct in_device *idev;
+	struct i40iw_cm_listener *child_listen_node;
+	enum i40iw_status_code ret = 0;
+	unsigned long flags;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		if ((((rdma_vlan_dev_vlan_id(dev) < I40IW_NO_VLAN) &&
+		      (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+		    (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+			idev = in_dev_get(dev);
+			for_ifa(idev) {
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+					    &ifa->ifa_address,
+					    rdma_vlan_dev_vlan_id(dev),
+					    dev->dev_addr);
+				child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+				cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "Allocating child listener %p\n",
+					    child_listen_node);
+				if (!child_listen_node) {
+					i40iw_pr_err("listener memory allocation\n");
+					in_dev_put(idev);
+					ret = I40IW_ERR_NO_MEMORY;
+					goto exit;
+				}
+				cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
+				cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+				memcpy(child_listen_node,
+				       cm_parent_listen_node,
+				       sizeof(*child_listen_node));
+
+				child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
+				memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+				       sizeof(cm_info->loc_addr));
+
+				ret = i40iw_manage_qhash(iwdev,
+							 cm_info,
+							 I40IW_QHASH_TYPE_TCP_SYN,
+							 I40IW_QHASH_MANAGE_TYPE_ADD,
+							 NULL,
+							 true);
+				if (!ret) {
+					spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+					list_add(&child_listen_node->child_listen_list,
+						 &cm_parent_listen_node->child_listen_list);
+					spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+				} else {
+					kfree(child_listen_node);
+					cm_parent_listen_node->cm_core->stats_listen_nodes_created--;
+				}
+			}
+			endfor_ifa(idev);
+			in_dev_put(idev);
+		}
+	}
+exit:
+	rtnl_unlock();
+	return ret;
+}
+
+/**
+ * i40iw_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ */
+static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
+				   struct i40iw_cm_listener *listener,
+				   int free_hanging_nodes, bool apbvt_del)
+{
+	int ret = -EINVAL;
+	int err = 0;
+	struct list_head *list_pos;
+	struct list_head *list_temp;
+	struct i40iw_cm_node *cm_node;
+	struct list_head reset_list;
+	struct i40iw_cm_info nfo;
+	struct i40iw_cm_node *loopback;
+	enum i40iw_cm_node_state old_state;
+	unsigned long flags;
+
+	/* free non-accelerated child nodes for this listener */
+	INIT_LIST_HEAD(&reset_list);
+	if (free_hanging_nodes) {
+		spin_lock_irqsave(&cm_core->ht_lock, flags);
+		list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
+			cm_node = container_of(list_pos, struct i40iw_cm_node, list);
+			if ((cm_node->listener == listener) && !cm_node->accelerated) {
+				atomic_inc(&cm_node->ref_count);
+				list_add(&cm_node->reset_entry, &reset_list);
+			}
+		}
+		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	}
+
+	list_for_each_safe(list_pos, list_temp, &reset_list) {
+		cm_node = container_of(list_pos, struct i40iw_cm_node, reset_entry);
+		loopback = cm_node->loopbackpartner;
+		if (cm_node->state >= I40IW_CM_STATE_FIN_WAIT1) {
+			i40iw_rem_ref_cm_node(cm_node);
+		} else {
+			if (!loopback) {
+				i40iw_cleanup_retrans_entry(cm_node);
+				err = i40iw_send_reset(cm_node);
+				if (err) {
+					cm_node->state = I40IW_CM_STATE_CLOSED;
+					i40iw_pr_err("send reset\n");
+				} else {
+					old_state = cm_node->state;
+					cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+					if (old_state != I40IW_CM_STATE_MPAREQ_RCVD)
+						i40iw_rem_ref_cm_node(cm_node);
+				}
+			} else {
+				struct i40iw_cm_event event;
+
+				event.cm_node = loopback;
+				memcpy(event.cm_info.rem_addr,
+				       loopback->rem_addr, sizeof(event.cm_info.rem_addr));
+				memcpy(event.cm_info.loc_addr,
+				       loopback->loc_addr, sizeof(event.cm_info.loc_addr));
+				event.cm_info.rem_port = loopback->rem_port;
+				event.cm_info.loc_port = loopback->loc_port;
+				event.cm_info.cm_id = loopback->cm_id;
+				event.cm_info.ipv4 = loopback->ipv4;
+				atomic_inc(&loopback->ref_count);
+				loopback->state = I40IW_CM_STATE_CLOSED;
+				i40iw_event_connect_error(&event);
+				cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
+				i40iw_rem_ref_cm_node(cm_node);
+			}
+		}
+	}
+
+	if (!atomic_dec_return(&listener->ref_count)) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_del(&listener->list);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+		if (listener->iwdev) {
+			if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
+				i40iw_manage_apbvt(listener->iwdev,
+						   listener->loc_port,
+						   I40IW_MANAGE_APBVT_DEL);
+
+			memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
+			nfo.loc_port = listener->loc_port;
+			nfo.ipv4 = listener->ipv4;
+			nfo.vlan_id = listener->vlan_id;
+
+			if (!list_empty(&listener->child_listen_list)) {
+				i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
+			} else {
+				if (listener->qhash_set)
+					i40iw_manage_qhash(listener->iwdev,
+							   &nfo,
+							   I40IW_QHASH_TYPE_TCP_SYN,
+							   I40IW_QHASH_MANAGE_TYPE_DELETE,
+							   NULL,
+							   false);
+			}
+		}
+
+		cm_core->stats_listen_destroyed++;
+		kfree(listener);
+		cm_core->stats_listen_nodes_destroyed++;
+		listener = NULL;
+		ret = 0;
+	}
+
+	if (listener) {
+		if (atomic_read(&listener->pend_accepts_cnt) > 0)
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s: listener (%p) pending accepts=%u\n",
+				    __func__,
+				    listener,
+				    atomic_read(&listener->pend_accepts_cnt));
+	}
+
+	return ret;
+}
+
+/**
+ * i40iw_cm_del_listen - delete a linstener
+ * @cm_core: cm's core
+  * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ */
+static int i40iw_cm_del_listen(struct i40iw_cm_core *cm_core,
+			       struct i40iw_cm_listener *listener,
+			       bool apbvt_del)
+{
+	listener->listener_state = I40IW_CM_LISTENER_PASSIVE_STATE;
+	listener->cm_id = NULL;	/* going to be destroyed pretty soon */
+	return i40iw_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+}
+
+/**
+ * i40iw_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
+				    u32 src_ip,
+				    u32 dst_ip,
+				    int arpindex)
+{
+	struct rtable *rt;
+	struct neighbour *neigh;
+	int rc = arpindex;
+	struct net_device *netdev = iwdev->netdev;
+	__be32 dst_ipaddr = htonl(dst_ip);
+	__be32 src_ipaddr = htonl(src_ip);
+
+	rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+	if (IS_ERR(rt)) {
+		i40iw_pr_err("ip_route_output\n");
+		return rc;
+	}
+
+	if (netif_is_bond_slave(netdev))
+		netdev = netdev_master_upper_dev_get(netdev);
+
+	neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
+
+	rcu_read_lock();
+	if (neigh) {
+		if (neigh->nud_state & NUD_VALID) {
+			if (arpindex >= 0) {
+				if (ether_addr_equal(iwdev->arp_table[arpindex].mac_addr,
+						     neigh->ha))
+					/* Mac address same as arp table */
+					goto resolve_neigh_exit;
+				i40iw_manage_arp_cache(iwdev,
+						       iwdev->arp_table[arpindex].mac_addr,
+						       &dst_ip,
+						       true,
+						       I40IW_ARP_DELETE);
+			}
+
+			i40iw_manage_arp_cache(iwdev, neigh->ha, &dst_ip, true, I40IW_ARP_ADD);
+			rc = i40iw_arp_table(iwdev, &dst_ip, true, NULL, I40IW_ARP_RESOLVE);
+		} else {
+			neigh_event_send(neigh, NULL);
+		}
+	}
+ resolve_neigh_exit:
+
+	rcu_read_unlock();
+	if (neigh)
+		neigh_release(neigh);
+
+	ip_rt_put(rt);
+	return rc;
+}
+
+/**
+ * i40iw_get_dst_ipv6
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+					    struct sockaddr_in6 *dst_addr)
+{
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.daddr = dst_addr->sin6_addr;
+	fl6.saddr = src_addr->sin6_addr;
+	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+		fl6.flowi6_oif = dst_addr->sin6_scope_id;
+
+	dst = ip6_route_output(&init_net, NULL, &fl6);
+	return dst;
+}
+#endif
+
+/**
+ * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
+					 u32 *src,
+					 u32 *dest,
+					 int arpindex)
+{
+	struct neighbour *neigh;
+	int rc = arpindex;
+	struct net_device *netdev = iwdev->netdev;
+	struct dst_entry *dst;
+	struct sockaddr_in6 dst_addr;
+	struct sockaddr_in6 src_addr;
+
+	memset(&dst_addr, 0, sizeof(dst_addr));
+	dst_addr.sin6_family = AF_INET6;
+	i40iw_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+	memset(&src_addr, 0, sizeof(src_addr));
+	src_addr.sin6_family = AF_INET6;
+	i40iw_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+	dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
+	if (!dst || dst->error) {
+		if (dst) {
+			dst_release(dst);
+			i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
+				     dst->error);
+		}
+		return rc;
+	}
+
+	if (netif_is_bond_slave(netdev))
+		netdev = netdev_master_upper_dev_get(netdev);
+
+	neigh = dst_neigh_lookup(dst, &dst_addr);
+
+	rcu_read_lock();
+	if (neigh) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "dst_neigh_lookup MAC=%pM\n", neigh->ha);
+		if (neigh->nud_state & NUD_VALID) {
+			if (arpindex >= 0) {
+				if (ether_addr_equal
+				    (iwdev->arp_table[arpindex].mac_addr,
+				     neigh->ha)) {
+					/* Mac address same as in arp table */
+					goto resolve_neigh_exit6;
+				}
+				i40iw_manage_arp_cache(iwdev,
+						       iwdev->arp_table[arpindex].mac_addr,
+						       dest,
+						       false,
+						       I40IW_ARP_DELETE);
+			}
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       dest,
+					       false,
+					       I40IW_ARP_ADD);
+			rc = i40iw_arp_table(iwdev,
+					     dest,
+					     false,
+					     NULL,
+					     I40IW_ARP_RESOLVE);
+		} else {
+			neigh_event_send(neigh, NULL);
+		}
+	}
+
+ resolve_neigh_exit6:
+	rcu_read_unlock();
+	if (neigh)
+		neigh_release(neigh);
+	dst_release(dst);
+	return rc;
+}
+#endif
+
+/**
+ * i40iw_ipv4_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv4_is_loopback(u32 loc_addr, u32 rem_addr)
+{
+	return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
+}
+
+/**
+ * i40iw_ipv6_is_loopback - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
+{
+	struct in6_addr raddr6;
+
+	i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
+	return (!memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6));
+}
+
+/**
+ * i40iw_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ */
+static struct i40iw_cm_node *i40iw_make_cm_node(
+				   struct i40iw_cm_core *cm_core,
+				   struct i40iw_device *iwdev,
+				   struct i40iw_cm_info *cm_info,
+				   struct i40iw_cm_listener *listener)
+{
+	struct i40iw_cm_node *cm_node;
+	struct timespec ts;
+	int oldarpindex;
+	int arpindex;
+	struct net_device *netdev = iwdev->netdev;
+
+	/* create an hte and cm_node for this instance */
+	cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+	if (!cm_node)
+		return NULL;
+
+	/* set our node specific transport info */
+	cm_node->ipv4 = cm_info->ipv4;
+	cm_node->vlan_id = cm_info->vlan_id;
+	memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+	memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+	cm_node->loc_port = cm_info->loc_port;
+	cm_node->rem_port = cm_info->rem_port;
+
+	cm_node->mpa_frame_rev = iwdev->mpa_version;
+	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+	cm_node->ird_size = I40IW_MAX_IRD_SIZE;
+	cm_node->ord_size = I40IW_MAX_ORD_SIZE;
+
+	cm_node->listener = listener;
+	cm_node->cm_id = cm_info->cm_id;
+	ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+	spin_lock_init(&cm_node->retrans_list_lock);
+
+	atomic_set(&cm_node->ref_count, 1);
+	/* associate our parent CM core */
+	cm_node->cm_core = cm_core;
+	cm_node->tcp_cntxt.loc_id = I40IW_CM_DEF_LOCAL_ID;
+	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+	cm_node->tcp_cntxt.rcv_wnd =
+			I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
+	ts = current_kernel_time();
+	cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+	cm_node->tcp_cntxt.mss = iwdev->mss;
+
+	cm_node->iwdev = iwdev;
+	cm_node->dev = &iwdev->sc_dev;
+
+	if ((cm_node->ipv4 &&
+	     i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+	     (!cm_node->ipv4 && i40iw_ipv6_is_loopback(cm_node->loc_addr,
+						       cm_node->rem_addr))) {
+		arpindex = i40iw_arp_table(iwdev,
+					   cm_node->rem_addr,
+					   false,
+					   NULL,
+					   I40IW_ARP_RESOLVE);
+	} else {
+		oldarpindex = i40iw_arp_table(iwdev,
+					      cm_node->rem_addr,
+					      false,
+					      NULL,
+					      I40IW_ARP_RESOLVE);
+		if (cm_node->ipv4)
+			arpindex = i40iw_addr_resolve_neigh(iwdev,
+							    cm_info->loc_addr[0],
+							    cm_info->rem_addr[0],
+							    oldarpindex);
+#if IS_ENABLED(CONFIG_IPV6)
+		else
+			arpindex = i40iw_addr_resolve_neigh_ipv6(iwdev,
+								 cm_info->loc_addr,
+								 cm_info->rem_addr,
+								 oldarpindex);
+#endif
+	}
+	if (arpindex < 0) {
+		i40iw_pr_err("cm_node arpindex\n");
+		kfree(cm_node);
+		return NULL;
+	}
+	ether_addr_copy(cm_node->rem_mac, iwdev->arp_table[arpindex].mac_addr);
+	i40iw_add_hte_node(cm_core, cm_node);
+	cm_core->stats_nodes_created++;
+	return cm_node;
+}
+
+/**
+ * i40iw_rem_ref_cm_node - destroy an instance of a cm node
+ * @cm_node: connection's node
+ */
+static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_cm_core *cm_core = cm_node->cm_core;
+	struct i40iw_qp *iwqp;
+	struct i40iw_cm_info nfo;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
+	if (atomic_dec_return(&cm_node->ref_count)) {
+		spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+		return;
+	}
+	list_del(&cm_node->list);
+	spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
+
+	/* if the node is destroyed before connection was accelerated */
+	if (!cm_node->accelerated && cm_node->accept_pend) {
+		pr_err("node destroyed before established\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+	}
+	if (cm_node->close_entry)
+		i40iw_handle_close_entry(cm_node, 0);
+	if (cm_node->listener) {
+		i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+	} else {
+		if (!i40iw_listen_port_in_use(cm_core, htons(cm_node->loc_port)) &&
+		    cm_node->apbvt_set && cm_node->iwdev) {
+			i40iw_manage_apbvt(cm_node->iwdev,
+					   cm_node->loc_port,
+					   I40IW_MANAGE_APBVT_DEL);
+			i40iw_get_addr_info(cm_node, &nfo);
+			if (cm_node->qhash_set) {
+				i40iw_manage_qhash(cm_node->iwdev,
+						   &nfo,
+						   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+						   I40IW_QHASH_MANAGE_TYPE_DELETE,
+						   NULL,
+						   false);
+				cm_node->qhash_set = 0;
+			}
+		}
+	}
+
+	iwqp = cm_node->iwqp;
+	if (iwqp) {
+		iwqp->cm_node = NULL;
+		i40iw_rem_ref(&iwqp->ibqp);
+		cm_node->iwqp = NULL;
+	} else if (cm_node->qhash_set) {
+		i40iw_get_addr_info(cm_node, &nfo);
+		i40iw_manage_qhash(cm_node->iwdev,
+				   &nfo,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+		cm_node->qhash_set = 0;
+	}
+
+	cm_node->cm_core->stats_nodes_destroyed++;
+	kfree(cm_node);
+}
+
+/**
+ * i40iw_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ */
+static void i40iw_handle_fin_pkt(struct i40iw_cm_node *cm_node)
+{
+	u32 ret;
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_MPAREJ_RCVD:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_LAST_ACK;
+		i40iw_send_fin(cm_node);
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSING;
+		i40iw_send_ack(cm_node);
+		/*
+		 * Wait for ACK as this is simultaneous close.
+		 * After we receive ACK, do not send anything.
+		 * Just rm the node.
+		 */
+		break;
+	case I40IW_CM_STATE_FIN_WAIT2:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_TIME_WAIT;
+		i40iw_send_ack(cm_node);
+		ret =
+		    i40iw_schedule_cm_timer(cm_node, NULL, I40IW_TIMER_TYPE_CLOSE, 1, 0);
+		if (ret)
+			i40iw_pr_err("node %p state = %d\n", cm_node, cm_node->state);
+		break;
+	case I40IW_CM_STATE_TIME_WAIT:
+		cm_node->tcp_cntxt.rcv_nxt++;
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	default:
+		i40iw_pr_err("bad state node %p state = %d\n", cm_node, cm_node->state);
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	i40iw_cleanup_retrans_entry(cm_node);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		switch (cm_node->mpa_frame_rev) {
+		case IETF_MPA_V2:
+			cm_node->mpa_frame_rev = IETF_MPA_V1;
+			/* send a syn and goto syn sent state */
+			cm_node->state = I40IW_CM_STATE_SYN_SENT;
+			if (i40iw_send_syn(cm_node, 0))
+				i40iw_active_open_err(cm_node, false);
+			break;
+		case IETF_MPA_V1:
+		default:
+			i40iw_active_open_err(cm_node, false);
+			break;
+		}
+		break;
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		atomic_add_return(1, &cm_node->passive_state);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_pr_err("Bad state state = %d\n", cm_node->state);
+		i40iw_passive_open_err(cm_node, false);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+		i40iw_active_open_err(cm_node, false);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		break;
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_LAST_ACK:
+		cm_node->cm_id->rem_ref(cm_node->cm_id);
+	case I40IW_CM_STATE_TIME_WAIT:
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_rcv_mpa - Process a recv'd mpa buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	int ret;
+	int datasize = rbuf->datalen;
+	u8 *dataloc = rbuf->data;
+
+	enum i40iw_cm_event_type type = I40IW_CM_EVENT_UNKNOWN;
+	u32 res_type;
+
+	ret = i40iw_parse_mpa(cm_node, dataloc, &res_type, datasize);
+	if (ret) {
+		if (cm_node->state == I40IW_CM_STATE_MPAREQ_SENT)
+			i40iw_active_open_err(cm_node, true);
+		else
+			i40iw_passive_open_err(cm_node, true);
+		return;
+	}
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_ESTABLISHED:
+		if (res_type == I40IW_MPA_REQUEST_REJECT)
+			i40iw_pr_err("state for reject\n");
+		cm_node->state = I40IW_CM_STATE_MPAREQ_RCVD;
+		type = I40IW_CM_EVENT_MPA_REQ;
+		i40iw_send_ack(cm_node);	/* ACK received MPA request */
+		atomic_set(&cm_node->passive_state,
+			   I40IW_PASSIVE_STATE_INDICATED);
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_cleanup_retrans_entry(cm_node);
+		if (res_type == I40IW_MPA_REQUEST_REJECT) {
+			type = I40IW_CM_EVENT_MPA_REJECT;
+			cm_node->state = I40IW_CM_STATE_MPAREJ_RCVD;
+		} else {
+			type = I40IW_CM_EVENT_CONNECTED;
+			cm_node->state = I40IW_CM_STATE_OFFLOADED;
+			i40iw_send_ack(cm_node);
+		}
+		break;
+	default:
+		pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
+		break;
+	}
+	i40iw_create_event(cm_node, type);
+}
+
+/**
+ * i40iw_indicate_pkt_err - Send up err event to cm
+ * @cm_node: connection's node
+ */
+static void i40iw_indicate_pkt_err(struct i40iw_cm_node *cm_node)
+{
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		i40iw_active_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_SYN_RCVD:
+		i40iw_passive_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_syn(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+	int err = 0;
+
+	if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
+		err = 1;
+		i40iw_active_open_err(cm_node, true);
+	}
+	return err;
+}
+
+/**
+ * i40iw_check_seq - check seq numbers if OK
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int i40iw_check_seq(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
+{
+	int err = 0;
+	u32 seq;
+	u32 ack_seq;
+	u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+	u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+	u32 rcv_wnd;
+
+	seq = ntohl(tcph->seq);
+	ack_seq = ntohl(tcph->ack_seq);
+	rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+	if (ack_seq != loc_seq_num)
+		err = -1;
+	else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+		err = -1;
+	if (err) {
+		i40iw_pr_err("seq number\n");
+		i40iw_indicate_pkt_err(cm_node);
+	}
+	return err;
+}
+
+/**
+ * i40iw_handle_syn_pkt - is for Passive node
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_syn_pkt(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	int ret;
+	u32 inc_sequence;
+	int optionsize;
+	struct i40iw_cm_info nfo;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	inc_sequence = ntohl(tcph->seq);
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		/* Rcvd syn on active open connection */
+		i40iw_active_open_err(cm_node, 1);
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		/* Passive OPEN */
+		if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+		    cm_node->listener->backlog) {
+			cm_node->cm_core->stats_backlog_drops++;
+			i40iw_passive_open_err(cm_node, false);
+			break;
+		}
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+		if (ret) {
+			i40iw_passive_open_err(cm_node, false);
+			/* drop pkt */
+			break;
+		}
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+		cm_node->accept_pend = 1;
+		atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+		cm_node->state = I40IW_CM_STATE_SYN_RCVD;
+		i40iw_get_addr_info(cm_node, &nfo);
+		ret = i40iw_manage_qhash(cm_node->iwdev,
+					 &nfo,
+					 I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					 I40IW_QHASH_MANAGE_TYPE_ADD,
+					 (void *)cm_node,
+					 false);
+		cm_node->qhash_set = true;
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_CLOSING:
+	case I40IW_CM_STATE_UNKNOWN:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_handle_synack_pkt(struct i40iw_cm_node *cm_node,
+				    struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	int ret;
+	u32 inc_sequence;
+	int optionsize;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	inc_sequence = ntohl(tcph->seq);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_SENT:
+		i40iw_cleanup_retrans_entry(cm_node);
+		/* active open */
+		if (i40iw_check_syn(cm_node, tcph)) {
+			i40iw_pr_err("check syn fail\n");
+			return;
+		}
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		/* setup options */
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 0);
+		if (ret) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p tcp_options failed\n",
+				    cm_node);
+			break;
+		}
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+		i40iw_send_ack(cm_node);	/* ACK  for the syn_ack */
+		ret = i40iw_send_mpa_request(cm_node);
+		if (ret) {
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "cm_node=%p i40iw_send_mpa_request failed\n",
+				    cm_node);
+			break;
+		}
+		cm_node->state = I40IW_CM_STATE_MPAREQ_SENT;
+		break;
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		i40iw_passive_open_err(cm_node, true);
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_CLOSING:
+	case I40IW_CM_STATE_UNKNOWN:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node,
+				struct i40iw_puda_buf *rbuf)
+{
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	u32 inc_sequence;
+	int ret = 0;
+	int optionsize;
+	u32 datasize = rbuf->datalen;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+	if (i40iw_check_seq(cm_node, tcph))
+		return -EINVAL;
+
+	inc_sequence = ntohl(tcph->seq);
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+		i40iw_cleanup_retrans_entry(cm_node);
+		ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
+		if (ret)
+			break;
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		cm_node->state = I40IW_CM_STATE_ESTABLISHED;
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_ESTABLISHED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_MPAREQ_SENT:
+		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		if (datasize) {
+			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+			i40iw_handle_rcv_mpa(cm_node, rbuf);
+		}
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSED:
+		i40iw_cleanup_retrans_entry(cm_node);
+		atomic_inc(&cm_node->ref_count);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_CLOSING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_CLOSED;
+		if (!cm_node->accept_pend)
+			cm_node->cm_id->rem_ref(cm_node->cm_id);
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+		i40iw_cleanup_retrans_entry(cm_node);
+		cm_node->state = I40IW_CM_STATE_FIN_WAIT2;
+		break;
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_OFFLOADED:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+	case I40IW_CM_STATE_UNKNOWN:
+	default:
+		i40iw_cleanup_retrans_entry(cm_node);
+		break;
+	}
+	return ret;
+}
+
+/**
+ * i40iw_process_packet - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void i40iw_process_packet(struct i40iw_cm_node *cm_node,
+				 struct i40iw_puda_buf *rbuf)
+{
+	enum i40iw_tcpip_pkt_type pkt_type = I40IW_PKT_TYPE_UNKNOWN;
+	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+	u32 fin_set = 0;
+	int ret;
+
+	if (tcph->rst) {
+		pkt_type = I40IW_PKT_TYPE_RST;
+	} else if (tcph->syn) {
+		pkt_type = I40IW_PKT_TYPE_SYN;
+		if (tcph->ack)
+			pkt_type = I40IW_PKT_TYPE_SYNACK;
+	} else if (tcph->ack) {
+		pkt_type = I40IW_PKT_TYPE_ACK;
+	}
+	if (tcph->fin)
+		fin_set = 1;
+
+	switch (pkt_type) {
+	case I40IW_PKT_TYPE_SYN:
+		i40iw_handle_syn_pkt(cm_node, rbuf);
+		break;
+	case I40IW_PKT_TYPE_SYNACK:
+		i40iw_handle_synack_pkt(cm_node, rbuf);
+		break;
+	case I40IW_PKT_TYPE_ACK:
+		ret = i40iw_handle_ack_pkt(cm_node, rbuf);
+		if (fin_set && !ret)
+			i40iw_handle_fin_pkt(cm_node);
+		break;
+	case I40IW_PKT_TYPE_RST:
+		i40iw_handle_rst_pkt(cm_node, rbuf);
+		break;
+	default:
+		if (fin_set &&
+		    (!i40iw_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
+			i40iw_handle_fin_pkt(cm_node);
+		break;
+	}
+}
+
+/**
+ * i40iw_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_listener *i40iw_make_listen_node(
+					struct i40iw_cm_core *cm_core,
+					struct i40iw_device *iwdev,
+					struct i40iw_cm_info *cm_info)
+{
+	struct i40iw_cm_listener *listener;
+	unsigned long flags;
+
+	/* cannot have multiple matching listeners */
+	listener = i40iw_find_listener(cm_core, cm_info->loc_addr,
+				       cm_info->loc_port,
+				       cm_info->vlan_id,
+				       I40IW_CM_LISTENER_EITHER_STATE);
+	if (listener &&
+	    (listener->listener_state == I40IW_CM_LISTENER_ACTIVE_STATE)) {
+		atomic_dec(&listener->ref_count);
+		i40iw_debug(cm_core->dev,
+			    I40IW_DEBUG_CM,
+			    "Not creating listener since it already exists\n");
+		return NULL;
+	}
+
+	if (!listener) {
+		/* create a CM listen node (1/2 node to compare incoming traffic to) */
+		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+		if (!listener)
+			return NULL;
+		cm_core->stats_listen_nodes_created++;
+		memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr));
+		listener->loc_port = cm_info->loc_port;
+
+		INIT_LIST_HEAD(&listener->child_listen_list);
+
+		atomic_set(&listener->ref_count, 1);
+	} else {
+		listener->reused_node = 1;
+	}
+
+	listener->cm_id = cm_info->cm_id;
+	listener->ipv4 = cm_info->ipv4;
+	listener->vlan_id = cm_info->vlan_id;
+	atomic_set(&listener->pend_accepts_cnt, 0);
+	listener->cm_core = cm_core;
+	listener->iwdev = iwdev;
+
+	listener->backlog = cm_info->backlog;
+	listener->listener_state = I40IW_CM_LISTENER_ACTIVE_STATE;
+
+	if (!listener->reused_node) {
+		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+		list_add(&listener->list, &cm_core->listen_nodes);
+		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+	}
+
+	return listener;
+}
+
+/**
+ * i40iw_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @private_data_len: len to provate data for mpa request
+ * @private_data: pointer to private data for connection
+ * @cm_info: quad info for connection
+ */
+static struct i40iw_cm_node *i40iw_create_cm_node(
+					struct i40iw_cm_core *cm_core,
+					struct i40iw_device *iwdev,
+					u16 private_data_len,
+					void *private_data,
+					struct i40iw_cm_info *cm_info)
+{
+	int ret;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_listener *loopback_remotelistener;
+	struct i40iw_cm_node *loopback_remotenode;
+	struct i40iw_cm_info loopback_cm_info;
+
+	/* create a CM connection node */
+	cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
+	if (!cm_node)
+		return NULL;
+	/* set our node side to client (active) side */
+	cm_node->tcp_cntxt.client = 1;
+	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
+
+	if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
+		loopback_remotelistener = i40iw_find_listener(
+						cm_core,
+						cm_info->rem_addr,
+						cm_node->rem_port,
+						cm_node->vlan_id,
+						I40IW_CM_LISTENER_ACTIVE_STATE);
+		if (!loopback_remotelistener) {
+			i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+		} else {
+			loopback_cm_info = *cm_info;
+			loopback_cm_info.loc_port = cm_info->rem_port;
+			loopback_cm_info.rem_port = cm_info->loc_port;
+			loopback_cm_info.cm_id = loopback_remotelistener->cm_id;
+			loopback_cm_info.ipv4 = cm_info->ipv4;
+			loopback_remotenode = i40iw_make_cm_node(cm_core,
+								 iwdev,
+								 &loopback_cm_info,
+								 loopback_remotelistener);
+			if (!loopback_remotenode) {
+				i40iw_rem_ref_cm_node(cm_node);
+				return NULL;
+			}
+			cm_core->stats_loopbacks++;
+			loopback_remotenode->loopbackpartner = cm_node;
+			loopback_remotenode->tcp_cntxt.rcv_wscale =
+				I40IW_CM_DEFAULT_RCV_WND_SCALE;
+			cm_node->loopbackpartner = loopback_remotenode;
+			memcpy(loopback_remotenode->pdata_buf, private_data,
+			       private_data_len);
+			loopback_remotenode->pdata.size = private_data_len;
+
+			cm_node->state = I40IW_CM_STATE_OFFLOADED;
+			cm_node->tcp_cntxt.rcv_nxt =
+				loopback_remotenode->tcp_cntxt.loc_seq_num;
+			loopback_remotenode->tcp_cntxt.rcv_nxt =
+				cm_node->tcp_cntxt.loc_seq_num;
+			cm_node->tcp_cntxt.max_snd_wnd =
+				loopback_remotenode->tcp_cntxt.rcv_wnd;
+			loopback_remotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wnd = loopback_remotenode->tcp_cntxt.rcv_wnd;
+			loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
+			cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
+			loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
+			loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD;
+			i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ);
+		}
+		return cm_node;
+	}
+
+	cm_node->pdata.size = private_data_len;
+	cm_node->pdata.addr = cm_node->pdata_buf;
+
+	memcpy(cm_node->pdata_buf, private_data, private_data_len);
+
+	cm_node->state = I40IW_CM_STATE_SYN_SENT;
+	ret = i40iw_send_syn(cm_node, 0);
+
+	if (ret) {
+		if (cm_node->ipv4)
+			i40iw_debug(cm_node->dev,
+				    I40IW_DEBUG_CM,
+				    "Api - connect() FAILED: dest addr=%pI4",
+				    cm_node->rem_addr);
+		else
+			i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
+				    "Api - connect() FAILED: dest addr=%pI6",
+				    cm_node->rem_addr);
+		i40iw_rem_ref_cm_node(cm_node);
+		cm_node = NULL;
+	}
+
+	if (cm_node)
+		i40iw_debug(cm_node->dev,
+			    I40IW_DEBUG_CM,
+			    "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
+			    cm_node->rem_port,
+			    cm_node,
+			    cm_node->cm_id);
+
+	return cm_node;
+}
+
+/**
+ * i40iw_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdate: ptr to private data for reject
+ * @plen: size of private data
+ */
+static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
+{
+	int ret = 0;
+	int err;
+	int passive_state;
+	struct iw_cm_id *cm_id = cm_node->cm_id;
+	struct i40iw_cm_node *loopback = cm_node->loopbackpartner;
+
+	if (cm_node->tcp_cntxt.client)
+		return ret;
+	i40iw_cleanup_retrans_entry(cm_node);
+
+	if (!loopback) {
+		passive_state = atomic_add_return(1, &cm_node->passive_state);
+		if (passive_state == I40IW_SEND_RESET_EVENT) {
+			cm_node->state = I40IW_CM_STATE_CLOSED;
+			i40iw_rem_ref_cm_node(cm_node);
+		} else {
+			if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+				i40iw_rem_ref_cm_node(cm_node);
+			} else {
+				ret = i40iw_send_mpa_reject(cm_node, pdata, plen);
+				if (ret) {
+					cm_node->state = I40IW_CM_STATE_CLOSED;
+					err = i40iw_send_reset(cm_node);
+					if (err)
+						i40iw_pr_err("send reset failed\n");
+				} else {
+					cm_id->add_ref(cm_id);
+				}
+			}
+		}
+	} else {
+		cm_node->cm_id = NULL;
+		if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+			i40iw_rem_ref_cm_node(cm_node);
+			i40iw_rem_ref_cm_node(loopback);
+		} else {
+			ret = i40iw_send_cm_event(loopback,
+						  loopback->cm_id,
+						  IW_CM_EVENT_CONNECT_REPLY,
+						  -ECONNREFUSED);
+			i40iw_rem_ref_cm_node(cm_node);
+			loopback->state = I40IW_CM_STATE_CLOSING;
+
+			cm_id = loopback->cm_id;
+			i40iw_rem_ref_cm_node(loopback);
+			cm_id->rem_ref(cm_id);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * i40iw_cm_close - close of cm connection
+ * @cm_node: connection's node
+ */
+static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
+{
+	int ret = 0;
+
+	if (!cm_node)
+		return -EINVAL;
+
+	switch (cm_node->state) {
+	case I40IW_CM_STATE_SYN_RCVD:
+	case I40IW_CM_STATE_SYN_SENT:
+	case I40IW_CM_STATE_ONE_SIDE_ESTABLISHED:
+	case I40IW_CM_STATE_ESTABLISHED:
+	case I40IW_CM_STATE_ACCEPTING:
+	case I40IW_CM_STATE_MPAREQ_SENT:
+	case I40IW_CM_STATE_MPAREQ_RCVD:
+		i40iw_cleanup_retrans_entry(cm_node);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_CLOSE_WAIT:
+		cm_node->state = I40IW_CM_STATE_LAST_ACK;
+		i40iw_send_fin(cm_node);
+		break;
+	case I40IW_CM_STATE_FIN_WAIT1:
+	case I40IW_CM_STATE_FIN_WAIT2:
+	case I40IW_CM_STATE_LAST_ACK:
+	case I40IW_CM_STATE_TIME_WAIT:
+	case I40IW_CM_STATE_CLOSING:
+		ret = -1;
+		break;
+	case I40IW_CM_STATE_LISTENING:
+		i40iw_cleanup_retrans_entry(cm_node);
+		i40iw_send_reset(cm_node);
+		break;
+	case I40IW_CM_STATE_MPAREJ_RCVD:
+	case I40IW_CM_STATE_UNKNOWN:
+	case I40IW_CM_STATE_INITED:
+	case I40IW_CM_STATE_CLOSED:
+	case I40IW_CM_STATE_LISTENER_DESTROYED:
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	case I40IW_CM_STATE_OFFLOADED:
+		if (cm_node->send_entry)
+			i40iw_pr_err("send_entry\n");
+		i40iw_rem_ref_cm_node(cm_node);
+		break;
+	}
+	return ret;
+}
+
+/**
+ * i40iw_receive_ilq - recv an ETHERNET packet, and process it
+ * through CM
+ * @dev: FPK dev struct
+ * @rbuf: receive buffer
+ */
+void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+{
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_listener *listener;
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct tcphdr *tcph;
+	struct i40iw_cm_info cm_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	struct vlan_ethhdr *ethh;
+
+	/* if vlan, then maclen = 18 else 14 */
+	iph = (struct iphdr *)rbuf->iph;
+	memset(&cm_info, 0, sizeof(cm_info));
+
+	i40iw_debug_buf(dev,
+			I40IW_DEBUG_ILQ,
+			"RECEIVE ILQ BUFFER",
+			rbuf->mem.va,
+			rbuf->totallen);
+	ethh = (struct vlan_ethhdr *)rbuf->mem.va;
+
+	if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+		cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+		i40iw_debug(cm_core->dev,
+			    I40IW_DEBUG_CM,
+			    "%s vlan_id=%d\n",
+			    __func__,
+			    cm_info.vlan_id);
+	} else {
+		cm_info.vlan_id = I40IW_NO_VLAN;
+	}
+	tcph = (struct tcphdr *)rbuf->tcph;
+
+	if (rbuf->ipv4) {
+		cm_info.loc_addr[0] = ntohl(iph->daddr);
+		cm_info.rem_addr[0] = ntohl(iph->saddr);
+		cm_info.ipv4 = true;
+	} else {
+		ip6h = (struct ipv6hdr *)rbuf->iph;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    ip6h->daddr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(cm_info.rem_addr,
+				    ip6h->saddr.in6_u.u6_addr32);
+		cm_info.ipv4 = false;
+	}
+	cm_info.loc_port = ntohs(tcph->dest);
+	cm_info.rem_port = ntohs(tcph->source);
+	cm_node = i40iw_find_node(cm_core,
+				  cm_info.rem_port,
+				  cm_info.rem_addr,
+				  cm_info.loc_port,
+				  cm_info.loc_addr,
+				  true);
+
+	if (!cm_node) {
+		/* Only type of packet accepted are for */
+		/* the PASSIVE open (syn only) */
+		if (!tcph->syn || tcph->ack)
+			return;
+		listener =
+		    i40iw_find_listener(cm_core,
+					cm_info.loc_addr,
+					cm_info.loc_port,
+					cm_info.vlan_id,
+					I40IW_CM_LISTENER_ACTIVE_STATE);
+		if (!listener) {
+			cm_info.cm_id = NULL;
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s no listener found\n",
+				    __func__);
+			return;
+		}
+		cm_info.cm_id = listener->cm_id;
+		cm_node = i40iw_make_cm_node(cm_core, iwdev, &cm_info, listener);
+		if (!cm_node) {
+			i40iw_debug(cm_core->dev,
+				    I40IW_DEBUG_CM,
+				    "%s allocate node failed\n",
+				    __func__);
+			atomic_dec(&listener->ref_count);
+			return;
+		}
+		if (!tcph->rst && !tcph->fin) {
+			cm_node->state = I40IW_CM_STATE_LISTENING;
+		} else {
+			i40iw_rem_ref_cm_node(cm_node);
+			return;
+		}
+		atomic_inc(&cm_node->ref_count);
+	} else if (cm_node->state == I40IW_CM_STATE_OFFLOADED) {
+		i40iw_rem_ref_cm_node(cm_node);
+		return;
+	}
+	i40iw_process_packet(cm_node, rbuf);
+	i40iw_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * i40iw_setup_cm_core - allocate a top level instance of a cm
+ * core
+ * @iwdev: iwarp device structure
+ */
+void i40iw_setup_cm_core(struct i40iw_device *iwdev)
+{
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+	cm_core->iwdev = iwdev;
+	cm_core->dev = &iwdev->sc_dev;
+
+	INIT_LIST_HEAD(&cm_core->connected_nodes);
+	INIT_LIST_HEAD(&cm_core->listen_nodes);
+
+	init_timer(&cm_core->tcp_timer);
+	cm_core->tcp_timer.function = i40iw_cm_timer_tick;
+	cm_core->tcp_timer.data = (unsigned long)cm_core;
+
+	spin_lock_init(&cm_core->ht_lock);
+	spin_lock_init(&cm_core->listen_list_lock);
+
+	cm_core->event_wq = create_singlethread_workqueue("iwewq");
+	cm_core->disconn_wq = create_singlethread_workqueue("iwdwq");
+}
+
+/**
+ * i40iw_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core
+ */
+void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core)
+{
+	unsigned long flags;
+
+	if (!cm_core)
+		return;
+
+	spin_lock_irqsave(&cm_core->ht_lock, flags);
+	if (timer_pending(&cm_core->tcp_timer))
+		del_timer_sync(&cm_core->tcp_timer);
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	destroy_workqueue(cm_core->event_wq);
+	destroy_workqueue(cm_core->disconn_wq);
+}
+
+/**
+ * i40iw_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
+			       struct i40iw_tcp_offload_info *tcp_info,
+			       struct i40iw_qp *iwqp)
+{
+	tcp_info->ipv4 = cm_node->ipv4;
+	tcp_info->drop_ooo_seg = true;
+	tcp_info->wscale = true;
+	tcp_info->ignore_tcp_opt = true;
+	tcp_info->ignore_tcp_uns_opt = true;
+	tcp_info->no_nagle = false;
+
+	tcp_info->ttl = I40IW_DEFAULT_TTL;
+	tcp_info->rtt_var = cpu_to_le32(I40IW_DEFAULT_RTT_VAR);
+	tcp_info->ss_thresh = cpu_to_le32(I40IW_DEFAULT_SS_THRESH);
+	tcp_info->rexmit_thresh = I40IW_DEFAULT_REXMIT_THRESH;
+
+	tcp_info->tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+	tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+	tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+	tcp_info->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
+	tcp_info->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	tcp_info->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+
+	tcp_info->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
+	tcp_info->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
+	tcp_info->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
+	tcp_info->max_snd_window = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
+	tcp_info->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
+					cm_node->tcp_cntxt.rcv_wscale);
+
+	tcp_info->flow_label = 0;
+	tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
+	if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+		tcp_info->insert_vlan_tag = true;
+		tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
+	}
+	if (cm_node->ipv4) {
+		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+		tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+
+		tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
+		tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
+		tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(iwqp->iwdev,
+								&tcp_info->dest_ip_addr3,
+								true,
+								NULL,
+								I40IW_ARP_RESOLVE));
+	} else {
+		tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
+		tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
+		tcp_info->dest_ip_addr0 = cpu_to_le32(cm_node->rem_addr[0]);
+		tcp_info->dest_ip_addr1 = cpu_to_le32(cm_node->rem_addr[1]);
+		tcp_info->dest_ip_addr2 = cpu_to_le32(cm_node->rem_addr[2]);
+		tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[3]);
+		tcp_info->local_ipaddr0 = cpu_to_le32(cm_node->loc_addr[0]);
+		tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
+		tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
+		tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
+		tcp_info->arp_idx = cpu_to_le32(i40iw_arp_table(
+							iwqp->iwdev,
+							&tcp_info->dest_ip_addr0,
+							false,
+							NULL,
+							I40IW_ARP_RESOLVE));
+	}
+}
+
+/**
+ * i40iw_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associate qp for the connection
+ * @cm_node: connection's node
+ */
+static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
+				   struct i40iw_cm_node *cm_node)
+{
+	struct i40iw_tcp_offload_info tcp_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
+
+	memset(&tcp_info, 0x00, sizeof(struct i40iw_tcp_offload_info));
+	iwarp_info = &iwqp->iwarp_info;
+	ctx_info = &iwqp->ctx_info;
+
+	ctx_info->tcp_info = &tcp_info;
+	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+	iwarp_info->ord_size = cm_node->ord_size;
+	iwarp_info->ird_size = i40iw_derive_hw_ird_setting(cm_node->ird_size);
+
+	if (iwarp_info->ord_size == 1)
+		iwarp_info->ord_size = 2;
+
+	iwarp_info->rd_enable = true;
+	iwarp_info->rdmap_ver = 1;
+	iwarp_info->ddp_ver = 1;
+
+	iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+	ctx_info->tcp_info_valid = true;
+	ctx_info->iwarp_info_valid = true;
+
+	i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
+	if (cm_node->snd_mark_en) {
+		iwarp_info->snd_mark_en = true;
+		iwarp_info->snd_mark_offset = (tcp_info.snd_nxt &
+				SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
+	}
+
+	cm_node->state = I40IW_CM_STATE_OFFLOADED;
+	tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+	tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+
+	dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
+
+	/* once tcp_info is set, no need to do it again */
+	ctx_info->tcp_info_valid = false;
+	ctx_info->iwarp_info_valid = false;
+}
+
+/**
+ * i40iw_cm_disconn - when a connection is being closed
+ * @iwqp: associate qp for the connection
+ */
+int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+{
+	struct disconn_work *work;
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;	/* Timer will clean up */
+
+	i40iw_add_ref(&iwqp->ibqp);
+	work->iwqp = iwqp;
+	INIT_WORK(&work->work, i40iw_disconnect_worker);
+	queue_work(cm_core->disconn_wq, &work->work);
+	return 0;
+}
+
+/**
+ * i40iw_loopback_nop - Send a nop
+ * @qp: associated hw qp
+ */
+static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = qp->qp_uk.sq_base->elem;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(0, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_qp_disconnect - free qp and close cm
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_ib_device *iwibdev;
+
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	if (!iwdev) {
+		i40iw_pr_err("iwdev == NULL\n");
+		return;
+	}
+
+	iwibdev = iwdev->iwibdev;
+
+	if (iwqp->active_conn) {
+		/* indicate this connection is NOT active */
+		iwqp->active_conn = 0;
+	} else {
+		/* Need to free the Last Streaming Mode Message */
+		if (iwqp->ietf_mem.va) {
+			if (iwqp->lsmm_mr)
+				iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
+			i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
+		}
+	}
+
+	/* close the CM node down if it is still active */
+	if (iwqp->cm_node) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "%s Call close API\n", __func__);
+		i40iw_cm_close(iwqp->cm_node);
+	}
+}
+
+/**
+ * i40iw_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associate qp for the connection
+ */
+static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
+{
+	struct iw_cm_id *cm_id;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+	u16 last_ae;
+	u8 original_hw_tcp_state;
+	u8 original_ibqp_state;
+	int disconn_status = 0;
+	int issue_disconn = 0;
+	int issue_close = 0;
+	int issue_flush = 0;
+	struct ib_event ibevent;
+	unsigned long flags;
+	int ret;
+
+	if (!iwqp) {
+		i40iw_pr_err("iwqp == NULL\n");
+		return;
+	}
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	cm_id = iwqp->cm_id;
+	/* make sure we havent already closed this connection */
+	if (!cm_id) {
+		spin_unlock_irqrestore(&iwqp->lock, flags);
+		return;
+	}
+
+	iwdev = to_iwdev(iwqp->ibqp.device);
+
+	original_hw_tcp_state = iwqp->hw_tcp_state;
+	original_ibqp_state = iwqp->ibqp_state;
+	last_ae = iwqp->last_aeq;
+
+	if (qp->term_flags) {
+		issue_disconn = 1;
+		issue_close = 1;
+		iwqp->cm_id = NULL;
+		/*When term timer expires after cm_timer, don't want
+		 *terminate-handler to issue cm_disconn which can re-free
+		 *a QP even after its refcnt=0.
+		 */
+		del_timer(&iwqp->terminate_timer);
+		if (!iwqp->flush_issued) {
+			iwqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	} else if ((original_hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) ||
+		   ((original_ibqp_state == IB_QPS_RTS) &&
+		    (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+		issue_disconn = 1;
+		if (last_ae == I40IW_AE_LLP_CONNECTION_RESET)
+			disconn_status = -ECONNRESET;
+	}
+
+	if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
+	     (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
+	     (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
+	     (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
+		issue_close = 1;
+		iwqp->cm_id = NULL;
+		if (!iwqp->flush_issued) {
+			iwqp->flush_issued = 1;
+			issue_flush = 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (issue_flush && !iwqp->destroyed) {
+		/* Flush the queues */
+		i40iw_flush_wqes(iwdev, iwqp);
+
+		if (qp->term_flags) {
+			ibevent.device = iwqp->ibqp.device;
+			ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
+					IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
+			ibevent.element.qp = &iwqp->ibqp;
+			iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+		}
+	}
+
+	if (cm_id && cm_id->event_handler) {
+		if (issue_disconn) {
+			ret = i40iw_send_cm_event(NULL,
+						  cm_id,
+						  IW_CM_EVENT_DISCONNECT,
+						  disconn_status);
+
+			if (ret)
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "disconnect event failed %s: - cm_id = %p\n",
+					    __func__, cm_id);
+		}
+		if (issue_close) {
+			i40iw_qp_disconnect(iwqp);
+			cm_id->provider_data = iwqp;
+			ret = i40iw_send_cm_event(NULL, cm_id, IW_CM_EVENT_CLOSE, 0);
+			if (ret)
+				i40iw_debug(&iwdev->sc_dev,
+					    I40IW_DEBUG_CM,
+					    "close event failed %s: - cm_id = %p\n",
+					    __func__, cm_id);
+			cm_id->rem_ref(cm_id);
+		}
+	}
+}
+
+/**
+ * i40iw_disconnect_worker - worker for connection close
+ * @work: points or disconn structure
+ */
+static void i40iw_disconnect_worker(struct work_struct *work)
+{
+	struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+	struct i40iw_qp *iwqp = dwork->iwqp;
+
+	kfree(dwork);
+	i40iw_cm_disconn_true(iwqp);
+	i40iw_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * i40iw_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accpet parameters
+ */
+int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	struct ib_qp *ibqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cm_node *cm_node;
+	struct ib_qp_attr attr;
+	int passive_state;
+	struct i40iw_ib_device *iwibdev;
+	struct ib_mr *ibmr;
+	struct i40iw_pd *iwpd;
+	u16 buf_len = 0;
+	struct i40iw_kmem_info accept;
+	enum i40iw_status_code status;
+	u64 tagged_offset;
+
+	memset(&attr, 0, sizeof(attr));
+	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+
+	iwqp = to_iwqp(ibqp);
+	iwdev = iwqp->iwdev;
+	dev = &iwdev->sc_dev;
+	cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+
+	if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+		cm_node->ipv4 = true;
+		cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr);
+	} else {
+		cm_node->ipv4 = false;
+		i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL);
+	}
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "Accept vlan_id=%d\n",
+		    cm_node->vlan_id);
+	if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
+		if (cm_node->loopbackpartner)
+			i40iw_rem_ref_cm_node(cm_node->loopbackpartner);
+		i40iw_rem_ref_cm_node(cm_node);
+		return -EINVAL;
+	}
+
+	passive_state = atomic_add_return(1, &cm_node->passive_state);
+	if (passive_state == I40IW_SEND_RESET_EVENT) {
+		i40iw_rem_ref_cm_node(cm_node);
+		return -ECONNRESET;
+	}
+
+	cm_node->cm_core->stats_accepts++;
+	iwqp->cm_node = (void *)cm_node;
+	cm_node->iwqp = iwqp;
+
+	buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+
+	status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
+
+	if (status)
+		return -ENOMEM;
+	cm_node->pdata.size = conn_param->private_data_len;
+	accept.addr = iwqp->ietf_mem.va;
+	accept.size = i40iw_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+	memcpy(accept.addr + accept.size, conn_param->private_data,
+	       conn_param->private_data_len);
+
+	/* setup our first outgoing iWarp send WQE (the IETF frame response) */
+	if ((cm_node->ipv4 &&
+	     !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+	    (!cm_node->ipv4 &&
+	     !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
+		iwibdev = iwdev->iwibdev;
+		iwpd = iwqp->iwpd;
+		tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+		ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
+					 iwqp->ietf_mem.pa,
+					 buf_len,
+					 IB_ACCESS_LOCAL_WRITE,
+					 &tagged_offset);
+		if (IS_ERR(ibmr)) {
+			i40iw_free_dma_mem(dev->hw, &iwqp->ietf_mem);
+			return -ENOMEM;
+		}
+
+		ibmr->pd = &iwpd->ibpd;
+		ibmr->device = iwpd->ibpd.device;
+		iwqp->lsmm_mr = ibmr;
+		if (iwqp->page)
+			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+		if (is_remote_ne020_or_chelsio(cm_node))
+			dev->iw_priv_qp_ops->qp_send_lsmm(
+							&iwqp->sc_qp,
+							iwqp->ietf_mem.va,
+							(accept.size + conn_param->private_data_len),
+							ibmr->lkey);
+		else
+			dev->iw_priv_qp_ops->qp_send_lsmm(
+							&iwqp->sc_qp,
+							iwqp->ietf_mem.va,
+							(accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
+							ibmr->lkey);
+
+	} else {
+		if (iwqp->page)
+			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+		i40iw_loopback_nop(&iwqp->sc_qp);
+	}
+
+	if (iwqp->page)
+		kunmap(iwqp->page);
+
+	iwqp->cm_id = cm_id;
+	cm_node->cm_id = cm_id;
+
+	cm_id->provider_data = (void *)iwqp;
+	iwqp->active_conn = 0;
+
+	cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+	i40iw_cm_init_tsa_conn(iwqp, cm_node);
+	cm_id->add_ref(cm_id);
+	i40iw_add_ref(&iwqp->ibqp);
+
+	i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+
+	attr.qp_state = IB_QPS_RTS;
+	cm_node->qhash_set = false;
+	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+	if (cm_node->loopbackpartner) {
+		cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
+
+		/* copy entire MPA frame to our cm_node's frame */
+		memcpy(cm_node->loopbackpartner->pdata_buf,
+		       conn_param->private_data,
+		       conn_param->private_data_len);
+		i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
+	}
+
+	cm_node->accelerated = 1;
+	if (cm_node->accept_pend) {
+		if (!cm_node->listener)
+			i40iw_pr_err("cm_node->listener NULL for passive node\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		cm_node->accept_pend = 0;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ */
+int i40iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_node *loopback;
+
+	cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
+	loopback = cm_node->loopbackpartner;
+	cm_node->cm_id = cm_id;
+	cm_node->pdata.size = pdata_len;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (!iwdev)
+		return -EINVAL;
+	cm_node->cm_core->stats_rejects++;
+
+	if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
+		return -EINVAL;
+
+	if (loopback) {
+		memcpy(&loopback->pdata_buf, pdata, pdata_len);
+		loopback->pdata.size = pdata_len;
+	}
+
+	return i40iw_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * i40iw_connect - registered call for connection to be established
+ * @cm_id: cm information for passive connection
+ * @conn_param: Information about the connection
+ */
+int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	struct ib_qp *ibqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_cm_info cm_info;
+	struct sockaddr_in *laddr;
+	struct sockaddr_in *raddr;
+	struct sockaddr_in6 *laddr6;
+	struct sockaddr_in6 *raddr6;
+	int apbvt_set = 0;
+	enum i40iw_status_code status;
+
+	ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
+	if (!ibqp)
+		return -EINVAL;
+	iwqp = to_iwqp(ibqp);
+	if (!iwqp)
+		return -EINVAL;
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	if (!iwdev)
+		return -EINVAL;
+
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+	if (!(laddr->sin_port) || !(raddr->sin_port))
+		return -EINVAL;
+
+	iwqp->active_conn = 1;
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = iwqp;
+
+	/* set up the connection params for the node */
+	if (cm_id->remote_addr.ss_family == AF_INET) {
+		cm_info.ipv4 = true;
+		memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+		memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+		cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+		cm_info.loc_port = ntohs(laddr->sin_port);
+		cm_info.rem_port = ntohs(raddr->sin_port);
+		cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+	} else {
+		cm_info.ipv4 = false;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    laddr6->sin6_addr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(cm_info.rem_addr,
+				    raddr6->sin6_addr.in6_u.u6_addr32);
+		cm_info.loc_port = ntohs(laddr6->sin6_port);
+		cm_info.rem_port = ntohs(raddr6->sin6_port);
+		i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
+	}
+	cm_info.cm_id = cm_id;
+	if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
+	    (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
+				     raddr6->sin6_addr.in6_u.u6_addr32,
+				     sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
+		status = i40iw_manage_qhash(iwdev,
+					    &cm_info,
+					    I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+					    I40IW_QHASH_MANAGE_TYPE_ADD,
+					    NULL,
+					    true);
+		if (status)
+			return -EINVAL;
+	}
+	status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
+	if (status) {
+		i40iw_manage_qhash(iwdev,
+				   &cm_info,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+		return -EINVAL;
+	}
+
+	apbvt_set = 1;
+	cm_id->add_ref(cm_id);
+	cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
+				       conn_param->private_data_len,
+				       (void *)conn_param->private_data,
+				       &cm_info);
+	if (!cm_node) {
+		i40iw_manage_qhash(iwdev,
+				   &cm_info,
+				   I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+				   I40IW_QHASH_MANAGE_TYPE_DELETE,
+				   NULL,
+				   false);
+
+		if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
+							   cm_info.loc_port))
+			i40iw_manage_apbvt(iwdev,
+					   cm_info.loc_port,
+					   I40IW_MANAGE_APBVT_DEL);
+		cm_id->rem_ref(cm_id);
+		iwdev->cm_core.stats_connect_errs++;
+		return -ENOMEM;
+	}
+
+	i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+	if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+	    !cm_node->ord_size)
+		cm_node->ord_size = 1;
+
+	cm_node->apbvt_set = apbvt_set;
+	cm_node->qhash_set = true;
+	iwqp->cm_node = cm_node;
+	cm_node->iwqp = iwqp;
+	iwqp->cm_id = cm_id;
+	i40iw_add_ref(&iwqp->ibqp);
+	return 0;
+}
+
+/**
+ * i40iw_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: to max accept pending count
+ */
+int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_listener *cm_listen_node;
+	struct i40iw_cm_info cm_info;
+	enum i40iw_status_code ret;
+	struct sockaddr_in *laddr;
+	struct sockaddr_in6 *laddr6;
+	bool wildcard = false;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (!iwdev)
+		return -EINVAL;
+
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	memset(&cm_info, 0, sizeof(cm_info));
+	if (laddr->sin_family == AF_INET) {
+		cm_info.ipv4 = true;
+		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+		cm_info.loc_port = ntohs(laddr->sin_port);
+
+		if (laddr->sin_addr.s_addr != INADDR_ANY)
+			cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
+		else
+			wildcard = true;
+
+	} else {
+		cm_info.ipv4 = false;
+		i40iw_copy_ip_ntohl(cm_info.loc_addr,
+				    laddr6->sin6_addr.in6_u.u6_addr32);
+		cm_info.loc_port = ntohs(laddr6->sin6_port);
+		if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
+			i40iw_netdev_vlan_ipv6(cm_info.loc_addr,
+					       &cm_info.vlan_id,
+					       NULL);
+		else
+			wildcard = true;
+	}
+	cm_info.backlog = backlog;
+	cm_info.cm_id = cm_id;
+
+	cm_listen_node = i40iw_make_listen_node(&iwdev->cm_core, iwdev, &cm_info);
+	if (!cm_listen_node) {
+		i40iw_pr_err("cm_listen_node == NULL\n");
+		return -ENOMEM;
+	}
+
+	cm_id->provider_data = cm_listen_node;
+
+	if (!cm_listen_node->reused_node) {
+		if (wildcard) {
+			if (cm_info.ipv4)
+				ret = i40iw_add_mqh_4(iwdev,
+						      &cm_info,
+						      cm_listen_node);
+			else
+				ret = i40iw_add_mqh_6(iwdev,
+						      &cm_info,
+						      cm_listen_node);
+			if (ret)
+				goto error;
+
+			ret = i40iw_manage_apbvt(iwdev,
+						 cm_info.loc_port,
+						 I40IW_MANAGE_APBVT_ADD);
+
+			if (ret)
+				goto error;
+		} else {
+			ret = i40iw_manage_qhash(iwdev,
+						 &cm_info,
+						 I40IW_QHASH_TYPE_TCP_SYN,
+						 I40IW_QHASH_MANAGE_TYPE_ADD,
+						 NULL,
+						 true);
+			if (ret)
+				goto error;
+			cm_listen_node->qhash_set = true;
+			ret = i40iw_manage_apbvt(iwdev,
+						 cm_info.loc_port,
+						 I40IW_MANAGE_APBVT_ADD);
+			if (ret)
+				goto error;
+		}
+	}
+	cm_id->add_ref(cm_id);
+	cm_listen_node->cm_core->stats_listen_created++;
+	return 0;
+ error:
+	i40iw_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
+	return -EINVAL;
+}
+
+/**
+ * i40iw_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ */
+int i40iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+	struct i40iw_device *iwdev;
+
+	iwdev = to_iwdev(cm_id->device);
+	if (cm_id->provider_data)
+		i40iw_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true);
+	else
+		i40iw_pr_err("cm_id->provider_data was NULL\n");
+
+	cm_id->rem_ref(cm_id);
+
+	return 0;
+}
+
+/**
+ * i40iw_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cm_node *cm_node;
+	struct i40iw_sc_dev *dev;
+	struct ib_qp_attr attr;
+	struct iw_cm_id *cm_id;
+	int status;
+	bool read0;
+
+	cm_node = event->cm_node;
+	cm_id = cm_node->cm_id;
+	iwqp = (struct i40iw_qp *)cm_id->provider_data;
+	iwdev = to_iwdev(iwqp->ibqp.device);
+	dev = &iwdev->sc_dev;
+
+	if (iwqp->destroyed) {
+		status = -ETIMEDOUT;
+		goto error;
+	}
+	i40iw_cm_init_tsa_conn(iwqp, cm_node);
+	read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+	if (iwqp->page)
+		iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
+	dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
+	if (iwqp->page)
+		kunmap(iwqp->page);
+	status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+	if (status)
+		i40iw_pr_err("send cm event\n");
+
+	memset(&attr, 0, sizeof(attr));
+	attr.qp_state = IB_QPS_RTS;
+	cm_node->qhash_set = false;
+	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+
+	cm_node->accelerated = 1;
+	if (cm_node->accept_pend) {
+		if (!cm_node->listener)
+			i40iw_pr_err("listener is null for passive node\n");
+		atomic_dec(&cm_node->listener->pend_accepts_cnt);
+		cm_node->accept_pend = 0;
+	}
+	return;
+
+error:
+	iwqp->cm_id = NULL;
+	cm_id->provider_data = NULL;
+	i40iw_send_cm_event(event->cm_node,
+			    cm_id,
+			    IW_CM_EVENT_CONNECT_REPLY,
+			    status);
+	cm_id->rem_ref(cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * i40iw_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ */
+static void i40iw_cm_event_reset(struct i40iw_cm_event *event)
+{
+	struct i40iw_cm_node *cm_node = event->cm_node;
+	struct iw_cm_id   *cm_id = cm_node->cm_id;
+	struct i40iw_qp *iwqp;
+
+	if (!cm_id)
+		return;
+
+	iwqp = cm_id->provider_data;
+	if (!iwqp)
+		return;
+
+	i40iw_debug(cm_node->dev,
+		    I40IW_DEBUG_CM,
+		    "reset event %p - cm_id = %p\n",
+		     event->cm_node, cm_id);
+	iwqp->cm_id = NULL;
+
+	i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET);
+	i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * i40iw_cm_event_handler - worker thread callback to send event to cm upper layer
+ * @work: pointer of cm event info.
+ */
+static void i40iw_cm_event_handler(struct work_struct *work)
+{
+	struct i40iw_cm_event *event = container_of(work,
+						    struct i40iw_cm_event,
+						    event_work);
+	struct i40iw_cm_node *cm_node;
+
+	if (!event || !event->cm_node || !event->cm_node->cm_core)
+		return;
+
+	cm_node = event->cm_node;
+
+	switch (event->type) {
+	case I40IW_CM_EVENT_MPA_REQ:
+		i40iw_send_cm_event(cm_node,
+				    cm_node->cm_id,
+				    IW_CM_EVENT_CONNECT_REQUEST,
+				    0);
+		break;
+	case I40IW_CM_EVENT_RESET:
+		i40iw_cm_event_reset(event);
+		break;
+	case I40IW_CM_EVENT_CONNECTED:
+		if (!event->cm_node->cm_id ||
+		    (event->cm_node->state != I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_cm_event_connected(event);
+		break;
+	case I40IW_CM_EVENT_MPA_REJECT:
+		if (!event->cm_node->cm_id ||
+		    (cm_node->state == I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_send_cm_event(cm_node,
+				    cm_node->cm_id,
+				    IW_CM_EVENT_CONNECT_REPLY,
+				    -ECONNREFUSED);
+		break;
+	case I40IW_CM_EVENT_ABORTED:
+		if (!event->cm_node->cm_id ||
+		    (event->cm_node->state == I40IW_CM_STATE_OFFLOADED))
+			break;
+		i40iw_event_connect_error(event);
+		break;
+	default:
+		i40iw_pr_err("event type = %d\n", event->type);
+		break;
+	}
+
+	event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
+	i40iw_rem_ref_cm_node(event->cm_node);
+	kfree(event);
+}
+
+/**
+ * i40iw_cm_post_event - queue event request for worker thread
+ * @event: cm node's info for up event call
+ */
+static void i40iw_cm_post_event(struct i40iw_cm_event *event)
+{
+	atomic_inc(&event->cm_node->ref_count);
+	event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
+	INIT_WORK(&event->event_work, i40iw_cm_event_handler);
+
+	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
new file mode 100644
index 000000000000..5f8ceb4a8e84
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -0,0 +1,456 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_CM_H
+#define I40IW_CM_H
+
+#define QUEUE_EVENTS
+
+#define I40IW_MANAGE_APBVT_DEL 0
+#define I40IW_MANAGE_APBVT_ADD 1
+
+#define I40IW_MPA_REQUEST_ACCEPT  1
+#define I40IW_MPA_REQUEST_REJECT  2
+
+/* IETF MPA -- defines, enums, structs */
+#define IEFT_MPA_KEY_REQ  "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP  "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VERSION  1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE    20
+#define IETF_RTR_MSG_SIZE      4
+#define IETF_MPA_V2_FLAG       0x10
+#define SNDMARKER_SEQNMASK     0x000001FF
+
+#define I40IW_MAX_IETF_SIZE      32
+
+#define MPA_ZERO_PAD_LEN	4
+
+/* IETF RTR MSG Fields               */
+#define IETF_PEER_TO_PEER       0x8000
+#define IETF_FLPDU_ZERO_LEN     0x4000
+#define IETF_RDMA0_WRITE        0x8000
+#define IETF_RDMA0_READ         0x4000
+#define IETF_NO_IRD_ORD         0x3FFF
+
+/* HW-supported IRD sizes*/
+#define	I40IW_HW_IRD_SETTING_2	2
+#define	I40IW_HW_IRD_SETTING_4	4
+#define	I40IW_HW_IRD_SETTING_8	8
+#define	I40IW_HW_IRD_SETTING_16	16
+#define	I40IW_HW_IRD_SETTING_32	32
+#define	I40IW_HW_IRD_SETTING_64	64
+
+enum ietf_mpa_flags {
+	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
+	IETF_MPA_FLAGS_CRC = 0x40,	/* receive Markers */
+	IETF_MPA_FLAGS_REJECT = 0x20,	/* Reject */
+};
+
+struct ietf_mpa_v1 {
+	u8 key[IETF_MPA_KEY_SIZE];
+	u8 flags;
+	u8 rev;
+	__be16 priv_data_len;
+	u8 priv_data[0];
+};
+
+#define ietf_mpa_req_resp_frame ietf_mpa_frame
+
+struct ietf_rtr_msg {
+	__be16 ctrl_ird;
+	__be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+	u8 key[IETF_MPA_KEY_SIZE];
+	u8 flags;
+	u8 rev;
+	__be16 priv_data_len;
+	struct ietf_rtr_msg rtr_msg;
+	u8 priv_data[0];
+};
+
+struct i40iw_cm_node;
+enum i40iw_timer_type {
+	I40IW_TIMER_TYPE_SEND,
+	I40IW_TIMER_TYPE_RECV,
+	I40IW_TIMER_NODE_CLEANUP,
+	I40IW_TIMER_TYPE_CLOSE,
+};
+
+#define I40IW_PASSIVE_STATE_INDICATED    0
+#define I40IW_DO_NOT_SEND_RESET_EVENT    1
+#define I40IW_SEND_RESET_EVENT           2
+
+#define MAX_I40IW_IFS 4
+
+#define SET_ACK 0x1
+#define SET_SYN 0x2
+#define SET_FIN 0x4
+#define SET_RST 0x8
+
+#define TCP_OPTIONS_PADDING     3
+
+struct option_base {
+	u8 optionnum;
+	u8 length;
+};
+
+enum option_numbers {
+	OPTION_NUMBER_END,
+	OPTION_NUMBER_NONE,
+	OPTION_NUMBER_MSS,
+	OPTION_NUMBER_WINDOW_SCALE,
+	OPTION_NUMBER_SACK_PERM,
+	OPTION_NUMBER_SACK,
+	OPTION_NUMBER_WRITE0 = 0xbc
+};
+
+struct option_mss {
+	u8 optionnum;
+	u8 length;
+	__be16 mss;
+};
+
+struct option_windowscale {
+	u8 optionnum;
+	u8 length;
+	u8 shiftcount;
+};
+
+union all_known_options {
+	char as_end;
+	struct option_base as_base;
+	struct option_mss as_mss;
+	struct option_windowscale as_windowscale;
+};
+
+struct i40iw_timer_entry {
+	struct list_head list;
+	unsigned long timetosend;	/* jiffies */
+	struct i40iw_puda_buf *sqbuf;
+	u32 type;
+	u32 retrycount;
+	u32 retranscount;
+	u32 context;
+	u32 send_retrans;
+	int close_when_complete;
+};
+
+#define I40IW_DEFAULT_RETRYS	64
+#define I40IW_DEFAULT_RETRANS	8
+#define I40IW_DEFAULT_TTL	0x40
+#define I40IW_DEFAULT_RTT_VAR	0x6
+#define I40IW_DEFAULT_SS_THRESH 0x3FFFFFFF
+#define I40IW_DEFAULT_REXMIT_THRESH 8
+
+#define I40IW_RETRY_TIMEOUT   HZ
+#define I40IW_SHORT_TIME      10
+#define I40IW_LONG_TIME       (2 * HZ)
+#define I40IW_MAX_TIMEOUT     ((unsigned long)(12 * HZ))
+
+#define I40IW_CM_HASHTABLE_SIZE         1024
+#define I40IW_CM_TCP_TIMER_INTERVAL     3000
+#define I40IW_CM_DEFAULT_MTU            1540
+#define I40IW_CM_DEFAULT_FRAME_CNT      10
+#define I40IW_CM_THREAD_STACK_SIZE      256
+#define I40IW_CM_DEFAULT_RCV_WND        64240
+#define I40IW_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
+#define I40IW_CM_DEFAULT_RCV_WND_SCALE  2
+#define I40IW_CM_DEFAULT_FREE_PKTS      0x000A
+#define I40IW_CM_FREE_PKT_LO_WATERMARK  2
+
+#define I40IW_CM_DEFAULT_MSS   536
+
+#define I40IW_CM_DEF_SEQ       0x159bf75f
+#define I40IW_CM_DEF_LOCAL_ID  0x3b47
+
+#define I40IW_CM_DEF_SEQ2      0x18ed5740
+#define I40IW_CM_DEF_LOCAL_ID2 0xb807
+#define MAX_CM_BUFFER   (I40IW_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+typedef u32 i40iw_addr_t;
+
+#define i40iw_cm_tsa_context i40iw_qp_context
+
+struct i40iw_qp;
+
+/* cm node transition states */
+enum i40iw_cm_node_state {
+	I40IW_CM_STATE_UNKNOWN,
+	I40IW_CM_STATE_INITED,
+	I40IW_CM_STATE_LISTENING,
+	I40IW_CM_STATE_SYN_RCVD,
+	I40IW_CM_STATE_SYN_SENT,
+	I40IW_CM_STATE_ONE_SIDE_ESTABLISHED,
+	I40IW_CM_STATE_ESTABLISHED,
+	I40IW_CM_STATE_ACCEPTING,
+	I40IW_CM_STATE_MPAREQ_SENT,
+	I40IW_CM_STATE_MPAREQ_RCVD,
+	I40IW_CM_STATE_MPAREJ_RCVD,
+	I40IW_CM_STATE_OFFLOADED,
+	I40IW_CM_STATE_FIN_WAIT1,
+	I40IW_CM_STATE_FIN_WAIT2,
+	I40IW_CM_STATE_CLOSE_WAIT,
+	I40IW_CM_STATE_TIME_WAIT,
+	I40IW_CM_STATE_LAST_ACK,
+	I40IW_CM_STATE_CLOSING,
+	I40IW_CM_STATE_LISTENER_DESTROYED,
+	I40IW_CM_STATE_CLOSED
+};
+
+enum mpa_frame_version {
+	IETF_MPA_V1 = 1,
+	IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+	MPA_KEY_REQUEST,
+	MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+	SEND_RDMA_READ_ZERO = 1,
+	SEND_RDMA_WRITE_ZERO = 2
+};
+
+enum i40iw_tcpip_pkt_type {
+	I40IW_PKT_TYPE_UNKNOWN,
+	I40IW_PKT_TYPE_SYN,
+	I40IW_PKT_TYPE_SYNACK,
+	I40IW_PKT_TYPE_ACK,
+	I40IW_PKT_TYPE_FIN,
+	I40IW_PKT_TYPE_RST
+};
+
+/* CM context params */
+struct i40iw_cm_tcp_context {
+	u8 client;
+
+	u32 loc_seq_num;
+	u32 loc_ack_num;
+	u32 rem_ack_num;
+	u32 rcv_nxt;
+
+	u32 loc_id;
+	u32 rem_id;
+
+	u32 snd_wnd;
+	u32 max_snd_wnd;
+
+	u32 rcv_wnd;
+	u32 mss;
+	u8 snd_wscale;
+	u8 rcv_wscale;
+
+	struct timeval sent_ts;
+};
+
+enum i40iw_cm_listener_state {
+	I40IW_CM_LISTENER_PASSIVE_STATE = 1,
+	I40IW_CM_LISTENER_ACTIVE_STATE = 2,
+	I40IW_CM_LISTENER_EITHER_STATE = 3
+};
+
+struct i40iw_cm_listener {
+	struct list_head list;
+	struct i40iw_cm_core *cm_core;
+	u8 loc_mac[ETH_ALEN];
+	u32 loc_addr[4];
+	u16 loc_port;
+	u32 map_loc_addr[4];
+	u16 map_loc_port;
+	struct iw_cm_id *cm_id;
+	atomic_t ref_count;
+	struct i40iw_device *iwdev;
+	atomic_t pend_accepts_cnt;
+	int backlog;
+	enum i40iw_cm_listener_state listener_state;
+	u32 reused_node;
+	u8 user_pri;
+	u16 vlan_id;
+	bool qhash_set;
+	bool ipv4;
+	struct list_head child_listen_list;
+
+};
+
+struct i40iw_kmem_info {
+	void *addr;
+	u32 size;
+};
+
+/* per connection node and node state information */
+struct i40iw_cm_node {
+	u32 loc_addr[4], rem_addr[4];
+	u16 loc_port, rem_port;
+	u32 map_loc_addr[4], map_rem_addr[4];
+	u16 map_loc_port, map_rem_port;
+	u16 vlan_id;
+	enum i40iw_cm_node_state state;
+	u8 loc_mac[ETH_ALEN];
+	u8 rem_mac[ETH_ALEN];
+	atomic_t ref_count;
+	struct i40iw_qp *iwqp;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cm_tcp_context tcp_cntxt;
+	struct i40iw_cm_core *cm_core;
+	struct i40iw_cm_node *loopbackpartner;
+	struct i40iw_timer_entry *send_entry;
+	struct i40iw_timer_entry *close_entry;
+	spinlock_t retrans_list_lock; /* cm transmit packet */
+	enum send_rdma0 send_rdma0_op;
+	u16 ird_size;
+	u16 ord_size;
+	u16     mpav2_ird_ord;
+	struct iw_cm_id *cm_id;
+	struct list_head list;
+	int accelerated;
+	struct i40iw_cm_listener *listener;
+	int apbvt_set;
+	int accept_pend;
+	struct list_head timer_entry;
+	struct list_head reset_entry;
+	atomic_t passive_state;
+	bool qhash_set;
+	u8 user_pri;
+	bool ipv4;
+	bool snd_mark_en;
+	u16 lsmm_size;
+	enum mpa_frame_version mpa_frame_rev;
+	struct i40iw_kmem_info pdata;
+	union {
+		struct ietf_mpa_v1 mpa_frame;
+		struct ietf_mpa_v2 mpa_v2_frame;
+	};
+
+	u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
+	struct i40iw_kmem_info mpa_hdr;
+};
+
+/* structure for client or CM to fill when making CM api calls. */
+/*	- only need to set relevant data, based on op. */
+struct i40iw_cm_info {
+	struct iw_cm_id *cm_id;
+	u16 loc_port;
+	u16 rem_port;
+	u32 loc_addr[4];
+	u32 rem_addr[4];
+	u16 map_loc_port;
+	u16 map_rem_port;
+	u32 map_loc_addr[4];
+	u32 map_rem_addr[4];
+	u16 vlan_id;
+	int backlog;
+	u16 user_pri;
+	bool ipv4;
+};
+
+/* CM event codes */
+enum i40iw_cm_event_type {
+	I40IW_CM_EVENT_UNKNOWN,
+	I40IW_CM_EVENT_ESTABLISHED,
+	I40IW_CM_EVENT_MPA_REQ,
+	I40IW_CM_EVENT_MPA_CONNECT,
+	I40IW_CM_EVENT_MPA_ACCEPT,
+	I40IW_CM_EVENT_MPA_REJECT,
+	I40IW_CM_EVENT_MPA_ESTABLISHED,
+	I40IW_CM_EVENT_CONNECTED,
+	I40IW_CM_EVENT_RESET,
+	I40IW_CM_EVENT_ABORTED
+};
+
+/* event to post to CM event handler */
+struct i40iw_cm_event {
+	enum i40iw_cm_event_type type;
+	struct i40iw_cm_info cm_info;
+	struct work_struct event_work;
+	struct i40iw_cm_node *cm_node;
+};
+
+struct i40iw_cm_core {
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+
+	struct list_head listen_nodes;
+	struct list_head connected_nodes;
+
+	struct timer_list tcp_timer;
+
+	struct workqueue_struct *event_wq;
+	struct workqueue_struct *disconn_wq;
+
+	spinlock_t ht_lock; /* manage hash table */
+	spinlock_t listen_list_lock; /* listen list */
+
+	u64	stats_nodes_created;
+	u64	stats_nodes_destroyed;
+	u64	stats_listen_created;
+	u64	stats_listen_destroyed;
+	u64	stats_listen_nodes_created;
+	u64	stats_listen_nodes_destroyed;
+	u64	stats_loopbacks;
+	u64	stats_accepts;
+	u64	stats_rejects;
+	u64	stats_connect_errs;
+	u64	stats_passive_errs;
+	u64	stats_pkt_retrans;
+	u64	stats_backlog_drops;
+};
+
+int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
+			    struct i40iw_puda_buf *sqbuf,
+			    enum i40iw_timer_type type,
+			    int send_retrans,
+			    int close_when_complete);
+
+int i40iw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_reject(struct iw_cm_id *, const void *, u8);
+int i40iw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
+int i40iw_create_listen(struct iw_cm_id *, int);
+int i40iw_destroy_listen(struct iw_cm_id *);
+
+int i40iw_cm_start(struct i40iw_device *);
+int i40iw_cm_stop(struct i40iw_device *);
+
+int i40iw_arp_table(struct i40iw_device *iwdev,
+		    u32 *ip_addr,
+		    bool ipv4,
+		    u8 *mac_addr,
+		    u32 action);
+
+#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
new file mode 100644
index 000000000000..f05802bf6ca0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -0,0 +1,4743 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_insert_wqe_hdr - write wqe header
+ * @wqe: cqp wqe for header
+ * @header: header for the cqp wqe
+ */
+static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header)
+{
+	wmb();            /* make sure WQE is populated before polarity is set */
+	set_64bit_val(wqe, 24, header);
+}
+
+/**
+ * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
+ * @cqp: struct for cqp hw
+ * @val: cqp tail register value
+ * @tail:wqtail register value
+ * @error: cqp processing err
+ */
+static inline void i40iw_get_cqp_reg_info(struct i40iw_sc_cqp *cqp,
+					  u32 *val,
+					  u32 *tail,
+					  u32 *error)
+{
+	if (cqp->dev->is_pf) {
+		*val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPTAIL);
+		*tail = RS_32(*val, I40E_PFPE_CQPTAIL_WQTAIL);
+		*error = RS_32(*val, I40E_PFPE_CQPTAIL_CQP_OP_ERR);
+	} else {
+		*val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPTAIL1);
+		*tail = RS_32(*val, I40E_VFPE_CQPTAIL_WQTAIL);
+		*error = RS_32(*val, I40E_VFPE_CQPTAIL_CQP_OP_ERR);
+	}
+}
+
+/**
+ * i40iw_cqp_poll_registers - poll cqp registers
+ * @cqp: struct for cqp hw
+ * @tail:wqtail register value
+ * @count: how many times to try for completion
+ */
+static enum i40iw_status_code i40iw_cqp_poll_registers(
+						struct i40iw_sc_cqp *cqp,
+						u32 tail,
+						u32 count)
+{
+	u32 i = 0;
+	u32 newtail, error, val;
+
+	while (i < count) {
+		i++;
+		i40iw_get_cqp_reg_info(cqp, &val, &newtail, &error);
+		if (error) {
+			error = (cqp->dev->is_pf) ?
+				 i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES) :
+				 i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+			return I40IW_ERR_CQP_COMPL_ERROR;
+		}
+		if (newtail != tail) {
+			/* SUCCESS */
+			I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+			return 0;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+	}
+	return I40IW_ERR_TIMEOUT;
+}
+
+/**
+ * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
+ * @buf: ptr to fpm commit buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ *
+ * parses fpm commit info and copy base value
+ * of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
+				u64 *buf,
+				struct i40iw_hmc_obj_info *info)
+{
+	u64 temp;
+	u32 i, j;
+	u32 low;
+
+	/* copy base values in obj_info */
+	for (i = I40IW_HMC_IW_QP, j = 0;
+			i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		info[i].base = RS_64_1(temp, 32) * 512;
+		low = (u32)(temp);
+		if (low)
+			info[i].cnt = low;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
+ * @buf: ptr to fpm query buffer
+ * @info: ptr to i40iw_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * parses fpm query buffer and copy max_cnt and
+ * size value of hmc objects in hmc_info
+ */
+static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
+				u64 *buf,
+				struct i40iw_hmc_info *hmc_info,
+				struct i40iw_hmc_fpm_misc *hmc_fpm_misc)
+{
+	u64 temp;
+	struct i40iw_hmc_obj_info *obj_info;
+	u32 i, j, size;
+	u16 max_pe_sds;
+
+	obj_info = hmc_info->hmc_obj;
+
+	get_64bit_val(buf, 0, &temp);
+	hmc_info->first_sd_index = (u16)RS_64(temp, I40IW_QUERY_FPM_FIRST_PE_SD_INDEX);
+	max_pe_sds = (u16)RS_64(temp, I40IW_QUERY_FPM_MAX_PE_SDS);
+
+	/* Reduce SD count for VFs by 1 to account for PBLE backing page rounding */
+	if (hmc_info->hmc_fn_id >= I40IW_FIRST_VF_FPM_ID)
+		max_pe_sds--;
+	hmc_fpm_misc->max_sds = max_pe_sds;
+	hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
+
+	for (i = I40IW_HMC_IW_QP, j = 8;
+	     i <= I40IW_HMC_IW_ARP; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		if (i == I40IW_HMC_IW_QP)
+			obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
+		else if (i == I40IW_HMC_IW_CQ)
+			obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
+		else
+			obj_info[i].max_cnt = (u32)temp;
+
+		size = (u32)RS_64_1(temp, 32);
+		obj_info[i].size = ((u64)1 << size);
+	}
+	for (i = I40IW_HMC_IW_MR, j = 48;
+			i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
+		get_64bit_val(buf, j, &temp);
+		obj_info[i].max_cnt = (u32)temp;
+		size = (u32)RS_64_1(temp, 32);
+		obj_info[i].size = LS_64_1(1, size);
+	}
+
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS);
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER);
+	get_64bit_val(buf, 120, &temp);
+	hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET);
+	get_64bit_val(buf, 64, &temp);
+	hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE);
+	if (!hmc_fpm_misc->xf_block_size)
+		return I40IW_ERR_INVALID_SIZE;
+	get_64bit_val(buf, 80, &temp);
+	hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE);
+	if (!hmc_fpm_misc->q1_block_size)
+		return I40IW_ERR_INVALID_SIZE;
+	return 0;
+}
+
+/**
+ * i40iw_sc_pd_init - initialize sc pd struct
+ * @dev: sc device struct
+ * @pd: sc pd ptr
+ * @pd_id: pd_id for allocated pd
+ */
+static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
+			     struct i40iw_sc_pd *pd,
+			     u16 pd_id)
+{
+	pd->size = sizeof(*pd);
+	pd->pd_id = pd_id;
+	pd->dev = dev;
+}
+
+/**
+ * i40iw_get_encoded_wqe_size - given wq size, returns hardware encoded size
+ * @wqsize: size of the wq (sq, rq, srq) to encoded_size
+ * @cqpsq: encoded size for sq for cqp as its encoded size is 1+ other wq's
+ */
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq)
+{
+	u8 encoded_size = 0;
+
+	/* cqp sq's hw coded value starts from 1 for size of 4
+	 * while it starts from 0 for qp' wq's.
+	 */
+	if (cqpsq)
+		encoded_size = 1;
+	wqsize >>= 2;
+	while (wqsize >>= 1)
+		encoded_size++;
+	return encoded_size;
+}
+
+/**
+ * i40iw_sc_cqp_init - Initialize buffers for a control Queue Pair
+ * @cqp: IWARP control queue pair pointer
+ * @info: IWARP control queue pair init info pointer
+ *
+ * Initializes the object and context buffers for a control Queue Pair.
+ */
+static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
+						struct i40iw_cqp_init_info *info)
+{
+	u8 hw_sq_size;
+
+	if ((info->sq_size > I40IW_CQP_SW_SQSIZE_2048) ||
+	    (info->sq_size < I40IW_CQP_SW_SQSIZE_4) ||
+	    ((info->sq_size & (info->sq_size - 1))))
+		return I40IW_ERR_INVALID_SIZE;
+
+	hw_sq_size = i40iw_get_encoded_wqe_size(info->sq_size, true);
+	cqp->size = sizeof(*cqp);
+	cqp->sq_size = info->sq_size;
+	cqp->hw_sq_size = hw_sq_size;
+	cqp->sq_base = info->sq;
+	cqp->host_ctx = info->host_ctx;
+	cqp->sq_pa = info->sq_pa;
+	cqp->host_ctx_pa = info->host_ctx_pa;
+	cqp->dev = info->dev;
+	cqp->struct_ver = info->struct_ver;
+	cqp->scratch_array = info->scratch_array;
+	cqp->polarity = 0;
+	cqp->en_datacenter_tcp = info->en_datacenter_tcp;
+	cqp->enabled_vf_count = info->enabled_vf_count;
+	cqp->hmc_profile = info->hmc_profile;
+	info->dev->cqp = cqp;
+
+	I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+	i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+		    "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
+		    __func__, cqp->sq_size, cqp->hw_sq_size,
+		    cqp->sq_base, cqp->sq_pa, cqp, cqp->polarity);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cqp_create - create cqp during bringup
+ * @cqp: struct for cqp hw
+ * @disable_pfpdus: if pfpdu to be disabled
+ * @maj_err: If error, major err number
+ * @min_err: If error, minor err number
+ */
+static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
+						  bool disable_pfpdus,
+						  u16 *maj_err,
+						  u16 *min_err)
+{
+	u64 temp;
+	u32 cnt = 0, p1, p2, val = 0, err_code;
+	enum i40iw_status_code ret_code;
+
+	ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
+					  &cqp->sdbuf,
+					  128,
+					  I40IW_SD_BUF_ALIGNMENT);
+
+	if (ret_code)
+		goto exit;
+
+	temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
+	       LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
+
+	if (disable_pfpdus)
+		temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
+
+	set_64bit_val(cqp->host_ctx, 0, temp);
+	set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
+	temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
+	       LS_64(cqp->hmc_profile, I40IW_CQPHC_HMC_PROFILE);
+	set_64bit_val(cqp->host_ctx, 16, temp);
+	set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
+	set_64bit_val(cqp->host_ctx, 32, 0);
+	set_64bit_val(cqp->host_ctx, 40, 0);
+	set_64bit_val(cqp->host_ctx, 48, 0);
+	set_64bit_val(cqp->host_ctx, 56, 0);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQP_HOST_CTX",
+			cqp->host_ctx, I40IW_CQP_CTX_SIZE * 8);
+
+	p1 = RS_32_1(cqp->host_ctx_pa, 32);
+	p2 = (u32)cqp->host_ctx_pa;
+
+	if (cqp->dev->is_pf) {
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, p1);
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, p2);
+	} else {
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, p1);
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, p2);
+	}
+	do {
+		if (cnt++ > I40IW_DONE_COUNT) {
+			i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+			ret_code = I40IW_ERR_TIMEOUT;
+			/*
+			 * read PFPE_CQPERRORCODES register to get the minor
+			 * and major error code
+			 */
+			if (cqp->dev->is_pf)
+				err_code = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES);
+			else
+				err_code = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
+			*min_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE);
+			*maj_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE);
+			goto exit;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+		if (cqp->dev->is_pf)
+			val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CCQPSTATUS);
+		else
+			val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CCQPSTATUS1);
+	} while (!val);
+
+exit:
+	if (!ret_code)
+		cqp->process_cqp_sds = i40iw_update_sds_noccq;
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_cqp_post_sq - post of cqp's sq
+ * @cqp: struct for cqp hw
+ */
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
+{
+	if (cqp->dev->is_pf)
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+	else
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CQPDB1, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
+
+	i40iw_debug(cqp->dev,
+		    I40IW_DEBUG_WQE,
+		    "%s: HEAD_TAIL[%04d,%04d,%04d]\n",
+		    __func__,
+		    cqp->sq_ring.head,
+		    cqp->sq_ring.tail,
+		    cqp->sq_ring.size);
+}
+
+/**
+ * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @wqe_idx: we index of cqp ring
+ */
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
+{
+	u64 *wqe = NULL;
+	u32	wqe_idx;
+	enum i40iw_status_code ret_code;
+
+	if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
+		i40iw_debug(cqp->dev,
+			    I40IW_DEBUG_WQE,
+			    "%s: ring is full head %x tail %x size %x\n",
+			    __func__,
+			    cqp->sq_ring.head,
+			    cqp->sq_ring.tail,
+			    cqp->sq_ring.size);
+		return NULL;
+	}
+	I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+	if (ret_code)
+		return NULL;
+	if (!wqe_idx)
+		cqp->polarity = !cqp->polarity;
+
+	wqe = cqp->sq_base[wqe_idx].elem;
+	cqp->scratch_array[wqe_idx] = scratch;
+	I40IW_CQP_INIT_WQE(wqe);
+
+	return wqe;
+}
+
+/**
+ * i40iw_sc_cqp_destroy - destroy cqp during close
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_cqp_destroy(struct i40iw_sc_cqp *cqp)
+{
+	u32 cnt = 0, val = 1;
+	enum i40iw_status_code ret_code = 0;
+	u32 cqpstat_addr;
+
+	if (cqp->dev->is_pf) {
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, 0);
+		i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, 0);
+		cqpstat_addr = I40E_PFPE_CCQPSTATUS;
+	} else {
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, 0);
+		i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, 0);
+		cqpstat_addr = I40E_VFPE_CCQPSTATUS1;
+	}
+	do {
+		if (cnt++ > I40IW_DONE_COUNT) {
+			ret_code = I40IW_ERR_TIMEOUT;
+			break;
+		}
+		udelay(I40IW_SLEEP_COUNT);
+		val = i40iw_rd32(cqp->dev->hw, cqpstat_addr);
+	} while (val);
+
+	i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_ccq_arm - enable intr for control cq
+ * @ccq: ccq sc struct
+ */
+static void i40iw_sc_ccq_arm(struct i40iw_sc_cq *ccq)
+{
+	u64 temp_val;
+	u16 sw_cq_sel;
+	u8 arm_next_se;
+	u8 arm_seq_num;
+
+	/* write to cq doorbell shadow area */
+	/* arm next se should always be zero */
+	get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
+
+	sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+	arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+
+	arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+	arm_seq_num++;
+
+	temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+		   LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+		   LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+		   LS_64(1, I40IW_CQ_DBSA_ARM_NEXT);
+
+	set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
+
+	wmb();       /* make sure shadow area is updated before arming */
+
+	if (ccq->dev->is_pf)
+		i40iw_wr32(ccq->dev->hw, I40E_PFPE_CQARM, ccq->cq_uk.cq_id);
+	else
+		i40iw_wr32(ccq->dev->hw, I40E_VFPE_CQARM1, ccq->cq_uk.cq_id);
+}
+
+/**
+ * i40iw_sc_ccq_get_cqe_info - get ccq's cq entry
+ * @ccq: ccq sc struct
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
+					struct i40iw_sc_cq *ccq,
+					struct i40iw_ccq_cqe_info *info)
+{
+	u64 qp_ctx, temp, temp1;
+	u64 *cqe;
+	struct i40iw_sc_cqp *cqp;
+	u32 wqe_idx;
+	u8 polarity;
+	enum i40iw_status_code ret_code = 0;
+
+	if (ccq->cq_uk.avoid_mem_cflct)
+		cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(&ccq->cq_uk);
+	else
+		cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&ccq->cq_uk);
+
+	get_64bit_val(cqe, 24, &temp);
+	polarity = (u8)RS_64(temp, I40IW_CQ_VALID);
+	if (polarity != ccq->cq_uk.polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	get_64bit_val(cqe, 8, &qp_ctx);
+	cqp = (struct i40iw_sc_cqp *)(unsigned long)qp_ctx;
+	info->error = (bool)RS_64(temp, I40IW_CQ_ERROR);
+	info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+	if (info->error) {
+		info->maj_err_code = (u16)RS_64(temp, I40IW_CQ_MAJERR);
+		info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
+	}
+	wqe_idx = (u32)RS_64(temp, I40IW_CQ_WQEIDX);
+	info->scratch = cqp->scratch_array[wqe_idx];
+
+	get_64bit_val(cqe, 16, &temp1);
+	info->op_ret_val = (u32)RS_64(temp1, I40IW_CCQ_OPRETVAL);
+	get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
+	info->op_code = (u8)RS_64(temp1, I40IW_CQPSQ_OPCODE);
+	info->cqp = cqp;
+
+	/*  move the head for cq */
+	I40IW_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
+	if (I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring) == 0)
+		ccq->cq_uk.polarity ^= 1;
+
+	/* update cq tail in cq shadow memory also */
+	I40IW_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
+	set_64bit_val(ccq->cq_uk.shadow_area,
+		      0,
+		      I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
+	wmb(); /* write shadow area before tail */
+	I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
+ * @cqp: struct for cqp hw
+ * @op_code: cqp opcode for completion
+ * @info: completion q entry to return
+ */
+static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
+					struct i40iw_sc_cqp *cqp,
+					u8 op_code,
+					struct i40iw_ccq_cqe_info *compl_info)
+{
+	struct i40iw_ccq_cqe_info info;
+	struct i40iw_sc_cq *ccq;
+	enum i40iw_status_code ret_code = 0;
+	u32 cnt = 0;
+
+	memset(&info, 0, sizeof(info));
+	ccq = cqp->dev->ccq;
+	while (1) {
+		if (cnt++ > I40IW_DONE_COUNT)
+			return I40IW_ERR_TIMEOUT;
+
+		if (i40iw_sc_ccq_get_cqe_info(ccq, &info)) {
+			udelay(I40IW_SLEEP_COUNT);
+			continue;
+		}
+
+		if (info.error) {
+			ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+			break;
+		}
+		/* check if opcode is cq create */
+		if (op_code != info.op_code) {
+			i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
+				    "%s: opcode mismatch for my op code 0x%x, returned opcode %x\n",
+				    __func__, op_code, info.op_code);
+		}
+		/* success, exit out of the loop */
+		if (op_code == info.op_code)
+			break;
+	}
+
+	if (compl_info)
+		memcpy(compl_info, &info, sizeof(*compl_info));
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_push_page - Handle push page
+ * @cqp: struct for cqp hw
+ * @info: push page info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_push_page(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_cqp_manage_push_page_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
+		return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, info->qs_handle);
+
+	header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table - manage of function table
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @vf_index: vf index for cqp
+ * @free_pm_fcn: function number
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 vf_index,
+				bool free_pm_fcn,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	if (vf_index >= I40IW_MAX_VF_PER_PF)
+		return I40IW_ERR_INVALID_VF_ID;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = LS_64(vf_index, I40IW_CQPSQ_MHMC_VFIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(free_pm_fcn, I40IW_CQPSQ_MHMC_FREEPMFN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_set_hmc_resource_profile - cqp wqe for hmc profile
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_profile_type: type of profile to set
+ * @vf_num: vf number for profile
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_set_hmc_resource_profile(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 hmc_profile_type,
+				u8 vf_num, bool post_sq,
+				bool poll_registers)
+{
+	u64 *wqe;
+	u64 header;
+	u32 val, tail, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16,
+		      (LS_64(hmc_profile_type, I40IW_CQPSQ_SHMCRP_HMC_PROFILE) |
+				LS_64(vf_num, I40IW_CQPSQ_SHMCRP_VFNUM)));
+
+	header = LS_64(I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE, I40IW_CQPSQ_OPCODE) |
+		       LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (poll_registers)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000000);
+		else
+			ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+								 I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+								 NULL);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_manage_hmc_pm_func_table_done - wait for cqp wqe completion for function table
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values_done - wait for cqp eqe completion for fpm commit
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_COMMIT_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_commit_fpm_values - cqp wqe for commit fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @commit_fpm_mem; Memory for fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_commit_fpm_values(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					struct i40iw_dma_mem *commit_fpm_mem,
+					bool post_sq,
+					u8 wait_type)
+{
+	u64 *wqe;
+	u64 header;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, hmc_fn_id);
+	set_64bit_val(wqe, 32, commit_fpm_mem->pa);
+
+	header = LS_64(I40IW_CQP_OP_COMMIT_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "COMMIT_FPM_VALUES WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+
+		if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+		else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+			ret_code = i40iw_sc_commit_fpm_values_done(cqp);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
+ * @cqp: struct for cqp hw
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values_done(struct i40iw_sc_cqp *cqp)
+{
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_QUERY_FPM_VALUES, NULL);
+}
+
+/**
+ * i40iw_sc_query_fpm_values - cqp wqe query fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @query_fpm_mem: memory for return fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_query_fpm_values(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					struct i40iw_dma_mem *query_fpm_mem,
+					bool post_sq,
+					u8 wait_type)
+{
+	u64 *wqe;
+	u64 header;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, hmc_fn_id);
+	set_64bit_val(wqe, 32, query_fpm_mem->pa);
+
+	header = LS_64(I40IW_CQP_OP_QUERY_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_FPM WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	/* read the tail from CQP_TAIL register */
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+		else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
+			ret_code = i40iw_sc_query_fpm_values_done(cqp);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_add_arp_cache_entry - cqp wqe add arp cache entry
+ * @cqp: struct for cqp hw
+ * @info: arp entry information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_arp_cache_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_add_arp_cache_entry_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 8, info->reach_max);
+
+	temp = info->mac_addr[5] |
+	       LS_64_1(info->mac_addr[4], 8) |
+	       LS_64_1(info->mac_addr[3], 16) |
+	       LS_64_1(info->mac_addr[2], 24) |
+	       LS_64_1(info->mac_addr[1], 32) |
+	       LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 16, temp);
+
+	header = info->arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64((info->permanent ? 1 : 0), I40IW_CQPSQ_MAT_PERMANENT) |
+		 LS_64(1, I40IW_CQPSQ_MAT_ENTRYVALID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_del_arp_cache_entry - dele arp cache entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_arp_cache_entry(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u16 arp_index,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_query_arp_cache_entry - cqp wqe to query arp and arp index
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_arp_cache_entry(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u16 arp_index,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	header = arp_index |
+		 LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_MAT_QUERY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_ARP_CACHE_ENTRY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_apbvt_entry - for adding and deleting apbvt entries
+ * @cqp: struct for cqp hw
+ * @info: info for apbvt entry to add or delete
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_manage_apbvt_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_apbvt_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, info->port);
+
+	header = LS_64(I40IW_CQP_OP_MANAGE_APBVT, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->add, I40IW_CQPSQ_MAPT_ADDPORT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_APBVT WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_manage_qhash_table_entry - manage quad hash entries
+ * @cqp: struct for cqp hw
+ * @info: info for quad hash to manage
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ *
+ * This is called before connection establishment is started. For passive connections, when
+ * listener is created, it will call with entry type of  I40IW_QHASH_TYPE_TCP_SYN with local
+ * ip address and tcp port. When SYN is received (passive connections) or
+ * sent (active connections), this routine is called with entry type of
+ * I40IW_QHASH_TYPE_TCP_ESTABLISHED and quad is passed in info.
+ *
+ * When iwarp connection is done and its state moves to RTS, the quad hash entry in
+ * the hardware will point to iwarp's qp number and requires no calls from the driver.
+ */
+static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
+					struct i40iw_sc_cqp *cqp,
+					struct i40iw_qhash_table_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 qw1 = 0;
+	u64 qw2 = 0;
+	u64 temp;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = info->mac_addr[5] |
+		LS_64_1(info->mac_addr[4], 8) |
+		LS_64_1(info->mac_addr[3], 16) |
+		LS_64_1(info->mac_addr[2], 24) |
+		LS_64_1(info->mac_addr[1], 32) |
+		LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 0, temp);
+
+	qw1 = LS_64(info->qp_num, I40IW_CQPSQ_QHASH_QPN) |
+	      LS_64(info->dest_port, I40IW_CQPSQ_QHASH_DEST_PORT);
+	if (info->ipv4_valid) {
+		set_64bit_val(wqe,
+			      48,
+			      LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+	} else {
+		set_64bit_val(wqe,
+			      56,
+			      LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+			      LS_64(info->dest_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+
+		set_64bit_val(wqe,
+			      48,
+			      LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+			      LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+	}
+	qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+	if (info->vlan_valid)
+		qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
+	set_64bit_val(wqe, 16, qw2);
+	if (info->entry_type == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+		qw1 |= LS_64(info->src_port, I40IW_CQPSQ_QHASH_SRC_PORT);
+		if (!info->ipv4_valid) {
+			set_64bit_val(wqe,
+				      40,
+				      LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
+				      LS_64(info->src_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
+			set_64bit_val(wqe,
+				      32,
+				      LS_64(info->src_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
+				      LS_64(info->src_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
+		} else {
+			set_64bit_val(wqe,
+				      32,
+				      LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
+		}
+	}
+
+	set_64bit_val(wqe, 8, qw1);
+	temp = LS_64(cqp->polarity, I40IW_CQPSQ_QHASH_WQEVALID) |
+	       LS_64(I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY, I40IW_CQPSQ_QHASH_OPCODE) |
+	       LS_64(info->manage, I40IW_CQPSQ_QHASH_MANAGE) |
+	       LS_64(info->ipv4_valid, I40IW_CQPSQ_QHASH_IPV4VALID) |
+	       LS_64(info->vlan_valid, I40IW_CQPSQ_QHASH_VLANVALID) |
+	       LS_64(info->entry_type, I40IW_CQPSQ_QHASH_ENTRYTYPE);
+
+	i40iw_insert_wqe_hdr(wqe, temp);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_QHASH WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_alloc_local_mac_ipaddr_entry - cqp wqe for loc mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_local_mac_ipaddr_entry(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ALLOCATE_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_add_local_mac_ipaddr_entry - add mac enry
+ * @cqp: struct for cqp hw
+ * @info:mac addr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
+				struct i40iw_sc_cqp *cqp,
+				struct i40iw_local_mac_ipaddr_entry_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	temp = info->mac_addr[5] |
+		LS_64_1(info->mac_addr[4], 8) |
+		LS_64_1(info->mac_addr[3], 16) |
+		LS_64_1(info->mac_addr[2], 24) |
+		LS_64_1(info->mac_addr[1], 32) |
+		LS_64_1(info->mac_addr[0], 40);
+
+	set_64bit_val(wqe, 32, temp);
+
+	header = LS_64(info->entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ADD_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_del_local_mac_ipaddr_entry - cqp wqe to dele local mac
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @entry_idx: index of mac entry
+ * @ ignore_ref_count: to force mac adde delete
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
+				struct i40iw_sc_cqp *cqp,
+				u64 scratch,
+				u8 entry_idx,
+				u8 ignore_ref_count,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
+		 LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_MLIPA_FREEENTRY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(ignore_ref_count, I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cqp_nop - send a nop wqe
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cqp_nop(struct i40iw_sc_cqp *cqp,
+					       u64 scratch,
+					       bool post_sq)
+{
+	u64 *wqe;
+	u64 header;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(I40IW_CQP_OP_NOP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "NOP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_ceq_init - initialize ceq
+ * @ceq: ceq sc structure
+ * @info: ceq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_ceq_init(struct i40iw_sc_ceq *ceq,
+						struct i40iw_ceq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if ((info->elem_cnt < I40IW_MIN_CEQ_ENTRIES) ||
+	    (info->elem_cnt > I40IW_MAX_CEQ_ENTRIES))
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (info->ceq_id >= I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	ceq->size = sizeof(*ceq);
+	ceq->ceqe_base = (struct i40iw_ceqe *)info->ceqe_base;
+	ceq->ceq_id = info->ceq_id;
+	ceq->dev = info->dev;
+	ceq->elem_cnt = info->elem_cnt;
+	ceq->ceq_elem_pa = info->ceqe_pa;
+	ceq->virtual_map = info->virtual_map;
+
+	ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
+	ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
+	ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
+
+	ceq->tph_en = info->tph_en;
+	ceq->tph_val = info->tph_val;
+	ceq->polarity = 1;
+	I40IW_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
+	ceq->dev->ceq[info->ceq_id] = ceq;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_ceq_create - create ceq wqe
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_create(struct i40iw_sc_ceq *ceq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = ceq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, ceq->elem_cnt);
+	set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
+	set_64bit_val(wqe, 48, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(ceq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = ceq->ceq_id |
+		 LS_64(I40IW_CQP_OP_CREATE_CEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+		 LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+		 LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cceq_create_done - poll for control ceq wqe to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create_done(struct i40iw_sc_ceq *ceq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ceq->dev->cqp;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_destroy_done - poll for destroy cceq to complete
+ * @ceq: ceq sc structure
+ */
+static enum i40iw_status_code i40iw_sc_cceq_destroy_done(struct i40iw_sc_ceq *ceq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ceq->dev->cqp;
+	cqp->process_cqp_sds = i40iw_update_sds_noccq;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_CEQ, NULL);
+}
+
+/**
+ * i40iw_sc_cceq_create - create cceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_sc_cceq_create(struct i40iw_sc_ceq *ceq, u64 scratch)
+{
+	enum i40iw_status_code ret_code;
+
+	ret_code = i40iw_sc_ceq_create(ceq, scratch, true);
+	if (!ret_code)
+		ret_code = i40iw_sc_cceq_create_done(ceq);
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_ceq_destroy - destroy ceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ceq_destroy(struct i40iw_sc_ceq *ceq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = ceq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, ceq->elem_cnt);
+	set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+	header = ceq->ceq_id |
+		 LS_64(I40IW_CQP_OP_DESTROY_CEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
+		 LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
+		 LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_process_ceq - process ceq
+ * @dev: sc device struct
+ * @ceq: ceq sc structure
+ */
+static void *i40iw_sc_process_ceq(struct i40iw_sc_dev *dev, struct i40iw_sc_ceq *ceq)
+{
+	u64 temp;
+	u64 *ceqe;
+	struct i40iw_sc_cq *cq = NULL;
+	u8 polarity;
+
+	ceqe = (u64 *)I40IW_GET_CURRENT_CEQ_ELEMENT(ceq);
+	get_64bit_val(ceqe, 0, &temp);
+	polarity = (u8)RS_64(temp, I40IW_CEQE_VALID);
+	if (polarity != ceq->polarity)
+		return cq;
+
+	cq = (struct i40iw_sc_cq *)(unsigned long)LS_64_1(temp, 1);
+
+	I40IW_RING_MOVE_TAIL(ceq->ceq_ring);
+	if (I40IW_RING_GETCURRENT_TAIL(ceq->ceq_ring) == 0)
+		ceq->polarity ^= 1;
+
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFPE_CQACK, cq->cq_uk.cq_id);
+	else
+		i40iw_wr32(dev->hw, I40E_VFPE_CQACK1, cq->cq_uk.cq_id);
+
+	return cq;
+}
+
+/**
+ * i40iw_sc_aeq_init - initialize aeq
+ * @aeq: aeq structure ptr
+ * @info: aeq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_aeq_init(struct i40iw_sc_aeq *aeq,
+						struct i40iw_aeq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if ((info->elem_cnt < I40IW_MIN_AEQ_ENTRIES) ||
+	    (info->elem_cnt > I40IW_MAX_AEQ_ENTRIES))
+		return I40IW_ERR_INVALID_SIZE;
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	aeq->size = sizeof(*aeq);
+	aeq->polarity = 1;
+	aeq->aeqe_base = (struct i40iw_sc_aeqe *)info->aeqe_base;
+	aeq->dev = info->dev;
+	aeq->elem_cnt = info->elem_cnt;
+
+	aeq->aeq_elem_pa = info->aeq_elem_pa;
+	I40IW_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
+	info->dev->aeq = aeq;
+
+	aeq->virtual_map = info->virtual_map;
+	aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
+	aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
+	aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
+	info->dev->aeq = aeq;
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create - create aeq
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create(struct i40iw_sc_aeq *aeq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = aeq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, aeq->elem_cnt);
+	set_64bit_val(wqe, 32,
+		      (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
+	set_64bit_val(wqe, 48,
+		      (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+
+	header = LS_64(I40IW_CQP_OP_CREATE_AEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+		 LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_destroy - destroy aeq during close
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy(struct i40iw_sc_aeq *aeq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = aeq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, aeq->elem_cnt);
+	set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+	header = LS_64(I40IW_CQP_OP_DESTROY_AEQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
+		 LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_get_next_aeqe - get next aeq entry
+ * @aeq: aeq structure ptr
+ * @info: aeqe info to be returned
+ */
+static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
+						     struct i40iw_aeqe_info *info)
+{
+	u64 temp, compl_ctx;
+	u64 *aeqe;
+	u16 wqe_idx;
+	u8 ae_src;
+	u8 polarity;
+
+	aeqe = (u64 *)I40IW_GET_CURRENT_AEQ_ELEMENT(aeq);
+	get_64bit_val(aeqe, 0, &compl_ctx);
+	get_64bit_val(aeqe, 8, &temp);
+	polarity = (u8)RS_64(temp, I40IW_AEQE_VALID);
+
+	if (aeq->polarity != polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	i40iw_debug_buf(aeq->dev, I40IW_DEBUG_WQE, "AEQ_ENTRY", aeqe, 16);
+
+	ae_src = (u8)RS_64(temp, I40IW_AEQE_AESRC);
+	wqe_idx = (u16)RS_64(temp, I40IW_AEQE_WQDESCIDX);
+	info->qp_cq_id = (u32)RS_64(temp, I40IW_AEQE_QPCQID);
+	info->ae_id = (u16)RS_64(temp, I40IW_AEQE_AECODE);
+	info->tcp_state = (u8)RS_64(temp, I40IW_AEQE_TCPSTATE);
+	info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE);
+	info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA);
+	info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW);
+	switch (ae_src) {
+	case I40IW_AE_SOURCE_RQ:
+	case I40IW_AE_SOURCE_RQ_0011:
+		info->qp = true;
+		info->wqe_idx = wqe_idx;
+		info->compl_ctx = compl_ctx;
+		break;
+	case I40IW_AE_SOURCE_CQ:
+	case I40IW_AE_SOURCE_CQ_0110:
+	case I40IW_AE_SOURCE_CQ_1010:
+	case I40IW_AE_SOURCE_CQ_1110:
+		info->cq = true;
+		info->compl_ctx = LS_64_1(compl_ctx, 1);
+		break;
+	case I40IW_AE_SOURCE_SQ:
+	case I40IW_AE_SOURCE_SQ_0111:
+		info->qp = true;
+		info->sq = true;
+		info->wqe_idx = wqe_idx;
+		info->compl_ctx = compl_ctx;
+		break;
+	case I40IW_AE_SOURCE_IN_RR_WR:
+	case I40IW_AE_SOURCE_IN_RR_WR_1011:
+		info->qp = true;
+		info->compl_ctx = compl_ctx;
+		info->in_rdrsp_wr = true;
+		break;
+	case I40IW_AE_SOURCE_OUT_RR:
+	case I40IW_AE_SOURCE_OUT_RR_1111:
+		info->qp = true;
+		info->compl_ctx = compl_ctx;
+		info->out_rdrsp = true;
+		break;
+	default:
+		break;
+	}
+	I40IW_RING_MOVE_TAIL(aeq->aeq_ring);
+	if (I40IW_RING_GETCURRENT_TAIL(aeq->aeq_ring) == 0)
+		aeq->polarity ^= 1;
+	return 0;
+}
+
+/**
+ * i40iw_sc_repost_aeq_entries - repost completed aeq entries
+ * @dev: sc device struct
+ * @count: allocate count
+ */
+static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
+							  u32 count)
+{
+	if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
+	else
+		i40iw_wr32(dev->hw, I40E_VFPE_AEQALLOC1, count);
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_aeq_create_done - create aeq
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_create_done(struct i40iw_sc_aeq *aeq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = aeq->dev->cqp;
+	return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_aeq_destroy_done - destroy of aeq during close
+ * @aeq: aeq structure ptr
+ */
+static enum i40iw_status_code i40iw_sc_aeq_destroy_done(struct i40iw_sc_aeq *aeq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = aeq->dev->cqp;
+	return  i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_AEQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_init - initialize control cq
+ * @cq: sc's cq ctruct
+ * @info: info for control cq initialization
+ */
+static enum i40iw_status_code i40iw_sc_ccq_init(struct i40iw_sc_cq *cq,
+						struct i40iw_ccq_init_info *info)
+{
+	u32 pble_obj_cnt;
+
+	if (info->num_elem < I40IW_MIN_CQ_SIZE || info->num_elem > I40IW_MAX_CQ_SIZE)
+		return I40IW_ERR_INVALID_SIZE;
+
+	if (info->ceq_id > I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cq->cq_pa = info->cq_pa;
+	cq->cq_uk.cq_base = info->cq_base;
+	cq->shadow_area_pa = info->shadow_area_pa;
+	cq->cq_uk.shadow_area = info->shadow_area;
+	cq->shadow_read_threshold = info->shadow_read_threshold;
+	cq->dev = info->dev;
+	cq->ceq_id = info->ceq_id;
+	cq->cq_uk.cq_size = info->num_elem;
+	cq->cq_type = I40IW_CQ_TYPE_CQP;
+	cq->ceqe_mask = info->ceqe_mask;
+	I40IW_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
+
+	cq->cq_uk.cq_id = 0;    /* control cq is id 0 always */
+	cq->ceq_id_valid = info->ceq_id_valid;
+	cq->tph_en = info->tph_en;
+	cq->tph_val = info->tph_val;
+	cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
+
+	cq->pbl_list = info->pbl_list;
+	cq->virtual_map = info->virtual_map;
+	cq->pbl_chunk_size = info->pbl_chunk_size;
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+	cq->cq_uk.polarity = true;
+
+	/* following are only for iw cqs so initialize them to zero */
+	cq->cq_uk.cqe_alloc_reg = NULL;
+	info->dev->ccq = cq;
+	return 0;
+}
+
+/**
+ * i40iw_sc_ccq_create_done - poll cqp for ccq create
+ * @ccq: ccq sc struct
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create_done(struct i40iw_sc_cq *ccq)
+{
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = ccq->dev->cqp;
+	return	i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CQ, NULL);
+}
+
+/**
+ * i40iw_sc_ccq_create - create control cq
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: overlow flag for ccq
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_create(struct i40iw_sc_cq *ccq,
+						  u64 scratch,
+						  bool check_overflow,
+						  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	enum i40iw_status_code ret_code;
+
+	cqp = ccq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+	set_64bit_val(wqe, 16,
+		      LS_64(ccq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, (ccq->virtual_map ? 0 : ccq->cq_pa));
+	set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+	set_64bit_val(wqe, 48,
+		      (ccq->virtual_map ? ccq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56,
+		      LS_64(ccq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = ccq->cq_uk.cq_id |
+		 LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ccq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(ccq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		ret_code = i40iw_sc_ccq_create_done(ccq);
+		if (ret_code)
+			return ret_code;
+	}
+	cqp->process_cqp_sds = i40iw_cqp_sds_cmd;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_ccq_destroy - destroy ccq during close
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
+						   u64 scratch,
+						   bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	enum i40iw_status_code ret_code = 0;
+	u32 tail, val, error;
+
+	cqp = ccq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
+	set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+
+	header = ccq->cq_uk.cq_id |
+		 LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_cq_init - initialize completion q
+ * @cq: cq struct
+ * @info: cq initialization info
+ */
+static enum i40iw_status_code i40iw_sc_cq_init(struct i40iw_sc_cq *cq,
+					       struct i40iw_cq_init_info *info)
+{
+	u32 __iomem *cqe_alloc_reg = NULL;
+	enum i40iw_status_code ret_code;
+	u32 pble_obj_cnt;
+	u32 arm_offset;
+
+	pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cq->cq_pa = info->cq_base_pa;
+	cq->dev = info->dev;
+	cq->ceq_id = info->ceq_id;
+	arm_offset = (info->dev->is_pf) ? I40E_PFPE_CQARM : I40E_VFPE_CQARM1;
+	if (i40iw_get_hw_addr(cq->dev))
+		cqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(cq->dev) +
+					      arm_offset);
+	info->cq_uk_init_info.cqe_alloc_reg = cqe_alloc_reg;
+	ret_code = i40iw_cq_uk_init(&cq->cq_uk, &info->cq_uk_init_info);
+	if (ret_code)
+		return ret_code;
+	cq->virtual_map = info->virtual_map;
+	cq->pbl_chunk_size = info->pbl_chunk_size;
+	cq->ceqe_mask = info->ceqe_mask;
+	cq->cq_type = (info->type) ? info->type : I40IW_CQ_TYPE_IWARP;
+
+	cq->shadow_area_pa = info->shadow_area_pa;
+	cq->shadow_read_threshold = info->shadow_read_threshold;
+
+	cq->ceq_id_valid = info->ceq_id_valid;
+	cq->tph_en = info->tph_en;
+	cq->tph_val = info->tph_val;
+
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_create - create completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: flag for overflow check
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_create(struct i40iw_sc_cq *cq,
+						 u64 scratch,
+						 bool check_overflow,
+						 bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	if (cq->cq_uk.cq_id > I40IW_MAX_CQID)
+		return I40IW_ERR_INVALID_CQ_ID;
+
+	if (cq->ceq_id > I40IW_MAX_CEQID)
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(cq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+
+	set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_destroy - destroy completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
+						  u64 scratch,
+						  bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_cq_modify - modify a Completion Queue
+ * @cq: cq struct
+ * @info: modification info struct
+ * @scratch:
+ * @post_sq: flag to post to sq
+ */
+static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
+						 struct i40iw_modify_cq_info *info,
+						 u64 scratch,
+						 bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	u32 cq_size, ceq_id, first_pm_pbl_idx;
+	u8 pbl_chunk_size;
+	bool virtual_map, ceq_id_valid, check_overflow;
+	u32 pble_obj_cnt;
+
+	if (info->ceq_valid && (info->ceq_id > I40IW_MAX_CEQID))
+		return I40IW_ERR_INVALID_CEQ_ID;
+
+	pble_obj_cnt = cq->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->cq_resize && info->virtual_map &&
+	    (info->first_pm_pbl_idx >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cqp = cq->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	cq->pbl_list = info->pbl_list;
+	cq->cq_pa = info->cq_pa;
+	cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+
+	cq_size = info->cq_resize ? info->cq_size : cq->cq_uk.cq_size;
+	if (info->ceq_change) {
+		ceq_id_valid = true;
+		ceq_id = info->ceq_id;
+	} else {
+		ceq_id_valid = cq->ceq_id_valid;
+		ceq_id = ceq_id_valid ? cq->ceq_id : 0;
+	}
+	virtual_map = info->cq_resize ? info->virtual_map : cq->virtual_map;
+	first_pm_pbl_idx = (info->cq_resize ?
+			    (info->virtual_map ? info->first_pm_pbl_idx : 0) :
+			    (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+	pbl_chunk_size = (info->cq_resize ?
+			  (info->virtual_map ? info->pbl_chunk_size : 0) :
+			  (cq->virtual_map ? cq->pbl_chunk_size : 0));
+	check_overflow = info->check_overflow_change ? info->check_overflow :
+			 cq->check_overflow;
+	cq->cq_uk.cq_size = cq_size;
+	cq->ceq_id_valid = ceq_id_valid;
+	cq->ceq_id = ceq_id;
+	cq->virtual_map = virtual_map;
+	cq->first_pm_pbl_idx = first_pm_pbl_idx;
+	cq->pbl_chunk_size = pbl_chunk_size;
+	cq->check_overflow = check_overflow;
+
+	set_64bit_val(wqe, 0, cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 16,
+		      LS_64(info->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+	set_64bit_val(wqe, 48, (cq->virtual_map ? first_pm_pbl_idx : 0));
+	set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
+
+	header = cq->cq_uk.cq_id |
+		 LS_64(ceq_id, I40IW_CQPSQ_CQ_CEQID) |
+		 LS_64(I40IW_CQP_OP_MODIFY_CQ, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->cq_resize, I40IW_CQPSQ_CQ_CQRESIZE) |
+		 LS_64(pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
+		 LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+		 LS_64(virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
+		 LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+		 LS_64(ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
+		 LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
+		 LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_MODIFY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_init - initialize qp
+ * @qp: sc qp
+ * @info: initialization qp info
+ */
+static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
+					       struct i40iw_qp_init_info *info)
+{
+	u32 __iomem *wqe_alloc_reg = NULL;
+	enum i40iw_status_code ret_code;
+	u32 pble_obj_cnt;
+	u8 wqe_size;
+	u32 offset;
+
+	qp->dev = info->pd->dev;
+	qp->sq_pa = info->sq_pa;
+	qp->rq_pa = info->rq_pa;
+	qp->hw_host_ctx_pa = info->host_ctx_pa;
+	qp->q2_pa = info->q2_pa;
+	qp->shadow_area_pa = info->shadow_area_pa;
+
+	qp->q2_buf = info->q2;
+	qp->pd = info->pd;
+	qp->hw_host_ctx = info->host_ctx;
+	offset = (qp->pd->dev->is_pf) ? I40E_PFPE_WQEALLOC : I40E_VFPE_WQEALLOC1;
+	if (i40iw_get_hw_addr(qp->pd->dev))
+		wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+					      offset);
+
+	info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
+	ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
+	if (ret_code)
+		return ret_code;
+	qp->virtual_map = info->virtual_map;
+
+	pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if ((info->virtual_map && (info->sq_pa >= pble_obj_cnt)) ||
+	    (info->virtual_map && (info->rq_pa >= pble_obj_cnt)))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	qp->llp_stream_handle = (void *)(-1);
+	qp->qp_type = (info->type) ? info->type : I40IW_QP_TYPE_IWARP;
+
+	qp->hw_sq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
+						    false);
+	i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
+		    __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
+	ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+					       &wqe_size);
+	if (ret_code)
+		return ret_code;
+	qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
+				(wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
+	i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
+		    "%s: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
+		    __func__, qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
+	qp->sq_tph_val = info->sq_tph_val;
+	qp->rq_tph_val = info->rq_tph_val;
+	qp->sq_tph_en = info->sq_tph_en;
+	qp->rq_tph_en = info->rq_tph_en;
+	qp->rcv_tph_en = info->rcv_tph_en;
+	qp->xmit_tph_en = info->xmit_tph_en;
+	qp->qs_handle = qp->pd->dev->qs_handle;
+	qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
+
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_create - create qp
+ * @qp: sc qp
+ * @info: qp create info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_create(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_create_qp_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+
+	if ((qp->qp_uk.qp_id < I40IW_MIN_IW_QP_ID) ||
+	    (qp->qp_uk.qp_id > I40IW_MAX_IW_QP_ID))
+		return I40IW_ERR_INVALID_QP_ID;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64((info->ord_valid ? 1 : 0), I40IW_CQPSQ_QP_ORDVALID) |
+		 LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+		 LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+		 LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_CREATE WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_modify - modify qp cqp wqe
+ * @qp: sc qp
+ * @info: modify qp info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_modify(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_modify_qp_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	u8 term_actions = 0;
+	u8 term_len = 0;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	if (info->next_iwarp_state == I40IW_QP_STATE_TERMINATE) {
+		if (info->dont_send_fin)
+			term_actions += I40IWQP_TERM_SEND_TERM_ONLY;
+		if (info->dont_send_term)
+			term_actions += I40IWQP_TERM_SEND_FIN_ONLY;
+		if ((term_actions == I40IWQP_TERM_SEND_TERM_AND_FIN) ||
+		    (term_actions == I40IWQP_TERM_SEND_TERM_ONLY))
+			term_len = info->termlen;
+	}
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
+		      LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_MODIFY_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->ord_valid, I40IW_CQPSQ_QP_ORDVALID) |
+		 LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
+		 LS_64(info->cached_var_valid, I40IW_CQPSQ_QP_CACHEDVARVALID) |
+		 LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
+		 LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
+		 LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
+		 LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+		 LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
+		 LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
+		 LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
+		 LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_MODIFY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_destroy - cqp destroy qp
+ * @qp: sc qp
+ * @scratch: u64 saved to be used during cqp completion
+ * @remove_hash_idx: flag if to remove hash idx
+ * @ignore_mw_bnd: memory window bind flag
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_destroy(
+					struct i40iw_sc_qp *qp,
+					u64 scratch,
+					bool remove_hash_idx,
+					bool ignore_mw_bnd,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_DESTROY_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(ignore_mw_bnd, I40IW_CQPSQ_QP_IGNOREMWBOUND) |
+		 LS_64(remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_DESTROY WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_flush_wqes - flush qp's wqe
+ * @qp: sc qp
+ * @info: dlush information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
+				struct i40iw_sc_qp *qp,
+				struct i40iw_qp_flush_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 temp = 0;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	bool flush_sq = false, flush_rq = false;
+
+	if (info->rq && !qp->flush_rq)
+		flush_rq = true;
+
+	if (info->sq && !qp->flush_sq)
+		flush_sq = true;
+
+	qp->flush_sq |= flush_sq;
+	qp->flush_rq |= flush_rq;
+	if (!flush_sq && !flush_rq) {
+		if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR)
+			return 0;
+	}
+
+	cqp = qp->pd->dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	if (info->userflushcode) {
+		if (flush_rq) {
+			temp |= LS_64(info->rq_minor_code, I40IW_CQPSQ_FWQE_RQMNERR) |
+				LS_64(info->rq_major_code, I40IW_CQPSQ_FWQE_RQMJERR);
+		}
+		if (flush_sq) {
+			temp |= LS_64(info->sq_minor_code, I40IW_CQPSQ_FWQE_SQMNERR) |
+				LS_64(info->sq_major_code, I40IW_CQPSQ_FWQE_SQMJERR);
+		}
+	}
+	set_64bit_val(wqe, 16, temp);
+
+	temp = (info->generate_ae) ?
+		info->ae_code | LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE) : 0;
+
+	set_64bit_val(wqe, 8, temp);
+
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_FLUSH_WQES, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->generate_ae, I40IW_CQPSQ_FWQE_GENERATE_AE) |
+		 LS_64(info->userflushcode, I40IW_CQPSQ_FWQE_USERFLCODE) |
+		 LS_64(flush_sq, I40IW_CQPSQ_FWQE_FLUSHSQ) |
+		 LS_64(flush_rq, I40IW_CQPSQ_FWQE_FLUSHRQ) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_FLUSH WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_upload_context - upload qp's context
+ * @dev: sc device struct
+ * @info: upload context info ptr for return
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_qp_upload_context(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_upload_context_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 16, info->buf_pa);
+
+	header = LS_64(info->qp_id, I40IW_CQPSQ_UCTX_QPID) |
+		 LS_64(I40IW_CQP_OP_UPLOAD_CONTEXT, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->qp_type, I40IW_CQPSQ_UCTX_QPTYPE) |
+		 LS_64(info->raw_format, I40IW_CQPSQ_UCTX_RAWFORMAT) |
+		 LS_64(info->freeze_qp, I40IW_CQPSQ_UCTX_FREEZEQP) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QP_UPLOAD_CTX WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_qp_setctx - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static enum i40iw_status_code i40iw_sc_qp_setctx(
+				struct i40iw_sc_qp *qp,
+				u64 *qp_ctx,
+				struct i40iw_qp_host_ctx_info *info)
+{
+	struct i40iwarp_offload_info *iw;
+	struct i40iw_tcp_offload_info *tcp;
+	u64 qw0, qw3, qw7 = 0;
+
+	iw = info->iwarp_info;
+	tcp = info->tcp_info;
+	qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
+	      LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
+	      LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
+	      LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
+	      LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
+	      LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
+	      LS_64(info->push_idx, I40IWQPC_PPIDX) |
+	      LS_64(info->push_mode_en, I40IWQPC_PMENA);
+
+	set_64bit_val(qp_ctx, 8, qp->sq_pa);
+	set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+	qw3 = LS_64(qp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+	      LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+	      LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE);
+
+	set_64bit_val(qp_ctx,
+		      128,
+		      LS_64(info->err_rq_idx, I40IWQPC_ERR_RQ_IDX));
+
+	set_64bit_val(qp_ctx,
+		      136,
+		      LS_64(info->send_cq_num, I40IWQPC_TXCQNUM) |
+		      LS_64(info->rcv_cq_num, I40IWQPC_RXCQNUM));
+
+	set_64bit_val(qp_ctx,
+		      168,
+		      LS_64(info->qp_compl_ctx, I40IWQPC_QPCOMPCTX));
+	set_64bit_val(qp_ctx,
+		      176,
+		      LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+		      LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+		      LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) |
+		      LS_64(qp->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE));
+
+	if (info->iwarp_info_valid) {
+		qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) |
+		       LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
+
+		qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
+		set_64bit_val(qp_ctx, 144, qp->q2_pa);
+		set_64bit_val(qp_ctx,
+			      152,
+			      LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
+
+		/*
+		* Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
+		*advertisable IRD of 64
+		*/
+		iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
+		set_64bit_val(qp_ctx,
+			      160,
+			      LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
+			      LS_64(iw->ird_size, I40IWQPC_IRDSIZE) |
+			      LS_64(iw->wr_rdresp_en, I40IWQPC_WRRDRSPOK) |
+			      LS_64(iw->rd_enable, I40IWQPC_RDOK) |
+			      LS_64(iw->snd_mark_en, I40IWQPC_SNDMARKERS) |
+			      LS_64(iw->bind_en, I40IWQPC_BINDEN) |
+			      LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
+			      LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+			      LS_64(1, I40IWQPC_IWARPMODE) |
+			      LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
+			      LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
+			      LS_64(iw->rcv_no_mpa_crc, I40IWQPC_RCVNOMPACRC) |
+			      LS_64(iw->rcv_mark_offset, I40IWQPC_RCVMARKOFFSET) |
+			      LS_64(iw->snd_mark_offset, I40IWQPC_SNDMARKOFFSET));
+	}
+	if (info->tcp_info_valid) {
+		qw0 |= LS_64(tcp->ipv4, I40IWQPC_IPV4) |
+		       LS_64(tcp->no_nagle, I40IWQPC_NONAGLE) |
+		       LS_64(tcp->insert_vlan_tag, I40IWQPC_INSERTVLANTAG) |
+		       LS_64(tcp->time_stamp, I40IWQPC_TIMESTAMP) |
+		       LS_64(tcp->cwnd_inc_limit, I40IWQPC_LIMIT) |
+		       LS_64(tcp->drop_ooo_seg, I40IWQPC_DROPOOOSEG) |
+		       LS_64(tcp->dup_ack_thresh, I40IWQPC_DUPACK_THRESH);
+
+		qw3 |= LS_64(tcp->ttl, I40IWQPC_TTL) |
+		       LS_64(tcp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
+		       LS_64(tcp->avoid_stretch_ack, I40IWQPC_AVOIDSTRETCHACK) |
+		       LS_64(tcp->tos, I40IWQPC_TOS) |
+		       LS_64(tcp->src_port, I40IWQPC_SRCPORTNUM) |
+		       LS_64(tcp->dst_port, I40IWQPC_DESTPORTNUM);
+
+		qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
+		set_64bit_val(qp_ctx,
+			      32,
+			      LS_64(tcp->dest_ip_addr2, I40IWQPC_DESTIPADDR2) |
+			      LS_64(tcp->dest_ip_addr3, I40IWQPC_DESTIPADDR3));
+
+		set_64bit_val(qp_ctx,
+			      40,
+			      LS_64(tcp->dest_ip_addr0, I40IWQPC_DESTIPADDR0) |
+			      LS_64(tcp->dest_ip_addr1, I40IWQPC_DESTIPADDR1));
+
+		set_64bit_val(qp_ctx,
+			      48,
+			      LS_64(tcp->snd_mss, I40IWQPC_SNDMSS) |
+				LS_64(tcp->vlan_tag, I40IWQPC_VLANTAG) |
+				LS_64(tcp->arp_idx, I40IWQPC_ARPIDX));
+
+		qw7 |= LS_64(tcp->flow_label, I40IWQPC_FLOWLABEL) |
+		       LS_64(tcp->wscale, I40IWQPC_WSCALE) |
+		       LS_64(tcp->ignore_tcp_opt, I40IWQPC_IGNORE_TCP_OPT) |
+		       LS_64(tcp->ignore_tcp_uns_opt, I40IWQPC_IGNORE_TCP_UNS_OPT) |
+		       LS_64(tcp->tcp_state, I40IWQPC_TCPSTATE) |
+		       LS_64(tcp->rcv_wscale, I40IWQPC_RCVSCALE) |
+		       LS_64(tcp->snd_wscale, I40IWQPC_SNDSCALE);
+
+		set_64bit_val(qp_ctx,
+			      72,
+			      LS_64(tcp->time_stamp_recent, I40IWQPC_TIMESTAMP_RECENT) |
+			      LS_64(tcp->time_stamp_age, I40IWQPC_TIMESTAMP_AGE));
+		set_64bit_val(qp_ctx,
+			      80,
+			      LS_64(tcp->snd_nxt, I40IWQPC_SNDNXT) |
+			      LS_64(tcp->snd_wnd, I40IWQPC_SNDWND));
+
+		set_64bit_val(qp_ctx,
+			      88,
+			      LS_64(tcp->rcv_nxt, I40IWQPC_RCVNXT) |
+			      LS_64(tcp->rcv_wnd, I40IWQPC_RCVWND));
+		set_64bit_val(qp_ctx,
+			      96,
+			      LS_64(tcp->snd_max, I40IWQPC_SNDMAX) |
+			      LS_64(tcp->snd_una, I40IWQPC_SNDUNA));
+		set_64bit_val(qp_ctx,
+			      104,
+			      LS_64(tcp->srtt, I40IWQPC_SRTT) |
+			      LS_64(tcp->rtt_var, I40IWQPC_RTTVAR));
+		set_64bit_val(qp_ctx,
+			      112,
+			      LS_64(tcp->ss_thresh, I40IWQPC_SSTHRESH) |
+			      LS_64(tcp->cwnd, I40IWQPC_CWND));
+		set_64bit_val(qp_ctx,
+			      120,
+			      LS_64(tcp->snd_wl1, I40IWQPC_SNDWL1) |
+			      LS_64(tcp->snd_wl2, I40IWQPC_SNDWL2));
+		set_64bit_val(qp_ctx,
+			      128,
+			      LS_64(tcp->max_snd_window, I40IWQPC_MAXSNDWND) |
+			      LS_64(tcp->rexmit_thresh, I40IWQPC_REXMIT_THRESH));
+		set_64bit_val(qp_ctx,
+			      184,
+			      LS_64(tcp->local_ipaddr3, I40IWQPC_LOCAL_IPADDR3) |
+			      LS_64(tcp->local_ipaddr2, I40IWQPC_LOCAL_IPADDR2));
+		set_64bit_val(qp_ctx,
+			      192,
+			      LS_64(tcp->local_ipaddr1, I40IWQPC_LOCAL_IPADDR1) |
+			      LS_64(tcp->local_ipaddr0, I40IWQPC_LOCAL_IPADDR0));
+	}
+
+	set_64bit_val(qp_ctx, 0, qw0);
+	set_64bit_val(qp_ctx, 24, qw3);
+	set_64bit_val(qp_ctx, 56, qw7);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "QP_HOST)CTX WQE",
+			qp_ctx, I40IW_QP_CTX_SIZE);
+	return 0;
+}
+
+/**
+ * i40iw_sc_alloc_stag - mr stag alloc
+ * @dev: sc device struct
+ * @info: stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_alloc_stag(
+				struct i40iw_sc_dev *dev,
+				struct i40iw_allocate_stag_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID) |
+		      LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+	set_64bit_val(wqe,
+		      40,
+		      LS_64(info->hmc_fcn_index, I40IW_CQPSQ_STAG_HMCFNIDX));
+
+	header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+		 LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "ALLOC_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_non_shared - non-shared mr registration
+ * @dev: sc device struct
+ * @info: mr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
+				struct i40iw_sc_dev *dev,
+				struct i40iw_reg_ns_stag_info *info,
+				u64 scratch,
+				bool post_sq)
+{
+	u64 *wqe;
+	u64 temp;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	u32 pble_obj_cnt;
+	bool remote_access;
+	u8 addr_type;
+
+	if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+				   I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+		remote_access = true;
+	else
+		remote_access = false;
+
+	pble_obj_cnt = dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
+
+	if (info->chunk_size && (info->first_pm_pbl_index >= pble_obj_cnt))
+		return I40IW_ERR_INVALID_PBLE_INDEX;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
+	set_64bit_val(wqe, 0, temp);
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN) |
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_key, I40IW_CQPSQ_STAG_KEY) |
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+	if (!info->chunk_size) {
+		set_64bit_val(wqe, 32, info->reg_addr_pa);
+		set_64bit_val(wqe, 48, 0);
+	} else {
+		set_64bit_val(wqe, 32, 0);
+		set_64bit_val(wqe, 48, info->first_pm_pbl_index);
+	}
+	set_64bit_val(wqe, 40, info->hmc_fcn_index);
+	set_64bit_val(wqe, 56, 0);
+
+	addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+	header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
+		 LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+		 LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
+		 LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_NS WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mr_reg_shared - registered shared memory region
+ * @dev: sc device struct
+ * @info: info for shared memory registeration
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mr_reg_shared(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_register_shared_stag *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 temp, va64, fbo, header;
+	u32 va32;
+	bool remote_access;
+	u8 addr_type;
+
+	if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
+				   I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+		remote_access = true;
+	else
+		remote_access = false;
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	va64 = (uintptr_t)(info->va);
+	va32 = (u32)(va64 & 0x00000000FFFFFFFF);
+	fbo = (u64)(va32 & (4096 - 1));
+
+	set_64bit_val(wqe,
+		      0,
+		      (info->addr_type == I40IW_ADDR_TYPE_VA_BASED ? (uintptr_t)info->va : fbo));
+
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+	temp = LS_64(info->new_stag_key, I40IW_CQPSQ_STAG_KEY) |
+	       LS_64(info->new_stag_idx, I40IW_CQPSQ_STAG_IDX) |
+	       LS_64(info->parent_stag_idx, I40IW_CQPSQ_STAG_PARENTSTAGIDX);
+	set_64bit_val(wqe, 16, temp);
+
+	addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
+	header = LS_64(I40IW_CQP_OP_REG_SMR, I40IW_CQPSQ_OPCODE) |
+		 LS_64(1, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
+		 LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
+		 LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_SHARED WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_dealloc_stag - deallocate stag
+ * @dev: sc device struct
+ * @info: dealloc stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_dealloc_stag(
+					struct i40iw_sc_dev *dev,
+					struct i40iw_dealloc_stag_info *info,
+					u64 scratch,
+					bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      8,
+		      LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_DEALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(info->mr, I40IW_CQPSQ_STAG_MR) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "DEALLOC_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_query_stag - query hardware for stag
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @stag_index: stag index for query
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_query_stag(struct i40iw_sc_dev *dev,
+						  u64 scratch,
+						  u32 stag_index,
+						  bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(stag_index, I40IW_CQPSQ_QUERYSTAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_QUERY_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QUERY_STAG WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_mw_alloc - mw allocate
+ * @dev: sc device struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @mw_stag_index:stag index
+ * @pd_id: pd is for this mw
+ * @post_sq: flag for cqp db to ring
+ */
+static enum i40iw_status_code i40iw_sc_mw_alloc(
+					struct i40iw_sc_dev *dev,
+					u64 scratch,
+					u32 mw_stag_index,
+					u16 pd_id,
+					bool post_sq)
+{
+	u64 header;
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe, 8, LS_64(pd_id, I40IW_CQPSQ_STAG_PDID));
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(mw_stag_index, I40IW_CQPSQ_STAG_IDX));
+
+	header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MW_ALLOC WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_send_lsmm - send last streaming mode message
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ * @stag: stag of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm(struct i40iw_sc_qp *qp,
+			       void *lsmm_buf,
+			       u32 size,
+			       i40iw_stag stag)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+	set_64bit_val(wqe, 8, (size | LS_64(stag, I40IWQPSQ_FRAG_STAG)));
+
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+		 LS_64(1, I40IWQPSQ_STREAMMODE) |
+		 LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+		 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_QP, "SEND_LSMM WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_lsmm_nostag - for privilege qp
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ */
+static void i40iw_sc_send_lsmm_nostag(struct i40iw_sc_qp *qp,
+				      void *lsmm_buf,
+				      u32 size)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+
+	set_64bit_val(wqe, 8, size);
+
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+		 LS_64(1, I40IWQPSQ_STREAMMODE) |
+		 LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
+		 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_send_rtt - send last read0 or write0
+ * @qp: sc qp struct
+ * @read: Do read0 or write0
+ */
+static void i40iw_sc_send_rtt(struct i40iw_sc_qp *qp, bool read)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+	if (read) {
+		header = LS_64(0x1234, I40IWQPSQ_REMSTAG) |
+			 LS_64(I40IWQP_OP_RDMA_READ, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+		set_64bit_val(wqe, 8, ((u64)0xabcd << 32));
+	} else {
+		header = LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+	}
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "RTR WQE",
+			wqe, I40IW_QP_WQE_MIN_SIZE);
+}
+
+/**
+ * i40iw_sc_post_wqe0 - send wqe with opcode
+ * @qp: sc qp struct
+ * @opcode: opcode to use for wqe0
+ */
+static enum i40iw_status_code i40iw_sc_post_wqe0(struct i40iw_sc_qp *qp, u8 opcode)
+{
+	u64 *wqe;
+	u64 header;
+	struct i40iw_qp_uk *qp_uk;
+
+	qp_uk = &qp->qp_uk;
+	wqe = qp_uk->sq_base->elem;
+
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+	switch (opcode) {
+	case I40IWQP_OP_NOP:
+		set_64bit_val(wqe, 0, 0);
+		set_64bit_val(wqe, 8, 0);
+		set_64bit_val(wqe, 16, 0);
+		header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
+
+		i40iw_insert_wqe_hdr(wqe, header);
+		break;
+	case I40IWQP_OP_RDMA_SEND:
+		set_64bit_val(wqe, 0, 0);
+		set_64bit_val(wqe, 8, 0);
+		set_64bit_val(wqe, 16, 0);
+		header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
+			 LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID) |
+			 LS_64(1, I40IWQPSQ_STREAMMODE) |
+			 LS_64(1, I40IWQPSQ_WAITFORRCVPDU);
+
+		i40iw_insert_wqe_hdr(wqe, header);
+		break;
+	default:
+		i40iw_debug(qp->dev, I40IW_DEBUG_QP, "%s: Invalid WQE zero opcode\n",
+			    __func__);
+		break;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_dma_mem query_fpm_mem;
+	struct i40iw_virt_mem virt_mem;
+	struct i40iw_vfdev *vf_dev = NULL;
+	u32 mem_size;
+	enum i40iw_status_code ret_code = 0;
+	bool poll_registers = true;
+	u16 iw_vf_idx;
+	u8 wait_type;
+
+	if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+	    (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+		return I40IW_ERR_INVALID_HMCFN_ID;
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC, "hmc_fn_id %u, dev->hmc_fn_id %u\n", hmc_fn_id,
+		    dev->hmc_fn_id);
+	if (hmc_fn_id == dev->hmc_fn_id) {
+		hmc_info = dev->hmc_info;
+		query_fpm_mem.pa = dev->fpm_query_buf_pa;
+		query_fpm_mem.va = dev->fpm_query_buf;
+	} else {
+		vf_dev = i40iw_vfdev_from_fpm(dev, hmc_fn_id);
+		if (!vf_dev)
+			return I40IW_ERR_INVALID_VF_ID;
+
+		hmc_info = &vf_dev->hmc_info;
+		iw_vf_idx = vf_dev->iw_vf_idx;
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "vf_dev %p, hmc_info %p, hmc_obj %p\n", vf_dev,
+			    hmc_info, hmc_info->hmc_obj);
+		if (!vf_dev->fpm_query_buf) {
+			if (!dev->vf_fpm_query_buf[iw_vf_idx].va) {
+				ret_code = i40iw_alloc_query_fpm_buf(dev,
+								     &dev->vf_fpm_query_buf[iw_vf_idx]);
+				if (ret_code)
+					return ret_code;
+			}
+			vf_dev->fpm_query_buf = dev->vf_fpm_query_buf[iw_vf_idx].va;
+			vf_dev->fpm_query_buf_pa = dev->vf_fpm_query_buf[iw_vf_idx].pa;
+		}
+		query_fpm_mem.pa = vf_dev->fpm_query_buf_pa;
+		query_fpm_mem.va = vf_dev->fpm_query_buf;
+		/**
+		 * It is HARDWARE specific:
+		 * this call is done by PF for VF and
+		 * i40iw_sc_query_fpm_values needs ccq poll
+		 * because PF ccq is already created.
+		 */
+		poll_registers = false;
+	}
+
+	hmc_info->hmc_fn_id = hmc_fn_id;
+
+	if (hmc_fn_id != dev->hmc_fn_id) {
+		ret_code =
+			i40iw_cqp_query_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+	} else {
+		wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+			    (u8)I40IW_CQP_WAIT_POLL_CQ;
+
+		ret_code = i40iw_sc_query_fpm_values(
+					dev->cqp,
+					0,
+					hmc_info->hmc_fn_id,
+					&query_fpm_mem,
+					true,
+					wait_type);
+	}
+	if (ret_code)
+		return ret_code;
+
+	/* parse the fpm_query_buf and fill hmc obj info */
+	ret_code =
+		i40iw_sc_parse_fpm_query_buf((u64 *)query_fpm_mem.va,
+					     hmc_info,
+					     &dev->hmc_fpm_misc);
+	if (ret_code)
+		return ret_code;
+	i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "QUERY FPM BUFFER",
+			query_fpm_mem.va, I40IW_QUERY_FPM_BUF_SIZE);
+
+	if (hmc_fn_id != dev->hmc_fn_id) {
+		i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
+
+		/* parse the fpm_commit_buf and fill hmc obj info */
+		i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj);
+		mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+			   (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
+		ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+		if (ret_code)
+			return ret_code;
+		hmc_info->sd_table.sd_entry = virt_mem.va;
+	}
+
+	/* fill size of objects which are fixed */
+	hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4;
+	hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4;
+	hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8;
+	hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192;
+	hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_configure_iw_fpm() - commits hmc obj cnt values using cqp command and
+ * populates fpm base address in hmc_info
+ * @dev : ptr to i40iw_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev,
+							u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_obj_info *obj_info;
+	u64 *buf;
+	struct i40iw_dma_mem commit_fpm_mem;
+	u32 i, j;
+	enum i40iw_status_code ret_code = 0;
+	bool poll_registers = true;
+	u8 wait_type;
+
+	if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
+	    (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
+		return I40IW_ERR_INVALID_HMCFN_ID;
+
+	if (hmc_fn_id == dev->hmc_fn_id) {
+		hmc_info = dev->hmc_info;
+	} else {
+		hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, hmc_fn_id);
+		poll_registers = false;
+	}
+	if (!hmc_info)
+		return I40IW_ERR_BAD_PTR;
+
+	obj_info = hmc_info->hmc_obj;
+	buf = dev->fpm_commit_buf;
+
+	/* copy cnt values in commit buf */
+	for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE;
+	     i++, j += 8)
+		set_64bit_val(buf, j, (u64)obj_info[i].cnt);
+
+	set_64bit_val(buf, 40, 0);   /* APBVT rsvd */
+
+	commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
+	commit_fpm_mem.va = dev->fpm_commit_buf;
+	wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
+			(u8)I40IW_CQP_WAIT_POLL_CQ;
+	ret_code = i40iw_sc_commit_fpm_values(
+					dev->cqp,
+					0,
+					hmc_info->hmc_fn_id,
+					&commit_fpm_mem,
+					true,
+					wait_type);
+
+	/* parse the fpm_commit_buf and fill hmc obj info */
+	if (!ret_code)
+		ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf, hmc_info->hmc_obj);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
+			commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
+
+	return ret_code;
+}
+
+/**
+ * cqp_sds_wqe_fill - fill cqp wqe doe sd
+ * @cqp: struct for cqp hw
+ * @info; sd info for wqe
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_update_sds_info *info,
+					       u64 scratch)
+{
+	u64 data;
+	u64 header;
+	u64 *wqe;
+	int mem_entries, wqe_entries;
+	struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	I40IW_CQP_INIT_WQE(wqe);
+	wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
+	mem_entries = info->cnt - wqe_entries;
+
+	header = LS_64(I40IW_CQP_OP_UPDATE_PE_SDS, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
+		 LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
+
+	if (mem_entries) {
+		memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4));
+		data = sdbuf->pa;
+	} else {
+		data = 0;
+	}
+	data |= LS_64(info->hmc_fn_id, I40IW_CQPSQ_UPESD_HMCFNID);
+
+	set_64bit_val(wqe, 16, data);
+
+	switch (wqe_entries) {
+	case 3:
+		set_64bit_val(wqe, 48,
+			      (LS_64(info->entry[2].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+					LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+		set_64bit_val(wqe, 56, info->entry[2].data);
+		/* fallthrough */
+	case 2:
+		set_64bit_val(wqe, 32,
+			      (LS_64(info->entry[1].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
+					LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
+
+		set_64bit_val(wqe, 40, info->entry[1].data);
+		/* fallthrough */
+	case 1:
+		set_64bit_val(wqe, 0,
+			      LS_64(info->entry[0].cmd, I40IW_CQPSQ_UPESD_SDCMD));
+
+		set_64bit_val(wqe, 8, info->entry[0].data);
+		break;
+	default:
+		break;
+	}
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "UPDATE_PE_SDS WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	return 0;
+}
+
+/**
+ * i40iw_update_pe_sds - cqp wqe for sd
+ * @dev: ptr to i40iw_dev struct
+ * @info: sd info for sd's
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static enum i40iw_status_code i40iw_update_pe_sds(struct i40iw_sc_dev *dev,
+						  struct i40iw_update_sds_info *info,
+						  u64 scratch)
+{
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	enum i40iw_status_code ret_code;
+
+	ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
+	if (!ret_code)
+		i40iw_sc_cqp_post_sq(cqp);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_update_sds_noccq - update sd before ccq created
+ * @dev: sc device struct
+ * @info: sd info for sd's
+ */
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+					      struct i40iw_update_sds_info *info)
+{
+	u32 error, val, tail;
+	struct i40iw_sc_cqp *cqp = dev->cqp;
+	enum i40iw_status_code ret_code;
+
+	ret_code = cqp_sds_wqe_fill(cqp, info, 0);
+	if (ret_code)
+		return ret_code;
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error)
+		return I40IW_ERR_CQP_COMPL_ERROR;
+
+	i40iw_sc_cqp_post_sq(cqp);
+	ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_sc_suspend_qp - suspend qp for param change
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+					   struct i40iw_sc_qp *qp,
+					   u64 scratch)
+{
+	u64 header;
+	u64 *wqe;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_SUSPENDQP_QPID) |
+		 LS_64(I40IW_CQP_OP_SUSPEND_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SUSPEND_QP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_resume_qp - resume qp after suspend
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+					  struct i40iw_sc_qp *qp,
+					  u64 scratch)
+{
+	u64 header;
+	u64 *wqe;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+			LS_64(qp->qs_handle, I40IW_CQPSQ_RESUMEQP_QSHANDLE));
+
+	header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_RESUMEQP_QPID) |
+		 LS_64(I40IW_CQP_OP_RESUME_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "RESUME_QP WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+/**
+ * i40iw_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(
+					struct i40iw_sc_cqp *cqp,
+					u64 scratch,
+					u8 hmc_fn_id,
+					bool post_sq,
+					bool poll_registers)
+{
+	u64 header;
+	u64 *wqe;
+	u32 tail, val, error;
+	enum i40iw_status_code ret_code = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+	set_64bit_val(wqe,
+		      16,
+		      LS_64(hmc_fn_id, I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID));
+
+	header = LS_64(I40IW_CQP_OP_SHMC_PAGES_ALLOCATED, I40IW_CQPSQ_OPCODE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	i40iw_insert_wqe_hdr(wqe, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+	i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
+	if (error) {
+		ret_code = I40IW_ERR_CQP_COMPL_ERROR;
+		return ret_code;
+	}
+	if (post_sq) {
+		i40iw_sc_cqp_post_sq(cqp);
+		if (poll_registers)
+			/* check for cqp sq tail update */
+			ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
+		else
+			ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
+								 I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
+								 NULL);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_ring_full - check if cqp ring is full
+ * @cqp: struct for cqp hw
+ */
+static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
+{
+	return I40IW_RING_FULL_ERR(cqp->sq_ring);
+}
+
+/**
+ * i40iw_config_fpm_values - configure HMC objects
+ * @dev: sc device struct
+ * @qp_count: desired qp count
+ */
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count)
+{
+	struct i40iw_virt_mem virt_mem;
+	u32 i, mem_size;
+	u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
+	u32 powerof2;
+	u64 sd_needed, bytes_needed;
+	u32 loop_count = 0;
+
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_fpm_misc *hmc_fpm_misc;
+	enum i40iw_status_code ret_code = 0;
+
+	hmc_info = dev->hmc_info;
+	hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+	ret_code = i40iw_sc_init_iw_hmc(dev, dev->hmc_fn_id);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "i40iw_sc_init_iw_hmc returned error_code = %d\n",
+			    ret_code);
+		return ret_code;
+	}
+
+	bytes_needed = 0;
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+		hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+		bytes_needed +=
+		    (hmc_info->hmc_obj[i].max_cnt) * (hmc_info->hmc_obj[i].size);
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s i[%04d] max_cnt[0x%04X] size[0x%04llx]\n",
+			    __func__, i, hmc_info->hmc_obj[i].max_cnt,
+			    hmc_info->hmc_obj[i].size);
+	}
+	sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1; /* round up */
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
+		    __func__, sd_needed, hmc_info->first_sd_index);
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "%s: bytes_needed=0x%llx sd count %d where max sd is %d\n",
+		    __func__, bytes_needed, hmc_info->sd_table.sd_cnt,
+		    hmc_fpm_misc->max_sds);
+
+	qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
+	qpwantedoriginal = qpwanted;
+	mrwanted = hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt;
+	pblewanted = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt;
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d\n",
+		    qp_count, hmc_fpm_misc->max_sds,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_CQ].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt);
+
+	do {
+		++loop_count;
+		hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt = qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt =
+			min(2 * qpwanted, hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt);
+		hmc_info->hmc_obj[I40IW_HMC_IW_SRQ].cnt = 0x00; /* Reserved */
+		hmc_info->hmc_obj[I40IW_HMC_IW_HTE].cnt =
+					qpwanted * hmc_fpm_misc->ht_multiplier;
+		hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_ARP].max_cnt;
+		hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
+		hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
+
+		hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
+		hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
+		hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
+			hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+		hmc_info->hmc_obj[I40IW_HMC_IW_TIMER].cnt =
+			((qpwanted) / 512 + 1) * hmc_fpm_misc->timer_bucket;
+		hmc_info->hmc_obj[I40IW_HMC_IW_FSIMC].cnt = 0x00;
+		hmc_info->hmc_obj[I40IW_HMC_IW_FSIAV].cnt = 0x00;
+		hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
+
+		/* How much memory is needed for all the objects. */
+		bytes_needed = 0;
+		for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+			bytes_needed +=
+			    (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+		sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;
+		if ((loop_count > 1000) ||
+		    ((!(loop_count % 10)) &&
+		    (qpwanted > qpwantedoriginal * 2 / 3))) {
+			if (qpwanted > FPM_MULTIPLIER) {
+				qpwanted -= FPM_MULTIPLIER;
+				powerof2 = 1;
+				while (powerof2 < qpwanted)
+					powerof2 *= 2;
+				powerof2 /= 2;
+				qpwanted = powerof2;
+			} else {
+				qpwanted /= 2;
+			}
+		}
+		if (mrwanted > FPM_MULTIPLIER * 10)
+			mrwanted -= FPM_MULTIPLIER * 10;
+		if (pblewanted > FPM_MULTIPLIER * 1000)
+			pblewanted -= FPM_MULTIPLIER * 1000;
+	} while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
+
+	bytes_needed = 0;
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
+		bytes_needed += (hmc_info->hmc_obj[i].cnt) * (hmc_info->hmc_obj[i].size);
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s i[%04d] cnt[0x%04x] size[0x%04llx]\n",
+			    __func__, i, hmc_info->hmc_obj[i].cnt,
+			    hmc_info->hmc_obj[i].size);
+	}
+	sd_needed = (bytes_needed / I40IW_HMC_DIRECT_BP_SIZE) + 1;    /* round up not truncate. */
+
+	i40iw_debug(dev, I40IW_DEBUG_HMC,
+		    "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
+		    loop_count, sd_needed,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt,
+		    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt);
+
+	ret_code = i40iw_sc_configure_iw_fpm(dev, dev->hmc_fn_id);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "configure_iw_fpm returned error_code[x%08X]\n",
+			    i40iw_rd32(dev->hw, dev->is_pf ? I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1));
+		return ret_code;
+	}
+
+	hmc_info->sd_table.sd_cnt = (u32)sd_needed;
+
+	mem_size = sizeof(struct i40iw_hmc_sd_entry) *
+		   (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
+	ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: failed to allocate memory for sd_entry buffer\n",
+			    __func__);
+		return ret_code;
+	}
+	hmc_info->sd_table.sd_entry = virt_mem.va;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_exec_cqp_cmd - execute cqp cmd when wqe are available
+ * @dev: rdma device
+ * @pcmdinfo: cqp command info
+ */
+static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
+						 struct cqp_commands_info *pcmdinfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_dma_mem values_mem;
+
+	dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
+	switch (pcmdinfo->cqp_cmd) {
+	case OP_DELETE_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_del_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.cqp,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.entry_idx,
+				pcmdinfo->in.u.del_local_mac_ipaddr_entry.ignore_ref_count,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CEQ_DESTROY:
+		status = i40iw_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
+					      pcmdinfo->in.u.ceq_destroy.scratch,
+					      pcmdinfo->post_sq);
+		break;
+	case OP_AEQ_DESTROY:
+		status = i40iw_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
+					      pcmdinfo->in.u.aeq_destroy.scratch,
+					      pcmdinfo->post_sq);
+
+		break;
+	case OP_DELETE_ARP_CACHE_ENTRY:
+		status = i40iw_sc_del_arp_cache_entry(
+				pcmdinfo->in.u.del_arp_cache_entry.cqp,
+				pcmdinfo->in.u.del_arp_cache_entry.scratch,
+				pcmdinfo->in.u.del_arp_cache_entry.arp_index,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_APBVT_ENTRY:
+		status = i40iw_sc_manage_apbvt_entry(
+				pcmdinfo->in.u.manage_apbvt_entry.cqp,
+				&pcmdinfo->in.u.manage_apbvt_entry.info,
+				pcmdinfo->in.u.manage_apbvt_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CEQ_CREATE:
+		status = i40iw_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
+					     pcmdinfo->in.u.ceq_create.scratch,
+					     pcmdinfo->post_sq);
+		break;
+	case OP_AEQ_CREATE:
+		status = i40iw_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
+					     pcmdinfo->in.u.aeq_create.scratch,
+					     pcmdinfo->post_sq);
+		break;
+	case OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_alloc_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.cqp,
+				pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_ADD_LOCAL_MAC_IPADDR_ENTRY:
+		status = i40iw_sc_add_local_mac_ipaddr_entry(
+				pcmdinfo->in.u.add_local_mac_ipaddr_entry.cqp,
+				&pcmdinfo->in.u.add_local_mac_ipaddr_entry.info,
+				pcmdinfo->in.u.add_local_mac_ipaddr_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_QHASH_TABLE_ENTRY:
+		status = i40iw_sc_manage_qhash_table_entry(
+				pcmdinfo->in.u.manage_qhash_table_entry.cqp,
+				&pcmdinfo->in.u.manage_qhash_table_entry.info,
+				pcmdinfo->in.u.manage_qhash_table_entry.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_MODIFY:
+		status = i40iw_sc_qp_modify(
+				pcmdinfo->in.u.qp_modify.qp,
+				&pcmdinfo->in.u.qp_modify.info,
+				pcmdinfo->in.u.qp_modify.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_UPLOAD_CONTEXT:
+		status = i40iw_sc_qp_upload_context(
+				pcmdinfo->in.u.qp_upload_context.dev,
+				&pcmdinfo->in.u.qp_upload_context.info,
+				pcmdinfo->in.u.qp_upload_context.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_CQ_CREATE:
+		status = i40iw_sc_cq_create(
+				pcmdinfo->in.u.cq_create.cq,
+				pcmdinfo->in.u.cq_create.scratch,
+				pcmdinfo->in.u.cq_create.check_overflow,
+				pcmdinfo->post_sq);
+		break;
+	case OP_CQ_DESTROY:
+		status = i40iw_sc_cq_destroy(
+				pcmdinfo->in.u.cq_destroy.cq,
+				pcmdinfo->in.u.cq_destroy.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_CREATE:
+		status = i40iw_sc_qp_create(
+				pcmdinfo->in.u.qp_create.qp,
+				&pcmdinfo->in.u.qp_create.info,
+				pcmdinfo->in.u.qp_create.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_QP_DESTROY:
+		status = i40iw_sc_qp_destroy(
+				pcmdinfo->in.u.qp_destroy.qp,
+				pcmdinfo->in.u.qp_destroy.scratch,
+				pcmdinfo->in.u.qp_destroy.remove_hash_idx,
+				pcmdinfo->in.u.qp_destroy.
+				ignore_mw_bnd,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_ALLOC_STAG:
+		status = i40iw_sc_alloc_stag(
+				pcmdinfo->in.u.alloc_stag.dev,
+				&pcmdinfo->in.u.alloc_stag.info,
+				pcmdinfo->in.u.alloc_stag.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MR_REG_NON_SHARED:
+		status = i40iw_sc_mr_reg_non_shared(
+				pcmdinfo->in.u.mr_reg_non_shared.dev,
+				&pcmdinfo->in.u.mr_reg_non_shared.info,
+				pcmdinfo->in.u.mr_reg_non_shared.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_DEALLOC_STAG:
+		status = i40iw_sc_dealloc_stag(
+				pcmdinfo->in.u.dealloc_stag.dev,
+				&pcmdinfo->in.u.dealloc_stag.info,
+				pcmdinfo->in.u.dealloc_stag.scratch,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_MW_ALLOC:
+		status = i40iw_sc_mw_alloc(
+				pcmdinfo->in.u.mw_alloc.dev,
+				pcmdinfo->in.u.mw_alloc.scratch,
+				pcmdinfo->in.u.mw_alloc.mw_stag_index,
+				pcmdinfo->in.u.mw_alloc.pd_id,
+				pcmdinfo->post_sq);
+
+		break;
+	case OP_QP_FLUSH_WQES:
+		status = i40iw_sc_qp_flush_wqes(
+				pcmdinfo->in.u.qp_flush_wqes.qp,
+				&pcmdinfo->in.u.qp_flush_wqes.info,
+				pcmdinfo->in.u.qp_flush_wqes.
+				scratch, pcmdinfo->post_sq);
+		break;
+	case OP_ADD_ARP_CACHE_ENTRY:
+		status = i40iw_sc_add_arp_cache_entry(
+				pcmdinfo->in.u.add_arp_cache_entry.cqp,
+				&pcmdinfo->in.u.add_arp_cache_entry.info,
+				pcmdinfo->in.u.add_arp_cache_entry.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_MANAGE_PUSH_PAGE:
+		status = i40iw_sc_manage_push_page(
+				pcmdinfo->in.u.manage_push_page.cqp,
+				&pcmdinfo->in.u.manage_push_page.info,
+				pcmdinfo->in.u.manage_push_page.scratch,
+				pcmdinfo->post_sq);
+		break;
+	case OP_UPDATE_PE_SDS:
+		/* case I40IW_CQP_OP_UPDATE_PE_SDS */
+		status = i40iw_update_pe_sds(
+				pcmdinfo->in.u.update_pe_sds.dev,
+				&pcmdinfo->in.u.update_pe_sds.info,
+				pcmdinfo->in.u.update_pe_sds.
+				scratch);
+
+		break;
+	case OP_MANAGE_HMC_PM_FUNC_TABLE:
+		status = i40iw_sc_manage_hmc_pm_func_table(
+				pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
+				pcmdinfo->in.u.manage_hmc_pm.scratch,
+				(u8)pcmdinfo->in.u.manage_hmc_pm.info.vf_id,
+				pcmdinfo->in.u.manage_hmc_pm.info.free_fcn,
+				true);
+		break;
+	case OP_SUSPEND:
+		status = i40iw_sc_suspend_qp(
+				pcmdinfo->in.u.suspend_resume.cqp,
+				pcmdinfo->in.u.suspend_resume.qp,
+				pcmdinfo->in.u.suspend_resume.scratch);
+		break;
+	case OP_RESUME:
+		status = i40iw_sc_resume_qp(
+				pcmdinfo->in.u.suspend_resume.cqp,
+				pcmdinfo->in.u.suspend_resume.qp,
+				pcmdinfo->in.u.suspend_resume.scratch);
+		break;
+	case OP_MANAGE_VF_PBLE_BP:
+		status = i40iw_manage_vf_pble_bp(
+				pcmdinfo->in.u.manage_vf_pble_bp.cqp,
+				&pcmdinfo->in.u.manage_vf_pble_bp.info,
+				pcmdinfo->in.u.manage_vf_pble_bp.scratch, true);
+		break;
+	case OP_QUERY_FPM_VALUES:
+		values_mem.pa = pcmdinfo->in.u.query_fpm_values.fpm_values_pa;
+		values_mem.va = pcmdinfo->in.u.query_fpm_values.fpm_values_va;
+		status = i40iw_sc_query_fpm_values(
+				pcmdinfo->in.u.query_fpm_values.cqp,
+				pcmdinfo->in.u.query_fpm_values.scratch,
+				pcmdinfo->in.u.query_fpm_values.hmc_fn_id,
+				&values_mem, true, I40IW_CQP_WAIT_EVENT);
+		break;
+	case OP_COMMIT_FPM_VALUES:
+		values_mem.pa = pcmdinfo->in.u.commit_fpm_values.fpm_values_pa;
+		values_mem.va = pcmdinfo->in.u.commit_fpm_values.fpm_values_va;
+		status = i40iw_sc_commit_fpm_values(
+				pcmdinfo->in.u.commit_fpm_values.cqp,
+				pcmdinfo->in.u.commit_fpm_values.scratch,
+				pcmdinfo->in.u.commit_fpm_values.hmc_fn_id,
+				&values_mem,
+				true,
+				I40IW_CQP_WAIT_EVENT);
+		break;
+	default:
+		status = I40IW_NOT_SUPPORTED;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ * i40iw_process_cqp_cmd - process all cqp commands
+ * @dev: sc device struct
+ * @pcmdinfo: cqp command info
+ */
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+					     struct cqp_commands_info *pcmdinfo)
+{
+	enum i40iw_status_code status = 0;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&dev->cqp_lock, flags);
+	if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
+		status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+	else
+		list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
+	spin_unlock_irqrestore(&dev->cqp_lock, flags);
+	return status;
+}
+
+/**
+ * i40iw_process_bh - called from tasklet for cqp list
+ * @dev: sc device struct
+ */
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
+{
+	enum i40iw_status_code status = 0;
+	struct cqp_commands_info *pcmdinfo;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&dev->cqp_lock, flags);
+	while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
+		pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
+
+		status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
+		if (status)
+			break;
+	}
+	spin_unlock_irqrestore(&dev->cqp_lock, flags);
+	return status;
+}
+
+/**
+ * i40iw_iwarp_opcode - determine if incoming is rdma layer
+ * @info: aeq info for the packet
+ * @pkt: packet for error
+ */
+static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
+{
+	u16 *mpa;
+	u32 opcode = 0xffffffff;
+
+	if (info->q2_data_written) {
+		mpa = (u16 *)pkt;
+		opcode = ntohs(mpa[1]) & 0xf;
+	}
+	return opcode;
+}
+
+/**
+ * i40iw_locate_mpa - return pointer to mpa in the pkt
+ * @pkt: packet with data
+ */
+static u8 *i40iw_locate_mpa(u8 *pkt)
+{
+	/* skip over ethernet header */
+	pkt += I40IW_MAC_HLEN;
+
+	/* Skip over IP and TCP headers */
+	pkt += 4 * (pkt[0] & 0x0f);
+	pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+	return pkt;
+}
+
+/**
+ * i40iw_setup_termhdr - termhdr for terminate pkt
+ * @qp: sc qp ptr for pkt
+ * @hdr: term hdr
+ * @opcode: flush opcode for termhdr
+ * @layer_etype: error layer + error type
+ * @err: error cod ein the header
+ */
+static void i40iw_setup_termhdr(struct i40iw_sc_qp *qp,
+				struct i40iw_terminate_hdr *hdr,
+				enum i40iw_flush_opcode opcode,
+				u8 layer_etype,
+				u8 err)
+{
+	qp->flush_code = opcode;
+	hdr->layer_etype = layer_etype;
+	hdr->error_code = err;
+}
+
+/**
+ * i40iw_bld_terminate_hdr - build terminate message header
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
+				   struct i40iw_aeqe_info *info)
+{
+	u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+	u16 ddp_seg_len;
+	int copy_len = 0;
+	u8 is_tagged = 0;
+	enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
+	u32 opcode;
+	struct i40iw_terminate_hdr *termhdr;
+
+	termhdr = (struct i40iw_terminate_hdr *)qp->q2_buf;
+	memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
+
+	if (info->q2_data_written) {
+		/* Use data from offending packet to fill in ddp & rdma hdrs */
+		pkt = i40iw_locate_mpa(pkt);
+		ddp_seg_len = ntohs(*(u16 *)pkt);
+		if (ddp_seg_len) {
+			copy_len = 2;
+			termhdr->hdrct = DDP_LEN_FLAG;
+			if (pkt[2] & 0x80) {
+				is_tagged = 1;
+				if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+					copy_len += TERM_DDP_LEN_TAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+			} else {
+				if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+					copy_len += TERM_DDP_LEN_UNTAGGED;
+					termhdr->hdrct |= DDP_HDR_FLAG;
+				}
+
+				if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
+					if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
+						copy_len += TERM_RDMA_LEN;
+						termhdr->hdrct |= RDMA_HDR_FLAG;
+					}
+				}
+			}
+		}
+	}
+
+	opcode = i40iw_iwarp_opcode(info, pkt);
+
+	switch (info->ae_id) {
+	case I40IW_AE_AMP_UNALLOCATED_STAG:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		if (opcode == I40IW_OP_TYPE_RDMA_WRITE)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_STAG);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+		break;
+	case I40IW_AE_AMP_BOUNDS_VIOLATION:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		if (info->q2_data_written)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS);
+		break;
+	case I40IW_AE_AMP_BAD_PD:
+		switch (opcode) {
+		case I40IW_OP_TYPE_RDMA_WRITE:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_UNASSOC_STAG);
+			break;
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG);
+			break;
+		default:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG);
+		}
+		break;
+	case I40IW_AE_AMP_INVALID_STAG:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
+		break;
+	case I40IW_AE_AMP_BAD_QP:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+		break;
+	case I40IW_AE_AMP_BAD_STAG_KEY:
+	case I40IW_AE_AMP_BAD_STAG_INDEX:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		switch (opcode) {
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG);
+			break;
+		default:
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+					    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG);
+		}
+		break;
+	case I40IW_AE_AMP_RIGHTS_VIOLATION:
+	case I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+	case I40IW_AE_PRIV_OPERATION_DENIED:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS);
+		break;
+	case I40IW_AE_AMP_TO_WRAP:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP);
+		break;
+	case I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_MPA << 4) | DDP_LLP, MPA_MARKER);
+		break;
+	case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
+		break;
+	case I40IW_AE_LLP_SEGMENT_TOO_LARGE:
+	case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+		break;
+	case I40IW_AE_LCE_QP_CATASTROPHIC:
+	case I40IW_AE_DDP_NO_L_BIT:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+				    (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
+		break;
+	case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN:
+	case I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE);
+		break;
+	case I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+		qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_TOO_LONG);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+		if (is_tagged)
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+					    (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_DDP_VER);
+		else
+			i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+					    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_DDP_VER);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_MO:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MO);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_NO_BUF);
+		break;
+	case I40IW_AE_DDP_UBE_INVALID_QN:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
+		break;
+	case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER);
+		break;
+	case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNEXPECTED_OP);
+		break;
+	default:
+		i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
+				    (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED);
+		break;
+	}
+
+	if (copy_len)
+		memcpy(termhdr + 1, pkt, copy_len);
+
+	if (flush_code && !info->in_rdrsp_wr)
+		qp->sq_flush = (info->sq) ? true : false;
+
+	return sizeof(struct i40iw_terminate_hdr) + copy_len;
+}
+
+/**
+ * i40iw_terminate_send_fin() - Send fin for terminate message
+ * @qp: qp associated with received terminate AE
+ */
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp)
+{
+	/* Send the fin only */
+	i40iw_term_modify_qp(qp,
+			     I40IW_QP_STATE_TERMINATE,
+			     I40IWQP_TERM_SEND_FIN_ONLY,
+			     0);
+}
+
+/**
+ * i40iw_terminate_connection() - Bad AE and send terminate to remote QP
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+	u8 termlen = 0;
+
+	if (qp->term_flags & I40IW_TERM_SENT)
+		return;         /* Sanity check */
+
+	/* Eventtype can change from bld_terminate_hdr */
+	qp->eventtype = TERM_EVENT_QP_FATAL;
+	termlen = i40iw_bld_terminate_hdr(qp, info);
+	i40iw_terminate_start_timer(qp);
+	qp->term_flags |= I40IW_TERM_SENT;
+	i40iw_term_modify_qp(qp, I40IW_QP_STATE_TERMINATE,
+			     I40IWQP_TERM_SEND_TERM_ONLY, termlen);
+}
+
+/**
+ * i40iw_terminate_received - handle terminate received AE
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
+{
+	u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+	u32 *mpa;
+	u8 ddp_ctl;
+	u8 rdma_ctl;
+	u16 aeq_id = 0;
+	struct i40iw_terminate_hdr *termhdr;
+
+	mpa = (u32 *)i40iw_locate_mpa(pkt);
+	if (info->q2_data_written) {
+		/* did not validate the frame - do it now */
+		ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
+		rdma_ctl = ntohl(mpa[0]) & 0xff;
+		if ((ddp_ctl & 0xc0) != 0x40)
+			aeq_id = I40IW_AE_LCE_QP_CATASTROPHIC;
+		else if ((ddp_ctl & 0x03) != 1)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_DDP_VERSION;
+		else if (ntohl(mpa[2]) != 2)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_QN;
+		else if (ntohl(mpa[3]) != 1)
+			aeq_id = I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN;
+		else if (ntohl(mpa[4]) != 0)
+			aeq_id = I40IW_AE_DDP_UBE_INVALID_MO;
+		else if ((rdma_ctl & 0xc0) != 0x40)
+			aeq_id = I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+		info->ae_id = aeq_id;
+		if (info->ae_id) {
+			/* Bad terminate recvd - send back a terminate */
+			i40iw_terminate_connection(qp, info);
+			return;
+		}
+	}
+
+	qp->term_flags |= I40IW_TERM_RCVD;
+	qp->eventtype = TERM_EVENT_QP_FATAL;
+	termhdr = (struct i40iw_terminate_hdr *)&mpa[5];
+	if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
+	    termhdr->layer_etype == RDMAP_REMOTE_OP) {
+		i40iw_terminate_done(qp, 0);
+	} else {
+		i40iw_terminate_start_timer(qp);
+		i40iw_terminate_send_fin(qp);
+	}
+}
+
+/**
+ * i40iw_hw_stat_init - Initiliaze HW stats table
+ * @devstat: pestat struct
+ * @fcn_idx: PCI fn id
+ * @hw: PF i40iw_hw structure.
+ * @is_pf: Is it a PF?
+ *
+ * Populate the HW stat table with register offset addr for each
+ * stat. And start the perioidic stats timer.
+ */
+static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
+			       u8 fcn_idx,
+			       struct i40iw_hw *hw, bool is_pf)
+{
+	u32 stat_reg_offset;
+	u32 stat_index;
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+
+	devstat->hw = hw;
+
+	if (is_pf) {
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+				I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+				I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+				I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+				I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+				I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+				I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+				I40E_GLPES_PFTCPRTXSEG(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+				I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+				I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
+
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+				I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+				I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+				I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+				I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+				I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+				I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+				I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+				I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+				I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+				I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+				I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+				I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+				I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+				I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+				I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+				I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+				I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+				I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+				I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+				I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+				I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+				I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+				I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+				I40E_GLPES_PFRDMAVINVLO(fcn_idx);
+	} else {
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+				I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+				I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+				I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+				I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+				I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+				I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+				I40E_GLPES_VFTCPRTXSEG(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+				I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
+		stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+				I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
+
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+				I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+				I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+				I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+				I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+				I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+				I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+				I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+				I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+				I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+				I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+				I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+				I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+				I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+				I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+				I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+				I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+				I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+				I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+				I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+				I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+				I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+				I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+				I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+				I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
+		stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+				I40E_GLPES_VFRDMAVINVLO(fcn_idx);
+	}
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++) {
+		stat_reg_offset = stat_table->stat_offset_64[stat_index];
+		last_rd_stats->stat_value_64[stat_index] =
+			readq(devstat->hw->hw_addr + stat_reg_offset);
+	}
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++) {
+		stat_reg_offset = stat_table->stat_offset_32[stat_index];
+		last_rd_stats->stat_value_32[stat_index] =
+			i40iw_rd32(devstat->hw, stat_reg_offset);
+	}
+}
+
+/**
+ * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
+				  enum i40iw_hw_stat_index_32b index,
+				  u64 *value)
+{
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	u64 new_stat_value = 0;
+	u32 stat_reg_offset = stat_table->stat_offset_32[index];
+
+	new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+	/*roll-over case */
+	if (new_stat_value < last_rd_stats->stat_value_32[index])
+		hw_stats->stat_value_32[index] += new_stat_value;
+	else
+		hw_stats->stat_value_32[index] +=
+			new_stat_value - last_rd_stats->stat_value_32[index];
+	last_rd_stats->stat_value_32[index] = new_stat_value;
+	*value = hw_stats->stat_value_32[index];
+}
+
+/**
+ * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
+ * @devstat: pestat struct
+ * @index: index in HW stat table which contains offset reg-addr
+ * @value: hw stat value
+ */
+static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
+				  enum i40iw_hw_stat_index_64b index,
+				  u64 *value)
+{
+	struct i40iw_dev_hw_stat_offsets *stat_table =
+		&devstat->hw_stat_offsets;
+	struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	u64 new_stat_value = 0;
+	u32 stat_reg_offset = stat_table->stat_offset_64[index];
+
+	new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+	/*roll-over case */
+	if (new_stat_value < last_rd_stats->stat_value_64[index])
+		hw_stats->stat_value_64[index] += new_stat_value;
+	else
+		hw_stats->stat_value_64[index] +=
+			new_stat_value - last_rd_stats->stat_value_64[index];
+	last_rd_stats->stat_value_64[index] = new_stat_value;
+	*value = hw_stats->stat_value_64[index];
+}
+
+/**
+ * i40iw_hw_stat_read_all - read all HW stat counters
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters and populates hw_stats structure
+ * of passed-in dev's pestat as well as copy created in stat_values.
+ */
+static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
+				   struct i40iw_dev_hw_stats *stat_values)
+{
+	u32 stat_index;
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++)
+		i40iw_hw_stat_read_32(devstat, stat_index,
+				      &stat_values->stat_value_32[stat_index]);
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++)
+		i40iw_hw_stat_read_64(devstat, stat_index,
+				      &stat_values->stat_value_64[stat_index]);
+}
+
+/**
+ * i40iw_hw_stat_refresh_all - Update all HW stat structs
+ * @devstat: pestat struct
+ * @stat_values: hw stats structure
+ *
+ * Read all the HW stat counters to refresh values in hw_stats structure
+ * of passed-in dev's pestat
+ */
+static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+{
+	u64 stat_value;
+	u32 stat_index;
+
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
+	     stat_index++)
+		i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
+	for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
+	     stat_index++)
+		i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+}
+
+static struct i40iw_cqp_ops iw_cqp_ops = {
+	i40iw_sc_cqp_init,
+	i40iw_sc_cqp_create,
+	i40iw_sc_cqp_post_sq,
+	i40iw_sc_cqp_get_next_send_wqe,
+	i40iw_sc_cqp_destroy,
+	i40iw_sc_poll_for_cqp_op_done
+};
+
+static struct i40iw_ccq_ops iw_ccq_ops = {
+	i40iw_sc_ccq_init,
+	i40iw_sc_ccq_create,
+	i40iw_sc_ccq_destroy,
+	i40iw_sc_ccq_create_done,
+	i40iw_sc_ccq_get_cqe_info,
+	i40iw_sc_ccq_arm
+};
+
+static struct i40iw_ceq_ops iw_ceq_ops = {
+	i40iw_sc_ceq_init,
+	i40iw_sc_ceq_create,
+	i40iw_sc_cceq_create_done,
+	i40iw_sc_cceq_destroy_done,
+	i40iw_sc_cceq_create,
+	i40iw_sc_ceq_destroy,
+	i40iw_sc_process_ceq
+};
+
+static struct i40iw_aeq_ops iw_aeq_ops = {
+	i40iw_sc_aeq_init,
+	i40iw_sc_aeq_create,
+	i40iw_sc_aeq_destroy,
+	i40iw_sc_get_next_aeqe,
+	i40iw_sc_repost_aeq_entries,
+	i40iw_sc_aeq_create_done,
+	i40iw_sc_aeq_destroy_done
+};
+
+/* iwarp pd ops */
+static struct i40iw_pd_ops iw_pd_ops = {
+	i40iw_sc_pd_init,
+};
+
+static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
+	i40iw_sc_qp_init,
+	i40iw_sc_qp_create,
+	i40iw_sc_qp_modify,
+	i40iw_sc_qp_destroy,
+	i40iw_sc_qp_flush_wqes,
+	i40iw_sc_qp_upload_context,
+	i40iw_sc_qp_setctx,
+	i40iw_sc_send_lsmm,
+	i40iw_sc_send_lsmm_nostag,
+	i40iw_sc_send_rtt,
+	i40iw_sc_post_wqe0,
+};
+
+static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
+	i40iw_sc_cq_init,
+	i40iw_sc_cq_create,
+	i40iw_sc_cq_destroy,
+	i40iw_sc_cq_modify,
+};
+
+static struct i40iw_mr_ops iw_mr_ops = {
+	i40iw_sc_alloc_stag,
+	i40iw_sc_mr_reg_non_shared,
+	i40iw_sc_mr_reg_shared,
+	i40iw_sc_dealloc_stag,
+	i40iw_sc_query_stag,
+	i40iw_sc_mw_alloc
+};
+
+static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
+	i40iw_sc_manage_push_page,
+	i40iw_sc_manage_hmc_pm_func_table,
+	i40iw_sc_set_hmc_resource_profile,
+	i40iw_sc_commit_fpm_values,
+	i40iw_sc_query_fpm_values,
+	i40iw_sc_static_hmc_pages_allocated,
+	i40iw_sc_add_arp_cache_entry,
+	i40iw_sc_del_arp_cache_entry,
+	i40iw_sc_query_arp_cache_entry,
+	i40iw_sc_manage_apbvt_entry,
+	i40iw_sc_manage_qhash_table_entry,
+	i40iw_sc_alloc_local_mac_ipaddr_entry,
+	i40iw_sc_add_local_mac_ipaddr_entry,
+	i40iw_sc_del_local_mac_ipaddr_entry,
+	i40iw_sc_cqp_nop,
+	i40iw_sc_commit_fpm_values_done,
+	i40iw_sc_query_fpm_values_done,
+	i40iw_sc_manage_hmc_pm_func_table_done,
+	i40iw_sc_suspend_qp,
+	i40iw_sc_resume_qp
+};
+
+static struct i40iw_hmc_ops iw_hmc_ops = {
+	i40iw_sc_init_iw_hmc,
+	i40iw_sc_parse_fpm_query_buf,
+	i40iw_sc_configure_iw_fpm,
+	i40iw_sc_parse_fpm_commit_buf,
+	i40iw_sc_create_hmc_obj,
+	i40iw_sc_del_hmc_obj,
+	NULL,
+	NULL
+};
+
+static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
+	i40iw_hw_stat_init,
+	i40iw_hw_stat_read_32,
+	i40iw_hw_stat_read_64,
+	i40iw_hw_stat_read_all,
+	i40iw_hw_stat_refresh_all
+};
+
+/**
+ * i40iw_device_init_pestat - Initialize the pestat structure
+ * @dev: pestat struct
+ */
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
+{
+	devstat->ops = iw_device_pestat_ops;
+	return 0;
+}
+
+/**
+ * i40iw_device_init - Initialize IWARP device
+ * @dev: IWARP device pointer
+ * @info: IWARP init info
+ */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+					 struct i40iw_device_init_info *info)
+{
+	u32 val;
+	u32 vchnl_ver = 0;
+	u16 hmc_fcn = 0;
+	enum i40iw_status_code ret_code = 0;
+	u8 db_size;
+
+	spin_lock_init(&dev->cqp_lock);
+	INIT_LIST_HEAD(&dev->cqp_cmd_head);             /* for the cqp commands backlog. */
+
+	i40iw_device_init_uk(&dev->dev_uk);
+
+	dev->debug_mask = info->debug_mask;
+
+	ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
+	if (ret_code) {
+		i40iw_debug(dev, I40IW_DEBUG_DEV,
+			    "%s: i40iw_device_init_pestat failed\n", __func__);
+		return ret_code;
+	}
+	dev->hmc_fn_id = info->hmc_fn_id;
+	dev->qs_handle = info->qs_handle;
+	dev->exception_lan_queue = info->exception_lan_queue;
+	dev->is_pf = info->is_pf;
+
+	dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
+	dev->fpm_query_buf = info->fpm_query_buf;
+
+	dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
+	dev->fpm_commit_buf = info->fpm_commit_buf;
+
+	dev->hw = info->hw;
+	dev->hw->hw_addr = info->bar0;
+
+	val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+	dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
+	if (dev->is_pf) {
+		dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
+			dev->hmc_fn_id, dev->hw, true);
+		spin_lock_init(&dev->dev_pestat.stats_lock);
+		/*start the periodic stats_timer */
+		i40iw_hw_stats_start_timer(dev);
+		val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
+		db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
+		if ((db_size != I40IW_PE_DB_SIZE_4M) &&
+		    (db_size != I40IW_PE_DB_SIZE_8M)) {
+			i40iw_debug(dev, I40IW_DEBUG_DEV,
+				    "%s: PE doorbell is not enabled in CSR val 0x%x\n",
+				    __func__, val);
+			ret_code = I40IW_ERR_PE_DOORBELL_NOT_ENABLED;
+			return ret_code;
+		}
+		dev->db_addr = dev->hw->hw_addr + I40IW_DB_ADDR_OFFSET;
+		dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_pf;
+	} else {
+		dev->db_addr = dev->hw->hw_addr + I40IW_VF_DB_ADDR_OFFSET;
+	}
+
+	dev->cqp_ops = &iw_cqp_ops;
+	dev->ccq_ops = &iw_ccq_ops;
+	dev->ceq_ops = &iw_ceq_ops;
+	dev->aeq_ops = &iw_aeq_ops;
+	dev->cqp_misc_ops = &iw_cqp_misc_ops;
+	dev->iw_pd_ops = &iw_pd_ops;
+	dev->iw_priv_qp_ops = &iw_priv_qp_ops;
+	dev->iw_priv_cq_ops = &iw_priv_cq_ops;
+	dev->mr_ops = &iw_mr_ops;
+	dev->hmc_ops = &iw_hmc_ops;
+	dev->vchnl_if.vchnl_send = info->vchnl_send;
+	if (dev->vchnl_if.vchnl_send)
+		dev->vchnl_up = true;
+	else
+		dev->vchnl_up = false;
+	if (!dev->is_pf) {
+		dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_vf;
+		ret_code = i40iw_vchnl_vf_get_ver(dev, &vchnl_ver);
+		if (!ret_code) {
+			i40iw_debug(dev, I40IW_DEBUG_DEV,
+				    "%s: Get Channel version rc = 0x%0x, version is %u\n",
+				__func__, ret_code, vchnl_ver);
+			ret_code = i40iw_vchnl_vf_get_hmc_fcn(dev, &hmc_fcn);
+			if (!ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_DEV,
+					    "%s Get HMC function rc = 0x%0x, hmc fcn is %u\n",
+					    __func__, ret_code, hmc_fcn);
+				dev->hmc_fn_id = (u8)hmc_fcn;
+			}
+		}
+	}
+	dev->iw_vf_cqp_ops = &iw_vf_cqp_ops;
+
+	return ret_code;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
new file mode 100644
index 000000000000..aab88d65f805
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -0,0 +1,1713 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_D_H
+#define I40IW_D_H
+
+#define I40IW_DB_ADDR_OFFSET    (4 * 1024 * 1024 - 64 * 1024)
+#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
+
+#define I40IW_PUSH_OFFSET       (4 * 1024 * 1024)
+#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
+#define I40IW_VF_PUSH_OFFSET    ((8 + 64) * 1024)
+#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
+
+#define I40IW_PE_DB_SIZE_4M     1
+#define I40IW_PE_DB_SIZE_8M     2
+
+#define I40IW_DDP_VER 1
+#define I40IW_RDMAP_VER 1
+
+#define I40IW_RDMA_MODE_RDMAC 0
+#define I40IW_RDMA_MODE_IETF  1
+
+#define I40IW_QP_STATE_INVALID 0
+#define I40IW_QP_STATE_IDLE 1
+#define I40IW_QP_STATE_RTS 2
+#define I40IW_QP_STATE_CLOSING 3
+#define I40IW_QP_STATE_RESERVED 4
+#define I40IW_QP_STATE_TERMINATE 5
+#define I40IW_QP_STATE_ERROR 6
+
+#define I40IW_STAG_STATE_INVALID 0
+#define I40IW_STAG_STATE_VALID 1
+
+#define I40IW_STAG_TYPE_SHARED 0
+#define I40IW_STAG_TYPE_NONSHARED 1
+
+#define I40IW_MAX_USER_PRIORITY 8
+
+#define LS_64_1(val, bits)      ((u64)(uintptr_t)val << bits)
+#define RS_64_1(val, bits)      ((u64)(uintptr_t)val >> bits)
+#define LS_32_1(val, bits)      (u32)(val << bits)
+#define RS_32_1(val, bits)      (u32)(val >> bits)
+#define I40E_HI_DWORD(x)        ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+
+#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
+
+#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
+#define LS_32(val, field) ((val << field ## _SHIFT) & (field ## _MASK))
+#define RS_32(val, field) ((val & field ## _MASK) >> field ## _SHIFT)
+
+#define TERM_DDP_LEN_TAGGED     14
+#define TERM_DDP_LEN_UNTAGGED   18
+#define TERM_RDMA_LEN           28
+#define RDMA_OPCODE_MASK        0x0f
+#define RDMA_READ_REQ_OPCODE    1
+#define Q2_BAD_FRAME_OFFSET     72
+#define CQE_MAJOR_DRV           0x8000
+
+#define I40IW_TERM_SENT 0x01
+#define I40IW_TERM_RCVD 0x02
+#define I40IW_TERM_DONE 0x04
+#define I40IW_MAC_HLEN  14
+
+#define I40IW_INVALID_WQE_INDEX 0xffffffff
+
+#define I40IW_CQP_WAIT_POLL_REGS 1
+#define I40IW_CQP_WAIT_POLL_CQ 2
+#define I40IW_CQP_WAIT_EVENT 3
+
+#define I40IW_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define I40IW_GET_CURRENT_CQ_ELEMENT(_cq) \
+	( \
+		&((_cq)->cq_base[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)])  \
+	)
+#define I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(_cq) \
+	( \
+		&(((struct i40iw_extended_cqe *)        \
+		   ((_cq)->cq_base))[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
+	)
+
+#define I40IW_GET_CURRENT_AEQ_ELEMENT(_aeq) \
+	( \
+		&_aeq->aeqe_base[I40IW_RING_GETCURRENT_TAIL(_aeq->aeq_ring)]   \
+	)
+
+#define I40IW_GET_CURRENT_CEQ_ELEMENT(_ceq) \
+	( \
+		&_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)]   \
+	)
+
+#define I40IW_AE_SOURCE_RQ              0x1
+#define I40IW_AE_SOURCE_RQ_0011         0x3
+
+#define I40IW_AE_SOURCE_CQ              0x2
+#define I40IW_AE_SOURCE_CQ_0110         0x6
+#define I40IW_AE_SOURCE_CQ_1010         0xA
+#define I40IW_AE_SOURCE_CQ_1110         0xE
+
+#define I40IW_AE_SOURCE_SQ              0x5
+#define I40IW_AE_SOURCE_SQ_0111         0x7
+
+#define I40IW_AE_SOURCE_IN_RR_WR        0x9
+#define I40IW_AE_SOURCE_IN_RR_WR_1011   0xB
+#define I40IW_AE_SOURCE_OUT_RR          0xD
+#define I40IW_AE_SOURCE_OUT_RR_1111     0xF
+
+#define I40IW_TCP_STATE_NON_EXISTENT 0
+#define I40IW_TCP_STATE_CLOSED 1
+#define I40IW_TCP_STATE_LISTEN 2
+#define I40IW_STATE_SYN_SEND 3
+#define I40IW_TCP_STATE_SYN_RECEIVED 4
+#define I40IW_TCP_STATE_ESTABLISHED 5
+#define I40IW_TCP_STATE_CLOSE_WAIT 6
+#define I40IW_TCP_STATE_FIN_WAIT_1 7
+#define I40IW_TCP_STATE_CLOSING  8
+#define I40IW_TCP_STATE_LAST_ACK 9
+#define I40IW_TCP_STATE_FIN_WAIT_2 10
+#define I40IW_TCP_STATE_TIME_WAIT 11
+#define I40IW_TCP_STATE_RESERVED_1 12
+#define I40IW_TCP_STATE_RESERVED_2 13
+#define I40IW_TCP_STATE_RESERVED_3 14
+#define I40IW_TCP_STATE_RESERVED_4 15
+
+/* ILQ CQP hash table fields */
+#define I40IW_CQPSQ_QHASH_VLANID_SHIFT 32
+#define I40IW_CQPSQ_QHASH_VLANID_MASK \
+	((u64)0xfff << I40IW_CQPSQ_QHASH_VLANID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QPN_SHIFT 32
+#define I40IW_CQPSQ_QHASH_QPN_MASK \
+	((u64)0x3ffff << I40IW_CQPSQ_QHASH_QPN_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_QHASH_QS_HANDLE_MASK ((u64)0x3ff << I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT 16
+#define I40IW_CQPSQ_QHASH_SRC_PORT_MASK \
+	((u64)0xffff << I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT 0
+#define I40IW_CQPSQ_QHASH_DEST_PORT_MASK \
+	((u64)0xffff << I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR0_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR0_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR0_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR1_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR1_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR1_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR2_SHIFT 32
+#define I40IW_CQPSQ_QHASH_ADDR2_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR2_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ADDR3_SHIFT 0
+#define I40IW_CQPSQ_QHASH_ADDR3_MASK \
+	((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR3_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_QHASH_WQEVALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_WQEVALID_SHIFT)
+#define I40IW_CQPSQ_QHASH_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_QHASH_OPCODE_MASK \
+	((u64)0x3f << I40IW_CQPSQ_QHASH_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_MANAGE_SHIFT 61
+#define I40IW_CQPSQ_QHASH_MANAGE_MASK \
+	((u64)0x3 << I40IW_CQPSQ_QHASH_MANAGE_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT 60
+#define I40IW_CQPSQ_QHASH_IPV4VALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_VLANVALID_SHIFT 59
+#define I40IW_CQPSQ_QHASH_VLANVALID_MASK \
+	((u64)0x1 << I40IW_CQPSQ_QHASH_VLANVALID_SHIFT)
+
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT 42
+#define I40IW_CQPSQ_QHASH_ENTRYTYPE_MASK \
+	((u64)0x7 << I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT)
+/* CQP Host Context */
+#define I40IW_CQPHC_EN_DC_TCP_SHIFT 0
+#define I40IW_CQPHC_EN_DC_TCP_MASK (1UL << I40IW_CQPHC_EN_DC_TCP_SHIFT)
+
+#define I40IW_CQPHC_SQSIZE_SHIFT 8
+#define I40IW_CQPHC_SQSIZE_MASK (0xfUL << I40IW_CQPHC_SQSIZE_SHIFT)
+
+#define I40IW_CQPHC_DISABLE_PFPDUS_SHIFT 1
+#define I40IW_CQPHC_DISABLE_PFPDUS_MASK (0x1UL << I40IW_CQPHC_DISABLE_PFPDUS_SHIFT)
+
+#define I40IW_CQPHC_ENABLED_VFS_SHIFT 32
+#define I40IW_CQPHC_ENABLED_VFS_MASK (0x3fULL << I40IW_CQPHC_ENABLED_VFS_SHIFT)
+
+#define I40IW_CQPHC_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPHC_HMC_PROFILE_MASK (0x7ULL << I40IW_CQPHC_HMC_PROFILE_SHIFT)
+
+#define I40IW_CQPHC_SVER_SHIFT 24
+#define I40IW_CQPHC_SVER_MASK (0xffUL << I40IW_CQPHC_SVER_SHIFT)
+
+#define I40IW_CQPHC_SQBASE_SHIFT 9
+#define I40IW_CQPHC_SQBASE_MASK \
+	(0xfffffffffffffeULL << I40IW_CQPHC_SQBASE_SHIFT)
+
+#define I40IW_CQPHC_QPCTX_SHIFT 0
+#define I40IW_CQPHC_QPCTX_MASK  \
+	(0xffffffffffffffffULL << I40IW_CQPHC_QPCTX_SHIFT)
+#define I40IW_CQPHC_SVER        1
+
+#define I40IW_CQP_SW_SQSIZE_4 4
+#define I40IW_CQP_SW_SQSIZE_2048 2048
+
+/* iWARP QP Doorbell shadow area */
+#define I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT 0
+#define I40IW_QP_DBSA_HW_SQ_TAIL_MASK \
+	(0x3fffUL << I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT)
+
+/* Completion Queue Doorbell shadow area */
+#define I40IW_CQ_DBSA_CQEIDX_SHIFT 0
+#define I40IW_CQ_DBSA_CQEIDX_MASK (0xfffffUL << I40IW_CQ_DBSA_CQEIDX_SHIFT)
+
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT 0
+#define I40IW_CQ_DBSA_SW_CQ_SELECT_MASK \
+	(0x3fffUL << I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SHIFT 14
+#define I40IW_CQ_DBSA_ARM_NEXT_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT 15
+#define I40IW_CQ_DBSA_ARM_NEXT_SE_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT)
+
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT 16
+#define I40IW_CQ_DBSA_ARM_SEQ_NUM_MASK \
+	(0x3UL << I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT)
+
+/* CQP and iWARP Completion Queue */
+#define I40IW_CQ_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQ_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CCQ_OPRETVAL_SHIFT 0
+#define I40IW_CCQ_OPRETVAL_MASK (0xffffffffUL << I40IW_CCQ_OPRETVAL_SHIFT)
+
+#define I40IW_CQ_MINERR_SHIFT 0
+#define I40IW_CQ_MINERR_MASK (0xffffUL << I40IW_CQ_MINERR_SHIFT)
+
+#define I40IW_CQ_MAJERR_SHIFT 16
+#define I40IW_CQ_MAJERR_MASK (0xffffUL << I40IW_CQ_MAJERR_SHIFT)
+
+#define I40IW_CQ_WQEIDX_SHIFT 32
+#define I40IW_CQ_WQEIDX_MASK (0x3fffULL << I40IW_CQ_WQEIDX_SHIFT)
+
+#define I40IW_CQ_ERROR_SHIFT 55
+#define I40IW_CQ_ERROR_MASK (1ULL << I40IW_CQ_ERROR_SHIFT)
+
+#define I40IW_CQ_SQ_SHIFT 62
+#define I40IW_CQ_SQ_MASK (1ULL << I40IW_CQ_SQ_SHIFT)
+
+#define I40IW_CQ_VALID_SHIFT 63
+#define I40IW_CQ_VALID_MASK (1ULL << I40IW_CQ_VALID_SHIFT)
+
+#define I40IWCQ_PAYLDLEN_SHIFT 0
+#define I40IWCQ_PAYLDLEN_MASK (0xffffffffUL << I40IWCQ_PAYLDLEN_SHIFT)
+
+#define I40IWCQ_TCPSEQNUM_SHIFT 32
+#define I40IWCQ_TCPSEQNUM_MASK (0xffffffffULL << I40IWCQ_TCPSEQNUM_SHIFT)
+
+#define I40IWCQ_INVSTAG_SHIFT 0
+#define I40IWCQ_INVSTAG_MASK (0xffffffffUL << I40IWCQ_INVSTAG_SHIFT)
+
+#define I40IWCQ_QPID_SHIFT 32
+#define I40IWCQ_QPID_MASK (0x3ffffULL << I40IWCQ_QPID_SHIFT)
+
+#define I40IWCQ_PSHDROP_SHIFT 51
+#define I40IWCQ_PSHDROP_MASK (1ULL << I40IWCQ_PSHDROP_SHIFT)
+
+#define I40IWCQ_SRQ_SHIFT 52
+#define I40IWCQ_SRQ_MASK (1ULL << I40IWCQ_SRQ_SHIFT)
+
+#define I40IWCQ_STAG_SHIFT 53
+#define I40IWCQ_STAG_MASK (1ULL << I40IWCQ_STAG_SHIFT)
+
+#define I40IWCQ_SOEVENT_SHIFT 54
+#define I40IWCQ_SOEVENT_MASK (1ULL << I40IWCQ_SOEVENT_SHIFT)
+
+#define I40IWCQ_OP_SHIFT 56
+#define I40IWCQ_OP_MASK (0x3fULL << I40IWCQ_OP_SHIFT)
+
+/* CEQE format */
+#define I40IW_CEQE_CQCTX_SHIFT 0
+#define I40IW_CEQE_CQCTX_MASK   \
+	(0x7fffffffffffffffULL << I40IW_CEQE_CQCTX_SHIFT)
+
+#define I40IW_CEQE_VALID_SHIFT 63
+#define I40IW_CEQE_VALID_MASK (1ULL << I40IW_CEQE_VALID_SHIFT)
+
+/* AEQE format */
+#define I40IW_AEQE_COMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_AEQE_COMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_AEQE_QPCQID_SHIFT 0
+#define I40IW_AEQE_QPCQID_MASK (0x3ffffUL << I40IW_AEQE_QPCQID_SHIFT)
+
+#define I40IW_AEQE_WQDESCIDX_SHIFT 18
+#define I40IW_AEQE_WQDESCIDX_MASK (0x3fffULL << I40IW_AEQE_WQDESCIDX_SHIFT)
+
+#define I40IW_AEQE_OVERFLOW_SHIFT 33
+#define I40IW_AEQE_OVERFLOW_MASK (1ULL << I40IW_AEQE_OVERFLOW_SHIFT)
+
+#define I40IW_AEQE_AECODE_SHIFT 34
+#define I40IW_AEQE_AECODE_MASK (0xffffULL << I40IW_AEQE_AECODE_SHIFT)
+
+#define I40IW_AEQE_AESRC_SHIFT 50
+#define I40IW_AEQE_AESRC_MASK (0xfULL << I40IW_AEQE_AESRC_SHIFT)
+
+#define I40IW_AEQE_IWSTATE_SHIFT 54
+#define I40IW_AEQE_IWSTATE_MASK (0x7ULL << I40IW_AEQE_IWSTATE_SHIFT)
+
+#define I40IW_AEQE_TCPSTATE_SHIFT 57
+#define I40IW_AEQE_TCPSTATE_MASK (0xfULL << I40IW_AEQE_TCPSTATE_SHIFT)
+
+#define I40IW_AEQE_Q2DATA_SHIFT 61
+#define I40IW_AEQE_Q2DATA_MASK (0x3ULL << I40IW_AEQE_Q2DATA_SHIFT)
+
+#define I40IW_AEQE_VALID_SHIFT 63
+#define I40IW_AEQE_VALID_MASK (1ULL << I40IW_AEQE_VALID_SHIFT)
+
+/* CQP SQ WQES */
+#define I40IW_QP_TYPE_IWARP     1
+#define I40IW_QP_TYPE_UDA       2
+#define I40IW_QP_TYPE_CQP       4
+
+#define I40IW_CQ_TYPE_IWARP     1
+#define I40IW_CQ_TYPE_ILQ       2
+#define I40IW_CQ_TYPE_IEQ       3
+#define I40IW_CQ_TYPE_CQP       4
+
+#define I40IWQP_TERM_SEND_TERM_AND_FIN          0
+#define I40IWQP_TERM_SEND_TERM_ONLY             1
+#define I40IWQP_TERM_SEND_FIN_ONLY              2
+#define I40IWQP_TERM_DONOT_SEND_TERM_OR_FIN     3
+
+#define I40IW_CQP_OP_CREATE_QP                  0
+#define I40IW_CQP_OP_MODIFY_QP                  0x1
+#define I40IW_CQP_OP_DESTROY_QP                 0x02
+#define I40IW_CQP_OP_CREATE_CQ                  0x03
+#define I40IW_CQP_OP_MODIFY_CQ                  0x04
+#define I40IW_CQP_OP_DESTROY_CQ                 0x05
+#define I40IW_CQP_OP_CREATE_SRQ                 0x06
+#define I40IW_CQP_OP_MODIFY_SRQ                 0x07
+#define I40IW_CQP_OP_DESTROY_SRQ                0x08
+#define I40IW_CQP_OP_ALLOC_STAG                 0x09
+#define I40IW_CQP_OP_REG_MR                     0x0a
+#define I40IW_CQP_OP_QUERY_STAG                 0x0b
+#define I40IW_CQP_OP_REG_SMR                    0x0c
+#define I40IW_CQP_OP_DEALLOC_STAG               0x0d
+#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE    0x0e
+#define I40IW_CQP_OP_MANAGE_ARP                 0x0f
+#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP          0x10
+#define I40IW_CQP_OP_MANAGE_PUSH_PAGES          0x11
+#define I40IW_CQP_OP_MANAGE_PE_TEAM             0x12
+#define I40IW_CQP_OP_UPLOAD_CONTEXT             0x13
+#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
+#define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE   0x15
+#define I40IW_CQP_OP_CREATE_CEQ                 0x16
+#define I40IW_CQP_OP_DESTROY_CEQ                0x18
+#define I40IW_CQP_OP_CREATE_AEQ                 0x19
+#define I40IW_CQP_OP_DESTROY_AEQ                0x1b
+#define I40IW_CQP_OP_CREATE_ADDR_VECT           0x1c
+#define I40IW_CQP_OP_MODIFY_ADDR_VECT           0x1d
+#define I40IW_CQP_OP_DESTROY_ADDR_VECT          0x1e
+#define I40IW_CQP_OP_UPDATE_PE_SDS              0x1f
+#define I40IW_CQP_OP_QUERY_FPM_VALUES           0x20
+#define I40IW_CQP_OP_COMMIT_FPM_VALUES          0x21
+#define I40IW_CQP_OP_FLUSH_WQES                 0x22
+#define I40IW_CQP_OP_MANAGE_APBVT               0x23
+#define I40IW_CQP_OP_NOP                        0x24
+#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
+#define I40IW_CQP_OP_CREATE_UDA_MCAST_GROUP     0x26
+#define I40IW_CQP_OP_MODIFY_UDA_MCAST_GROUP     0x27
+#define I40IW_CQP_OP_DESTROY_UDA_MCAST_GROUP    0x28
+#define I40IW_CQP_OP_SUSPEND_QP                 0x29
+#define I40IW_CQP_OP_RESUME_QP                  0x2a
+#define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED       0x2b
+#define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE   0x2d
+
+#define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
+#define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
+
+#define I40IW_UDA_QPSQ_OPCODE_SHIFT 32
+#define I40IW_UDA_QPSQ_OPCODE_MASK ((u64)0x3f << I40IW_UDA_QPSQ_OPCODE_SHIFT)
+
+#define I40IW_UDA_QPSQ_MACLEN_SHIFT 56
+#define I40IW_UDA_QPSQ_MACLEN_MASK \
+	((u64)0x7f << I40IW_UDA_QPSQ_MACLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_IPLEN_SHIFT 48
+#define I40IW_UDA_QPSQ_IPLEN_MASK \
+	((u64)0x7f << I40IW_UDA_QPSQ_IPLEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4T_SHIFT 30
+#define I40IW_UDA_QPSQ_L4T_MASK \
+	((u64)0x3 << I40IW_UDA_QPSQ_L4T_SHIFT)
+
+#define I40IW_UDA_QPSQ_IIPT_SHIFT 28
+#define I40IW_UDA_QPSQ_IIPT_MASK \
+	((u64)0x3 << I40IW_UDA_QPSQ_IIPT_SHIFT)
+
+#define I40IW_UDA_QPSQ_L4LEN_SHIFT 24
+#define I40IW_UDA_QPSQ_L4LEN_MASK ((u64)0xf << I40IW_UDA_QPSQ_L4LEN_SHIFT)
+
+#define I40IW_UDA_QPSQ_AVIDX_SHIFT 0
+#define I40IW_UDA_QPSQ_AVIDX_MASK ((u64)0xffff << I40IW_UDA_QPSQ_AVIDX_SHIFT)
+
+#define I40IW_UDA_QPSQ_VALID_SHIFT 63
+#define I40IW_UDA_QPSQ_VALID_MASK \
+	((u64)0x1 << I40IW_UDA_QPSQ_VALID_SHIFT)
+
+#define I40IW_UDA_QPSQ_SIGCOMPL_SHIFT 62
+#define I40IW_UDA_QPSQ_SIGCOMPL_MASK ((u64)0x1 << I40IW_UDA_QPSQ_SIGCOMPL_SHIFT)
+
+#define I40IW_UDA_PAYLOADLEN_SHIFT 0
+#define I40IW_UDA_PAYLOADLEN_MASK ((u64)0x3fff << I40IW_UDA_PAYLOADLEN_SHIFT)
+
+#define I40IW_UDA_HDRLEN_SHIFT 16
+#define I40IW_UDA_HDRLEN_MASK ((u64)0x1ff << I40IW_UDA_HDRLEN_SHIFT)
+
+#define I40IW_VLAN_TAG_VALID_SHIFT 50
+#define I40IW_VLAN_TAG_VALID_MASK ((u64)0x1 << I40IW_VLAN_TAG_VALID_SHIFT)
+
+#define I40IW_UDA_L3PROTO_SHIFT 0
+#define I40IW_UDA_L3PROTO_MASK ((u64)0x3 << I40IW_UDA_L3PROTO_SHIFT)
+
+#define I40IW_UDA_L4PROTO_SHIFT 16
+#define I40IW_UDA_L4PROTO_MASK ((u64)0x3 << I40IW_UDA_L4PROTO_SHIFT)
+
+#define I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT 44
+#define I40IW_UDA_QPSQ_DOLOOPBACK_MASK \
+	((u64)0x1 << I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT)
+
+/* CQP SQ WQE common fields */
+#define I40IW_CQPSQ_OPCODE_SHIFT 32
+#define I40IW_CQPSQ_OPCODE_MASK (0x3fULL << I40IW_CQPSQ_OPCODE_SHIFT)
+
+#define I40IW_CQPSQ_WQEVALID_SHIFT 63
+#define I40IW_CQPSQ_WQEVALID_MASK (1ULL << I40IW_CQPSQ_WQEVALID_SHIFT)
+
+#define I40IW_CQPSQ_TPHVAL_SHIFT 0
+#define I40IW_CQPSQ_TPHVAL_MASK (0xffUL << I40IW_CQPSQ_TPHVAL_SHIFT)
+
+#define I40IW_CQPSQ_TPHEN_SHIFT 60
+#define I40IW_CQPSQ_TPHEN_MASK (1ULL << I40IW_CQPSQ_TPHEN_SHIFT)
+
+#define I40IW_CQPSQ_PBUFADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_PBUFADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy QP */
+
+#define I40IW_CQPSQ_QP_NEWMSS_SHIFT 32
+#define I40IW_CQPSQ_QP_NEWMSS_MASK (0x3fffULL << I40IW_CQPSQ_QP_NEWMSS_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMLEN_SHIFT 48
+#define I40IW_CQPSQ_QP_TERMLEN_MASK (0xfULL << I40IW_CQPSQ_QP_TERMLEN_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_QP_QPID_SHIFT 0
+#define I40IW_CQPSQ_QP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+#define I40IW_CQPSQ_QP_OP_SHIFT 32
+#define I40IW_CQPSQ_QP_OP_MASK I40IWCQ_OP_MASK
+
+#define I40IW_CQPSQ_QP_ORDVALID_SHIFT 42
+#define I40IW_CQPSQ_QP_ORDVALID_MASK (1ULL << I40IW_CQPSQ_QP_ORDVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_TOECTXVALID_SHIFT 43
+#define I40IW_CQPSQ_QP_TOECTXVALID_MASK \
+	(1ULL << I40IW_CQPSQ_QP_TOECTXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT 44
+#define I40IW_CQPSQ_QP_CACHEDVARVALID_MASK      \
+	(1ULL << I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_VQ_SHIFT 45
+#define I40IW_CQPSQ_QP_VQ_MASK (1ULL << I40IW_CQPSQ_QP_VQ_SHIFT)
+
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT 46
+#define I40IW_CQPSQ_QP_FORCELOOPBACK_MASK       \
+	(1ULL << I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT)
+
+#define I40IW_CQPSQ_QP_CQNUMVALID_SHIFT 47
+#define I40IW_CQPSQ_QP_CQNUMVALID_MASK  \
+	(1ULL << I40IW_CQPSQ_QP_CQNUMVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_QP_QPTYPE_MASK (0x3ULL << I40IW_CQPSQ_QP_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
+#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
+
+#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53
+#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT)
+
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
+#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK       \
+	(1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
+
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT 55
+#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_MASK     \
+	(1ULL << I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT)
+
+#define I40IW_CQPSQ_QP_TERMACT_SHIFT 56
+#define I40IW_CQPSQ_QP_TERMACT_MASK (0x3ULL << I40IW_CQPSQ_QP_TERMACT_SHIFT)
+
+#define I40IW_CQPSQ_QP_RESETCON_SHIFT 58
+#define I40IW_CQPSQ_QP_RESETCON_MASK (1ULL << I40IW_CQPSQ_QP_RESETCON_SHIFT)
+
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT 59
+#define I40IW_CQPSQ_QP_ARPTABIDXVALID_MASK      \
+	(1ULL << I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT)
+
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT 60
+#define I40IW_CQPSQ_QP_NEXTIWSTATE_MASK \
+	(0x7ULL << I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT)
+
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_QP_DBSHADOWADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Create/Modify/Destroy CQ */
+#define I40IW_CQPSQ_CQ_CQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQSIZE_MASK (0x3ffffUL << I40IW_CQPSQ_CQ_CQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK       \
+	(0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
+#define I40IW_CQPSQ_CQ_CQCTX_MASK       \
+	(0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
+
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT 0
+#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_MASK       \
+	(0x3ffff << I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQID_SHIFT 24
+#define I40IW_CQPSQ_CQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_OP_SHIFT 32
+#define I40IW_CQPSQ_CQ_OP_MASK (0x3fULL << I40IW_CQPSQ_CQ_OP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CQRESIZE_SHIFT 43
+#define I40IW_CQPSQ_CQ_CQRESIZE_MASK (1ULL << I40IW_CQPSQ_CQ_CQRESIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT 44
+#define I40IW_CQPSQ_CQ_LPBLSIZE_MASK (3ULL << I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT 46
+#define I40IW_CQPSQ_CQ_CHKOVERFLOW_MASK         \
+	(1ULL << I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT)
+
+#define I40IW_CQPSQ_CQ_VIRTMAP_SHIFT 47
+#define I40IW_CQPSQ_CQ_VIRTMAP_MASK (1ULL << I40IW_CQPSQ_CQ_VIRTMAP_SHIFT)
+
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT 48
+#define I40IW_CQPSQ_CQ_ENCEQEMASK_MASK  \
+	(1ULL << I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT)
+
+#define I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT 49
+#define I40IW_CQPSQ_CQ_CEQIDVALID_MASK  \
+	(1ULL << I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT)
+
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT 61
+#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_MASK      \
+	(1ULL << I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT)
+
+/* Create/Modify/Destroy Shared Receive Queue */
+
+#define I40IW_CQPSQ_SRQ_RQSIZE_SHIFT 0
+#define I40IW_CQPSQ_SRQ_RQSIZE_MASK (0xfUL << I40IW_CQPSQ_SRQ_RQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT 4
+#define I40IW_CQPSQ_SRQ_RQWQESIZE_MASK \
+	(0x7UL << I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT 32
+#define I40IW_CQPSQ_SRQ_SRQLIMIT_MASK   \
+	(0xfffULL << I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_SRQ_SRQCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_SRQ_PDID_SHIFT 16
+#define I40IW_CQPSQ_SRQ_PDID_MASK       \
+	(0x7fffULL << I40IW_CQPSQ_SRQ_PDID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_SRQID_SHIFT 0
+#define I40IW_CQPSQ_SRQ_SRQID_MASK (0x7fffUL << I40IW_CQPSQ_SRQ_SRQID_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_SRQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_SRQ_VIRTMAP_SHIFT I40IW_CQPSQ_CQ_VIRTMAP_SHIFT
+#define I40IW_CQPSQ_SRQ_VIRTMAP_MASK I40IW_CQPSQ_CQ_VIRTMAP_MASK
+
+#define I40IW_CQPSQ_SRQ_TPHEN_SHIFT I40IW_CQPSQ_TPHEN_SHIFT
+#define I40IW_CQPSQ_SRQ_TPHEN_MASK I40IW_CQPSQ_TPHEN_MASK
+
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT 61
+#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_MASK      \
+	(1ULL << I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT 6
+#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_MASK       \
+	(0x3ffffffffffffffULL << I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT)
+
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Allocate/Register/Register Shared/Deallocate Stag */
+#define I40IW_CQPSQ_STAG_VA_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_VA_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_STAGLEN_SHIFT 0
+#define I40IW_CQPSQ_STAG_STAGLEN_MASK   \
+	(0x3fffffffffffULL << I40IW_CQPSQ_STAG_STAGLEN_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PDID_SHIFT 48
+#define I40IW_CQPSQ_STAG_PDID_MASK (0x7fffULL << I40IW_CQPSQ_STAG_PDID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_KEY_SHIFT 0
+#define I40IW_CQPSQ_STAG_KEY_MASK (0xffUL << I40IW_CQPSQ_STAG_KEY_SHIFT)
+
+#define I40IW_CQPSQ_STAG_IDX_SHIFT 8
+#define I40IW_CQPSQ_STAG_IDX_MASK (0xffffffUL << I40IW_CQPSQ_STAG_IDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT 32
+#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_MASK     \
+	(0xffffffULL << I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_MR_SHIFT 43
+#define I40IW_CQPSQ_STAG_MR_MASK (1ULL << I40IW_CQPSQ_STAG_MR_SHIFT)
+
+#define I40IW_CQPSQ_STAG_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_STAG_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT 46
+#define I40IW_CQPSQ_STAG_HPAGESIZE_MASK \
+	(1ULL << I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT)
+
+#define I40IW_CQPSQ_STAG_ARIGHTS_SHIFT 48
+#define I40IW_CQPSQ_STAG_ARIGHTS_MASK   \
+	(0x1fULL << I40IW_CQPSQ_STAG_ARIGHTS_SHIFT)
+
+#define I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT 53
+#define I40IW_CQPSQ_STAG_REMACCENABLED_MASK     \
+	(1ULL << I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT)
+
+#define I40IW_CQPSQ_STAG_VABASEDTO_SHIFT 59
+#define I40IW_CQPSQ_STAG_VABASEDTO_MASK \
+	(1ULL << I40IW_CQPSQ_STAG_VABASEDTO_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT 60
+#define I40IW_CQPSQ_STAG_USEHMCFNIDX_MASK       \
+	(1ULL << I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_USEPFRID_SHIFT 61
+#define I40IW_CQPSQ_STAG_USEPFRID_MASK  \
+	(1ULL << I40IW_CQPSQ_STAG_USEPFRID_SHIFT)
+
+#define I40IW_CQPSQ_STAG_PBA_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_STAG_PBA_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_HMCFNIDX_MASK \
+	(0x3fUL << I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT)
+
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_MASK     \
+	(0xfffffffUL << I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT)
+
+/* Query stag */
+#define I40IW_CQPSQ_QUERYSTAG_IDX_SHIFT I40IW_CQPSQ_STAG_IDX_SHIFT
+#define I40IW_CQPSQ_QUERYSTAG_IDX_MASK I40IW_CQPSQ_STAG_IDX_MASK
+
+/* Allocate Local IP Address Entry */
+
+/* Manage Local IP Address Table - MLIPA */
+#define I40IW_CQPSQ_MLIPA_IPV6LO_SHIFT  I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6LO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV6HI_SHIFT  I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_MLIPA_IPV6HI_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_MLIPA_IPV4_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPV4_MASK \
+	(0xffffffffUL << I40IW_CQPSQ_MLIPA_IPV4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_MASK       \
+	(0x3fUL << I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT 42
+#define I40IW_CQPSQ_MLIPA_IPV4VALID_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT 43
+#define I40IW_CQPSQ_MLIPA_IPV6VALID_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT 62
+#define I40IW_CQPSQ_MLIPA_FREEENTRY_MASK        \
+	(1ULL << I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT 61
+#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_MASK   \
+	(1ULL << I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC0_SHIFT 0
+#define I40IW_CQPSQ_MLIPA_MAC0_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC0_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC1_SHIFT 8
+#define I40IW_CQPSQ_MLIPA_MAC1_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC1_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC2_SHIFT 16
+#define I40IW_CQPSQ_MLIPA_MAC2_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC2_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC3_SHIFT 24
+#define I40IW_CQPSQ_MLIPA_MAC3_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC3_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC4_SHIFT 32
+#define I40IW_CQPSQ_MLIPA_MAC4_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC4_SHIFT)
+
+#define I40IW_CQPSQ_MLIPA_MAC5_SHIFT 40
+#define I40IW_CQPSQ_MLIPA_MAC5_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC5_SHIFT)
+
+/* Manage ARP Table  - MAT */
+#define I40IW_CQPSQ_MAT_REACHMAX_SHIFT 0
+#define I40IW_CQPSQ_MAT_REACHMAX_MASK   \
+	(0xffffffffUL << I40IW_CQPSQ_MAT_REACHMAX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_MACADDR_SHIFT 0
+#define I40IW_CQPSQ_MAT_MACADDR_MASK    \
+	(0xffffffffffffULL << I40IW_CQPSQ_MAT_MACADDR_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT 0
+#define I40IW_CQPSQ_MAT_ARPENTRYIDX_MASK        \
+	(0xfffUL << I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT)
+
+#define I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT 42
+#define I40IW_CQPSQ_MAT_ENTRYVALID_MASK \
+	(1ULL << I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT)
+
+#define I40IW_CQPSQ_MAT_PERMANENT_SHIFT 43
+#define I40IW_CQPSQ_MAT_PERMANENT_MASK  \
+	(1ULL << I40IW_CQPSQ_MAT_PERMANENT_SHIFT)
+
+#define I40IW_CQPSQ_MAT_QUERY_SHIFT 44
+#define I40IW_CQPSQ_MAT_QUERY_MASK (1ULL << I40IW_CQPSQ_MAT_QUERY_SHIFT)
+
+/* Manage VF PBLE Backing Pages - MVPBP*/
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT 0
+#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_MASK \
+	(0x3ffULL << I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT 16
+#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_MASK \
+	(0x1ffULL << I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_SD_INX_SHIFT 32
+#define I40IW_CQPSQ_MVPBP_SD_INX_MASK \
+	(0xfffULL << I40IW_CQPSQ_MVPBP_SD_INX_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT 62
+#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_MASK \
+	(0x1ULL << I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT)
+
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT 3
+#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
+	(0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
+
+/* Manage Push Page - MPP */
+#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
+
+#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
+#define I40IW_CQPSQ_MPP_QS_HANDLE_MASK (0xffffUL << \
+					I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT)
+
+#define I40IW_CQPSQ_MPP_PPIDX_SHIFT 0
+#define I40IW_CQPSQ_MPP_PPIDX_MASK (0x3ffUL << I40IW_CQPSQ_MPP_PPIDX_SHIFT)
+
+#define I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT 62
+#define I40IW_CQPSQ_MPP_FREE_PAGE_MASK (1ULL << I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT)
+
+/* Upload Context - UCTX */
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IW_CQPSQ_UCTX_QPCTXADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IW_CQPSQ_UCTX_QPID_SHIFT 0
+#define I40IW_CQPSQ_UCTX_QPID_MASK (0x3ffffUL << I40IW_CQPSQ_UCTX_QPID_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_QPTYPE_SHIFT 48
+#define I40IW_CQPSQ_UCTX_QPTYPE_MASK (0xfULL << I40IW_CQPSQ_UCTX_QPTYPE_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT 61
+#define I40IW_CQPSQ_UCTX_RAWFORMAT_MASK \
+	(1ULL << I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT)
+
+#define I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT 62
+#define I40IW_CQPSQ_UCTX_FREEZEQP_MASK  \
+	(1ULL << I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT)
+
+/* Manage HMC PM Function Table - MHMC */
+#define I40IW_CQPSQ_MHMC_VFIDX_SHIFT 0
+#define I40IW_CQPSQ_MHMC_VFIDX_MASK (0x7fUL << I40IW_CQPSQ_MHMC_VFIDX_SHIFT)
+
+#define I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT 62
+#define I40IW_CQPSQ_MHMC_FREEPMFN_MASK  \
+	(1ULL << I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT)
+
+/* Set HMC Resource Profile - SHMCRP */
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT 0
+#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_MASK \
+	(0x7ULL << I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT)
+#define I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT 32
+#define I40IW_CQPSQ_SHMCRP_VFNUM_MASK (0x3fULL << I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT)
+
+/* Create/Destroy CEQ */
+#define I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQSIZE_MASK \
+	(0x1ffffUL << I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_CEQID_SHIFT 0
+#define I40IW_CQPSQ_CEQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CEQ_CEQID_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_CEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_CEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_CEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_CEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Create/Destroy AEQ */
+#define I40IW_CQPSQ_AEQ_AEQECNT_SHIFT 0
+#define I40IW_CQPSQ_AEQ_AEQECNT_MASK \
+	(0x7ffffUL << I40IW_CQPSQ_AEQ_AEQECNT_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
+#define I40IW_CQPSQ_AEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
+
+#define I40IW_CQPSQ_AEQ_VMAP_SHIFT 47
+#define I40IW_CQPSQ_AEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_AEQ_VMAP_SHIFT)
+
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT 0
+#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_MASK      \
+	(0xfffffffUL << I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT)
+
+/* Commit FPM Values - CFPM */
+#define I40IW_CQPSQ_CFPM_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_CFPM_HMCFNID_MASK (0x3fUL << I40IW_CQPSQ_CFPM_HMCFNID_SHIFT)
+
+/* Flush WQEs - FWQE */
+#define I40IW_CQPSQ_FWQE_AECODE_SHIFT 0
+#define I40IW_CQPSQ_FWQE_AECODE_MASK (0xffffUL << I40IW_CQPSQ_FWQE_AECODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_AESOURCE_SHIFT 16
+#define I40IW_CQPSQ_FWQE_AESOURCE_MASK \
+	(0xfUL << I40IW_CQPSQ_FWQE_AESOURCE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMNERR_SHIFT 0
+#define I40IW_CQPSQ_FWQE_RQMNERR_MASK \
+	(0xffffUL << I40IW_CQPSQ_FWQE_RQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_RQMJERR_SHIFT 16
+#define I40IW_CQPSQ_FWQE_RQMJERR_MASK \
+	(0xffffUL << I40IW_CQPSQ_FWQE_RQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMNERR_SHIFT 32
+#define I40IW_CQPSQ_FWQE_SQMNERR_MASK   \
+	(0xffffULL << I40IW_CQPSQ_FWQE_SQMNERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_SQMJERR_SHIFT 48
+#define I40IW_CQPSQ_FWQE_SQMJERR_MASK   \
+	(0xffffULL << I40IW_CQPSQ_FWQE_SQMJERR_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_QPID_SHIFT 0
+#define I40IW_CQPSQ_FWQE_QPID_MASK (0x3ffffULL << I40IW_CQPSQ_FWQE_QPID_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT 59
+#define I40IW_CQPSQ_FWQE_GENERATE_AE_MASK (1ULL <<      \
+					   I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT 60
+#define I40IW_CQPSQ_FWQE_USERFLCODE_MASK        \
+	(1ULL << I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT 61
+#define I40IW_CQPSQ_FWQE_FLUSHSQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT)
+
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT 62
+#define I40IW_CQPSQ_FWQE_FLUSHRQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT)
+
+/* Manage Accelerated Port Table - MAPT */
+#define I40IW_CQPSQ_MAPT_PORT_SHIFT 0
+#define I40IW_CQPSQ_MAPT_PORT_MASK (0xffffUL << I40IW_CQPSQ_MAPT_PORT_SHIFT)
+
+#define I40IW_CQPSQ_MAPT_ADDPORT_SHIFT 62
+#define I40IW_CQPSQ_MAPT_ADDPORT_MASK (1ULL << I40IW_CQPSQ_MAPT_ADDPORT_SHIFT)
+
+/* Update Protocol Engine SDs */
+#define I40IW_CQPSQ_UPESD_SDCMD_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDCMD_MASK (0xffffffffUL << I40IW_CQPSQ_UPESD_SDCMD_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT 0
+#define I40IW_CQPSQ_UPESD_SDDATALOW_MASK        \
+	(0xffffffffUL << I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT 32
+#define I40IW_CQPSQ_UPESD_SDDATAHI_MASK \
+	(0xffffffffULL << I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT)
+#define I40IW_CQPSQ_UPESD_HMCFNID_SHIFT 0
+#define I40IW_CQPSQ_UPESD_HMCFNID_MASK  \
+	(0x3fUL << I40IW_CQPSQ_UPESD_HMCFNID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT 63
+#define I40IW_CQPSQ_UPESD_ENTRY_VALID_MASK      \
+	((u64)1 << I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT 0
+#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_MASK      \
+	(0xfUL << I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT)
+
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT 7
+#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_MASK       \
+	(0x1UL << I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT)
+
+/* Suspend QP */
+#define I40IW_CQPSQ_SUSPENDQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_SUSPENDQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* Resume QP */
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_MASK      \
+	(0xffffffffUL << I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT)
+
+#define I40IW_CQPSQ_RESUMEQP_QPID_SHIFT 0
+#define I40IW_CQPSQ_RESUMEQP_QPID_MASK (0x3FFFFUL)
+/* I40IWCQ_QPID_MASK */
+
+/* IW QP Context */
+#define I40IWQPC_DDP_VER_SHIFT 0
+#define I40IWQPC_DDP_VER_MASK (3UL << I40IWQPC_DDP_VER_SHIFT)
+
+#define I40IWQPC_SNAP_SHIFT 2
+#define I40IWQPC_SNAP_MASK (1UL << I40IWQPC_SNAP_SHIFT)
+
+#define I40IWQPC_IPV4_SHIFT 3
+#define I40IWQPC_IPV4_MASK (1UL << I40IWQPC_IPV4_SHIFT)
+
+#define I40IWQPC_NONAGLE_SHIFT 4
+#define I40IWQPC_NONAGLE_MASK (1UL << I40IWQPC_NONAGLE_SHIFT)
+
+#define I40IWQPC_INSERTVLANTAG_SHIFT 5
+#define I40IWQPC_INSERTVLANTAG_MASK (1 << I40IWQPC_INSERTVLANTAG_SHIFT)
+
+#define I40IWQPC_USESRQ_SHIFT 6
+#define I40IWQPC_USESRQ_MASK (1UL << I40IWQPC_USESRQ_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_SHIFT 7
+#define I40IWQPC_TIMESTAMP_MASK (1UL << I40IWQPC_TIMESTAMP_SHIFT)
+
+#define I40IWQPC_RQWQESIZE_SHIFT 8
+#define I40IWQPC_RQWQESIZE_MASK (3UL << I40IWQPC_RQWQESIZE_SHIFT)
+
+#define I40IWQPC_INSERTL2TAG2_SHIFT 11
+#define I40IWQPC_INSERTL2TAG2_MASK (1UL << I40IWQPC_INSERTL2TAG2_SHIFT)
+
+#define I40IWQPC_LIMIT_SHIFT 12
+#define I40IWQPC_LIMIT_MASK (3UL << I40IWQPC_LIMIT_SHIFT)
+
+#define I40IWQPC_DROPOOOSEG_SHIFT 15
+#define I40IWQPC_DROPOOOSEG_MASK (1UL << I40IWQPC_DROPOOOSEG_SHIFT)
+
+#define I40IWQPC_DUPACK_THRESH_SHIFT 16
+#define I40IWQPC_DUPACK_THRESH_MASK (7UL << I40IWQPC_DUPACK_THRESH_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_VALID_SHIFT 19
+#define I40IWQPC_ERR_RQ_IDX_VALID_MASK  (1UL << I40IWQPC_ERR_RQ_IDX_VALID_SHIFT)
+
+#define I40IWQPC_DIS_VLAN_CHECKS_SHIFT 19
+#define I40IWQPC_DIS_VLAN_CHECKS_MASK (7UL << I40IWQPC_DIS_VLAN_CHECKS_SHIFT)
+
+#define I40IWQPC_RCVTPHEN_SHIFT 28
+#define I40IWQPC_RCVTPHEN_MASK (1UL << I40IWQPC_RCVTPHEN_SHIFT)
+
+#define I40IWQPC_XMITTPHEN_SHIFT 29
+#define I40IWQPC_XMITTPHEN_MASK (1ULL << I40IWQPC_XMITTPHEN_SHIFT)
+
+#define I40IWQPC_RQTPHEN_SHIFT 30
+#define I40IWQPC_RQTPHEN_MASK (1UL << I40IWQPC_RQTPHEN_SHIFT)
+
+#define I40IWQPC_SQTPHEN_SHIFT 31
+#define I40IWQPC_SQTPHEN_MASK (1ULL << I40IWQPC_SQTPHEN_SHIFT)
+
+#define I40IWQPC_PPIDX_SHIFT 32
+#define I40IWQPC_PPIDX_MASK (0x3ffULL << I40IWQPC_PPIDX_SHIFT)
+
+#define I40IWQPC_PMENA_SHIFT 47
+#define I40IWQPC_PMENA_MASK (1ULL << I40IWQPC_PMENA_SHIFT)
+
+#define I40IWQPC_RDMAP_VER_SHIFT 62
+#define I40IWQPC_RDMAP_VER_MASK (3ULL << I40IWQPC_RDMAP_VER_SHIFT)
+
+#define I40IWQPC_SQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_SQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_RQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_RQADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_TTL_SHIFT 0
+#define I40IWQPC_TTL_MASK (0xffUL << I40IWQPC_TTL_SHIFT)
+
+#define I40IWQPC_RQSIZE_SHIFT 8
+#define I40IWQPC_RQSIZE_MASK (0xfUL << I40IWQPC_RQSIZE_SHIFT)
+
+#define I40IWQPC_SQSIZE_SHIFT 12
+#define I40IWQPC_SQSIZE_MASK (0xfUL << I40IWQPC_SQSIZE_SHIFT)
+
+#define I40IWQPC_SRCMACADDRIDX_SHIFT 16
+#define I40IWQPC_SRCMACADDRIDX_MASK (0x3fUL << I40IWQPC_SRCMACADDRIDX_SHIFT)
+
+#define I40IWQPC_AVOIDSTRETCHACK_SHIFT 23
+#define I40IWQPC_AVOIDSTRETCHACK_MASK (1UL << I40IWQPC_AVOIDSTRETCHACK_SHIFT)
+
+#define I40IWQPC_TOS_SHIFT 24
+#define I40IWQPC_TOS_MASK (0xffUL << I40IWQPC_TOS_SHIFT)
+
+#define I40IWQPC_SRCPORTNUM_SHIFT 32
+#define I40IWQPC_SRCPORTNUM_MASK (0xffffULL << I40IWQPC_SRCPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTPORTNUM_SHIFT 48
+#define I40IWQPC_DESTPORTNUM_MASK (0xffffULL << I40IWQPC_DESTPORTNUM_SHIFT)
+
+#define I40IWQPC_DESTIPADDR0_SHIFT 32
+#define I40IWQPC_DESTIPADDR0_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR0_SHIFT)
+
+#define I40IWQPC_DESTIPADDR1_SHIFT 0
+#define I40IWQPC_DESTIPADDR1_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR1_SHIFT)
+
+#define I40IWQPC_DESTIPADDR2_SHIFT 32
+#define I40IWQPC_DESTIPADDR2_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR2_SHIFT)
+
+#define I40IWQPC_DESTIPADDR3_SHIFT 0
+#define I40IWQPC_DESTIPADDR3_MASK       \
+	(0xffffffffULL << I40IWQPC_DESTIPADDR3_SHIFT)
+
+#define I40IWQPC_SNDMSS_SHIFT 16
+#define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT)
+
+#define I40IWQPC_VLANTAG_SHIFT 32
+#define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
+
+#define I40IWQPC_ARPIDX_SHIFT 48
+#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT)
+
+#define I40IWQPC_FLOWLABEL_SHIFT 0
+#define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
+
+#define I40IWQPC_WSCALE_SHIFT 20
+#define I40IWQPC_WSCALE_MASK (1UL << I40IWQPC_WSCALE_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_SHIFT 21
+#define I40IWQPC_KEEPALIVE_MASK (1UL << I40IWQPC_KEEPALIVE_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_OPT_SHIFT 22
+#define I40IWQPC_IGNORE_TCP_OPT_MASK (1UL << I40IWQPC_IGNORE_TCP_OPT_SHIFT)
+
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT 23
+#define I40IWQPC_IGNORE_TCP_UNS_OPT_MASK        \
+	(1UL << I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT)
+
+#define I40IWQPC_TCPSTATE_SHIFT 28
+#define I40IWQPC_TCPSTATE_MASK (0xfUL << I40IWQPC_TCPSTATE_SHIFT)
+
+#define I40IWQPC_RCVSCALE_SHIFT 32
+#define I40IWQPC_RCVSCALE_MASK (0xfULL << I40IWQPC_RCVSCALE_SHIFT)
+
+#define I40IWQPC_SNDSCALE_SHIFT 40
+#define I40IWQPC_SNDSCALE_MASK (0xfULL << I40IWQPC_SNDSCALE_SHIFT)
+
+#define I40IWQPC_PDIDX_SHIFT 48
+#define I40IWQPC_PDIDX_MASK (0x7fffULL << I40IWQPC_PDIDX_SHIFT)
+
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT 16
+#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_MASK   \
+	(0xffUL << I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT)
+
+#define I40IWQPC_KEEPALIVE_INTERVAL_SHIFT 24
+#define I40IWQPC_KEEPALIVE_INTERVAL_MASK        \
+	(0xffUL << I40IWQPC_KEEPALIVE_INTERVAL_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_RECENT_SHIFT 0
+#define I40IWQPC_TIMESTAMP_RECENT_MASK  \
+	(0xffffffffUL << I40IWQPC_TIMESTAMP_RECENT_SHIFT)
+
+#define I40IWQPC_TIMESTAMP_AGE_SHIFT 32
+#define I40IWQPC_TIMESTAMP_AGE_MASK     \
+	(0xffffffffULL << I40IWQPC_TIMESTAMP_AGE_SHIFT)
+
+#define I40IWQPC_SNDNXT_SHIFT 0
+#define I40IWQPC_SNDNXT_MASK (0xffffffffUL << I40IWQPC_SNDNXT_SHIFT)
+
+#define I40IWQPC_SNDWND_SHIFT 32
+#define I40IWQPC_SNDWND_MASK (0xffffffffULL << I40IWQPC_SNDWND_SHIFT)
+
+#define I40IWQPC_RCVNXT_SHIFT 0
+#define I40IWQPC_RCVNXT_MASK (0xffffffffUL << I40IWQPC_RCVNXT_SHIFT)
+
+#define I40IWQPC_RCVWND_SHIFT 32
+#define I40IWQPC_RCVWND_MASK (0xffffffffULL << I40IWQPC_RCVWND_SHIFT)
+
+#define I40IWQPC_SNDMAX_SHIFT 0
+#define I40IWQPC_SNDMAX_MASK (0xffffffffUL << I40IWQPC_SNDMAX_SHIFT)
+
+#define I40IWQPC_SNDUNA_SHIFT 32
+#define I40IWQPC_SNDUNA_MASK (0xffffffffULL << I40IWQPC_SNDUNA_SHIFT)
+
+#define I40IWQPC_SRTT_SHIFT 0
+#define I40IWQPC_SRTT_MASK (0xffffffffUL << I40IWQPC_SRTT_SHIFT)
+
+#define I40IWQPC_RTTVAR_SHIFT 32
+#define I40IWQPC_RTTVAR_MASK (0xffffffffULL << I40IWQPC_RTTVAR_SHIFT)
+
+#define I40IWQPC_SSTHRESH_SHIFT 0
+#define I40IWQPC_SSTHRESH_MASK (0xffffffffUL << I40IWQPC_SSTHRESH_SHIFT)
+
+#define I40IWQPC_CWND_SHIFT 32
+#define I40IWQPC_CWND_MASK (0xffffffffULL << I40IWQPC_CWND_SHIFT)
+
+#define I40IWQPC_SNDWL1_SHIFT 0
+#define I40IWQPC_SNDWL1_MASK (0xffffffffUL << I40IWQPC_SNDWL1_SHIFT)
+
+#define I40IWQPC_SNDWL2_SHIFT 32
+#define I40IWQPC_SNDWL2_MASK (0xffffffffULL << I40IWQPC_SNDWL2_SHIFT)
+
+#define I40IWQPC_ERR_RQ_IDX_SHIFT 32
+#define I40IWQPC_ERR_RQ_IDX_MASK  (0x3fffULL << I40IWQPC_ERR_RQ_IDX_SHIFT)
+
+#define I40IWQPC_MAXSNDWND_SHIFT 0
+#define I40IWQPC_MAXSNDWND_MASK (0xffffffffUL << I40IWQPC_MAXSNDWND_SHIFT)
+
+#define I40IWQPC_REXMIT_THRESH_SHIFT 48
+#define I40IWQPC_REXMIT_THRESH_MASK (0x3fULL << I40IWQPC_REXMIT_THRESH_SHIFT)
+
+#define I40IWQPC_TXCQNUM_SHIFT 0
+#define I40IWQPC_TXCQNUM_MASK (0x1ffffUL << I40IWQPC_TXCQNUM_SHIFT)
+
+#define I40IWQPC_RXCQNUM_SHIFT 32
+#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_LASTBYTESENT_SHIFT 0
+#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
+
+#define I40IWQPC_SRQID_SHIFT 32
+#define I40IWQPC_SRQID_MASK (0xffULL << I40IWQPC_SRQID_SHIFT)
+
+#define I40IWQPC_ORDSIZE_SHIFT 0
+#define I40IWQPC_ORDSIZE_MASK (0x7fUL << I40IWQPC_ORDSIZE_SHIFT)
+
+#define I40IWQPC_IRDSIZE_SHIFT 16
+#define I40IWQPC_IRDSIZE_MASK (0x3UL << I40IWQPC_IRDSIZE_SHIFT)
+
+#define I40IWQPC_WRRDRSPOK_SHIFT 20
+#define I40IWQPC_WRRDRSPOK_MASK (1UL << I40IWQPC_WRRDRSPOK_SHIFT)
+
+#define I40IWQPC_RDOK_SHIFT 21
+#define I40IWQPC_RDOK_MASK (1UL << I40IWQPC_RDOK_SHIFT)
+
+#define I40IWQPC_SNDMARKERS_SHIFT 22
+#define I40IWQPC_SNDMARKERS_MASK (1UL << I40IWQPC_SNDMARKERS_SHIFT)
+
+#define I40IWQPC_BINDEN_SHIFT 23
+#define I40IWQPC_BINDEN_MASK (1UL << I40IWQPC_BINDEN_SHIFT)
+
+#define I40IWQPC_FASTREGEN_SHIFT 24
+#define I40IWQPC_FASTREGEN_MASK (1UL << I40IWQPC_FASTREGEN_SHIFT)
+
+#define I40IWQPC_PRIVEN_SHIFT 25
+#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
+
+#define I40IWQPC_LSMMPRESENT_SHIFT 26
+#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
+
+#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
+#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+
+#define I40IWQPC_IWARPMODE_SHIFT 28
+#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
+
+#define I40IWQPC_RCVMARKERS_SHIFT 29
+#define I40IWQPC_RCVMARKERS_MASK (1UL << I40IWQPC_RCVMARKERS_SHIFT)
+
+#define I40IWQPC_ALIGNHDRS_SHIFT 30
+#define I40IWQPC_ALIGNHDRS_MASK (1UL << I40IWQPC_ALIGNHDRS_SHIFT)
+
+#define I40IWQPC_RCVNOMPACRC_SHIFT 31
+#define I40IWQPC_RCVNOMPACRC_MASK (1UL << I40IWQPC_RCVNOMPACRC_SHIFT)
+
+#define I40IWQPC_RCVMARKOFFSET_SHIFT 33
+#define I40IWQPC_RCVMARKOFFSET_MASK (0x1ffULL << I40IWQPC_RCVMARKOFFSET_SHIFT)
+
+#define I40IWQPC_SNDMARKOFFSET_SHIFT 48
+#define I40IWQPC_SNDMARKOFFSET_MASK (0x1ffULL << I40IWQPC_SNDMARKOFFSET_SHIFT)
+
+#define I40IWQPC_QPCOMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPC_QPCOMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPC_SQTPHVAL_SHIFT 0
+#define I40IWQPC_SQTPHVAL_MASK (0xffUL << I40IWQPC_SQTPHVAL_SHIFT)
+
+#define I40IWQPC_RQTPHVAL_SHIFT 8
+#define I40IWQPC_RQTPHVAL_MASK (0xffUL << I40IWQPC_RQTPHVAL_SHIFT)
+
+#define I40IWQPC_QSHANDLE_SHIFT 16
+#define I40IWQPC_QSHANDLE_MASK (0x3ffUL << I40IWQPC_QSHANDLE_SHIFT)
+
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT 32
+#define I40IWQPC_EXCEPTION_LAN_QUEUE_MASK (0xfffULL <<  \
+					   I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR3_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR3_MASK \
+	(0xffffffffUL << I40IWQPC_LOCAL_IPADDR3_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR2_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR2_MASK     \
+	(0xffffffffULL << I40IWQPC_LOCAL_IPADDR2_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR1_SHIFT 0
+#define I40IWQPC_LOCAL_IPADDR1_MASK     \
+	(0xffffffffUL << I40IWQPC_LOCAL_IPADDR1_SHIFT)
+
+#define I40IWQPC_LOCAL_IPADDR0_SHIFT 32
+#define I40IWQPC_LOCAL_IPADDR0_MASK     \
+	(0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
+
+/* wqe size considering 32 bytes per wqe*/
+#define I40IWQP_SW_MIN_WQSIZE 4		/* 128 bytes */
+#define I40IWQP_SW_MAX_WQSIZE 16384	/* 524288 bytes */
+
+#define I40IWQP_OP_RDMA_WRITE 0
+#define I40IWQP_OP_RDMA_READ 1
+#define I40IWQP_OP_RDMA_SEND 3
+#define I40IWQP_OP_RDMA_SEND_INV 4
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT 5
+#define I40IWQP_OP_RDMA_SEND_SOL_EVENT_INV 6
+#define I40IWQP_OP_BIND_MW 8
+#define I40IWQP_OP_FAST_REGISTER 9
+#define I40IWQP_OP_LOCAL_INVALIDATE 10
+#define I40IWQP_OP_RDMA_READ_LOC_INV 11
+#define I40IWQP_OP_NOP 12
+
+#define I40IW_RSVD_SHIFT        41
+#define I40IW_RSVD_MASK (0x7fffULL << I40IW_RSVD_SHIFT)
+
+/* iwarp QP SQ WQE common fields */
+#define I40IWQPSQ_OPCODE_SHIFT 32
+#define I40IWQPSQ_OPCODE_MASK (0x3fULL << I40IWQPSQ_OPCODE_SHIFT)
+
+#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
+#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
+
+#define I40IWQPSQ_PUSHWQE_SHIFT 56
+#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
+
+#define I40IWQPSQ_STREAMMODE_SHIFT 58
+#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
+
+#define I40IWQPSQ_WAITFORRCVPDU_SHIFT 59
+#define I40IWQPSQ_WAITFORRCVPDU_MASK (1ULL << I40IWQPSQ_WAITFORRCVPDU_SHIFT)
+
+#define I40IWQPSQ_READFENCE_SHIFT 60
+#define I40IWQPSQ_READFENCE_MASK (1ULL << I40IWQPSQ_READFENCE_SHIFT)
+
+#define I40IWQPSQ_LOCALFENCE_SHIFT 61
+#define I40IWQPSQ_LOCALFENCE_MASK (1ULL << I40IWQPSQ_LOCALFENCE_SHIFT)
+
+#define I40IWQPSQ_SIGCOMPL_SHIFT 62
+#define I40IWQPSQ_SIGCOMPL_MASK (1ULL << I40IWQPSQ_SIGCOMPL_SHIFT)
+
+#define I40IWQPSQ_VALID_SHIFT 63
+#define I40IWQPSQ_VALID_MASK (1ULL << I40IWQPSQ_VALID_SHIFT)
+
+#define I40IWQPSQ_FRAG_TO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_FRAG_TO_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_FRAG_LEN_SHIFT 0
+#define I40IWQPSQ_FRAG_LEN_MASK (0xffffffffUL << I40IWQPSQ_FRAG_LEN_SHIFT)
+
+#define I40IWQPSQ_FRAG_STAG_SHIFT 32
+#define I40IWQPSQ_FRAG_STAG_MASK (0xffffffffULL << I40IWQPSQ_FRAG_STAG_SHIFT)
+
+#define I40IWQPSQ_REMSTAGINV_SHIFT 0
+#define I40IWQPSQ_REMSTAGINV_MASK (0xffffffffUL << I40IWQPSQ_REMSTAGINV_SHIFT)
+
+#define I40IWQPSQ_INLINEDATAFLAG_SHIFT 57
+#define I40IWQPSQ_INLINEDATAFLAG_MASK (1ULL << I40IWQPSQ_INLINEDATAFLAG_SHIFT)
+
+#define I40IWQPSQ_INLINEDATALEN_SHIFT 48
+#define I40IWQPSQ_INLINEDATALEN_MASK    \
+	(0x7fULL << I40IWQPSQ_INLINEDATALEN_SHIFT)
+
+/* iwarp send with push mode */
+#define I40IWQPSQ_WQDESCIDX_SHIFT 0
+#define I40IWQPSQ_WQDESCIDX_MASK (0x3fffUL << I40IWQPSQ_WQDESCIDX_SHIFT)
+
+/* rdma write */
+#define I40IWQPSQ_REMSTAG_SHIFT 0
+#define I40IWQPSQ_REMSTAG_MASK (0xffffffffUL << I40IWQPSQ_REMSTAG_SHIFT)
+
+#define I40IWQPSQ_REMTO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_REMTO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* memory window */
+#define I40IWQPSQ_STAGRIGHTS_SHIFT 48
+#define I40IWQPSQ_STAGRIGHTS_MASK (0x1fULL << I40IWQPSQ_STAGRIGHTS_SHIFT)
+
+#define I40IWQPSQ_VABASEDTO_SHIFT 53
+#define I40IWQPSQ_VABASEDTO_MASK (1ULL << I40IWQPSQ_VABASEDTO_SHIFT)
+
+#define I40IWQPSQ_MWLEN_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_MWLEN_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPSQ_PARENTMRSTAG_SHIFT 0
+#define I40IWQPSQ_PARENTMRSTAG_MASK \
+	(0xffffffffUL << I40IWQPSQ_PARENTMRSTAG_SHIFT)
+
+#define I40IWQPSQ_MWSTAG_SHIFT 32
+#define I40IWQPSQ_MWSTAG_MASK (0xffffffffULL << I40IWQPSQ_MWSTAG_SHIFT)
+
+#define I40IWQPSQ_BASEVA_TO_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPSQ_BASEVA_TO_FBO_MASK I40IW_CQPHC_QPCTX_MASK
+
+/* Local Invalidate */
+#define I40IWQPSQ_LOCSTAG_SHIFT 32
+#define I40IWQPSQ_LOCSTAG_MASK (0xffffffffULL << I40IWQPSQ_LOCSTAG_SHIFT)
+
+/* Fast Register */
+#define I40IWQPSQ_STAGKEY_SHIFT 0
+#define I40IWQPSQ_STAGKEY_MASK (0xffUL << I40IWQPSQ_STAGKEY_SHIFT)
+
+#define I40IWQPSQ_STAGINDEX_SHIFT 8
+#define I40IWQPSQ_STAGINDEX_MASK (0xffffffUL << I40IWQPSQ_STAGINDEX_SHIFT)
+
+#define I40IWQPSQ_COPYHOSTPBLS_SHIFT 43
+#define I40IWQPSQ_COPYHOSTPBLS_MASK (1ULL << I40IWQPSQ_COPYHOSTPBLS_SHIFT)
+
+#define I40IWQPSQ_LPBLSIZE_SHIFT 44
+#define I40IWQPSQ_LPBLSIZE_MASK (3ULL << I40IWQPSQ_LPBLSIZE_SHIFT)
+
+#define I40IWQPSQ_HPAGESIZE_SHIFT 46
+#define I40IWQPSQ_HPAGESIZE_MASK (3ULL << I40IWQPSQ_HPAGESIZE_SHIFT)
+
+#define I40IWQPSQ_STAGLEN_SHIFT 0
+#define I40IWQPSQ_STAGLEN_MASK (0x1ffffffffffULL << I40IWQPSQ_STAGLEN_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT 48
+#define I40IWQPSQ_FIRSTPMPBLIDXLO_MASK  \
+	(0xffffULL << I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT)
+
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT 0
+#define I40IWQPSQ_FIRSTPMPBLIDXHI_MASK  \
+	(0xfffUL << I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT)
+
+#define I40IWQPSQ_PBLADDR_SHIFT 12
+#define I40IWQPSQ_PBLADDR_MASK (0xfffffffffffffULL << I40IWQPSQ_PBLADDR_SHIFT)
+
+/*  iwarp QP RQ WQE common fields */
+#define I40IWQPRQ_ADDFRAGCNT_SHIFT I40IWQPSQ_ADDFRAGCNT_SHIFT
+#define I40IWQPRQ_ADDFRAGCNT_MASK I40IWQPSQ_ADDFRAGCNT_MASK
+
+#define I40IWQPRQ_VALID_SHIFT I40IWQPSQ_VALID_SHIFT
+#define I40IWQPRQ_VALID_MASK I40IWQPSQ_VALID_MASK
+
+#define I40IWQPRQ_COMPLCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
+#define I40IWQPRQ_COMPLCTX_MASK I40IW_CQPHC_QPCTX_MASK
+
+#define I40IWQPRQ_FRAG_LEN_SHIFT I40IWQPSQ_FRAG_LEN_SHIFT
+#define I40IWQPRQ_FRAG_LEN_MASK I40IWQPSQ_FRAG_LEN_MASK
+
+#define I40IWQPRQ_STAG_SHIFT I40IWQPSQ_FRAG_STAG_SHIFT
+#define I40IWQPRQ_STAG_MASK I40IWQPSQ_FRAG_STAG_MASK
+
+#define I40IWQPRQ_TO_SHIFT I40IWQPSQ_FRAG_TO_SHIFT
+#define I40IWQPRQ_TO_MASK I40IWQPSQ_FRAG_TO_MASK
+
+/* Query FPM CQP buf */
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK               \
+	(0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK               \
+	(0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT 0
+#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_MASK  \
+	(0x3fffUL << I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT 32
+#define I40IW_QUERY_FPM_MAX_PE_SDS_MASK \
+	(0x3fffULL << I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_QPS_MASK    \
+	(0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CQS_MASK    \
+	(0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
+
+#define I40IW_QUERY_FPM_MAX_CEQS_SHIFT 0
+#define I40IW_QUERY_FPM_MAX_CEQS_MASK   \
+	(0xffUL << I40IW_QUERY_FPM_MAX_CEQS_SHIFT)
+
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_XFBLOCKSIZE_MASK        \
+	(0xffffffffULL << I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT 32
+#define I40IW_QUERY_FPM_Q1BLOCKSIZE_MASK        \
+	(0xffffffffULL << I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT)
+
+#define I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT 16
+#define I40IW_QUERY_FPM_HTMULTIPLIER_MASK       \
+	(0xfUL << I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT)
+
+#define I40IW_QUERY_FPM_TIMERBUCKET_SHIFT 32
+#define I40IW_QUERY_FPM_TIMERBUCKET_MASK        \
+	(0xffFFULL << I40IW_QUERY_FPM_TIMERBUCKET_SHIFT)
+
+/* Static HMC pages allocated buf */
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT 0
+#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_MASK        \
+	(0x3fUL << I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT)
+
+#define I40IW_HW_PAGE_SIZE	4096
+#define I40IW_DONE_COUNT	1000
+#define I40IW_SLEEP_COUNT	10
+
+enum {
+	I40IW_QUEUES_ALIGNMENT_MASK =		(128 - 1),
+	I40IW_AEQ_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_Q2_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_CEQ_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_CQ0_ALIGNMENT_MASK =		(256 - 1),
+	I40IW_HOST_CTX_ALIGNMENT_MASK =		(4 - 1),
+	I40IW_SHADOWAREA_MASK =			(128 - 1),
+	I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK =	0,
+	I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK =	0
+};
+
+enum i40iw_alignment {
+	I40IW_CQP_ALIGNMENT =		0x200,
+	I40IW_AEQ_ALIGNMENT =		0x100,
+	I40IW_CEQ_ALIGNMENT =		0x100,
+	I40IW_CQ0_ALIGNMENT =		0x100,
+	I40IW_SD_BUF_ALIGNMENT =	0x100
+};
+
+#define I40IW_QP_WQE_MIN_SIZE	32
+#define I40IW_QP_WQE_MAX_SIZE	128
+
+#define I40IW_CQE_QTYPE_RQ 0
+#define I40IW_CQE_QTYPE_SQ 1
+
+#define I40IW_RING_INIT(_ring, _size) \
+	{ \
+		(_ring).head = 0; \
+		(_ring).tail = 0; \
+		(_ring).size = (_size); \
+	}
+#define I40IW_RING_GETSIZE(_ring) ((_ring).size)
+#define I40IW_RING_GETCURRENT_HEAD(_ring) ((_ring).head)
+#define I40IW_RING_GETCURRENT_TAIL(_ring) ((_ring).tail)
+
+#define I40IW_RING_MOVE_HEAD(_ring, _retcode) \
+	{ \
+		register u32 size; \
+		size = (_ring).size;  \
+		if (!I40IW_RING_FULL_ERR(_ring)) { \
+			(_ring).head = ((_ring).head + 1) % size; \
+			(_retcode) = 0; \
+		} else { \
+			(_retcode) = I40IW_ERR_RING_FULL; \
+		} \
+	}
+
+#define I40IW_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+	{ \
+		register u32 size; \
+		size = (_ring).size; \
+		if ((I40IW_RING_WORK_AVAILABLE(_ring) + (_count)) < size) { \
+			(_ring).head = ((_ring).head + (_count)) % size; \
+			(_retcode) = 0; \
+		} else { \
+			(_retcode) = I40IW_ERR_RING_FULL; \
+		} \
+	}
+
+#define I40IW_RING_MOVE_TAIL(_ring) \
+	(_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+	(_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define I40IW_RING_SET_TAIL(_ring, _pos) \
+	(_ring).tail = (_pos) % (_ring).size
+
+#define I40IW_RING_FULL_ERR(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 1))  \
+	)
+
+#define I40IW_ERR_RING_FULL2(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 2))  \
+	)
+
+#define I40IW_ERR_RING_FULL3(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 3))  \
+	)
+
+#define I40IW_RING_MORE_WORK(_ring) \
+	( \
+		(I40IW_RING_WORK_AVAILABLE(_ring) != 0) \
+	)
+
+#define I40IW_RING_WORK_AVAILABLE(_ring) \
+	( \
+		(((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
+	)
+
+#define I40IW_RING_GET_WQES_AVAILABLE(_ring) \
+	( \
+		((_ring).size - I40IW_RING_WORK_AVAILABLE(_ring) - 1) \
+	)
+
+#define I40IW_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+	{ \
+		index = I40IW_RING_GETCURRENT_HEAD(_ring); \
+		I40IW_RING_MOVE_HEAD(_ring, _retcode); \
+	}
+
+/* Async Events codes */
+#define I40IW_AE_AMP_UNALLOCATED_STAG                                   0x0102
+#define I40IW_AE_AMP_INVALID_STAG                                       0x0103
+#define I40IW_AE_AMP_BAD_QP                                             0x0104
+#define I40IW_AE_AMP_BAD_PD                                             0x0105
+#define I40IW_AE_AMP_BAD_STAG_KEY                                       0x0106
+#define I40IW_AE_AMP_BAD_STAG_INDEX                                     0x0107
+#define I40IW_AE_AMP_BOUNDS_VIOLATION                                   0x0108
+#define I40IW_AE_AMP_RIGHTS_VIOLATION                                   0x0109
+#define I40IW_AE_AMP_TO_WRAP                                            0x010a
+#define I40IW_AE_AMP_FASTREG_SHARED                                     0x010b
+#define I40IW_AE_AMP_FASTREG_VALID_STAG                                 0x010c
+#define I40IW_AE_AMP_FASTREG_MW_STAG                                    0x010d
+#define I40IW_AE_AMP_FASTREG_INVALID_RIGHTS                             0x010e
+#define I40IW_AE_AMP_FASTREG_PBL_TABLE_OVERFLOW                         0x010f
+#define I40IW_AE_AMP_FASTREG_INVALID_LENGTH                             0x0110
+#define I40IW_AE_AMP_INVALIDATE_SHARED                                  0x0111
+#define I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS                 0x0112
+#define I40IW_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS                   0x0113
+#define I40IW_AE_AMP_MWBIND_VALID_STAG                                  0x0114
+#define I40IW_AE_AMP_MWBIND_OF_MR_STAG                                  0x0115
+#define I40IW_AE_AMP_MWBIND_TO_ZERO_BASED_STAG                          0x0116
+#define I40IW_AE_AMP_MWBIND_TO_MW_STAG                                  0x0117
+#define I40IW_AE_AMP_MWBIND_INVALID_RIGHTS                              0x0118
+#define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS                              0x0119
+#define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT                           0x011a
+#define I40IW_AE_AMP_MWBIND_BIND_DISABLED                               0x011b
+#define I40IW_AE_AMP_WQE_INVALID_PARAMETER                              0x0130
+#define I40IW_AE_BAD_CLOSE                                              0x0201
+#define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE                                0x0202
+#define I40IW_AE_CQ_OPERATION_ERROR                                     0x0203
+#define I40IW_AE_PRIV_OPERATION_DENIED                                  0x011c
+#define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO                               0x0205
+#define I40IW_AE_STAG_ZERO_INVALID                                      0x0206
+#define I40IW_AE_IB_RREQ_AND_Q1_FULL                                    0x0207
+#define I40IW_AE_SRQ_LIMIT                                              0x0209
+#define I40IW_AE_WQE_UNEXPECTED_OPCODE                                  0x020a
+#define I40IW_AE_WQE_INVALID_PARAMETER                                  0x020b
+#define I40IW_AE_WQE_LSMM_TOO_LONG                                      0x0220
+#define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN                             0x0301
+#define I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID                     0x0302
+#define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER      0x0303
+#define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION                            0x0304
+#define I40IW_AE_DDP_UBE_INVALID_MO                                     0x0305
+#define I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE                0x0306
+#define I40IW_AE_DDP_UBE_INVALID_QN                                     0x0307
+#define I40IW_AE_DDP_NO_L_BIT                                           0x0308
+#define I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION                        0x0311
+#define I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE                            0x0312
+#define I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST                          0x0313
+#define I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP                    0x0314
+#define I40IW_AE_INVALID_ARP_ENTRY                                      0x0401
+#define I40IW_AE_INVALID_TCP_OPTION_RCVD                                0x0402
+#define I40IW_AE_STALE_ARP_ENTRY                                        0x0403
+#define I40IW_AE_INVALID_WQE_LENGTH                                     0x0404
+#define I40IW_AE_INVALID_MAC_ENTRY                                      0x0405
+#define I40IW_AE_LLP_CLOSE_COMPLETE                                     0x0501
+#define I40IW_AE_LLP_CONNECTION_RESET                                   0x0502
+#define I40IW_AE_LLP_FIN_RECEIVED                                       0x0503
+#define I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH       0x0504
+#define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR                             0x0505
+#define I40IW_AE_LLP_SEGMENT_TOO_LARGE                                  0x0506
+#define I40IW_AE_LLP_SEGMENT_TOO_SMALL                                  0x0507
+#define I40IW_AE_LLP_SYN_RECEIVED                                       0x0508
+#define I40IW_AE_LLP_TERMINATE_RECEIVED                                 0x0509
+#define I40IW_AE_LLP_TOO_MANY_RETRIES                                   0x050a
+#define I40IW_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES                         0x050b
+#define I40IW_AE_LLP_DOUBT_REACHABILITY                                 0x050c
+#define I40IW_AE_LLP_RX_VLAN_MISMATCH                                   0x050d
+#define I40IW_AE_RESOURCE_EXHAUSTION                                    0x0520
+#define I40IW_AE_RESET_SENT                                             0x0601
+#define I40IW_AE_TERMINATE_SENT                                         0x0602
+#define I40IW_AE_RESET_NOT_SENT                                         0x0603
+#define I40IW_AE_LCE_QP_CATASTROPHIC                                    0x0700
+#define I40IW_AE_LCE_FUNCTION_CATASTROPHIC                              0x0701
+#define I40IW_AE_LCE_CQ_CATASTROPHIC                                    0x0702
+#define I40IW_AE_UDA_XMIT_FRAG_SEQ                                      0x0800
+#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG                                0x0801
+#define I40IW_AE_UDA_XMIT_IPADDR_MISMATCH                               0x0802
+#define I40IW_AE_QP_SUSPEND_COMPLETE                                    0x0900
+
+#define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY        1
+#define OP_CEQ_DESTROY                          2
+#define OP_AEQ_DESTROY                          3
+#define OP_DELETE_ARP_CACHE_ENTRY               4
+#define OP_MANAGE_APBVT_ENTRY                   5
+#define OP_CEQ_CREATE                           6
+#define OP_AEQ_CREATE                           7
+#define OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY         8
+#define OP_ADD_LOCAL_MAC_IPADDR_ENTRY           9
+#define OP_MANAGE_QHASH_TABLE_ENTRY             10
+#define OP_QP_MODIFY                            11
+#define OP_QP_UPLOAD_CONTEXT                    12
+#define OP_CQ_CREATE                            13
+#define OP_CQ_DESTROY                           14
+#define OP_QP_CREATE                            15
+#define OP_QP_DESTROY                           16
+#define OP_ALLOC_STAG                           17
+#define OP_MR_REG_NON_SHARED                    18
+#define OP_DEALLOC_STAG                         19
+#define OP_MW_ALLOC                             20
+#define OP_QP_FLUSH_WQES                        21
+#define OP_ADD_ARP_CACHE_ENTRY                  22
+#define OP_MANAGE_PUSH_PAGE                     23
+#define OP_UPDATE_PE_SDS                        24
+#define OP_MANAGE_HMC_PM_FUNC_TABLE             25
+#define OP_SUSPEND                              26
+#define OP_RESUME                               27
+#define OP_MANAGE_VF_PBLE_BP                    28
+#define OP_QUERY_FPM_VALUES                     29
+#define OP_COMMIT_FPM_VALUES                    30
+#define OP_SIZE_CQP_STAT_ARRAY                  31
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
new file mode 100644
index 000000000000..5484cbf55f0f
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
@@ -0,0 +1,821 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * i40iw_find_sd_index_limit - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
+					     u32 type,
+					     u32 idx,
+					     u32 cnt,
+					     u32 *sd_idx,
+					     u32 *sd_limit)
+{
+	u64 fpm_addr, fpm_limit;
+
+	fpm_addr = hmc_info->hmc_obj[(type)].base +
+			hmc_info->hmc_obj[type].size * idx;
+	fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
+	*sd_idx = (u32)(fpm_addr / I40IW_HMC_DIRECT_BP_SIZE);
+	*sd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_DIRECT_BP_SIZE);
+	*sd_limit += 1;
+}
+
+/**
+ * i40iw_find_pd_index_limit - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40iw_hmc_rsrc_type.
+ */
+
+static inline void i40iw_find_pd_index_limit(struct i40iw_hmc_info *hmc_info,
+					     u32 type,
+					     u32 idx,
+					     u32 cnt,
+					     u32 *pd_idx,
+					     u32 *pd_limit)
+{
+	u64 fpm_adr, fpm_limit;
+
+	fpm_adr = hmc_info->hmc_obj[type].base +
+			hmc_info->hmc_obj[type].size * idx;
+	fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);
+	*(pd_idx) = (u32)(fpm_adr / I40IW_HMC_PAGED_BP_SIZE);
+	*(pd_limit) = (u32)((fpm_limit - 1) / I40IW_HMC_PAGED_BP_SIZE);
+	*(pd_limit) += 1;
+}
+
+/**
+ * i40iw_set_sd_entry - setup entry for sd programming
+ * @pa: physical addr
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_set_sd_entry(u64 pa,
+				      u32 idx,
+				      enum i40iw_sd_entry_type type,
+				      struct update_sd_entry *entry)
+{
+	entry->data = pa | (I40IW_HMC_MAX_BP_COUNT << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+			(((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+				I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |
+			(1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);
+	entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_clr_sd_entry - setup entry for sd clear
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static inline void i40iw_clr_sd_entry(u32 idx, enum i40iw_sd_entry_type type,
+				      struct update_sd_entry *entry)
+{
+	entry->data = (I40IW_HMC_MAX_BP_COUNT <<
+			I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
+			(((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
+				I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);
+	entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
+}
+
+/**
+ * i40iw_hmc_sd_one - setup 1 sd entry for cqp
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ * @pa: physical addr
+ * @sd_idx: sd index
+ * @type: paged or direct sd
+ * @setsd: flag to set or clear sd
+ */
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev,
+					u8 hmc_fn_id,
+					u64 pa, u32 sd_idx,
+					enum i40iw_sd_entry_type type,
+					bool setsd)
+{
+	struct i40iw_update_sds_info sdinfo;
+
+	sdinfo.cnt = 1;
+	sdinfo.hmc_fn_id = hmc_fn_id;
+	if (setsd)
+		i40iw_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
+	else
+		i40iw_clr_sd_entry(sd_idx, type, sdinfo.entry);
+
+	return dev->cqp->process_cqp_sds(dev, &sdinfo);
+}
+
+/**
+ * i40iw_hmc_sd_grp - setup group od sd entries for cqp
+ * @dev: pointer to the device structure
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: sd index
+ * @sd_cnt: number of sd entries
+ * @setsd: flag to set or clear sd
+ */
+static enum i40iw_status_code i40iw_hmc_sd_grp(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_info *hmc_info,
+					       u32 sd_index,
+					       u32 sd_cnt,
+					       bool setsd)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_update_sds_info sdinfo;
+	u64 pa;
+	u32 i;
+	enum i40iw_status_code ret_code = 0;
+
+	memset(&sdinfo, 0, sizeof(sdinfo));
+	sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
+	for (i = sd_index; i < sd_index + sd_cnt; i++) {
+		sd_entry = &hmc_info->sd_table.sd_entry[i];
+		if (!sd_entry ||
+		    (!sd_entry->valid && setsd) ||
+		    (sd_entry->valid && !setsd))
+			continue;
+		if (setsd) {
+			pa = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+			    sd_entry->u.pd_table.pd_page_addr.pa :
+			    sd_entry->u.bp.addr.pa;
+			i40iw_set_sd_entry(pa, i, sd_entry->entry_type,
+					   &sdinfo.entry[sdinfo.cnt]);
+		} else {
+			i40iw_clr_sd_entry(i, sd_entry->entry_type,
+					   &sdinfo.entry[sdinfo.cnt]);
+		}
+		sdinfo.cnt++;
+		if (sdinfo.cnt == I40IW_MAX_SD_ENTRIES) {
+			ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+			if (ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_HMC,
+					    "i40iw_hmc_sd_grp: sd_programming failed err=%d\n",
+					    ret_code);
+				return ret_code;
+			}
+			sdinfo.cnt = 0;
+		}
+	}
+	if (sdinfo.cnt)
+		ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_vfdev_from_fpm - return vf dev ptr for hmc function id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
+{
+	struct i40iw_vfdev *vf_dev = NULL;
+	u16 idx;
+
+	for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+		if (dev->vf_dev[idx] &&
+		    ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+			vf_dev = dev->vf_dev[idx];
+			break;
+		}
+	}
+	return vf_dev;
+}
+
+/**
+ * i40iw_vf_hmcinfo_from_fpm - get ptr to hmc for func_id
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ */
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+						 u8 hmc_fn_id)
+{
+	struct i40iw_hmc_info *hmc_info = NULL;
+	u16 idx;
+
+	for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
+		if (dev->vf_dev[idx] &&
+		    ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
+			hmc_info = &dev->vf_dev[idx]->hmc_info;
+			break;
+		}
+	}
+	return hmc_info;
+}
+
+/**
+ * i40iw_hmc_finish_add_sd_reg - program sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: create obj info
+ */
+static enum i40iw_status_code i40iw_hmc_finish_add_sd_reg(struct i40iw_sc_dev *dev,
+							  struct i40iw_hmc_create_obj_info *info)
+{
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+	if ((info->start_idx + info->count) >
+			info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+
+	if (!info->add_sd_cnt)
+		return 0;
+
+	return i40iw_hmc_sd_grp(dev, info->hmc_info,
+				info->hmc_info->sd_indexes[0],
+				info->add_sd_cnt, true);
+}
+
+/**
+ * i40iw_create_iw_hmc_obj - allocate backing store for hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_iw_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ */
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_create_obj_info *info)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	u32 sd_idx, sd_lmt;
+	u32 pd_idx = 0, pd_lmt = 0;
+	u32 pd_idx1 = 0, pd_lmt1 = 0;
+	u32 i, j;
+	bool pd_error = false;
+	enum i40iw_status_code ret_code = 0;
+
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error type %u, start = %u, req cnt %u, cnt = %u\n",
+			    __func__, info->rsrc_type, info->start_idx, info->count,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+	}
+
+	if (!dev->is_pf)
+		return i40iw_vchnl_vf_add_hmc_objs(dev, info->rsrc_type, 0, info->count);
+
+	i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count,
+				  &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		return I40IW_ERR_INVALID_SD_INDEX;
+	}
+	i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+	for (j = sd_idx; j < sd_lmt; j++) {
+		ret_code = i40iw_add_sd_table_entry(dev->hw, info->hmc_info,
+						    j,
+						    info->entry_type,
+						    I40IW_HMC_DIRECT_BP_SIZE);
+		if (ret_code)
+			goto exit_sd_error;
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+
+		if ((sd_entry->entry_type == I40IW_SD_TYPE_PAGED) &&
+		    ((dev->hmc_info == info->hmc_info) &&
+		     (info->rsrc_type != I40IW_HMC_IW_PBLE))) {
+			pd_idx1 = max(pd_idx, (j * I40IW_HMC_MAX_BP_COUNT));
+			pd_lmt1 = min(pd_lmt,
+				      (j + 1) * I40IW_HMC_MAX_BP_COUNT);
+			for (i = pd_idx1; i < pd_lmt1; i++) {
+				/* update the pd table entry */
+				ret_code = i40iw_add_pd_table_entry(dev->hw, info->hmc_info,
+								    i, NULL);
+				if (ret_code) {
+					pd_error = true;
+					break;
+				}
+			}
+			if (pd_error) {
+				while (i && (i > pd_idx1)) {
+					i40iw_remove_pd_bp(dev->hw, info->hmc_info, (i - 1),
+							   info->is_pf);
+					i--;
+				}
+			}
+		}
+		if (sd_entry->valid)
+			continue;
+
+		info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
+		info->add_sd_cnt++;
+		sd_entry->valid = true;
+	}
+	return i40iw_hmc_finish_add_sd_reg(dev, info);
+
+exit_sd_error:
+	while (j && (j > sd_idx)) {
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+		switch (sd_entry->entry_type) {
+		case I40IW_SD_TYPE_PAGED:
+			pd_idx1 = max(pd_idx,
+				      (j - 1) * I40IW_HMC_MAX_BP_COUNT);
+			pd_lmt1 = min(pd_lmt, (j * I40IW_HMC_MAX_BP_COUNT));
+			for (i = pd_idx1; i < pd_lmt1; i++)
+				i40iw_prep_remove_pd_page(info->hmc_info, i);
+			break;
+		case I40IW_SD_TYPE_DIRECT:
+			i40iw_prep_remove_pd_page(info->hmc_info, (j - 1));
+			break;
+		default:
+			ret_code = I40IW_ERR_INVALID_SD_TYPE;
+			break;
+		}
+		j--;
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_finish_del_sd_reg - delete sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: dele obj info
+ * @reset: true if called before reset
+ */
+static enum i40iw_status_code i40iw_finish_del_sd_reg(struct i40iw_sc_dev *dev,
+						      struct i40iw_hmc_del_obj_info *info,
+						      bool reset)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	enum i40iw_status_code ret_code = 0;
+	u32 i, sd_idx;
+	struct i40iw_dma_mem *mem;
+
+	if (dev->is_pf && !reset)
+		ret_code = i40iw_hmc_sd_grp(dev, info->hmc_info,
+					    info->hmc_info->sd_indexes[0],
+					    info->del_sd_cnt, false);
+
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd sd_grp\n", __func__);
+
+	for (i = 0; i < info->del_sd_cnt; i++) {
+		sd_idx = info->hmc_info->sd_indexes[i];
+		sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
+		if (!sd_entry)
+			continue;
+		mem = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
+			&sd_entry->u.pd_table.pd_page_addr :
+			&sd_entry->u.bp.addr;
+
+		if (!mem || !mem->va)
+			i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd mem\n", __func__);
+		else
+			i40iw_free_dma_mem(dev->hw, mem);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_del_iw_hmc_obj - remove pe hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to i40iw_hmc_del_obj_info struct
+ * @reset: true if called before reset
+ *
+ * This will de-populate the SDs and PDs.  It frees
+ * the memory for PDS and backing storage.  After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ */
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_del_obj_info *info,
+					    bool reset)
+{
+	struct i40iw_hmc_pd_table *pd_table;
+	u32 sd_idx, sd_lmt;
+	u32 pd_idx, pd_lmt, rel_pd_idx;
+	u32 i, j;
+	enum i40iw_status_code ret_code = 0;
+
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error start_idx[%04d]  >= [type %04d].cnt[%04d]\n",
+			    __func__, info->start_idx, info->rsrc_type,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
+	}
+
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC,
+			    "%s: error start_idx[%04d] + count %04d  >= [type %04d].cnt[%04d]\n",
+			    __func__, info->start_idx, info->count,
+			    info->rsrc_type,
+			    info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+		return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
+	}
+	if (!dev->is_pf) {
+		ret_code = i40iw_vchnl_vf_del_hmc_obj(dev, info->rsrc_type, 0,
+						      info->count);
+		if (info->rsrc_type != I40IW_HMC_IW_PBLE)
+			return ret_code;
+	}
+
+	i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &pd_idx, &pd_lmt);
+
+	for (j = pd_idx; j < pd_lmt; j++) {
+		sd_idx = j / I40IW_HMC_PD_CNT_IN_SD;
+
+		if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+		    I40IW_SD_TYPE_PAGED)
+			continue;
+
+		rel_pd_idx = j % I40IW_HMC_PD_CNT_IN_SD;
+		pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+		if (pd_table->pd_entry[rel_pd_idx].valid) {
+			ret_code = i40iw_remove_pd_bp(dev->hw, info->hmc_info, j,
+						      info->is_pf);
+			if (ret_code) {
+				i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error\n", __func__);
+				return ret_code;
+			}
+		}
+	}
+
+	i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+				  info->start_idx, info->count, &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error invalid sd_idx\n", __func__);
+		return I40IW_ERR_INVALID_SD_INDEX;
+	}
+
+	for (i = sd_idx; i < sd_lmt; i++) {
+		if (!info->hmc_info->sd_table.sd_entry[i].valid)
+			continue;
+		switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+		case I40IW_SD_TYPE_DIRECT:
+			ret_code = i40iw_prep_remove_sd_bp(info->hmc_info, i);
+			if (!ret_code) {
+				info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+				info->del_sd_cnt++;
+			}
+			break;
+		case I40IW_SD_TYPE_PAGED:
+			ret_code = i40iw_prep_remove_pd_page(info->hmc_info, i);
+			if (!ret_code) {
+				info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+				info->del_sd_cnt++;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	return i40iw_finish_del_sd_reg(dev, info, reset);
+}
+
+/**
+ * i40iw_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ */
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info,
+						u32 sd_index,
+						enum i40iw_sd_entry_type type,
+						u64 direct_mode_sz)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	bool dma_mem_alloc_done = false;
+	struct i40iw_dma_mem mem;
+	u64 alloc_len;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+	if (!sd_entry->valid) {
+		if (type == I40IW_SD_TYPE_PAGED)
+			alloc_len = I40IW_HMC_PAGED_BP_SIZE;
+		else
+			alloc_len = direct_mode_sz;
+
+		/* allocate a 4K pd page or 2M backing page */
+		ret_code = i40iw_allocate_dma_mem(hw, &mem, alloc_len,
+						  I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+		if (ret_code)
+			goto exit;
+		dma_mem_alloc_done = true;
+		if (type == I40IW_SD_TYPE_PAGED) {
+			ret_code = i40iw_allocate_virt_mem(hw,
+							   &sd_entry->u.pd_table.pd_entry_virt_mem,
+							   sizeof(struct i40iw_hmc_pd_entry) * 512);
+			if (ret_code)
+				goto exit;
+			sd_entry->u.pd_table.pd_entry = (struct i40iw_hmc_pd_entry *)
+							 sd_entry->u.pd_table.pd_entry_virt_mem.va;
+
+			memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem, sizeof(struct i40iw_dma_mem));
+		} else {
+			memcpy(&sd_entry->u.bp.addr, &mem, sizeof(struct i40iw_dma_mem));
+			sd_entry->u.bp.sd_pd_index = sd_index;
+		}
+
+		hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+		I40IW_INC_SD_REFCNT(&hmc_info->sd_table);
+	}
+	if (sd_entry->entry_type == I40IW_SD_TYPE_DIRECT)
+		I40IW_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+	if (ret_code)
+		if (dma_mem_alloc_done)
+			i40iw_free_dma_mem(hw, &mem);
+
+	return ret_code;
+}
+
+/**
+ * i40iw_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ *	1. Initializes the pd entry
+ *	2. Adds pd_entry in the pd_table
+ *	3. Mark the entry valid in i40iw_hmc_pd_entry structure
+ *	4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ *	1. The memory for pd should be pinned down, physically contiguous and
+ *	   aligned on 4K boundary and zeroed memory.
+ *	2. It should be 4K in size.
+ */
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info,
+						u32 pd_index,
+						struct i40iw_dma_mem *rsrc_pg)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct i40iw_hmc_pd_table *pd_table;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_dma_mem mem;
+	struct i40iw_dma_mem *page = &mem;
+	u32 sd_idx, rel_pd_idx;
+	u64 *pd_addr;
+	u64 page_desc;
+
+	if (pd_index / I40IW_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+	sd_idx = (pd_index / I40IW_HMC_PD_CNT_IN_SD);
+	if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != I40IW_SD_TYPE_PAGED)
+		return 0;
+
+	rel_pd_idx = (pd_index % I40IW_HMC_PD_CNT_IN_SD);
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	if (!pd_entry->valid) {
+		if (rsrc_pg) {
+			pd_entry->rsrc_pg = true;
+			page = rsrc_pg;
+		} else {
+			ret_code = i40iw_allocate_dma_mem(hw, page,
+							  I40IW_HMC_PAGED_BP_SIZE,
+							  I40IW_HMC_PD_BP_BUF_ALIGNMENT);
+			if (ret_code)
+				return ret_code;
+			pd_entry->rsrc_pg = false;
+		}
+
+		memcpy(&pd_entry->bp.addr, page, sizeof(struct i40iw_dma_mem));
+		pd_entry->bp.sd_pd_index = pd_index;
+		pd_entry->bp.entry_type = I40IW_SD_TYPE_PAGED;
+		page_desc = page->pa | 0x1;
+
+		pd_addr = (u64 *)pd_table->pd_page_addr.va;
+		pd_addr += rel_pd_idx;
+
+		memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
+
+		pd_entry->sd_index = sd_idx;
+		pd_entry->valid = true;
+		I40IW_INC_PD_REFCNT(pd_table);
+		if (hmc_info->hmc_fn_id < I40IW_FIRST_VF_FPM_ID)
+			I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, rel_pd_idx);
+		else if (hw->hmc.hmc_fn_id != hmc_info->hmc_fn_id)
+			I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, rel_pd_idx,
+						   hmc_info->hmc_fn_id);
+	}
+	I40IW_INC_BP_REFCNT(&pd_entry->bp);
+
+	return 0;
+}
+
+/**
+ * i40iw_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: distinguishes a VF from a PF
+ *
+ * This function:
+ *	1. Marks the entry in pd table (for paged address mode) or in sd table
+ *	   (for direct address mode) invalid.
+ *	2. Write to register PMPDINV to invalidate the backing page in FV cache
+ *	3. Decrement the ref count for the pd _entry
+ * assumptions:
+ *	1. Caller can deallocate the memory used by backing storage after this
+ *	   function returns.
+ */
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+					  struct i40iw_hmc_info *hmc_info,
+					  u32 idx,
+					  bool is_pf)
+{
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_hmc_pd_table *pd_table;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	u32 sd_idx, rel_pd_idx;
+	struct i40iw_dma_mem *mem;
+	u64 *pd_addr;
+
+	sd_idx = idx / I40IW_HMC_PD_CNT_IN_SD;
+	rel_pd_idx = idx % I40IW_HMC_PD_CNT_IN_SD;
+	if (sd_idx >= hmc_info->sd_table.sd_cnt)
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+	if (sd_entry->entry_type != I40IW_SD_TYPE_PAGED)
+		return I40IW_ERR_INVALID_SD_TYPE;
+
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	I40IW_DEC_BP_REFCNT(&pd_entry->bp);
+	if (pd_entry->bp.ref_cnt)
+		return 0;
+
+	pd_entry->valid = false;
+	I40IW_DEC_PD_REFCNT(pd_table);
+	pd_addr = (u64 *)pd_table->pd_page_addr.va;
+	pd_addr += rel_pd_idx;
+	memset(pd_addr, 0, sizeof(u64));
+	if (is_pf)
+		I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+	else
+		I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx,
+					   hmc_info->hmc_fn_id);
+
+	if (!pd_entry->rsrc_pg) {
+		mem = &pd_entry->bp.addr;
+		if (!mem || !mem->va)
+			return I40IW_ERR_PARAM;
+		i40iw_free_dma_mem(hw, mem);
+	}
+	if (!pd_table->ref_cnt)
+		i40iw_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+
+	return 0;
+}
+
+/**
+ * i40iw_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ */
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+	I40IW_DEC_BP_REFCNT(&sd_entry->u.bp);
+	if (sd_entry->u.bp.ref_cnt)
+		return I40IW_ERR_NOT_READY;
+
+	I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+	sd_entry->valid = false;
+
+	return 0;
+}
+
+/**
+ * i40iw_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ */
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info,
+						 u32 idx)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+	if (sd_entry->u.pd_table.ref_cnt)
+		return I40IW_ERR_NOT_READY;
+
+	sd_entry->valid = false;
+	I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+	return 0;
+}
+
+/**
+ * i40iw_pf_init_vfhmc -
+ * @vf_cnt_array: array of cnt values of iwarp hmc objects
+ * @vf_hmc_fn_id: hmc function id ofr vf driver
+ * @dev: pointer to i40iw_dev struct
+ *
+ * Called by pf driver to initialize hmc_info for vf driver instance.
+ */
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev,
+					   u8 vf_hmc_fn_id,
+					   u32 *vf_cnt_array)
+{
+	struct i40iw_hmc_info *hmc_info;
+	enum i40iw_status_code ret_code = 0;
+	u32 i;
+
+	if ((vf_hmc_fn_id < I40IW_FIRST_VF_FPM_ID) ||
+	    (vf_hmc_fn_id >= I40IW_FIRST_VF_FPM_ID +
+	     I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+		i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: invalid vf_hmc_fn_id  0x%x\n",
+			    __func__, vf_hmc_fn_id);
+		return I40IW_ERR_INVALID_HMCFN_ID;
+	}
+
+	ret_code = i40iw_sc_init_iw_hmc(dev, vf_hmc_fn_id);
+	if (ret_code)
+		return ret_code;
+
+	hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, vf_hmc_fn_id);
+
+	for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
+		if (vf_cnt_array)
+			hmc_info->hmc_obj[i].cnt =
+			    vf_cnt_array[i - I40IW_HMC_IW_QP];
+		else
+			hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.h b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
new file mode 100644
index 000000000000..4c3fdd875621
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
@@ -0,0 +1,241 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_HMC_H
+#define I40IW_HMC_H
+
+#include "i40iw_d.h"
+
+struct i40iw_hw;
+enum i40iw_status_code;
+
+#define I40IW_HMC_MAX_BP_COUNT 512
+#define I40IW_MAX_SD_ENTRIES 11
+#define I40IW_HW_DBG_HMC_INVALID_BP_MARK     0xCA
+
+#define I40IW_HMC_INFO_SIGNATURE	0x484D5347
+#define I40IW_HMC_PD_CNT_IN_SD		512
+#define I40IW_HMC_DIRECT_BP_SIZE	0x200000
+#define I40IW_HMC_MAX_SD_COUNT		4096
+#define I40IW_HMC_PAGED_BP_SIZE		4096
+#define I40IW_HMC_PD_BP_BUF_ALIGNMENT	4096
+#define I40IW_FIRST_VF_FPM_ID		16
+#define FPM_MULTIPLIER			1024
+
+#define I40IW_INC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt++)
+#define I40IW_INC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt++)
+#define I40IW_INC_BP_REFCNT(bp)         ((bp)->ref_cnt++)
+
+#define I40IW_DEC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt--)
+#define I40IW_DEC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt--)
+#define I40IW_DEC_BP_REFCNT(bp)         ((bp)->ref_cnt--)
+
+/**
+ * I40IW_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ */
+#define I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx)                  \
+	i40iw_wr32((hw), I40E_PFHMC_PDINV,                                    \
+		(((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |             \
+		(0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT) | \
+		((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40IW_INVALIDATE_VF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ * @hmc_fn_id: VF's function id
+ */
+#define I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id)        \
+	i40iw_wr32(hw, I40E_GLHMC_VFPDINV(hmc_fn_id - I40IW_FIRST_VF_FPM_ID),  \
+	     ((sd_idx << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |              \
+	      (pd_idx << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+struct i40iw_hmc_obj_info {
+	u64 base;
+	u32 max_cnt;
+	u32 cnt;
+	u64 size;
+};
+
+enum i40iw_sd_entry_type {
+	I40IW_SD_TYPE_INVALID = 0,
+	I40IW_SD_TYPE_PAGED = 1,
+	I40IW_SD_TYPE_DIRECT = 2
+};
+
+struct i40iw_hmc_bp {
+	enum i40iw_sd_entry_type entry_type;
+	struct i40iw_dma_mem addr;
+	u32 sd_pd_index;
+	u32 ref_cnt;
+};
+
+struct i40iw_hmc_pd_entry {
+	struct i40iw_hmc_bp bp;
+	u32 sd_index;
+	bool rsrc_pg;
+	bool valid;
+};
+
+struct i40iw_hmc_pd_table {
+	struct i40iw_dma_mem pd_page_addr;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_virt_mem pd_entry_virt_mem;
+	u32 ref_cnt;
+	u32 sd_index;
+};
+
+struct i40iw_hmc_sd_entry {
+	enum i40iw_sd_entry_type entry_type;
+	bool valid;
+
+	union {
+		struct i40iw_hmc_pd_table pd_table;
+		struct i40iw_hmc_bp bp;
+	} u;
+};
+
+struct i40iw_hmc_sd_table {
+	struct i40iw_virt_mem addr;
+	u32 sd_cnt;
+	u32 ref_cnt;
+	struct i40iw_hmc_sd_entry *sd_entry;
+};
+
+struct i40iw_hmc_info {
+	u32 signature;
+	u8 hmc_fn_id;
+	u16 first_sd_index;
+
+	struct i40iw_hmc_obj_info *hmc_obj;
+	struct i40iw_virt_mem hmc_obj_virt_mem;
+	struct i40iw_hmc_sd_table sd_table;
+	u16 sd_indexes[I40IW_HMC_MAX_SD_COUNT];
+};
+
+struct update_sd_entry {
+	u64 cmd;
+	u64 data;
+};
+
+struct i40iw_update_sds_info {
+	u32 cnt;
+	u8 hmc_fn_id;
+	struct update_sd_entry entry[I40IW_MAX_SD_ENTRIES];
+};
+
+struct i40iw_ccq_cqe_info;
+struct i40iw_hmc_fcn_info {
+	void (*callback_fcn)(struct i40iw_sc_dev *, void *,
+			     struct i40iw_ccq_cqe_info *);
+	void *cqp_callback_param;
+	u32 vf_id;
+	u16 iw_vf_idx;
+	bool free_fcn;
+};
+
+enum i40iw_hmc_rsrc_type {
+	I40IW_HMC_IW_QP = 0,
+	I40IW_HMC_IW_CQ = 1,
+	I40IW_HMC_IW_SRQ = 2,
+	I40IW_HMC_IW_HTE = 3,
+	I40IW_HMC_IW_ARP = 4,
+	I40IW_HMC_IW_APBVT_ENTRY = 5,
+	I40IW_HMC_IW_MR = 6,
+	I40IW_HMC_IW_XF = 7,
+	I40IW_HMC_IW_XFFL = 8,
+	I40IW_HMC_IW_Q1 = 9,
+	I40IW_HMC_IW_Q1FL = 10,
+	I40IW_HMC_IW_TIMER = 11,
+	I40IW_HMC_IW_FSIMC = 12,
+	I40IW_HMC_IW_FSIAV = 13,
+	I40IW_HMC_IW_PBLE = 14,
+	I40IW_HMC_IW_MAX = 15,
+};
+
+struct i40iw_hmc_create_obj_info {
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_virt_mem add_sd_virt_mem;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+	u32 add_sd_cnt;
+	enum i40iw_sd_entry_type entry_type;
+	bool is_pf;
+};
+
+struct i40iw_hmc_del_obj_info {
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_virt_mem del_sd_virt_mem;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+	u32 del_sd_cnt;
+	bool is_pf;
+};
+
+enum i40iw_status_code i40iw_copy_dma_mem(struct i40iw_hw *hw, void *dest_buf,
+					  struct i40iw_dma_mem *src_mem, u64 src_offset, u64 size);
+enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
+					       struct i40iw_hmc_create_obj_info *info);
+enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_del_obj_info *info,
+					    bool reset);
+enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev, u8 hmc_fn_id,
+					u64 pa, u32 sd_idx, enum i40iw_sd_entry_type type,
+					bool setsd);
+enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
+					      struct i40iw_update_sds_info *info);
+struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id);
+struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
+						 u8 hmc_fn_id);
+enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info, u32 sd_index,
+						enum i40iw_sd_entry_type type, u64 direct_mode_sz);
+enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
+						struct i40iw_hmc_info *hmc_info, u32 pd_index,
+						struct i40iw_dma_mem *rsrc_pg);
+enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
+					  struct i40iw_hmc_info *hmc_info, u32 idx, bool is_pf);
+enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx);
+enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info, u32 idx);
+
+#define     ENTER_SHARED_FUNCTION()
+#define     EXIT_SHARED_FUNCTION()
+
+#endif				/* I40IW_HMC_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
new file mode 100644
index 000000000000..9fd302425563
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -0,0 +1,730 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+
+#include "i40iw.h"
+
+/**
+ * i40iw_initialize_hw_resources - initialize hw resource during open
+ * @iwdev: iwarp device
+ */
+u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
+{
+	unsigned long num_pds;
+	u32 resources_size;
+	u32 max_mr;
+	u32 max_qp;
+	u32 max_cq;
+	u32 arp_table_size;
+	u32 mrdrvbits;
+	void *resource_ptr;
+
+	max_qp = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt;
+	max_cq = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
+	arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
+	iwdev->max_cqe = 0xFFFFF;
+	num_pds = max_qp * 4;
+	resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
+	resources_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
+	resources_size += sizeof(struct i40iw_qp **) * max_qp;
+	iwdev->mem_resources = kzalloc(resources_size, GFP_KERNEL);
+
+	if (!iwdev->mem_resources)
+		return -ENOMEM;
+
+	iwdev->max_qp = max_qp;
+	iwdev->max_mr = max_mr;
+	iwdev->max_cq = max_cq;
+	iwdev->max_pd = num_pds;
+	iwdev->arp_table_size = arp_table_size;
+	iwdev->arp_table = (struct i40iw_arp_entry *)iwdev->mem_resources;
+	resource_ptr = iwdev->mem_resources + (sizeof(struct i40iw_arp_entry) * arp_table_size);
+
+	iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+	    IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+	iwdev->allocated_qps = resource_ptr;
+	iwdev->allocated_cqs = &iwdev->allocated_qps[BITS_TO_LONGS(max_qp)];
+	iwdev->allocated_mrs = &iwdev->allocated_cqs[BITS_TO_LONGS(max_cq)];
+	iwdev->allocated_pds = &iwdev->allocated_mrs[BITS_TO_LONGS(max_mr)];
+	iwdev->allocated_arps = &iwdev->allocated_pds[BITS_TO_LONGS(num_pds)];
+	iwdev->qp_table = (struct i40iw_qp **)(&iwdev->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
+	set_bit(0, iwdev->allocated_mrs);
+	set_bit(0, iwdev->allocated_qps);
+	set_bit(0, iwdev->allocated_cqs);
+	set_bit(0, iwdev->allocated_pds);
+	set_bit(0, iwdev->allocated_arps);
+
+	/* Following for ILQ/IEQ */
+	set_bit(1, iwdev->allocated_qps);
+	set_bit(1, iwdev->allocated_cqs);
+	set_bit(1, iwdev->allocated_pds);
+	set_bit(2, iwdev->allocated_cqs);
+	set_bit(2, iwdev->allocated_pds);
+
+	spin_lock_init(&iwdev->resource_lock);
+	mrdrvbits = 24 - get_count_order(iwdev->max_mr);
+	iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
+	return 0;
+}
+
+/**
+ * i40iw_cqp_ce_handler - handle cqp completions
+ * @iwdev: iwarp device
+ * @arm: flag to arm after completions
+ * @cq: cq for cqp completions
+ */
+static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq, bool arm)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	u32 cqe_count = 0;
+	struct i40iw_ccq_cqe_info info;
+	int ret;
+
+	do {
+		memset(&info, 0, sizeof(info));
+		ret = dev->ccq_ops->ccq_get_cqe_info(cq, &info);
+		if (ret)
+			break;
+		cqp_request = (struct i40iw_cqp_request *)(unsigned long)info.scratch;
+		if (info.error)
+			i40iw_pr_err("opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
+				     info.op_code, info.maj_err_code, info.min_err_code);
+		if (cqp_request) {
+			cqp_request->compl_info.maj_err_code = info.maj_err_code;
+			cqp_request->compl_info.min_err_code = info.min_err_code;
+			cqp_request->compl_info.op_ret_val = info.op_ret_val;
+			cqp_request->compl_info.error = info.error;
+
+			if (cqp_request->waiting) {
+				cqp_request->request_done = true;
+				wake_up(&cqp_request->waitq);
+				i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+			} else {
+				if (cqp_request->callback_fcn)
+					cqp_request->callback_fcn(cqp_request, 1);
+				i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+			}
+		}
+
+		cqe_count++;
+	} while (1);
+
+	if (arm && cqe_count) {
+		i40iw_process_bh(dev);
+		dev->ccq_ops->ccq_arm(cq);
+	}
+}
+
+/**
+ * i40iw_iwarp_ce_handler - handle iwarp completions
+ * @iwdev: iwarp device
+ * @iwcp: iwarp cq receiving event
+ */
+static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
+				   struct i40iw_sc_cq *iwcq)
+{
+	struct i40iw_cq *i40iwcq = iwcq->back_cq;
+
+	if (i40iwcq->ibcq.comp_handler)
+		i40iwcq->ibcq.comp_handler(&i40iwcq->ibcq,
+					   i40iwcq->ibcq.cq_context);
+}
+
+/**
+ * i40iw_puda_ce_handler - handle puda completion events
+ * @iwdev: iwarp device
+ * @cq: puda completion q for event
+ */
+static void i40iw_puda_ce_handler(struct i40iw_device *iwdev,
+				  struct i40iw_sc_cq *cq)
+{
+	struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)&iwdev->sc_dev;
+	enum i40iw_status_code status;
+	u32 compl_error;
+
+	do {
+		status = i40iw_puda_poll_completion(dev, cq, &compl_error);
+		if (status == I40IW_ERR_QUEUE_EMPTY)
+			break;
+		if (status) {
+			i40iw_pr_err("puda  status = %d\n", status);
+			break;
+		}
+		if (compl_error) {
+			i40iw_pr_err("puda compl_err  =0x%x\n", compl_error);
+			break;
+		}
+	} while (1);
+
+	dev->ccq_ops->ccq_arm(cq);
+}
+
+/**
+ * i40iw_process_ceq - handle ceq for completions
+ * @iwdev: iwarp device
+ * @ceq: ceq having cq for completion
+ */
+void i40iw_process_ceq(struct i40iw_device *iwdev, struct i40iw_ceq *ceq)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_sc_ceq *sc_ceq;
+	struct i40iw_sc_cq *cq;
+	bool arm = true;
+
+	sc_ceq = &ceq->sc_ceq;
+	do {
+		cq = dev->ceq_ops->process_ceq(dev, sc_ceq);
+		if (!cq)
+			break;
+
+		if (cq->cq_type == I40IW_CQ_TYPE_CQP)
+			i40iw_cqp_ce_handler(iwdev, cq, arm);
+		else if (cq->cq_type == I40IW_CQ_TYPE_IWARP)
+			i40iw_iwarp_ce_handler(iwdev, cq);
+		else if ((cq->cq_type == I40IW_CQ_TYPE_ILQ) ||
+			 (cq->cq_type == I40IW_CQ_TYPE_IEQ))
+			i40iw_puda_ce_handler(iwdev, cq);
+	} while (1);
+}
+
+/**
+ * i40iw_next_iw_state - modify qp state
+ * @iwqp: iwarp qp to modify
+ * @state: next state for qp
+ * @del_hash: del hash
+ * @term: term message
+ * @termlen: length of term message
+ */
+void i40iw_next_iw_state(struct i40iw_qp *iwqp,
+			 u8 state,
+			 u8 del_hash,
+			 u8 term,
+			 u8 termlen)
+{
+	struct i40iw_modify_qp_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.next_iwarp_state = state;
+	info.remove_hash_idx = del_hash;
+	info.cq_num_valid = true;
+	info.arp_cache_idx_valid = true;
+	info.dont_send_term = true;
+	info.dont_send_fin = true;
+	info.termlen = termlen;
+
+	if (term & I40IWQP_TERM_SEND_TERM_ONLY)
+		info.dont_send_term = false;
+	if (term & I40IWQP_TERM_SEND_FIN_ONLY)
+		info.dont_send_fin = false;
+	if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
+		info.reset_tcp_conn = true;
+	i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+}
+
+/**
+ * i40iw_process_aeq - handle aeq events
+ * @iwdev: iwarp device
+ */
+void i40iw_process_aeq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+	struct i40iw_sc_aeq *sc_aeq = &aeq->sc_aeq;
+	struct i40iw_aeqe_info aeinfo;
+	struct i40iw_aeqe_info *info = &aeinfo;
+	int ret;
+	struct i40iw_qp *iwqp = NULL;
+	struct i40iw_sc_cq *cq = NULL;
+	struct i40iw_cq *iwcq = NULL;
+	struct i40iw_sc_qp *qp = NULL;
+	struct i40iw_qp_host_ctx_info *ctx_info = NULL;
+	unsigned long flags;
+
+	u32 aeqcnt = 0;
+
+	if (!sc_aeq->size)
+		return;
+
+	do {
+		memset(info, 0, sizeof(*info));
+		ret = dev->aeq_ops->get_next_aeqe(sc_aeq, info);
+		if (ret)
+			break;
+
+		aeqcnt++;
+		i40iw_debug(dev, I40IW_DEBUG_AEQ,
+			    "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
+			    __func__, info->ae_id, info->qp, info->qp_cq_id);
+		if (info->qp) {
+			iwqp = iwdev->qp_table[info->qp_cq_id];
+			if (!iwqp) {
+				i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+				continue;
+			}
+			qp = &iwqp->sc_qp;
+			spin_lock_irqsave(&iwqp->lock, flags);
+			iwqp->hw_tcp_state = info->tcp_state;
+			iwqp->hw_iwarp_state = info->iwarp_state;
+			iwqp->last_aeq = info->ae_id;
+			spin_unlock_irqrestore(&iwqp->lock, flags);
+			ctx_info = &iwqp->ctx_info;
+			ctx_info->err_rq_idx_valid = true;
+		} else {
+			if (info->ae_id != I40IW_AE_CQ_OPERATION_ERROR)
+				continue;
+		}
+
+		switch (info->ae_id) {
+		case I40IW_AE_LLP_FIN_RECEIVED:
+			if (qp->term_flags)
+				continue;
+			if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
+				if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
+				    (iwqp->ibqp_state == IB_QPS_RTS)) {
+					i40iw_next_iw_state(iwqp,
+							    I40IW_QP_STATE_CLOSING, 0, 0, 0);
+					i40iw_cm_disconn(iwqp);
+				}
+				iwqp->cm_id->add_ref(iwqp->cm_id);
+				i40iw_schedule_cm_timer(iwqp->cm_node,
+							(struct i40iw_puda_buf *)iwqp,
+							I40IW_TIMER_TYPE_CLOSE, 1, 0);
+			}
+			break;
+		case I40IW_AE_LLP_CLOSE_COMPLETE:
+			if (qp->term_flags)
+				i40iw_terminate_done(qp, 0);
+			else
+				i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_RESET_SENT:
+			i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
+			i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_LLP_CONNECTION_RESET:
+			if (atomic_read(&iwqp->close_timer_started))
+				continue;
+			i40iw_cm_disconn(iwqp);
+			break;
+		case I40IW_AE_TERMINATE_SENT:
+			i40iw_terminate_send_fin(qp);
+			break;
+		case I40IW_AE_LLP_TERMINATE_RECEIVED:
+			i40iw_terminate_received(qp, info);
+			break;
+		case I40IW_AE_CQ_OPERATION_ERROR:
+			i40iw_pr_err("Processing an iWARP related AE for CQ misc = 0x%04X\n",
+				     info->ae_id);
+			cq = (struct i40iw_sc_cq *)(unsigned long)info->compl_ctx;
+			iwcq = (struct i40iw_cq *)cq->back_cq;
+
+			if (iwcq->ibcq.event_handler) {
+				struct ib_event ibevent;
+
+				ibevent.device = iwcq->ibcq.device;
+				ibevent.event = IB_EVENT_CQ_ERR;
+				ibevent.element.cq = &iwcq->ibcq;
+				iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
+			}
+			break;
+		case I40IW_AE_PRIV_OPERATION_DENIED:
+		case I40IW_AE_STAG_ZERO_INVALID:
+		case I40IW_AE_IB_RREQ_AND_Q1_FULL:
+		case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
+		case I40IW_AE_DDP_UBE_INVALID_MO:
+		case I40IW_AE_DDP_UBE_INVALID_QN:
+		case I40IW_AE_DDP_NO_L_BIT:
+		case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+		case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+		case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
+		case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+		case I40IW_AE_INVALID_ARP_ENTRY:
+		case I40IW_AE_INVALID_TCP_OPTION_RCVD:
+		case I40IW_AE_STALE_ARP_ENTRY:
+		case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+		case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
+		case I40IW_AE_LLP_SYN_RECEIVED:
+		case I40IW_AE_LLP_TOO_MANY_RETRIES:
+		case I40IW_AE_LLP_DOUBT_REACHABILITY:
+		case I40IW_AE_LCE_QP_CATASTROPHIC:
+		case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
+		case I40IW_AE_LCE_CQ_CATASTROPHIC:
+		case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
+		case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
+		case I40IW_AE_QP_SUSPEND_COMPLETE:
+			ctx_info->err_rq_idx_valid = false;
+		default:
+				if (!info->sq && ctx_info->err_rq_idx_valid) {
+					ctx_info->err_rq_idx = info->wqe_idx;
+					ctx_info->tcp_info_valid = false;
+					ctx_info->iwarp_info_valid = false;
+					ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+									     iwqp->host_ctx.va,
+									     ctx_info);
+				}
+				i40iw_terminate_connection(qp, info);
+				break;
+		}
+	} while (1);
+
+	if (aeqcnt)
+		dev->aeq_ops->repost_aeq_entries(dev, aeqcnt);
+}
+
+/**
+ * i40iw_manage_apbvt - add or delete tcp port
+ * @iwdev: iwarp device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
+{
+	struct i40iw_apbvt_info *info;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.manage_apbvt_entry.info;
+
+	memset(info, 0, sizeof(*info));
+	info->add = add_port;
+	info->port = cpu_to_le16(accel_local_port);
+
+	cqp_info->cqp_cmd = OP_MANAGE_APBVT_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage APBVT entry fail");
+	return status;
+}
+
+/**
+ * i40iw_manage_arp_cache - manage hw arp cache
+ * @iwdev: iwarp device
+ * @mac_addr: mac address ptr
+ * @ip_addr: ip addr for arp cache
+ * @action: add, delete or modify
+ */
+void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
+			    unsigned char *mac_addr,
+			    __be32 *ip_addr,
+			    bool ipv4,
+			    u32 action)
+{
+	struct i40iw_add_arp_cache_entry_info *info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	int arp_index;
+
+	arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
+	if (arp_index == -1)
+		return;
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	if (action == I40IW_ARP_ADD) {
+		cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
+		info = &cqp_info->in.u.add_arp_cache_entry.info;
+		memset(info, 0, sizeof(*info));
+		info->arp_index = cpu_to_le32(arp_index);
+		info->permanent = true;
+		ether_addr_copy(info->mac_addr, mac_addr);
+		cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+	} else {
+		cqp_info->cqp_cmd = OP_DELETE_ARP_CACHE_ENTRY;
+		cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+		cqp_info->in.u.del_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+		cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
+	}
+
+	cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->post_sq = 1;
+	if (i40iw_handle_cqp_op(iwdev, cqp_request))
+		i40iw_pr_err("CQP-OP Add/Del Arp Cache entry fail");
+}
+
+/**
+ * i40iw_send_syn_cqp_callback - do syn/ack after qhash
+ * @cqp_request: qhash cqp completion
+ * @send_ack: flag send ack
+ */
+static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u32 send_ack)
+{
+	i40iw_send_syn(cqp_request->param, send_ack);
+}
+
+/**
+ * i40iw_manage_qhash - add or modify qhash
+ * @iwdev: iwarp device
+ * @cminfo: cm info for qhash
+ * @etype: type (syn or quad)
+ * @mtype: type of qhash
+ * @cmnode: cmnode associated with connection
+ * @wait: wait for completion
+ * @user_pri:user pri of the connection
+ */
+enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
+					  struct i40iw_cm_info *cminfo,
+					  enum i40iw_quad_entry_type etype,
+					  enum i40iw_quad_hash_manage_type mtype,
+					  void *cmnode,
+					  bool wait)
+{
+	struct i40iw_qhash_table_info *info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.manage_qhash_table_entry.info;
+	memset(info, 0, sizeof(*info));
+
+	info->manage = mtype;
+	info->entry_type = etype;
+	if (cminfo->vlan_id != 0xFFFF) {
+		info->vlan_valid = true;
+		info->vlan_id = cpu_to_le16(cminfo->vlan_id);
+	} else {
+		info->vlan_valid = false;
+	}
+
+	info->ipv4_valid = cminfo->ipv4;
+	ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
+	info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+	info->dest_port = cpu_to_le16(cminfo->loc_port);
+	info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
+	info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
+	info->dest_ip[2] = cpu_to_le32(cminfo->loc_addr[2]);
+	info->dest_ip[3] = cpu_to_le32(cminfo->loc_addr[3]);
+	if (etype == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
+		info->src_port = cpu_to_le16(cminfo->rem_port);
+		info->src_ip[0] = cpu_to_le32(cminfo->rem_addr[0]);
+		info->src_ip[1] = cpu_to_le32(cminfo->rem_addr[1]);
+		info->src_ip[2] = cpu_to_le32(cminfo->rem_addr[2]);
+		info->src_ip[3] = cpu_to_le32(cminfo->rem_addr[3]);
+	}
+	if (cmnode) {
+		cqp_request->callback_fcn = i40iw_send_syn_cqp_callback;
+		cqp_request->param = (void *)cmnode;
+	}
+
+	if (info->ipv4_valid)
+		i40iw_debug(dev, I40IW_DEBUG_CM,
+			    "%s:%s IP=%pI4, port=%d, mac=%pM, vlan_id=%d\n",
+			    __func__, (!mtype) ? "DELETE" : "ADD",
+			    info->dest_ip,
+			    info->dest_port, info->mac_addr, cminfo->vlan_id);
+	else
+		i40iw_debug(dev, I40IW_DEBUG_CM,
+			    "%s:%s IP=%pI6, port=%d, mac=%pM, vlan_id=%d\n",
+			    __func__, (!mtype) ? "DELETE" : "ADD",
+			    info->dest_ip,
+			    info->dest_port, info->mac_addr, cminfo->vlan_id);
+	cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->cqp_cmd = OP_MANAGE_QHASH_TABLE_ENTRY;
+	cqp_info->post_sq = 1;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage Qhash Entry fail");
+	return status;
+}
+
+/**
+ * i40iw_hw_flush_wqes - flush qp's wqe
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ * @info: info for flush
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
+					   struct i40iw_sc_qp *qp,
+					   struct i40iw_qp_flush_info *info,
+					   bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_qp_flush_info *hw_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
+	memcpy(hw_info, info, sizeof(*hw_info));
+
+	cqp_info->cqp_cmd = OP_QP_FLUSH_WQES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_flush_wqes.qp = qp;
+	cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Flush WQE's fail");
+	return status;
+}
+
+/**
+ * i40iw_hw_manage_vf_pble_bp - manage vf pbles
+ * @iwdev: iwarp device
+ * @info: info for managing pble
+ * @wait: flag wait for completion
+ */
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+						  struct i40iw_manage_vf_pble_info *info,
+						  bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_manage_vf_pble_info *hw_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	if ((iwdev->init_state < CCQ_CREATED) && wait)
+		wait = false;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+
+	cqp_info = &cqp_request->info;
+	hw_info = &cqp_request->info.in.u.manage_vf_pble_bp.info;
+	memcpy(hw_info, info, sizeof(*hw_info));
+
+	cqp_info->cqp_cmd = OP_MANAGE_VF_PBLE_BP;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_vf_pble_bp.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_vf_pble_bp.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage VF pble_bp fail");
+	return status;
+}
+
+/**
+ * i40iw_get_ib_wc - return change flush code to IB's
+ * @opcode: iwarp flush code
+ */
+static enum ib_wc_status i40iw_get_ib_wc(enum i40iw_flush_opcode opcode)
+{
+	switch (opcode) {
+	case FLUSH_PROT_ERR:
+		return IB_WC_LOC_PROT_ERR;
+	case FLUSH_REM_ACCESS_ERR:
+		return IB_WC_REM_ACCESS_ERR;
+	case FLUSH_LOC_QP_OP_ERR:
+		return IB_WC_LOC_QP_OP_ERR;
+	case FLUSH_REM_OP_ERR:
+		return IB_WC_REM_OP_ERR;
+	case FLUSH_LOC_LEN_ERR:
+		return IB_WC_LOC_LEN_ERR;
+	case FLUSH_GENERAL_ERR:
+		return IB_WC_GENERAL_ERR;
+	case FLUSH_FATAL_ERR:
+	default:
+		return IB_WC_FATAL_ERR;
+	}
+}
+
+/**
+ * i40iw_set_flush_info - set flush info
+ * @pinfo: set flush info
+ * @min: minor err
+ * @maj: major err
+ * @opcode: flush error code
+ */
+static void i40iw_set_flush_info(struct i40iw_qp_flush_info *pinfo,
+				 u16 *min,
+				 u16 *maj,
+				 enum i40iw_flush_opcode opcode)
+{
+	*min = (u16)i40iw_get_ib_wc(opcode);
+	*maj = CQE_MAJOR_DRV;
+	pinfo->userflushcode = true;
+}
+
+/**
+ * i40iw_flush_wqes - flush wqe for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp to flush wqes
+ */
+void i40iw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp *iwqp)
+{
+	struct i40iw_qp_flush_info info;
+	struct i40iw_qp_flush_info *pinfo = &info;
+
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+	memset(pinfo, 0, sizeof(*pinfo));
+	info.sq = true;
+	info.rq = true;
+	if (qp->term_flags) {
+		i40iw_set_flush_info(pinfo, &pinfo->sq_minor_code,
+				     &pinfo->sq_major_code, qp->flush_code);
+		i40iw_set_flush_info(pinfo, &pinfo->rq_minor_code,
+				     &pinfo->rq_major_code, qp->flush_code);
+	}
+	(void)i40iw_hw_flush_wqes(iwdev, &iwqp->sc_qp, &info, true);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
new file mode 100644
index 000000000000..90e5af21737e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -0,0 +1,1910 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+
+#include "i40iw.h"
+#include "i40iw_register.h"
+#include <net/netevent.h>
+#define CLIENT_IW_INTERFACE_VERSION_MAJOR 0
+#define CLIENT_IW_INTERFACE_VERSION_MINOR 01
+#define CLIENT_IW_INTERFACE_VERSION_BUILD 00
+
+#define DRV_VERSION_MAJOR 0
+#define DRV_VERSION_MINOR 5
+#define DRV_VERSION_BUILD 123
+#define DRV_VERSION	__stringify(DRV_VERSION_MAJOR) "."		\
+	__stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
+
+static int push_mode;
+module_param(push_mode, int, 0644);
+MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
+
+static int resource_profile;
+module_param(resource_profile, int, 0644);
+MODULE_PARM_DESC(resource_profile,
+		 "Resource Profile: 0=no VF RDMA support (default), 1=Weighted VF, 2=Even Distribution");
+
+static int max_rdma_vfs = 32;
+module_param(max_rdma_vfs, int, 0644);
+MODULE_PARM_DESC(max_rdma_vfs, "Maximum VF count: 0-32 32=default");
+static int mpa_version = 2;
+module_param(mpa_version, int, 0644);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2");
+
+MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static struct i40e_client i40iw_client;
+static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";
+
+static LIST_HEAD(i40iw_handlers);
+static spinlock_t i40iw_handler_lock;
+
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+						  u32 vf_id, u8 *msg, u16 len);
+
+static struct notifier_block i40iw_inetaddr_notifier = {
+	.notifier_call = i40iw_inetaddr_event
+};
+
+static struct notifier_block i40iw_inetaddr6_notifier = {
+	.notifier_call = i40iw_inet6addr_event
+};
+
+static struct notifier_block i40iw_net_notifier = {
+	.notifier_call = i40iw_net_event
+};
+
+static int i40iw_notifiers_registered;
+
+/**
+ * i40iw_find_i40e_handler - find a handler given a client info
+ * @ldev: pointer to a client info
+ */
+static struct i40iw_handler *i40iw_find_i40e_handler(struct i40e_info *ldev)
+{
+	struct i40iw_handler *hdl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_for_each_entry(hdl, &i40iw_handlers, list) {
+		if (hdl->ldev.netdev == ldev->netdev) {
+			spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+			return hdl;
+		}
+	}
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_find_netdev - find a handler given a netdev
+ * @netdev: pointer to net_device
+ */
+struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev)
+{
+	struct i40iw_handler *hdl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_for_each_entry(hdl, &i40iw_handlers, list) {
+		if (hdl->ldev.netdev == netdev) {
+			spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+			return hdl;
+		}
+	}
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return NULL;
+}
+
+/**
+ * i40iw_add_handler - add a handler to the list
+ * @hdl: handler to be added to the handler list
+ */
+static void i40iw_add_handler(struct i40iw_handler *hdl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_add(&hdl->list, &i40iw_handlers);
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+}
+
+/**
+ * i40iw_del_handler - delete a handler from the list
+ * @hdl: handler to be deleted from the handler list
+ */
+static int i40iw_del_handler(struct i40iw_handler *hdl)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i40iw_handler_lock, flags);
+	list_del(&hdl->list);
+	spin_unlock_irqrestore(&i40iw_handler_lock, flags);
+	return 0;
+}
+
+/**
+ * i40iw_enable_intr - set up device interrupts
+ * @dev: hardware control device structure
+ * @msix_id: id of the interrupt to be enabled
+ */
+static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
+{
+	u32 val;
+
+	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+		I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+		(3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_id - 1), val);
+	else
+		i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_id - 1), val);
+}
+
+/**
+ * i40iw_dpc - tasklet for aeq and ceq 0
+ * @data: iwarp device
+ */
+static void i40iw_dpc(unsigned long data)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+	if (iwdev->msix_shared)
+		i40iw_process_ceq(iwdev, iwdev->ceqlist);
+	i40iw_process_aeq(iwdev);
+	i40iw_enable_intr(&iwdev->sc_dev, iwdev->iw_msixtbl[0].idx);
+}
+
+/**
+ * i40iw_ceq_dpc - dpc handler for CEQ
+ * @data: data points to CEQ
+ */
+static void i40iw_ceq_dpc(unsigned long data)
+{
+	struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+	struct i40iw_device *iwdev = iwceq->iwdev;
+
+	i40iw_process_ceq(iwdev, iwceq);
+	i40iw_enable_intr(&iwdev->sc_dev, iwceq->msix_idx);
+}
+
+/**
+ * i40iw_irq_handler - interrupt handler for aeq and ceq0
+ * @irq: Interrupt request number
+ * @data: iwarp device
+ */
+static irqreturn_t i40iw_irq_handler(int irq, void *data)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)data;
+
+	tasklet_schedule(&iwdev->dpc_tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_destroy_cqp  - destroy control qp
+ * @iwdev: iwarp device
+ * @create_done: 1 if cqp create poll was success
+ *
+ * Issue destroy cqp request and
+ * free the resources associated with the cqp
+ */
+static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
+{
+	enum i40iw_status_code status = 0;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cqp *cqp = &iwdev->cqp;
+
+	if (free_hwcqp && dev->cqp_ops->cqp_destroy)
+		status = dev->cqp_ops->cqp_destroy(dev->cqp);
+	if (status)
+		i40iw_pr_err("destroy cqp failed");
+
+	i40iw_free_dma_mem(dev->hw, &cqp->sq);
+	kfree(cqp->scratch_array);
+	iwdev->cqp.scratch_array = NULL;
+
+	kfree(cqp->cqp_requests);
+	cqp->cqp_requests = NULL;
+}
+
+/**
+ * i40iw_disable_irqs - disable device interrupts
+ * @dev: hardware control device structure
+ * @msic_vec: msix vector to disable irq
+ * @dev_id: parameter to pass to free_irq (used during irq setup)
+ *
+ * The function is called when destroying aeq/ceq
+ */
+static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
+			      struct i40iw_msix_vector *msix_vec,
+			      void *dev_id)
+{
+	if (dev->is_pf)
+		i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
+	else
+		i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+	synchronize_irq(msix_vec->irq);
+	free_irq(msix_vec->irq, dev_id);
+}
+
+/**
+ * i40iw_destroy_aeq - destroy aeq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue a destroy aeq request and
+ * free the resources associated with the aeq
+ * The function is called during driver unload
+ */
+static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
+{
+	enum i40iw_status_code status = I40IW_ERR_NOT_READY;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+
+	if (!iwdev->msix_shared)
+		i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
+	if (reset)
+		goto exit;
+
+	if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
+		status = dev->aeq_ops->aeq_destroy_done(&aeq->sc_aeq);
+	if (status)
+		i40iw_pr_err("destroy aeq failed %d\n", status);
+
+exit:
+	i40iw_free_dma_mem(dev->hw, &aeq->mem);
+}
+
+/**
+ * i40iw_destroy_ceq - destroy ceq
+ * @iwdev: iwarp device
+ * @iwceq: ceq to be destroyed
+ * @reset: true if called before reset
+ *
+ * Issue a destroy ceq request and
+ * free the resources associated with the ceq
+ */
+static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
+			      struct i40iw_ceq *iwceq,
+			      bool reset)
+{
+	enum i40iw_status_code status;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	if (reset)
+		goto exit;
+
+	status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
+	if (status) {
+		i40iw_pr_err("ceq destroy command failed %d\n", status);
+		goto exit;
+	}
+
+	status = dev->ceq_ops->cceq_destroy_done(&iwceq->sc_ceq);
+	if (status)
+		i40iw_pr_err("ceq destroy completion failed %d\n", status);
+exit:
+	i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+}
+
+/**
+ * i40iw_dele_ceqs - destroy all ceq's
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Go through all of the device ceq's and for each ceq
+ * disable the ceq interrupt and destroy the ceq
+ */
+static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
+{
+	u32 i = 0;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_ceq *iwceq = iwdev->ceqlist;
+	struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+
+	if (iwdev->msix_shared) {
+		i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
+		i40iw_destroy_ceq(iwdev, iwceq, reset);
+		iwceq++;
+		i++;
+	}
+
+	for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
+		i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
+		i40iw_destroy_ceq(iwdev, iwceq, reset);
+	}
+}
+
+/**
+ * i40iw_destroy_ccq - destroy control cq
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ *
+ * Issue destroy ccq request and
+ * free the resources associated with the ccq
+ */
+static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_ccq *ccq = &iwdev->ccq;
+	enum i40iw_status_code status = 0;
+
+	if (!reset)
+		status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
+	if (status)
+		i40iw_pr_err("ccq destroy failed %d\n", status);
+	i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+}
+
+/* types of hmc objects */
+static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
+	I40IW_HMC_IW_QP,
+	I40IW_HMC_IW_CQ,
+	I40IW_HMC_IW_HTE,
+	I40IW_HMC_IW_ARP,
+	I40IW_HMC_IW_APBVT_ENTRY,
+	I40IW_HMC_IW_MR,
+	I40IW_HMC_IW_XF,
+	I40IW_HMC_IW_XFFL,
+	I40IW_HMC_IW_Q1,
+	I40IW_HMC_IW_Q1FL,
+	I40IW_HMC_IW_TIMER,
+};
+
+/**
+ * i40iw_close_hmc_objects_type - delete hmc objects of a given type
+ * @iwdev: iwarp device
+ * @obj_type: the hmc object type to be deleted
+ * @is_pf: true if the function is PF otherwise false
+ * @reset: true if called before reset
+ */
+static void i40iw_close_hmc_objects_type(struct i40iw_sc_dev *dev,
+					 enum i40iw_hmc_rsrc_type obj_type,
+					 struct i40iw_hmc_info *hmc_info,
+					 bool is_pf,
+					 bool reset)
+{
+	struct i40iw_hmc_del_obj_info info;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.rsrc_type = obj_type;
+	info.count = hmc_info->hmc_obj[obj_type].cnt;
+	info.is_pf = is_pf;
+	if (dev->hmc_ops->del_hmc_object(dev, &info, reset))
+		i40iw_pr_err("del obj of type %d failed\n", obj_type);
+}
+
+/**
+ * i40iw_del_hmc_objects - remove all device hmc objects
+ * @dev: iwarp device
+ * @hmc_info: hmc_info to free
+ * @is_pf: true if hmc_info belongs to PF, not vf nor allocated
+ *	   by PF on behalf of VF
+ * @reset: true if called before reset
+ */
+static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
+				  struct i40iw_hmc_info *hmc_info,
+				  bool is_pf,
+				  bool reset)
+{
+	unsigned int i;
+
+	for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++)
+		i40iw_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, is_pf, reset);
+}
+
+/**
+ * i40iw_ceq_handler - interrupt handler for ceq
+ * @data: ceq pointer
+ */
+static irqreturn_t i40iw_ceq_handler(int irq, void *data)
+{
+	struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
+
+	if (iwceq->irq != irq)
+		i40iw_pr_err("expected irq = %d received irq = %d\n", iwceq->irq, irq);
+	tasklet_schedule(&iwceq->dpc_tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40iw_create_hmc_obj_type - create hmc object of a given type
+ * @dev: hardware control device structure
+ * @info: information for the hmc object to create
+ */
+static enum i40iw_status_code i40iw_create_hmc_obj_type(struct i40iw_sc_dev *dev,
+							struct i40iw_hmc_create_obj_info *info)
+{
+	return dev->hmc_ops->create_hmc_object(dev, info);
+}
+
+/**
+ * i40iw_create_hmc_objs - create all hmc objects for the device
+ * @iwdev: iwarp device
+ * @is_pf: true if the function is PF otherwise false
+ *
+ * Create the device hmc objects and allocate hmc pages
+ * Return 0 if successful, otherwise clean up and return error
+ */
+static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
+						    bool is_pf)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_hmc_create_obj_info info;
+	enum i40iw_status_code status;
+	int i;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = dev->hmc_info;
+	info.is_pf = is_pf;
+	info.entry_type = iwdev->sd_type;
+	for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+		info.rsrc_type = iw_hmc_obj_types[i];
+		info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+		status = i40iw_create_hmc_obj_type(dev, &info);
+		if (status) {
+			i40iw_pr_err("create obj type %d status = %d\n",
+				     iw_hmc_obj_types[i], status);
+			break;
+		}
+	}
+	if (!status)
+		return (dev->cqp_misc_ops->static_hmc_pages_allocated(dev->cqp, 0,
+								      dev->hmc_fn_id,
+								      true, true));
+
+	while (i) {
+		i--;
+		/* destroy the hmc objects of a given type */
+		i40iw_close_hmc_objects_type(dev,
+					     iw_hmc_obj_types[i],
+					     dev->hmc_info,
+					     is_pf,
+					     false);
+	}
+	return status;
+}
+
+/**
+ * i40iw_obj_aligned_mem - get aligned memory from device allocated memory
+ * @iwdev: iwarp device
+ * @memptr: points to the memory addresses
+ * @size: size of memory needed
+ * @mask: mask for the aligned memory
+ *
+ * Get aligned memory of the requested size and
+ * update the memptr to point to the new aligned memory
+ * Return 0 if successful, otherwise return no memory error
+ */
+enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
+					     struct i40iw_dma_mem *memptr,
+					     u32 size,
+					     u32 mask)
+{
+	unsigned long va, newva;
+	unsigned long extra;
+
+	va = (unsigned long)iwdev->obj_next.va;
+	newva = va;
+	if (mask)
+		newva = ALIGN(va, (mask + 1));
+	extra = newva - va;
+	memptr->va = (u8 *)va + extra;
+	memptr->pa = iwdev->obj_next.pa + extra;
+	memptr->size = size;
+	if ((memptr->va + size) > (iwdev->obj_mem.va + iwdev->obj_mem.size))
+		return I40IW_ERR_NO_MEMORY;
+
+	iwdev->obj_next.va = memptr->va + size;
+	iwdev->obj_next.pa = memptr->pa + size;
+	return 0;
+}
+
+/**
+ * i40iw_create_cqp - create control qp
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the cqp and all the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+	u32 sqsize = I40IW_CQP_SW_SQSIZE_2048;
+	struct i40iw_dma_mem mem;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cqp_init_info cqp_init_info;
+	struct i40iw_cqp *cqp = &iwdev->cqp;
+	u16 maj_err, min_err;
+	int i;
+
+	cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
+	if (!cqp->cqp_requests)
+		return I40IW_ERR_NO_MEMORY;
+	cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
+	if (!cqp->scratch_array) {
+		kfree(cqp->cqp_requests);
+		return I40IW_ERR_NO_MEMORY;
+	}
+	dev->cqp = &cqp->sc_cqp;
+	dev->cqp->dev = dev;
+	memset(&cqp_init_info, 0, sizeof(cqp_init_info));
+	status = i40iw_allocate_dma_mem(dev->hw, &cqp->sq,
+					(sizeof(struct i40iw_cqp_sq_wqe) * sqsize),
+					I40IW_CQP_ALIGNMENT);
+	if (status)
+		goto exit;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, sizeof(struct i40iw_cqp_ctx),
+				       I40IW_HOST_CTX_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	dev->cqp->host_ctx_pa = mem.pa;
+	dev->cqp->host_ctx = mem.va;
+	/* populate the cqp init info */
+	cqp_init_info.dev = dev;
+	cqp_init_info.sq_size = sqsize;
+	cqp_init_info.sq = cqp->sq.va;
+	cqp_init_info.sq_pa = cqp->sq.pa;
+	cqp_init_info.host_ctx_pa = mem.pa;
+	cqp_init_info.host_ctx = mem.va;
+	cqp_init_info.hmc_profile = iwdev->resource_profile;
+	cqp_init_info.enabled_vf_count = iwdev->max_rdma_vfs;
+	cqp_init_info.scratch_array = cqp->scratch_array;
+	status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
+	if (status) {
+		i40iw_pr_err("cqp init status %d maj_err %d min_err %d\n",
+			     status, maj_err, min_err);
+		goto exit;
+	}
+	status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+	if (status) {
+		i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
+			     status, maj_err, min_err);
+		goto exit;
+	}
+	spin_lock_init(&cqp->req_lock);
+	INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
+	INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
+	/* init the waitq of the cqp_requests and add them to the list */
+	for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
+		init_waitqueue_head(&cqp->cqp_requests[i].waitq);
+		list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
+	}
+	return 0;
+exit:
+	/* clean up the created resources */
+	i40iw_destroy_cqp(iwdev, false);
+	return status;
+}
+
+/**
+ * i40iw_create_ccq - create control cq
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the ccq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ccq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_dma_mem mem;
+	enum i40iw_status_code status;
+	struct i40iw_ccq_init_info info;
+	struct i40iw_ccq *ccq = &iwdev->ccq;
+
+	memset(&info, 0, sizeof(info));
+	dev->ccq = &ccq->sc_cq;
+	dev->ccq->dev = dev;
+	info.dev = dev;
+	ccq->shadow_area.size = sizeof(struct i40iw_cq_shadow_area);
+	ccq->mem_cq.size = sizeof(struct i40iw_cqe) * IW_CCQ_SIZE;
+	status = i40iw_allocate_dma_mem(dev->hw, &ccq->mem_cq,
+					ccq->mem_cq.size, I40IW_CQ0_ALIGNMENT);
+	if (status)
+		goto exit;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, ccq->shadow_area.size,
+				       I40IW_SHADOWAREA_MASK);
+	if (status)
+		goto exit;
+	ccq->sc_cq.back_cq = (void *)ccq;
+	/* populate the ccq init info */
+	info.cq_base = ccq->mem_cq.va;
+	info.cq_pa = ccq->mem_cq.pa;
+	info.num_elem = IW_CCQ_SIZE;
+	info.shadow_area = mem.va;
+	info.shadow_area_pa = mem.pa;
+	info.ceqe_mask = false;
+	info.ceq_id_valid = true;
+	info.shadow_read_threshold = 16;
+	status = dev->ccq_ops->ccq_init(dev->ccq, &info);
+	if (!status)
+		status = dev->ccq_ops->ccq_create(dev->ccq, 0, true, true);
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
+	return status;
+}
+
+/**
+ * i40iw_configure_ceq_vector - set up the msix interrupt vector for ceq
+ * @iwdev: iwarp device
+ * @msix_vec: interrupt vector information
+ * @iwceq: ceq associated with the vector
+ * @ceq_id: the id number of the iwceq
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iwdev,
+							 struct i40iw_ceq *iwceq,
+							 u32 ceq_id,
+							 struct i40iw_msix_vector *msix_vec)
+{
+	enum i40iw_status_code status;
+
+	if (iwdev->msix_shared && !ceq_id) {
+		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+		status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
+	} else {
+		tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
+		status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
+	}
+
+	if (status) {
+		i40iw_pr_err("ceq irq config fail\n");
+		return I40IW_ERR_CONFIG;
+	}
+	msix_vec->ceq_id = ceq_id;
+	msix_vec->cpu_affinity = 0;
+
+	return 0;
+}
+
+/**
+ * i40iw_create_ceq - create completion event queue
+ * @iwdev: iwarp device
+ * @iwceq: pointer to the ceq resources to be created
+ * @ceq_id: the id number of the iwceq
+ *
+ * Return 0, if the ceq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_ceq(struct i40iw_device *iwdev,
+					       struct i40iw_ceq *iwceq,
+					       u32 ceq_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_ceq_init_info info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	u64 scratch;
+
+	memset(&info, 0, sizeof(info));
+	info.ceq_id = ceq_id;
+	iwceq->iwdev = iwdev;
+	iwceq->mem.size = sizeof(struct i40iw_ceqe) *
+		iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	status = i40iw_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size,
+					I40IW_CEQ_ALIGNMENT);
+	if (status)
+		goto exit;
+	info.ceq_id = ceq_id;
+	info.ceqe_base = iwceq->mem.va;
+	info.ceqe_pa = iwceq->mem.pa;
+
+	info.elem_cnt = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	iwceq->sc_ceq.ceq_id = ceq_id;
+	info.dev = dev;
+	scratch = (uintptr_t)&iwdev->cqp.sc_cqp;
+	status = dev->ceq_ops->ceq_init(&iwceq->sc_ceq, &info);
+	if (!status)
+		status = dev->ceq_ops->cceq_create(&iwceq->sc_ceq, scratch);
+
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &iwceq->mem);
+	return status;
+}
+
+void i40iw_request_reset(struct i40iw_device *iwdev)
+{
+	struct i40e_info *ldev = iwdev->ldev;
+
+	ldev->ops->request_reset(ldev, iwdev->client, 1);
+}
+
+/**
+ * i40iw_setup_ceqs - manage the device ceq's and their interrupt resources
+ * @iwdev: iwarp device
+ * @ldev: i40e lan device
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq's and configure their msix interrupt vectors
+ * Return 0, if at least one ceq is successfully set up, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
+					       struct i40e_info *ldev)
+{
+	u32 i;
+	u32 ceq_id;
+	struct i40iw_ceq *iwceq;
+	struct i40iw_msix_vector *msix_vec;
+	enum i40iw_status_code status = 0;
+	u32 num_ceqs;
+
+	if (ldev && ldev->ops && ldev->ops->setup_qvlist) {
+		status = ldev->ops->setup_qvlist(ldev, &i40iw_client,
+						 iwdev->iw_qvlist);
+		if (status)
+			goto exit;
+	} else {
+		status = I40IW_ERR_BAD_PTR;
+		goto exit;
+	}
+
+	num_ceqs = min(iwdev->msix_count, iwdev->sc_dev.hmc_fpm_misc.max_ceqs);
+	iwdev->ceqlist = kcalloc(num_ceqs, sizeof(*iwdev->ceqlist), GFP_KERNEL);
+	if (!iwdev->ceqlist) {
+		status = I40IW_ERR_NO_MEMORY;
+		goto exit;
+	}
+	i = (iwdev->msix_shared) ? 0 : 1;
+	for (ceq_id = 0; i < num_ceqs; i++, ceq_id++) {
+		iwceq = &iwdev->ceqlist[ceq_id];
+		status = i40iw_create_ceq(iwdev, iwceq, ceq_id);
+		if (status) {
+			i40iw_pr_err("create ceq status = %d\n", status);
+			break;
+		}
+
+		msix_vec = &iwdev->iw_msixtbl[i];
+		iwceq->irq = msix_vec->irq;
+		iwceq->msix_idx = msix_vec->idx;
+		status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
+		if (status) {
+			i40iw_destroy_ceq(iwdev, iwceq, false);
+			break;
+		}
+		i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
+		iwdev->ceqs_count++;
+	}
+
+exit:
+	if (status) {
+		if (!iwdev->ceqs_count) {
+			kfree(iwdev->ceqlist);
+			iwdev->ceqlist = NULL;
+		} else {
+			status = 0;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40iw_configure_aeq_vector - set up the msix vector for aeq
+ * @iwdev: iwarp device
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iwdev)
+{
+	struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
+	u32 ret = 0;
+
+	if (!iwdev->msix_shared) {
+		tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
+		ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
+	}
+	if (ret) {
+		i40iw_pr_err("aeq irq config fail\n");
+		return I40IW_ERR_CONFIG;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_create_aeq - create async event queue
+ * @iwdev: iwarp device
+ *
+ * Return 0, if the aeq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static enum i40iw_status_code i40iw_create_aeq(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+	struct i40iw_aeq_init_info info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_aeq *aeq = &iwdev->aeq;
+	u64 scratch = 0;
+	u32 aeq_size;
+
+	aeq_size = 2 * iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt +
+		iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
+	memset(&info, 0, sizeof(info));
+	aeq->mem.size = sizeof(struct i40iw_sc_aeqe) * aeq_size;
+	status = i40iw_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size,
+					I40IW_AEQ_ALIGNMENT);
+	if (status)
+		goto exit;
+
+	info.aeqe_base = aeq->mem.va;
+	info.aeq_elem_pa = aeq->mem.pa;
+	info.elem_cnt = aeq_size;
+	info.dev = dev;
+	status = dev->aeq_ops->aeq_init(&aeq->sc_aeq, &info);
+	if (status)
+		goto exit;
+	status = dev->aeq_ops->aeq_create(&aeq->sc_aeq, scratch, 1);
+	if (!status)
+		status = dev->aeq_ops->aeq_create_done(&aeq->sc_aeq);
+exit:
+	if (status)
+		i40iw_free_dma_mem(dev->hw, &aeq->mem);
+	return status;
+}
+
+/**
+ * i40iw_setup_aeq - set up the device aeq
+ * @iwdev: iwarp device
+ *
+ * Create the aeq and configure its msix interrupt vector
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	status = i40iw_create_aeq(iwdev);
+	if (status)
+		return status;
+
+	status = i40iw_configure_aeq_vector(iwdev);
+	if (status) {
+		i40iw_destroy_aeq(iwdev, false);
+		return status;
+	}
+
+	if (!iwdev->msix_shared)
+		i40iw_enable_intr(dev, iwdev->iw_msixtbl[0].idx);
+	return 0;
+}
+
+/**
+ * i40iw_initialize_ilq - create iwarp local queue for cm
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
+{
+	struct i40iw_puda_rsrc_info info;
+	enum i40iw_status_code status;
+
+	info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
+	info.cq_id = 1;
+	info.qp_id = 0;
+	info.count = 1;
+	info.pd_id = 1;
+	info.sq_size = 8192;
+	info.rq_size = 8192;
+	info.buf_size = 1024;
+	info.tx_buf_cnt = 16384;
+	info.mss = iwdev->mss;
+	info.receive = i40iw_receive_ilq;
+	info.xmit_complete = i40iw_free_sqbuf;
+	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	if (status)
+		i40iw_pr_err("ilq create fail\n");
+	return status;
+}
+
+/**
+ * i40iw_initialize_ieq - create iwarp exception queue
+ * @iwdev: iwarp device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
+{
+	struct i40iw_puda_rsrc_info info;
+	enum i40iw_status_code status;
+
+	info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
+	info.cq_id = 2;
+	info.qp_id = iwdev->sc_dev.exception_lan_queue;
+	info.count = 1;
+	info.pd_id = 2;
+	info.sq_size = 8192;
+	info.rq_size = 8192;
+	info.buf_size = 2048;
+	info.mss = iwdev->mss;
+	info.tx_buf_cnt = 16384;
+	status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+	if (status)
+		i40iw_pr_err("ieq create fail\n");
+	return status;
+}
+
+/**
+ * i40iw_hmc_setup - create hmc objects for the device
+ * @iwdev: iwarp device
+ *
+ * Set up the device private memory space for the number and size of
+ * the hmc objects and create the objects
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_hmc_setup(struct i40iw_device *iwdev)
+{
+	enum i40iw_status_code status;
+
+	iwdev->sd_type = I40IW_SD_TYPE_DIRECT;
+	status = i40iw_config_fpm_values(&iwdev->sc_dev, IW_CFG_FPM_QP_COUNT);
+	if (status)
+		goto exit;
+	status = i40iw_create_hmc_objs(iwdev, true);
+	if (status)
+		goto exit;
+	iwdev->init_state = HMC_OBJS_CREATED;
+exit:
+	return status;
+}
+
+/**
+ * i40iw_del_init_mem - deallocate memory resources
+ * @iwdev: iwarp device
+ */
+static void i40iw_del_init_mem(struct i40iw_device *iwdev)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	i40iw_free_dma_mem(&iwdev->hw, &iwdev->obj_mem);
+	kfree(dev->hmc_info->sd_table.sd_entry);
+	dev->hmc_info->sd_table.sd_entry = NULL;
+	kfree(iwdev->mem_resources);
+	iwdev->mem_resources = NULL;
+	kfree(iwdev->ceqlist);
+	iwdev->ceqlist = NULL;
+	kfree(iwdev->iw_msixtbl);
+	iwdev->iw_msixtbl = NULL;
+	kfree(iwdev->hmc_info_mem);
+	iwdev->hmc_info_mem = NULL;
+}
+
+/**
+ * i40iw_del_macip_entry - remove a mac ip address entry from the hw table
+ * @iwdev: iwarp device
+ * @idx: the index of the mac ip address to delete
+ */
+static void i40iw_del_macip_entry(struct i40iw_device *iwdev, u8 idx)
+{
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return;
+	}
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_DELETE_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.entry_idx = idx;
+	cqp_info->in.u.del_local_mac_ipaddr_entry.ignore_ref_count = 0;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Del MAC Ip entry fail");
+}
+
+/**
+ * i40iw_add_mac_ipaddr_entry - add a mac ip address entry to the hw table
+ * @iwdev: iwarp device
+ * @mac_addr: pointer to mac address
+ * @idx: the index of the mac ip address to add
+ */
+static enum i40iw_status_code i40iw_add_mac_ipaddr_entry(struct i40iw_device *iwdev,
+							 u8 *mac_addr,
+							 u8 idx)
+{
+	struct i40iw_local_mac_ipaddr_entry_info *info;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+
+	cqp_info = &cqp_request->info;
+
+	cqp_info->post_sq = 1;
+	info = &cqp_info->in.u.add_local_mac_ipaddr_entry.info;
+	ether_addr_copy(info->mac_addr, mac_addr);
+	info->entry_idx = idx;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	cqp_info->cqp_cmd = OP_ADD_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Add MAC Ip entry fail");
+	return status;
+}
+
+/**
+ * i40iw_alloc_local_mac_ipaddr_entry - allocate a mac ip address entry
+ * @iwdev: iwarp device
+ * @mac_ip_tbl_idx: the index of the new mac ip address
+ *
+ * Allocate a mac ip address entry and update the mac_ip_tbl_idx
+ * to hold the index of the newly created mac ip address
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_local_mac_ipaddr_entry(struct i40iw_device *iwdev,
+								 u16 *mac_ip_tbl_idx)
+{
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	enum i40iw_status_code status = 0;
+
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		i40iw_pr_err("cqp_request memory failed\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+
+	/* increment refcount, because we need the cqp request ret value */
+	atomic_inc(&cqp_request->refcount);
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.alloc_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
+	cqp_info->in.u.alloc_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		*mac_ip_tbl_idx = cqp_request->compl_info.op_ret_val;
+	else
+		i40iw_pr_err("CQP-OP Alloc MAC Ip entry fail");
+	/* decrement refcount and free the cqp request, if no longer used */
+	i40iw_put_cqp_request(iwcqp, cqp_request);
+	return status;
+}
+
+/**
+ * i40iw_alloc_set_mac_ipaddr - set up a mac ip address table entry
+ * @iwdev: iwarp device
+ * @macaddr: pointer to mac address
+ *
+ * Allocate a mac ip address entry and add it to the hw table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iwdev,
+							 u8 *macaddr)
+{
+	enum i40iw_status_code status;
+
+	status = i40iw_alloc_local_mac_ipaddr_entry(iwdev, &iwdev->mac_ip_table_idx);
+	if (!status) {
+		status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+						    (u8)iwdev->mac_ip_table_idx);
+		if (!status)
+			status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
+							    (u8)iwdev->mac_ip_table_idx);
+		else
+			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+	}
+	return status;
+}
+
+/**
+ * i40iw_add_ipv6_addr - add ipv6 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
+{
+	struct net_device *ip_dev;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifp;
+	__be32 local_ipaddr6[4];
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, ip_dev) {
+		if ((((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF) &&
+		      (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
+		     (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
+			idev = __in6_dev_get(ip_dev);
+			if (!idev) {
+				i40iw_pr_err("ipv6 inet device not found\n");
+				break;
+			}
+			list_for_each_entry(ifp, &idev->addr_list, if_list) {
+				i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
+					      rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
+				i40iw_copy_ip_ntohl(local_ipaddr6,
+						    ifp->addr.in6_u.u6_addr32);
+				i40iw_manage_arp_cache(iwdev,
+						       ip_dev->dev_addr,
+						       local_ipaddr6,
+						       false,
+						       I40IW_ARP_ADD);
+			}
+		}
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * i40iw_add_ipv4_addr - add ipv4 address to the hw arp table
+ * @iwdev: iwarp device
+ */
+static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
+{
+	struct net_device *dev;
+	struct in_device *idev;
+	bool got_lock = true;
+	u32 ip_addr;
+
+	if (!rtnl_trylock())
+		got_lock = false;
+
+	for_each_netdev(&init_net, dev) {
+		if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
+		      (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
+		    (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
+			idev = in_dev_get(dev);
+			for_ifa(idev) {
+				i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+					    "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
+					     rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
+
+				ip_addr = ntohl(ifa->ifa_address);
+				i40iw_manage_arp_cache(iwdev,
+						       dev->dev_addr,
+						       &ip_addr,
+						       true,
+						       I40IW_ARP_ADD);
+			}
+			endfor_ifa(idev);
+			in_dev_put(idev);
+		}
+	}
+	if (got_lock)
+		rtnl_unlock();
+}
+
+/**
+ * i40iw_add_mac_ip - add mac and ip addresses
+ * @iwdev: iwarp device
+ *
+ * Create and add a mac ip address entry to the hw table and
+ * ipv4/ipv6 addresses to the arp cache
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_add_mac_ip(struct i40iw_device *iwdev)
+{
+	struct net_device *netdev = iwdev->netdev;
+	enum i40iw_status_code status;
+
+	status = i40iw_alloc_set_mac_ipaddr(iwdev, (u8 *)netdev->dev_addr);
+	if (status)
+		return status;
+	i40iw_add_ipv4_addr(iwdev);
+	i40iw_add_ipv6_addr(iwdev);
+	return 0;
+}
+
+/**
+ * i40iw_wait_pe_ready - Check if firmware is ready
+ * @hw: provides access to registers
+ */
+static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
+{
+	u32 statusfw;
+	u32 statuscpu0;
+	u32 statuscpu1;
+	u32 statuscpu2;
+	u32 retrycount = 0;
+
+	do {
+		statusfw = i40iw_rd32(hw, I40E_GLPE_FWLDSTATUS);
+		i40iw_pr_info("[%04d] fm load status[x%04X]\n", __LINE__, statusfw);
+		statuscpu0 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS0);
+		i40iw_pr_info("[%04d] CSR_CQP status[x%04X]\n", __LINE__, statuscpu0);
+		statuscpu1 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS1);
+		i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS1 status[x%04X]\n",
+			      __LINE__, statuscpu1);
+		statuscpu2 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS2);
+		i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS2 status[x%04X]\n",
+			      __LINE__, statuscpu2);
+		if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
+			break;	/* SUCCESS */
+		mdelay(1000);
+		retrycount++;
+	} while (retrycount < 14);
+	i40iw_wr32(hw, 0xb4040, 0x4C104C5);
+}
+
+/**
+ * i40iw_initialize_dev - initialize device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate memory for the hmc objects and initialize iwdev
+ * Return 0 if successful, otherwise clean up the resources
+ * and return error
+ */
+static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
+						   struct i40e_info *ldev)
+{
+	enum i40iw_status_code status;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_device_init_info info;
+	struct i40iw_dma_mem mem;
+	u32 size;
+
+	memset(&info, 0, sizeof(info));
+	size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
+				(sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
+	iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
+	if (!iwdev->hmc_info_mem) {
+		i40iw_pr_err("memory alloc fail\n");
+		return I40IW_ERR_NO_MEMORY;
+	}
+	iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
+	dev->hmc_info = &iwdev->hw.hmc;
+	dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
+	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
+				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	info.fpm_query_buf_pa = mem.pa;
+	info.fpm_query_buf = mem.va;
+	status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
+				       I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
+	if (status)
+		goto exit;
+	info.fpm_commit_buf_pa = mem.pa;
+	info.fpm_commit_buf = mem.va;
+	info.hmc_fn_id = ldev->fid;
+	info.is_pf = (ldev->ftype) ? false : true;
+	info.bar0 = ldev->hw_addr;
+	info.hw = &iwdev->hw;
+	info.debug_mask = debug;
+	info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+	info.exception_lan_queue = 1;
+	info.vchnl_send = i40iw_virtchnl_send;
+	status = i40iw_device_init(&iwdev->sc_dev, &info);
+exit:
+	if (status) {
+		kfree(iwdev->hmc_info_mem);
+		iwdev->hmc_info_mem = NULL;
+	}
+	return status;
+}
+
+/**
+ * i40iw_register_notifiers - register tcp ip notifiers
+ */
+static void i40iw_register_notifiers(void)
+{
+	if (!i40iw_notifiers_registered) {
+		register_inetaddr_notifier(&i40iw_inetaddr_notifier);
+		register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+		register_netevent_notifier(&i40iw_net_notifier);
+	}
+	i40iw_notifiers_registered++;
+}
+
+/**
+ * i40iw_save_msix_info - copy msix vector information to iwarp device
+ * @iwdev: iwarp device
+ * @ldev: lan device information
+ *
+ * Allocate iwdev msix table and copy the ldev msix info to the table
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
+						   struct i40e_info *ldev)
+{
+	struct i40e_qvlist_info *iw_qvlist;
+	struct i40e_qv_info *iw_qvinfo;
+	u32 ceq_idx;
+	u32 i;
+	u32 size;
+
+	iwdev->msix_count = ldev->msix_count;
+
+	size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
+	size += sizeof(struct i40e_qvlist_info);
+	size +=  sizeof(struct i40e_qv_info) * iwdev->msix_count - 1;
+	iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
+
+	if (!iwdev->iw_msixtbl)
+		return I40IW_ERR_NO_MEMORY;
+	iwdev->iw_qvlist = (struct i40e_qvlist_info *)(&iwdev->iw_msixtbl[iwdev->msix_count]);
+	iw_qvlist = iwdev->iw_qvlist;
+	iw_qvinfo = iw_qvlist->qv_info;
+	iw_qvlist->num_vectors = iwdev->msix_count;
+	if (iwdev->msix_count <= num_online_cpus())
+		iwdev->msix_shared = true;
+	for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
+		iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
+		iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+		if (i == 0) {
+			iw_qvinfo->aeq_idx = 0;
+			if (iwdev->msix_shared)
+				iw_qvinfo->ceq_idx = ceq_idx++;
+			else
+				iw_qvinfo->ceq_idx = I40E_QUEUE_INVALID_IDX;
+		} else {
+			iw_qvinfo->aeq_idx = I40E_QUEUE_INVALID_IDX;
+			iw_qvinfo->ceq_idx = ceq_idx++;
+		}
+		iw_qvinfo->itr_idx = 3;
+		iw_qvinfo->v_idx = iwdev->iw_msixtbl[i].idx;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_deinit_device - clean up the device resources
+ * @iwdev: iwarp device
+ * @reset: true if called before reset
+ * @del_hdl: true if delete hdl entry
+ *
+ * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
+ * destroy the device queues and free the pble and the hmc objects
+ */
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+{
+	struct i40e_info *ldev = iwdev->ldev;
+
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+
+	i40iw_pr_info("state = %d\n", iwdev->init_state);
+
+	switch (iwdev->init_state) {
+	case RDMA_DEV_REGISTERED:
+		iwdev->iw_status = 0;
+		i40iw_port_ibevent(iwdev);
+		i40iw_destroy_rdma_device(iwdev->iwibdev);
+		/* fallthrough */
+	case IP_ADDR_REGISTERED:
+		if (!reset)
+			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
+		/* fallthrough */
+	case INET_NOTIFIER:
+		if (i40iw_notifiers_registered > 0) {
+			i40iw_notifiers_registered--;
+			unregister_netevent_notifier(&i40iw_net_notifier);
+			unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
+			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
+		}
+		/* fallthrough */
+	case CEQ_CREATED:
+		i40iw_dele_ceqs(iwdev, reset);
+		/* fallthrough */
+	case AEQ_CREATED:
+		i40iw_destroy_aeq(iwdev, reset);
+		/* fallthrough */
+	case IEQ_CREATED:
+		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+		/* fallthrough */
+	case ILQ_CREATED:
+		i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+		/* fallthrough */
+	case CCQ_CREATED:
+		i40iw_destroy_ccq(iwdev, reset);
+		/* fallthrough */
+	case PBLE_CHUNK_MEM:
+		i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
+		/* fallthrough */
+	case HMC_OBJS_CREATED:
+		i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
+		/* fallthrough */
+	case CQP_CREATED:
+		i40iw_destroy_cqp(iwdev, !reset);
+		/* fallthrough */
+	case INITIAL_STATE:
+		i40iw_cleanup_cm_core(&iwdev->cm_core);
+		if (dev->is_pf)
+			i40iw_hw_stats_del_timer(dev);
+
+		i40iw_del_init_mem(iwdev);
+		break;
+	case INVALID_STATE:
+		/* fallthrough */
+	default:
+		i40iw_pr_err("bad init_state = %d\n", iwdev->init_state);
+		break;
+	}
+
+	if (del_hdl)
+		i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+	kfree(iwdev->hdl);
+}
+
+/**
+ * i40iw_setup_init_state - set up the initial device struct
+ * @hdl: handler for iwarp device - one per instance
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Initialize the iwarp device and its hdl information
+ * using the ldev and client information
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
+						     struct i40e_info *ldev,
+						     struct i40e_client *client)
+{
+	struct i40iw_device *iwdev = &hdl->device;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	memcpy(&hdl->ldev, ldev, sizeof(*ldev));
+	if (resource_profile == 1)
+		resource_profile = 2;
+
+	iwdev->mpa_version = mpa_version;
+	iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
+	    (u8)resource_profile + I40IW_HMC_PROFILE_DEFAULT :
+	    I40IW_HMC_PROFILE_DEFAULT;
+	iwdev->max_rdma_vfs =
+		(iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ?  max_rdma_vfs : 0;
+	iwdev->netdev = ldev->netdev;
+	hdl->client = client;
+	iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
+	if (!ldev->ftype)
+		iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
+	else
+		iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_VF_DB_ADDR_OFFSET;
+
+	status = i40iw_save_msix_info(iwdev, ldev);
+	if (status)
+		goto exit;
+	iwdev->hw.dev_context = (void *)ldev->pcidev;
+	iwdev->hw.hw_addr = ldev->hw_addr;
+	status = i40iw_allocate_dma_mem(&iwdev->hw,
+					&iwdev->obj_mem, 8192, 4096);
+	if (status)
+		goto exit;
+	iwdev->obj_next = iwdev->obj_mem;
+	iwdev->push_mode = push_mode;
+	init_waitqueue_head(&iwdev->vchnl_waitq);
+	status = i40iw_initialize_dev(iwdev, ldev);
+exit:
+	if (status) {
+		kfree(iwdev->iw_msixtbl);
+		i40iw_free_dma_mem(dev->hw, &iwdev->obj_mem);
+		iwdev->iw_msixtbl = NULL;
+	}
+	return status;
+}
+
+/**
+ * i40iw_open - client interface operation open for iwarp/uda device
+ * @ldev: lan device information
+ * @client: iwarp client information, provided during registration
+ *
+ * Called by the lan driver during the processing of client register
+ * Create device resources, set up queues, pble and hmc objects and
+ * register the device with the ib verbs interface
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_dev *dev;
+	enum i40iw_status_code status;
+	struct i40iw_handler *hdl;
+
+	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+	if (!hdl)
+		return -ENOMEM;
+	iwdev = &hdl->device;
+	iwdev->hdl = hdl;
+	dev = &iwdev->sc_dev;
+	i40iw_setup_cm_core(iwdev);
+
+	dev->back_dev = (void *)iwdev;
+	iwdev->ldev = &hdl->ldev;
+	iwdev->client = client;
+	mutex_init(&iwdev->pbl_mutex);
+	i40iw_add_handler(hdl);
+
+	do {
+		status = i40iw_setup_init_state(hdl, ldev, client);
+		if (status)
+			break;
+		iwdev->init_state = INITIAL_STATE;
+		if (dev->is_pf)
+			i40iw_wait_pe_ready(dev->hw);
+		status = i40iw_create_cqp(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = CQP_CREATED;
+		status = i40iw_hmc_setup(iwdev);
+		if (status)
+			break;
+		status = i40iw_create_ccq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = CCQ_CREATED;
+		status = i40iw_initialize_ilq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = ILQ_CREATED;
+		status = i40iw_initialize_ieq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = IEQ_CREATED;
+		status = i40iw_setup_aeq(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = AEQ_CREATED;
+		status = i40iw_setup_ceqs(iwdev, ldev);
+		if (status)
+			break;
+		iwdev->init_state = CEQ_CREATED;
+		status = i40iw_initialize_hw_resources(iwdev);
+		if (status)
+			break;
+		dev->ccq_ops->ccq_arm(dev->ccq);
+		status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
+		if (status)
+			break;
+		iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch");
+		i40iw_register_notifiers();
+		iwdev->init_state = INET_NOTIFIER;
+		status = i40iw_add_mac_ip(iwdev);
+		if (status)
+			break;
+		iwdev->init_state = IP_ADDR_REGISTERED;
+		if (i40iw_register_rdma_device(iwdev)) {
+			i40iw_pr_err("register rdma device fail\n");
+			break;
+		};
+
+		iwdev->init_state = RDMA_DEV_REGISTERED;
+		iwdev->iw_status = 1;
+		i40iw_port_ibevent(iwdev);
+		i40iw_pr_info("i40iw_open completed\n");
+		return 0;
+	} while (0);
+
+	i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
+	i40iw_deinit_device(iwdev, false, false);
+	return -ERESTART;
+}
+
+/**
+ * i40iw_l2param_change : handle qs handles for qos and mss change
+ * @ldev: lan device information
+ * @client: client for paramater change
+ * @params: new parameters from L2
+ */
+static void i40iw_l2param_change(struct i40e_info *ldev,
+				 struct i40e_client *client,
+				 struct i40e_params *params)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_device *iwdev;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	iwdev = &hdl->device;
+	if (params->mtu)
+		iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+}
+
+/**
+ * i40iw_close - client interface operation close for iwarp/uda device
+ * @ldev: lan device information
+ * @client: client to close
+ *
+ * Called by the lan driver during the processing of client unregister
+ * Destroy and clean up the driver resources
+ */
+static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool reset)
+{
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	iwdev = &hdl->device;
+	destroy_workqueue(iwdev->virtchnl_wq);
+	i40iw_deinit_device(iwdev, reset, true);
+}
+
+/**
+ * i40iw_vf_reset - process VF reset
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Called when a VF is reset by the PF
+ * Destroy and clean up the VF resources
+ */
+static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u32 vf_id)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_hmc_fcn_info hmc_fcn_info;
+	struct i40iw_virt_mem vf_dev_mem;
+	struct i40iw_vfdev *tmp_vfdev;
+	unsigned int i;
+	unsigned long flags;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	dev = &hdl->device.sc_dev;
+
+	for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
+		if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
+			continue;
+
+		/* free all resources allocated on behalf of vf */
+		tmp_vfdev = dev->vf_dev[i];
+		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+		dev->vf_dev[i] = NULL;
+		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
+		/* remove vf hmc function */
+		memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
+		hmc_fcn_info.vf_id = vf_id;
+		hmc_fcn_info.iw_vf_idx = tmp_vfdev->iw_vf_idx;
+		hmc_fcn_info.free_fcn = true;
+		i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+		/* free vf_dev */
+		vf_dev_mem.va = tmp_vfdev;
+		vf_dev_mem.size = sizeof(struct i40iw_vfdev) +
+					sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX;
+		i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+		break;
+	}
+}
+
+/**
+ * i40iw_vf_enable - enable a number of VFs
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @num_vfs: number of VFs for the PF
+ *
+ * Called when the number of VFs changes
+ */
+static void i40iw_vf_enable(struct i40e_info *ldev,
+			    struct i40e_client *client,
+			    u32 num_vfs)
+{
+	struct i40iw_handler *hdl;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return;
+
+	if (num_vfs > I40IW_MAX_PE_ENABLED_VF_COUNT)
+		hdl->device.max_enabled_vfs = I40IW_MAX_PE_ENABLED_VF_COUNT;
+	else
+		hdl->device.max_enabled_vfs = num_vfs;
+}
+
+/**
+ * i40iw_vf_capable - check if VF capable
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id
+ *
+ * Return 1 if a VF slot is available or if VF is already RDMA enabled
+ * Return 0 otherwise
+ */
+static int i40iw_vf_capable(struct i40e_info *ldev,
+			    struct i40e_client *client,
+			    u32 vf_id)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	unsigned int i;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return 0;
+
+	dev = &hdl->device.sc_dev;
+
+	for (i = 0; i < hdl->device.max_enabled_vfs; i++) {
+		if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id == vf_id))
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_virtchnl_receive - receive a message through the virtual channel
+ * @ldev: lan device information
+ * @client: client interface instance
+ * @vf_id: virtual function id associated with the message
+ * @msg: message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel receive operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_virtchnl_receive(struct i40e_info *ldev,
+				  struct i40e_client *client,
+				  u32 vf_id,
+				  u8 *msg,
+				  u16 len)
+{
+	struct i40iw_handler *hdl;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_device *iwdev;
+	int ret_code = I40IW_NOT_SUPPORTED;
+
+	if (!len || !msg)
+		return I40IW_ERR_PARAM;
+
+	hdl = i40iw_find_i40e_handler(ldev);
+	if (!hdl)
+		return I40IW_ERR_PARAM;
+
+	dev = &hdl->device.sc_dev;
+	iwdev = dev->back_dev;
+
+	i40iw_debug(dev, I40IW_DEBUG_VIRT, "msg %p, message length %u\n", msg, len);
+
+	if (dev->vchnl_if.vchnl_recv) {
+		ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
+		if (!dev->is_pf) {
+			atomic_dec(&iwdev->vchnl_msgs);
+			wake_up(&iwdev->vchnl_waitq);
+		}
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_virtchnl_send - send a message through the virtual channel
+ * @dev: iwarp device
+ * @vf_id: virtual function id associated with the message
+ * @msg: virtual channel message buffer pointer
+ * @len: length of the message
+ *
+ * Invoke virtual channel send operation for the given msg
+ * Return 0 if successful, otherwise return error
+ */
+static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
+						  u32 vf_id,
+						  u8 *msg,
+						  u16 len)
+{
+	struct i40iw_device *iwdev;
+	struct i40e_info *ldev;
+	enum i40iw_status_code ret_code = I40IW_ERR_BAD_PTR;
+
+	if (!dev || !dev->back_dev)
+		return ret_code;
+
+	iwdev = dev->back_dev;
+	ldev = iwdev->ldev;
+
+	if (ldev && ldev->ops && ldev->ops->virtchnl_send)
+		ret_code = ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
+
+	return ret_code;
+}
+
+/* client interface functions */
+static struct i40e_client_ops i40e_ops = {
+	.open = i40iw_open,
+	.close = i40iw_close,
+	.l2_param_change = i40iw_l2param_change,
+	.virtchnl_receive = i40iw_virtchnl_receive,
+	.vf_reset = i40iw_vf_reset,
+	.vf_enable = i40iw_vf_enable,
+	.vf_capable = i40iw_vf_capable
+};
+
+/**
+ * i40iw_init_module - driver initialization function
+ *
+ * First function to call when the driver is loaded
+ * Register the driver as i40e client and port mapper client
+ */
+static int __init i40iw_init_module(void)
+{
+	int ret;
+
+	memset(&i40iw_client, 0, sizeof(i40iw_client));
+	i40iw_client.version.major = CLIENT_IW_INTERFACE_VERSION_MAJOR;
+	i40iw_client.version.minor = CLIENT_IW_INTERFACE_VERSION_MINOR;
+	i40iw_client.version.build = CLIENT_IW_INTERFACE_VERSION_BUILD;
+	i40iw_client.ops = &i40e_ops;
+	memcpy(i40iw_client.name, i40iw_client_name, I40E_CLIENT_STR_LENGTH);
+	i40iw_client.type = I40E_CLIENT_IWARP;
+	spin_lock_init(&i40iw_handler_lock);
+	ret = i40e_register_client(&i40iw_client);
+	return ret;
+}
+
+/**
+ * i40iw_exit_module - driver exit clean up function
+ *
+ * The function is called just before the driver is unloaded
+ * Unregister the driver as i40e client and port mapper client
+ */
+static void __exit i40iw_exit_module(void)
+{
+	i40e_unregister_client(&i40iw_client);
+}
+
+module_init(i40iw_init_module);
+module_exit(i40iw_exit_module);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
new file mode 100644
index 000000000000..7e20493510e8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -0,0 +1,215 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_OSDEP_H
+#define I40IW_OSDEP_H
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <net/tcp.h>
+#include <crypto/hash.h>
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#define STATS_TIMER_DELAY 1000
+
+static inline void set_64bit_val(u64 *wqe_words, u32 byte_index, u64 value)
+{
+	wqe_words[byte_index >> 3] = value;
+}
+
+/**
+ * set_32bit_val - set 32 value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @value: value to write
+ **/
+static inline void set_32bit_val(u32 *wqe_words, u32 byte_index, u32 value)
+{
+	wqe_words[byte_index >> 2] = value;
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @value: read value
+ **/
+static inline void get_64bit_val(u64 *wqe_words, u32 byte_index, u64 *value)
+{
+	*value = wqe_words[byte_index >> 3];
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to reaad from
+ * @value: return 32 bit value
+ **/
+static inline void get_32bit_val(u32 *wqe_words, u32 byte_index, u32 *value)
+{
+	*value = wqe_words[byte_index >> 2];
+}
+
+struct i40iw_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+} __packed;
+
+struct i40iw_virt_mem {
+	void *va;
+	u32 size;
+} __packed;
+
+#define i40iw_debug(h, m, s, ...)                               \
+do {                                                            \
+	if (((m) & (h)->debug_mask))                            \
+		pr_info("i40iw " s, ##__VA_ARGS__);             \
+} while (0)
+
+#define i40iw_flush(a)          readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+#define I40E_GLHMC_VFSDCMD(_i)  (0x000C8000 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDCMD_MAX_INDEX    31
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT  0
+#define I40E_GLHMC_VFSDCMD_PMSDIDX_MASK  (0xFFF \
+					  << I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PF_SHIFT       16
+#define I40E_GLHMC_VFSDCMD_PF_MASK        (0xF << I40E_GLHMC_VFSDCMD_PF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_VF_SHIFT       20
+#define I40E_GLHMC_VFSDCMD_VF_MASK        (0x1FF << I40E_GLHMC_VFSDCMD_VF_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT 29
+#define I40E_GLHMC_VFSDCMD_PMF_TYPE_MASK  (0x3 \
+					   << I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT)
+#define I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT   31
+#define I40E_GLHMC_VFSDCMD_PMSDWR_MASK  (0x1 << I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT)
+
+#define I40E_GLHMC_VFSDDATAHIGH(_i)     (0x000C8200 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDDATAHIGH_MAX_INDEX       31
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT 0
+#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_MASK  (0xFFFFFFFF \
+			<< I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT)
+
+#define I40E_GLHMC_VFSDDATALOW(_i)      (0x000C8100 + ((_i) * 4)) \
+				/* _i=0...31 */
+#define I40E_GLHMC_VFSDDATALOW_MAX_INDEX        31
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT   0
+#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_MASK  (0x1 \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT    1
+#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_MASK  (0x1 \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_MASK  (0x3FF \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT)
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT 12
+#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_MASK  (0xFFFFF \
+			<< I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT)
+
+#define I40E_GLPE_FWLDSTATUS                     0x0000D200
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT 0
+#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_MASK  (0x1 \
+			<< I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_DONE_SHIFT           1
+#define I40E_GLPE_FWLDSTATUS_DONE_MASK  (0x1 << I40E_GLPE_FWLDSTATUS_DONE_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT       2
+#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT       3
+#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT)
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT       4
+#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_MASK  (0x1 \
+			 << I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT)
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_puda_buf;
+struct i40iw_puda_completion_info;
+struct i40iw_update_sds_info;
+struct i40iw_hmc_fcn_info;
+struct i40iw_virtchnl_work_info;
+struct i40iw_manage_vf_pble_info;
+struct i40iw_device;
+struct i40iw_hmc_info;
+struct i40iw_hw;
+
+u8 __iomem *i40iw_get_hw_addr(void *dev);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
+					      u32 length, u32 value);
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum);
+void i40iw_free_hash_desc(struct shash_desc *);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+					 struct i40iw_update_sds_info *info);
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_fcn_info *hmcfcninfo);
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						      struct i40iw_dma_mem *values_mem,
+						      u8 hmc_fn_id);
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						       struct i40iw_dma_mem *values_mem,
+						       u8 hmc_fn_id);
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+						 struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
+						   struct i40iw_manage_vf_pble_info *info);
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+			    struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
+void *i40iw_remove_head(struct list_head *list);
+
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp);
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
+
+enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
+						  struct i40iw_manage_vf_pble_info *info,
+						  bool wait);
+struct i40iw_dev_pestat;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+#define i40iw_mmiowb() mmiowb()
+void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
+u32  i40iw_rd32(struct i40iw_hw *hw, u32 reg);
+#endif				/* _I40IW_OSDEP_H_ */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
new file mode 100644
index 000000000000..a0b8ca10d67e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -0,0 +1,106 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_P_H
+#define I40IW_P_H
+
+#define PAUSE_TIMER_VALUE  0xFFFF
+#define REFRESH_THRESHOLD  0x7FFF
+#define HIGH_THRESHOLD     0x800
+#define LOW_THRESHOLD      0x200
+#define ALL_TC2PFC         0xFF
+
+void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
+		     char *desc, u64 *buf, u32 size);
+/* init operations */
+enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
+					 struct i40iw_device_init_info *info);
+
+enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
+
+void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
+
+u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
+						 struct i40iw_fast_reg_stag_info *info,
+						 bool post_sq);
+
+/* HMC/FPM functions */
+enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
+					    u8 hmc_fn_id);
+
+enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
+					   u32 *vf_cnt_array);
+
+/* cqp misc functions */
+
+void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
+
+void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
+
+enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
+					   struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
+					  struct i40iw_sc_qp *qp, u64 scratch);
+
+enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *cqp,
+							   u64 scratch, u8 hmc_fn_id,
+							   bool post_sq,
+							   bool poll_registers);
+
+enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
+
+void free_sd_mem(struct i40iw_sc_dev *dev);
+
+enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
+					     struct cqp_commands_info *pcmdinfo);
+
+enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev);
+
+/* prototype for functions used for dynamic memory allocation */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+					      struct i40iw_dma_mem *mem, u64 size,
+					      u32 alignment);
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem);
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+					       struct i40iw_virt_mem *mem, u32 size);
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+					   struct i40iw_virt_mem *mem);
+u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
new file mode 100644
index 000000000000..ded853d2fad8
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -0,0 +1,618 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_status.h"
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+
+#include <linux/pci.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include "i40iw_pble.h"
+#include "i40iw.h"
+
+struct i40iw_device;
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc);
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk);
+
+/**
+ * i40iw_destroy_pble_pool - destroy pool during module unload
+ * @pble_rsrc:	pble resources
+ */
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct list_head *clist;
+	struct list_head *tlist;
+	struct i40iw_chunk *chunk;
+	struct i40iw_pble_pool *pinfo = &pble_rsrc->pinfo;
+
+	if (pinfo->pool) {
+		list_for_each_safe(clist, tlist, &pinfo->clist) {
+			chunk = list_entry(clist, struct i40iw_chunk, list);
+			if (chunk->type == I40IW_VMALLOC)
+				i40iw_free_vmalloc_mem(dev->hw, chunk);
+			kfree(chunk);
+		}
+		gen_pool_destroy(pinfo->pool);
+	}
+}
+
+/**
+ * i40iw_hmc_init_pble - Initialize pble resources during module load
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ */
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct i40iw_hmc_info *hmc_info;
+	u32 fpm_idx = 0;
+
+	hmc_info = dev->hmc_info;
+	pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].base;
+	/* Now start the pble' on 4k boundary */
+	if (pble_rsrc->fpm_base_addr & 0xfff)
+		fpm_idx = (PAGE_SIZE - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
+
+	pble_rsrc->unallocated_pble =
+	    hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt - fpm_idx;
+	pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+
+	pble_rsrc->pinfo.pool_shift = POOL_SHIFT;
+	pble_rsrc->pinfo.pool = gen_pool_create(pble_rsrc->pinfo.pool_shift, -1);
+	INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+	if (!pble_rsrc->pinfo.pool)
+		goto error;
+
+	if (add_pble_pool(dev, pble_rsrc))
+		goto error;
+
+	return 0;
+
+ error:i40iw_destroy_pble_pool(dev, pble_rsrc);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_sd_pd_idx -  Returns sd index, pd index and rel_pd_idx from fpm address
+ * @ pble_rsrc:	structure containing fpm address
+ * @ idx: where to return indexes
+ */
+static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				 struct sd_pd_idx *idx)
+{
+	idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_DIRECT_BP_SIZE;
+	idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_PAGED_BP_SIZE;
+	idx->rel_pd_idx = (idx->pd_idx % I40IW_HMC_PD_CNT_IN_SD);
+}
+
+/**
+ * add_sd_direct - add sd direct for pble
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_add_page_info *info)
+{
+	enum i40iw_status_code ret_code = 0;
+	struct sd_pd_idx *idx = &info->idx;
+	struct i40iw_chunk *chunk = info->chunk;
+	struct i40iw_hmc_info *hmc_info = info->hmc_info;
+	struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+	u32 offset = 0;
+
+	if (!sd_entry->valid) {
+		if (dev->is_pf) {
+			ret_code = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+							    info->idx.sd_idx,
+							    I40IW_SD_TYPE_DIRECT,
+							    I40IW_HMC_DIRECT_BP_SIZE);
+			if (ret_code)
+				return ret_code;
+			chunk->type = I40IW_DMA_COHERENT;
+		}
+	}
+	offset = idx->rel_pd_idx << I40IW_HMC_PAGED_BP_SHIFT;
+	chunk->size = info->pages << I40IW_HMC_PAGED_BP_SHIFT;
+	chunk->vaddr = ((u8 *)sd_entry->u.bp.addr.va + offset);
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "chunk_size[%d] = 0x%x vaddr=%p fpm_addr = %llx\n",
+		    chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+	return 0;
+}
+
+/**
+ * i40iw_free_vmalloc_mem - free vmalloc during close
+ * @hw: hw struct
+ * @chunk: chunk information for vmalloc
+ */
+static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+	int i;
+
+	if (!chunk->pg_cnt)
+		goto done;
+	for (i = 0; i < chunk->pg_cnt; i++)
+		dma_unmap_page(&pcidev->dev, chunk->dmaaddrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ done:
+	kfree(chunk->dmaaddrs);
+	chunk->dmaaddrs = NULL;
+	vfree(chunk->vaddr);
+	chunk->vaddr = NULL;
+	chunk->type = 0;
+}
+
+/**
+ * i40iw_get_vmalloc_mem - get 2M page for sd
+ * @hw: hardware address
+ * @chunk: chunk to adf
+ * @pg_cnt: #of 4 K pages
+ */
+static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
+						    struct i40iw_chunk *chunk,
+						    int pg_cnt)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+	struct page *page;
+	u8 *addr;
+	u32 size;
+	int i;
+
+	chunk->dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
+	if (!chunk->dmaaddrs)
+		return I40IW_ERR_NO_MEMORY;
+	size = PAGE_SIZE * pg_cnt;
+	chunk->vaddr = vmalloc(size);
+	if (!chunk->vaddr) {
+		kfree(chunk->dmaaddrs);
+		chunk->dmaaddrs = NULL;
+		return I40IW_ERR_NO_MEMORY;
+	}
+	chunk->size = size;
+	addr = (u8 *)chunk->vaddr;
+	for (i = 0; i < pg_cnt; i++) {
+		page = vmalloc_to_page((void *)addr);
+		if (!page)
+			break;
+		chunk->dmaaddrs[i] = dma_map_page(&pcidev->dev, page, 0,
+						  PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(&pcidev->dev, chunk->dmaaddrs[i]))
+			break;
+		addr += PAGE_SIZE;
+	}
+
+	chunk->pg_cnt = i;
+	chunk->type = I40IW_VMALLOC;
+	if (i == pg_cnt)
+		return 0;
+
+	i40iw_free_vmalloc_mem(hw, chunk);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ */
+static inline u32 fpm_to_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+	return (addr - (pble_rsrc->fpm_base_addr)) >> 3;
+}
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ */
+static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					   struct i40iw_add_page_info *info)
+{
+	u8 *addr;
+	struct i40iw_dma_mem mem;
+	struct i40iw_hmc_pd_entry *pd_entry;
+	struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
+	struct i40iw_hmc_info *hmc_info = info->hmc_info;
+	struct i40iw_chunk *chunk = info->chunk;
+	struct i40iw_manage_vf_pble_info vf_pble_info;
+	enum i40iw_status_code status = 0;
+	u32 rel_pd_idx = info->idx.rel_pd_idx;
+	u32 pd_idx = info->idx.pd_idx;
+	u32 i;
+
+	status = i40iw_get_vmalloc_mem(dev->hw, chunk, info->pages);
+	if (status)
+		return I40IW_ERR_NO_MEMORY;
+	status = i40iw_add_sd_table_entry(dev->hw, hmc_info,
+					  info->idx.sd_idx, I40IW_SD_TYPE_PAGED,
+					  I40IW_HMC_DIRECT_BP_SIZE);
+	if (status) {
+		i40iw_free_vmalloc_mem(dev->hw, chunk);
+		return status;
+	}
+	if (!dev->is_pf) {
+		status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE,
+						     fpm_to_idx(pble_rsrc,
+								pble_rsrc->next_fpm_addr),
+						     (info->pages << PBLE_512_SHIFT));
+		if (status) {
+			i40iw_pr_err("allocate PBLEs in the PF.  Error %i\n", status);
+			i40iw_free_vmalloc_mem(dev->hw, chunk);
+			return status;
+		}
+	}
+	addr = chunk->vaddr;
+	for (i = 0; i < info->pages; i++) {
+		mem.pa = chunk->dmaaddrs[i];
+		mem.size = PAGE_SIZE;
+		mem.va = (void *)(addr);
+		pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+		if (!pd_entry->valid) {
+			status = i40iw_add_pd_table_entry(dev->hw, hmc_info, pd_idx++, &mem);
+			if (status)
+				goto error;
+			addr += PAGE_SIZE;
+		} else {
+			i40iw_pr_err("pd entry is valid expecting to be invalid\n");
+		}
+	}
+	if (!dev->is_pf) {
+		vf_pble_info.first_pd_index = info->idx.rel_pd_idx;
+		vf_pble_info.inv_pd_ent = false;
+		vf_pble_info.pd_entry_cnt = PBLE_PER_PAGE;
+		vf_pble_info.pd_pl_pba = sd_entry->u.pd_table.pd_page_addr.pa;
+		vf_pble_info.sd_index = info->idx.sd_idx;
+		status = i40iw_hw_manage_vf_pble_bp(dev->back_dev,
+						    &vf_pble_info, true);
+		if (status) {
+			i40iw_pr_err("CQP manage VF PBLE BP failed.  %i\n", status);
+			goto error;
+		}
+	}
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	return 0;
+error:
+	i40iw_free_vmalloc_mem(dev->hw, chunk);
+	return status;
+}
+
+/**
+ * add_pble_pool - add a sd entry for pble resoure
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ */
+static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc)
+{
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_chunk *chunk;
+	struct i40iw_add_page_info info;
+	struct sd_pd_idx *idx = &info.idx;
+	enum i40iw_status_code ret_code = 0;
+	enum i40iw_sd_entry_type sd_entry_type;
+	u64 sd_reg_val = 0;
+	u32 pages;
+
+	if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+		return I40IW_ERR_NO_MEMORY;
+	if (pble_rsrc->next_fpm_addr & 0xfff) {
+		i40iw_pr_err("next fpm_addr %llx\n", pble_rsrc->next_fpm_addr);
+		return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
+	}
+	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+	if (!chunk)
+		return I40IW_ERR_NO_MEMORY;
+	hmc_info = dev->hmc_info;
+	chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+	get_sd_pd_idx(pble_rsrc, idx);
+	sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+	pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
+			idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
+	pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+	if (!pages) {
+		ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
+		goto error;
+	}
+	info.chunk = chunk;
+	info.hmc_info = hmc_info;
+	info.pages = pages;
+	info.sd_entry = sd_entry;
+	if (!sd_entry->valid) {
+		sd_entry_type = (!idx->rel_pd_idx &&
+				 (pages == I40IW_HMC_PD_CNT_IN_SD) &&
+				 dev->is_pf) ? I40IW_SD_TYPE_DIRECT : I40IW_SD_TYPE_PAGED;
+	} else {
+		sd_entry_type = sd_entry->entry_type;
+	}
+	i40iw_debug(dev, I40IW_DEBUG_PBLE,
+		    "pages = %d, unallocated_pble[%u] current_fpm_addr = %llx\n",
+		    pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "sd_entry_type = %d sd_entry valid = %d\n",
+		    sd_entry_type, sd_entry->valid);
+
+	if (sd_entry_type == I40IW_SD_TYPE_DIRECT)
+		ret_code = add_sd_direct(dev, pble_rsrc, &info);
+	if (ret_code)
+		sd_entry_type = I40IW_SD_TYPE_PAGED;
+	else
+		pble_rsrc->stats_direct_sds++;
+
+	if (sd_entry_type == I40IW_SD_TYPE_PAGED) {
+		ret_code = add_bp_pages(dev, pble_rsrc, &info);
+		if (ret_code)
+			goto error;
+		else
+			pble_rsrc->stats_paged_sds++;
+	}
+
+	if (gen_pool_add_virt(pble_rsrc->pinfo.pool, (unsigned long)chunk->vaddr,
+			      (phys_addr_t)chunk->fpm_addr, chunk->size, -1)) {
+		i40iw_pr_err("could not allocate memory by gen_pool_addr_virt()\n");
+		ret_code = I40IW_ERR_NO_MEMORY;
+		goto error;
+	}
+	pble_rsrc->next_fpm_addr += chunk->size;
+	i40iw_debug(dev, I40IW_DEBUG_PBLE, "next_fpm_addr = %llx chunk_size[%u] = 0x%x\n",
+		    pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+	pble_rsrc->unallocated_pble -= (chunk->size >> 3);
+	list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+	sd_reg_val = (sd_entry_type == I40IW_SD_TYPE_PAGED) ?
+			sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
+	if (sd_entry->valid)
+		return 0;
+	if (dev->is_pf)
+		ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+					    sd_reg_val, idx->sd_idx,
+					    sd_entry->entry_type, true);
+	if (ret_code) {
+		i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
+		goto error;
+	}
+
+	sd_entry->valid = true;
+	return 0;
+ error:
+	kfree(chunk);
+	return ret_code;
+}
+
+/**
+ * free_lvl2 - fee level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static void free_lvl2(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+		      struct i40iw_pble_alloc *palloc)
+{
+	u32 i;
+	struct gen_pool *pool;
+	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+	struct i40iw_pble_info *root = &lvl2->root;
+	struct i40iw_pble_info *leaf = lvl2->leaf;
+
+	pool = pble_rsrc->pinfo.pool;
+
+	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+		if (leaf->addr)
+			gen_pool_free(pool, leaf->addr, (leaf->cnt << 3));
+		else
+			break;
+	}
+
+	if (root->addr)
+		gen_pool_free(pool, root->addr, (root->cnt << 3));
+
+	kfree(lvl2->leaf);
+	lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ * @pool: pool pointer
+ */
+static enum i40iw_status_code get_lvl2_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_pble_alloc *palloc,
+					    struct gen_pool *pool)
+{
+	u32 lf4k, lflast, total, i;
+	u32 pblcnt = PBLE_PER_PAGE;
+	u64 *addr;
+	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+	struct i40iw_pble_info *root = &lvl2->root;
+	struct i40iw_pble_info *leaf;
+
+	/* number of full 512 (4K) leafs) */
+	lf4k = palloc->total_cnt >> 9;
+	lflast = palloc->total_cnt % PBLE_PER_PAGE;
+	total = (lflast == 0) ? lf4k : lf4k + 1;
+	lvl2->leaf_cnt = total;
+
+	leaf = kzalloc((sizeof(*leaf) * total), GFP_ATOMIC);
+	if (!leaf)
+		return I40IW_ERR_NO_MEMORY;
+	lvl2->leaf = leaf;
+	/* allocate pbles for the root */
+	root->addr = gen_pool_alloc(pool, (total << 3));
+	if (!root->addr) {
+		kfree(lvl2->leaf);
+		lvl2->leaf = NULL;
+		return I40IW_ERR_NO_MEMORY;
+	}
+	root->idx = fpm_to_idx(pble_rsrc,
+			       (u64)gen_pool_virt_to_phys(pool, root->addr));
+	root->cnt = total;
+	addr = (u64 *)root->addr;
+	for (i = 0; i < total; i++, leaf++) {
+		pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE;
+		leaf->addr = gen_pool_alloc(pool, (pblcnt << 3));
+		if (!leaf->addr)
+			goto error;
+		leaf->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool, leaf->addr));
+
+		leaf->cnt = pblcnt;
+		*addr = (u64)leaf->idx;
+		addr++;
+	}
+	palloc->level = I40IW_LEVEL_2;
+	pble_rsrc->stats_lvl2++;
+	return 0;
+ error:
+	free_lvl2(pble_rsrc, palloc);
+	return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @dev: hardware control device structure
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ */
+static enum i40iw_status_code get_lvl1_pble(struct i40iw_sc_dev *dev,
+					    struct i40iw_hmc_pble_rsrc *pble_rsrc,
+					    struct i40iw_pble_alloc *palloc)
+{
+	u64 *addr;
+	struct gen_pool *pool;
+	struct i40iw_pble_info *lvl1 = &palloc->level1;
+
+	pool = pble_rsrc->pinfo.pool;
+	addr = (u64 *)gen_pool_alloc(pool, (palloc->total_cnt << 3));
+
+	if (!addr)
+		return I40IW_ERR_NO_MEMORY;
+
+	palloc->level = I40IW_LEVEL_1;
+	lvl1->addr = (unsigned long)addr;
+	lvl1->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool,
+			       (unsigned long)addr));
+	lvl1->cnt = palloc->total_cnt;
+	pble_rsrc->stats_lvl1++;
+	return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pool: pointer to general purpose special memory pool descriptor
+ */
+static inline enum i40iw_status_code get_lvl1_lvl2_pble(struct i40iw_sc_dev *dev,
+							struct i40iw_hmc_pble_rsrc *pble_rsrc,
+							struct i40iw_pble_alloc *palloc,
+							struct gen_pool *pool)
+{
+	enum i40iw_status_code status = 0;
+
+	status = get_lvl1_pble(dev, pble_rsrc, palloc);
+	if (status && (palloc->total_cnt > PBLE_PER_PAGE))
+		status = get_lvl2_pble(pble_rsrc, palloc, pool);
+	return status;
+}
+
+/**
+ * i40iw_get_pble - allocate pbles from the pool
+ * @dev: i40iw_sc_dev struct
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pble_cnt: #of pbles requested
+ */
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+				      struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				      struct i40iw_pble_alloc *palloc,
+				      u32 pble_cnt)
+{
+	struct gen_pool *pool;
+	enum i40iw_status_code status = 0;
+	u32 max_sds = 0;
+	int i;
+
+	pool = pble_rsrc->pinfo.pool;
+	palloc->total_cnt = pble_cnt;
+	palloc->level = I40IW_LEVEL_0;
+	/*check first to see if we can get pble's without acquiring additional sd's */
+	status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+	if (!status)
+		goto exit;
+	max_sds = (palloc->total_cnt >> 18) + 1;
+	for (i = 0; i < max_sds; i++) {
+		status = add_pble_pool(dev, pble_rsrc);
+		if (status)
+			break;
+		status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
+		if (!status)
+			break;
+	}
+exit:
+	if (!status)
+		pble_rsrc->stats_alloc_ok++;
+	else
+		pble_rsrc->stats_alloc_fail++;
+
+	return status;
+}
+
+/**
+ * i40iw_free_pble - put pbles back into pool
+ * @pble_rsrc:	pble resources
+ * @palloc: contains all inforamtion regarding pble resource being freed
+ */
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
+		     struct i40iw_pble_alloc *palloc)
+{
+	struct gen_pool *pool;
+
+	pool = pble_rsrc->pinfo.pool;
+	if (palloc->level == I40IW_LEVEL_2)
+		free_lvl2(pble_rsrc, palloc);
+	else
+		gen_pool_free(pool, palloc->level1.addr,
+			      (palloc->level1.cnt << 3));
+	pble_rsrc->stats_alloc_freed++;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.h b/drivers/infiniband/hw/i40iw/i40iw_pble.h
new file mode 100644
index 000000000000..7b1851d21cc0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.h
@@ -0,0 +1,131 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PBLE_H
+#define I40IW_PBLE_H
+
+#define POOL_SHIFT      6
+#define PBLE_PER_PAGE   512
+#define I40IW_HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT  9
+
+enum i40iw_pble_level {
+	I40IW_LEVEL_0 = 0,
+	I40IW_LEVEL_1 = 1,
+	I40IW_LEVEL_2 = 2
+};
+
+enum i40iw_alloc_type {
+	I40IW_NO_ALLOC = 0,
+	I40IW_DMA_COHERENT = 1,
+	I40IW_VMALLOC = 2
+};
+
+struct i40iw_pble_info {
+	unsigned long addr;
+	u32 idx;
+	u32 cnt;
+};
+
+struct i40iw_pble_level2 {
+	struct i40iw_pble_info root;
+	struct i40iw_pble_info *leaf;
+	u32 leaf_cnt;
+};
+
+struct i40iw_pble_alloc {
+	u32 total_cnt;
+	enum i40iw_pble_level level;
+	union {
+		struct i40iw_pble_info level1;
+		struct i40iw_pble_level2 level2;
+	};
+};
+
+struct sd_pd_idx {
+	u32 sd_idx;
+	u32 pd_idx;
+	u32 rel_pd_idx;
+};
+
+struct i40iw_add_page_info {
+	struct i40iw_chunk *chunk;
+	struct i40iw_hmc_sd_entry *sd_entry;
+	struct i40iw_hmc_info *hmc_info;
+	struct sd_pd_idx idx;
+	u32 pages;
+};
+
+struct i40iw_chunk {
+	struct list_head list;
+	u32 size;
+	void *vaddr;
+	u64 fpm_addr;
+	u32 pg_cnt;
+	dma_addr_t *dmaaddrs;
+	enum i40iw_alloc_type type;
+};
+
+struct i40iw_pble_pool {
+	struct gen_pool *pool;
+	struct list_head clist;
+	u32 total_pble_alloc;
+	u32 free_pble_cnt;
+	u32 pool_shift;
+};
+
+struct i40iw_hmc_pble_rsrc {
+	u32 unallocated_pble;
+	u64 fpm_base_addr;
+	u64 next_fpm_addr;
+	struct i40iw_pble_pool pinfo;
+
+	u32 stats_direct_sds;
+	u32 stats_paged_sds;
+	u64 stats_alloc_ok;
+	u64 stats_alloc_fail;
+	u64 stats_alloc_freed;
+	u64 stats_lvl1;
+	u64 stats_lvl2;
+};
+
+void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc);
+enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
+					   struct i40iw_hmc_pble_rsrc *pble_rsrc);
+void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct i40iw_pble_alloc *palloc);
+enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
+				      struct i40iw_hmc_pble_rsrc *pble_rsrc,
+				      struct i40iw_pble_alloc *palloc,
+				      u32 pble_cnt);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
new file mode 100644
index 000000000000..8eb400d8a7a0
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -0,0 +1,1436 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_puda.h"
+
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+			      struct i40iw_puda_buf *buf);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
+						      *rsrc, bool initial);
+/**
+ * i40iw_puda_get_listbuf - get buffer from puda list
+ * @list: list to use for buffers (ILQ or IEQ)
+ */
+static struct i40iw_puda_buf *i40iw_puda_get_listbuf(struct list_head *list)
+{
+	struct i40iw_puda_buf *buf = NULL;
+
+	if (!list_empty(list)) {
+		buf = (struct i40iw_puda_buf *)list->next;
+		list_del((struct list_head *)&buf->list);
+	}
+	return buf;
+}
+
+/**
+ * i40iw_puda_get_bufpool - return buffer from resource
+ * @rsrc: resource to use for buffer
+ */
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_puda_buf *buf = NULL;
+	struct list_head *list = &rsrc->bufpool;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	buf = i40iw_puda_get_listbuf(list);
+	if (buf)
+		rsrc->avail_buf_count--;
+	else
+		rsrc->stats_buf_alloc_fail++;
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+	return buf;
+}
+
+/**
+ * i40iw_puda_ret_bufpool - return buffer to rsrc list
+ * @rsrc: resource to use for buffer
+ * @buf: buffe to return to resouce
+ */
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+			    struct i40iw_puda_buf *buf)
+{
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	list_add(&buf->list, &rsrc->bufpool);
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+	rsrc->avail_buf_count++;
+}
+
+/**
+ * i40iw_puda_post_recvbuf - set wqe for rcv buffer
+ * @rsrc: resource ptr
+ * @wqe_idx: wqe index to use
+ * @buf: puda buffer for rcv q
+ * @initial: flag if during init time
+ */
+static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx,
+				    struct i40iw_puda_buf *buf, bool initial)
+{
+	u64 *wqe;
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	u64 offset24 = 0;
+
+	qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
+	wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+	i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+		    "%s: wqe_idx= %d buf = %p wqe = %p\n", __func__,
+		    wqe_idx, buf, wqe);
+	if (!initial)
+		get_64bit_val(wqe, 24, &offset24);
+
+	offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, offset24);
+
+	set_64bit_val(wqe, 0, buf->mem.pa);
+	set_64bit_val(wqe, 8,
+		      LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN));
+	set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_puda_replenish_rq - post rcv buffers
+ * @rsrc: resource to use for buffer
+ * @initial: flag if during init time
+ */
+static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc,
+						      bool initial)
+{
+	u32 i;
+	u32 invalid_cnt = rsrc->rxq_invalid_cnt;
+	struct i40iw_puda_buf *buf = NULL;
+
+	for (i = 0; i < invalid_cnt; i++) {
+		buf = i40iw_puda_get_bufpool(rsrc);
+		if (!buf)
+			return I40IW_ERR_list_empty;
+		i40iw_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf,
+					initial);
+		rsrc->rx_wqe_idx =
+		    ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
+		rsrc->rxq_invalid_cnt--;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_puda_alloc_buf - allocate mem for buffer
+ * @dev: iwarp device
+ * @length: length of buffer
+ */
+static struct i40iw_puda_buf *i40iw_puda_alloc_buf(struct i40iw_sc_dev *dev,
+						   u32 length)
+{
+	struct i40iw_puda_buf *buf = NULL;
+	struct i40iw_virt_mem buf_mem;
+	enum i40iw_status_code ret;
+
+	ret = i40iw_allocate_virt_mem(dev->hw, &buf_mem,
+				      sizeof(struct i40iw_puda_buf));
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s: error mem for buf\n", __func__);
+		return NULL;
+	}
+	buf = (struct i40iw_puda_buf *)buf_mem.va;
+	ret = i40iw_allocate_dma_mem(dev->hw, &buf->mem, length, 1);
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA,
+			    "%s: error dma mem for buf\n", __func__);
+		i40iw_free_virt_mem(dev->hw, &buf_mem);
+		return NULL;
+	}
+	buf->buf_mem.va = buf_mem.va;
+	buf->buf_mem.size = buf_mem.size;
+	return buf;
+}
+
+/**
+ * i40iw_puda_dele_buf - delete buffer back to system
+ * @dev: iwarp device
+ * @buf: buffer to free
+ */
+static void i40iw_puda_dele_buf(struct i40iw_sc_dev *dev,
+				struct i40iw_puda_buf *buf)
+{
+	i40iw_free_dma_mem(dev->hw, &buf->mem);
+	i40iw_free_virt_mem(dev->hw, &buf->buf_mem);
+}
+
+/**
+ * i40iw_puda_get_next_send_wqe - return next wqe for processing
+ * @qp: puda qp for wqe
+ * @wqe_idx: wqe index for caller
+ */
+static u64 *i40iw_puda_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+	u64 *wqe = NULL;
+	enum i40iw_status_code ret_code = 0;
+
+	*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	if (!*wqe_idx)
+		qp->swqe_polarity = !qp->swqe_polarity;
+	I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+	if (ret_code)
+		return wqe;
+	wqe = qp->sq_base[*wqe_idx].elem;
+
+	return wqe;
+}
+
+/**
+ * i40iw_puda_poll_info - poll cq for completion
+ * @cq: cq for poll
+ * @info: info return for successful completion
+ */
+static enum i40iw_status_code i40iw_puda_poll_info(struct i40iw_sc_cq *cq,
+						   struct i40iw_puda_completion_info *info)
+{
+	u64 qword0, qword2, qword3;
+	u64 *cqe;
+	u64 comp_ctx;
+	bool valid_bit;
+	u32 major_err, minor_err;
+	bool error;
+
+	cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&cq->cq_uk);
+	get_64bit_val(cqe, 24, &qword3);
+	valid_bit = (bool)RS_64(qword3, I40IW_CQ_VALID);
+
+	if (valid_bit != cq->cq_uk.polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	i40iw_debug_buf(cq->dev, I40IW_DEBUG_PUDA, "PUDA CQE", cqe, 32);
+	error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+	if (error) {
+		i40iw_debug(cq->dev, I40IW_DEBUG_PUDA, "%s receive error\n", __func__);
+		major_err = (u32)(RS_64(qword3, I40IW_CQ_MAJERR));
+		minor_err = (u32)(RS_64(qword3, I40IW_CQ_MINERR));
+		info->compl_error = major_err << 16 | minor_err;
+		return I40IW_ERR_CQ_COMPL_ERROR;
+	}
+
+	get_64bit_val(cqe, 0, &qword0);
+	get_64bit_val(cqe, 16, &qword2);
+
+	info->q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+	info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+	get_64bit_val(cqe, 8, &comp_ctx);
+	info->qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+	info->wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+
+	if (info->q_type == I40IW_CQE_QTYPE_RQ) {
+		info->vlan_valid = (bool)RS_64(qword3, I40IW_VLAN_TAG_VALID);
+		info->l4proto = (u8)RS_64(qword2, I40IW_UDA_L4PROTO);
+		info->l3proto = (u8)RS_64(qword2, I40IW_UDA_L3PROTO);
+		info->payload_len = (u16)RS_64(qword0, I40IW_UDA_PAYLOADLEN);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_puda_poll_completion - processes completion for cq
+ * @dev: iwarp device
+ * @cq: cq getting interrupt
+ * @compl_err: return any completion err
+ */
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+						  struct i40iw_sc_cq *cq, u32 *compl_err)
+{
+	struct i40iw_qp_uk *qp;
+	struct i40iw_cq_uk *cq_uk = &cq->cq_uk;
+	struct i40iw_puda_completion_info info;
+	enum i40iw_status_code ret = 0;
+	struct i40iw_puda_buf *buf;
+	struct i40iw_puda_rsrc *rsrc;
+	void *sqwrid;
+	u8 cq_type = cq->cq_type;
+	unsigned long	flags;
+
+	if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
+		rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
+		return I40IW_ERR_BAD_PTR;
+	}
+	memset(&info, 0, sizeof(info));
+	ret = i40iw_puda_poll_info(cq, &info);
+	*compl_err = info.compl_error;
+	if (ret == I40IW_ERR_QUEUE_EMPTY)
+		return ret;
+	if (ret)
+		goto done;
+
+	qp = info.qp;
+	if (!qp || !rsrc) {
+		ret = I40IW_ERR_BAD_PTR;
+		goto done;
+	}
+
+	if (qp->qp_id != rsrc->qp_id) {
+		ret = I40IW_ERR_BAD_PTR;
+		goto done;
+	}
+
+	if (info.q_type == I40IW_CQE_QTYPE_RQ) {
+		buf = (struct i40iw_puda_buf *)(uintptr_t)qp->rq_wrid_array[info.wqe_idx];
+		/* Get all the tcpip information in the buf header */
+		ret = i40iw_puda_get_tcpip_info(&info, buf);
+		if (ret) {
+			rsrc->stats_rcvd_pkt_err++;
+			if (cq_type == I40IW_CQ_TYPE_ILQ) {
+				i40iw_ilq_putback_rcvbuf(&rsrc->qp,
+							 info.wqe_idx);
+			} else {
+				i40iw_puda_ret_bufpool(rsrc, buf);
+				i40iw_puda_replenish_rq(rsrc, false);
+			}
+			goto done;
+		}
+
+		rsrc->stats_pkt_rcvd++;
+		rsrc->compl_rxwqe_idx = info.wqe_idx;
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
+		rsrc->receive(rsrc->dev, buf);
+		if (cq_type == I40IW_CQ_TYPE_ILQ)
+			i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
+		else
+			i40iw_puda_replenish_rq(rsrc, false);
+
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
+		sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
+		I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
+		rsrc->xmit_complete(rsrc->dev, sqwrid);
+		spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+		rsrc->tx_wqe_avail_cnt++;
+		spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+		if (!list_empty(&dev->ilq->txpend))
+			i40iw_puda_send_buf(dev->ilq, NULL);
+	}
+
+done:
+	I40IW_RING_MOVE_HEAD(cq_uk->cq_ring, ret);
+	if (I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring) == 0)
+		cq_uk->polarity = !cq_uk->polarity;
+	/* update cq tail in cq shadow memory also */
+	I40IW_RING_MOVE_TAIL(cq_uk->cq_ring);
+	set_64bit_val(cq_uk->shadow_area, 0,
+		      I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring));
+	return 0;
+}
+
+/**
+ * i40iw_puda_send - complete send wqe for transmit
+ * @qp: puda qp for send
+ * @info: buffer information for transmit
+ */
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_send_info *info)
+{
+	u64 *wqe;
+	u32 iplen, l4len;
+	u64 header[2];
+	u32 wqe_idx;
+	u8 iipt;
+
+	/* number of 32 bits DWORDS in header */
+	l4len = info->tcplen >> 2;
+	if (info->ipv4) {
+		iipt = 3;
+		iplen = 5;
+	} else {
+		iipt = 1;
+		iplen = 10;
+	}
+
+	wqe = i40iw_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+	qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
+	/* Third line of WQE descriptor */
+	/* maclen is in words */
+	header[0] = LS_64((info->maclen >> 1), I40IW_UDA_QPSQ_MACLEN) |
+		    LS_64(iplen, I40IW_UDA_QPSQ_IPLEN) | LS_64(1, I40IW_UDA_QPSQ_L4T) |
+		    LS_64(iipt, I40IW_UDA_QPSQ_IIPT) |
+		    LS_64(l4len, I40IW_UDA_QPSQ_L4LEN);
+	/* Forth line of WQE descriptor */
+	header[1] = LS_64(I40IW_OP_TYPE_SEND, I40IW_UDA_QPSQ_OPCODE) |
+		    LS_64(1, I40IW_UDA_QPSQ_SIGCOMPL) |
+		    LS_64(info->doloopback, I40IW_UDA_QPSQ_DOLOOPBACK) |
+		    LS_64(qp->qp_uk.swqe_polarity, I40IW_UDA_QPSQ_VALID);
+
+	set_64bit_val(wqe, 0, info->paddr);
+	set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
+	set_64bit_val(wqe, 16, header[0]);
+	set_64bit_val(wqe, 24, header[1]);
+
+	i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
+	i40iw_qp_post_wr(&qp->qp_uk);
+	return 0;
+}
+
+/**
+ * i40iw_puda_send_buf - transmit puda buffer
+ * @rsrc: resource to use for buffer
+ * @buf: puda buffer to transmit
+ */
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_send_info info;
+	enum i40iw_status_code ret = 0;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+	/* if no wqe available or not from a completion and we have
+	 * pending buffers, we must queue new buffer
+	 */
+	if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
+		list_add_tail(&buf->list, &rsrc->txpend);
+		spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+		rsrc->stats_sent_pkt_q++;
+		if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+			i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+				    "%s: adding to txpend\n", __func__);
+		return;
+	}
+	rsrc->tx_wqe_avail_cnt--;
+	/* if we are coming from a completion and have pending buffers
+	 * then Get one from pending list
+	 */
+	if (!buf) {
+		buf = i40iw_puda_get_listbuf(&rsrc->txpend);
+		if (!buf)
+			goto done;
+	}
+
+	info.scratch = (void *)buf;
+	info.paddr = buf->mem.pa;
+	info.len = buf->totallen;
+	info.tcplen = buf->tcphlen;
+	info.maclen = buf->maclen;
+	info.ipv4 = buf->ipv4;
+	info.doloopback = (rsrc->type == I40IW_PUDA_RSRC_TYPE_IEQ);
+
+	ret = i40iw_puda_send(&rsrc->qp, &info);
+	if (ret) {
+		rsrc->tx_wqe_avail_cnt++;
+		rsrc->stats_sent_pkt_q++;
+		list_add(&buf->list, &rsrc->txpend);
+		if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
+			i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+				    "%s: adding to puda_send\n", __func__);
+	} else {
+		rsrc->stats_pkt_sent++;
+	}
+done:
+	spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+}
+
+/**
+ * i40iw_puda_qp_setctx - during init, set qp's context
+ * @rsrc: qp's resource
+ */
+static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	u64 *qp_ctx = qp->hw_host_ctx;
+
+	set_64bit_val(qp_ctx, 8, qp->sq_pa);
+	set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+	set_64bit_val(qp_ctx, 24,
+		      LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
+		      LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE));
+
+	set_64bit_val(qp_ctx, 48, LS_64(1514, I40IWQPC_SNDMSS));
+	set_64bit_val(qp_ctx, 56, 0);
+	set_64bit_val(qp_ctx, 64, 1);
+
+	set_64bit_val(qp_ctx, 136,
+		      LS_64(rsrc->cq_id, I40IWQPC_TXCQNUM) |
+		      LS_64(rsrc->cq_id, I40IWQPC_RXCQNUM));
+
+	set_64bit_val(qp_ctx, 160, LS_64(1, I40IWQPC_PRIVEN));
+
+	set_64bit_val(qp_ctx, 168,
+		      LS_64((uintptr_t)qp, I40IWQPC_QPCOMPCTX));
+
+	set_64bit_val(qp_ctx, 176,
+		      LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
+		      LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
+		      LS_64(qp->qs_handle, I40IWQPC_QSHANDLE));
+
+	i40iw_debug_buf(rsrc->dev, I40IW_DEBUG_PUDA, "PUDA QP CONTEXT",
+			qp_ctx, I40IW_QP_CTX_SIZE);
+}
+
+/**
+ * i40iw_puda_qp_wqe - setup wqe for qp create
+ * @rsrc: resource for qp
+ */
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_sc_cqp *cqp;
+	u64 *wqe;
+	u64 header;
+	struct i40iw_ccq_cqe_info compl_info;
+	enum i40iw_status_code status = 0;
+
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+	set_64bit_val(wqe, 40, qp->shadow_area_pa);
+	header = qp->qp_uk.qp_id |
+		 LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
+		 LS_64(I40IW_QP_TYPE_UDA, I40IW_CQPSQ_QP_QPTYPE) |
+		 LS_64(1, I40IW_CQPSQ_QP_CQNUMVALID) |
+		 LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) |
+		 LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+
+	set_64bit_val(wqe, 24, header);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32);
+	i40iw_sc_cqp_post_sq(cqp);
+	status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						    I40IW_CQP_OP_CREATE_QP,
+						    &compl_info);
+	return status;
+}
+
+/**
+ * i40iw_puda_qp_create - create qp for resource
+ * @rsrc: resource to use for buffer
+ */
+static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_qp *qp = &rsrc->qp;
+	struct i40iw_qp_uk *ukqp = &qp->qp_uk;
+	enum i40iw_status_code ret = 0;
+	u32 sq_size, rq_size, t_size;
+	struct i40iw_dma_mem *mem;
+
+	sq_size = rsrc->sq_size * I40IW_QP_WQE_MIN_SIZE;
+	rq_size = rsrc->rq_size * I40IW_QP_WQE_MIN_SIZE;
+	t_size = (sq_size + rq_size + (I40IW_SHADOW_AREA_SIZE << 3) +
+		  I40IW_QP_CTX_SIZE);
+	/* Get page aligned memory */
+	ret =
+	    i40iw_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, t_size,
+				   I40IW_HW_PAGE_SIZE);
+	if (ret) {
+		i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s: error dma mem\n", __func__);
+		return ret;
+	}
+
+	mem = &rsrc->qpmem;
+	memset(mem->va, 0, t_size);
+	qp->hw_sq_size = i40iw_get_encoded_wqe_size(rsrc->sq_size, false);
+	qp->hw_rq_size = i40iw_get_encoded_wqe_size(rsrc->rq_size, false);
+	qp->pd = &rsrc->sc_pd;
+	qp->qp_type = I40IW_QP_TYPE_UDA;
+	qp->dev = rsrc->dev;
+	qp->back_qp = (void *)rsrc;
+	qp->sq_pa = mem->pa;
+	qp->rq_pa = qp->sq_pa + sq_size;
+	ukqp->sq_base = mem->va;
+	ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
+	ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
+	qp->shadow_area_pa = qp->rq_pa + rq_size;
+	qp->hw_host_ctx = ukqp->shadow_area + I40IW_SHADOW_AREA_SIZE;
+	qp->hw_host_ctx_pa =
+		qp->shadow_area_pa + (I40IW_SHADOW_AREA_SIZE << 3);
+	ukqp->qp_id = rsrc->qp_id;
+	ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
+	ukqp->rq_wrid_array = rsrc->rq_wrid_array;
+
+	ukqp->qp_id = rsrc->qp_id;
+	ukqp->sq_size = rsrc->sq_size;
+	ukqp->rq_size = rsrc->rq_size;
+
+	I40IW_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
+	I40IW_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
+	I40IW_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
+
+	if (qp->pd->dev->is_pf)
+		ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+						    I40E_PFPE_WQEALLOC);
+	else
+		ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
+						    I40E_VFPE_WQEALLOC1);
+
+	qp->qs_handle = qp->dev->qs_handle;
+	i40iw_puda_qp_setctx(rsrc);
+	ret = i40iw_puda_qp_wqe(rsrc);
+	if (ret)
+		i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
+	return ret;
+}
+
+/**
+ * i40iw_puda_cq_create - create cq for resource
+ * @rsrc: resource for which cq to create
+ */
+static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
+{
+	struct i40iw_sc_dev *dev = rsrc->dev;
+	struct i40iw_sc_cq *cq = &rsrc->cq;
+	u64 *wqe;
+	struct i40iw_sc_cqp *cqp;
+	u64 header;
+	enum i40iw_status_code ret = 0;
+	u32 tsize, cqsize;
+	u32 shadow_read_threshold = 128;
+	struct i40iw_dma_mem *mem;
+	struct i40iw_ccq_cqe_info compl_info;
+	struct i40iw_cq_init_info info;
+	struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
+
+	cq->back_cq = (void *)rsrc;
+	cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
+	tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
+	ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
+				     I40IW_CQ0_ALIGNMENT_MASK);
+	if (ret)
+		return ret;
+
+	mem = &rsrc->cqmem;
+	memset(&info, 0, sizeof(info));
+	info.dev = dev;
+	info.type = (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ) ?
+			 I40IW_CQ_TYPE_ILQ : I40IW_CQ_TYPE_IEQ;
+	info.shadow_read_threshold = rsrc->cq_size >> 2;
+	info.ceq_id_valid = true;
+	info.cq_base_pa = mem->pa;
+	info.shadow_area_pa = mem->pa + cqsize;
+	init_info->cq_base = mem->va;
+	init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
+	init_info->cq_size = rsrc->cq_size;
+	init_info->cq_id = rsrc->cq_id;
+	ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
+	if (ret)
+		goto error;
+	cqp = dev->cqp;
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+	if (!wqe) {
+		ret = I40IW_ERR_RING_FULL;
+		goto error;
+	}
+
+	set_64bit_val(wqe, 0, rsrc->cq_size);
+	set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+	set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+	set_64bit_val(wqe, 32, cq->cq_pa);
+
+	set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+	header = rsrc->cq_id |
+	    LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+	    LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+	    LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+	    LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+	    LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	set_64bit_val(wqe, 24, header);
+
+	i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+			wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	i40iw_sc_cqp_post_sq(dev->cqp);
+	ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+						 I40IW_CQP_OP_CREATE_CQ,
+						 &compl_info);
+
+error:
+	if (ret)
+		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+	return ret;
+}
+
+/**
+ * i40iw_puda_dele_resources - delete all resources during close
+ * @dev: iwarp device
+ * @type: type of resource to dele
+ * @reset: true if reset chip
+ */
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+			       enum puda_resource_type type,
+			       bool reset)
+{
+	struct i40iw_ccq_cqe_info compl_info;
+	struct i40iw_puda_rsrc *rsrc;
+	struct i40iw_puda_buf *buf = NULL;
+	struct i40iw_puda_buf *nextbuf = NULL;
+	struct i40iw_virt_mem *vmem;
+	enum i40iw_status_code ret;
+
+	switch (type) {
+	case I40IW_PUDA_RSRC_TYPE_ILQ:
+		rsrc = dev->ilq;
+		vmem = &dev->ilq_mem;
+		break;
+	case I40IW_PUDA_RSRC_TYPE_IEQ:
+		rsrc = dev->ieq;
+		vmem = &dev->ieq_mem;
+		break;
+	default:
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
+			    __func__, type);
+		return;
+	}
+
+	switch (rsrc->completion) {
+	case PUDA_HASH_CRC_COMPLETE:
+		i40iw_free_hash_desc(rsrc->hash_desc);
+	case PUDA_QP_CREATED:
+		do {
+			if (reset)
+				break;
+			ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
+							      0, false, true, true);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy\n",
+					    __func__);
+
+			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+								 I40IW_CQP_OP_DESTROY_QP,
+								 &compl_info);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy done\n",
+					    __func__);
+		} while (0);
+
+		i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
+		/* fallthrough */
+	case PUDA_CQ_CREATED:
+		do {
+			if (reset)
+				break;
+			ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq cq destroy\n",
+					    __func__);
+
+			ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+								 I40IW_CQP_OP_DESTROY_CQ,
+								 &compl_info);
+			if (ret)
+				i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
+					    "%s error ieq qp destroy done\n",
+					    __func__);
+		} while (0);
+
+		i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+		break;
+	default:
+		i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s error no resources\n", __func__);
+		break;
+	}
+	/* Free all allocated puda buffers for both tx and rx */
+	buf = rsrc->alloclist;
+	while (buf) {
+		nextbuf = buf->next;
+		i40iw_puda_dele_buf(dev, buf);
+		buf = nextbuf;
+		rsrc->alloc_buf_count--;
+	}
+	i40iw_free_virt_mem(dev->hw, vmem);
+}
+
+/**
+ * i40iw_puda_allocbufs - allocate buffers for resource
+ * @rsrc: resource for buffer allocation
+ * @count: number of buffers to create
+ */
+static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
+						   u32 count)
+{
+	u32 i;
+	struct i40iw_puda_buf *buf;
+	struct i40iw_puda_buf *nextbuf;
+
+	for (i = 0; i < count; i++) {
+		buf = i40iw_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
+		if (!buf) {
+			rsrc->stats_buf_alloc_fail++;
+			return I40IW_ERR_NO_MEMORY;
+		}
+		i40iw_puda_ret_bufpool(rsrc, buf);
+		rsrc->alloc_buf_count++;
+		if (!rsrc->alloclist) {
+			rsrc->alloclist = buf;
+		} else {
+			nextbuf = rsrc->alloclist;
+			rsrc->alloclist = buf;
+			buf->next = nextbuf;
+		}
+	}
+	rsrc->avail_buf_count = rsrc->alloc_buf_count;
+	return 0;
+}
+
+/**
+ * i40iw_puda_create_rsrc - create resouce (ilq or ieq)
+ * @dev: iwarp device
+ * @info: resource information
+ */
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+					      struct i40iw_puda_rsrc_info *info)
+{
+	enum i40iw_status_code ret = 0;
+	struct i40iw_puda_rsrc *rsrc;
+	u32 pudasize;
+	u32 sqwridsize, rqwridsize;
+	struct i40iw_virt_mem *vmem;
+
+	info->count = 1;
+	pudasize = sizeof(struct i40iw_puda_rsrc);
+	sqwridsize = info->sq_size * sizeof(struct i40iw_sq_uk_wr_trk_info);
+	rqwridsize = info->rq_size * 8;
+	switch (info->type) {
+	case I40IW_PUDA_RSRC_TYPE_ILQ:
+		vmem = &dev->ilq_mem;
+		break;
+	case I40IW_PUDA_RSRC_TYPE_IEQ:
+		vmem = &dev->ieq_mem;
+		break;
+	default:
+		return I40IW_NOT_SUPPORTED;
+	}
+	ret =
+	    i40iw_allocate_virt_mem(dev->hw, vmem,
+				    pudasize + sqwridsize + rqwridsize);
+	if (ret)
+		return ret;
+	rsrc = (struct i40iw_puda_rsrc *)vmem->va;
+	spin_lock_init(&rsrc->bufpool_lock);
+	if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
+		dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+		dev->ilq_count = info->count;
+		rsrc->receive = info->receive;
+		rsrc->xmit_complete = info->xmit_complete;
+	} else {
+		vmem = &dev->ieq_mem;
+		dev->ieq_count = info->count;
+		dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+		rsrc->receive = i40iw_ieq_receive;
+		rsrc->xmit_complete = i40iw_ieq_tx_compl;
+	}
+
+	rsrc->type = info->type;
+	rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
+	rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
+	rsrc->mss = info->mss;
+	/* Initialize all ieq lists */
+	INIT_LIST_HEAD(&rsrc->bufpool);
+	INIT_LIST_HEAD(&rsrc->txpend);
+
+	rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
+	dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id);
+	rsrc->qp_id = info->qp_id;
+	rsrc->cq_id = info->cq_id;
+	rsrc->sq_size = info->sq_size;
+	rsrc->rq_size = info->rq_size;
+	rsrc->cq_size = info->rq_size + info->sq_size;
+	rsrc->buf_size = info->buf_size;
+	rsrc->dev = dev;
+
+	ret = i40iw_puda_cq_create(rsrc);
+	if (!ret) {
+		rsrc->completion = PUDA_CQ_CREATED;
+		ret = i40iw_puda_qp_create(rsrc);
+	}
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__);
+		goto error;
+	}
+	rsrc->completion = PUDA_QP_CREATED;
+
+	ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
+	if (ret) {
+		i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__);
+		goto error;
+	}
+
+	rsrc->rxq_invalid_cnt = info->rq_size;
+	ret = i40iw_puda_replenish_rq(rsrc, true);
+	if (ret)
+		goto error;
+
+	if (info->type == I40IW_PUDA_RSRC_TYPE_IEQ) {
+		if (!i40iw_init_hash_desc(&rsrc->hash_desc)) {
+			rsrc->check_crc = true;
+			rsrc->completion = PUDA_HASH_CRC_COMPLETE;
+			ret = 0;
+		}
+	}
+
+	dev->ccq_ops->ccq_arm(&rsrc->cq);
+	return ret;
+ error:
+	i40iw_puda_dele_resources(dev, info->type, false);
+
+	return ret;
+}
+
+/**
+ * i40iw_ilq_putback_rcvbuf - ilq buffer to put back on rq
+ * @qp: ilq's qp resource
+ * @wqe_idx:  wqe index of completed rcvbuf
+ */
+static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx)
+{
+	u64 *wqe;
+	u64 offset24;
+
+	wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+	get_64bit_val(wqe, 24, &offset24);
+	offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
+	set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * i40iw_ieq_get_fpdu - given length return fpdu length
+ * @length: length if fpdu
+ */
+static u16 i40iw_ieq_get_fpdu_length(u16 length)
+{
+	u16 fpdu_len;
+
+	fpdu_len = length + I40IW_IEQ_MPA_FRAMING;
+	fpdu_len = (fpdu_len + 3) & 0xfffffffc;
+	return fpdu_len;
+}
+
+/**
+ * i40iw_ieq_copy_to_txbuf - copydata from rcv buf to tx buf
+ * @buf: rcv buffer with partial
+ * @txbuf: tx buffer for sendign back
+ * @buf_offset: rcv buffer offset to copy from
+ * @txbuf_offset: at offset in tx buf to copy
+ * @length: length of data to copy
+ */
+static void i40iw_ieq_copy_to_txbuf(struct i40iw_puda_buf *buf,
+				    struct i40iw_puda_buf *txbuf,
+				    u16 buf_offset, u32 txbuf_offset,
+				    u32 length)
+{
+	void *mem1 = (u8 *)buf->mem.va + buf_offset;
+	void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
+
+	memcpy(mem2, mem1, length);
+}
+
+/**
+ * i40iw_ieq_setup_tx_buf - setup tx buffer for partial handling
+ * @buf: reeive buffer with partial
+ * @txbuf: buffer to prepare
+ */
+static void i40iw_ieq_setup_tx_buf(struct i40iw_puda_buf *buf,
+				   struct i40iw_puda_buf *txbuf)
+{
+	txbuf->maclen = buf->maclen;
+	txbuf->tcphlen = buf->tcphlen;
+	txbuf->ipv4 = buf->ipv4;
+	txbuf->hdrlen = buf->hdrlen;
+	i40iw_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
+}
+
+/**
+ * i40iw_ieq_check_first_buf - check if rcv buffer's seq is in range
+ * @buf: receive exception buffer
+ * @fps: first partial sequence number
+ */
+static void i40iw_ieq_check_first_buf(struct i40iw_puda_buf *buf, u32 fps)
+{
+	u32 offset;
+
+	if (buf->seqnum < fps) {
+		offset = fps - buf->seqnum;
+		if (offset > buf->datalen)
+			return;
+		buf->data += offset;
+		buf->datalen -= (u16)offset;
+		buf->seqnum = fps;
+	}
+}
+
+/**
+ * i40iw_ieq_compl_pfpdu - write txbuf with full fpdu
+ * @ieq: ieq resource
+ * @rxlist: ieq's received buffer list
+ * @pbufl: temporary list for buffers for fpddu
+ * @txbuf: tx buffer for fpdu
+ * @fpdu_len: total length of fpdu
+ */
+static void  i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
+				   struct list_head *rxlist,
+				   struct list_head *pbufl,
+				   struct i40iw_puda_buf *txbuf,
+				   u16 fpdu_len)
+{
+	struct i40iw_puda_buf *buf;
+	u32 nextseqnum;
+	u16 txoffset, bufoffset;
+
+	buf = i40iw_puda_get_listbuf(pbufl);
+	nextseqnum = buf->seqnum + fpdu_len;
+	txbuf->totallen = buf->hdrlen + fpdu_len;
+	txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
+	i40iw_ieq_setup_tx_buf(buf, txbuf);
+
+	txoffset = buf->hdrlen;
+	bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+
+	do {
+		if (buf->datalen >= fpdu_len) {
+			/* copied full fpdu */
+			i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len);
+			buf->datalen -= fpdu_len;
+			buf->data += fpdu_len;
+			buf->seqnum = nextseqnum;
+			break;
+		}
+		/* copy partial fpdu */
+		i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen);
+		txoffset += buf->datalen;
+		fpdu_len -= buf->datalen;
+		i40iw_puda_ret_bufpool(ieq, buf);
+		buf = i40iw_puda_get_listbuf(pbufl);
+		bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+	} while (1);
+
+	/* last buffer on the list*/
+	if (buf->datalen)
+		list_add(&buf->list, rxlist);
+	else
+		i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_create_pbufl - create buffer list for single fpdu
+ * @rxlist: resource list for receive ieq buffes
+ * @pbufl: temp. list for buffers for fpddu
+ * @buf: first receive buffer
+ * @fpdu_len: total length of fpdu
+ */
+static enum i40iw_status_code i40iw_ieq_create_pbufl(
+						     struct i40iw_pfpdu *pfpdu,
+						     struct list_head *rxlist,
+						     struct list_head *pbufl,
+						     struct i40iw_puda_buf *buf,
+						     u16 fpdu_len)
+{
+	enum i40iw_status_code status = 0;
+	struct i40iw_puda_buf *nextbuf;
+	u32	nextseqnum;
+	u16 plen = fpdu_len - buf->datalen;
+	bool done = false;
+
+	nextseqnum = buf->seqnum + buf->datalen;
+	do {
+		nextbuf = i40iw_puda_get_listbuf(rxlist);
+		if (!nextbuf) {
+			status = I40IW_ERR_list_empty;
+			break;
+		}
+		list_add_tail(&nextbuf->list, pbufl);
+		if (nextbuf->seqnum != nextseqnum) {
+			pfpdu->bad_seq_num++;
+			status = I40IW_ERR_SEQ_NUM;
+			break;
+		}
+		if (nextbuf->datalen >= plen) {
+			done = true;
+		} else {
+			plen -= nextbuf->datalen;
+			nextseqnum = nextbuf->seqnum + nextbuf->datalen;
+		}
+
+	} while (!done);
+
+	return status;
+}
+
+/**
+ * i40iw_ieq_handle_partial - process partial fpdu buffer
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ * @fpdu_len: fpdu len in the buffer
+ */
+static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *ieq,
+						       struct i40iw_pfpdu *pfpdu,
+						       struct i40iw_puda_buf *buf,
+						       u16 fpdu_len)
+{
+	enum i40iw_status_code status = 0;
+	u8 *crcptr;
+	u32 mpacrc;
+	u32 seqnum = buf->seqnum;
+	struct list_head pbufl;	/* partial buffer list */
+	struct i40iw_puda_buf *txbuf = NULL;
+	struct list_head *rxlist = &pfpdu->rxlist;
+
+	INIT_LIST_HEAD(&pbufl);
+	list_add(&buf->list, &pbufl);
+
+	status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
+	if (!status)
+		goto error;
+
+	txbuf = i40iw_puda_get_bufpool(ieq);
+	if (!txbuf) {
+		pfpdu->no_tx_bufs++;
+		status = I40IW_ERR_NO_TXBUFS;
+		goto error;
+	}
+
+	i40iw_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
+	i40iw_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
+	crcptr = txbuf->data + fpdu_len - 4;
+	mpacrc = *(u32 *)crcptr;
+	if (ieq->check_crc) {
+		status = i40iw_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
+						(fpdu_len - 4), mpacrc);
+		if (status) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error bad crc\n", __func__);
+			goto error;
+		}
+	}
+
+	i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "IEQ TX BUFFER",
+			txbuf->mem.va, txbuf->totallen);
+	i40iw_puda_send_buf(ieq, txbuf);
+	pfpdu->rcv_nxt = seqnum + fpdu_len;
+	return status;
+ error:
+	while (!list_empty(&pbufl)) {
+		buf = (struct i40iw_puda_buf *)(pbufl.prev);
+		list_del(&buf->list);
+		list_add(&buf->list, rxlist);
+	}
+	if (txbuf)
+		i40iw_puda_ret_bufpool(ieq, txbuf);
+	return status;
+}
+
+/**
+ * i40iw_ieq_process_buf - process buffer rcvd for ieq
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ */
+static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
+						    struct i40iw_pfpdu *pfpdu,
+						    struct i40iw_puda_buf *buf)
+{
+	u16 fpdu_len = 0;
+	u16 datalen = buf->datalen;
+	u8 *datap = buf->data;
+	u8 *crcptr;
+	u16 ioffset = 0;
+	u32 mpacrc;
+	u32 seqnum = buf->seqnum;
+	u16 length = 0;
+	u16 full = 0;
+	bool partial = false;
+	struct i40iw_puda_buf *txbuf;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	enum i40iw_status_code ret = 0;
+	enum i40iw_status_code status = 0;
+
+	ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
+	while (datalen) {
+		fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(u16 *)datap));
+		if (fpdu_len > pfpdu->max_fpdu_data) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error bad fpdu_len\n", __func__);
+			status = I40IW_ERR_MPA_CRC;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+
+		if (datalen < fpdu_len) {
+			partial = true;
+			break;
+		}
+		crcptr = datap + fpdu_len - 4;
+		mpacrc = *(u32 *)crcptr;
+		if (ieq->check_crc)
+			ret = i40iw_ieq_check_mpacrc(ieq->hash_desc,
+						     datap, fpdu_len - 4, mpacrc);
+		if (ret) {
+			status = I40IW_ERR_MPA_CRC;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+		full++;
+		pfpdu->fpdu_processed++;
+		datap += fpdu_len;
+		length += fpdu_len;
+		datalen -= fpdu_len;
+	}
+	if (full) {
+		/* copy full pdu's in the txbuf and send them out */
+		txbuf = i40iw_puda_get_bufpool(ieq);
+		if (!txbuf) {
+			pfpdu->no_tx_bufs++;
+			status = I40IW_ERR_NO_TXBUFS;
+			list_add(&buf->list, rxlist);
+			return status;
+		}
+		/* modify txbuf's buffer header */
+		i40iw_ieq_setup_tx_buf(buf, txbuf);
+		/* copy full fpdu's to new buffer */
+		i40iw_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen,
+					length);
+		txbuf->totallen = buf->hdrlen + length;
+
+		i40iw_ieq_update_tcpip_info(txbuf, length, buf->seqnum);
+		i40iw_puda_send_buf(ieq, txbuf);
+
+		if (!datalen) {
+			pfpdu->rcv_nxt = buf->seqnum + length;
+			i40iw_puda_ret_bufpool(ieq, buf);
+			return status;
+		}
+		buf->data = datap;
+		buf->seqnum = seqnum + length;
+		buf->datalen = datalen;
+		pfpdu->rcv_nxt = buf->seqnum;
+	}
+	if (partial)
+		status = i40iw_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
+
+	return status;
+}
+
+/**
+ * i40iw_ieq_process_fpdus - process fpdu's buffers on its list
+ * @qp: qp for which partial fpdus
+ * @ieq: ieq resource
+ */
+static void i40iw_ieq_process_fpdus(struct i40iw_sc_qp *qp,
+				    struct i40iw_puda_rsrc *ieq)
+{
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct i40iw_puda_buf *buf;
+	enum i40iw_status_code status;
+
+	do {
+		if (list_empty(rxlist))
+			break;
+		buf = i40iw_puda_get_listbuf(rxlist);
+		if (!buf) {
+			i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+				    "%s: error no buf\n", __func__);
+			break;
+		}
+		if (buf->seqnum != pfpdu->rcv_nxt) {
+			/* This could be out of order or missing packet */
+			pfpdu->out_of_order++;
+			list_add(&buf->list, rxlist);
+			break;
+		}
+		/* keep processing buffers from the head of the list */
+		status = i40iw_ieq_process_buf(ieq, pfpdu, buf);
+		if (status == I40IW_ERR_MPA_CRC) {
+			pfpdu->mpa_crc_err = true;
+			while (!list_empty(rxlist)) {
+				buf = i40iw_puda_get_listbuf(rxlist);
+				i40iw_puda_ret_bufpool(ieq, buf);
+				pfpdu->crc_err++;
+			}
+			/* create CQP for AE */
+			i40iw_ieq_mpa_crc_ae(ieq->dev, qp);
+		}
+	} while (!status);
+}
+
+/**
+ * i40iw_ieq_handle_exception - handle qp's exception
+ * @ieq: ieq resource
+ * @qp: qp receiving excpetion
+ * @buf: receive buffer
+ */
+static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
+				       struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_buf *tmpbuf = NULL;
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
+	u32 rcv_wnd = hw_host_ctx[23];
+	/* first partial seq # in q2 */
+	u32 fps = qp->q2_buf[16];
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct list_head *plist;
+
+	pfpdu->total_ieq_bufs++;
+
+	if (pfpdu->mpa_crc_err) {
+		pfpdu->crc_err++;
+		goto error;
+	}
+	if (pfpdu->mode && (fps != pfpdu->fps)) {
+		/* clean up qp as it is new partial sequence */
+		i40iw_ieq_cleanup_qp(ieq->dev, qp);
+		i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
+			    "%s: restarting new partial\n", __func__);
+		pfpdu->mode = false;
+	}
+
+	if (!pfpdu->mode) {
+		i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128);
+		/* First_Partial_Sequence_Number check */
+		pfpdu->rcv_nxt = fps;
+		pfpdu->fps = fps;
+		pfpdu->mode = true;
+		pfpdu->max_fpdu_data = ieq->mss;
+		pfpdu->pmode_count++;
+		INIT_LIST_HEAD(rxlist);
+		i40iw_ieq_check_first_buf(buf, fps);
+	}
+
+	if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
+		pfpdu->bad_seq_num++;
+		goto error;
+	}
+
+	if (!list_empty(rxlist)) {
+		tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
+		plist = &tmpbuf->list;
+		while ((struct list_head *)tmpbuf != rxlist) {
+			if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
+				break;
+			tmpbuf = (struct i40iw_puda_buf *)plist->next;
+		}
+		/* Insert buf before tmpbuf */
+		list_add_tail(&buf->list, &tmpbuf->list);
+	} else {
+		list_add_tail(&buf->list, rxlist);
+	}
+	i40iw_ieq_process_fpdus(qp, ieq);
+	return;
+ error:
+	i40iw_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * i40iw_ieq_receive - received exception buffer
+ * @dev: iwarp device
+ * @buf: exception buffer received
+ */
+static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+			      struct i40iw_puda_buf *buf)
+{
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_sc_qp *qp = NULL;
+	u32 wqe_idx = ieq->compl_rxwqe_idx;
+
+	qp = i40iw_ieq_get_qp(dev, buf);
+	if (!qp) {
+		ieq->stats_bad_qp_id++;
+		i40iw_puda_ret_bufpool(ieq, buf);
+	} else {
+		i40iw_ieq_handle_exception(ieq, qp, buf);
+	}
+	/*
+	 * ieq->rx_wqe_idx is used by i40iw_puda_replenish_rq()
+	 * on which wqe_idx to start replenish rq
+	 */
+	if (!ieq->rxq_invalid_cnt)
+		ieq->rx_wqe_idx = wqe_idx;
+	ieq->rxq_invalid_cnt++;
+}
+
+/**
+ * i40iw_ieq_tx_compl - put back after sending completed exception buffer
+ * @dev: iwarp device
+ * @sqwrid: pointer to puda buffer
+ */
+static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+{
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+	struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
+
+	i40iw_puda_ret_bufpool(ieq, buf);
+	if (!list_empty(&ieq->txpend)) {
+		buf = i40iw_puda_get_listbuf(&ieq->txpend);
+		i40iw_puda_send_buf(ieq, buf);
+	}
+}
+
+/**
+ * i40iw_ieq_cleanup_qp - qp is being destroyed
+ * @dev: iwarp device
+ * @qp: all pending fpdu buffers
+ */
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_puda_buf *buf;
+	struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
+	struct list_head *rxlist = &pfpdu->rxlist;
+	struct i40iw_puda_rsrc *ieq = dev->ieq;
+
+	if (!pfpdu->mode)
+		return;
+	while (!list_empty(rxlist)) {
+		buf = i40iw_puda_get_listbuf(rxlist);
+		i40iw_puda_ret_bufpool(ieq, buf);
+	}
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
new file mode 100644
index 000000000000..52bf7826ce4e
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -0,0 +1,183 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_PUDA_H
+#define I40IW_PUDA_H
+
+#define I40IW_IEQ_MPA_FRAMING 6
+
+struct i40iw_sc_dev;
+struct i40iw_sc_qp;
+struct i40iw_sc_cq;
+
+enum puda_resource_type {
+	I40IW_PUDA_RSRC_TYPE_ILQ = 1,
+	I40IW_PUDA_RSRC_TYPE_IEQ
+};
+
+enum puda_rsrc_complete {
+	PUDA_CQ_CREATED = 1,
+	PUDA_QP_CREATED,
+	PUDA_TX_COMPLETE,
+	PUDA_RX_COMPLETE,
+	PUDA_HASH_CRC_COMPLETE
+};
+
+struct i40iw_puda_completion_info {
+	struct i40iw_qp_uk *qp;
+	u8 q_type;
+	u8 vlan_valid;
+	u8 l3proto;
+	u8 l4proto;
+	u16 payload_len;
+	u32 compl_error;	/* No_err=0, else major and minor err code */
+	u32 qp_id;
+	u32 wqe_idx;
+};
+
+struct i40iw_puda_send_info {
+	u64 paddr;		/* Physical address */
+	u32 len;
+	u8 tcplen;
+	u8 maclen;
+	bool ipv4;
+	bool doloopback;
+	void *scratch;
+};
+
+struct i40iw_puda_buf {
+	struct list_head list;	/* MUST be first entry */
+	struct i40iw_dma_mem mem;	/* DMA memory for the buffer */
+	struct i40iw_puda_buf *next;	/* for alloclist in rsrc struct */
+	struct i40iw_virt_mem buf_mem;	/* Buffer memory for this buffer */
+	void *scratch;
+	u8 *iph;
+	u8 *tcph;
+	u8 *data;
+	u16 datalen;
+	u16 vlan_id;
+	u8 tcphlen;		/* tcp length in bytes */
+	u8 maclen;		/* mac length in bytes */
+	u32 totallen;		/* machlen+iphlen+tcphlen+datalen */
+	atomic_t refcount;
+	u8 hdrlen;
+	bool ipv4;
+	u32 seqnum;
+};
+
+struct i40iw_puda_rsrc_info {
+	enum puda_resource_type type;	/* ILQ or IEQ */
+	u32 count;
+	u16 pd_id;
+	u32 cq_id;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u16 buf_size;
+	u16 mss;
+	u32 tx_buf_cnt;		/* total bufs allocated will be rq_size + tx_buf_cnt */
+	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+};
+
+struct i40iw_puda_rsrc {
+	struct i40iw_sc_cq cq;
+	struct i40iw_sc_qp qp;
+	struct i40iw_sc_pd sc_pd;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_dma_mem cqmem;
+	struct i40iw_dma_mem qpmem;
+	struct i40iw_virt_mem ilq_mem;
+	enum puda_rsrc_complete completion;
+	enum puda_resource_type type;
+	u16 buf_size;		/*buffer must be max datalen + tcpip hdr + mac */
+	u16 mss;
+	u32 cq_id;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u32 cq_size;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u32 compl_rxwqe_idx;
+	u32 rx_wqe_idx;
+	u32 rxq_invalid_cnt;
+	u32 tx_wqe_avail_cnt;
+	bool check_crc;
+	struct shash_desc *hash_desc;
+	struct list_head txpend;
+	struct list_head bufpool;	/* free buffers pool list for recv and xmit */
+	u32 alloc_buf_count;
+	u32 avail_buf_count;		/* snapshot of currently available buffers */
+	spinlock_t bufpool_lock;
+	struct i40iw_puda_buf *alloclist;
+	void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
+	void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+	/* puda stats */
+	u64 stats_buf_alloc_fail;
+	u64 stats_pkt_rcvd;
+	u64 stats_pkt_sent;
+	u64 stats_rcvd_pkt_err;
+	u64 stats_sent_pkt_q;
+	u64 stats_bad_qp_id;
+};
+
+struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc);
+void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
+			    struct i40iw_puda_buf *buf);
+void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
+			 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
+				       struct i40iw_puda_send_info *info);
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+					      struct i40iw_puda_rsrc_info *info);
+void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+			       enum puda_resource_type type,
+			       bool reset);
+enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
+						  struct i40iw_sc_cq *cq, u32 *compl_err);
+void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+				     struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf);
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+					      void *addr, u32 length, u32 value);
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc);
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_free_hash_desc(struct shash_desc *desc);
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
+				 u32 seqnum);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_register.h b/drivers/infiniband/hw/i40iw/i40iw_register.h
new file mode 100644
index 000000000000..57768184e251
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_register.h
@@ -0,0 +1,1030 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_REGISTER_H
+#define I40IW_REGISTER_H
+
+#define I40E_GLGEN_STAT               0x000B612C /* Reset: POR */
+
+#define I40E_PFHMC_PDINV               0x000C0300 /* Reset: PFR */
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMSDIDX_MASK  (0xFFF <<  I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_PDINV_PMPDIDX_MASK  (0x1FF <<  I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT  31
+#define I40E_PFHMC_SDCMD_PMSDWR_MASK   (0x1 <<  I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT   0
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK    (0x1 <<  I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT    1
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK     (0x1 <<  I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK  (0x3FF <<  I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
+
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */	/* Reset: PFR */
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT          0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK           (0x1 <<  I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT        1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK         (0x1 <<  I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT        3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK         (0x3 <<  I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+
+#define I40E_VFINT_DYN_CTLN1(_INTVF)               (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define I40E_GLHMC_VFPDINV(_i)               (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK  (0x1 <<  I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
+#define I40E_GLPCI_LBARCTRL                    0x000BE484 /* Reset: POR */
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT    4
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK     (0x3 <<  I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_DREVID			0x0009C480 /* Reset: PCIR */
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK 0xFF
+
+#define I40E_PFPE_AEQALLOC               0x00131180 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_PFPE_AEQALLOC_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_PFPE_CCQPHIGH                  0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_PFPE_CCQPLOW                 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_PFPE_CCQPSTATUS                   0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT   0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK    (0x1 <<  I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK  (0x7 <<  I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK  (0x3F <<  I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT    31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK     (0x1 <<  I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_PFPE_CQACK              0x00131100 /* Reset: PFR */
+#define I40E_PFPE_CQACK_PECQID_SHIFT 0
+#define I40E_PFPE_CQACK_PECQID_MASK  (0x1FFFF <<  I40E_PFPE_CQACK_PECQID_SHIFT)
+#define I40E_PFPE_CQARM              0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQARM_PECQID_SHIFT 0
+#define I40E_PFPE_CQARM_PECQID_MASK  (0x1FFFF <<  I40E_PFPE_CQARM_PECQID_SHIFT)
+#define I40E_PFPE_CQPDB              0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_PFPE_CQPDB_WQHEAD_MASK  (0x7FF <<  I40E_PFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_PFPE_CQPERRCODES                      0x00008880 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_PFPE_CQPTAIL                  0x00008080 /* Reset: PFR */
+#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT     0
+#define I40E_PFPE_CQPTAIL_WQTAIL_MASK      (0x7FF <<  I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK  (0x1 <<  I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_PFPE_FLMQ1ALLOCERR                   0x00008980 /* Reset: PFR */
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_FLMXMITALLOCERR                   0x00008900 /* Reset: PFR */
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_IPCONFIG0                        0x00008280 /* Reset: PFR */
+#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT           0
+#define I40E_PFPE_IPCONFIG0_PEIPID_MASK            (0xFFFF <<  I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_PFPE_MRTEIDXMASK                       0x00008600 /* Reset: PFR */
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_PFPE_RCVUNEXPECTEDERROR                        0x00008680 /* Reset: PFR */
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_PFPE_TCPNOWTIMER               0x00008580 /* Reset: PFR */
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+
+#define I40E_PFPE_WQEALLOC                      0x00138C00 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT         0
+#define I40E_PFPE_WQEALLOC_PEQPID_MASK          (0x3FFFF <<  I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_VFPE_AEQALLOC(_VF)          (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC_MAX_INDEX     127
+#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH(_VF)             (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH_MAX_INDEX        127
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW(_VF)            (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW_MAX_INDEX       127
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS(_VF)              (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS_MAX_INDEX         127
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT   0
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK    (0x1 <<  I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK  (0x7 <<  I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK  (0x3F <<  I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT    31
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK     (0x1 <<  I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK(_VF)         (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQACK_MAX_INDEX    127
+#define I40E_VFPE_CQACK_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQACK_PECQID_SHIFT)
+#define I40E_VFPE_CQARM(_VF)         (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQARM_MAX_INDEX    127
+#define I40E_VFPE_CQARM_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQARM_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB(_VF)         (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPDB_MAX_INDEX    127
+#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB_WQHEAD_MASK  (0x7FF <<  I40E_VFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES(_VF)                 (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES_MAX_INDEX            127
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL(_VF)             (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL_MAX_INDEX        127
+#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT     0
+#define I40E_VFPE_CQPTAIL_WQTAIL_MASK      (0x7FF <<  I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK  (0x1 <<  I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG0(_VF)                   (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG0_MAX_INDEX              127
+#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT           0
+#define I40E_VFPE_IPCONFIG0_PEIPID_MASK            (0xFFFF <<  I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK(_VF)                  (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX             127
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF)                   (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX              127
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER(_VF)          (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX     127
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC(_VF)                 (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC_MAX_INDEX            127
+#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT         0
+#define I40E_VFPE_WQEALLOC_PEQPID_MASK          (0x3FFFF <<  I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+
+#define I40E_GLPE_CPUSTATUS0                    0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
+#define I40E_GLPE_CPUSTATUS1                    0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
+#define I40E_GLPE_CPUSTATUS2                    0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK  (0xFFFFFFFF <<  I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
+#define I40E_GLPE_CPUTRIG0                   0x0000D060 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT  0
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK   (0xFFFF <<  I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK  (0x1 <<  I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK  (0x1 <<  I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
+#define I40E_GLPE_DUAL40_RUPM                     0x0000DA04 /* Reset: PE_CORER */
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK  (0x1 <<  I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
+#define I40E_GLPE_PFAEQEDROPCNT(_i)               (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX         15
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCEQEDROPCNT(_i)               (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX         15
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCQEDROPCNT(_i)              (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX        15
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_RUPM_CQPPOOL                0x0000DACC /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
+#define I40E_GLPE_RUPM_FLRPOOL                0x0000DAC4 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL                   0x0000DA00 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT    0
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK     (0xFF <<  I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK  (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT    30
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK     (0x1 <<  I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT   31
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK    (0x1 <<  I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
+#define I40E_GLPE_RUPM_PTXPOOL                0x0000DAC8 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
+#define I40E_GLPE_RUPM_PUSHPOOL                 0x0000DAC0 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK  (0xFF <<  I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
+#define I40E_GLPE_RUPM_TXHOST_EN                 0x0000DA08 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK  (0x1 <<  I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
+#define I40E_GLPE_VFAEQEDROPCNT(_i)               (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX         31
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCEQEDROPCNT(_i)               (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX         31
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCQEDROPCNT(_i)              (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX        31
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK  (0xFFFF <<  I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL(_i)                  (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX            31
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK  (0x7 <<  I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT   8
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK    (0x7 <<  I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMQ1ALLOCERR(_i)               (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX         31
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFFLMXMITALLOCERR(_i)               (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX         31
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK  (0xFFFF <<  I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFUDACTRL(_i)                    (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDACTRL_MAX_INDEX              31
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT  0
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT  1
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT  2
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT  3
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK   (0x1 <<  I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK  (0x1 <<  I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN(_i)         (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX   31
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT   0
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK    (0x3FFFF <<  I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK  (0x1 <<  I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
+
+#define I40E_GLPES_PFIP4RXDISCARD(_i)                (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSHI(_i)                (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i)                (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSHI(_i)                 (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i)                 (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSHI(_i)                 (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i)                 (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSHI(_i)               (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSLO(_i)               (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSHI(_i)               (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSLO(_i)               (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXTRUNC(_i)              (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX        15
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSHI(_i)                (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i)                (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSHI(_i)                 (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i)                 (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSHI(_i)                 (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i)                 (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXNOROUTE(_i)                (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX          15
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSHI(_i)               (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSLO(_i)               (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSHI(_i)               (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSLO(_i)               (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXDISCARD(_i)                (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSHI(_i)                (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i)                (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSHI(_i)                 (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i)                 (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSHI(_i)                 (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i)                 (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSHI(_i)               (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSLO(_i)               (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSHI(_i)               (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSLO(_i)               (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXTRUNC(_i)              (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX        15
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSHI(_i)                (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i)                (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSHI(_i)                 (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i)                 (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSHI(_i)                 (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i)                 (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX           15
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXNOROUTE(_i)                (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX          15
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSHI(_i)               (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSLO(_i)               (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSHI(_i)               (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSLO(_i)               (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSHI(_i)               (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSLO(_i)               (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSHI(_i)                (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX          15
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSLO(_i)                (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX          15
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSHI(_i)               (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSLO(_i)               (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSHI(_i)               (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSLO(_i)               (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSHI(_i)                (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX          15
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSLO(_i)                (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX          15
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSHI(_i)               (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSLO(_i)               (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX         15
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDHI(_i)              (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDLO(_i)              (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_PFRDMAVINVHI(_i)              (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_PFRDMAVINVLO(_i)              (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX        15
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_PFRXVLANERR(_i)             (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR_MAX_INDEX       15
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_PFTCPRTXSEG(_i)             (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX       15
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_PFTCPRXOPTERR(_i)               (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_PFTCPRXPROTOERR(_i)                 (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX           15
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK  (0xFFFFFF <<  I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSHI(_i)               (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK  (0xFFFF <<  I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSLO(_i)               (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX         15
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGHI(_i)              (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX        15
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK  (0xFFFF <<  I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGLO(_i)              (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX        15
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSHI(_i)               (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSLO(_i)               (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSHI(_i)               (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX         15
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSLO(_i)               (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX         15
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSHI                         0x0001E014 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK  (0xFFFFFF <<  I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSLO                         0x0001E010 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPHI                      0x0001E01C /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK  (0xFFFFFF <<  I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPLO                      0x0001E018 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
+#define I40E_GLPES_RDMARXOOONOMARK                     0x0001E004 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
+#define I40E_GLPES_RDMARXUNALIGN                     0x0001E000 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK  (0xFFFFFFFF <<  I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLEHI                       0x0001E044 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLELO                       0x0001E040 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLEHI                      0x0001E02C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLELO                      0x0001E028 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKHI                       0x0001E024 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKSLO                      0x0001E020 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLEHI                        0x0001E03C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLELO                        0x0001E038 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLEHI                      0x0001E034 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLELO                      0x0001E030 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTHI                          0x0001E04C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTLO                          0x0001E048 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTHI                        0x0001E054 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTLO                        0x0001E050 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSHI                    0x0001E05C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK  (0xFFFFFF <<  I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSLO                    0x0001E058 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXDISCARD(_i)                (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSHI(_i)                (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSLO(_i)                (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSHI(_i)                 (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSLO(_i)                 (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSHI(_i)                 (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSLO(_i)                 (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSHI(_i)               (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSLO(_i)               (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSHI(_i)               (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSLO(_i)               (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXTRUNC(_i)              (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX        31
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSHI(_i)                (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSLO(_i)                (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSHI(_i)                 (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSLO(_i)                 (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSHI(_i)                 (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSLO(_i)                 (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXNOROUTE(_i)                (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX          31
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSHI(_i)               (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSLO(_i)               (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSHI(_i)               (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSLO(_i)               (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXDISCARD(_i)                (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSHI(_i)                (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSLO(_i)                (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSHI(_i)                 (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSLO(_i)                 (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSHI(_i)                 (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSLO(_i)                 (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSHI(_i)               (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSLO(_i)               (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSHI(_i)               (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSLO(_i)               (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXTRUNC(_i)              (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX        31
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSHI(_i)                (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSLO(_i)                (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSHI(_i)                 (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSLO(_i)                 (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSHI(_i)                 (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSLO(_i)                 (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX           31
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXNOROUTE(_i)                (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX          31
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK  (0xFFFFFF <<  I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSHI(_i)               (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSLO(_i)               (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSHI(_i)               (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSLO(_i)               (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSHI(_i)               (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSLO(_i)               (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSHI(_i)                (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX          31
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSLO(_i)                (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX          31
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSHI(_i)               (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSLO(_i)               (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSHI(_i)               (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSLO(_i)               (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSHI(_i)                (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX          31
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSLO(_i)                (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX          31
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSHI(_i)               (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK  (0xFFFF <<  I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSLO(_i)               (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX         31
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDHI(_i)              (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDLO(_i)              (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_VFRDMAVINVHI(_i)              (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_VFRDMAVINVLO(_i)              (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX        31
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_VFRXVLANERR(_i)             (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRXVLANERR_MAX_INDEX       31
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_VFTCPRTXSEG(_i)             (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX       31
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_VFTCPRXOPTERR(_i)               (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_VFTCPRXPROTOERR(_i)                 (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX           31
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK  (0xFFFFFF <<  I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSHI(_i)               (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK  (0xFFFF <<  I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSLO(_i)               (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX         31
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGHI(_i)              (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX        31
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK  (0xFFFF <<  I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGLO(_i)              (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX        31
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSHI(_i)               (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSLO(_i)               (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSHI(_i)               (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX         31
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK  (0xFFFF <<  I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSLO(_i)               (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX         31
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK  (0xFFFFFFFF <<  I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+
+#define I40E_VFPE_AEQALLOC1               0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK  (0xFFFFFFFF <<  I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH1                  0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW1                 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1                   0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT   0
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK    (0x1 <<  I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK  (0x7 <<  I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK  (0x3F <<  I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT    31
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK     (0x1 <<  I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK1              0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK1_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQACK1_PECQID_SHIFT)
+#define I40E_VFPE_CQARM1              0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM1_PECQID_MASK  (0x1FFFF <<  I40E_VFPE_CQARM1_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB1              0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB1_WQHEAD_MASK  (0x7FF <<  I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES1                      0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK  (0xFFFF <<  I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL1                  0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT     0
+#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK      (0x7FF <<  I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK  (0x1 <<  I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG01                        0x00008C00 /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT           0
+#define I40E_VFPE_IPCONFIG01_PEIPID_MASK            (0xFFFF <<  I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK  (0x1 <<  I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK1                       0x00009000 /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK  (0x1F <<  I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR1                        0x00009400 /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK  (0xFFFFFF <<  I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER1               0x0000A800 /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK  (0xFFFFFFFF <<  I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC1                      0x0000C000 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT         0
+#define I40E_VFPE_WQEALLOC1_PEQPID_MASK          (0x3FFFF <<  I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK  (0xFFF <<  I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
+#endif /* I40IW_REGISTER_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
new file mode 100644
index 000000000000..b0110c15e044
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_status.h
@@ -0,0 +1,100 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_STATUS_H
+#define I40IW_STATUS_H
+
+/* Error Codes */
+enum i40iw_status_code {
+	I40IW_SUCCESS = 0,
+	I40IW_ERR_NVM = -1,
+	I40IW_ERR_NVM_CHECKSUM = -2,
+	I40IW_ERR_CONFIG = -4,
+	I40IW_ERR_PARAM = -5,
+	I40IW_ERR_DEVICE_NOT_SUPPORTED = -6,
+	I40IW_ERR_RESET_FAILED = -7,
+	I40IW_ERR_SWFW_SYNC = -8,
+	I40IW_ERR_NO_MEMORY = -9,
+	I40IW_ERR_BAD_PTR = -10,
+	I40IW_ERR_INVALID_PD_ID = -11,
+	I40IW_ERR_INVALID_QP_ID = -12,
+	I40IW_ERR_INVALID_CQ_ID = -13,
+	I40IW_ERR_INVALID_CEQ_ID = -14,
+	I40IW_ERR_INVALID_AEQ_ID = -15,
+	I40IW_ERR_INVALID_SIZE = -16,
+	I40IW_ERR_INVALID_ARP_INDEX = -17,
+	I40IW_ERR_INVALID_FPM_FUNC_ID = -18,
+	I40IW_ERR_QP_INVALID_MSG_SIZE = -19,
+	I40IW_ERR_QP_TOOMANY_WRS_POSTED = -20,
+	I40IW_ERR_INVALID_FRAG_COUNT = -21,
+	I40IW_ERR_QUEUE_EMPTY = -22,
+	I40IW_ERR_INVALID_ALIGNMENT = -23,
+	I40IW_ERR_FLUSHED_QUEUE = -24,
+	I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
+	I40IW_ERR_INVALID_IMM_DATA_SIZE = -26,
+	I40IW_ERR_TIMEOUT = -27,
+	I40IW_ERR_OPCODE_MISMATCH = -28,
+	I40IW_ERR_CQP_COMPL_ERROR = -29,
+	I40IW_ERR_INVALID_VF_ID = -30,
+	I40IW_ERR_INVALID_HMCFN_ID = -31,
+	I40IW_ERR_BACKING_PAGE_ERROR = -32,
+	I40IW_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
+	I40IW_ERR_INVALID_PBLE_INDEX = -34,
+	I40IW_ERR_INVALID_SD_INDEX = -35,
+	I40IW_ERR_INVALID_PAGE_DESC_INDEX = -36,
+	I40IW_ERR_INVALID_SD_TYPE = -37,
+	I40IW_ERR_MEMCPY_FAILED = -38,
+	I40IW_ERR_INVALID_HMC_OBJ_INDEX = -39,
+	I40IW_ERR_INVALID_HMC_OBJ_COUNT = -40,
+	I40IW_ERR_INVALID_SRQ_ARM_LIMIT = -41,
+	I40IW_ERR_SRQ_ENABLED = -42,
+	I40IW_ERR_BUF_TOO_SHORT = -43,
+	I40IW_ERR_BAD_IWARP_CQE = -44,
+	I40IW_ERR_NVM_BLANK_MODE = -45,
+	I40IW_ERR_NOT_IMPLEMENTED = -46,
+	I40IW_ERR_PE_DOORBELL_NOT_ENABLED = -47,
+	I40IW_ERR_NOT_READY = -48,
+	I40IW_NOT_SUPPORTED = -49,
+	I40IW_ERR_FIRMWARE_API_VERSION = -50,
+	I40IW_ERR_RING_FULL = -51,
+	I40IW_ERR_MPA_CRC = -61,
+	I40IW_ERR_NO_TXBUFS = -62,
+	I40IW_ERR_SEQ_NUM = -63,
+	I40IW_ERR_list_empty = -64,
+	I40IW_ERR_INVALID_MAC_ADDR = -65,
+	I40IW_ERR_BAD_STAG      = -66,
+	I40IW_ERR_CQ_COMPL_ERROR = -67,
+
+};
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
new file mode 100644
index 000000000000..edb3a8c8267a
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -0,0 +1,1312 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_TYPE_H
+#define I40IW_TYPE_H
+#include "i40iw_user.h"
+#include "i40iw_hmc.h"
+#include "i40iw_vf.h"
+#include "i40iw_virtchnl.h"
+
+struct i40iw_cqp_sq_wqe {
+	u64 buf[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_aeqe {
+	u64 buf[I40IW_AEQE_SIZE];
+};
+
+struct i40iw_ceqe {
+	u64 buf[I40IW_CEQE_SIZE];
+};
+
+struct i40iw_cqp_ctx {
+	u64 buf[I40IW_CQP_CTX_SIZE];
+};
+
+struct i40iw_cq_shadow_area {
+	u64 buf[I40IW_SHADOW_AREA_SIZE];
+};
+
+struct i40iw_sc_dev;
+struct i40iw_hmc_info;
+struct i40iw_dev_pestat;
+
+struct i40iw_cqp_ops;
+struct i40iw_ccq_ops;
+struct i40iw_ceq_ops;
+struct i40iw_aeq_ops;
+struct i40iw_mr_ops;
+struct i40iw_cqp_misc_ops;
+struct i40iw_pd_ops;
+struct i40iw_priv_qp_ops;
+struct i40iw_priv_cq_ops;
+struct i40iw_hmc_ops;
+
+enum i40iw_resource_indicator_type {
+	I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
+	I40IW_RSRC_INDICATOR_TYPE_CQ,
+	I40IW_RSRC_INDICATOR_TYPE_QP,
+	I40IW_RSRC_INDICATOR_TYPE_SRQ
+};
+
+enum i40iw_hdrct_flags {
+	DDP_LEN_FLAG = 0x80,
+	DDP_HDR_FLAG = 0x40,
+	RDMA_HDR_FLAG = 0x20
+};
+
+enum i40iw_term_layers {
+	LAYER_RDMA = 0,
+	LAYER_DDP = 1,
+	LAYER_MPA = 2
+};
+
+enum i40iw_term_error_types {
+	RDMAP_REMOTE_PROT = 1,
+	RDMAP_REMOTE_OP = 2,
+	DDP_CATASTROPHIC = 0,
+	DDP_TAGGED_BUFFER = 1,
+	DDP_UNTAGGED_BUFFER = 2,
+	DDP_LLP = 3
+};
+
+enum i40iw_term_rdma_errors {
+	RDMAP_INV_STAG = 0x00,
+	RDMAP_INV_BOUNDS = 0x01,
+	RDMAP_ACCESS = 0x02,
+	RDMAP_UNASSOC_STAG = 0x03,
+	RDMAP_TO_WRAP = 0x04,
+	RDMAP_INV_RDMAP_VER = 0x05,
+	RDMAP_UNEXPECTED_OP = 0x06,
+	RDMAP_CATASTROPHIC_LOCAL = 0x07,
+	RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+	RDMAP_CANT_INV_STAG = 0x09,
+	RDMAP_UNSPECIFIED = 0xff
+};
+
+enum i40iw_term_ddp_errors {
+	DDP_CATASTROPHIC_LOCAL = 0x00,
+	DDP_TAGGED_INV_STAG = 0x00,
+	DDP_TAGGED_BOUNDS = 0x01,
+	DDP_TAGGED_UNASSOC_STAG = 0x02,
+	DDP_TAGGED_TO_WRAP = 0x03,
+	DDP_TAGGED_INV_DDP_VER = 0x04,
+	DDP_UNTAGGED_INV_QN = 0x01,
+	DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+	DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+	DDP_UNTAGGED_INV_MO = 0x04,
+	DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+	DDP_UNTAGGED_INV_DDP_VER = 0x06
+};
+
+enum i40iw_term_mpa_errors {
+	MPA_CLOSED = 0x01,
+	MPA_CRC = 0x02,
+	MPA_MARKER = 0x03,
+	MPA_REQ_RSP = 0x04,
+};
+
+enum i40iw_flush_opcode {
+	FLUSH_INVALID = 0,
+	FLUSH_PROT_ERR,
+	FLUSH_REM_ACCESS_ERR,
+	FLUSH_LOC_QP_OP_ERR,
+	FLUSH_REM_OP_ERR,
+	FLUSH_LOC_LEN_ERR,
+	FLUSH_GENERAL_ERR,
+	FLUSH_FATAL_ERR
+};
+
+enum i40iw_term_eventtypes {
+	TERM_EVENT_QP_FATAL,
+	TERM_EVENT_QP_ACCESS_ERR
+};
+
+struct i40iw_terminate_hdr {
+	u8 layer_etype;
+	u8 error_code;
+	u8 hdrct;
+	u8 rsvd;
+};
+
+enum i40iw_debug_flag {
+	I40IW_DEBUG_NONE	= 0x00000000,
+	I40IW_DEBUG_ERR		= 0x00000001,
+	I40IW_DEBUG_INIT	= 0x00000002,
+	I40IW_DEBUG_DEV		= 0x00000004,
+	I40IW_DEBUG_CM		= 0x00000008,
+	I40IW_DEBUG_VERBS	= 0x00000010,
+	I40IW_DEBUG_PUDA	= 0x00000020,
+	I40IW_DEBUG_ILQ		= 0x00000040,
+	I40IW_DEBUG_IEQ		= 0x00000080,
+	I40IW_DEBUG_QP		= 0x00000100,
+	I40IW_DEBUG_CQ		= 0x00000200,
+	I40IW_DEBUG_MR		= 0x00000400,
+	I40IW_DEBUG_PBLE	= 0x00000800,
+	I40IW_DEBUG_WQE		= 0x00001000,
+	I40IW_DEBUG_AEQ		= 0x00002000,
+	I40IW_DEBUG_CQP		= 0x00004000,
+	I40IW_DEBUG_HMC		= 0x00008000,
+	I40IW_DEBUG_USER	= 0x00010000,
+	I40IW_DEBUG_VIRT	= 0x00020000,
+	I40IW_DEBUG_DCB		= 0x00040000,
+	I40IW_DEBUG_CQE		= 0x00800000,
+	I40IW_DEBUG_ALL		= 0xFFFFFFFF
+};
+
+enum i40iw_hw_stat_index_32b {
+	I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
+	I40IW_HW_STAT_INDEX_IP4RXTRUNC,
+	I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
+	I40IW_HW_STAT_INDEX_IP6RXDISCARD,
+	I40IW_HW_STAT_INDEX_IP6RXTRUNC,
+	I40IW_HW_STAT_INDEX_IP6TXNOROUTE,
+	I40IW_HW_STAT_INDEX_TCPRTXSEG,
+	I40IW_HW_STAT_INDEX_TCPRXOPTERR,
+	I40IW_HW_STAT_INDEX_TCPRXPROTOERR,
+	I40IW_HW_STAT_INDEX_MAX_32
+};
+
+enum i40iw_hw_stat_index_64b {
+	I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
+	I40IW_HW_STAT_INDEX_IP4RXPKTS,
+	I40IW_HW_STAT_INDEX_IP4RXFRAGS,
+	I40IW_HW_STAT_INDEX_IP4RXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP4TXOCTS,
+	I40IW_HW_STAT_INDEX_IP4TXPKTS,
+	I40IW_HW_STAT_INDEX_IP4TXFRAGS,
+	I40IW_HW_STAT_INDEX_IP4TXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP6RXOCTS,
+	I40IW_HW_STAT_INDEX_IP6RXPKTS,
+	I40IW_HW_STAT_INDEX_IP6RXFRAGS,
+	I40IW_HW_STAT_INDEX_IP6RXMCPKTS,
+	I40IW_HW_STAT_INDEX_IP6TXOCTS,
+	I40IW_HW_STAT_INDEX_IP6TXPKTS,
+	I40IW_HW_STAT_INDEX_IP6TXFRAGS,
+	I40IW_HW_STAT_INDEX_IP6TXMCPKTS,
+	I40IW_HW_STAT_INDEX_TCPRXSEGS,
+	I40IW_HW_STAT_INDEX_TCPTXSEG,
+	I40IW_HW_STAT_INDEX_RDMARXRDS,
+	I40IW_HW_STAT_INDEX_RDMARXSNDS,
+	I40IW_HW_STAT_INDEX_RDMARXWRS,
+	I40IW_HW_STAT_INDEX_RDMATXRDS,
+	I40IW_HW_STAT_INDEX_RDMATXSNDS,
+	I40IW_HW_STAT_INDEX_RDMATXWRS,
+	I40IW_HW_STAT_INDEX_RDMAVBND,
+	I40IW_HW_STAT_INDEX_RDMAVINV,
+	I40IW_HW_STAT_INDEX_MAX_64
+};
+
+struct i40iw_dev_hw_stat_offsets {
+	u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_dev_hw_stats {
+	u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+	u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
+};
+
+struct i40iw_device_pestat_ops {
+	void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
+	void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
+	void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
+	void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
+	void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
+};
+
+struct i40iw_dev_pestat {
+	struct i40iw_hw *hw;
+	struct i40iw_device_pestat_ops ops;
+	struct i40iw_dev_hw_stats hw_stats;
+	struct i40iw_dev_hw_stats last_read_hw_stats;
+	struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+	struct timer_list stats_timer;
+	spinlock_t stats_lock; /* rdma stats lock */
+};
+
+struct i40iw_hw {
+	u8 __iomem *hw_addr;
+	void *dev_context;
+	struct i40iw_hmc_info hmc;
+};
+
+struct i40iw_pfpdu {
+	struct list_head rxlist;
+	u32 rcv_nxt;
+	u32 fps;
+	u32 max_fpdu_data;
+	bool mode;
+	bool mpa_crc_err;
+	u64 total_ieq_bufs;
+	u64 fpdu_processed;
+	u64 bad_seq_num;
+	u64 crc_err;
+	u64 no_tx_bufs;
+	u64 tx_err;
+	u64 out_of_order;
+	u64 pmode_count;
+};
+
+struct i40iw_sc_pd {
+	u32 size;
+	struct i40iw_sc_dev *dev;
+	u16 pd_id;
+};
+
+struct i40iw_cqp_quanta {
+	u64 elem[I40IW_CQP_WQE_SIZE];
+};
+
+struct i40iw_sc_cqp {
+	u32 size;
+	u64 sq_pa;
+	u64 host_ctx_pa;
+	void *back_cqp;
+	struct i40iw_sc_dev *dev;
+	enum i40iw_status_code (*process_cqp_sds)(struct i40iw_sc_dev *,
+						  struct i40iw_update_sds_info *);
+	struct i40iw_dma_mem sdbuf;
+	struct i40iw_ring sq_ring;
+	struct i40iw_cqp_quanta *sq_base;
+	u64 *host_ctx;
+	u64 *scratch_array;
+	u32 cqp_id;
+	u32 sq_size;
+	u32 hw_sq_size;
+	u8 struct_ver;
+	u8 polarity;
+	bool en_datacenter_tcp;
+	u8 hmc_profile;
+	u8 enabled_vf_count;
+	u8 timeout_count;
+};
+
+struct i40iw_sc_aeq {
+	u32 size;
+	u64 aeq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_aeqe *aeqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	struct i40iw_ring aeq_ring;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+	u8 polarity;
+};
+
+struct i40iw_sc_ceq {
+	u32 size;
+	u64 ceq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_ceqe *ceqe_base;
+	void *pbl_list;
+	u32 ceq_id;
+	u32 elem_cnt;
+	struct i40iw_ring ceq_ring;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+	u8 polarity;
+};
+
+struct i40iw_sc_cq {
+	struct i40iw_cq_uk cq_uk;
+	u64 cq_pa;
+	u64 shadow_area_pa;
+	struct i40iw_sc_dev *dev;
+	void *pbl_list;
+	void *back_cq;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool ceqe_mask;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u8 cq_type;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+	bool check_overflow;
+};
+
+struct i40iw_sc_qp {
+	struct i40iw_qp_uk qp_uk;
+	u64 sq_pa;
+	u64 rq_pa;
+	u64 hw_host_ctx_pa;
+	u64 shadow_area_pa;
+	u64 q2_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_sc_pd *pd;
+	u64 *hw_host_ctx;
+	void *llp_stream_handle;
+	void *back_qp;
+	struct i40iw_pfpdu pfpdu;
+	u8 *q2_buf;
+	u64 qp_compl_ctx;
+	u16 qs_handle;
+	u16 exception_lan_queue;
+	u16 push_idx;
+	u8 sq_tph_val;
+	u8 rq_tph_val;
+	u8 qp_state;
+	u8 qp_type;
+	u8 hw_sq_size;
+	u8 hw_rq_size;
+	u8 src_mac_addr_idx;
+	bool sq_tph_en;
+	bool rq_tph_en;
+	bool rcv_tph_en;
+	bool xmit_tph_en;
+	bool virtual_map;
+	bool flush_sq;
+	bool flush_rq;
+	bool sq_flush;
+	enum i40iw_flush_opcode flush_code;
+	enum i40iw_term_eventtypes eventtype;
+	u8 term_flags;
+};
+
+struct i40iw_hmc_fpm_misc {
+	u32 max_ceqs;
+	u32 max_sds;
+	u32 xf_block_size;
+	u32 q1_block_size;
+	u32 ht_multiplier;
+	u32 timer_bucket;
+};
+
+struct i40iw_vchnl_if {
+	enum i40iw_status_code (*vchnl_recv)(struct i40iw_sc_dev *, u32, u8 *, u16);
+	enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *dev, u32, u8 *, u16);
+};
+
+#define I40IW_VCHNL_MAX_VF_MSG_SIZE 512
+
+struct i40iw_vchnl_vf_msg_buffer {
+	struct i40iw_virtchnl_op_buf vchnl_msg;
+	char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
+};
+
+struct i40iw_vfdev {
+	struct i40iw_sc_dev *pf_dev;
+	u8 *hmc_info_mem;
+	struct i40iw_dev_pestat dev_pestat;
+	struct i40iw_hmc_pble_info *pble_info;
+	struct i40iw_hmc_info hmc_info;
+	struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
+	u64 fpm_query_buf_pa;
+	u64 *fpm_query_buf;
+	u32 vf_id;
+	u32 msg_count;
+	bool pf_hmc_initialized;
+	u16 pmf_index;
+	u16 iw_vf_idx;		/* VF Device table index */
+	bool stats_initialized;
+};
+
+struct i40iw_sc_dev {
+	struct list_head cqp_cmd_head;	/* head of the CQP command list */
+	spinlock_t cqp_lock; /* cqp list sync */
+	struct i40iw_dev_uk dev_uk;
+	struct i40iw_dev_pestat dev_pestat;
+	struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	u64 fpm_query_buf_pa;
+	u64 fpm_commit_buf_pa;
+	u64 *fpm_query_buf;
+	u64 *fpm_commit_buf;
+	void *back_dev;
+	struct i40iw_hw *hw;
+	u8 __iomem *db_addr;
+	struct i40iw_hmc_info *hmc_info;
+	struct i40iw_hmc_pble_info *pble_info;
+	struct i40iw_vfdev *vf_dev[I40IW_MAX_PE_ENABLED_VF_COUNT];
+	struct i40iw_sc_cqp *cqp;
+	struct i40iw_sc_aeq *aeq;
+	struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
+	struct i40iw_sc_cq *ccq;
+	struct i40iw_cqp_ops *cqp_ops;
+	struct i40iw_ccq_ops *ccq_ops;
+	struct i40iw_ceq_ops *ceq_ops;
+	struct i40iw_aeq_ops *aeq_ops;
+	struct i40iw_pd_ops *iw_pd_ops;
+	struct i40iw_priv_qp_ops *iw_priv_qp_ops;
+	struct i40iw_priv_cq_ops *iw_priv_cq_ops;
+	struct i40iw_mr_ops *mr_ops;
+	struct i40iw_cqp_misc_ops *cqp_misc_ops;
+	struct i40iw_hmc_ops *hmc_ops;
+	struct i40iw_vchnl_if vchnl_if;
+	u32 ilq_count;
+	struct i40iw_virt_mem ilq_mem;
+	struct i40iw_puda_rsrc *ilq;
+	u32 ieq_count;
+	struct i40iw_virt_mem ieq_mem;
+	struct i40iw_puda_rsrc *ieq;
+
+	struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
+
+	struct i40iw_hmc_fpm_misc hmc_fpm_misc;
+	u16 qs_handle;
+	u32	debug_mask;
+	u16 exception_lan_queue;
+	u8 hmc_fn_id;
+	bool is_pf;
+	bool vchnl_up;
+	u8 vf_id;
+	u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
+	struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
+	u8 hw_rev;
+};
+
+struct i40iw_modify_cq_info {
+	u64 cq_pa;
+	struct i40iw_cqe *cq_base;
+	void *pbl_list;
+	u32 ceq_id;
+	u32 cq_size;
+	u32 shadow_read_threshold;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool check_overflow;
+	bool cq_resize;
+	bool ceq_change;
+	bool check_overflow_change;
+	u32 first_pm_pbl_idx;
+	bool ceq_valid;
+};
+
+struct i40iw_create_qp_info {
+	u8 next_iwarp_state;
+	bool ord_valid;
+	bool tcp_ctx_valid;
+	bool cq_num_valid;
+	bool static_rsrc;
+	bool arp_cache_idx_valid;
+};
+
+struct i40iw_modify_qp_info {
+	u64 rx_win0;
+	u64 rx_win1;
+	u16 new_mss;
+	u8 next_iwarp_state;
+	u8 termlen;
+	bool ord_valid;
+	bool tcp_ctx_valid;
+	bool cq_num_valid;
+	bool static_rsrc;
+	bool arp_cache_idx_valid;
+	bool reset_tcp_conn;
+	bool remove_hash_idx;
+	bool dont_send_term;
+	bool dont_send_fin;
+	bool cached_var_valid;
+	bool mss_change;
+	bool force_loopback;
+};
+
+struct i40iw_ccq_cqe_info {
+	struct i40iw_sc_cqp *cqp;
+	u64 scratch;
+	u32 op_ret_val;
+	u16 maj_err_code;
+	u16 min_err_code;
+	u8 op_code;
+	bool error;
+};
+
+struct i40iw_l2params {
+	u16 qs_handle_list[I40IW_MAX_USER_PRIORITY];
+	u16 mss;
+};
+
+struct i40iw_device_init_info {
+	u64 fpm_query_buf_pa;
+	u64 fpm_commit_buf_pa;
+	u64 *fpm_query_buf;
+	u64 *fpm_commit_buf;
+	struct i40iw_hw *hw;
+	void __iomem *bar0;
+	enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
+	u16 qs_handle;
+	u16 exception_lan_queue;
+	u8 hmc_fn_id;
+	bool is_pf;
+	u32 debug_mask;
+};
+
+enum i40iw_cqp_hmc_profile {
+	I40IW_HMC_PROFILE_DEFAULT = 1,
+	I40IW_HMC_PROFILE_FAVOR_VF = 2,
+	I40IW_HMC_PROFILE_EQUAL = 3,
+};
+
+struct i40iw_cqp_init_info {
+	u64 cqp_compl_ctx;
+	u64 host_ctx_pa;
+	u64 sq_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cqp_quanta *sq;
+	u64 *host_ctx;
+	u64 *scratch_array;
+	u32 sq_size;
+	u8 struct_ver;
+	bool en_datacenter_tcp;
+	u8 hmc_profile;
+	u8 enabled_vf_count;
+};
+
+struct i40iw_ceq_init_info {
+	u64 ceqe_pa;
+	struct i40iw_sc_dev *dev;
+	u64 *ceqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	u32 ceq_id;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	bool tph_en;
+	u8 tph_val;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iw_aeq_init_info {
+	u64 aeq_elem_pa;
+	struct i40iw_sc_dev *dev;
+	u32 *aeqe_base;
+	void *pbl_list;
+	u32 elem_cnt;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iw_ccq_init_info {
+	u64 cq_pa;
+	u64 shadow_area_pa;
+	struct i40iw_sc_dev *dev;
+	struct i40iw_cqe *cq_base;
+	u64 *shadow_area;
+	void *pbl_list;
+	u32 num_elem;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool ceqe_mask;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	bool avoid_mem_cflct;
+	bool virtual_map;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+};
+
+struct i40iwarp_offload_info {
+	u16 rcv_mark_offset;
+	u16 snd_mark_offset;
+	u16 pd_id;
+	u8 ddp_ver;
+	u8 rdmap_ver;
+	u8 ord_size;
+	u8 ird_size;
+	bool wr_rdresp_en;
+	bool rd_enable;
+	bool snd_mark_en;
+	bool rcv_mark_en;
+	bool bind_en;
+	bool fast_reg_en;
+	bool priv_mode_en;
+	bool lsmm_present;
+	u8 iwarp_mode;
+	bool align_hdrs;
+	bool rcv_no_mpa_crc;
+
+	u8 last_byte_sent;
+};
+
+struct i40iw_tcp_offload_info {
+	bool ipv4;
+	bool no_nagle;
+	bool insert_vlan_tag;
+	bool time_stamp;
+	u8 cwnd_inc_limit;
+	bool drop_ooo_seg;
+	bool dup_ack_thresh;
+	u8 ttl;
+	u8 src_mac_addr_idx;
+	bool avoid_stretch_ack;
+	u8 tos;
+	u16 src_port;
+	u16 dst_port;
+	u32 dest_ip_addr0;
+	u32 dest_ip_addr1;
+	u32 dest_ip_addr2;
+	u32 dest_ip_addr3;
+	u32 snd_mss;
+	u16 vlan_tag;
+	u16 arp_idx;
+	u32 flow_label;
+	bool wscale;
+	u8 tcp_state;
+	u8 snd_wscale;
+	u8 rcv_wscale;
+	u32 time_stamp_recent;
+	u32 time_stamp_age;
+	u32 snd_nxt;
+	u32 snd_wnd;
+	u32 rcv_nxt;
+	u32 rcv_wnd;
+	u32 snd_max;
+	u32 snd_una;
+	u32 srtt;
+	u32 rtt_var;
+	u32 ss_thresh;
+	u32 cwnd;
+	u32 snd_wl1;
+	u32 snd_wl2;
+	u32 max_snd_window;
+	u8 rexmit_thresh;
+	u32 local_ipaddr0;
+	u32 local_ipaddr1;
+	u32 local_ipaddr2;
+	u32 local_ipaddr3;
+	bool ignore_tcp_opt;
+	bool ignore_tcp_uns_opt;
+};
+
+struct i40iw_qp_host_ctx_info {
+	u64 qp_compl_ctx;
+	struct i40iw_tcp_offload_info *tcp_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	u32 send_cq_num;
+	u32 rcv_cq_num;
+	u16 push_idx;
+	bool push_mode_en;
+	bool tcp_info_valid;
+	bool iwarp_info_valid;
+	bool err_rq_idx_valid;
+	u16 err_rq_idx;
+};
+
+struct i40iw_aeqe_info {
+	u64 compl_ctx;
+	u32 qp_cq_id;
+	u16 ae_id;
+	u16 wqe_idx;
+	u8 tcp_state;
+	u8 iwarp_state;
+	bool qp;
+	bool cq;
+	bool sq;
+	bool in_rdrsp_wr;
+	bool out_rdrsp;
+	u8 q2_data_written;
+	bool aeqe_overflow;
+};
+
+struct i40iw_allocate_stag_info {
+	u64 total_len;
+	u32 chunk_size;
+	u32 stag_idx;
+	u32 page_size;
+	u16 pd_id;
+	u16 access_rights;
+	bool remote_access;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+};
+
+struct i40iw_reg_ns_stag_info {
+	u64 reg_addr_pa;
+	u64 fbo;
+	void *va;
+	u64 total_len;
+	u32 page_size;
+	u32 chunk_size;
+	u32 first_pm_pbl_index;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index stag_idx;
+	u16 access_rights;
+	u16 pd_id;
+	i40iw_stag_key stag_key;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+};
+
+struct i40iw_fast_reg_stag_info {
+	u64 wr_id;
+	u64 reg_addr_pa;
+	u64 fbo;
+	void *va;
+	u64 total_len;
+	u32 page_size;
+	u32 chunk_size;
+	u32 first_pm_pbl_index;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index stag_idx;
+	u16 access_rights;
+	u16 pd_id;
+	i40iw_stag_key stag_key;
+	bool local_fence;
+	bool read_fence;
+	bool signaled;
+	bool use_hmc_fcn_index;
+	u8 hmc_fcn_index;
+	bool use_pf_rid;
+	bool defer_flag;
+};
+
+struct i40iw_dealloc_stag_info {
+	u32 stag_idx;
+	u16 pd_id;
+	bool mr;
+	bool dealloc_pbl;
+};
+
+struct i40iw_register_shared_stag {
+	void *va;
+	enum i40iw_addressing_type addr_type;
+	i40iw_stag_index new_stag_idx;
+	i40iw_stag_index parent_stag_idx;
+	u32 access_rights;
+	u16 pd_id;
+	i40iw_stag_key new_stag_key;
+};
+
+struct i40iw_qp_init_info {
+	struct i40iw_qp_uk_init_info qp_uk_init_info;
+	struct i40iw_sc_pd *pd;
+	u64 *host_ctx;
+	u8 *q2;
+	u64 sq_pa;
+	u64 rq_pa;
+	u64 host_ctx_pa;
+	u64 q2_pa;
+	u64 shadow_area_pa;
+	u8 sq_tph_val;
+	u8 rq_tph_val;
+	u8 type;
+	bool sq_tph_en;
+	bool rq_tph_en;
+	bool rcv_tph_en;
+	bool xmit_tph_en;
+	bool virtual_map;
+};
+
+struct i40iw_cq_init_info {
+	struct i40iw_sc_dev *dev;
+	u64 cq_base_pa;
+	u64 shadow_area_pa;
+	u32 ceq_id;
+	u32 shadow_read_threshold;
+	bool virtual_map;
+	bool ceqe_mask;
+	u8 pbl_chunk_size;
+	u32 first_pm_pbl_idx;
+	bool ceq_id_valid;
+	bool tph_en;
+	u8 tph_val;
+	u8 type;
+	struct i40iw_cq_uk_init_info cq_uk_init_info;
+};
+
+struct i40iw_upload_context_info {
+	u64 buf_pa;
+	bool freeze_qp;
+	bool raw_format;
+	u32 qp_id;
+	u8 qp_type;
+};
+
+struct i40iw_add_arp_cache_entry_info {
+	u8 mac_addr[6];
+	u32 reach_max;
+	u16 arp_index;
+	bool permanent;
+};
+
+struct i40iw_apbvt_info {
+	u16 port;
+	bool add;
+};
+
+enum i40iw_quad_entry_type {
+	I40IW_QHASH_TYPE_TCP_ESTABLISHED = 1,
+	I40IW_QHASH_TYPE_TCP_SYN,
+};
+
+enum i40iw_quad_hash_manage_type {
+	I40IW_QHASH_MANAGE_TYPE_DELETE = 0,
+	I40IW_QHASH_MANAGE_TYPE_ADD,
+	I40IW_QHASH_MANAGE_TYPE_MODIFY
+};
+
+struct i40iw_qhash_table_info {
+	enum i40iw_quad_hash_manage_type manage;
+	enum i40iw_quad_entry_type entry_type;
+	bool vlan_valid;
+	bool ipv4_valid;
+	u8 mac_addr[6];
+	u16 vlan_id;
+	u16 qs_handle;
+	u32 qp_num;
+	u32 dest_ip[4];
+	u32 src_ip[4];
+	u32 dest_port;
+	u32 src_port;
+};
+
+struct i40iw_local_mac_ipaddr_entry_info {
+	u8 mac_addr[6];
+	u8 entry_idx;
+};
+
+struct i40iw_cqp_manage_push_page_info {
+	u32 push_idx;
+	u16 qs_handle;
+	u8 free_page;
+};
+
+struct i40iw_qp_flush_info {
+	u16 sq_minor_code;
+	u16 sq_major_code;
+	u16 rq_minor_code;
+	u16 rq_major_code;
+	u16 ae_code;
+	u8 ae_source;
+	bool sq;
+	bool rq;
+	bool userflushcode;
+	bool generate_ae;
+};
+
+struct i40iw_cqp_commit_fpm_values {
+	u64 qp_base;
+	u64 cq_base;
+	u32 hte_base;
+	u32 arp_base;
+	u32 apbvt_inuse_base;
+	u32 mr_base;
+	u32 xf_base;
+	u32 xffl_base;
+	u32 q1_base;
+	u32 q1fl_base;
+	u32 fsimc_base;
+	u32 fsiav_base;
+	u32 pbl_base;
+
+	u32 qp_cnt;
+	u32 cq_cnt;
+	u32 hte_cnt;
+	u32 arp_cnt;
+	u32 mr_cnt;
+	u32 xf_cnt;
+	u32 xffl_cnt;
+	u32 q1_cnt;
+	u32 q1fl_cnt;
+	u32 fsimc_cnt;
+	u32 fsiav_cnt;
+	u32 pbl_cnt;
+};
+
+struct i40iw_cqp_query_fpm_values {
+	u16 first_pe_sd_index;
+	u32 qp_objsize;
+	u32 cq_objsize;
+	u32 hte_objsize;
+	u32 arp_objsize;
+	u32 mr_objsize;
+	u32 xf_objsize;
+	u32 q1_objsize;
+	u32 fsimc_objsize;
+	u32 fsiav_objsize;
+
+	u32 qp_max;
+	u32 cq_max;
+	u32 hte_max;
+	u32 arp_max;
+	u32 mr_max;
+	u32 xf_max;
+	u32 xffl_max;
+	u32 q1_max;
+	u32 q1fl_max;
+	u32 fsimc_max;
+	u32 fsiav_max;
+	u32 pbl_max;
+};
+
+struct i40iw_cqp_ops {
+	enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
+					   struct i40iw_cqp_init_info *);
+	enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+	void (*cqp_post_sq)(struct i40iw_sc_cqp *);
+	u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
+	enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*poll_for_cqp_op_done)(struct i40iw_sc_cqp *, u8,
+						       struct i40iw_ccq_cqe_info *);
+};
+
+struct i40iw_ccq_ops {
+	enum i40iw_status_code (*ccq_init)(struct i40iw_sc_cq *,
+					   struct i40iw_ccq_init_info *);
+	enum i40iw_status_code (*ccq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+	enum i40iw_status_code (*ccq_destroy)(struct i40iw_sc_cq *, u64, bool);
+	enum i40iw_status_code (*ccq_create_done)(struct i40iw_sc_cq *);
+	enum i40iw_status_code (*ccq_get_cqe_info)(struct i40iw_sc_cq *,
+						   struct i40iw_ccq_cqe_info *);
+	void (*ccq_arm)(struct i40iw_sc_cq *);
+};
+
+struct i40iw_ceq_ops {
+	enum i40iw_status_code (*ceq_init)(struct i40iw_sc_ceq *,
+					   struct i40iw_ceq_init_info *);
+	enum i40iw_status_code (*ceq_create)(struct i40iw_sc_ceq *, u64, bool);
+	enum i40iw_status_code (*cceq_create_done)(struct i40iw_sc_ceq *);
+	enum i40iw_status_code (*cceq_destroy_done)(struct i40iw_sc_ceq *);
+	enum i40iw_status_code (*cceq_create)(struct i40iw_sc_ceq *, u64);
+	enum i40iw_status_code (*ceq_destroy)(struct i40iw_sc_ceq *, u64, bool);
+	void *(*process_ceq)(struct i40iw_sc_dev *, struct i40iw_sc_ceq *);
+};
+
+struct i40iw_aeq_ops {
+	enum i40iw_status_code (*aeq_init)(struct i40iw_sc_aeq *,
+					   struct i40iw_aeq_init_info *);
+	enum i40iw_status_code (*aeq_create)(struct i40iw_sc_aeq *, u64, bool);
+	enum i40iw_status_code (*aeq_destroy)(struct i40iw_sc_aeq *, u64, bool);
+	enum i40iw_status_code (*get_next_aeqe)(struct i40iw_sc_aeq *,
+						struct i40iw_aeqe_info *);
+	enum i40iw_status_code (*repost_aeq_entries)(struct i40iw_sc_dev *, u32);
+	enum i40iw_status_code (*aeq_create_done)(struct i40iw_sc_aeq *);
+	enum i40iw_status_code (*aeq_destroy_done)(struct i40iw_sc_aeq *);
+};
+
+struct i40iw_pd_ops {
+	void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16);
+};
+
+struct i40iw_priv_qp_ops {
+	enum i40iw_status_code (*qp_init)(struct i40iw_sc_qp *, struct i40iw_qp_init_info *);
+	enum i40iw_status_code (*qp_create)(struct i40iw_sc_qp *,
+					    struct i40iw_create_qp_info *, u64, bool);
+	enum i40iw_status_code (*qp_modify)(struct i40iw_sc_qp *,
+					    struct i40iw_modify_qp_info *, u64, bool);
+	enum i40iw_status_code (*qp_destroy)(struct i40iw_sc_qp *, u64, bool, bool, bool);
+	enum i40iw_status_code (*qp_flush_wqes)(struct i40iw_sc_qp *,
+						struct i40iw_qp_flush_info *, u64, bool);
+	enum i40iw_status_code (*qp_upload_context)(struct i40iw_sc_dev *,
+						    struct i40iw_upload_context_info *,
+						    u64, bool);
+	enum i40iw_status_code (*qp_setctx)(struct i40iw_sc_qp *, u64 *,
+					    struct i40iw_qp_host_ctx_info *);
+
+	void (*qp_send_lsmm)(struct i40iw_sc_qp *, void *, u32, i40iw_stag);
+	void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
+	void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
+	enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
+};
+
+struct i40iw_priv_cq_ops {
+	enum i40iw_status_code (*cq_init)(struct i40iw_sc_cq *, struct i40iw_cq_init_info *);
+	enum i40iw_status_code (*cq_create)(struct i40iw_sc_cq *, u64, bool, bool);
+	enum i40iw_status_code (*cq_destroy)(struct i40iw_sc_cq *, u64, bool);
+	enum i40iw_status_code (*cq_modify)(struct i40iw_sc_cq *,
+					    struct i40iw_modify_cq_info *, u64, bool);
+};
+
+struct i40iw_mr_ops {
+	enum i40iw_status_code (*alloc_stag)(struct i40iw_sc_dev *,
+					     struct i40iw_allocate_stag_info *, u64, bool);
+	enum i40iw_status_code (*mr_reg_non_shared)(struct i40iw_sc_dev *,
+						    struct i40iw_reg_ns_stag_info *,
+						    u64, bool);
+	enum i40iw_status_code (*mr_reg_shared)(struct i40iw_sc_dev *,
+						struct i40iw_register_shared_stag *,
+						u64, bool);
+	enum i40iw_status_code (*dealloc_stag)(struct i40iw_sc_dev *,
+					       struct i40iw_dealloc_stag_info *,
+					       u64, bool);
+	enum i40iw_status_code (*query_stag)(struct i40iw_sc_dev *, u64, u32, bool);
+	enum i40iw_status_code (*mw_alloc)(struct i40iw_sc_dev *, u64, u32, u16, bool);
+};
+
+struct i40iw_cqp_misc_ops {
+	enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
+						   struct i40iw_cqp_manage_push_page_info *,
+						   u64, bool);
+	enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
+							   u64, u8, bool, bool);
+	enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
+							   u64, u8, u8, bool, bool);
+	enum i40iw_status_code (*commit_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+						    struct i40iw_dma_mem *, bool, u8);
+	enum i40iw_status_code (*query_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
+						   struct i40iw_dma_mem *, bool, u8);
+	enum i40iw_status_code (*static_hmc_pages_allocated)(struct i40iw_sc_cqp *,
+							     u64, u8, bool, bool);
+	enum i40iw_status_code (*add_arp_cache_entry)(struct i40iw_sc_cqp *,
+						      struct i40iw_add_arp_cache_entry_info *,
+						      u64, bool);
+	enum i40iw_status_code (*del_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+	enum i40iw_status_code (*query_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
+	enum i40iw_status_code (*manage_apbvt_entry)(struct i40iw_sc_cqp *,
+						     struct i40iw_apbvt_info *, u64, bool);
+	enum i40iw_status_code (*manage_qhash_table_entry)(struct i40iw_sc_cqp *,
+							   struct i40iw_qhash_table_info *, u64, bool);
+	enum i40iw_status_code (*alloc_local_mac_ipaddr_table_entry)(struct i40iw_sc_cqp *, u64, bool);
+	enum i40iw_status_code (*add_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *,
+							     struct i40iw_local_mac_ipaddr_entry_info *,
+							     u64, bool);
+	enum i40iw_status_code (*del_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *, u64, u8, u8, bool);
+	enum i40iw_status_code (*cqp_nop)(struct i40iw_sc_cqp *, u64, bool);
+	enum i40iw_status_code (*commit_fpm_values_done)(struct i40iw_sc_cqp
+							  *);
+	enum i40iw_status_code (*query_fpm_values_done)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*manage_hmc_pm_func_table_done)(struct i40iw_sc_cqp *);
+	enum i40iw_status_code (*update_suspend_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+	enum i40iw_status_code (*update_resume_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
+};
+
+struct i40iw_hmc_ops {
+	enum i40iw_status_code (*init_iw_hmc)(struct i40iw_sc_dev *, u8);
+	enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
+						      struct i40iw_hmc_fpm_misc *);
+	enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
+	enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *);
+	enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_create_obj_info *);
+	enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
+						 struct i40iw_hmc_del_obj_info *,
+						 bool reset);
+	enum i40iw_status_code (*pf_init_vfhmc)(struct i40iw_sc_dev *, u8, u32 *);
+	enum i40iw_status_code (*vf_configure_vffpm)(struct i40iw_sc_dev *, u32 *);
+};
+
+struct cqp_info {
+	union {
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_create_qp_info info;
+			u64 scratch;
+		} qp_create;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_modify_qp_info info;
+			u64 scratch;
+		} qp_modify;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			u64 scratch;
+			bool remove_hash_idx;
+			bool ignore_mw_bnd;
+		} qp_destroy;
+
+		struct {
+			struct i40iw_sc_cq *cq;
+			u64 scratch;
+			bool check_overflow;
+		} cq_create;
+
+		struct {
+			struct i40iw_sc_cq *cq;
+			u64 scratch;
+		} cq_destroy;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_allocate_stag_info info;
+			u64 scratch;
+		} alloc_stag;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			u64 scratch;
+			u32 mw_stag_index;
+			u16 pd_id;
+		} mw_alloc;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_reg_ns_stag_info info;
+			u64 scratch;
+		} mr_reg_non_shared;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_dealloc_stag_info info;
+			u64 scratch;
+		} dealloc_stag;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_local_mac_ipaddr_entry_info info;
+			u64 scratch;
+		} add_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_add_arp_cache_entry_info info;
+			u64 scratch;
+		} add_arp_cache_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+			u8 entry_idx;
+			u8 ignore_ref_count;
+		} del_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+			u16 arp_index;
+		} del_arp_cache_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_manage_vf_pble_info info;
+			u64 scratch;
+		} manage_vf_pble_bp;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_cqp_manage_push_page_info info;
+			u64 scratch;
+		} manage_push_page;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_upload_context_info info;
+			u64 scratch;
+		} qp_upload_context;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			u64 scratch;
+		} alloc_local_mac_ipaddr_entry;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_hmc_fcn_info info;
+			u64 scratch;
+		} manage_hmc_pm;
+
+		struct {
+			struct i40iw_sc_ceq *ceq;
+			u64 scratch;
+		} ceq_create;
+
+		struct {
+			struct i40iw_sc_ceq *ceq;
+			u64 scratch;
+		} ceq_destroy;
+
+		struct {
+			struct i40iw_sc_aeq *aeq;
+			u64 scratch;
+		} aeq_create;
+
+		struct {
+			struct i40iw_sc_aeq *aeq;
+			u64 scratch;
+		} aeq_destroy;
+
+		struct {
+			struct i40iw_sc_qp *qp;
+			struct i40iw_qp_flush_info info;
+			u64 scratch;
+		} qp_flush_wqes;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			void *fpm_values_va;
+			u64 fpm_values_pa;
+			u8 hmc_fn_id;
+			u64 scratch;
+		} query_fpm_values;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			void *fpm_values_va;
+			u64 fpm_values_pa;
+			u8 hmc_fn_id;
+			u64 scratch;
+		} commit_fpm_values;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_apbvt_info info;
+			u64 scratch;
+		} manage_apbvt_entry;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_qhash_table_info info;
+			u64 scratch;
+		} manage_qhash_table_entry;
+
+		struct {
+			struct i40iw_sc_dev *dev;
+			struct i40iw_update_sds_info info;
+			u64 scratch;
+		} update_pe_sds;
+
+		struct {
+			struct i40iw_sc_cqp *cqp;
+			struct i40iw_sc_qp *qp;
+			u64 scratch;
+		} suspend_resume;
+	} u;
+};
+
+struct cqp_commands_info {
+	struct list_head cqp_cmd_entry;
+	u8 cqp_cmd;
+	u8 post_sq;
+	struct cqp_info in;
+};
+
+struct i40iw_virtchnl_work_info {
+	void (*callback_fcn)(void *vf_dev);
+	void *worker_vf_dev;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
new file mode 100644
index 000000000000..12acd688def4
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2006 - 2016 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef I40IW_USER_CONTEXT_H
+#define I40IW_USER_CONTEXT_H
+
+#include <linux/types.h>
+
+#define I40IW_ABI_USERSPACE_VER 4
+#define I40IW_ABI_KERNEL_VER    4
+struct i40iw_alloc_ucontext_req {
+	__u32 reserved32;
+	__u8 userspace_ver;
+	__u8 reserved8[3];
+};
+
+struct i40iw_alloc_ucontext_resp {
+	__u32 max_pds;		/* maximum pds allowed for this user process */
+	__u32 max_qps;		/* maximum qps allowed for this user process */
+	__u32 wq_size;		/* size of the WQs (sq+rq) allocated to the mmaped area */
+	__u8 kernel_ver;
+	__u8 reserved[3];
+};
+
+struct i40iw_alloc_pd_resp {
+	__u32 pd_id;
+	__u8 reserved[4];
+};
+
+struct i40iw_create_cq_req {
+	__u64 user_cq_buffer;
+	__u64 user_shadow_area;
+};
+
+struct i40iw_create_qp_req {
+	__u64 user_wqe_buffers;
+	__u64 user_compl_ctx;
+
+	/* UDA QP PHB */
+	__u64 user_sq_phb;	/* place for VA of the sq phb buff */
+	__u64 user_rq_phb;	/* place for VA of the rq phb buff */
+};
+
+enum i40iw_memreg_type {
+	IW_MEMREG_TYPE_MEM = 0x0000,
+	IW_MEMREG_TYPE_QP = 0x0001,
+	IW_MEMREG_TYPE_CQ = 0x0002,
+};
+
+struct i40iw_mem_reg_req {
+	__u16 reg_type;		/* Memory, QP or CQ */
+	__u16 cq_pages;
+	__u16 rq_pages;
+	__u16 sq_pages;
+};
+
+struct i40iw_create_cq_resp {
+	__u32 cq_id;
+	__u32 cq_size;
+	__u32 mmap_db_index;
+	__u32 reserved;
+};
+
+struct i40iw_create_qp_resp {
+	__u32 qp_id;
+	__u32 actual_sq_size;
+	__u32 actual_rq_size;
+	__u32 i40iw_drv_opt;
+	__u16 push_idx;
+	__u8  lsmm;
+	__u8  rsvd2;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
new file mode 100644
index 000000000000..f78c3dc8bdb2
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -0,0 +1,1204 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_status.h"
+#include "i40iw_d.h"
+#include "i40iw_user.h"
+#include "i40iw_register.h"
+
+static u32 nop_signature = 0x55550000;
+
+/**
+ * i40iw_nop_1 - insert a nop wqe and move head. no post work
+ * @qp: hw qp ptr
+ */
+static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
+{
+	u64 header, *wqe;
+	u64 *wqe_0 = NULL;
+	u32 wqe_idx, peek_head;
+	bool signaled = false;
+
+	if (!qp->sq_ring.head)
+		return I40IW_ERR_PARAM;
+
+	wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	wqe = qp->sq_base[wqe_idx].elem;
+	peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
+	wqe_0 = qp->sq_base[peek_head].elem;
+	if (peek_head)
+		wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+	else
+		wqe_0[3] = LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID) | nop_signature++;
+
+	wmb();	/* Memory barrier to ensure data is written before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	return 0;
+}
+
+/**
+ * i40iw_qp_post_wr - post wr to hrdware
+ * @qp: hw qp ptr
+ */
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
+{
+	u64 temp;
+	u32 hw_sq_tail;
+	u32 sw_sq_head;
+
+	mb(); /* valid bit is written and loads completed before reading shadow */
+
+	/* read the doorbell shadow area */
+	get_64bit_val(qp->shadow_area, 0, &temp);
+
+	hw_sq_tail = (u32)RS_64(temp, I40IW_QP_DBSA_HW_SQ_TAIL);
+	sw_sq_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	if (sw_sq_head != hw_sq_tail) {
+		if (sw_sq_head > qp->initial_ring.head) {
+			if ((hw_sq_tail >= qp->initial_ring.head) &&
+			    (hw_sq_tail < sw_sq_head)) {
+				writel(qp->qp_id, qp->wqe_alloc_reg);
+			}
+		} else if (sw_sq_head != qp->initial_ring.head) {
+			if ((hw_sq_tail >= qp->initial_ring.head) ||
+			    (hw_sq_tail < sw_sq_head)) {
+				writel(qp->qp_id, qp->wqe_alloc_reg);
+			}
+		}
+	}
+
+	qp->initial_ring.head = qp->sq_ring.head;
+}
+
+/**
+ * i40iw_qp_ring_push_db -  ring qp doorbell
+ * @qp: hw qp ptr
+ * @wqe_idx: wqe index
+ */
+static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
+{
+	set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
+	qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+}
+
+/**
+ * i40iw_qp_get_next_send_wqe - return next wqe ptr
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @wqe_size: size of sq wqe
+ */
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
+				u32 *wqe_idx,
+				u8 wqe_size)
+{
+	u64 *wqe = NULL;
+	u64 wqe_ptr;
+	u32 peek_head = 0;
+	u16 offset;
+	enum i40iw_status_code ret_code = 0;
+	u8 nop_wqe_cnt = 0, i;
+	u64 *wqe_0 = NULL;
+
+	*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+
+	if (!*wqe_idx)
+		qp->swqe_polarity = !qp->swqe_polarity;
+	wqe_ptr = (uintptr_t)qp->sq_base[*wqe_idx].elem;
+	offset = (u16)(wqe_ptr) & 0x7F;
+	if ((offset + wqe_size) > I40IW_QP_WQE_MAX_SIZE) {
+		nop_wqe_cnt = (u8)(I40IW_QP_WQE_MAX_SIZE - offset) / I40IW_QP_WQE_MIN_SIZE;
+		for (i = 0; i < nop_wqe_cnt; i++) {
+			i40iw_nop_1(qp);
+			I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+			if (ret_code)
+				return NULL;
+		}
+
+		*wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+		if (!*wqe_idx)
+			qp->swqe_polarity = !qp->swqe_polarity;
+	}
+	for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
+		I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+		if (ret_code)
+			return NULL;
+	}
+
+	wqe = qp->sq_base[*wqe_idx].elem;
+
+	peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
+	wqe_0 = qp->sq_base[peek_head].elem;
+	if (peek_head & 0x3)
+		wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
+	return wqe;
+}
+
+/**
+ * i40iw_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ */
+static void i40iw_set_fragment(u64 *wqe, u32 offset, struct i40iw_sge *sge)
+{
+	if (sge) {
+		set_64bit_val(wqe, offset, LS_64(sge->tag_off, I40IWQPSQ_FRAG_TO));
+		set_64bit_val(wqe, (offset + 8),
+			      (LS_64(sge->len, I40IWQPSQ_FRAG_LEN) |
+			       LS_64(sge->stag, I40IWQPSQ_FRAG_STAG)));
+	}
+}
+
+/**
+ * i40iw_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
+{
+	u64 *wqe = NULL;
+	enum i40iw_status_code ret_code;
+
+	if (I40IW_RING_FULL_ERR(qp->rq_ring))
+		return NULL;
+
+	I40IW_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+	if (ret_code)
+		return NULL;
+	if (!*wqe_idx)
+		qp->rwqe_polarity = !qp->rwqe_polarity;
+	/* rq_wqe_size_multiplier is no of qwords in one rq wqe */
+	wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier >> 2)].elem;
+
+	return wqe;
+}
+
+/**
+ * i40iw_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
+					       struct i40iw_post_sq_info *info,
+					       bool post_sq)
+{
+	u64 header;
+	u64 *wqe;
+	struct i40iw_rdma_write *op_info;
+	u32 i, wqe_idx;
+	u32 total_size = 0, byte_off;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.rdma_write;
+	if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	for (i = 0; i < op_info->num_lo_sges; i++)
+		total_size += op_info->lo_sg_list[i].len;
+
+	if (total_size > I40IW_MAX_OUTBOUND_MESSAGE_SIZE)
+		return I40IW_ERR_QP_INVALID_MSG_SIZE;
+
+	read_fence |= info->read_fence;
+
+	ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_lo_sges, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+	set_64bit_val(wqe, 16,
+		      LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+	if (!op_info->rem_addr.stag)
+		return I40IW_ERR_BAD_STAG;
+
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+		 LS_64((op_info->num_lo_sges > 1 ?  (op_info->num_lo_sges - 1) : 0), I40IWQPSQ_ADDFRAGCNT) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
+
+	for (i = 1; i < op_info->num_lo_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
+					      struct i40iw_post_sq_info *info,
+					      bool inv_stag,
+					      bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_rdma_read *op_info;
+	u64 header;
+	u32 wqe_idx;
+	enum i40iw_status_code ret_code;
+	u8 wqe_size;
+	bool local_fence = false;
+
+	op_info = &info->op.rdma_read;
+	ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
+	if (ret_code)
+		return ret_code;
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->lo_addr.len;
+	local_fence |= info->local_fence;
+
+	set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64((inv_stag ? I40IWQP_OP_RDMA_READ_LOC_INV : I40IWQP_OP_RDMA_READ), I40IWQPSQ_OPCODE) |
+		 LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, &op_info->lo_addr);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: stag_to_inv value
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
+					 struct i40iw_post_sq_info *info,
+					 u32 stag_to_inv,
+					 bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_post_send *op_info;
+	u64 header;
+	u32 i, wqe_idx, total_size = 0, byte_off;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.send;
+	if (qp->max_sq_frag_cnt < op_info->num_sges)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	for (i = 0; i < op_info->num_sges; i++)
+		total_size += op_info->sg_list[i].len;
+	ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_sges, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = total_size;
+	set_64bit_val(wqe, 16, 0);
+	header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+		 LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+		 LS_64((op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0),
+		       I40IWQPSQ_ADDFRAGCNT) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, op_info->sg_list);
+
+	for (i = 1; i < op_info->num_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
+						      struct i40iw_post_sq_info *info,
+						      bool post_sq)
+{
+	u64 *wqe;
+	u8 *dest, *src;
+	struct i40iw_inline_rdma_write *op_info;
+	u64 *push;
+	u64 header = 0;
+	u32 i, wqe_idx;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+
+	op_info = &info->op.inline_rdma_write;
+	if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+	set_64bit_val(wqe, 16,
+		      LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
+
+	header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
+		 LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
+		 LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+		 LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+		 LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+		 LS_64(read_fence, I40IWQPSQ_READFENCE) |
+		 LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+		 LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+		 LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	dest = (u8 *)wqe;
+	src = (u8 *)(op_info->data);
+
+	if (op_info->len <= 16) {
+		for (i = 0; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	} else {
+		for (i = 0; i < 16; i++, src++, dest++)
+			*dest = *src;
+		dest = (u8 *)wqe + 32;
+		for (; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (qp->push_db) {
+		push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+		memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+		i40iw_qp_ring_push_db(qp, wqe_idx);
+	} else {
+		if (post_sq)
+			i40iw_qp_post_wr(qp);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @stag_to_inv: remote stag
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
+						struct i40iw_post_sq_info *info,
+						u32 stag_to_inv,
+						bool post_sq)
+{
+	u64 *wqe;
+	u8 *dest, *src;
+	struct i40iw_post_inline_send *op_info;
+	u64 header;
+	u32 wqe_idx, i;
+	enum i40iw_status_code ret_code;
+	bool read_fence = false;
+	u8 wqe_size;
+	u64 *push;
+
+	op_info = &info->op.inline_send;
+	if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
+	if (ret_code)
+		return ret_code;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	read_fence |= info->read_fence;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = op_info->len;
+	header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
+	    LS_64(info->op_type, I40IWQPSQ_OPCODE) |
+	    LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
+	    LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
+	    LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
+	    LS_64(read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	dest = (u8 *)wqe;
+	src = (u8 *)(op_info->data);
+
+	if (op_info->len <= 16) {
+		for (i = 0; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	} else {
+		for (i = 0; i < 16; i++, src++, dest++)
+			*dest = *src;
+		dest = (u8 *)wqe + 32;
+		for (; i < op_info->len; i++, src++, dest++)
+			*dest = *src;
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (qp->push_db) {
+		push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
+		memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
+		i40iw_qp_ring_push_db(qp, wqe_idx);
+	} else {
+		if (post_sq)
+			i40iw_qp_post_wr(qp);
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp,
+							  struct i40iw_post_sq_info *info,
+							  bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_inv_local_stag *op_info;
+	u64 header;
+	u32 wqe_idx;
+	bool local_fence = false;
+
+	op_info = &info->op.inv_local_stag;
+	local_fence = info->local_fence;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8,
+		      LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
+	set_64bit_val(wqe, 16, 0);
+	header = LS_64(I40IW_OP_TYPE_INV_STAG, I40IWQPSQ_OPCODE) |
+	    LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_mw_bind - Memory Window bind operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
+					    struct i40iw_post_sq_info *info,
+					    bool post_sq)
+{
+	u64 *wqe;
+	struct i40iw_bind_window *op_info;
+	u64 header;
+	u32 wqe_idx;
+	bool local_fence = false;
+
+	op_info = &info->op.bind_window;
+
+	local_fence |= info->local_fence;
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = info->wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+	set_64bit_val(wqe, 8,
+		      LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
+		      LS_64(op_info->mw_stag, I40IWQPSQ_MWSTAG));
+	set_64bit_val(wqe, 16, op_info->bind_length);
+	header = LS_64(I40IW_OP_TYPE_BIND_MW, I40IWQPSQ_OPCODE) |
+	    LS_64(((op_info->enable_reads << 2) |
+		   (op_info->enable_writes << 3)),
+		  I40IWQPSQ_STAGRIGHTS) |
+	    LS_64((op_info->addressing_type == I40IW_ADDR_TYPE_VA_BASED ?  1 : 0),
+		  I40IWQPSQ_VABASEDTO) |
+	    LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
+	    LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
+	    LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
+						 struct i40iw_post_rq_info *info)
+{
+	u64 *wqe;
+	u64 header;
+	u32 total_size = 0, wqe_idx, i, byte_off;
+
+	if (qp->max_rq_frag_cnt < info->num_sges)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	for (i = 0; i < info->num_sges; i++)
+		total_size += info->sg_list[i].len;
+	wqe = i40iw_qp_get_next_recv_wqe(qp, &wqe_idx);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->rq_wrid_array[wqe_idx] = info->wr_id;
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64((info->num_sges > 1 ? (info->num_sges - 1) : 0),
+		       I40IWQPSQ_ADDFRAGCNT) |
+	    LS_64(qp->rwqe_polarity, I40IWQPSQ_VALID);
+
+	i40iw_set_fragment(wqe, 0, info->sg_list);
+
+	for (i = 1; i < info->num_sges; i++) {
+		byte_off = 32 + (i - 1) * 16;
+		i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
+	}
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+
+	return 0;
+}
+
+/**
+ * i40iw_cq_request_notification - cq notification request (door bell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+static void i40iw_cq_request_notification(struct i40iw_cq_uk *cq,
+					  enum i40iw_completion_notify cq_notify)
+{
+	u64 temp_val;
+	u16 sw_cq_sel;
+	u8 arm_next_se = 0;
+	u8 arm_next = 0;
+	u8 arm_seq_num;
+
+	get_64bit_val(cq->shadow_area, 32, &temp_val);
+	arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
+	arm_seq_num++;
+
+	sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
+	arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
+	arm_next_se |= 1;
+	if (cq_notify == IW_CQ_COMPL_EVENT)
+		arm_next = 1;
+	temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
+	    LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
+	    LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
+	    LS_64(arm_next, I40IW_CQ_DBSA_ARM_NEXT);
+
+	set_64bit_val(cq->shadow_area, 32, temp_val);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	writel(cq->cq_id, cq->cqe_alloc_reg);
+}
+
+/**
+ * i40iw_cq_post_entries - update tail in shadow memory
+ * @cq: hw cq
+ * @count: # of entries processed
+ */
+static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
+						    u8 count)
+{
+	I40IW_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
+	set_64bit_val(cq->shadow_area, 0,
+		      I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+	return 0;
+}
+
+/**
+ * i40iw_cq_poll_completion - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ * @post_cq: update cq tail
+ */
+static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
+						       struct i40iw_cq_poll_info *info,
+						       bool post_cq)
+{
+	u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
+	u64 *cqe, *sw_wqe;
+	struct i40iw_qp_uk *qp;
+	struct i40iw_ring *pring = NULL;
+	u32 wqe_idx, q_type, array_idx = 0;
+	enum i40iw_status_code ret_code = 0;
+	enum i40iw_status_code ret_code2 = 0;
+	bool move_cq_head = true;
+	u8 polarity;
+	u8 addl_frag_cnt, addl_wqes = 0;
+
+	if (cq->avoid_mem_cflct)
+		cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
+	else
+		cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(cq);
+
+	get_64bit_val(cqe, 24, &qword3);
+	polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+	if (polarity != cq->polarity)
+		return I40IW_ERR_QUEUE_EMPTY;
+
+	q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
+	info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
+	info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
+	if (info->error) {
+		info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
+		info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
+		info->minor_err = (bool)RS_64(qword3, I40IW_CQ_MINERR);
+	} else {
+		info->comp_status = I40IW_COMPL_STATUS_SUCCESS;
+	}
+
+	get_64bit_val(cqe, 0, &qword0);
+	get_64bit_val(cqe, 16, &qword2);
+
+	info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM);
+
+	info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
+
+	get_64bit_val(cqe, 8, &comp_ctx);
+
+	info->solicited_event = (bool)RS_64(qword3, I40IWCQ_SOEVENT);
+	info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
+
+	qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
+	wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
+	info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
+
+	if (q_type == I40IW_CQE_QTYPE_RQ) {
+		array_idx = (wqe_idx * 4) / qp->rq_wqe_size_multiplier;
+		if (info->comp_status == I40IW_COMPL_STATUS_FLUSHED) {
+			info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+			array_idx = qp->rq_ring.tail;
+		} else {
+			info->wr_id = qp->rq_wrid_array[array_idx];
+		}
+
+		info->op_type = I40IW_OP_TYPE_REC;
+		if (qword3 & I40IWCQ_STAG_MASK) {
+			info->stag_invalid_set = true;
+			info->inv_stag = (u32)RS_64(qword2, I40IWCQ_INVSTAG);
+		} else {
+			info->stag_invalid_set = false;
+		}
+		info->bytes_xfered = (u32)RS_64(qword0, I40IWCQ_PAYLDLEN);
+		I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+		pring = &qp->rq_ring;
+	} else {
+		if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) {
+			info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+			info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+
+			info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
+			sw_wqe = qp->sq_base[wqe_idx].elem;
+			get_64bit_val(sw_wqe, 24, &wqe_qword);
+			addl_frag_cnt =
+			    (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+			i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+
+			addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+			I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
+		} else {
+			do {
+				u8 op_type;
+				u32 tail;
+
+				tail = qp->sq_ring.tail;
+				sw_wqe = qp->sq_base[tail].elem;
+				get_64bit_val(sw_wqe, 24, &wqe_qword);
+				op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
+				info->op_type = op_type;
+				addl_frag_cnt = (u8)RS_64(wqe_qword, I40IWQPSQ_ADDFRAGCNT);
+				i40iw_fragcnt_to_wqesize_sq(addl_frag_cnt + 1, &addl_wqes);
+				addl_wqes = (addl_wqes / I40IW_QP_WQE_MIN_SIZE);
+				I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
+				if (op_type != I40IWQP_OP_NOP) {
+					info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+					info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+					break;
+				}
+			} while (1);
+		}
+		pring = &qp->sq_ring;
+	}
+
+	ret_code = 0;
+
+	if (!ret_code &&
+	    (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
+		if (pring && (I40IW_RING_MORE_WORK(*pring)))
+			move_cq_head = false;
+
+	if (move_cq_head) {
+		I40IW_RING_MOVE_HEAD(cq->cq_ring, ret_code2);
+
+		if (ret_code2 && !ret_code)
+			ret_code = ret_code2;
+
+		if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
+			cq->polarity ^= 1;
+
+		if (post_cq) {
+			I40IW_RING_MOVE_TAIL(cq->cq_ring);
+			set_64bit_val(cq->shadow_area, 0,
+				      I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
+		}
+	} else {
+		if (info->is_srq)
+			return ret_code;
+		qword3 &= ~I40IW_CQ_WQEIDX_MASK;
+		qword3 |= LS_64(pring->tail, I40IW_CQ_WQEIDX);
+		set_64bit_val(cqe, 24, qword3);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40iw_get_wqe_shift - get shift count for maximum wqe size
+ * @wqdepth: depth of wq required.
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on sge.
+ * If sge, == 1, shift =0 (wqe_size of 32 bytes), for sge=2 and 3, shift =1
+ * (64 bytes wqes) and 2 otherwise (128 bytes wqe).
+ */
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift)
+{
+	u32 size;
+
+	*shift = 0;
+	if (sge > 1)
+		*shift = (sge < 4) ? 1 : 2;
+
+	/* check if wqdepth is multiple of 2 or not */
+
+	if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1)))
+		return I40IW_ERR_INVALID_SIZE;
+
+	size = wqdepth << *shift;	/* multiple of 32 bytes count */
+	if (size > I40IWQP_SW_MAX_WQSIZE)
+		return I40IW_ERR_INVALID_SIZE;
+	return 0;
+}
+
+static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
+	i40iw_qp_post_wr,
+	i40iw_qp_ring_push_db,
+	i40iw_rdma_write,
+	i40iw_rdma_read,
+	i40iw_send,
+	i40iw_inline_rdma_write,
+	i40iw_inline_send,
+	i40iw_stag_local_invalidate,
+	i40iw_mw_bind,
+	i40iw_post_receive,
+	i40iw_nop
+};
+
+static struct i40iw_cq_ops iw_cq_ops = {
+	i40iw_cq_request_notification,
+	i40iw_cq_poll_completion,
+	i40iw_cq_post_entries,
+	i40iw_clean_cq
+};
+
+static struct i40iw_device_uk_ops iw_device_uk_ops = {
+	i40iw_cq_uk_init,
+	i40iw_qp_uk_init,
+};
+
+/**
+ * i40iw_qp_uk_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on numbers of max. fragements
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq. If srq is used,
+ * then rq_base will point to one rq wqe only (not the whole
+ * array of wqes)
+ */
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+					struct i40iw_qp_uk_init_info *info)
+{
+	enum i40iw_status_code ret_code = 0;
+	u32 sq_ring_size;
+	u8 sqshift, rqshift;
+
+	if (info->max_sq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+
+	if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, &sqshift);
+	if (ret_code)
+		return ret_code;
+
+	ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, &rqshift);
+	if (ret_code)
+		return ret_code;
+
+	qp->sq_base = info->sq;
+	qp->rq_base = info->rq;
+	qp->shadow_area = info->shadow_area;
+	qp->sq_wrtrk_array = info->sq_wrtrk_array;
+	qp->rq_wrid_array = info->rq_wrid_array;
+
+	qp->wqe_alloc_reg = info->wqe_alloc_reg;
+	qp->qp_id = info->qp_id;
+
+	qp->sq_size = info->sq_size;
+	qp->push_db = info->push_db;
+	qp->push_wqe = info->push_wqe;
+
+	qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+	sq_ring_size = qp->sq_size << sqshift;
+
+	I40IW_RING_INIT(qp->sq_ring, sq_ring_size);
+	I40IW_RING_INIT(qp->initial_ring, sq_ring_size);
+	I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+	I40IW_RING_MOVE_TAIL(qp->sq_ring);
+	I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code);
+	qp->swqe_polarity = 1;
+	qp->swqe_polarity_deferred = 1;
+	qp->rwqe_polarity = 0;
+
+	if (!qp->use_srq) {
+		qp->rq_size = info->rq_size;
+		qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+		qp->rq_wqe_size = rqshift;
+		I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
+		qp->rq_wqe_size_multiplier = 4 << rqshift;
+	}
+	qp->ops = iw_qp_uk_ops;
+
+	return ret_code;
+}
+
+/**
+ * i40iw_cq_uk_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+					struct i40iw_cq_uk_init_info *info)
+{
+	if ((info->cq_size < I40IW_MIN_CQ_SIZE) ||
+	    (info->cq_size > I40IW_MAX_CQ_SIZE))
+		return I40IW_ERR_INVALID_SIZE;
+	cq->cq_base = (struct i40iw_cqe *)info->cq_base;
+	cq->cq_id = info->cq_id;
+	cq->cq_size = info->cq_size;
+	cq->cqe_alloc_reg = info->cqe_alloc_reg;
+	cq->shadow_area = info->shadow_area;
+	cq->avoid_mem_cflct = info->avoid_mem_cflct;
+
+	I40IW_RING_INIT(cq->cq_ring, cq->cq_size);
+	cq->polarity = 1;
+	cq->ops = iw_cq_ops;
+
+	return 0;
+}
+
+/**
+ * i40iw_device_init_uk - setup routines for iwarp shared device
+ * @dev: iwarp shared (user and kernel)
+ */
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
+{
+	dev->ops_uk = iw_device_uk_ops;
+}
+
+/**
+ * i40iw_clean_cq - clean cq entries
+ * @ queue completion context
+ * @cq: cq to clean
+ */
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
+{
+	u64 *cqe;
+	u64 qword3, comp_ctx;
+	u32 cq_head;
+	u8 polarity, temp;
+
+	cq_head = cq->cq_ring.head;
+	temp = cq->polarity;
+	do {
+		if (cq->avoid_mem_cflct)
+			cqe = (u64 *)&(((struct i40iw_extended_cqe *)cq->cq_base)[cq_head]);
+		else
+			cqe = (u64 *)&cq->cq_base[cq_head];
+		get_64bit_val(cqe, 24, &qword3);
+		polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
+
+		if (polarity != temp)
+			break;
+
+		get_64bit_val(cqe, 8, &comp_ctx);
+		if ((void *)(unsigned long)comp_ctx == queue)
+			set_64bit_val(cqe, 8, 0);
+
+		cq_head = (cq_head + 1) % cq->cq_ring.size;
+		if (!cq_head)
+			temp ^= 1;
+	} while (true);
+}
+
+/**
+ * i40iw_nop - send a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: flag if signaled for completion
+ * @post_sq: flag to post sq
+ */
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
+				 u64 wr_id,
+				 bool signaled,
+				 bool post_sq)
+{
+	u64 header, *wqe;
+	u32 wqe_idx;
+
+	wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE);
+	if (!wqe)
+		return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
+
+	qp->sq_wrtrk_array[wqe_idx].wrid = wr_id;
+	qp->sq_wrtrk_array[wqe_idx].wr_len = 0;
+	set_64bit_val(wqe, 0, 0);
+	set_64bit_val(wqe, 8, 0);
+	set_64bit_val(wqe, 16, 0);
+
+	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
+	    LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
+	    LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
+
+	wmb(); /* make sure WQE is populated before valid bit is set */
+
+	set_64bit_val(wqe, 24, header);
+	if (post_sq)
+		i40iw_qp_post_wr(qp);
+
+	return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_sq - calculate wqe size based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size)
+{
+	switch (frag_cnt) {
+	case 0:
+	case 1:
+		*wqe_size = I40IW_QP_WQE_MIN_SIZE;
+		break;
+	case 2:
+	case 3:
+		*wqe_size = 64;
+		break;
+	case 4:
+	case 5:
+		*wqe_size = 96;
+		break;
+	case 6:
+	case 7:
+		*wqe_size = 128;
+		break;
+	default:
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size of rq wqe returned
+ */
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size)
+{
+	switch (frag_cnt) {
+	case 0:
+	case 1:
+		*wqe_size = 32;
+		break;
+	case 2:
+	case 3:
+		*wqe_size = 64;
+		break;
+	case 4:
+	case 5:
+	case 6:
+	case 7:
+		*wqe_size = 128;
+		break;
+	default:
+		return I40IW_ERR_INVALID_FRAG_COUNT;
+	}
+
+	return 0;
+}
+
+/**
+ * i40iw_inline_data_size_to_wqesize - based on inline data, wqe size
+ * @data_size: data size for inline
+ * @wqe_size: size of sq wqe returned
+ */
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+							 u8 *wqe_size)
+{
+	if (data_size > I40IW_MAX_INLINE_DATA_SIZE)
+		return I40IW_ERR_INVALID_IMM_DATA_SIZE;
+
+	if (data_size <= 16)
+		*wqe_size = I40IW_QP_WQE_MIN_SIZE;
+	else if (data_size <= 48)
+		*wqe_size = 64;
+	else if (data_size <= 80)
+		*wqe_size = 96;
+	else
+		*wqe_size = 128;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
new file mode 100644
index 000000000000..5cd971bb8cc7
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -0,0 +1,442 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_USER_H
+#define I40IW_USER_H
+
+enum i40iw_device_capabilities_const {
+	I40IW_WQE_SIZE =			4,
+	I40IW_CQP_WQE_SIZE =			8,
+	I40IW_CQE_SIZE =			4,
+	I40IW_EXTENDED_CQE_SIZE =		8,
+	I40IW_AEQE_SIZE =			2,
+	I40IW_CEQE_SIZE =			1,
+	I40IW_CQP_CTX_SIZE =			8,
+	I40IW_SHADOW_AREA_SIZE =		8,
+	I40IW_CEQ_MAX_COUNT =			256,
+	I40IW_QUERY_FPM_BUF_SIZE =		128,
+	I40IW_COMMIT_FPM_BUF_SIZE =		128,
+	I40IW_MIN_IW_QP_ID =			1,
+	I40IW_MAX_IW_QP_ID =			262143,
+	I40IW_MIN_CEQID =			0,
+	I40IW_MAX_CEQID =			256,
+	I40IW_MIN_CQID =			0,
+	I40IW_MAX_CQID =			131071,
+	I40IW_MIN_AEQ_ENTRIES =			1,
+	I40IW_MAX_AEQ_ENTRIES =			524287,
+	I40IW_MIN_CEQ_ENTRIES =			1,
+	I40IW_MAX_CEQ_ENTRIES =			131071,
+	I40IW_MIN_CQ_SIZE =			1,
+	I40IW_MAX_CQ_SIZE =			1048575,
+	I40IW_MAX_AEQ_ALLOCATE_COUNT =		255,
+	I40IW_DB_ID_ZERO =			0,
+	I40IW_MAX_WQ_FRAGMENT_COUNT =		6,
+	I40IW_MAX_SGE_RD =			1,
+	I40IW_MAX_OUTBOUND_MESSAGE_SIZE =	2147483647,
+	I40IW_MAX_INBOUND_MESSAGE_SIZE =	2147483647,
+	I40IW_MAX_PUSH_PAGE_COUNT =		4096,
+	I40IW_MAX_PE_ENABLED_VF_COUNT =		32,
+	I40IW_MAX_VF_FPM_ID =			47,
+	I40IW_MAX_VF_PER_PF =			127,
+	I40IW_MAX_SQ_PAYLOAD_SIZE =		2145386496,
+	I40IW_MAX_INLINE_DATA_SIZE =		112,
+	I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE =	112,
+	I40IW_MAX_IRD_SIZE =			32,
+	I40IW_QPCTX_ENCD_MAXIRD =		3,
+	I40IW_MAX_WQ_ENTRIES =			2048,
+	I40IW_MAX_ORD_SIZE =			32,
+	I40IW_Q2_BUFFER_SIZE =			(248 + 100),
+	I40IW_QP_CTX_SIZE =			248
+};
+
+#define i40iw_handle void *
+#define i40iw_adapter_handle i40iw_handle
+#define i40iw_qp_handle i40iw_handle
+#define i40iw_cq_handle i40iw_handle
+#define i40iw_srq_handle i40iw_handle
+#define i40iw_pd_id i40iw_handle
+#define i40iw_stag_handle i40iw_handle
+#define i40iw_stag_index u32
+#define i40iw_stag u32
+#define i40iw_stag_key u8
+
+#define i40iw_tagged_offset u64
+#define i40iw_access_privileges u32
+#define i40iw_physical_fragment u64
+#define i40iw_address_list u64 *
+
+#define I40IW_CREATE_STAG(index, key)       (((index) << 8) + (key))
+
+#define I40IW_STAG_KEY_FROM_STAG(stag)      ((stag) && 0x000000FF)
+
+#define I40IW_STAG_INDEX_FROM_STAG(stag)    (((stag) && 0xFFFFFF00) >> 8)
+
+struct i40iw_qp_uk;
+struct i40iw_cq_uk;
+struct i40iw_srq_uk;
+struct i40iw_qp_uk_init_info;
+struct i40iw_cq_uk_init_info;
+struct i40iw_srq_uk_init_info;
+
+struct i40iw_sge {
+	i40iw_tagged_offset tag_off;
+	u32 len;
+	i40iw_stag stag;
+};
+
+#define i40iw_sgl struct i40iw_sge *
+
+struct i40iw_ring {
+	u32 head;
+	u32 tail;
+	u32 size;
+};
+
+struct i40iw_cqe {
+	u64 buf[I40IW_CQE_SIZE];
+};
+
+struct i40iw_extended_cqe {
+	u64 buf[I40IW_EXTENDED_CQE_SIZE];
+};
+
+struct i40iw_wqe {
+	u64 buf[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk_ops;
+
+enum i40iw_addressing_type {
+	I40IW_ADDR_TYPE_ZERO_BASED = 0,
+	I40IW_ADDR_TYPE_VA_BASED = 1,
+};
+
+#define I40IW_ACCESS_FLAGS_LOCALREAD		0x01
+#define I40IW_ACCESS_FLAGS_LOCALWRITE		0x02
+#define I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY	0x04
+#define I40IW_ACCESS_FLAGS_REMOTEREAD		0x05
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY	0x08
+#define I40IW_ACCESS_FLAGS_REMOTEWRITE		0x0a
+#define I40IW_ACCESS_FLAGS_BIND_WINDOW		0x10
+#define I40IW_ACCESS_FLAGS_ALL			0x1F
+
+#define I40IW_OP_TYPE_RDMA_WRITE	0
+#define I40IW_OP_TYPE_RDMA_READ		1
+#define I40IW_OP_TYPE_SEND		3
+#define I40IW_OP_TYPE_SEND_INV		4
+#define I40IW_OP_TYPE_SEND_SOL		5
+#define I40IW_OP_TYPE_SEND_SOL_INV	6
+#define I40IW_OP_TYPE_REC		7
+#define I40IW_OP_TYPE_BIND_MW		8
+#define I40IW_OP_TYPE_FAST_REG_NSMR	9
+#define I40IW_OP_TYPE_INV_STAG		10
+#define I40IW_OP_TYPE_RDMA_READ_INV_STAG 11
+#define I40IW_OP_TYPE_NOP		12
+
+enum i40iw_completion_status {
+	I40IW_COMPL_STATUS_SUCCESS = 0,
+	I40IW_COMPL_STATUS_FLUSHED,
+	I40IW_COMPL_STATUS_INVALID_WQE,
+	I40IW_COMPL_STATUS_QP_CATASTROPHIC,
+	I40IW_COMPL_STATUS_REMOTE_TERMINATION,
+	I40IW_COMPL_STATUS_INVALID_STAG,
+	I40IW_COMPL_STATUS_BASE_BOUND_VIOLATION,
+	I40IW_COMPL_STATUS_ACCESS_VIOLATION,
+	I40IW_COMPL_STATUS_INVALID_PD_ID,
+	I40IW_COMPL_STATUS_WRAP_ERROR,
+	I40IW_COMPL_STATUS_STAG_INVALID_PDID,
+	I40IW_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+	I40IW_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+	I40IW_COMPL_STATUS_STAG_NOT_INVALID,
+	I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_SIZE,
+	I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_ENTRY,
+	I40IW_COMPL_STATUS_INVALID_FBO,
+	I40IW_COMPL_STATUS_INVALID_LENGTH,
+	I40IW_COMPL_STATUS_INVALID_ACCESS,
+	I40IW_COMPL_STATUS_PHYS_BUFFER_LIST_TOO_LONG,
+	I40IW_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+	I40IW_COMPL_STATUS_INVALID_REGION,
+	I40IW_COMPL_STATUS_INVALID_WINDOW,
+	I40IW_COMPL_STATUS_INVALID_TOTAL_LENGTH
+};
+
+enum i40iw_completion_notify {
+	IW_CQ_COMPL_EVENT = 0,
+	IW_CQ_COMPL_SOLICITED = 1
+};
+
+struct i40iw_post_send {
+	i40iw_sgl sg_list;
+	u8 num_sges;
+};
+
+struct i40iw_post_inline_send {
+	void *data;
+	u32 len;
+};
+
+struct i40iw_post_send_w_inv {
+	i40iw_sgl sg_list;
+	u32 num_sges;
+	i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_post_inline_send_w_inv {
+	void *data;
+	u32 len;
+	i40iw_stag remote_stag_to_inv;
+};
+
+struct i40iw_rdma_write {
+	i40iw_sgl lo_sg_list;
+	u8 num_lo_sges;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_inline_rdma_write {
+	void *data;
+	u32 len;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_rdma_read {
+	struct i40iw_sge lo_addr;
+	struct i40iw_sge rem_addr;
+};
+
+struct i40iw_bind_window {
+	i40iw_stag mr_stag;
+	u64 bind_length;
+	void *va;
+	enum i40iw_addressing_type addressing_type;
+	bool enable_reads;
+	bool enable_writes;
+	i40iw_stag mw_stag;
+};
+
+struct i40iw_inv_local_stag {
+	i40iw_stag target_stag;
+};
+
+struct i40iw_post_sq_info {
+	u64 wr_id;
+	u8 op_type;
+	bool signaled;
+	bool read_fence;
+	bool local_fence;
+	bool inline_data;
+	bool defer_flag;
+	union {
+		struct i40iw_post_send send;
+		struct i40iw_post_send send_w_sol;
+		struct i40iw_post_send_w_inv send_w_inv;
+		struct i40iw_post_send_w_inv send_w_sol_inv;
+		struct i40iw_rdma_write rdma_write;
+		struct i40iw_rdma_read rdma_read;
+		struct i40iw_rdma_read rdma_read_inv;
+		struct i40iw_bind_window bind_window;
+		struct i40iw_inv_local_stag inv_local_stag;
+		struct i40iw_inline_rdma_write inline_rdma_write;
+		struct i40iw_post_inline_send inline_send;
+		struct i40iw_post_inline_send inline_send_w_sol;
+		struct i40iw_post_inline_send_w_inv inline_send_w_inv;
+		struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv;
+	} op;
+};
+
+struct i40iw_post_rq_info {
+	u64 wr_id;
+	i40iw_sgl sg_list;
+	u32 num_sges;
+};
+
+struct i40iw_cq_poll_info {
+	u64 wr_id;
+	i40iw_qp_handle qp_handle;
+	u32 bytes_xfered;
+	u32 tcp_seq_num;
+	u32 qp_id;
+	i40iw_stag inv_stag;
+	enum i40iw_completion_status comp_status;
+	u16 major_err;
+	u16 minor_err;
+	u8 op_type;
+	bool stag_invalid_set;
+	bool push_dropped;
+	bool error;
+	bool is_srq;
+	bool solicited_event;
+};
+
+struct i40iw_qp_uk_ops {
+	void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
+	void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
+	enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
+						struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
+					       struct i40iw_post_sq_info *, bool, bool);
+	enum i40iw_status_code (*iw_send)(struct i40iw_qp_uk *,
+					  struct i40iw_post_sq_info *, u32, bool);
+	enum i40iw_status_code (*iw_inline_rdma_write)(struct i40iw_qp_uk *,
+						       struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_inline_send)(struct i40iw_qp_uk *,
+						 struct i40iw_post_sq_info *, u32, bool);
+	enum i40iw_status_code (*iw_stag_local_invalidate)(struct i40iw_qp_uk *,
+							   struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_mw_bind)(struct i40iw_qp_uk *,
+					     struct i40iw_post_sq_info *, bool);
+	enum i40iw_status_code (*iw_post_receive)(struct i40iw_qp_uk *,
+						  struct i40iw_post_rq_info *);
+	enum i40iw_status_code (*iw_post_nop)(struct i40iw_qp_uk *, u64, bool, bool);
+};
+
+struct i40iw_cq_ops {
+	void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
+					   enum i40iw_completion_notify);
+	enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
+							struct i40iw_cq_poll_info *, bool);
+	enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
+	void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
+};
+
+struct i40iw_dev_uk;
+
+struct i40iw_device_uk_ops {
+	enum i40iw_status_code (*iwarp_cq_uk_init)(struct i40iw_cq_uk *,
+						   struct i40iw_cq_uk_init_info *);
+	enum i40iw_status_code (*iwarp_qp_uk_init)(struct i40iw_qp_uk *,
+						   struct i40iw_qp_uk_init_info *);
+};
+
+struct i40iw_dev_uk {
+	struct i40iw_device_uk_ops ops_uk;
+};
+
+struct i40iw_sq_uk_wr_trk_info {
+	u64 wrid;
+	u64 wr_len;
+};
+
+struct i40iw_qp_quanta {
+	u64 elem[I40IW_WQE_SIZE];
+};
+
+struct i40iw_qp_uk {
+	struct i40iw_qp_quanta *sq_base;
+	struct i40iw_qp_quanta *rq_base;
+	u32 __iomem *wqe_alloc_reg;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u64 *shadow_area;
+	u32 *push_db;
+	u64 *push_wqe;
+	struct i40iw_ring sq_ring;
+	struct i40iw_ring rq_ring;
+	struct i40iw_ring initial_ring;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	struct i40iw_qp_uk_ops ops;
+	bool use_srq;
+	u8 swqe_polarity;
+	u8 swqe_polarity_deferred;
+	u8 rwqe_polarity;
+	u8 rq_wqe_size;
+	u8 rq_wqe_size_multiplier;
+	u8 max_sq_frag_cnt;
+	u8 max_rq_frag_cnt;
+	bool deferred_flag;
+};
+
+struct i40iw_cq_uk {
+	struct i40iw_cqe *cq_base;
+	u32 __iomem *cqe_alloc_reg;
+	u64 *shadow_area;
+	u32 cq_id;
+	u32 cq_size;
+	struct i40iw_ring cq_ring;
+	u8 polarity;
+	bool avoid_mem_cflct;
+
+	struct i40iw_cq_ops ops;
+};
+
+struct i40iw_qp_uk_init_info {
+	struct i40iw_qp_quanta *sq;
+	struct i40iw_qp_quanta *rq;
+	u32 __iomem *wqe_alloc_reg;
+	u64 *shadow_area;
+	struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
+	u64 *rq_wrid_array;
+	u32 *push_db;
+	u64 *push_wqe;
+	u32 qp_id;
+	u32 sq_size;
+	u32 rq_size;
+	u8 max_sq_frag_cnt;
+	u8 max_rq_frag_cnt;
+
+};
+
+struct i40iw_cq_uk_init_info {
+	u32 __iomem *cqe_alloc_reg;
+	struct i40iw_cqe *cq_base;
+	u64 *shadow_area;
+	u32 cq_size;
+	u32 cq_id;
+	bool avoid_mem_cflct;
+};
+
+void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
+
+void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
+u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
+				u8 wqe_size);
+u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
+u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
+
+enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
+					struct i40iw_cq_uk_init_info *info);
+enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
+					struct i40iw_qp_uk_init_info *info);
+
+void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
+enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
+				 bool signaled, bool post_sq);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u8 frag_cnt, u8 *wqe_size);
+enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
+							 u8 *wqe_size);
+enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u8 sge, u8 *shift);
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
new file mode 100644
index 000000000000..1ceec81bd8eb
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -0,0 +1,1270 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_arp_table - manage arp table
+ * @iwdev: iwarp device
+ * @ip_addr: ip address for device
+ * @mac_addr: mac address ptr
+ * @action: modify, delete or add
+ */
+int i40iw_arp_table(struct i40iw_device *iwdev,
+		    __be32 *ip_addr,
+		    bool ipv4,
+		    u8 *mac_addr,
+		    u32 action)
+{
+	int arp_index;
+	int err;
+	u32 ip[4];
+
+	if (ipv4) {
+		memset(ip, 0, sizeof(ip));
+		ip[0] = *ip_addr;
+	} else {
+		memcpy(ip, ip_addr, sizeof(ip));
+	}
+
+	for (arp_index = 0; (u32)arp_index < iwdev->arp_table_size; arp_index++)
+		if (memcmp(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip)) == 0)
+			break;
+	switch (action) {
+	case I40IW_ARP_ADD:
+		if (arp_index != iwdev->arp_table_size)
+			return -1;
+
+		arp_index = 0;
+		err = i40iw_alloc_resource(iwdev, iwdev->allocated_arps,
+					   iwdev->arp_table_size,
+					   (u32 *)&arp_index,
+					   &iwdev->next_arp_index);
+
+		if (err)
+			return err;
+
+		memcpy(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip));
+		ether_addr_copy(iwdev->arp_table[arp_index].mac_addr, mac_addr);
+		break;
+	case I40IW_ARP_RESOLVE:
+		if (arp_index == iwdev->arp_table_size)
+			return -1;
+		break;
+	case I40IW_ARP_DELETE:
+		if (arp_index == iwdev->arp_table_size)
+			return -1;
+		memset(iwdev->arp_table[arp_index].ip_addr, 0,
+		       sizeof(iwdev->arp_table[arp_index].ip_addr));
+		eth_zero_addr(iwdev->arp_table[arp_index].mac_addr);
+		i40iw_free_resource(iwdev, iwdev->allocated_arps, arp_index);
+		break;
+	default:
+		return -1;
+	}
+	return arp_index;
+}
+
+/**
+ * i40iw_wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @value: vvalue to write to register
+ */
+inline void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value)
+{
+	writel(value, hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
+{
+	return readl(hw->hw_addr + reg);
+}
+
+/**
+ * i40iw_inetaddr_event - system notifier for netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inetaddr_event(struct notifier_block *notifier,
+			 unsigned long event,
+			 void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	struct net_device *netdev;
+	struct net_device *upper_dev;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+	__be32 local_ipaddr;
+
+	hdl = i40iw_find_netdev(event_netdev);
+	if (!hdl)
+		return NOTIFY_DONE;
+
+	iwdev = &hdl->device;
+	netdev = iwdev->ldev->netdev;
+	upper_dev = netdev_master_upper_dev_get(netdev);
+	if (netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_DELETE);
+		return NOTIFY_OK;
+	case NETDEV_UP:
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_ADD);
+		break;
+	case NETDEV_CHANGEADDR:
+		/* Add the address to the IP table */
+		if (upper_dev)
+			local_ipaddr =
+				((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
+		else
+			local_ipaddr = ifa->ifa_address;
+
+		local_ipaddr = ntohl(local_ipaddr);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       &local_ipaddr,
+				       true,
+				       I40IW_ARP_ADD);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_inet6addr_event - system notifier for ipv6 netdev events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int i40iw_inet6addr_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *ptr)
+{
+	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+	struct net_device *event_netdev = ifa->idev->dev;
+	struct net_device *netdev;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *hdl;
+	__be32 local_ipaddr6[4];
+
+	hdl = i40iw_find_netdev(event_netdev);
+	if (!hdl)
+		return NOTIFY_DONE;
+
+	iwdev = &hdl->device;
+	netdev = iwdev->ldev->netdev;
+	if (netdev != event_netdev)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       local_ipaddr6,
+				       false,
+				       I40IW_ARP_DELETE);
+		return NOTIFY_OK;
+	case NETDEV_UP:
+		/* Fall through */
+	case NETDEV_CHANGEADDR:
+		i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+		i40iw_manage_arp_cache(iwdev,
+				       netdev->dev_addr,
+				       local_ipaddr6,
+				       false,
+				       I40IW_ARP_ADD);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_net_event - system notifier for net events
+ * @notfier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ */
+int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *ptr)
+{
+	struct neighbour *neigh = ptr;
+	struct i40iw_device *iwdev;
+	struct i40iw_handler *iwhdl;
+	__be32 *p;
+	u32 local_ipaddr[4];
+
+	switch (event) {
+	case NETEVENT_NEIGH_UPDATE:
+		iwhdl = i40iw_find_netdev((struct net_device *)neigh->dev);
+		if (!iwhdl)
+			return NOTIFY_DONE;
+		iwdev = &iwhdl->device;
+		p = (__be32 *)neigh->primary_key;
+		i40iw_copy_ip_ntohl(local_ipaddr, p);
+		if (neigh->nud_state & NUD_VALID) {
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       local_ipaddr,
+					       false,
+					       I40IW_ARP_ADD);
+
+		} else {
+			i40iw_manage_arp_cache(iwdev,
+					       neigh->ha,
+					       local_ipaddr,
+					       false,
+					       I40IW_ARP_DELETE);
+		}
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/**
+ * i40iw_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: cqp to be used in wait mode
+ */
+struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait)
+{
+	struct i40iw_cqp_request *cqp_request = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cqp->req_lock, flags);
+	if (!list_empty(&cqp->cqp_avail_reqs)) {
+		cqp_request = list_entry(cqp->cqp_avail_reqs.next,
+					 struct i40iw_cqp_request, list);
+		list_del_init(&cqp_request->list);
+	}
+	spin_unlock_irqrestore(&cqp->req_lock, flags);
+	if (!cqp_request) {
+		cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
+		if (cqp_request) {
+			cqp_request->dynamic = true;
+			INIT_LIST_HEAD(&cqp_request->list);
+			init_waitqueue_head(&cqp_request->waitq);
+		}
+	}
+	if (!cqp_request) {
+		i40iw_pr_err("CQP Request Fail: No Memory");
+		return NULL;
+	}
+
+	if (wait) {
+		atomic_set(&cqp_request->refcount, 2);
+		cqp_request->waiting = true;
+	} else {
+		atomic_set(&cqp_request->refcount, 1);
+	}
+	return cqp_request;
+}
+
+/**
+ * i40iw_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
+{
+	unsigned long flags;
+
+	if (cqp_request->dynamic) {
+		kfree(cqp_request);
+	} else {
+		cqp_request->request_done = false;
+		cqp_request->callback_fcn = NULL;
+		cqp_request->waiting = false;
+
+		spin_lock_irqsave(&cqp->req_lock, flags);
+		list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+		spin_unlock_irqrestore(&cqp->req_lock, flags);
+	}
+}
+
+/**
+ * i40iw_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
+			   struct i40iw_cqp_request *cqp_request)
+{
+	if (atomic_dec_and_test(&cqp_request->refcount))
+		i40iw_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * i40iw_free_qp - callback after destroy cqp completes
+ * @cqp_request: cqp request for destroy qp
+ * @num: not used
+ */
+static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
+{
+	struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+	struct i40iw_device *iwdev;
+	u32 qp_num = iwqp->ibqp.qp_num;
+
+	iwdev = iwqp->iwdev;
+
+	i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
+	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+}
+
+/**
+ * i40iw_wait_event - wait for completion
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to wait
+ */
+static int i40iw_wait_event(struct i40iw_device *iwdev,
+			    struct i40iw_cqp_request *cqp_request)
+{
+	struct cqp_commands_info *info = &cqp_request->info;
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	bool cqp_error = false;
+	int err_code = 0;
+	int timeout_ret = 0;
+
+	timeout_ret = wait_event_timeout(cqp_request->waitq,
+					 cqp_request->request_done,
+					 I40IW_EVENT_TIMEOUT);
+	if (!timeout_ret) {
+		i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
+			     info->cqp_cmd, timeout_ret);
+		err_code = -ETIME;
+		i40iw_request_reset(iwdev);
+		goto done;
+	}
+	cqp_error = cqp_request->compl_info.error;
+	if (cqp_error) {
+		i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
+			     info->cqp_cmd, cqp_request->compl_info.maj_err_code,
+			     cqp_request->compl_info.min_err_code);
+		err_code = -EPROTO;
+		goto done;
+	}
+done:
+	i40iw_put_cqp_request(iwcqp, cqp_request);
+	return err_code;
+}
+
+/**
+ * i40iw_handle_cqp_op - process cqp command
+ * @iwdev: iwarp device
+ * @cqp_request: cqp request to process
+ */
+enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
+					   struct i40iw_cqp_request
+					   *cqp_request)
+{
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+	struct cqp_commands_info *info = &cqp_request->info;
+	int err_code = 0;
+
+	status = i40iw_process_cqp_cmd(dev, info);
+	if (status) {
+		i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
+		i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+		return status;
+	}
+	if (cqp_request->waiting)
+		err_code = i40iw_wait_event(iwdev, cqp_request);
+	if (err_code)
+		status = I40IW_ERR_CQP_COMPL_ERROR;
+	return status;
+}
+
+/**
+ * i40iw_add_pdusecount - add pd refcount
+ * @iwpd: pd for refcount
+ */
+void i40iw_add_pdusecount(struct i40iw_pd *iwpd)
+{
+	atomic_inc(&iwpd->usecount);
+}
+
+/**
+ * i40iw_rem_pdusecount - decrement refcount for pd and free if 0
+ * @iwpd: pd for refcount
+ * @iwdev: iwarp device
+ */
+void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
+{
+	if (!atomic_dec_and_test(&iwpd->usecount))
+		return;
+	i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
+	kfree(iwpd);
+}
+
+/**
+ * i40iw_add_ref - add refcount for qp
+ * @ibqp: iqarp qp
+ */
+void i40iw_add_ref(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
+
+	atomic_inc(&iwqp->refcount);
+}
+
+/**
+ * i40iw_rem_ref - rem refcount for qp and free if 0
+ * @ibqp: iqarp qp
+ */
+void i40iw_rem_ref(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev;
+	u32 qp_num;
+
+	iwqp = to_iwqp(ibqp);
+	if (!atomic_dec_and_test(&iwqp->refcount))
+		return;
+
+	iwdev = iwqp->iwdev;
+	qp_num = iwqp->ibqp.qp_num;
+	iwdev->qp_table[qp_num] = NULL;
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_request->callback_fcn = i40iw_free_qp;
+	cqp_request->param = (void *)&iwqp->sc_qp;
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_QP_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
+	cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+	cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ */
+struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
+{
+	struct i40iw_device *iwdev = to_iwdev(device);
+
+	if ((qpn < IW_FIRST_QPN) || (qpn >= iwdev->max_qp))
+		return NULL;
+
+	return &iwdev->qp_table[qpn]->ibqp;
+}
+
+/**
+ * i40iw_debug_buf - print debug msg and buffer is mask set
+ * @dev: hardware control device structure
+ * @mask: mask to compare if to print debug buffer
+ * @buf: points buffer addr
+ * @size: saize of buffer to print
+ */
+void i40iw_debug_buf(struct i40iw_sc_dev *dev,
+		     enum i40iw_debug_flag mask,
+		     char *desc,
+		     u64 *buf,
+		     u32 size)
+{
+	u32 i;
+
+	if (!(dev->debug_mask & mask))
+		return;
+	i40iw_debug(dev, mask, "%s\n", desc);
+	i40iw_debug(dev, mask, "starting address virt=%p phy=%llxh\n", buf,
+		    (unsigned long long)virt_to_phys(buf));
+
+	for (i = 0; i < size; i += 8)
+		i40iw_debug(dev, mask, "index %03d val: %016llx\n", i, buf[i / 8]);
+}
+
+/**
+ * i40iw_get_hw_addr - return hw addr
+ * @par: points to shared dev
+ */
+u8 __iomem *i40iw_get_hw_addr(void *par)
+{
+	struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)par;
+
+	return dev->hw->hw_addr;
+}
+
+/**
+ * i40iw_remove_head - return head entry and remove from list
+ * @list: list for entry
+ */
+void *i40iw_remove_head(struct list_head *list)
+{
+	struct list_head *entry;
+
+	if (list_empty(list))
+		return NULL;
+
+	entry = (void *)list->next;
+	list_del(entry);
+	return (void *)entry;
+}
+
+/**
+ * i40iw_allocate_dma_mem - Memory alloc helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ */
+enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
+					      struct i40iw_dma_mem *mem,
+					      u64 size,
+					      u32 alignment)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+	if (!mem)
+		return I40IW_ERR_PARAM;
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_zalloc_coherent(&pcidev->dev, mem->size,
+				      (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (!mem->va)
+		return I40IW_ERR_NO_MEMORY;
+	return 0;
+}
+
+/**
+ * i40iw_free_dma_mem - Memory free helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ */
+void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
+{
+	struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
+
+	if (!mem || !mem->va)
+		return;
+
+	dma_free_coherent(&pcidev->dev, mem->size,
+			  mem->va, (dma_addr_t)mem->pa);
+	mem->va = NULL;
+}
+
+/**
+ * i40iw_allocate_virt_mem - virtual memory alloc helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ */
+enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
+					       struct i40iw_virt_mem *mem,
+					       u32 size)
+{
+	if (!mem)
+		return I40IW_ERR_PARAM;
+
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (mem->va)
+		return 0;
+	else
+		return I40IW_ERR_NO_MEMORY;
+}
+
+/**
+ * i40iw_free_virt_mem - virtual memory free helper fn
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ */
+enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
+					   struct i40iw_virt_mem *mem)
+{
+	if (!mem)
+		return I40IW_ERR_PARAM;
+	kfree(mem->va);
+	mem->va = NULL;
+	return 0;
+}
+
+/**
+ * i40iw_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sd_info: information  for sd cqp
+ *
+ */
+enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
+					 struct i40iw_update_sds_info *sdinfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+	       sizeof(cqp_info->in.u.update_pe_sds.info));
+	cqp_info->cqp_cmd = OP_UPDATE_PE_SDS;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.update_pe_sds.dev = dev;
+	cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Update SD's fail");
+	return status;
+}
+
+/**
+ * i40iw_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length
+ */
+void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	i40iw_next_iw_state(iwqp, next_state, 0, term, term_len);
+};
+
+/**
+ * i40iw_terminate_done - after terminate is completed
+ * @qp: hardware control qp
+ * @timeout_occurred: indicates if terminate timer expired
+ */
+void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
+{
+	struct i40iw_qp *iwqp;
+	u32 next_iwarp_state = I40IW_QP_STATE_ERROR;
+	u8 hte = 0;
+	bool first_time;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	spin_lock_irqsave(&iwqp->lock, flags);
+	if (iwqp->hte_added) {
+		iwqp->hte_added = 0;
+		hte = 1;
+	}
+	first_time = !(qp->term_flags & I40IW_TERM_DONE);
+	qp->term_flags |= I40IW_TERM_DONE;
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (first_time) {
+		if (!timeout_occurred)
+			i40iw_terminate_del_timer(qp);
+		else
+			next_iwarp_state = I40IW_QP_STATE_CLOSING;
+
+		i40iw_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
+		i40iw_cm_disconn(iwqp);
+	}
+}
+
+/**
+ * i40iw_terminate_imeout - timeout happened
+ * @context: points to iwarp qp
+ */
+static void i40iw_terminate_timeout(unsigned long context)
+{
+	struct i40iw_qp *iwqp = (struct i40iw_qp *)context;
+	struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
+
+	i40iw_terminate_done(qp, 1);
+}
+
+/**
+ * i40iw_terminate_start_timer - start terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	init_timer(&iwqp->terminate_timer);
+	iwqp->terminate_timer.function = i40iw_terminate_timeout;
+	iwqp->terminate_timer.expires = jiffies + HZ;
+	iwqp->terminate_timer.data = (unsigned long)iwqp;
+	add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp
+ */
+void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp *iwqp;
+
+	iwqp = (struct i40iw_qp *)qp->back_qp;
+	del_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * i40iw_cqp_generic_worker - generic worker for cqp
+ * @work: work pointer
+ */
+static void i40iw_cqp_generic_worker(struct work_struct *work)
+{
+	struct i40iw_virtchnl_work_info *work_info =
+	    &((struct virtchnl_work *)work)->work_info;
+
+	if (work_info->worker_vf_dev)
+		work_info->callback_fcn(work_info->worker_vf_dev);
+}
+
+/**
+ * i40iw_cqp_spawn_worker - spawn worket thread
+ * @iwdev: device struct pointer
+ * @work_info: work request info
+ * @iw_vf_idx: virtual function index
+ */
+void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
+			    struct i40iw_virtchnl_work_info *work_info,
+			    u32 iw_vf_idx)
+{
+	struct virtchnl_work *work;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	work = &iwdev->virtchnl_w[iw_vf_idx];
+	memcpy(&work->work_info, work_info, sizeof(*work_info));
+	INIT_WORK(&work->work, i40iw_cqp_generic_worker);
+	queue_work(iwdev->virtchnl_wq, &work->work);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_worker -
+ * @work: work pointer for hmc info
+ */
+static void i40iw_cqp_manage_hmc_fcn_worker(struct work_struct *work)
+{
+	struct i40iw_cqp_request *cqp_request =
+	    ((struct virtchnl_work *)work)->cqp_request;
+	struct i40iw_ccq_cqe_info ccq_cqe_info;
+	struct i40iw_hmc_fcn_info *hmcfcninfo =
+			&cqp_request->info.in.u.manage_hmc_pm.info;
+	struct i40iw_device *iwdev =
+	    (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->back_dev;
+
+	ccq_cqe_info.cqp = NULL;
+	ccq_cqe_info.maj_err_code = cqp_request->compl_info.maj_err_code;
+	ccq_cqe_info.min_err_code = cqp_request->compl_info.min_err_code;
+	ccq_cqe_info.op_code = cqp_request->compl_info.op_code;
+	ccq_cqe_info.op_ret_val = cqp_request->compl_info.op_ret_val;
+	ccq_cqe_info.scratch = 0;
+	ccq_cqe_info.error = cqp_request->compl_info.error;
+	hmcfcninfo->callback_fcn(cqp_request->info.in.u.manage_hmc_pm.dev,
+				 hmcfcninfo->cqp_callback_param, &ccq_cqe_info);
+	i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_callback - called function after cqp completion
+ * @cqp_request: cqp request info struct for hmc fun
+ * @unused: unused param of callback
+ */
+static void i40iw_cqp_manage_hmc_fcn_callback(struct i40iw_cqp_request *cqp_request,
+					      u32 unused)
+{
+	struct virtchnl_work *work;
+	struct i40iw_hmc_fcn_info *hmcfcninfo =
+	    &cqp_request->info.in.u.manage_hmc_pm.info;
+	struct i40iw_device *iwdev =
+	    (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->
+	    back_dev;
+
+	if (hmcfcninfo && hmcfcninfo->callback_fcn) {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s1\n", __func__);
+		atomic_inc(&cqp_request->refcount);
+		work = &iwdev->virtchnl_w[hmcfcninfo->iw_vf_idx];
+		work->cqp_request = cqp_request;
+		INIT_WORK(&work->work, i40iw_cqp_manage_hmc_fcn_worker);
+		queue_work(iwdev->virtchnl_wq, &work->work);
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s2\n", __func__);
+	} else {
+		i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s: Something wrong\n", __func__);
+	}
+}
+
+/**
+ * i40iw_cqp_manage_hmc_fcn_cmd - issue cqp command to manage hmc
+ * @dev: hardware control device structure
+ * @hmcfcninfo: info for hmc
+ */
+enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
+						    struct i40iw_hmc_fcn_info *hmcfcninfo)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s\n", __func__);
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->callback_fcn = i40iw_cqp_manage_hmc_fcn_callback;
+	cqp_request->param = hmcfcninfo;
+	memcpy(&cqp_info->in.u.manage_hmc_pm.info, hmcfcninfo,
+	       sizeof(*hmcfcninfo));
+	cqp_info->in.u.manage_hmc_pm.dev = dev;
+	cqp_info->cqp_cmd = OP_MANAGE_HMC_PM_FUNC_TABLE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.manage_hmc_pm.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Manage HMC fail");
+	return status;
+}
+
+/**
+ * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
+ * @iwdev: function device struct
+ * @values_mem: buffer for fpm
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						      struct i40iw_dma_mem *values_mem,
+						      u8 hmc_fn_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->param = NULL;
+	cqp_info->in.u.query_fpm_values.cqp = dev->cqp;
+	cqp_info->in.u.query_fpm_values.fpm_values_pa = values_mem->pa;
+	cqp_info->in.u.query_fpm_values.fpm_values_va = values_mem->va;
+	cqp_info->in.u.query_fpm_values.hmc_fn_id = hmc_fn_id;
+	cqp_info->cqp_cmd = OP_QUERY_FPM_VALUES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.query_fpm_values.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Query FPM fail");
+	return status;
+}
+
+/**
+ * i40iw_cqp_commit_fpm_values_cmd - commit fpm values in hw
+ * @dev: hardware control device structure
+ * @values_mem: buffer with fpm values
+ * @hmc_fn_id: function id for fpm
+ */
+enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
+						       struct i40iw_dma_mem *values_mem,
+						       u8 hmc_fn_id)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
+	cqp_info = &cqp_request->info;
+	cqp_request->param = NULL;
+	cqp_info->in.u.commit_fpm_values.cqp = dev->cqp;
+	cqp_info->in.u.commit_fpm_values.fpm_values_pa = values_mem->pa;
+	cqp_info->in.u.commit_fpm_values.fpm_values_va = values_mem->va;
+	cqp_info->in.u.commit_fpm_values.hmc_fn_id = hmc_fn_id;
+	cqp_info->cqp_cmd = OP_COMMIT_FPM_VALUES;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.commit_fpm_values.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Commit FPM fail");
+	return status;
+}
+
+/**
+ * i40iw_vf_wait_vchnl_resp - wait for channel msg
+ * @iwdev: function's device struct
+ */
+enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_device *iwdev = dev->back_dev;
+	enum i40iw_status_code err_code = 0;
+	int timeout_ret;
+
+	i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
+		    __func__, __LINE__, dev, iwdev);
+	atomic_add(2, &iwdev->vchnl_msgs);
+	timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
+					 (atomic_read(&iwdev->vchnl_msgs) == 1),
+					 I40IW_VCHNL_EVENT_TIMEOUT);
+	atomic_dec(&iwdev->vchnl_msgs);
+	if (!timeout_ret) {
+		i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
+		err_code = I40IW_ERR_TIMEOUT;
+	}
+	return err_code;
+}
+
+/**
+ * i40iw_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_qp_flush_info info;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
+	memset(&info, 0, sizeof(info));
+	info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+	info.generate_ae = true;
+	info.ae_source = 0x3;
+	(void)i40iw_hw_flush_wqes(iwdev, qp, &info, false);
+}
+
+/**
+ * i40iw_init_hash_desc - initialize hash for crc calculation
+ * @desc: cryption type
+ */
+enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc)
+{
+	struct crypto_shash *tfm;
+	struct shash_desc *tdesc;
+
+	tfm = crypto_alloc_shash("crc32c", 0, 0);
+	if (IS_ERR(tfm))
+		return I40IW_ERR_MPA_CRC;
+
+	tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
+			GFP_KERNEL);
+	if (!tdesc) {
+		crypto_free_shash(tfm);
+		return I40IW_ERR_MPA_CRC;
+	}
+	tdesc->tfm = tfm;
+	*desc = tdesc;
+
+	return 0;
+}
+
+/**
+ * i40iw_free_hash_desc - free hash desc
+ * @desc: to be freed
+ */
+void i40iw_free_hash_desc(struct shash_desc *desc)
+{
+	if (desc) {
+		crypto_free_shash(desc->tfm);
+		kfree(desc);
+	}
+}
+
+/**
+ * i40iw_alloc_query_fpm_buf - allocate buffer for fpm
+ * @dev: hardware control device structure
+ * @mem: buffer ptr for fpm to be allocated
+ * @return: memory allocation status
+ */
+enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
+						 struct i40iw_dma_mem *mem)
+{
+	enum i40iw_status_code status;
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+	status = i40iw_obj_aligned_mem(iwdev, mem, I40IW_QUERY_FPM_BUF_SIZE,
+				       I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
+	return status;
+}
+
+/**
+ * i40iw_ieq_check_mpacrc - check if mpa crc is OK
+ * @desc: desc for hash
+ * @addr: address of buffer for crc
+ * @length: length of buffer
+ * @value: value to be compared
+ */
+enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
+					      void *addr,
+					      u32 length,
+					      u32 value)
+{
+	u32 crc = 0;
+	int ret;
+	enum i40iw_status_code ret_code = 0;
+
+	crypto_shash_init(desc);
+	ret = crypto_shash_update(desc, addr, length);
+	if (!ret)
+		crypto_shash_final(desc, (u8 *)&crc);
+	if (crc != value) {
+		i40iw_pr_err("mpa crc check fail\n");
+		ret_code = I40IW_ERR_MPA_CRC;
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q
+ */
+struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
+				     struct i40iw_puda_buf *buf)
+{
+	struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+	struct i40iw_qp *iwqp;
+	struct i40iw_cm_node *cm_node;
+	u32 loc_addr[4], rem_addr[4];
+	u16 loc_port, rem_port;
+	struct ipv6hdr *ip6h;
+	struct iphdr *iph = (struct iphdr *)buf->iph;
+	struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+	if (iph->version == 4) {
+		memset(loc_addr, 0, sizeof(loc_addr));
+		loc_addr[0] = ntohl(iph->daddr);
+		memset(rem_addr, 0, sizeof(rem_addr));
+		rem_addr[0] = ntohl(iph->saddr);
+	} else {
+		ip6h = (struct ipv6hdr *)buf->iph;
+		i40iw_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+		i40iw_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+	}
+	loc_port = ntohs(tcph->dest);
+	rem_port = ntohs(tcph->source);
+
+	cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+				  loc_addr, false);
+	if (!cm_node)
+		return NULL;
+	iwqp = cm_node->iwqp;
+	return &iwqp->sc_qp;
+}
+
+/**
+ * i40iw_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @length: length of buffer
+ * @seqnum: seq number for tcp
+ */
+void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum)
+{
+	struct tcphdr *tcph;
+	struct iphdr *iph;
+	u16 iphlen;
+	u16 packetsize;
+	u8 *addr = (u8 *)buf->mem.va;
+
+	iphlen = (buf->ipv4) ? 20 : 40;
+	iph = (struct iphdr *)(addr + buf->maclen);
+	tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+	packetsize = length + buf->tcphlen + iphlen;
+
+	iph->tot_len = htons(packetsize);
+	tcph->seq = htonl(seqnum);
+}
+
+/**
+ * i40iw_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
+						 struct i40iw_puda_buf *buf)
+{
+	struct iphdr *iph;
+	struct ipv6hdr *ip6h;
+	struct tcphdr *tcph;
+	u16 iphlen;
+	u16 pkt_len;
+	u8 *mem = (u8 *)buf->mem.va;
+	struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
+
+	if (ethh->h_proto == htons(0x8100)) {
+		info->vlan_valid = true;
+		buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) & VLAN_VID_MASK;
+	}
+	buf->maclen = (info->vlan_valid) ? 18 : 14;
+	iphlen = (info->l3proto) ? 40 : 20;
+	buf->ipv4 = (info->l3proto) ? false : true;
+	buf->iph = mem + buf->maclen;
+	iph = (struct iphdr *)buf->iph;
+
+	buf->tcph = buf->iph + iphlen;
+	tcph = (struct tcphdr *)buf->tcph;
+
+	if (buf->ipv4) {
+		pkt_len = ntohs(iph->tot_len);
+	} else {
+		ip6h = (struct ipv6hdr *)buf->iph;
+		pkt_len = ntohs(ip6h->payload_len) + iphlen;
+	}
+
+	buf->totallen = pkt_len + buf->maclen;
+
+	if (info->payload_len < buf->totallen - 4) {
+		i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
+			     info->payload_len, buf->totallen);
+		return I40IW_ERR_INVALID_SIZE;
+	}
+
+	buf->tcphlen = (tcph->doff) << 2;
+	buf->datalen = pkt_len - iphlen - buf->tcphlen;
+	buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
+	buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+	buf->seqnum = ntohl(tcph->seq);
+	return 0;
+}
+
+/**
+ * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
+ * @dev: hardware control device structure
+ */
+static void i40iw_hw_stats_timeout(unsigned long dev)
+{
+	struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
+	struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
+	struct i40iw_dev_pestat *vf_devstat = NULL;
+	u16 iw_vf_idx;
+	unsigned long flags;
+
+	/*PF*/
+	pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
+		spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+		if (pf_dev->vf_dev[iw_vf_idx]) {
+			if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
+				vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
+				vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+			}
+		}
+		spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+	}
+
+	mod_timer(&pf_devstat->stats_timer,
+		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_start_timer - Start periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+	init_timer(&devstat->stats_timer);
+	devstat->stats_timer.function = i40iw_hw_stats_timeout;
+	devstat->stats_timer.data = (unsigned long)dev;
+	mod_timer(&devstat->stats_timer,
+		  jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * i40iw_hw_stats_del_timer - Delete periodic stats timer
+ * @dev: hardware control device structure
+ */
+void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+{
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+
+	del_timer_sync(&devstat->stats_timer);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
new file mode 100644
index 000000000000..1fe3b84a06e4
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -0,0 +1,2437 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/highmem.h>
+#include <linux/time.h>
+#include <asm/byteorder.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include "i40iw.h"
+
+/**
+ * i40iw_query_device - get device attributes
+ * @ibdev: device pointer from stack
+ * @props: returning device attributes
+ * @udata: user data
+ */
+static int i40iw_query_device(struct ib_device *ibdev,
+			      struct ib_device_attr *props,
+			      struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+	if (udata->inlen || udata->outlen)
+		return -EINVAL;
+	memset(props, 0, sizeof(*props));
+	ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
+	props->fw_ver = I40IW_FW_VERSION;
+	props->device_cap_flags = iwdev->device_cap_flags;
+	props->vendor_id = iwdev->vendor_id;
+	props->vendor_part_id = iwdev->vendor_part_id;
+	props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
+	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
+	props->max_qp = iwdev->max_qp;
+	props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
+	props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_cq = iwdev->max_cq;
+	props->max_cqe = iwdev->max_cqe;
+	props->max_mr = iwdev->max_mr;
+	props->max_pd = iwdev->max_pd;
+	props->max_sge_rd = 1;
+	props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
+	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
+	props->atomic_cap = IB_ATOMIC_NONE;
+	props->max_map_per_fmr = 1;
+	return 0;
+}
+
+/**
+ * i40iw_query_port - get port attrubutes
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: returning device attributes
+ */
+static int i40iw_query_port(struct ib_device *ibdev,
+			    u8 port,
+			    struct ib_port_attr *props)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct net_device *netdev = iwdev->netdev;
+
+	memset(props, 0, sizeof(*props));
+
+	props->max_mtu = IB_MTU_4096;
+	if (netdev->mtu >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (netdev->mtu >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (netdev->mtu >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (netdev->mtu >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+
+	props->lid = 1;
+	if (netif_carrier_ok(iwdev->netdev))
+		props->state = IB_PORT_ACTIVE;
+	else
+		props->state = IB_PORT_DOWN;
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+		IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->active_width = IB_WIDTH_4X;
+	props->active_speed = 1;
+	props->max_msg_sz = 0x80000000;
+	return 0;
+}
+
+/**
+ * i40iw_alloc_ucontext - Allocate the user context data structure
+ * @ibdev: device pointer from stack
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ */
+static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
+						struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_alloc_ucontext_req req;
+	struct i40iw_alloc_ucontext_resp uresp;
+	struct i40iw_ucontext *ucontext;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req)))
+		return ERR_PTR(-EINVAL);
+
+	if (req.userspace_ver != I40IW_ABI_USERSPACE_VER) {
+		i40iw_pr_err("Invalid userspace driver version detected. Detected version %d, should be %d\n",
+			     req.userspace_ver, I40IW_ABI_USERSPACE_VER);
+		return ERR_PTR(-EINVAL);
+	}
+
+	memset(&uresp, 0, sizeof(uresp));
+	uresp.max_qps = iwdev->max_qp;
+	uresp.max_pds = iwdev->max_pd;
+	uresp.wq_size = iwdev->max_qp_wr * 2;
+	uresp.kernel_ver = I40IW_ABI_KERNEL_VER;
+
+	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
+	if (!ucontext)
+		return ERR_PTR(-ENOMEM);
+
+	ucontext->iwdev = iwdev;
+
+	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+		kfree(ucontext);
+		return ERR_PTR(-EFAULT);
+	}
+
+	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+
+	return &ucontext->ibucontext;
+}
+
+/**
+ * i40iw_dealloc_ucontext - deallocate the user context data structure
+ * @context: user context created during alloc
+ */
+static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct i40iw_ucontext *ucontext = to_ucontext(context);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+	if (!list_empty(&ucontext->cq_reg_mem_list)) {
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+	spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+	if (!list_empty(&ucontext->qp_reg_mem_list)) {
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		return -EBUSY;
+	}
+	spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+	kfree(ucontext);
+	return 0;
+}
+
+/**
+ * i40iw_mmap - user memory map
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ */
+static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	struct i40iw_ucontext *ucontext;
+	u64 db_addr_offset;
+	u64 push_offset;
+
+	ucontext = to_ucontext(context);
+	if (ucontext->iwdev->sc_dev.is_pf) {
+		db_addr_offset = I40IW_DB_ADDR_OFFSET;
+		push_offset = I40IW_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
+	} else {
+		db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
+		push_offset = I40IW_VF_PUSH_OFFSET;
+		if (vma->vm_pgoff)
+			vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
+	}
+
+	vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
+
+	if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		vma->vm_private_data = ucontext;
+	} else {
+		if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		else
+			vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (io_remap_pfn_range(vma, vma->vm_start,
+			       vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
+			       PAGE_SIZE, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+/**
+ * i40iw_alloc_push_page - allocate a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
+		return;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return;
+
+	atomic_inc(&cqp_request->refcount);
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+	cqp_info->post_sq = 1;
+
+	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.free_page = 0;
+	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		qp->push_idx = cqp_request->compl_info.op_ret_val;
+	else
+		i40iw_pr_err("CQP-OP Push page fail");
+	i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
+}
+
+/**
+ * i40iw_dealloc_push_page - free a push page for qp
+ * @iwdev: iwarp device
+ * @qp: hardware control qp
+ */
+static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
+{
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	enum i40iw_status_code status;
+
+	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
+		return;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
+	cqp_info->post_sq = 1;
+
+	cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+	cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+	cqp_info->in.u.manage_push_page.info.free_page = 1;
+	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
+	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (!status)
+		qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+	else
+		i40iw_pr_err("CQP-OP Push page fail");
+}
+
+/**
+ * i40iw_alloc_pd - allocate protection domain
+ * @ibdev: device pointer from stack
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
+				    struct ib_ucontext *context,
+				    struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd;
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_alloc_pd_resp uresp;
+	struct i40iw_sc_pd *sc_pd;
+	u32 pd_id = 0;
+	int err;
+
+	err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
+				   iwdev->max_pd, &pd_id, &iwdev->next_pd);
+	if (err) {
+		i40iw_pr_err("alloc resource failed\n");
+		return ERR_PTR(err);
+	}
+
+	iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
+	if (!iwpd) {
+		err = -ENOMEM;
+		goto free_res;
+	}
+
+	sc_pd = &iwpd->sc_pd;
+	dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id);
+
+	if (context) {
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.pd_id = pd_id;
+		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+			err = -EFAULT;
+			goto error;
+		}
+	}
+
+	i40iw_add_pdusecount(iwpd);
+	return &iwpd->ibpd;
+error:
+	kfree(iwpd);
+free_res:
+	i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
+	return ERR_PTR(err);
+}
+
+/**
+ * i40iw_dealloc_pd - deallocate pd
+ * @ibpd: ptr of pd to be deallocated
+ */
+static int i40iw_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+
+	i40iw_rem_pdusecount(iwpd, iwdev);
+	return 0;
+}
+
+/**
+ * i40iw_qp_roundup - return round up qp ring size
+ * @wr_ring_size: ring size to round up
+ */
+static int i40iw_qp_roundup(u32 wr_ring_size)
+{
+	int scount = 1;
+
+	if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE)
+		wr_ring_size = I40IWQP_SW_MIN_WQSIZE;
+
+	for (wr_ring_size--; scount <= 16; scount *= 2)
+		wr_ring_size |= wr_ring_size >> scount;
+	return ++wr_ring_size;
+}
+
+/**
+ * i40iw_get_pbl - Retrieve pbl from a list given a virtual
+ * address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ */
+static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
+				       struct list_head *pbl_list)
+{
+	struct i40iw_pbl *iwpbl;
+
+	list_for_each_entry(iwpbl, pbl_list, list) {
+		if (iwpbl->user_base == va) {
+			list_del(&iwpbl->list);
+			return iwpbl;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * i40iw_free_qp_resources - free up memory resources for qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @qp_num: qp number assigned
+ */
+void i40iw_free_qp_resources(struct i40iw_device *iwdev,
+			     struct i40iw_qp *iwqp,
+			     u32 qp_num)
+{
+	i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
+	if (qp_num)
+		i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
+	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
+	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
+	kfree(iwqp->kqp.wrid_mem);
+	iwqp->kqp.wrid_mem = NULL;
+	kfree(iwqp->allocated_buffer);
+	iwqp->allocated_buffer = NULL;
+}
+
+/**
+ * i40iw_clean_cqes - clean cq entries for qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
+{
+	struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+
+	ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
+}
+
+/**
+ * i40iw_destroy_qp - destroy qp
+ * @ibqp: qp's ib pointer also to get to device's qp address
+ */
+static int i40iw_destroy_qp(struct ib_qp *ibqp)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+
+	iwqp->destroyed = 1;
+
+	if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
+		i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
+
+	if (!iwqp->user_mode) {
+		if (iwqp->iwscq) {
+			i40iw_clean_cqes(iwqp, iwqp->iwscq);
+			if (iwqp->iwrcq != iwqp->iwscq)
+				i40iw_clean_cqes(iwqp, iwqp->iwrcq);
+		}
+	}
+
+	i40iw_rem_ref(&iwqp->ibqp);
+	return 0;
+}
+
+/**
+ * i40iw_setup_virt_qp - setup for allocation of virtual qp
+ * @dev: iwarp device
+ * @qp: qp ptr
+ * @init_info: initialize info to return
+ */
+static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
+			       struct i40iw_qp *iwqp,
+			       struct i40iw_qp_init_info *init_info)
+{
+	struct i40iw_pbl *iwpbl = iwqp->iwpbl;
+	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+
+	iwqp->page = qpmr->sq_page;
+	init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
+	if (iwpbl->pbl_allocated) {
+		init_info->virtual_map = true;
+		init_info->sq_pa = qpmr->sq_pbl.idx;
+		init_info->rq_pa = qpmr->rq_pbl.idx;
+	} else {
+		init_info->sq_pa = qpmr->sq_pbl.addr;
+		init_info->rq_pa = qpmr->rq_pbl.addr;
+	}
+	return 0;
+}
+
+/**
+ * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ */
+static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
+				struct i40iw_qp *iwqp,
+				struct i40iw_qp_init_info *info)
+{
+	struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
+	u32 sqdepth, rqdepth;
+	u32 sq_size, rq_size;
+	u8 sqshift, rqshift;
+	u32 size;
+	enum i40iw_status_code status;
+	struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+	ukinfo->max_sq_frag_cnt = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
+	sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
+	rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
+
+	status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, &sqshift);
+	if (!status)
+		status = i40iw_get_wqe_shift(rq_size, ukinfo->max_rq_frag_cnt, &rqshift);
+
+	if (status)
+		return -ENOSYS;
+
+	sqdepth = sq_size << sqshift;
+	rqdepth = rq_size << rqshift;
+
+	size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
+	iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
+
+	ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
+	if (!ukinfo->sq_wrtrk_array)
+		return -ENOMEM;
+
+	ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
+
+	size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
+	size += (I40IW_SHADOW_AREA_SIZE << 3);
+
+	status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
+	if (status) {
+		kfree(ukinfo->sq_wrtrk_array);
+		ukinfo->sq_wrtrk_array = NULL;
+		return -ENOMEM;
+	}
+
+	ukinfo->sq = mem->va;
+	info->sq_pa = mem->pa;
+
+	ukinfo->rq = &ukinfo->sq[sqdepth];
+	info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+	ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
+	info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
+
+	ukinfo->sq_size = sq_size;
+	ukinfo->rq_size = rq_size;
+	ukinfo->qp_id = iwqp->ibqp.qp_num;
+	return 0;
+}
+
+/**
+ * i40iw_create_qp - create qp
+ * @ibpd: ptr of pd
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
+				     struct ib_qp_init_attr *init_attr,
+				     struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
+	struct i40iw_cqp *iwcqp = &iwdev->cqp;
+	struct i40iw_qp *iwqp;
+	struct i40iw_ucontext *ucontext;
+	struct i40iw_create_qp_req req;
+	struct i40iw_create_qp_resp uresp;
+	u32 qp_num = 0;
+	void *mem;
+	enum i40iw_status_code ret;
+	int err_code;
+	int sq_size;
+	int rq_size;
+	struct i40iw_sc_qp *qp;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_qp_init_info init_info;
+	struct i40iw_create_qp_info *qp_info;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	unsigned long flags;
+
+	if (init_attr->create_flags)
+		return ERR_PTR(-EINVAL);
+	if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
+		init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+
+	memset(&init_info, 0, sizeof(init_info));
+
+	sq_size = init_attr->cap.max_send_wr;
+	rq_size = init_attr->cap.max_recv_wr;
+
+	init_info.qp_uk_init_info.sq_size = sq_size;
+	init_info.qp_uk_init_info.rq_size = rq_size;
+	init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+	init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+
+	mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
+	if (!mem)
+		return ERR_PTR(-ENOMEM);
+
+	iwqp = (struct i40iw_qp *)mem;
+	qp = &iwqp->sc_qp;
+	qp->back_qp = (void *)iwqp;
+	qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
+
+	iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
+
+	if (i40iw_allocate_dma_mem(dev->hw,
+				   &iwqp->q2_ctx_mem,
+				   I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
+				   256)) {
+		i40iw_pr_err("dma_mem failed\n");
+		err_code = -ENOMEM;
+		goto error;
+	}
+
+	init_info.q2 = iwqp->q2_ctx_mem.va;
+	init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+
+	init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
+	init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
+
+	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
+					&qp_num, &iwdev->next_qp);
+	if (err_code) {
+		i40iw_pr_err("qp resource\n");
+		goto error;
+	}
+
+	iwqp->allocated_buffer = mem;
+	iwqp->iwdev = iwdev;
+	iwqp->iwpd = iwpd;
+	iwqp->ibqp.qp_num = qp_num;
+	qp = &iwqp->sc_qp;
+	iwqp->iwscq = to_iwcq(init_attr->send_cq);
+	iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+
+	iwqp->host_ctx.va = init_info.host_ctx;
+	iwqp->host_ctx.pa = init_info.host_ctx_pa;
+	iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
+
+	init_info.pd = &iwpd->sc_pd;
+	init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
+	iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+
+	if (init_attr->qp_type != IB_QPT_RC) {
+		err_code = -ENOSYS;
+		goto error;
+	}
+	if (iwdev->push_mode)
+		i40iw_alloc_push_page(iwdev, qp);
+	if (udata) {
+		err_code = ib_copy_from_udata(&req, udata, sizeof(req));
+		if (err_code) {
+			i40iw_pr_err("ib_copy_from_data\n");
+			goto error;
+		}
+		iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+		if (ibpd->uobject && ibpd->uobject->context) {
+			iwqp->user_mode = 1;
+			ucontext = to_ucontext(ibpd->uobject->context);
+
+			if (req.user_wqe_buffers) {
+				spin_lock_irqsave(
+				    &ucontext->qp_reg_mem_list_lock, flags);
+				iwqp->iwpbl = i40iw_get_pbl(
+				    (unsigned long)req.user_wqe_buffers,
+				    &ucontext->qp_reg_mem_list);
+				spin_unlock_irqrestore(
+				    &ucontext->qp_reg_mem_list_lock, flags);
+
+				if (!iwqp->iwpbl) {
+					err_code = -ENODATA;
+					i40iw_pr_err("no pbl info\n");
+					goto error;
+				}
+			}
+		}
+		err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
+	} else {
+		err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
+	}
+
+	if (err_code) {
+		i40iw_pr_err("setup qp failed\n");
+		goto error;
+	}
+
+	init_info.type = I40IW_QP_TYPE_IWARP;
+	ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
+	if (ret) {
+		err_code = -EPROTO;
+		i40iw_pr_err("qp_init fail\n");
+		goto error;
+	}
+	ctx_info = &iwqp->ctx_info;
+	iwarp_info = &iwqp->iwarp_info;
+	iwarp_info->rd_enable = true;
+	iwarp_info->wr_rdresp_en = true;
+	if (!iwqp->user_mode)
+		iwarp_info->priv_mode_en = true;
+	iwarp_info->ddp_ver = 1;
+	iwarp_info->rdmap_ver = 1;
+
+	ctx_info->iwarp_info_valid = true;
+	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
+		ctx_info->push_mode_en = false;
+	} else {
+		ctx_info->push_mode_en = true;
+		ctx_info->push_idx = qp->push_idx;
+	}
+
+	ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+					     (u64 *)iwqp->host_ctx.va,
+					     ctx_info);
+	ctx_info->iwarp_info_valid = false;
+	cqp_request = i40iw_get_cqp_request(iwcqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto error;
+	}
+	cqp_info = &cqp_request->info;
+	qp_info = &cqp_request->info.in.u.qp_create.info;
+
+	memset(qp_info, 0, sizeof(*qp_info));
+
+	qp_info->cq_num_valid = true;
+	qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
+
+	cqp_info->cqp_cmd = OP_QP_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_create.qp = qp;
+	cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+	ret = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (ret) {
+		i40iw_pr_err("CQP-OP QP create fail");
+		err_code = -EACCES;
+		goto error;
+	}
+
+	i40iw_add_ref(&iwqp->ibqp);
+	spin_lock_init(&iwqp->lock);
+	iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+	iwdev->qp_table[qp_num] = iwqp;
+	i40iw_add_pdusecount(iwqp->iwpd);
+	if (ibpd->uobject && udata) {
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.actual_sq_size = sq_size;
+		uresp.actual_rq_size = rq_size;
+		uresp.qp_id = qp_num;
+		uresp.push_idx = qp->push_idx;
+		err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (err_code) {
+			i40iw_pr_err("copy_to_udata failed\n");
+			i40iw_destroy_qp(&iwqp->ibqp);
+			   /* let the completion of the qp destroy free the qp */
+			return ERR_PTR(err_code);
+		}
+	}
+
+	return &iwqp->ibqp;
+error:
+	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+	kfree(mem);
+	return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_query - query qp attributes
+ * @ibqp: qp pointer
+ * @attr: attributes pointer
+ * @attr_mask: Not used
+ * @init_attr: qp attributes to return
+ */
+static int i40iw_query_qp(struct ib_qp *ibqp,
+			  struct ib_qp_attr *attr,
+			  int attr_mask,
+			  struct ib_qp_init_attr *init_attr)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
+
+	attr->qp_access_flags = 0;
+	attr->cap.max_send_wr = qp->qp_uk.sq_size;
+	attr->cap.max_recv_wr = qp->qp_uk.rq_size;
+	attr->cap.max_recv_sge = 1;
+	attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+	init_attr->event_handler = iwqp->ibqp.event_handler;
+	init_attr->qp_context = iwqp->ibqp.qp_context;
+	init_attr->send_cq = iwqp->ibqp.send_cq;
+	init_attr->recv_cq = iwqp->ibqp.recv_cq;
+	init_attr->srq = iwqp->ibqp.srq;
+	init_attr->cap = attr->cap;
+	return 0;
+}
+
+/**
+ * i40iw_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp
+ * @wait: flag to wait or not for modify qp completion
+ */
+void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
+			struct i40iw_modify_qp_info *info, bool wait)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_modify_qp_info *m_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+	m_info = &cqp_info->in.u.qp_modify.info;
+	memcpy(m_info, info, sizeof(*m_info));
+	cqp_info->cqp_cmd = OP_QP_MODIFY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+	cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Modify QP fail");
+}
+
+/**
+ * i40iw_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata)
+{
+	struct i40iw_qp *iwqp = to_iwqp(ibqp);
+	struct i40iw_device *iwdev = iwqp->iwdev;
+	struct i40iw_qp_host_ctx_info *ctx_info;
+	struct i40iwarp_offload_info *iwarp_info;
+	struct i40iw_modify_qp_info info;
+	u8 issue_modify_qp = 0;
+	u8 dont_wait = 0;
+	u32 err;
+	unsigned long flags;
+
+	memset(&info, 0, sizeof(info));
+	ctx_info = &iwqp->ctx_info;
+	iwarp_info = &iwqp->iwarp_info;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+
+	if (attr_mask & IB_QP_STATE) {
+		switch (attr->qp_state) {
+		case IB_QPS_INIT:
+		case IB_QPS_RTR:
+			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
+				err = -EINVAL;
+				goto exit;
+			}
+			if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
+				info.next_iwarp_state = I40IW_QP_STATE_IDLE;
+				issue_modify_qp = 1;
+			}
+			break;
+		case IB_QPS_RTS:
+			if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
+			    (!iwqp->cm_id)) {
+				err = -EINVAL;
+				goto exit;
+			}
+
+			issue_modify_qp = 1;
+			iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
+			iwqp->hte_added = 1;
+			info.next_iwarp_state = I40IW_QP_STATE_RTS;
+			info.tcp_ctx_valid = true;
+			info.ord_valid = true;
+			info.arp_cache_idx_valid = true;
+			info.cq_num_valid = true;
+			break;
+		case IB_QPS_SQD:
+			if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
+				err = 0;
+				goto exit;
+			}
+			if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
+			    (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
+				err = 0;
+				goto exit;
+			}
+			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
+				err = -EINVAL;
+				goto exit;
+			}
+			info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_SQE:
+			if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
+				err = -EINVAL;
+				goto exit;
+			}
+			info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
+			issue_modify_qp = 1;
+			break;
+		case IB_QPS_ERR:
+		case IB_QPS_RESET:
+			if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
+				err = -EINVAL;
+				goto exit;
+			}
+			if (iwqp->sc_qp.term_flags)
+				del_timer(&iwqp->terminate_timer);
+			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+			if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
+			    iwdev->iw_status &&
+			    (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
+				info.reset_tcp_conn = true;
+			else
+				dont_wait = 1;
+			issue_modify_qp = 1;
+			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
+			break;
+		default:
+			err = -EINVAL;
+			goto exit;
+		}
+
+		iwqp->ibqp_state = attr->qp_state;
+
+		if (issue_modify_qp)
+			iwqp->iwarp_state = info.next_iwarp_state;
+		else
+			info.next_iwarp_state = iwqp->iwarp_state;
+	}
+	if (attr_mask & IB_QP_ACCESS_FLAGS) {
+		ctx_info->iwarp_info_valid = true;
+		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+			iwarp_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+			iwarp_info->wr_rdresp_en = true;
+		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+			iwarp_info->rd_enable = true;
+		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
+			iwarp_info->bind_en = true;
+
+		if (iwqp->user_mode) {
+			iwarp_info->rd_enable = true;
+			iwarp_info->wr_rdresp_en = true;
+			iwarp_info->priv_mode_en = false;
+		}
+	}
+
+	if (ctx_info->iwarp_info_valid) {
+		struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+		int ret;
+
+		ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+		ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+		ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+						     (u64 *)iwqp->host_ctx.va,
+						     ctx_info);
+		if (ret) {
+			i40iw_pr_err("setting QP context\n");
+			err = -EINVAL;
+			goto exit;
+		}
+	}
+
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	if (issue_modify_qp)
+		i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
+
+	if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
+		if (dont_wait) {
+			if (iwqp->cm_id && iwqp->hw_tcp_state) {
+				spin_lock_irqsave(&iwqp->lock, flags);
+				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
+				iwqp->last_aeq = I40IW_AE_RESET_SENT;
+				spin_unlock_irqrestore(&iwqp->lock, flags);
+			}
+		}
+	}
+	return 0;
+exit:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	return err;
+}
+
+/**
+ * cq_free_resources - free up recources for cq
+ * @iwdev: iwarp device
+ * @iwcq: cq ptr
+ */
+static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
+{
+	struct i40iw_sc_cq *cq = &iwcq->sc_cq;
+
+	if (!iwcq->user_mode)
+		i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
+	i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
+}
+
+/**
+ * cq_wq_destroy - send cq destroy cqp
+ * @iwdev: iwarp device
+ * @cq: hardware control cq
+ */
+static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+{
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return;
+
+	cqp_info = &cqp_request->info;
+
+	cqp_info->cqp_cmd = OP_CQ_DESTROY;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_destroy.cq = cq;
+	cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP Destroy QP fail");
+}
+
+/**
+ * i40iw_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ */
+static int i40iw_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct i40iw_cq *iwcq;
+	struct i40iw_device *iwdev;
+	struct i40iw_sc_cq *cq;
+
+	if (!ib_cq) {
+		i40iw_pr_err("ib_cq == NULL\n");
+		return 0;
+	}
+
+	iwcq = to_iwcq(ib_cq);
+	iwdev = to_iwdev(ib_cq->device);
+	cq = &iwcq->sc_cq;
+	cq_wq_destroy(iwdev, cq);
+	cq_free_resources(iwdev, iwcq);
+	kfree(iwcq);
+	return 0;
+}
+
+/**
+ * i40iw_create_cq - create cq
+ * @ibdev: device pointer from stack
+ * @attr: attributes for cq
+ * @context: user context created during alloc
+ * @udata: user data
+ */
+static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
+				     const struct ib_cq_init_attr *attr,
+				     struct ib_ucontext *context,
+				     struct ib_udata *udata)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_cq *iwcq;
+	struct i40iw_pbl *iwpbl;
+	u32 cq_num = 0;
+	struct i40iw_sc_cq *cq;
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_cq_init_info info;
+	enum i40iw_status_code status;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+	unsigned long flags;
+	int err_code;
+	int entries = attr->cqe;
+
+	if (entries > iwdev->max_cqe)
+		return ERR_PTR(-EINVAL);
+
+	iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
+	if (!iwcq)
+		return ERR_PTR(-ENOMEM);
+
+	memset(&info, 0, sizeof(info));
+
+	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
+					iwdev->max_cq, &cq_num,
+					&iwdev->next_cq);
+	if (err_code)
+		goto error;
+
+	cq = &iwcq->sc_cq;
+	cq->back_cq = (void *)iwcq;
+	spin_lock_init(&iwcq->lock);
+
+	info.dev = dev;
+	ukinfo->cq_size = max(entries, 4);
+	ukinfo->cq_id = cq_num;
+	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+	info.ceqe_mask = 0;
+	info.ceq_id = 0;
+	info.ceq_id_valid = true;
+	info.ceqe_mask = 1;
+	info.type = I40IW_CQ_TYPE_IWARP;
+	if (context) {
+		struct i40iw_ucontext *ucontext;
+		struct i40iw_create_cq_req req;
+		struct i40iw_cq_mr *cqmr;
+
+		memset(&req, 0, sizeof(req));
+		iwcq->user_mode = true;
+		ucontext = to_ucontext(context);
+		if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
+			goto cq_free_resources;
+
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
+				      &ucontext->cq_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		if (!iwpbl) {
+			err_code = -EPROTO;
+			goto cq_free_resources;
+		}
+
+		iwcq->iwpbl = iwpbl;
+		iwcq->cq_mem_size = 0;
+		cqmr = &iwpbl->cq_mr;
+		info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
+		if (iwpbl->pbl_allocated) {
+			info.virtual_map = true;
+			info.pbl_chunk_size = 1;
+			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+		} else {
+			info.cq_base_pa = cqmr->cq_pbl.addr;
+		}
+	} else {
+		/* Kmode allocations */
+		int rsize;
+		int shadow;
+
+		rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
+		rsize = round_up(rsize, 256);
+		shadow = I40IW_SHADOW_AREA_SIZE << 3;
+		status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
+						rsize + shadow, 256);
+		if (status) {
+			err_code = -ENOMEM;
+			goto cq_free_resources;
+		}
+		ukinfo->cq_base = iwcq->kmem.va;
+		info.cq_base_pa = iwcq->kmem.pa;
+		info.shadow_area_pa = info.cq_base_pa + rsize;
+		ukinfo->shadow_area = iwcq->kmem.va + rsize;
+	}
+
+	if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
+		i40iw_pr_err("init cq fail\n");
+		err_code = -EPROTO;
+		goto cq_free_resources;
+	}
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request) {
+		err_code = -ENOMEM;
+		goto cq_free_resources;
+	}
+
+	cqp_info = &cqp_request->info;
+	cqp_info->cqp_cmd = OP_CQ_CREATE;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.cq_create.cq = cq;
+	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status) {
+		i40iw_pr_err("CQP-OP Create QP fail");
+		err_code = -EPROTO;
+		goto cq_free_resources;
+	}
+
+	if (context) {
+		struct i40iw_create_cq_resp resp;
+
+		memset(&resp, 0, sizeof(resp));
+		resp.cq_id = info.cq_uk_init_info.cq_id;
+		resp.cq_size = info.cq_uk_init_info.cq_size;
+		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
+			i40iw_pr_err("copy to user data\n");
+			err_code = -EPROTO;
+			goto cq_destroy;
+		}
+	}
+
+	return (struct ib_cq *)iwcq;
+
+cq_destroy:
+	cq_wq_destroy(iwdev, cq);
+cq_free_resources:
+	cq_free_resources(iwdev, iwcq);
+error:
+	kfree(iwcq);
+	return ERR_PTR(err_code);
+}
+
+/**
+ * i40iw_get_user_access - get hw access from IB access
+ * @acc: IB access to return hw access
+ */
+static inline u16 i40iw_get_user_access(int acc)
+{
+	u16 access = 0;
+
+	access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
+	access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
+	access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
+	access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
+	return access;
+}
+
+/**
+ * i40iw_free_stag - free stag resource
+ * @iwdev: iwarp device
+ * @stag: stag to free
+ */
+static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
+{
+	u32 stag_idx;
+
+	stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+	i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+}
+
+/**
+ * i40iw_create_stag - create random stag
+ * @iwdev: iwarp device
+ */
+static u32 i40iw_create_stag(struct i40iw_device *iwdev)
+{
+	u32 stag = 0;
+	u32 stag_index = 0;
+	u32 next_stag_index;
+	u32 driver_key;
+	u32 random;
+	u8 consumer_key;
+	int ret;
+
+	get_random_bytes(&random, sizeof(random));
+	consumer_key = (u8)random;
+
+	driver_key = random & ~iwdev->mr_stagmask;
+	next_stag_index = (random & iwdev->mr_stagmask) >> 8;
+	next_stag_index %= iwdev->max_mr;
+
+	ret = i40iw_alloc_resource(iwdev,
+				   iwdev->allocated_mrs, iwdev->max_mr,
+				   &stag_index, &next_stag_index);
+	if (!ret) {
+		stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
+		stag |= driver_key;
+		stag += (u32)consumer_key;
+	}
+	return stag;
+}
+
+/**
+ * i40iw_next_pbl_addr - Get next pbl address
+ * @palloc: Poiner to allocated pbles
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer
+ * @idx: index
+ */
+static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
+				       u64 *pbl,
+				       struct i40iw_pble_info **pinfo,
+				       u32 *idx)
+{
+	*idx += 1;
+	if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
+		return ++pbl;
+	*idx = 0;
+	(*pinfo)++;
+	return (u64 *)(*pinfo)->addr;
+}
+
+/**
+ * i40iw_copy_user_pgaddrs - copy user page address to pble's os locally
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: ple pointer to save 1 level or 0 level pble
+ * @level: indicated level 0, 1 or 2
+ */
+static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
+				    u64 *pbl,
+				    enum i40iw_pble_level level)
+{
+	struct ib_umem *region = iwmr->region;
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	int chunk_pages, entry, pg_shift, i;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_pble_info *pinfo;
+	struct scatterlist *sg;
+	u32 idx = 0;
+
+	pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+	pg_shift = ffs(region->page_size) - 1;
+	for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+		chunk_pages = sg_dma_len(sg) >> pg_shift;
+		if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
+		    !iwpbl->qp_mr.sq_page)
+			iwpbl->qp_mr.sq_page = sg_page(sg);
+		for (i = 0; i < chunk_pages; i++) {
+			*pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
+			pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+		}
+	}
+}
+
+/**
+ * i40iw_setup_pbles - copy user pg address to pble's
+ * @iwdev: iwarp device
+ * @iwmr: mr pointer for this memory registration
+ * @use_pbles: flag if to use pble's or memory (level 0)
+ */
+static int i40iw_setup_pbles(struct i40iw_device *iwdev,
+			     struct i40iw_mr *iwmr,
+			     bool use_pbles)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_pble_info *pinfo;
+	u64 *pbl;
+	enum i40iw_status_code status;
+	enum i40iw_pble_level level = I40IW_LEVEL_1;
+
+	if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
+		return -ENOMEM;
+
+	if (use_pbles) {
+		mutex_lock(&iwdev->pbl_mutex);
+		status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
+		mutex_unlock(&iwdev->pbl_mutex);
+		if (status)
+			return -ENOMEM;
+
+		iwpbl->pbl_allocated = true;
+		level = palloc->level;
+		pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
+		pbl = (u64 *)pinfo->addr;
+	} else {
+		pbl = iwmr->pgaddrmem;
+	}
+
+	i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+	return 0;
+}
+
+/**
+ * i40iw_handle_q_mem - handle memory for qp and cq
+ * @iwdev: iwarp device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @use_pbles: flag to use pble
+ */
+static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
+			      struct i40iw_mem_reg_req *req,
+			      struct i40iw_pbl *iwpbl,
+			      bool use_pbles)
+{
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_mr *iwmr = iwpbl->iwmr;
+	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
+	struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
+	struct i40iw_hmc_pble *hmc_p;
+	u64 *arr = iwmr->pgaddrmem;
+	int err;
+	int total;
+
+	total = req->sq_pages + req->rq_pages + req->cq_pages;
+
+	err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+	if (err)
+		return err;
+	if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+		iwpbl->pbl_allocated = false;
+		return -ENOMEM;
+	}
+
+	if (use_pbles)
+		arr = (u64 *)palloc->level1.addr;
+	if (req->reg_type == IW_MEMREG_TYPE_QP) {
+		hmc_p = &qpmr->sq_pbl;
+		qpmr->shadow = (dma_addr_t)arr[total];
+		if (use_pbles) {
+			hmc_p->idx = palloc->level1.idx;
+			hmc_p = &qpmr->rq_pbl;
+			hmc_p->idx = palloc->level1.idx + req->sq_pages;
+		} else {
+			hmc_p->addr = arr[0];
+			hmc_p = &qpmr->rq_pbl;
+			hmc_p->addr = arr[1];
+		}
+	} else {		/* CQ */
+		hmc_p = &cqmr->cq_pbl;
+		cqmr->shadow = (dma_addr_t)arr[total];
+		if (use_pbles)
+			hmc_p->idx = palloc->level1.idx;
+		else
+			hmc_p->addr = arr[0];
+	}
+	return err;
+}
+
+/**
+ * i40iw_hwreg_mr - send cqp command for memory registration
+ * @iwdev: iwarp device
+ * @iwmr: iwarp mr pointer
+ * @access: access for MR
+ */
+static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
+			  struct i40iw_mr *iwmr,
+			  u16 access)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_reg_ns_stag_info *stag_info;
+	struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	enum i40iw_status_code status;
+	int err = 0;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
+	memset(stag_info, 0, sizeof(*stag_info));
+	stag_info->va = (void *)(unsigned long)iwpbl->user_base;
+	stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
+	stag_info->stag_key = (u8)iwmr->stag;
+	stag_info->total_len = iwmr->length;
+	stag_info->access_rights = access;
+	stag_info->pd_id = iwpd->sc_pd.pd_id;
+	stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+
+	if (iwmr->page_cnt > 1) {
+		if (palloc->level == I40IW_LEVEL_1) {
+			stag_info->first_pm_pbl_index = palloc->level1.idx;
+			stag_info->chunk_size = 1;
+		} else {
+			stag_info->first_pm_pbl_index = palloc->level2.root.idx;
+			stag_info->chunk_size = 3;
+		}
+	} else {
+		stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
+	}
+
+	cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
+	cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status) {
+		err = -ENOMEM;
+		i40iw_pr_err("CQP-OP MR Reg fail");
+	}
+	return err;
+}
+
+/**
+ * i40iw_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @length: length of mr
+ * @virt: virtual address
+ * @acc: access of mr
+ * @udata: user data
+ */
+static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
+				       u64 start,
+				       u64 length,
+				       u64 virt,
+				       int acc,
+				       struct ib_udata *udata)
+{
+	struct i40iw_pd *iwpd = to_iwpd(pd);
+	struct i40iw_device *iwdev = to_iwdev(pd->device);
+	struct i40iw_ucontext *ucontext;
+	struct i40iw_pble_alloc *palloc;
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_mr *iwmr;
+	struct ib_umem *region;
+	struct i40iw_mem_reg_req req;
+	u32 pbl_depth = 0;
+	u32 stag = 0;
+	u16 access;
+	u32 region_length;
+	bool use_pbles = false;
+	unsigned long flags;
+	int err = -ENOSYS;
+
+	region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	if (IS_ERR(region))
+		return (struct ib_mr *)region;
+
+	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+		ib_umem_release(region);
+		return ERR_PTR(-EFAULT);
+	}
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr) {
+		ib_umem_release(region);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->region = region;
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	ucontext = to_ucontext(pd->uobject->context);
+	region_length = region->length + (start & 0xfff);
+	pbl_depth = region_length >> 12;
+	pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+	iwmr->length = region->length;
+
+	iwpbl->user_base = virt;
+	palloc = &iwpbl->pble_alloc;
+
+	iwmr->type = req.reg_type;
+	iwmr->page_cnt = pbl_depth;
+
+	switch (req.reg_type) {
+	case IW_MEMREG_TYPE_QP:
+		use_pbles = ((req.sq_pages + req.rq_pages) > 2);
+		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_CQ:
+		use_pbles = (req.cq_pages > 1);
+		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
+		if (err)
+			goto error;
+
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_MEM:
+		access = I40IW_ACCESS_FLAGS_LOCALREAD;
+
+		use_pbles = (iwmr->page_cnt != 1);
+		err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
+		if (err)
+			goto error;
+
+		access |= i40iw_get_user_access(acc);
+		stag = i40iw_create_stag(iwdev);
+		if (!stag) {
+			err = -ENOMEM;
+			goto error;
+		}
+
+		iwmr->stag = stag;
+		iwmr->ibmr.rkey = stag;
+		iwmr->ibmr.lkey = stag;
+
+		err = i40iw_hwreg_mr(iwdev, iwmr, access);
+		if (err) {
+			i40iw_free_stag(iwdev, stag);
+			goto error;
+		}
+		break;
+	default:
+		goto error;
+	}
+
+	iwmr->type = req.reg_type;
+	if (req.reg_type == IW_MEMREG_TYPE_MEM)
+		i40iw_add_pdusecount(iwpd);
+	return &iwmr->ibmr;
+
+error:
+	if (palloc->level != I40IW_LEVEL_0)
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+	ib_umem_release(region);
+	kfree(iwmr);
+	return ERR_PTR(err);
+}
+
+/**
+ * i40iw_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @acc: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ */
+struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
+				u64 addr,
+				u64 size,
+				int acc,
+				u64 *iova_start)
+{
+	struct i40iw_pd *iwpd = to_iwpd(pd);
+	struct i40iw_device *iwdev = to_iwdev(pd->device);
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_mr *iwmr;
+	enum i40iw_status_code status;
+	u32 stag;
+	u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
+	int ret;
+
+	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+	if (!iwmr)
+		return ERR_PTR(-ENOMEM);
+	iwmr->ibmr.pd = pd;
+	iwmr->ibmr.device = pd->device;
+	iwpbl = &iwmr->iwpbl;
+	iwpbl->iwmr = iwmr;
+	iwmr->type = IW_MEMREG_TYPE_MEM;
+	iwpbl->user_base = *iova_start;
+	stag = i40iw_create_stag(iwdev);
+	if (!stag) {
+		ret = -EOVERFLOW;
+		goto err;
+	}
+	access |= i40iw_get_user_access(acc);
+	iwmr->stag = stag;
+	iwmr->ibmr.rkey = stag;
+	iwmr->ibmr.lkey = stag;
+	iwmr->page_cnt = 1;
+	iwmr->pgaddrmem[0]  = addr;
+	status = i40iw_hwreg_mr(iwdev, iwmr, access);
+	if (status) {
+		i40iw_free_stag(iwdev, stag);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	i40iw_add_pdusecount(iwpd);
+	return &iwmr->ibmr;
+ err:
+	kfree(iwmr);
+	return ERR_PTR(ret);
+}
+
+/**
+ * i40iw_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ */
+static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	u64 kva = 0;
+
+	return i40iw_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
+}
+
+/**
+ * i40iw_del_mem_list - Deleting pbl list entries for CQ/QP
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ */
+static void i40iw_del_memlist(struct i40iw_mr *iwmr,
+			      struct i40iw_ucontext *ucontext)
+{
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	unsigned long flags;
+
+	switch (iwmr->type) {
+	case IW_MEMREG_TYPE_CQ:
+		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+		if (!list_empty(&ucontext->cq_reg_mem_list))
+			list_del(&iwpbl->list);
+		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+		break;
+	case IW_MEMREG_TYPE_QP:
+		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+		if (!list_empty(&ucontext->qp_reg_mem_list))
+			list_del(&iwpbl->list);
+		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40iw_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ */
+static int i40iw_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct ib_pd *ibpd = ib_mr->pd;
+	struct i40iw_pd *iwpd = to_iwpd(ibpd);
+	struct i40iw_mr *iwmr = to_iwmr(ib_mr);
+	struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
+	enum i40iw_status_code status;
+	struct i40iw_dealloc_stag_info *info;
+	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
+	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
+	struct i40iw_cqp_request *cqp_request;
+	struct cqp_commands_info *cqp_info;
+	u32 stag_idx;
+
+	if (iwmr->region)
+		ib_umem_release(iwmr->region);
+
+	if (iwmr->type != IW_MEMREG_TYPE_MEM) {
+		if (ibpd->uobject) {
+			struct i40iw_ucontext *ucontext;
+
+			ucontext = to_ucontext(ibpd->uobject->context);
+			i40iw_del_memlist(iwmr, ucontext);
+		}
+		if (iwpbl->pbl_allocated)
+			i40iw_free_pble(iwdev->pble_rsrc, palloc);
+		kfree(iwpbl->iwmr);
+		iwpbl->iwmr = NULL;
+		return 0;
+	}
+
+	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
+	if (!cqp_request)
+		return -ENOMEM;
+
+	cqp_info = &cqp_request->info;
+	info = &cqp_info->in.u.dealloc_stag.info;
+	memset(info, 0, sizeof(*info));
+
+	info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
+	info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
+	stag_idx = info->stag_idx;
+	info->mr = true;
+	if (iwpbl->pbl_allocated)
+		info->dealloc_pbl = true;
+
+	cqp_info->cqp_cmd = OP_DEALLOC_STAG;
+	cqp_info->post_sq = 1;
+	cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
+	cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+	status = i40iw_handle_cqp_op(iwdev, cqp_request);
+	if (status)
+		i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
+	i40iw_rem_pdusecount(iwpd, iwdev);
+	i40iw_free_stag(iwdev, iwmr->stag);
+	if (iwpbl->pbl_allocated)
+		i40iw_free_pble(iwdev->pble_rsrc, palloc);
+	kfree(iwmr);
+	return 0;
+}
+
+/**
+ * i40iw_show_rev
+ */
+static ssize_t i40iw_show_rev(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct i40iw_ib_device *iwibdev = container_of(dev,
+						       struct i40iw_ib_device,
+						       ibdev.dev);
+	u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
+
+	return sprintf(buf, "%x\n", hw_rev);
+}
+
+/**
+ * i40iw_show_fw_ver
+ */
+static ssize_t i40iw_show_fw_ver(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	u32 firmware_version = I40IW_FW_VERSION;
+
+	return sprintf(buf, "%u.%u\n", firmware_version,
+		       (firmware_version & 0x000000ff));
+}
+
+/**
+ * i40iw_show_hca
+ */
+static ssize_t i40iw_show_hca(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "I40IW\n");
+}
+
+/**
+ * i40iw_show_board
+ */
+static ssize_t i40iw_show_board(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, i40iw_show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
+
+static struct device_attribute *i40iw_dev_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_fw_ver,
+	&dev_attr_hca_type,
+	&dev_attr_board_id
+};
+
+/**
+ * i40iw_copy_sg_list - copy sg list for qp
+ * @sg_list: copied into sg_list
+ * @sgl: copy from sgl
+ * @num_sges: count of sg entries
+ */
+static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
+{
+	unsigned int i;
+
+	for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
+		sg_list[i].tag_off = sgl[i].addr;
+		sg_list[i].len = sgl[i].length;
+		sg_list[i].stag = sgl[i].lkey;
+	}
+}
+
+/**
+ * i40iw_post_send -  kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+static int i40iw_post_send(struct ib_qp *ibqp,
+			   struct ib_send_wr *ib_wr,
+			   struct ib_send_wr **bad_wr)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_qp_uk *ukqp;
+	struct i40iw_post_sq_info info;
+	enum i40iw_status_code ret;
+	int err = 0;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)ibqp;
+	ukqp = &iwqp->sc_qp.qp_uk;
+
+	spin_lock_irqsave(&iwqp->lock, flags);
+	while (ib_wr) {
+		memset(&info, 0, sizeof(info));
+		info.wr_id = (u64)(ib_wr->wr_id);
+		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+			info.signaled = true;
+		if (ib_wr->send_flags & IB_SEND_FENCE)
+			info.read_fence = true;
+
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+			if (ib_wr->send_flags & IB_SEND_SOLICITED)
+				info.op_type = I40IW_OP_TYPE_SEND_SOL;
+			else
+				info.op_type = I40IW_OP_TYPE_SEND;
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+				info.op.inline_send.len = ib_wr->sg_list[0].length;
+				ret = ukqp->ops.iw_inline_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+			} else {
+				info.op.send.num_sges = ib_wr->num_sge;
+				info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
+				ret = ukqp->ops.iw_send(ukqp, &info, rdma_wr(ib_wr)->rkey, false);
+			}
+
+			if (ret)
+				err = -EIO;
+			break;
+		case IB_WR_RDMA_WRITE:
+			info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
+
+			if (ib_wr->send_flags & IB_SEND_INLINE) {
+				info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
+				info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
+				info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
+				ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
+			} else {
+				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
+				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+				info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+				info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+				info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
+				ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
+			}
+
+			if (ret)
+				err = -EIO;
+			break;
+		case IB_WR_RDMA_READ:
+			info.op_type = I40IW_OP_TYPE_RDMA_READ;
+			info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
+			info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
+			info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
+			info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
+			info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
+			info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
+			ret = ukqp->ops.iw_rdma_read(ukqp, &info, false, false);
+			if (ret)
+				err = -EIO;
+			break;
+		default:
+			err = -EINVAL;
+			i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
+				     ib_wr->opcode);
+			break;
+		}
+
+		if (err)
+			break;
+		ib_wr = ib_wr->next;
+	}
+
+	if (err)
+		*bad_wr = ib_wr;
+	else
+		ukqp->ops.iw_qp_post_wr(ukqp);
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+
+	return err;
+}
+
+/**
+ * i40iw_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int i40iw_post_recv(struct ib_qp *ibqp,
+			   struct ib_recv_wr *ib_wr,
+			   struct ib_recv_wr **bad_wr)
+{
+	struct i40iw_qp *iwqp;
+	struct i40iw_qp_uk *ukqp;
+	struct i40iw_post_rq_info post_recv;
+	struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
+	enum i40iw_status_code ret = 0;
+	unsigned long flags;
+
+	iwqp = (struct i40iw_qp *)ibqp;
+	ukqp = &iwqp->sc_qp.qp_uk;
+
+	memset(&post_recv, 0, sizeof(post_recv));
+	spin_lock_irqsave(&iwqp->lock, flags);
+	while (ib_wr) {
+		post_recv.num_sges = ib_wr->num_sge;
+		post_recv.wr_id = ib_wr->wr_id;
+		i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
+		post_recv.sg_list = sg_list;
+		ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
+		if (ret) {
+			i40iw_pr_err(" post_recv err %d\n", ret);
+			*bad_wr = ib_wr;
+			goto out;
+		}
+		ib_wr = ib_wr->next;
+	}
+ out:
+	spin_unlock_irqrestore(&iwqp->lock, flags);
+	if (ret)
+		return -ENOSYS;
+	return 0;
+}
+
+/**
+ * i40iw_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of entry completed
+ */
+static int i40iw_poll_cq(struct ib_cq *ibcq,
+			 int num_entries,
+			 struct ib_wc *entry)
+{
+	struct i40iw_cq *iwcq;
+	int cqe_count = 0;
+	struct i40iw_cq_poll_info cq_poll_info;
+	enum i40iw_status_code ret;
+	struct i40iw_cq_uk *ukcq;
+	struct i40iw_sc_qp *qp;
+	unsigned long flags;
+
+	iwcq = (struct i40iw_cq *)ibcq;
+	ukcq = &iwcq->sc_cq.cq_uk;
+
+	spin_lock_irqsave(&iwcq->lock, flags);
+	while (cqe_count < num_entries) {
+		ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info, true);
+		if (ret == I40IW_ERR_QUEUE_EMPTY) {
+			break;
+		} else if (ret) {
+			if (!cqe_count)
+				cqe_count = -1;
+			break;
+		}
+		entry->wc_flags = 0;
+		entry->wr_id = cq_poll_info.wr_id;
+		if (!cq_poll_info.error)
+			entry->status = IB_WC_SUCCESS;
+		else
+			entry->status = IB_WC_WR_FLUSH_ERR;
+
+		switch (cq_poll_info.op_type) {
+		case I40IW_OP_TYPE_RDMA_WRITE:
+			entry->opcode = IB_WC_RDMA_WRITE;
+			break;
+		case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
+		case I40IW_OP_TYPE_RDMA_READ:
+			entry->opcode = IB_WC_RDMA_READ;
+			break;
+		case I40IW_OP_TYPE_SEND_SOL:
+		case I40IW_OP_TYPE_SEND_SOL_INV:
+		case I40IW_OP_TYPE_SEND_INV:
+		case I40IW_OP_TYPE_SEND:
+			entry->opcode = IB_WC_SEND;
+			break;
+		case I40IW_OP_TYPE_REC:
+			entry->opcode = IB_WC_RECV;
+			break;
+		default:
+			entry->opcode = IB_WC_RECV;
+			break;
+		}
+
+		entry->vendor_err =
+		    cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
+		entry->ex.imm_data = 0;
+		qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
+		entry->qp = (struct ib_qp *)qp->back_qp;
+		entry->src_qp = cq_poll_info.qp_id;
+		entry->byte_len = cq_poll_info.bytes_xfered;
+		entry++;
+		cqe_count++;
+	}
+	spin_unlock_irqrestore(&iwcq->lock, flags);
+	return cqe_count;
+}
+
+/**
+ * i40iw_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notofication flags
+ */
+static int i40iw_req_notify_cq(struct ib_cq *ibcq,
+			       enum ib_cq_notify_flags notify_flags)
+{
+	struct i40iw_cq *iwcq;
+	struct i40iw_cq_uk *ukcq;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+
+	iwcq = (struct i40iw_cq *)ibcq;
+	ukcq = &iwcq->sc_cq.cq_uk;
+	if (notify_flags == IB_CQ_NEXT_COMP)
+		cq_notify = IW_CQ_COMPL_EVENT;
+	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	return 0;
+}
+
+/**
+ * i40iw_port_immutable - return port's immutable data
+ * @ibdev: ib dev struct
+ * @port_num: port number
+ * @immutable: immutable data for the port return
+ */
+static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+				struct ib_port_immutable *immutable)
+{
+	struct ib_port_attr attr;
+	int err;
+
+	err = i40iw_query_port(ibdev, port_num, &attr);
+
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+	return 0;
+}
+
+/**
+ * i40iw_get_protocol_stats - Populates the rdma_stats structure
+ * @ibdev: ib dev struct
+ * @stats: iw protocol stats struct
+ */
+static int i40iw_get_protocol_stats(struct ib_device *ibdev,
+				    union rdma_protocol_stats *stats)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+	struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
+	struct timespec curr_time;
+	static struct timespec last_rd_time = {0, 0};
+	enum i40iw_status_code status = 0;
+	unsigned long flags;
+
+	curr_time = current_kernel_time();
+	memset(stats, 0, sizeof(*stats));
+
+	if (dev->is_pf) {
+		spin_lock_irqsave(&devstat->stats_lock, flags);
+		devstat->ops.iw_hw_stat_read_all(devstat,
+			&devstat->hw_stats);
+		spin_unlock_irqrestore(&devstat->stats_lock, flags);
+	} else {
+		if (((u64)curr_time.tv_sec - (u64)last_rd_time.tv_sec) > 1)
+			status = i40iw_vchnl_vf_get_pe_stats(dev,
+							     &devstat->hw_stats);
+
+		if (status)
+			return -ENOSYS;
+	}
+
+	stats->iw.ipInReceives = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] +
+				 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXPKTS];
+	stats->iw.ipInTruncatedPkts = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] +
+				      hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC];
+	stats->iw.ipInDiscards = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] +
+				 hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD];
+	stats->iw.ipOutNoRoutes = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] +
+				  hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE];
+	stats->iw.ipReasmReqds = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] +
+				 hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS];
+	stats->iw.ipFragCreates = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] +
+				  hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS];
+	stats->iw.ipInMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] +
+				  hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS];
+	stats->iw.ipOutMcastPkts = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] +
+				   hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_IP6TXMCPKTS];
+	stats->iw.tcpOutSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPTXSEG];
+	stats->iw.tcpInSegs = hw_stats->stat_value_64[I40IW_HW_STAT_INDEX_TCPRXSEGS];
+	stats->iw.tcpRetransSegs = hw_stats->stat_value_32[I40IW_HW_STAT_INDEX_TCPRTXSEG];
+
+	last_rd_time = curr_time;
+	return 0;
+}
+
+/**
+ * i40iw_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ */
+static int i40iw_query_gid(struct ib_device *ibdev,
+			   u8 port,
+			   int index,
+			   union ib_gid *gid)
+{
+	struct i40iw_device *iwdev = to_iwdev(ibdev);
+
+	memset(gid->raw, 0, sizeof(gid->raw));
+	ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
+	return 0;
+}
+
+/**
+ * i40iw_modify_port  Modify port properties
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @port_modify_mask: mask for port modifications
+ * @props: port properties
+ */
+static int i40iw_modify_port(struct ib_device *ibdev,
+			     u8 port,
+			     int port_modify_mask,
+			     struct ib_port_modify *props)
+{
+	return 0;
+}
+
+/**
+ * i40iw_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: index of pkey
+ * @pkey: pointer to store the pkey
+ */
+static int i40iw_query_pkey(struct ib_device *ibdev,
+			    u8 port,
+			    u16 index,
+			    u16 *pkey)
+{
+	*pkey = 0;
+	return 0;
+}
+
+/**
+ * i40iw_create_ah - create address handle
+ * @ibpd: ptr of pd
+ * @ah_attr: address handle attributes
+ */
+static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
+				     struct ib_ah_attr *attr)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+/**
+ * i40iw_destroy_ah - Destroy address handle
+ * @ah: pointer to address handle
+ */
+static int i40iw_destroy_ah(struct ib_ah *ah)
+{
+	return -ENOSYS;
+}
+
+/**
+ * i40iw_init_rdma_device - initialization of iwarp device
+ * @iwdev: iwarp device
+ */
+static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
+{
+	struct i40iw_ib_device *iwibdev;
+	struct net_device *netdev = iwdev->netdev;
+	struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
+
+	iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
+	if (!iwibdev) {
+		i40iw_pr_err("iwdev == NULL\n");
+		return NULL;
+	}
+	strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
+	iwibdev->ibdev.owner = THIS_MODULE;
+	iwdev->iwibdev = iwibdev;
+	iwibdev->iwdev = iwdev;
+
+	iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
+	ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
+
+	iwibdev->ibdev.uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND);
+	iwibdev->ibdev.phys_port_cnt = 1;
+	iwibdev->ibdev.num_comp_vectors = 1;
+	iwibdev->ibdev.dma_device = &pcidev->dev;
+	iwibdev->ibdev.dev.parent = &pcidev->dev;
+	iwibdev->ibdev.query_port = i40iw_query_port;
+	iwibdev->ibdev.modify_port = i40iw_modify_port;
+	iwibdev->ibdev.query_pkey = i40iw_query_pkey;
+	iwibdev->ibdev.query_gid = i40iw_query_gid;
+	iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
+	iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
+	iwibdev->ibdev.mmap = i40iw_mmap;
+	iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
+	iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
+	iwibdev->ibdev.create_qp = i40iw_create_qp;
+	iwibdev->ibdev.modify_qp = i40iw_modify_qp;
+	iwibdev->ibdev.query_qp = i40iw_query_qp;
+	iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
+	iwibdev->ibdev.create_cq = i40iw_create_cq;
+	iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
+	iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
+	iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
+	iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
+	iwibdev->ibdev.get_protocol_stats = i40iw_get_protocol_stats;
+	iwibdev->ibdev.query_device = i40iw_query_device;
+	iwibdev->ibdev.create_ah = i40iw_create_ah;
+	iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
+	iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
+	if (!iwibdev->ibdev.iwcm) {
+		ib_dealloc_device(&iwibdev->ibdev);
+		i40iw_pr_err("iwcm == NULL\n");
+		return NULL;
+	}
+
+	iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref;
+	iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref;
+	iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp;
+	iwibdev->ibdev.iwcm->connect = i40iw_connect;
+	iwibdev->ibdev.iwcm->accept = i40iw_accept;
+	iwibdev->ibdev.iwcm->reject = i40iw_reject;
+	iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen;
+	iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
+	memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
+	       sizeof(iwibdev->ibdev.iwcm->ifname));
+	iwibdev->ibdev.get_port_immutable   = i40iw_port_immutable;
+	iwibdev->ibdev.poll_cq = i40iw_poll_cq;
+	iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
+	iwibdev->ibdev.post_send = i40iw_post_send;
+	iwibdev->ibdev.post_recv = i40iw_post_recv;
+
+	return iwibdev;
+}
+
+/**
+ * i40iw_port_ibevent - indicate port event
+ * @iwdev: iwarp device
+ */
+void i40iw_port_ibevent(struct i40iw_device *iwdev)
+{
+	struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
+	struct ib_event event;
+
+	event.device = &iwibdev->ibdev;
+	event.element.port_num = 1;
+	event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+	ib_dispatch_event(&event);
+}
+
+/**
+ * i40iw_unregister_rdma_device - unregister of iwarp from IB
+ * @iwibdev: rdma device ptr
+ */
+static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
+		device_remove_file(&iwibdev->ibdev.dev,
+				   i40iw_dev_attributes[i]);
+	ib_unregister_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_destroy_rdma_device - destroy rdma device and free resources
+ * @iwibdev: IB device ptr
+ */
+void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
+{
+	if (!iwibdev)
+		return;
+
+	i40iw_unregister_rdma_device(iwibdev);
+	kfree(iwibdev->ibdev.iwcm);
+	iwibdev->ibdev.iwcm = NULL;
+	ib_dealloc_device(&iwibdev->ibdev);
+}
+
+/**
+ * i40iw_register_rdma_device - register iwarp device to IB
+ * @iwdev: iwarp device
+ */
+int i40iw_register_rdma_device(struct i40iw_device *iwdev)
+{
+	int i, ret;
+	struct i40iw_ib_device *iwibdev;
+
+	iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
+	if (!iwdev->iwibdev)
+		return -ENOSYS;
+	iwibdev = iwdev->iwibdev;
+
+	ret = ib_register_device(&iwibdev->ibdev, NULL);
+	if (ret)
+		goto error;
+
+	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
+		ret =
+		    device_create_file(&iwibdev->ibdev.dev,
+				       i40iw_dev_attributes[i]);
+		if (ret) {
+			while (i > 0) {
+				i--;
+				device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
+			}
+			ib_unregister_device(&iwibdev->ibdev);
+			goto error;
+		}
+	}
+	return 0;
+error:
+	kfree(iwdev->iwibdev->ibdev.iwcm);
+	iwdev->iwibdev->ibdev.iwcm = NULL;
+	ib_dealloc_device(&iwdev->iwibdev->ibdev);
+	return -ENOSYS;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
new file mode 100644
index 000000000000..1101f77080e6
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -0,0 +1,173 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VERBS_H
+#define I40IW_VERBS_H
+
+struct i40iw_ucontext {
+	struct ib_ucontext ibucontext;
+	struct i40iw_device *iwdev;
+	struct list_head cq_reg_mem_list;
+	spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
+	struct list_head qp_reg_mem_list;
+	spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
+};
+
+struct i40iw_pd {
+	struct ib_pd ibpd;
+	struct i40iw_sc_pd sc_pd;
+	atomic_t usecount;
+};
+
+struct i40iw_hmc_pble {
+	union {
+		u32 idx;
+		dma_addr_t addr;
+	};
+};
+
+struct i40iw_cq_mr {
+	struct i40iw_hmc_pble cq_pbl;
+	dma_addr_t shadow;
+};
+
+struct i40iw_qp_mr {
+	struct i40iw_hmc_pble sq_pbl;
+	struct i40iw_hmc_pble rq_pbl;
+	dma_addr_t shadow;
+	struct page *sq_page;
+};
+
+struct i40iw_pbl {
+	struct list_head list;
+	union {
+		struct i40iw_qp_mr qp_mr;
+		struct i40iw_cq_mr cq_mr;
+	};
+
+	bool pbl_allocated;
+	u64 user_base;
+	struct i40iw_pble_alloc pble_alloc;
+	struct i40iw_mr *iwmr;
+};
+
+#define MAX_SAVE_PAGE_ADDRS     4
+struct i40iw_mr {
+	union {
+		struct ib_mr ibmr;
+		struct ib_mw ibmw;
+		struct ib_fmr ibfmr;
+	};
+	struct ib_umem *region;
+	u16 type;
+	u32 page_cnt;
+	u32 stag;
+	u64 length;
+	u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
+	struct i40iw_pbl iwpbl;
+};
+
+struct i40iw_cq {
+	struct ib_cq ibcq;
+	struct i40iw_sc_cq sc_cq;
+	u16 cq_head;
+	u16 cq_size;
+	u16 cq_number;
+	bool user_mode;
+	u32 polled_completions;
+	u32 cq_mem_size;
+	struct i40iw_dma_mem kmem;
+	spinlock_t lock; /* for poll cq */
+	struct i40iw_pbl *iwpbl;
+};
+
+struct disconn_work {
+	struct work_struct work;
+	struct i40iw_qp *iwqp;
+};
+
+struct iw_cm_id;
+struct ietf_mpa_frame;
+struct i40iw_ud_file;
+
+struct i40iw_qp_kmode {
+	struct i40iw_dma_mem dma_mem;
+	u64 *wrid_mem;
+};
+
+struct i40iw_qp {
+	struct ib_qp ibqp;
+	struct i40iw_sc_qp sc_qp;
+	struct i40iw_device *iwdev;
+	struct i40iw_cq *iwscq;
+	struct i40iw_cq *iwrcq;
+	struct i40iw_pd *iwpd;
+	struct i40iw_qp_host_ctx_info ctx_info;
+	struct i40iwarp_offload_info iwarp_info;
+	void *allocated_buffer;
+	atomic_t refcount;
+	struct iw_cm_id *cm_id;
+	void *cm_node;
+	struct ib_mr *lsmm_mr;
+	struct work_struct work;
+	enum ib_qp_state ibqp_state;
+	u32 iwarp_state;
+	u32 qp_mem_size;
+	u32 last_aeq;
+	atomic_t close_timer_started;
+	spinlock_t lock; /* for post work requests */
+	struct i40iw_qp_context *iwqp_context;
+	void *pbl_vbase;
+	dma_addr_t pbl_pbase;
+	struct page *page;
+	u8 active_conn:1;
+	u8 user_mode:1;
+	u8 hte_added:1;
+	u8 flush_issued:1;
+	u8 destroyed:1;
+	u8 sig_all:1;
+	u8 pau_mode:1;
+	u8 rsvd:1;
+	u16 term_sq_flush_code;
+	u16 term_rq_flush_code;
+	u8 hw_iwarp_state;
+	u8 hw_tcp_state;
+	struct i40iw_qp_kmode kqp;
+	struct i40iw_dma_mem host_ctx;
+	struct timer_list terminate_timer;
+	struct i40iw_pbl *iwpbl;
+	struct i40iw_dma_mem q2_ctx_mem;
+	struct i40iw_dma_mem ietf_mem;
+};
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
new file mode 100644
index 000000000000..cb0f18340e14
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.c
@@ -0,0 +1,85 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_vf.h"
+
+/**
+ * i40iw_manage_vf_pble_bp - manage vf pble
+ * @cqp: cqp for cqp' sq wqe
+ * @info: pble info
+ * @scratch: pointer for completion
+ * @post_sq: to post and ring
+ */
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_manage_vf_pble_info *info,
+					       u64 scratch,
+					       bool post_sq)
+{
+	u64 *wqe;
+	u64 temp, header, pd_pl_pba = 0;
+
+	wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
+	if (!wqe)
+		return I40IW_ERR_RING_FULL;
+
+	temp = LS_64(info->pd_entry_cnt, I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT) |
+	    LS_64(info->first_pd_index, I40IW_CQPSQ_MVPBP_FIRST_PD_INX) |
+	    LS_64(info->sd_index, I40IW_CQPSQ_MVPBP_SD_INX);
+	set_64bit_val(wqe, 16, temp);
+
+	header = LS_64((info->inv_pd_ent ? 1 : 0), I40IW_CQPSQ_MVPBP_INV_PD_ENT) |
+	    LS_64(I40IW_CQP_OP_MANAGE_VF_PBLE_BP, I40IW_CQPSQ_OPCODE) |
+	    LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+	set_64bit_val(wqe, 24, header);
+
+	pd_pl_pba = LS_64(info->pd_pl_pba >> 3, I40IW_CQPSQ_MVPBP_PD_PLPBA);
+	set_64bit_val(wqe, 32, pd_pl_pba);
+
+	i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE VF_PBLE_BP WQE", wqe, I40IW_CQP_WQE_SIZE * 8);
+
+	if (post_sq)
+		i40iw_sc_cqp_post_sq(cqp);
+	return 0;
+}
+
+struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
+	i40iw_manage_vf_pble_bp
+};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
new file mode 100644
index 000000000000..f649f3a62e13
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_vf.h
@@ -0,0 +1,62 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VF_H
+#define I40IW_VF_H
+
+struct i40iw_sc_cqp;
+
+struct i40iw_manage_vf_pble_info {
+	u32 sd_index;
+	u16 first_pd_index;
+	u16 pd_entry_cnt;
+	u8 inv_pd_ent;
+	u64 pd_pl_pba;
+};
+
+struct i40iw_vf_cqp_ops {
+	enum i40iw_status_code (*manage_vf_pble_bp)(struct i40iw_sc_cqp *,
+						    struct i40iw_manage_vf_pble_info *,
+						    u64,
+						    bool);
+};
+
+enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
+					       struct i40iw_manage_vf_pble_info *info,
+					       u64 scratch,
+					       bool post_sq);
+
+extern struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
+
+#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
new file mode 100644
index 000000000000..6b68f7890b76
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -0,0 +1,748 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include "i40iw_osdep.h"
+#include "i40iw_register.h"
+#include "i40iw_status.h"
+#include "i40iw_hmc.h"
+#include "i40iw_d.h"
+#include "i40iw_type.h"
+#include "i40iw_p.h"
+#include "i40iw_virtchnl.h"
+
+/**
+ * vchnl_vf_send_get_ver_req - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_ver_req(struct i40iw_sc_dev *dev,
+							struct i40iw_virtchnl_req *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_VER;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_VER_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_hmc_fcn_req - Request HMC Function from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_hmc_fcn_req(struct i40iw_sc_dev *dev,
+							    struct i40iw_virtchnl_req *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_HMC_FCN;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_HMC_FCN_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_get_pe_stats_req - Request PE stats from VF
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req  *vchnl_req)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_dev_hw_stats) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_STATS;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_STATS_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_add_hmc_objs_req - Add HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ */
+static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req *vchnl_req,
+							     enum i40iw_hmc_rsrc_type rsrc_type,
+							     u32 start_index,
+							     u32 rsrc_count)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0;
+	add_hmc_obj->obj_type = (u16)rsrc_type;
+	add_hmc_obj->start_index = start_index;
+	add_hmc_obj->obj_count = rsrc_count;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_vf_send_del_hmc_objs_req - del HMC objects
+ * @dev: IWARP device pointer
+ * @vchnl_req: Virtual channel message request pointer
+ * @ rsrc_type - resource type to delete
+ * @ start_index - starting index for resource
+ * @ rsrc_count - number of resource type to delete
+ */
+static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
+							     struct i40iw_virtchnl_req *vchnl_req,
+							     enum i40iw_hmc_rsrc_type rsrc_type,
+							     u32 start_index,
+							     u32 rsrc_count)
+{
+	enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+
+	if (!dev->vchnl_up)
+		return ret_code;
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+	memset(vchnl_msg, 0, sizeof(*vchnl_msg));
+	memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
+	vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
+	vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
+	vchnl_msg->iw_op_code = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE;
+	vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0;
+	add_hmc_obj->obj_type = (u16)rsrc_type;
+	add_hmc_obj->start_index = start_index;
+	add_hmc_obj->obj_count = rsrc_count;
+	ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+	return ret_code;
+}
+
+/**
+ * vchnl_pf_send_get_ver_resp - Send channel version to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
+				       u32 vf_id,
+				       struct i40iw_virtchnl_op_buf *vchnl_msg)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u32) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((u32 *)vchnl_msg_resp->iw_chnl_buf) = I40IW_VCHNL_CHNL_VER_V0;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_hmc_fcn_resp - Send HMC Function to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   struct i40iw_virtchnl_op_buf *vchnl_msg,
+					   u16 hmc_fcn)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u16) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((u16 *)vchnl_msg_resp->iw_chnl_buf) = hmc_fcn;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_get_pe_stats_resp - Send PE Stats to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ * @hw_stats: HW Stats struct
+ */
+
+static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
+					    u32 vf_id,
+					    struct i40iw_virtchnl_op_buf *vchnl_msg,
+					    struct i40iw_dev_hw_stats hw_stats)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(*resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
+	*((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = hw_stats;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * vchnl_pf_send_error_resp - Send an error response to VF
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @vchnl_msg: Virtual channel message buffer pointer
+ */
+static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
+				     struct i40iw_virtchnl_op_buf *vchnl_msg,
+				     u16 op_ret_code)
+{
+	enum i40iw_status_code ret_code;
+	u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf)];
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
+
+	memset(resp_buffer, 0, sizeof(resp_buffer));
+	vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
+	vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
+	vchnl_msg_resp->iw_op_ret_code = (u16)op_ret_code;
+	ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
+	if (ret_code)
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: virt channel send failed 0x%x\n", __func__, ret_code);
+}
+
+/**
+ * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
+ * @cqp_req_param: CQP Request param value
+ * @not_used: unused CQP callback parameter
+ */
+static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
+					struct i40iw_ccq_cqe_info *cqe_info)
+{
+	struct i40iw_vfdev *vf_dev = callback_param;
+	struct i40iw_virt_mem vf_dev_mem;
+
+	if (cqe_info->error) {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "CQP Completion Error on Get HMC Function.  Maj = 0x%04x, Minor = 0x%04x\n",
+			    cqe_info->maj_err_code, cqe_info->min_err_code);
+		dev->vf_dev[vf_dev->iw_vf_idx] = NULL;
+		vchnl_pf_send_error_resp(dev, vf_dev->vf_id, &vf_dev->vf_msg_buffer.vchnl_msg,
+					 (u16)I40IW_ERR_CQP_COMPL_ERROR);
+		vf_dev_mem.va = vf_dev;
+		vf_dev_mem.size = sizeof(*vf_dev);
+		i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "CQP Completion Operation Return information = 0x%08x\n",
+			    cqe_info->op_ret_val);
+		vf_dev->pmf_index = (u16)cqe_info->op_ret_val;
+		vf_dev->msg_count--;
+		vchnl_pf_send_get_hmc_fcn_resp(dev,
+					       vf_dev->vf_id,
+					       &vf_dev->vf_msg_buffer.vchnl_msg,
+					       vf_dev->pmf_index);
+	}
+}
+
+/**
+ * pf_add_hmc_obj - Callback for Add HMC Object
+ * @vf_dev: pointer to the VF Device
+ */
+static void pf_add_hmc_obj_callback(void *work_vf_dev)
+{
+	struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+	struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+	struct i40iw_hmc_create_obj_info info;
+	struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
+	enum i40iw_status_code ret_code;
+
+	if (!vf_dev->pf_hmc_initialized) {
+		ret_code = i40iw_pf_init_vfhmc(vf_dev->pf_dev, (u8)vf_dev->pmf_index, NULL);
+		if (ret_code)
+			goto add_out;
+		vf_dev->pf_hmc_initialized = true;
+	}
+
+	add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.is_pf = false;
+	info.rsrc_type = (u32)add_hmc_obj->obj_type;
+	info.entry_type = (info.rsrc_type == I40IW_HMC_IW_PBLE) ? I40IW_SD_TYPE_PAGED : I40IW_SD_TYPE_DIRECT;
+	info.start_idx = add_hmc_obj->start_index;
+	info.count = add_hmc_obj->obj_count;
+	i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+		    "I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE.  Add %u type %u objects\n",
+		    info.count, info.rsrc_type);
+	ret_code = i40iw_sc_create_hmc_obj(vf_dev->pf_dev, &info);
+	if (!ret_code)
+		vf_dev->hmc_info.hmc_obj[add_hmc_obj->obj_type].cnt = add_hmc_obj->obj_count;
+add_out:
+	vf_dev->msg_count--;
+	vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * pf_del_hmc_obj_callback - Callback for delete HMC Object
+ * @work_vf_dev: pointer to the VF Device
+ */
+static void pf_del_hmc_obj_callback(void *work_vf_dev)
+{
+	struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
+	struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
+	struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
+	struct i40iw_hmc_del_obj_info info;
+	struct i40iw_virtchnl_hmc_obj_range *del_hmc_obj;
+	enum i40iw_status_code ret_code = I40IW_SUCCESS;
+
+	if (!vf_dev->pf_hmc_initialized)
+		goto del_out;
+
+	del_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
+
+	memset(&info, 0, sizeof(info));
+	info.hmc_info = hmc_info;
+	info.is_pf = false;
+	info.rsrc_type = (u32)del_hmc_obj->obj_type;
+	info.start_idx = del_hmc_obj->start_index;
+	info.count = del_hmc_obj->obj_count;
+	i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
+		    "I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE.  Delete %u type %u objects\n",
+		    info.count, info.rsrc_type);
+	ret_code = i40iw_sc_del_hmc_obj(vf_dev->pf_dev, &info, false);
+del_out:
+	vf_dev->msg_count--;
+	vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
+}
+
+/**
+ * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len)
+{
+	struct i40iw_virtchnl_op_buf *vchnl_msg = (struct i40iw_virtchnl_op_buf *)msg;
+	struct i40iw_vfdev *vf_dev = NULL;
+	struct i40iw_hmc_fcn_info hmc_fcn_info;
+	u16 iw_vf_idx;
+	u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
+	struct i40iw_virt_mem vf_dev_mem;
+	struct i40iw_virtchnl_work_info work_info;
+	struct i40iw_dev_pestat *devstat;
+	enum i40iw_status_code ret_code;
+	unsigned long flags;
+
+	if (!dev || !msg || !len)
+		return I40IW_ERR_PARAM;
+
+	if (!dev->vchnl_up)
+		return I40IW_ERR_NOT_READY;
+	if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
+		if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
+			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		else
+			vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
+		return I40IW_SUCCESS;
+	}
+	for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT;
+	     iw_vf_idx++) {
+		if (!dev->vf_dev[iw_vf_idx]) {
+			if (first_avail_iw_vf ==
+			    I40IW_MAX_PE_ENABLED_VF_COUNT)
+				first_avail_iw_vf = iw_vf_idx;
+			continue;
+		}
+		if (dev->vf_dev[iw_vf_idx]->vf_id == vf_id) {
+			vf_dev = dev->vf_dev[iw_vf_idx];
+			break;
+		}
+	}
+	if (vf_dev) {
+		if (!vf_dev->msg_count) {
+			vf_dev->msg_count++;
+		} else {
+			i40iw_debug(dev, I40IW_DEBUG_VIRT,
+				    "VF%u already has a channel message in progress.\n",
+				    vf_id);
+			return I40IW_SUCCESS;
+		}
+	}
+	switch (vchnl_msg->iw_op_code) {
+	case I40IW_VCHNL_OP_GET_HMC_FCN:
+		if (!vf_dev &&
+		    (first_avail_iw_vf != I40IW_MAX_PE_ENABLED_VF_COUNT)) {
+			ret_code = i40iw_allocate_virt_mem(dev->hw, &vf_dev_mem, sizeof(struct i40iw_vfdev) +
+							   (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX));
+			if (!ret_code) {
+				vf_dev = vf_dev_mem.va;
+				vf_dev->stats_initialized = false;
+				vf_dev->pf_dev = dev;
+				vf_dev->msg_count = 1;
+				vf_dev->vf_id = vf_id;
+				vf_dev->iw_vf_idx = first_avail_iw_vf;
+				vf_dev->pf_hmc_initialized = false;
+				vf_dev->hmc_info.hmc_obj = (struct i40iw_hmc_obj_info *)(&vf_dev[1]);
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "vf_dev %p, hmc_info %p, hmc_obj %p\n",
+					    vf_dev, &vf_dev->hmc_info, vf_dev->hmc_info.hmc_obj);
+				dev->vf_dev[first_avail_iw_vf] = vf_dev;
+				iw_vf_idx = first_avail_iw_vf;
+			} else {
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u Unable to allocate a VF device structure.\n",
+					    vf_id);
+				vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg, (u16)I40IW_ERR_NO_MEMORY);
+				return I40IW_SUCCESS;
+			}
+			memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+			hmc_fcn_info.callback_fcn = pf_cqp_get_hmc_fcn_callback;
+			hmc_fcn_info.vf_id = vf_id;
+			hmc_fcn_info.iw_vf_idx = vf_dev->iw_vf_idx;
+			hmc_fcn_info.cqp_callback_param = vf_dev;
+			hmc_fcn_info.free_fcn = false;
+			ret_code = i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
+			if (ret_code)
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u error CQP HMC Function operation.\n",
+					    vf_id);
+			ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
+			if (ret_code)
+				i40iw_debug(dev, I40IW_DEBUG_VIRT,
+					    "VF%u - i40iw_device_init_pestat failed\n",
+					    vf_id);
+			vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
+							      (u8)vf_dev->pmf_index,
+							      dev->hw, false);
+			vf_dev->stats_initialized = true;
+		} else {
+			if (vf_dev) {
+				vf_dev->msg_count--;
+				vchnl_pf_send_get_hmc_fcn_resp(dev, vf_id, vchnl_msg, vf_dev->pmf_index);
+			} else {
+				vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg,
+							 (u16)I40IW_ERR_NO_MEMORY);
+			}
+		}
+		break;
+	case I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		work_info.worker_vf_dev = vf_dev;
+		work_info.callback_fcn = pf_add_hmc_obj_callback;
+		memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+		i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+		break;
+	case I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		work_info.worker_vf_dev = vf_dev;
+		work_info.callback_fcn = pf_del_hmc_obj_callback;
+		memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
+		i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
+		break;
+	case I40IW_VCHNL_OP_GET_STATS:
+		if (!vf_dev)
+			return I40IW_ERR_BAD_PTR;
+		devstat = &vf_dev->dev_pestat;
+		spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+		devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
+		spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+		vf_dev->msg_count--;
+		vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, devstat->hw_stats);
+		break;
+	default:
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "40iw_vchnl_recv_pf: Invalid OpCode 0x%x\n",
+			    vchnl_msg->iw_op_code);
+		vchnl_pf_send_error_resp(dev, vf_id,
+					 vchnl_msg, (u16)I40IW_ERR_NOT_IMPLEMENTED);
+	}
+	return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_recv_vf - Receive VF virtual channel messages
+ * @dev: IWARP device pointer
+ * @vf_id: Virtual function ID associated with the message
+ * @msg: Virtual channel message buffer pointer
+ * @len: Length of the virtual channels message
+ */
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len)
+{
+	struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)msg;
+	struct i40iw_virtchnl_req *vchnl_req;
+
+	vchnl_req = (struct i40iw_virtchnl_req *)(uintptr_t)vchnl_msg_resp->iw_chnl_op_ctx;
+	vchnl_req->ret_code = (enum i40iw_status_code)vchnl_msg_resp->iw_op_ret_code;
+	if (len == (sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1)) {
+		if (vchnl_req->parm_len && vchnl_req->parm)
+			memcpy(vchnl_req->parm, vchnl_msg_resp->iw_chnl_buf, vchnl_req->parm_len);
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: Got response, data size %u\n", __func__,
+			    vchnl_req->parm_len);
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s: error length on response, Got %u, expected %u\n", __func__,
+			    len, (u32)(sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1));
+	}
+
+	return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vchnl_vf_get_ver - Request Channel version
+ * @dev: IWARP device pointer
+ * @vchnl_ver: Virtual channel message version pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+					      u32 *vchnl_ver)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = vchnl_ver;
+	vchnl_req.parm_len = sizeof(*vchnl_ver);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_hmc_fcn - Request HMC Function
+ * @dev: IWARP device pointer
+ * @hmc_fcn: HMC function index pointer
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+						  u16 *hmc_fcn)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = hmc_fcn;
+	vchnl_req.parm_len = sizeof(*hmc_fcn);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_add_hmc_objs - Add HMC Object
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the objects to be added
+ * @rsrc_count: Number of resources to be added
+ */
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+						   enum i40iw_hmc_rsrc_type rsrc_type,
+						   u32 start_index,
+						   u32 rsrc_count)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
+						  &vchnl_req,
+						  rsrc_type,
+						  start_index,
+						  rsrc_count);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_del_hmc_obj - del HMC obj
+ * @dev: IWARP device pointer
+ * @rsrc_type: HMC Resource type
+ * @start_index: Starting index of the object to delete
+ * @rsrc_count: Number of resources to be delete
+ */
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+						  enum i40iw_hmc_rsrc_type rsrc_type,
+						  u32 start_index,
+						  u32 rsrc_count)
+{
+	struct i40iw_virtchnl_req vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
+						  &vchnl_req,
+						  rsrc_type,
+						  start_index,
+						  rsrc_count);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
+
+/**
+ * i40iw_vchnl_vf_get_pe_stats - Get PE stats
+ * @dev: IWARP device pointer
+ * @hw_stats: HW stats struct
+ */
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+						   struct i40iw_dev_hw_stats *hw_stats)
+{
+	struct i40iw_virtchnl_req  vchnl_req;
+	enum i40iw_status_code ret_code;
+
+	memset(&vchnl_req, 0, sizeof(vchnl_req));
+	vchnl_req.dev = dev;
+	vchnl_req.parm = hw_stats;
+	vchnl_req.parm_len = sizeof(*hw_stats);
+	vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
+	ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
+	if (!ret_code) {
+		ret_code = i40iw_vf_wait_vchnl_resp(dev);
+		if (!ret_code)
+			ret_code = vchnl_req.ret_code;
+		else
+			dev->vchnl_up = false;
+	} else {
+		i40iw_debug(dev, I40IW_DEBUG_VIRT,
+			    "%s Send message failed 0x%0x\n", __func__, ret_code);
+	}
+	return ret_code;
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
new file mode 100644
index 000000000000..24886ef08293
--- /dev/null
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
@@ -0,0 +1,124 @@
+/*******************************************************************************
+*
+* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenFabrics.org BSD license below:
+*
+*   Redistribution and use in source and binary forms, with or
+*   without modification, are permitted provided that the following
+*   conditions are met:
+*
+*    - Redistributions of source code must retain the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer.
+*
+*    - Redistributions in binary form must reproduce the above
+*	copyright notice, this list of conditions and the following
+*	disclaimer in the documentation and/or other materials
+*	provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#ifndef I40IW_VIRTCHNL_H
+#define I40IW_VIRTCHNL_H
+
+#include "i40iw_hmc.h"
+
+#pragma pack(push, 1)
+
+struct i40iw_virtchnl_op_buf {
+	u16 iw_op_code;
+	u16 iw_op_ver;
+	u16 iw_chnl_buf_len;
+	u16 rsvd;
+	u64 iw_chnl_op_ctx;
+	/* Member alignment MUST be maintained above this location */
+	u8 iw_chnl_buf[1];
+};
+
+struct i40iw_virtchnl_resp_buf {
+	u64 iw_chnl_op_ctx;
+	u16 iw_chnl_buf_len;
+	s16 iw_op_ret_code;
+	/* Member alignment MUST be maintained above this location */
+	u16 rsvd[2];
+	u8 iw_chnl_buf[1];
+};
+
+enum i40iw_virtchnl_ops {
+	I40IW_VCHNL_OP_GET_VER = 0,
+	I40IW_VCHNL_OP_GET_HMC_FCN,
+	I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE,
+	I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE,
+	I40IW_VCHNL_OP_GET_STATS
+};
+
+#define I40IW_VCHNL_OP_GET_VER_V0 0
+#define I40IW_VCHNL_OP_GET_HMC_FCN_V0 0
+#define I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0 0
+#define I40IW_VCHNL_OP_GET_STATS_V0 0
+#define I40IW_VCHNL_CHNL_VER_V0 0
+
+struct i40iw_dev_hw_stats;
+
+struct i40iw_virtchnl_hmc_obj_range {
+	u16 obj_type;
+	u16 rsvd;
+	u32 start_index;
+	u32 obj_count;
+};
+
+enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len);
+
+enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
+					   u32 vf_id,
+					   u8 *msg,
+					   u16 len);
+
+struct i40iw_virtchnl_req {
+	struct i40iw_sc_dev *dev;
+	struct i40iw_virtchnl_op_buf *vchnl_msg;
+	void *parm;
+	u32 vf_id;
+	u16 parm_len;
+	s16 ret_code;
+};
+
+#pragma pack(pop)
+
+enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
+					      u32 *vchnl_ver);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
+						  u16 *hmc_fcn);
+
+enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
+						   enum i40iw_hmc_rsrc_type rsrc_type,
+						   u32 start_index,
+						   u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
+						  enum i40iw_hmc_rsrc_type rsrc_type,
+						  u32 start_index,
+						  u32 rsrc_count);
+
+enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
+						   struct i40iw_dev_hw_stats *hw_stats);
+#endif
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4e851889355a..7493a83acd28 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
-mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o
+mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
new file mode 100644
index 000000000000..c1b9de800fe5
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_ib.h"
+
+static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
+{
+	switch (mlx_policy) {
+	case MLX5_POLICY_DOWN:
+		return IFLA_VF_LINK_STATE_DISABLE;
+	case MLX5_POLICY_UP:
+		return IFLA_VF_LINK_STATE_ENABLE;
+	case MLX5_POLICY_FOLLOW:
+		return IFLA_VF_LINK_STATE_AUTO;
+	default:
+		return __IFLA_VF_LINK_STATE_MAX;
+	}
+}
+
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+			  struct ifla_vf_info *info)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *rep;
+	int err;
+
+	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	if (!rep)
+		return -ENOMEM;
+
+	err = mlx5_query_hca_vport_context(mdev, 1, 1,  vf + 1, rep);
+	if (err) {
+		mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n",
+			     vf, err);
+		goto free;
+	}
+	memset(info, 0, sizeof(*info));
+	info->linkstate = mlx_to_net_policy(rep->policy);
+	if (info->linkstate == __IFLA_VF_LINK_STATE_MAX)
+		err = -EINVAL;
+
+free:
+	kfree(rep);
+	return err;
+}
+
+static inline enum port_state_policy net_to_mlx_policy(int policy)
+{
+	switch (policy) {
+	case IFLA_VF_LINK_STATE_DISABLE:
+		return MLX5_POLICY_DOWN;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		return MLX5_POLICY_UP;
+	case IFLA_VF_LINK_STATE_AUTO:
+		return MLX5_POLICY_FOLLOW;
+	default:
+		return MLX5_POLICY_INVALID;
+	}
+}
+
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->policy = net_to_mlx_policy(state);
+	if (in->policy == MLX5_POLICY_INVALID) {
+		err = -EINVAL;
+		goto out;
+	}
+	in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+
+out:
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats)
+{
+	int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+	struct mlx5_core_dev *mdev;
+	struct mlx5_ib_dev *dev;
+	void *out;
+	int err;
+
+	dev = to_mdev(device);
+	mdev = dev->mdev;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+	if (err)
+		goto ex;
+
+	stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets);
+	stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets);
+	stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets);
+	stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets);
+	stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets);
+
+ex:
+	kfree(out);
+	return err;
+}
+
+static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
+	in->node_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
+	in->port_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type)
+{
+	if (type == IFLA_VF_IB_NODE_GUID)
+		return set_vf_node_guid(device, vf, port, guid);
+	else if (type == IFLA_VF_IB_PORT_GUID)
+		return set_vf_port_guid(device, vf, port, guid);
+
+	return -EINVAL;
+}
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 41d8a0036465..1534af113058 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -208,7 +208,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
 		if (!out_cnt)
 			return IB_MAD_RESULT_FAILURE;
 
-		err = mlx5_core_query_vport_counter(dev->mdev, 0,
+		err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
 						    port_num, out_cnt, sz);
 		if (!err)
 			pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index edd8b8741846..5acf346e048e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -284,7 +284,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-	return !dev->mdev->issi;
+	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 }
 
 enum {
@@ -563,6 +563,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	if (MLX5_CAP_GEN(mdev, cd))
 		props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
 
+	if (!mlx5_core_is_pf(mdev))
+		props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+
 	return 0;
 }
 
@@ -700,6 +703,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	props->qkey_viol_cntr	= rep->qkey_violation_counter;
 	props->subnet_timeout	= rep->subnet_timeout;
 	props->init_type_reply	= rep->init_type_reply;
+	props->grh_required	= rep->grh_required;
 
 	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
 	if (err)
@@ -2350,6 +2354,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.map_mr_sg		= mlx5_ib_map_mr_sg;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+	if (mlx5_core_is_pf(mdev)) {
+		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
+		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
+		dev->ib_dev.get_vf_stats	= mlx5_ib_get_vf_stats;
+		dev->ib_dev.set_vf_guid		= mlx5_ib_set_vf_guid;
+	}
 
 	mlx5_ib_internal_fill_odp_caps(dev);
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 76b2b42e0535..f16c818ad2e6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -776,6 +776,14 @@ void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end);
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
+			  u8 port, struct ifla_vf_info *info);
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state);
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type);
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 7d2e42dd6926..6c00d04b8b28 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,8 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
 		goto out;
 	}
 
-	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
-			     pages, NULL);
+	ret = get_user_pages(uaddr & PAGE_MASK, 1, 1, 0, pages, NULL);
 	if (ret < 0)
 		goto out;
 
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 495be09781b1..e0fdb9201423 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -1,6 +1,6 @@
 config INFINIBAND_QIB
 	tristate "Intel PCIe HCA support"
-	depends on 64BIT
+	depends on 64BIT && INFINIBAND_RDMAVT
 	---help---
 	This is a low-level driver for Intel PCIe QLE InfiniBand host
 	channel adapters.  This driver does not support the Intel
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index 57f8103e51f8..79ebd79e8405 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -1,11 +1,11 @@
 obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
 
-ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
-	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \
-	qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \
-	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
+ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \
+	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \
+	qib_mad.o qib_pcie.o qib_pio_copy.o \
+	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \
 	qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
-	qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
+	qib_user_pages.o qib_user_sdma.o qib_iba7220.o \
 	qib_sd7220.o qib_iba7322.o qib_verbs.o
 
 # 6120 has no fallback if no MSI interrupts, others can do INTx
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 7df16f74bb45..bbf0a163aeab 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib_common.h"
 #include "qib_verbs.h"
@@ -229,9 +230,6 @@ struct qib_ctxtdata {
 	u8 redirect_seq_cnt;
 	/* ctxt rcvhdrq head offset */
 	u32 head;
-	/* lookaside fields */
-	struct qib_qp *lookaside_qp;
-	u32 lookaside_qpn;
 	/* QPs waiting for context processing */
 	struct list_head qp_wait_list;
 #ifdef CONFIG_DEBUG_FS
@@ -240,7 +238,7 @@ struct qib_ctxtdata {
 #endif
 };
 
-struct qib_sge_state;
+struct rvt_sge_state;
 
 struct qib_sdma_txreq {
 	int                 flags;
@@ -258,14 +256,14 @@ struct qib_sdma_desc {
 
 struct qib_verbs_txreq {
 	struct qib_sdma_txreq   txreq;
-	struct qib_qp           *qp;
-	struct qib_swqe         *wqe;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
 	u32                     dwords;
 	u16                     hdr_dwords;
 	u16                     hdr_inx;
 	struct qib_pio_header	*align_buf;
-	struct qib_mregion	*mr;
-	struct qib_sge_state    *ss;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
 };
 
 #define QIB_SDMA_TXREQ_F_USELARGEBUF  0x1
@@ -1096,8 +1094,6 @@ struct qib_devdata {
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors defered to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	int assigned_node_id; /* NUMA node closest to HCA */
 };
@@ -1135,8 +1131,9 @@ extern spinlock_t qib_devs_lock;
 extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
-
+extern u16 qpt_mask;
 extern unsigned qib_cc_table_size;
+
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
 int qib_enable_wc(struct qib_devdata *dd);
@@ -1323,7 +1320,7 @@ void __qib_sdma_intr(struct qib_pportdata *);
 void qib_sdma_intr(struct qib_pportdata *);
 void qib_user_sdma_send_desc(struct qib_pportdata *dd,
 			struct list_head *pktlist);
-int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
+int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *,
 			u32, struct qib_verbs_txreq *);
 /* ppd->sdma_lock should be locked before calling this. */
 int qib_sdma_make_progress(struct qib_pportdata *dd);
@@ -1454,6 +1451,8 @@ u64 qib_sps_ints(void);
 dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
 			  size_t, int);
 const char *qib_get_unit_name(int unit);
+const char *qib_get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1540,4 +1539,14 @@ struct qib_hwerror_msgs {
 void qib_format_hwerrors(u64 hwerrs,
 			 const struct qib_hwerror_msgs *hwerrmsgs,
 			 size_t nhwerrmsgs, char *msg, size_t lmsg);
+
+void qib_stop_send_queue(struct rvt_qp *qp);
+void qib_quiesce_qp(struct rvt_qp *qp);
+void qib_flush_qp_waiters(struct rvt_qp *qp);
+int qib_mtu_to_path_mtu(u32 mtu);
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+void qib_notify_error_qp(struct rvt_qp *qp);
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr);
+
 #endif                          /* _QIB_KERNEL_H */
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 4fb78abd8ba1..1d6e63eb1146 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -742,14 +742,11 @@ struct qib_tid_session_member {
 #define SIZE_OF_CRC 1
 
 #define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_PERMISSIVE_LID 0xFFFF
 #define QIB_AETH_CREDIT_SHIFT 24
 #define QIB_AETH_CREDIT_MASK 0x1F
 #define QIB_AETH_CREDIT_INVAL 0x1F
 #define QIB_PSN_MASK 0xFFFFFF
 #define QIB_MSN_MASK 0xFFFFFF
-#define QIB_QPN_MASK 0xFFFFFF
-#define QIB_MULTICAST_LID_BASE 0xC000
 #define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
 #define QIB_MULTICAST_QPN 0xFFFFFF
 
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
deleted file mode 100644
index 2b45d0b02300..000000000000
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ /dev/null
@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2013 Intel Corporation.  All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/kthread.h>
-
-#include "qib_verbs.h"
-#include "qib.h"
-
-/**
- * qib_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data =
-			(__u32 __force)entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED &&
-	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		smp_rmb();
-		worker = cq->dd->worker;
-		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
-			cq->triggered++;
-			queue_kthread_work(worker, &cq->comptask);
-		}
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-}
-
-/**
- * qib_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(struct kthread_work *work)
-{
-	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, queue_work()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		/*
-		 * IPoIB connected mode assumes the callback is from a
-		 * soft IRQ. We simulate this by blocking "bottom halves".
-		 * See the implementation for ipoib_cm_handle_tx_wc(),
-		 * netif_tx_lock_bh() and netif_tx_lock().
-		 */
-		local_bh_disable();
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-		local_bh_enable();
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * qib_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the QLogic_IB driver
- * @udata: user data for libibverbs.so
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_cq *cq;
-	struct qib_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_qib_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->dd = dd_from_dev(dev);
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * qib_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int qib_destroy_cq(struct ib_cq *ibcq)
-{
-	struct qib_ibdev *dev = to_idev(ibcq->device);
-	struct qib_cq *cq = to_icq(ibcq);
-
-	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * qib_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * qib_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *old_wc;
-	struct qib_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_qib_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct qib_ibdev *dev = to_idev(ibcq->device);
-		struct qib_mmap_info *ip = cq->ip;
-
-		qib_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See qib_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
-
-int qib_cq_init(struct qib_devdata *dd)
-{
-	int ret = 0;
-	int cpu;
-	struct task_struct *task;
-
-	if (dd->worker)
-		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
-		return -ENOMEM;
-	init_kthread_worker(dd->worker);
-	task = kthread_create_on_node(
-		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"qib_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
-	kthread_bind(task, cpu);
-	wake_up_process(task);
-out:
-	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
-}
-
-void qib_cq_exit(struct qib_devdata *dd)
-{
-	struct kthread_worker *worker;
-
-	worker = dd->worker;
-	if (!worker)
-		return;
-	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb();
-	flush_kthread_worker(worker);
-	kthread_stop(worker->task);
-	kfree(worker);
-}
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index f58fdc3d25a2..67ee6438cf59 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -90,6 +90,22 @@ const char *qib_get_unit_name(int unit)
 	return iname;
 }
 
+const char *qib_get_card_name(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return qib_get_unit_name(dd->unit);
+}
+
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -306,7 +322,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
 		struct qib_other_headers *ohdr = NULL;
 		struct qib_ibport *ibp = &ppd->ibport_data;
-		struct qib_qp *qp = NULL;
+		struct qib_devdata *dd = ppd->dd;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+		struct rvt_qp *qp = NULL;
 		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
 		u16 lid  = be16_to_cpu(hdr->lrh[1]);
 		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -319,7 +337,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		if (tlen < 24)
 			goto drop;
 
-		if (lid < QIB_MULTICAST_LID_BASE) {
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 			lid &= ~((1 << ppd->lmc) - 1);
 			if (unlikely(lid != ppd->lid))
 				goto drop;
@@ -346,13 +364,16 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		psn = be32_to_cpu(ohdr->bth[2]);
 
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		if (qp_num != QIB_MULTICAST_QPN) {
 			int ruc_res;
 
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
+			rcu_read_lock();
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+			if (!qp) {
+				rcu_read_unlock();
 				goto drop;
+			}
 
 			/*
 			 * Handle only RC QPs - for other QP types drop error
@@ -361,9 +382,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 			spin_lock(&qp->r_lock);
 
 			/* Check for valid receive state. */
-			if (!(ib_qib_state_ops[qp->state] &
-			      QIB_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 				goto unlock;
 			}
 
@@ -383,7 +404,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 					diff = qib_cmp24(psn, qp->r_psn);
 					if (!qp->r_nak_state && diff >= 0) {
-						ibp->n_rc_seqnak++;
+						ibp->rvp.n_rc_seqnak++;
 						qp->r_nak_state =
 							IB_NAK_PSN_ERROR;
 						/* Use the expected PSN. */
@@ -398,7 +419,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 						 */
 						if (list_empty(&qp->rspwait)) {
 							qp->r_flags |=
-								QIB_R_RSP_NAK;
+								RVT_R_RSP_NAK;
 							atomic_inc(
 								&qp->refcount);
 							list_add_tail(
@@ -419,12 +440,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 
 unlock:
 			spin_unlock(&qp->r_lock);
-			/*
-			 * Notify qib_destroy_qp() if it is waiting
-			 * for us to finish.
-			 */
-			if (atomic_dec_and_test(&qp->refcount))
-				wake_up(&qp->wait);
+			rcu_read_unlock();
 		} /* Unicast QP */
 	} /* Valid packet with TIDErr */
 
@@ -456,7 +472,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
 	int last;
 	u64 lval;
-	struct qib_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	l = rcd->head;
 	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
@@ -549,15 +565,6 @@ move_along:
 			updegr = 0;
 		}
 	}
-	/*
-	 * Notify qib_destroy_qp() if it is waiting
-	 * for lookaside_qp to finish.
-	 */
-	if (rcd->lookaside_qp) {
-		if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
-			wake_up(&rcd->lookaside_qp->wait);
-		rcd->lookaside_qp = NULL;
-	}
 
 	rcd->head = l;
 
@@ -567,17 +574,17 @@ move_along:
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & QIB_R_RSP_NAK) {
-			qp->r_flags &= ~QIB_R_RSP_NAK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			qib_send_rc_ack(qp);
 		}
-		if (qp->r_flags & QIB_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~QIB_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_qib_state_ops[qp->state] &
-					QIB_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				qib_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 4b927809d1a1..a3733f25280f 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2956,13 +2956,13 @@ static void pma_6120_timer(unsigned long data)
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) {
 		cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 		qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
 				      &cs->spkts, &cs->rpkts, &cs->xmit_wait);
 		mod_timer(&cs->pma_timer,
-			  jiffies + usecs_to_jiffies(ibp->pma_sample_interval));
+		      jiffies + usecs_to_jiffies(ibp->rvp.pma_sample_interval));
 	} else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		u64 ta, tb, tc, td, te;
 
@@ -2975,11 +2975,11 @@ static void pma_6120_timer(unsigned long data)
 		cs->rpkts = td - cs->rpkts;
 		cs->xmit_wait = te - cs->xmit_wait;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 }
 
 /*
- * Note that the caller has the ibp->lock held.
+ * Note that the caller has the ibp->rvp.lock held.
  */
 static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv,
 				     u32 start)
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 6c8ff10101c0..82d7c4bf5970 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2910,8 +2910,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
 			spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
 			qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
 		}
-		if (dd->pport[i].ibport_data.smi_ah)
-			ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah);
 	}
 }
 
@@ -5497,7 +5495,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
 	unsigned delay;
 	int ret;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		goto retry;
 
@@ -5515,7 +5513,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
 			ret = PTR_ERR(ah);
 		else {
 			send_buf->ah = ah;
-			ibp->smi_ah = to_iah(ah);
+			ibp->smi_ah = ibah_to_rvtah(ah);
 			ret = 0;
 		}
 	} else {
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4ff340fe904f..3f062f0dd9d8 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -42,6 +42,7 @@
 #ifdef CONFIG_INFINIBAND_QIB_DCA
 #include <linux/dca.h>
 #endif
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -244,6 +245,13 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
 		alloc_percpu(struct qib_pma_counters);
 	if (!ppd->ibport_data.pmastats)
 		return -ENOMEM;
+	ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+	if (!(ppd->ibport_data.rvp.rc_acks) ||
+	    !(ppd->ibport_data.rvp.rc_qacks) ||
+	    !(ppd->ibport_data.rvp.rc_delayed_comp))
+		return -ENOMEM;
 
 	if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
 		goto bail;
@@ -449,8 +457,6 @@ static int loadtime_init(struct qib_devdata *dd)
 	init_timer(&dd->intrchk_timer);
 	dd->intrchk_timer.function = verify_interrupt;
 	dd->intrchk_timer.data = (unsigned long) dd;
-
-	ret = qib_cq_init(dd);
 done:
 	return ret;
 }
@@ -631,6 +637,9 @@ wq_error:
 static void qib_free_pportdata(struct qib_pportdata *ppd)
 {
 	free_percpu(ppd->ibport_data.pmastats);
+	free_percpu(ppd->ibport_data.rvp.rc_acks);
+	free_percpu(ppd->ibport_data.rvp.rc_qacks);
+	free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 	ppd->ibport_data.pmastats = NULL;
 }
 
@@ -1081,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
 	qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
 	free_percpu(dd->int_counter);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 u64 qib_int_counter(struct qib_devdata *dd)
@@ -1120,9 +1129,12 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 {
 	unsigned long flags;
 	struct qib_devdata *dd;
-	int ret;
+	int ret, nports;
 
-	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct qib_pportdata);
+	dd = (struct qib_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						    nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1171,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1421,7 +1433,6 @@ static void cleanup_device_data(struct qib_devdata *dd)
 	}
 	kfree(tmp);
 	kfree(dd->boardname);
-	qib_cq_exit(dd);
 }
 
 /*
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 086616d071b9..a014fd4cd076 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -74,7 +74,7 @@ static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev)
 	struct ib_event event;
 	struct qib_devdata *dd = ppd->dd;
 
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index d725c565518d..2c3c93572c17 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -46,20 +46,20 @@
  *
  */
 
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
+int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 {
 	unsigned long flags;
 	u32 r;
 	u32 n;
 	int ret = 0;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		struct qib_mregion *tmr;
+		struct rvt_mregion *tmr;
 
 		tmr = rcu_access_pointer(dev->dma_mr);
 		if (!tmr) {
@@ -90,8 +90,8 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 	 * bits are capped in qib_verbs.c to insure enough bits
 	 * for generation number
 	 */
-	mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
-		((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+	mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
+		((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
 		 << 8);
 	if (mr->lkey == 0) {
 		mr->lkey |= 1 << 8;
@@ -114,13 +114,13 @@ bail:
  * qib_free_lkey - free an lkey
  * @mr: mr to free from tables
  */
-void qib_free_lkey(struct qib_mregion *mr)
+void qib_free_lkey(struct rvt_mregion *mr)
 {
 	unsigned long flags;
 	u32 lkey = mr->lkey;
 	u32 r;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
@@ -128,7 +128,7 @@ void qib_free_lkey(struct qib_mregion *mr)
 	if (lkey == 0)
 		RCU_INIT_POINTER(dev->dma_mr, NULL);
 	else {
-		r = lkey >> (32 - ib_qib_lkey_table_size);
+		r = lkey >> (32 - ib_rvt_lkey_table_size);
 		RCU_INIT_POINTER(rkt->table[r], NULL);
 	}
 	qib_put_mr(mr);
@@ -138,105 +138,6 @@ out:
 }
 
 /**
- * qib_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct qib_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see qib_get_dma_mr and qib_dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-			goto bail;
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-		goto bail;
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
-/**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
  * @qp: qp for validation
  * @sge: SGE state
@@ -249,11 +150,11 @@ bail:
  *
  * increments the reference count upon success
  */
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
 {
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_mregion *mr;
+	struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+	struct rvt_mregion *mr;
 	unsigned n, m;
 	size_t off;
 
@@ -263,7 +164,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	 */
 	rcu_read_lock();
 	if (rkey == 0) {
-		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
@@ -285,7 +186,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	}
 
 	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+		rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
 	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
@@ -308,15 +209,15 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 
 		entries_spanned_by_off = off >> mr->page_shift;
 		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
 	} else {
 		m = 0;
 		n = 0;
 		while (off >= mr->map[m]->segs[n].length) {
 			off -= mr->map[m]->segs[n].length;
 			n++;
-			if (n >= QIB_SEGSZ) {
+			if (n >= RVT_SEGSZ) {
 				m++;
 				n = 0;
 			}
@@ -335,58 +236,3 @@ bail:
 	return 0;
 }
 
-/*
- * Initialize the memory region specified by the work request.
- */
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
-{
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_pd *pd = to_ipd(qp->ibqp.pd);
-	struct qib_mr *mr = to_imr(wr->mr);
-	struct qib_mregion *mrg;
-	u32 key = wr->key;
-	unsigned i, n, m;
-	int ret = -EINVAL;
-	unsigned long flags;
-	u64 *page_list;
-	size_t ps;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (pd->user || key == 0)
-		goto bail;
-
-	mrg = rcu_dereference_protected(
-		rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
-		lockdep_is_held(&rkt->lock));
-	if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
-		goto bail;
-
-	if (mr->npages > mrg->max_segs)
-		goto bail;
-
-	ps = mr->ibmr.page_size;
-	if (mr->ibmr.length > ps * mr->npages)
-		goto bail;
-
-	mrg->user_base = mr->ibmr.iova;
-	mrg->iova = mr->ibmr.iova;
-	mrg->lkey = key;
-	mrg->length = mr->ibmr.length;
-	mrg->access_flags = wr->access;
-	page_list = mr->pages;
-	m = 0;
-	n = 0;
-	for (i = 0; i < mr->npages; i++) {
-		mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
-		mrg->map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-
-	ret = 0;
-bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	return ret;
-}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 9625e7c438e5..0bd18375d7df 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -70,7 +70,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 	unsigned long flags;
 	unsigned long timeout;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -79,7 +79,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout &&
+	    time_before(jiffies, ibp->rvp.trap_timeout))
 		return;
 
 	send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
@@ -93,42 +94,42 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = 1;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
+			ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid);
 			if (IS_ERR(ah))
 				ret = PTR_ERR(ah);
 			else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
 		} else
 			ret = -EINVAL;
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
@@ -141,10 +142,10 @@ void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 	struct ib_mad_notice_attr data;
 
 	if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -205,8 +206,11 @@ static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp)
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void qib_cap_mask_chg(struct qib_ibport *ibp)
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	struct ib_mad_notice_attr data;
 
 	data.generic_type = IB_NOTICE_TYPE_INFO;
@@ -217,8 +221,8 @@ void qib_cap_mask_chg(struct qib_ibport *ibp)
 	data.toggle_count = 0;
 	memset(&data.details, 0, sizeof(data.details));
 	data.details.ntc_144.lid = data.issuer_lid;
-	data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-
+	data.details.ntc_144.new_cap_mask =
+					cpu_to_be32(ibp->rvp.port_cap_flags);
 	qib_send_trap(ibp, &data, sizeof(data));
 }
 
@@ -409,37 +413,38 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == smp->mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == smp->mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (smp->method == IB_MGMT_METHOD_GET ||
 	     smp->method == IB_MGMT_METHOD_SET ||
 	     smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (smp->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			qib_bad_mkey(ibp, smp);
 			ret = 1;
@@ -489,15 +494,15 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pip->mkey = ibp->mkey;
-	pip->gid_prefix = ibp->gid_prefix;
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pip->mkey = ibp->rvp.mkey;
+	pip->gid_prefix = ibp->rvp.gid_prefix;
 	pip->lid = cpu_to_be16(ppd->lid);
-	pip->sm_lid = cpu_to_be16(ibp->sm_lid);
-	pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid);
+	pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 	/* pip->diag_code; */
-	pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pip->local_port_num = port;
 	pip->link_width_enabled = ppd->link_width_enabled;
 	pip->link_width_supported = ppd->link_width_supported;
@@ -508,7 +513,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	pip->portphysstate_linkdown =
 		(dd->f_ibphys_portstate(ppd->lastibcstat) << 4) |
 		(get_linkdowndefaultstate(ppd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pip->mkeyprot_resv_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 	pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) |
 		ppd->link_speed_enabled;
 	switch (ppd->ibmtu) {
@@ -529,9 +534,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		mtu = IB_MTU_256;
 		break;
 	}
-	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl;
+	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->rvp.sm_sl;
 	pip->vlcap_inittype = ppd->vls_supported << 4;  /* InitType = 0 */
-	pip->vl_high_limit = ibp->vl_high_limit;
+	pip->vl_high_limit = ibp->rvp.vl_high_limit;
 	pip->vl_arb_high_cap =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP);
 	pip->vl_arb_low_cap =
@@ -542,20 +547,20 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	/* pip->vlstallcnt_hoqlife; */
 	pip->operationalvl_pei_peo_fpi_fpo =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4;
-	pip->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pip->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pip->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pip->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pip->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pip->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 	/* Only the hardware GUID is supported for now */
 	pip->guid_cap = QIB_GUIDS_PER_PORT;
-	pip->clientrereg_resv_subnetto = ibp->subnet_timeout;
+	pip->clientrereg_resv_subnetto = ibp->rvp.subnet_timeout;
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
 		(get_phyerrthreshold(ppd) << 4) |
 		get_overrunthreshold(ppd);
 	/* pip->max_credit_hint; */
-	if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+	if (ibp->rvp.port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
 		u32 v;
 
 		v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY);
@@ -685,13 +690,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	event.device = ibdev;
 	event.element.port_num = port;
 
-	ibp->mkey = pip->mkey;
-	ibp->gid_prefix = pip->gid_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+	ibp->rvp.mkey = pip->mkey;
+	ibp->rvp.gid_prefix = pip->gid_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
 	/* Must be a valid unicast LID address. */
-	if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
+	if (lid == 0 || lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
 	else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
 		if (ppd->lid != lid)
@@ -706,21 +711,21 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	smlid = be16_to_cpu(pip->sm_lid);
 	msl = pip->neighbormtu_mastersmsl & 0xF;
 	/* Must be a valid unicast LID address. */
-	if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
+	if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
-	else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+	else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -768,10 +773,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-	ibp->vl_high_limit = pip->vl_high_limit;
+	ibp->rvp.mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+	ibp->rvp.vl_high_limit = pip->vl_high_limit;
 	(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
 	mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
 	if (mtu == -1)
@@ -789,13 +794,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	}
 
 	if (pip->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pip->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pip->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
 	ore = pip->localphyerrors_overrunerrors;
 	if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
@@ -804,7 +809,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	if (set_overrunthreshold(ppd, (ore & 0xF)))
 		smp->status |= IB_SMP_INVALID_FIELD;
 
-	ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+	ibp->rvp.subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
 
 	/*
 	 * Do the port state change now that the other link parameters
@@ -1028,7 +1033,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
 		(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1062,7 +1067,7 @@ static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
 
 	memset(smp->data, 0, sizeof(smp->data));
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP))
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP))
 		smp->status |= IB_SMP_UNSUP_METHOD;
 	else
 		for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2)
@@ -1078,7 +1083,7 @@ static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
 	u8 *p = (u8 *) smp->data;
 	unsigned i;
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) {
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) {
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		return reply(smp);
 	}
@@ -1195,20 +1200,20 @@ static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		goto bail;
 	}
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS);
 	p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 	p->counter_width = 4;   /* 32 bit counters */
 	p->counter_mask0_9 = COUNTER_MASK0_9;
-	p->sample_start = cpu_to_be32(ibp->pma_sample_start);
-	p->sample_interval = cpu_to_be32(ibp->pma_sample_interval);
-	p->tag = cpu_to_be16(ibp->pma_tag);
-	p->counter_select[0] = ibp->pma_counter_select[0];
-	p->counter_select[1] = ibp->pma_counter_select[1];
-	p->counter_select[2] = ibp->pma_counter_select[2];
-	p->counter_select[3] = ibp->pma_counter_select[3];
-	p->counter_select[4] = ibp->pma_counter_select[4];
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	p->sample_start = cpu_to_be32(ibp->rvp.pma_sample_start);
+	p->sample_interval = cpu_to_be32(ibp->rvp.pma_sample_interval);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
+	p->counter_select[0] = ibp->rvp.pma_counter_select[0];
+	p->counter_select[1] = ibp->rvp.pma_counter_select[1];
+	p->counter_select[2] = ibp->rvp.pma_counter_select[2];
+	p->counter_select[3] = ibp->rvp.pma_counter_select[3];
+	p->counter_select[4] = ibp->rvp.pma_counter_select[4];
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 bail:
 	return reply((struct ib_smp *) pmp);
@@ -1233,7 +1238,7 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
 		goto bail;
 	}
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 
 	/* Port Sampling code owns the PS* HW counters */
 	xmit_flags = ppd->cong_stats.flags;
@@ -1242,18 +1247,18 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
 	if (status == IB_PMA_SAMPLE_STATUS_DONE ||
 	    (status == IB_PMA_SAMPLE_STATUS_RUNNING &&
 	     xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) {
-		ibp->pma_sample_start = be32_to_cpu(p->sample_start);
-		ibp->pma_sample_interval = be32_to_cpu(p->sample_interval);
-		ibp->pma_tag = be16_to_cpu(p->tag);
-		ibp->pma_counter_select[0] = p->counter_select[0];
-		ibp->pma_counter_select[1] = p->counter_select[1];
-		ibp->pma_counter_select[2] = p->counter_select[2];
-		ibp->pma_counter_select[3] = p->counter_select[3];
-		ibp->pma_counter_select[4] = p->counter_select[4];
-		dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval,
-				      ibp->pma_sample_start);
+		ibp->rvp.pma_sample_start = be32_to_cpu(p->sample_start);
+		ibp->rvp.pma_sample_interval = be32_to_cpu(p->sample_interval);
+		ibp->rvp.pma_tag = be16_to_cpu(p->tag);
+		ibp->rvp.pma_counter_select[0] = p->counter_select[0];
+		ibp->rvp.pma_counter_select[1] = p->counter_select[1];
+		ibp->rvp.pma_counter_select[2] = p->counter_select[2];
+		ibp->rvp.pma_counter_select[3] = p->counter_select[3];
+		ibp->rvp.pma_counter_select[4] = p->counter_select[4];
+		dd->f_set_cntr_sample(ppd, ibp->rvp.pma_sample_interval,
+				      ibp->rvp.pma_sample_start);
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	ret = pma_get_portsamplescontrol(pmp, ibdev, port);
 
@@ -1357,8 +1362,8 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
 	int i;
 
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
@@ -1373,11 +1378,11 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be32(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1397,8 +1402,8 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
 
 	/* Port Sampling code owns the PS* HW counters */
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
@@ -1415,11 +1420,11 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be64(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1453,7 +1458,7 @@ static int pma_get_portcounters(struct ib_pma_mad *pmp,
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 
 	memset(pmp->data, 0, sizeof(pmp->data));
 
@@ -1546,9 +1551,9 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 
 	qib_get_counters(ppd, &cntrs);
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	xmit_wait_counter = xmit_wait_get_value_delta(ppd);
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
@@ -1564,7 +1569,7 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 	cntrs.port_xmit_data -= ibp->z_port_xmit_data;
 	cntrs.port_rcv_data -= ibp->z_port_rcv_data;
 	cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
@@ -1743,7 +1748,7 @@ static int pma_set_portcounters(struct ib_pma_mad *pmp,
 			cntrs.excessive_buffer_overrun_errors;
 
 	if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
@@ -1778,11 +1783,11 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
 	ret = pma_get_portcounters_cong(pmp, ibdev, port);
 
 	if (counter_select & IB_PMA_SEL_CONG_XMIT) {
-		spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+		spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 		ppd->cong_stats.counter = 0;
 		dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL,
 				      0x0);
-		spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+		spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	}
 	if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) {
 		ibp->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1806,7 +1811,7 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
 			cntrs.local_link_integrity_errors;
 		ibp->z_excessive_buffer_overrun_errors =
 			cntrs.excessive_buffer_overrun_errors;
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
@@ -1916,12 +1921,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 			ret = subn_get_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -1950,12 +1955,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 			ret = subn_set_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -2443,12 +2448,6 @@ bail:
 	return ret;
 }
 
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
 static void xmit_wait_timer_func(unsigned long opaque)
 {
 	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -2456,7 +2455,7 @@ static void xmit_wait_timer_func(unsigned long opaque)
 	unsigned long flags;
 	u8 status;
 
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) {
 		status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 		if (status == IB_PMA_SAMPLE_STATUS_DONE) {
@@ -2469,74 +2468,35 @@ static void xmit_wait_timer_func(unsigned long opaque)
 	ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd);
 	dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0);
 done:
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	mod_timer(&ppd->cong_stats.timer, jiffies + HZ);
 }
 
-int qib_create_agents(struct qib_ibdev *dev)
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
-	int ret;
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
 
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		/* Initialize xmit_wait structure */
-		dd->pport[p].cong_stats.counter = 0;
-		init_timer(&dd->pport[p].cong_stats.timer);
-		dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func;
-		dd->pport[p].cong_stats.timer.data =
-			(unsigned long)(&dd->pport[p]);
-		dd->pport[p].cong_stats.timer.expires = 0;
-		add_timer(&dd->pport[p].cong_stats.timer);
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
+	/* Initialize xmit_wait structure */
+	dd->pport[port_idx].cong_stats.counter = 0;
+	init_timer(&dd->pport[port_idx].cong_stats.timer);
+	dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func;
+	dd->pport[port_idx].cong_stats.timer.data =
+		(unsigned long)(&dd->pport[port_idx]);
+	dd->pport[port_idx].cong_stats.timer.expires = 0;
+	add_timer(&dd->pport[port_idx].cong_stats.timer);
 }
 
-void qib_free_agents(struct qib_ibdev *dev)
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-		if (dd->pport[p].cong_stats.timer.data)
-			del_timer_sync(&dd->pport[p].cong_stats.timer);
-	}
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+
+	if (dd->pport[port_idx].cong_stats.timer.data)
+		del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+
+	if (dd->pport[port_idx].ibport_data.smi_ah)
+		ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
 }
diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c
deleted file mode 100644
index 34927b700b0e..000000000000
--- a/drivers/infiniband/hw/qib/qib_mmap.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct qib_mmap_info
- */
-void qib_release_mmap_info(struct kref *ref)
-{
-	struct qib_mmap_info *ip =
-		container_of(ref, struct qib_mmap_info, ref);
-	struct qib_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void qib_vma_open(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void qib_vma_close(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, qib_release_mmap_info);
-}
-
-static const struct vm_operations_struct qib_vm_ops = {
-	.open =     qib_vma_open,
-	.close =    qib_vma_close,
-};
-
-/**
- * qib_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct qib_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct qib_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &qib_vm_ops;
-		vma->vm_private_data = ip;
-		qib_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for qib_mmap
- */
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev,
-					   u32 size,
-					   struct ib_ucontext *context,
-					   void *obj) {
-	struct qib_mmap_info *ip;
-
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj)
-{
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
deleted file mode 100644
index 5f53304e8a9b..000000000000
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ /dev/null
@@ -1,490 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "qib.h"
-
-/* Fast memory region */
-struct qib_fmr {
-	struct ib_fmr ibfmr;
-	struct qib_mregion mr;        /* must be last */
-};
-
-static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct qib_fmr, ibfmr);
-}
-
-static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
-	int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_qib_mregion(struct qib_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * qib_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see qib_dma.c).
- */
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct qib_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_qib_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = qib_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct qib_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_qib_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = qib_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the QLogic_IB driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata)
-{
-	struct qib_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-			void *vaddr;
-
-			vaddr = page_address(sg_page(sg));
-			if (!vaddr) {
-				ret = ERR_PTR(-EINVAL);
-				goto bail;
-			}
-			mr->mr.map[m]->segs[n].vaddr = vaddr;
-			mr->mr.map[m]->segs[n].length = umem->page_size;
-			n++;
-			if (n == QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by qib_get_dma_mr()
- * or qib_reg_user_mr().
- */
-int qib_dereg_mr(struct ib_mr *ibmr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	kfree(mr->pages);
-	qib_free_lkey(&mr->mr);
-
-	qib_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- */
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_num_sg)
-{
-	struct qib_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
-	if (!mr->pages)
-		goto err;
-
-	return &mr->ibmr;
-
-err:
-	qib_dereg_mr(&mr->ibmr);
-	return ERR_PTR(-ENOMEM);
-}
-
-static int qib_set_page(struct ib_mr *ibmr, u64 addr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	if (unlikely(mr->npages == mr->mr.max_segs))
-		return -ENOMEM;
-
-	mr->pages[mr->npages++] = addr;
-
-	return 0;
-}
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	mr->npages = 0;
-
-	return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
-}
-
-/**
- * qib_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr)
-{
-	struct qib_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = qib_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int qib_unmap_fmr(struct list_head *fmr_list)
-{
-	struct qib_fmr *fmr;
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * qib_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int qib_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	qib_free_lkey(&fmr->mr);
-	qib_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}
-
-void mr_rcu_callback(struct rcu_head *list)
-{
-	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
-
-	complete(&mr->comp);
-}
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 3eff35c2d453..575b737d9ef3 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -34,32 +34,38 @@
 
 #include <linux/err.h>
 #include <linux/vmalloc.h>
-#include <linux/jhash.h>
+#include <rdma/rdma_vt.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/seq_file.h>
 #endif
 
 #include "qib.h"
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
+/*
+ * mask field which was present in now deleted qib_qpn_table
+ * is not present in rvt_qpn_table. Defining the same field
+ * as qpt_mask here instead of adding the mask field to
+ * rvt_qpn_table.
+ */
+u16 qpt_mask;
 
-static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
-static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
-					struct qpn_map *map, unsigned off,
+static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
+					struct rvt_qpn_map *map, unsigned off,
 					unsigned n)
 {
-	if (qpt->mask) {
+	if (qpt_mask) {
 		off++;
-		if (((off & qpt->mask) >> 1) >= n)
-			off = (off | qpt->mask) + 2;
-	} else
-		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
+		if (((off & qpt_mask) >> 1) >= n)
+			off = (off | qpt_mask) + 2;
+	} else {
+		off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off);
+	}
 	return off;
 }
 
@@ -100,7 +106,7 @@ static u32 credit_table[31] = {
 	32768                   /* 1E */
 };
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
 			 gfp_t gfp)
 {
 	unsigned long page = get_zeroed_page(gfp);
@@ -121,12 +127,15 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
  * Allocate the next available QPN or
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
-static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port, gfp_t gfp)
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp)
 {
 	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
+	struct rvt_qpn_map *map;
 	u32 ret;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 
 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -143,12 +152,12 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 	}
 
 	qpn = qpt->last + 2;
-	if (qpn >= QPN_MAX)
+	if (qpn >= RVT_QPN_MAX)
 		qpn = 2;
-	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-		qpn = (qpn | qpt->mask) + 2;
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
+	if (qpt_mask && ((qpn & qpt_mask) >> 1) >= dd->n_krcv_queues)
+		qpn = (qpn | qpt_mask) + 2;
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
@@ -173,14 +182,14 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 			 * We just need to be sure we don't loop
 			 * forever.
 			 */
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 		/*
 		 * In order to keep the number of pages allocated to a
 		 * minimum, we scan the all existing pages before increasing
 		 * the size of the bitmap table.
 		 */
 		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 				break;
 			map = &qpt->map[qpt->nmaps++];
 			offset = 0;
@@ -200,706 +209,113 @@ bail:
 	return ret;
 }
 
-static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
-{
-	return jhash_1word(qpn, dev->qp_rnd) &
-		(dev->qp_table_size - 1);
-}
-
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num == 0)
-		rcu_assign_pointer(ibp->qp0, qp);
-	else if (qp->ibqp.qp_num == 1)
-		rcu_assign_pointer(ibp->qp1, qp);
-	else {
-		qp->next = dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_table[n], qp);
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp0,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp0, NULL);
-	} else if (rcu_dereference_protected(ibp->qp1,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp1, NULL);
-	} else {
-		struct qib_qp *q;
-		struct qib_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qpt_lock))) != NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-					rcu_dereference_protected(qp->next,
-					 lockdep_is_held(&dev->qpt_lock)));
-				removed = 1;
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		atomic_dec(&qp->refcount);
-	}
-}
-
 /**
  * qib_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
  */
-unsigned qib_free_all_qps(struct qib_devdata *dd)
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi)
 {
-	struct qib_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct qib_qp *qp;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 	unsigned n, qp_inuse = 0;
 
 	for (n = 0; n < dd->num_pports; n++) {
 		struct qib_ibport *ibp = &dd->pport[n].ibport_data;
 
-		if (!qib_mcast_tree_empty(ibp))
-			qp_inuse++;
 		rcu_read_lock();
-		if (rcu_dereference(ibp->qp0))
+		if (rcu_dereference(ibp->rvp.qp[0]))
 			qp_inuse++;
-		if (rcu_dereference(ibp->qp1))
+		if (rcu_dereference(ibp->rvp.qp[1]))
 			qp_inuse++;
 		rcu_read_unlock();
 	}
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_table[n],
-			lockdep_is_held(&dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-					lockdep_is_held(&dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	synchronize_rcu();
-
 	return qp_inuse;
 }
 
-/**
- * qib_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
+void qib_notify_qp_reset(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = NULL;
-
-	rcu_read_lock();
-	if (unlikely(qpn <= 1)) {
-		if (qpn == 0)
-			qp = rcu_dereference(ibp->qp0);
-		else
-			qp = rcu_dereference(ibp->qp1);
-		if (qp)
-			atomic_inc(&qp->refcount);
-	} else {
-		struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		unsigned n = qpn_hash(dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn) {
-				atomic_inc(&qp->refcount);
-				break;
-			}
-	}
-	rcu_read_unlock();
-	return qp;
-}
-
-/**
- * qib_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	atomic_set(&qp->s_dma_busy, 0);
-	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-		qib_put_ss(&qp->s_rdma_read_sge);
-
-	qib_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct qib_sge *sge = &wqe->sg_list[i];
-
-				qib_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
+	struct qib_qp_priv *priv = qp->priv;
 
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct qib_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
+	atomic_set(&priv->s_dma_busy, 0);
 }
 
-/**
- * qib_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
+void qib_notify_error_qp(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
-
-	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
 
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
-		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
-		list_del_init(&qp->iowait);
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->iowait);
 	}
-	spin_unlock(&dev->pending_lock);
+	spin_unlock(&dev->rdi.pending_lock);
 
-	if (!(qp->s_flags & QIB_S_BUSY)) {
+	if (!(qp->s_flags & RVT_S_BUSY)) {
 		qp->s_hdrwords = 0;
 		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
+			rvt_put_mr(qp->s_rdma_mr);
 			qp->s_rdma_mr = NULL;
 		}
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
+		if (priv->s_tx) {
+			qib_put_txreq(priv->s_tx);
+			priv->s_tx = NULL;
 		}
 	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		qib_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct qib_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
 }
 
-/**
- * qib_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata)
+static int mtu_to_enum(u32 mtu)
 {
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
-
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
-
-	if (attr_mask & IB_QP_AV) {
-		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > QIB_QPN_MASK)
-			goto inval;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
-
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
-
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct qib_devdata *dd = dd_from_dev(dev);
-		int mtu, pidx = qp->port_num - 1;
-
-		mtu = ib_mtu_enum_to_int(attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-		if (mtu > dd->pport[pidx].ibmtu) {
-			switch (dd->pport[pidx].ibmtu) {
-			case 4096:
-				pmtu = IB_MTU_4096;
-				break;
-			case 2048:
-				pmtu = IB_MTU_2048;
-				break;
-			case 1024:
-				pmtu = IB_MTU_1024;
-				break;
-			case 512:
-				pmtu = IB_MTU_512;
-				break;
-			case 256:
-				pmtu = IB_MTU_256;
-				break;
-			default:
-				pmtu = IB_MTU_2048;
-			}
-		} else
-			pmtu = attr->path_mtu;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
-			goto inval;
+	int enum_mtu;
 
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			spin_lock(&dev->pending_lock);
-			if (!list_empty(&qp->iowait))
-				list_del_init(&qp->iowait);
-			spin_unlock(&dev->pending_lock);
-			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_work);
-			del_timer_sync(&qp->s_timer);
-			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-			if (qp->s_tx) {
-				qib_put_txreq(qp->s_tx);
-				qp->s_tx = NULL;
-			}
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			qib_reset_qp(qp, ibqp->qp_type);
-		}
+	switch (mtu) {
+	case 4096:
+		enum_mtu = IB_MTU_4096;
 		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to retrigger if QP set to RTR more than once */
-		qp->r_flags &= ~QIB_R_COMM_EST;
-		qp->state = new_state;
+	case 2048:
+		enum_mtu = IB_MTU_2048;
 		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
+	case 1024:
+		enum_mtu = IB_MTU_1024;
 		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
+	case 512:
+		enum_mtu = IB_MTU_512;
 		break;
-
-	case IB_QPS_ERR:
-		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	case 256:
+		enum_mtu = IB_MTU_256;
 		break;
-
 	default:
-		qp->state = new_state;
-		break;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		qp->path_mtu = pmtu;
-		qp->pmtu = ib_mtu_enum_to_int(pmtu);
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
+		enum_mtu = IB_MTU_2048;
 	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
+	return enum_mtu;
 }
 
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr)
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr)
 {
-	struct qib_qp *qp = to_iqp(ibqp);
+	int mtu, pmtu, pidx = qp->port_num - 1;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
+	mtu = ib_mtu_enum_to_int(attr->path_mtu);
+	if (mtu == -1)
+		return -EINVAL;
+
+	if (mtu > dd->pport[pidx].ibmtu)
+		pmtu = mtu_to_enum(dd->pport[pidx].ibmtu);
+	else
+		pmtu = attr->path_mtu;
+	return pmtu;
+}
 
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
-	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
+int qib_mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu);
+}
 
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	return ib_mtu_enum_to_int(pmtu);
 }
 
 /**
@@ -908,7 +324,7 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  * Returns the AETH.
  */
-__be32 qib_compute_aeth(struct qib_qp *qp)
+__be32 qib_compute_aeth(struct rvt_qp *qp)
 {
 	u32 aeth = qp->r_msn & QIB_MSN_MASK;
 
@@ -921,7 +337,7 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
-		struct qib_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 tail;
 
@@ -962,315 +378,63 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
 	return cpu_to_be32(aeth);
 }
 
-/**
- * qib_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
 {
-	struct qib_qp *qp;
-	int err;
-	struct qib_swqe *swq = NULL;
-	struct qib_ibdev *dev;
-	struct qib_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-	gfp_t gfp;
+	struct qib_qp_priv *priv;
 
+	priv = kzalloc(sizeof(*priv), gfp);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+	priv->owner = qp;
 
-	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
-	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
-		return ERR_PTR(-EINVAL);
-
-	/* GFP_NOIO is applicable in RC QPs only */
-	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
-	    init_attr->qp_type != IB_QPT_RC)
-		return ERR_PTR(-EINVAL);
-
-	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
-			GFP_NOIO : GFP_KERNEL;
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
-		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
+	priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
 	}
+	init_waitqueue_head(&priv->wait_dma);
+	INIT_WORK(&priv->s_work, _qib_do_send);
+	INIT_LIST_HEAD(&priv->iowait);
 
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct qib_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct qib_swqe);
-		swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
-				gfp, PAGE_KERNEL);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct qib_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, gfp);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct qib_rwqe);
-			if (gfp != GFP_NOIO)
-				qp->r_rq.wq = vmalloc_user(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz);
-			else
-				qp->r_rq.wq = __vmalloc(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz,
-						gfp, PAGE_KERNEL);
-
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_waitqueue_head(&qp->wait_dma);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_WORK(&qp->s_work, qib_do_send);
-		INIT_LIST_HEAD(&qp->iowait);
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num, gfp);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		qib_reset_qp(qp, init_attr->qp_type);
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = qib_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == ib_qib_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
+	return priv;
 }
 
-/**
- * qib_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int qib_destroy_qp(struct ib_qp *ibqp)
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
+	struct qib_qp_priv *priv = qp->priv;
 
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		spin_lock(&dev->pending_lock);
-		if (!list_empty(&qp->iowait))
-			list_del_init(&qp->iowait);
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-		spin_unlock_irq(&qp->s_lock);
-		cancel_work_sync(&qp->s_work);
-		del_timer_sync(&qp->s_timer);
-		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
-		}
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		clear_mr_refs(qp, 1);
-	} else
-		spin_unlock_irq(&qp->s_lock);
+	kfree(priv->s_hdr);
+	kfree(priv);
+}
 
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+void qib_stop_send_queue(struct rvt_qp *qp)
+{
+	struct qib_qp_priv *priv = qp->priv;
 
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
+	cancel_work_sync(&priv->s_work);
+	del_timer_sync(&qp->s_timer);
 }
 
-/**
- * qib_init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
+void qib_quiesce_qp(struct rvt_qp *qp)
 {
-	spin_lock_init(&qpt->lock);
-	qpt->last = 1;          /* start with QPN 2 */
-	qpt->nmaps = 1;
-	qpt->mask = dd->qpn_mask;
+	struct qib_qp_priv *priv = qp->priv;
+
+	wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy));
+	if (priv->s_tx) {
+		qib_put_txreq(priv->s_tx);
+		priv->s_tx = NULL;
+	}
 }
 
-/**
- * qib_free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_free_qpn_table(struct qib_qpn_table *qpt)
+void qib_flush_qp_waiters(struct rvt_qp *qp)
 {
-	int i;
+	struct qib_qp_priv *priv = qp->priv;
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		if (qpt->map[i].page)
-			free_page((unsigned long) qpt->map[i].page);
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait))
+		list_del_init(&priv->iowait);
+	spin_unlock(&dev->rdi.pending_lock);
 }
 
 /**
@@ -1280,7 +444,7 @@ void qib_free_qpn_table(struct qib_qpn_table *qpt)
  *
  * The QP s_lock should be held.
  */
-void qib_get_credit(struct qib_qp *qp, u32 aeth)
+void qib_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
 
@@ -1290,31 +454,70 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
 	 * honor the credit field.
 	 */
 	if (credit == QIB_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 			}
 		}
-	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
 		if (qib_cmp24(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 			}
 		}
 	}
 }
 
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wr/wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+		       struct rvt_swqe *wqe)
+{
+	struct rvt_ah *ah;
+	int ret = 0;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		/* progress hint */
+		ret = 1;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
 #ifdef CONFIG_DEBUG_FS
 
 struct qib_qp_iter {
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	int n;
 };
 
@@ -1340,14 +543,14 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
 	struct qib_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int ret = 1;
-	struct qib_qp *pqp = iter->qp;
-	struct qib_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
-	for (; n < dev->qp_table_size; n++) {
+	for (; n < dev->rdi.qp_dev->qp_table_size; n++) {
 		if (pqp)
 			qp = rcu_dereference(pqp->next);
 		else
-			qp = rcu_dereference(dev->qp_table[n]);
+			qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]);
 		pqp = qp;
 		if (qp) {
 			iter->qp = qp;
@@ -1364,10 +567,11 @@ static const char * const qp_type_str[] = {
 
 void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 {
-	struct qib_swqe *wqe;
-	struct qib_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 	seq_printf(s,
 		   "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
 		   iter->n,
@@ -1377,8 +581,8 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 		   wqe->wr.opcode,
 		   qp->s_hdrwords,
 		   qp->s_flags,
-		   atomic_read(&qp->s_dma_busy),
-		   !list_empty(&qp->iowait),
+		   atomic_read(&priv->s_dma_busy),
+		   !list_empty(&priv->iowait),
 		   qp->timeout,
 		   wqe->ssn,
 		   qp->s_lsn,
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index e6b7556d5221..9088e26d3ac8 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -40,7 +40,7 @@
 
 static void rc_timeout(unsigned long arg);
 
-static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
 	u32 len;
@@ -54,9 +54,9 @@ static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
 	return wqe->length - len;
 }
 
-static void start_timer(struct qib_qp *qp)
+static void start_timer(struct rvt_qp *qp)
 {
-	qp->s_flags |= QIB_S_TIMER;
+	qp->s_flags |= RVT_S_TIMER;
 	qp->s_timer.function = rc_timeout;
 	/* 4.096 usec. * (1 << qp->timeout) */
 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
@@ -74,17 +74,17 @@ static void start_timer(struct qib_qp *qp)
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
 			   struct qib_other_headers *ohdr, u32 pmtu)
 {
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		/* FALLTHROUGH */
@@ -112,7 +112,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & QIB_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 			goto bail;
 		}
@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 			/* Copy SGE state in case we need to resend */
 			qp->s_rdma_mr = e->rdma_sge.mr;
 			if (qp->s_rdma_mr)
-				qib_get_mr(qp->s_rdma_mr);
+				rvt_get_mr(qp->s_rdma_mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
 		if (qp->s_rdma_mr)
-			qib_get_mr(qp->s_rdma_mr);
+			rvt_get_mr(qp->s_rdma_mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 			len = pmtu;
@@ -196,7 +196,7 @@ normal:
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~QIB_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
@@ -218,7 +218,7 @@ normal:
 
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
 	return 0;
 }
 
@@ -226,63 +226,60 @@ bail:
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_rc_req(struct qib_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_other_headers *ohdr;
-	struct qib_sge_state *ss;
-	struct qib_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
 	int ret = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
-
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+		ohdr = &priv->s_hdr->u.l.oth;
 
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		goto done;
 	}
 
-	if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 
 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= QIB_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 		}
 		qp->s_sending_psn = qp->s_psn;
@@ -294,10 +291,10 @@ int qib_make_rc_req(struct qib_qp *qp)
 	bth0 = 0;
 
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
-		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/*
 		 * Resend an old request or start a new one.
@@ -317,11 +314,11 @@ int qib_make_rc_req(struct qib_qp *qp)
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= QIB_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -335,14 +332,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
@@ -363,14 +358,14 @@ int qib_make_rc_req(struct qib_qp *qp)
 			break;
 
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
 
@@ -380,9 +375,7 @@ int qib_make_rc_req(struct qib_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
@@ -411,19 +404,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 
 			ohdr->u.rc.reth.vaddr =
@@ -449,13 +435,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -498,11 +483,8 @@ int qib_make_rc_req(struct qib_qp *qp)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -522,8 +504,6 @@ int qib_make_rc_req(struct qib_qp *qp)
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -563,8 +543,6 @@ int qib_make_rc_req(struct qib_qp *qp)
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -618,9 +596,9 @@ int qib_make_rc_req(struct qib_qp *qp)
 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
 	if (delta && delta % QIB_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & QIB_S_SEND_ONE) {
-		qp->s_flags &= ~QIB_S_SEND_ONE;
-		qp->s_flags |= QIB_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	qp->s_len -= len;
@@ -629,13 +607,9 @@ int qib_make_rc_req(struct qib_qp *qp)
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -647,7 +621,7 @@ unlock:
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  */
-void qib_send_rc_ack(struct qib_qp *qp)
+void qib_send_rc_ack(struct rvt_qp *qp)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -665,11 +639,11 @@ void qib_send_rc_ack(struct qib_qp *qp)
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto unlock;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+	if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
 		goto queue_ack;
 
 	/* Construct the header with s_lock held so APM doesn't change it. */
@@ -758,9 +732,9 @@ void qib_send_rc_ack(struct qib_qp *qp)
 	goto done;
 
 queue_ack:
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		ibp->n_rc_qacks++;
-		qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		this_cpu_inc(*ibp->rvp.rc_qacks);
+		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 		qp->s_nak_state = qp->r_nak_state;
 		qp->s_ack_psn = qp->r_ack_psn;
 
@@ -782,10 +756,10 @@ done:
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  */
-static void reset_psn(struct qib_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 	u32 n = qp->s_acked;
-	struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
 	qp->s_cur = n;
@@ -808,7 +782,7 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
 			n = 0;
 		if (n == qp->s_tail)
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = qib_cmp24(psn, wqe->psn);
 		if (diff < 0)
 			break;
@@ -854,22 +828,22 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
 done:
 	qp->s_psn = psn;
 	/*
-	 * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in qib_make_rc_req() is too late.
 	 */
 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= QIB_S_WAIT_PSN;
+		qp->s_flags |= RVT_S_WAIT_PSN;
 }
 
 /*
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
-static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
-	struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct qib_ibport *ibp;
 
 	if (qp->s_retry == 0) {
@@ -878,7 +852,7 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
 		} else /* XXX need to handle delayed completion */
 			return;
@@ -887,15 +861,15 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
-	qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
-			 QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
-			 QIB_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
-		qp->s_flags |= QIB_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 }
 
@@ -904,16 +878,16 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
  */
 static void rc_timeout(unsigned long arg)
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct qib_ibport *ibp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & QIB_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~QIB_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 		qib_schedule_send(qp);
@@ -927,12 +901,12 @@ static void rc_timeout(unsigned long arg)
  */
 void qib_rc_rnr_retry(unsigned long arg)
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & QIB_S_WAIT_RNR) {
-		qp->s_flags &= ~QIB_S_WAIT_RNR;
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
 		del_timer(&qp->s_timer);
 		qib_schedule_send(qp);
 	}
@@ -943,14 +917,14 @@ void qib_rc_rnr_retry(unsigned long arg)
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  */
-static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -968,16 +942,16 @@ static void reset_sending_psn(struct qib_qp *qp, u32 psn)
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
 {
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
 	/* Find out where the BTH is */
@@ -1002,22 +976,30 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
 	 * there are still requests that haven't been acked.
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
-	    !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
-	    (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	    !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		start_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			qib_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1025,25 +1007,23 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	/*
 	 * If we were waiting for sends to complete before resending,
 	 * and they are now complete, restart sending.
 	 */
-	if (qp->s_flags & QIB_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~QIB_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		qib_schedule_send(qp);
 	}
 }
 
-static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 	qp->s_last_psn = psn;
 }
@@ -1053,8 +1033,8 @@ static inline void update_last_psn(struct qib_qp *qp, u32 psn)
  * This is similar to qib_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
-					 struct qib_swqe *wqe,
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
 					 struct qib_ibport *ibp)
 {
 	struct ib_wc wc;
@@ -1067,13 +1047,21 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 	 */
 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		u32 s_last;
+
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			qib_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1081,12 +1069,10 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else
-		ibp->n_rc_delayed_comp++;
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 
 	qp->s_retry = qp->s_retry_cnt;
 	update_last_psn(qp, wqe->lpsn);
@@ -1100,7 +1086,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
@@ -1110,7 +1096,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	return wqe;
 }
@@ -1126,19 +1112,19 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
  * Called at interrupt level with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct qib_ctxtdata *rcd)
 {
 	struct qib_ibport *ibp;
 	enum ib_wc_status status;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 	}
 
@@ -1151,7 +1137,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 	ack_psn = psn;
 	if (aeth >> 29)
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 	/*
@@ -1186,11 +1172,11 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
-			if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
-				qp->r_flags |= QIB_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= QIB_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
@@ -1213,14 +1199,14 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(QIB_S_WAIT_FENCE |
-						 QIB_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
-			} else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
-						 QIB_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
 			}
 		}
@@ -1231,7 +1217,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
-		ibp->n_rc_acks++;
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
@@ -1248,8 +1234,8 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = psn + 1;
 		}
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 		}
 		qib_get_credit(qp, aeth);
@@ -1260,10 +1246,10 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		goto bail;
 
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
 			goto bail;
-		if (qp->s_flags & QIB_S_WAIT_RNR)
+		if (qp->s_flags & RVT_S_WAIT_RNR)
 			goto bail;
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
@@ -1275,12 +1261,12 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
 		reset_psn(qp, psn);
 
-		qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
-		qp->s_flags |= QIB_S_WAIT_RNR;
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		qp->s_flags |= RVT_S_WAIT_RNR;
 		qp->s_timer.function = qib_rc_rnr_retry;
 		qp->s_timer.expires = jiffies + usecs_to_jiffies(
 			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
@@ -1296,7 +1282,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
 			QIB_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
@@ -1309,21 +1295,21 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
 				qib_send_complete(qp, wqe, status);
-				qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
 
@@ -1349,18 +1335,18 @@ bail:
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
-static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
 			 struct qib_ctxtdata *rcd)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 	}
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1370,11 +1356,11 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= QIB_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -1399,12 +1385,12 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			    struct qib_other_headers *ohdr,
 			    void *data, u32 tlen,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 psn, u32 hdrsize, u32 pmtu,
 			    struct qib_ctxtdata *rcd)
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	enum ib_wc_status status;
 	unsigned long flags;
@@ -1425,7 +1411,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			 * If send tasklet not running attempt to progress
 			 * SDMA queue.
 			 */
-			if (!(qp->s_flags & QIB_S_BUSY)) {
+			if (!(qp->s_flags & RVT_S_BUSY)) {
 				/* Acquire SDMA Lock */
 				spin_lock_irqsave(&ppd->sdma_lock, flags);
 				/* Invoke sdma make progress */
@@ -1437,11 +1423,12 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 	}
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto ack_done;
 
 	/* Ignore invalid responses. */
-	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
@@ -1460,15 +1447,15 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}
 
 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
@@ -1487,7 +1474,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1515,10 +1502,10 @@ read_middle:
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= QIB_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 		}
 
@@ -1553,7 +1540,7 @@ read_middle:
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1598,7 +1585,7 @@ ack_len_err:
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		qib_send_complete(qp, wqe, status);
-		qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1623,14 +1610,14 @@ bail:
  */
 static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			    void *data,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 psn,
 			    int diff,
 			    struct qib_ctxtdata *rcd)
 {
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1642,7 +1629,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1652,7 +1639,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			 * Otherwise, we end up propagating congestion.
 			 */
 			if (list_empty(&qp->rspwait)) {
-				qp->r_flags |= QIB_R_RSP_NAK;
+				qp->r_flags |= RVT_R_RSP_NAK;
 				atomic_inc(&qp->refcount);
 				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 			}
@@ -1678,7 +1665,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1732,7 +1719,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1740,7 +1727,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
 					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
@@ -1791,7 +1778,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		 * which doesn't accept a RDMA read response or atomic
 		 * response as an ACK for earlier SENDs or RDMA writes.
 		 */
-		if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+		if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qp->r_nak_state = 0;
 			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
@@ -1805,7 +1792,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= QIB_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	qib_schedule_send(qp);
 
@@ -1818,13 +1805,13 @@ send_ack:
 	return 0;
 }
 
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = qib_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	if (lastwqe) {
@@ -1837,7 +1824,7 @@ void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
 	}
 }
 
-static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;
 
@@ -1862,7 +1849,7 @@ static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
  * Called at interrupt level.
  */
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 	struct qib_other_headers *ohdr;
@@ -1948,8 +1935,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 		break;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;
 
@@ -2026,9 +2013,9 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2047,7 +2034,7 @@ send_last:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
@@ -2069,7 +2056,7 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
@@ -2096,7 +2083,7 @@ send_last:
 		goto send_last;
 
 	case OP(RDMA_READ_REQUEST): {
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;
 
@@ -2114,7 +2101,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2125,7 +2112,7 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
@@ -2157,7 +2144,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
 		goto sunlock;
@@ -2166,7 +2153,7 @@ send_last:
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2186,7 +2173,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
@@ -2196,7 +2183,7 @@ send_last:
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  vaddr, rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
@@ -2208,7 +2195,7 @@ send_last:
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2221,7 +2208,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);
 
 		goto sunlock;
@@ -2245,7 +2232,7 @@ rnr_nak:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2257,7 +2244,7 @@ nack_op_err:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2271,7 +2258,7 @@ nack_inv:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b1aa21bdd484..a5f07a64b228 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -79,16 +79,16 @@ const u32 ib_qib_rnr_table[32] = {
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	struct qib_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;
 
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -96,7 +96,7 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
 				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
@@ -109,9 +109,9 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
 
 bad_lkey:
 	while (j) {
-		struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
 
-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -120,7 +120,7 @@ bad_lkey:
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
@@ -136,19 +136,19 @@ bail:
  *
  * Can be called from interrupt level.
  */
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct qib_rq *rq;
-	struct qib_rwq *wq;
-	struct qib_srq *srq;
-	struct qib_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;
 
 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -158,7 +158,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -174,7 +174,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -190,7 +190,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	qp->r_wr_id = wqe->wr_id;
 
 	ret = 1;
-	set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -227,7 +227,7 @@ bail:
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void qib_migrate_qp(struct qib_qp *qp)
+void qib_migrate_qp(struct rvt_qp *qp)
 {
 	struct ib_event ev;
 
@@ -266,7 +266,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
  * The s_lock will be acquired around the qib_migrate_qp() call.
  */
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0)
+		      int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,7 +279,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
@@ -311,7 +312,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
@@ -353,12 +355,15 @@ err:
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void qib_ruc_loopback(struct qib_qp *sqp)
+static void qib_ruc_loopback(struct rvt_qp *sqp)
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_qp *qp;
-	struct qib_swqe *wqe;
-	struct qib_sge *sge;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -367,29 +372,33 @@ static void qib_ruc_loopback(struct qib_qp *sqp)
 	int release;
 	int ret;
 
+	rcu_read_lock();
 	/*
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
+	if (!qp)
+		goto done;
 
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
-	    !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
-	sqp->s_flags |= QIB_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
 	/* Return if it is not OK to start a new work reqeust. */
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -407,9 +416,9 @@ again:
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -458,7 +467,7 @@ again:
 			goto inv_err;
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_WRITE)))
@@ -471,7 +480,7 @@ again:
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_READ)))
@@ -489,7 +498,7 @@ again:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  wqe->atomic_wr.remote_addr,
 					  wqe->atomic_wr.rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
@@ -502,7 +511,7 @@ again:
 			(u64) atomic64_add_return(sdata, maddr) - sdata :
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
 
@@ -526,11 +535,11 @@ again:
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -543,9 +552,9 @@ again:
 		sqp->s_len -= len;
 	}
 	if (release)
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -561,12 +570,12 @@ again:
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	qib_send_complete(sqp, wqe, send_status);
@@ -576,7 +585,7 @@ rnr_nak:
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -588,9 +597,9 @@ rnr_nak:
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= QIB_S_WAIT_RNR;
+	sqp->s_flags |= RVT_S_WAIT_RNR;
 	sqp->s_timer.function = qib_rc_rnr_retry;
 	sqp->s_timer.expires = jiffies +
 		usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
@@ -618,9 +627,9 @@ serr:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	qib_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 
-		sqp->s_flags &= ~QIB_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -633,12 +642,11 @@ serr:
 		goto done;
 	}
 clr_busy:
-	sqp->s_flags &= ~QIB_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
 unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
-	if (qp && atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }
 
 /**
@@ -663,7 +671,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id = grh->sgid_index ?
 		ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
 	hdr->dgid = grh->dgid;
@@ -672,9 +680,10 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 	return sizeof(struct ib_grh) / sizeof(u32);
 }
 
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	u16 lrh0;
 	u32 nwords;
@@ -685,17 +694,18 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = QIB_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &qp->remote_ah_attr.grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
 	}
 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
 		qp->remote_ah_attr.sl << 4;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
@@ -707,20 +717,29 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 }
 
+void _qib_do_send(struct work_struct *work)
+{
+	struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
+						s_work);
+	struct rvt_qp *qp = priv->owner;
+
+	qib_do_send(qp);
+}
+
 /**
  * qib_do_send - perform a send on a QP
- * @work: contains a pointer to the QP
+ * @qp: pointer to the QP
  *
  * Process entries in the send work queue until credit or queue is
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
  * Otherwise, two threads could send packets out of order.
  */
-void qib_do_send(struct work_struct *work)
+void qib_do_send(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-	int (*make_req)(struct qib_qp *qp);
+	int (*make_req)(struct rvt_qp *qp);
 	unsigned long flags;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -745,50 +764,59 @@ void qib_do_send(struct work_struct *work)
 		return;
 	}
 
-	qp->s_flags |= QIB_S_BUSY;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags |= RVT_S_BUSY;
 
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
-			if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
+			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
 	} while (make_req(qp));
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
  * This should be called with s_lock held.
  */
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct qib_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];
 
-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -800,15 +828,10 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
 			     status != IB_WC_SUCCESS);
 	}
 
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index c6d6a54d2e19..891873b38a1e 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -513,7 +513,9 @@ int qib_sdma_running(struct qib_pportdata *ppd)
 static void complete_sdma_err_req(struct qib_pportdata *ppd,
 				  struct qib_verbs_txreq *tx)
 {
-	atomic_inc(&tx->qp->s_dma_busy);
+	struct qib_qp_priv *priv = tx->qp->priv;
+
+	atomic_inc(&priv->s_dma_busy);
 	/* no sdma descriptors, so no unmap_desc */
 	tx->txreq.start_idx = 0;
 	tx->txreq.next_descq_idx = 0;
@@ -531,18 +533,19 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
  * 3) The SGE addresses are suitable for passing to dma_map_single().
  */
 int qib_sdma_verbs_send(struct qib_pportdata *ppd,
-			struct qib_sge_state *ss, u32 dwords,
+			struct rvt_sge_state *ss, u32 dwords,
 			struct qib_verbs_txreq *tx)
 {
 	unsigned long flags;
-	struct qib_sge *sge;
-	struct qib_qp *qp;
+	struct rvt_sge *sge;
+	struct rvt_qp *qp;
 	int ret = 0;
 	u16 tail;
 	__le64 *descqp;
 	u64 sdmadesc[2];
 	u32 dwoffset;
 	dma_addr_t addr;
+	struct qib_qp_priv *priv;
 
 	spin_lock_irqsave(&ppd->sdma_lock, flags);
 
@@ -621,7 +624,7 @@ retry:
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -644,8 +647,8 @@ retry:
 		descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
 		descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
-
-	atomic_inc(&tx->qp->s_dma_busy);
+	priv = tx->qp->priv;
+	atomic_inc(&priv->s_dma_busy);
 	tx->txreq.next_descq_idx = tail;
 	ppd->dd->f_sdma_update_tail(ppd, tail);
 	ppd->sdma_descq_added += tx->txreq.sg_count;
@@ -663,13 +666,14 @@ unmap:
 		unmap_desc(ppd, tail);
 	}
 	qp = tx->qp;
+	priv = qp->priv;
 	qib_put_txreq(tx);
 	spin_lock(&qp->r_lock);
 	spin_lock(&qp->s_lock);
 	if (qp->ibqp.qp_type == IB_QPT_RC) {
 		/* XXX what about error sending RDMA read responses? */
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
-			qib_error_qp(qp, IB_WC_GENERAL_ERR);
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
+			rvt_error_qp(qp, IB_WC_GENERAL_ERR);
 	} else if (qp->s_wqe)
 		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 	spin_unlock(&qp->s_lock);
@@ -679,8 +683,9 @@ unmap:
 
 busy:
 	qp = tx->qp;
+	priv = qp->priv;
 	spin_lock(&qp->s_lock);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct qib_ibdev *dev;
 
 		/*
@@ -690,19 +695,19 @@ busy:
 		 */
 		tx->ss = ss;
 		tx->dwords = dwords;
-		qp->s_tx = tx;
+		priv->s_tx = tx;
 		dev = &ppd->dd->verbs_dev;
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			struct qib_ibport *ibp;
 
 			ibp = &ppd->ibport_data;
-			ibp->n_dmawait++;
-			qp->s_flags |= QIB_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->iowait, &dev->dmawait);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->iowait, &dev->dmawait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock(&qp->s_lock);
 		ret = -EBUSY;
 	} else {
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
deleted file mode 100644
index d6235931a1ba..000000000000
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libibverbs when creating a user SRQ
- */
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-	struct qib_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
-	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct qib_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    qib_create_mmap_info(dev, s, ibpd->uobject->context,
-					 srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == ib_qib_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * qib_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for libibverbs.so
- */
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct qib_rwq *owq;
-		struct qib_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct qib_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head and tail pointer values and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		tail = owq->tail;
-		if (head >= srq->rq.size || tail >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct qib_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct qib_rwqe *)((char *) p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct qib_mmap_info *ip = srq->ip;
-			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct qib_rwq) + size * sz;
-
-			qib_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See qib_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			/*
-			 * Put user mapping info onto the pending list
-			 * unless it already is on the list.
-			 */
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * qib_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int qib_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 81f56cdff2bc..fe4cf5e4acec 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -406,7 +406,13 @@ static struct kobj_type qib_sl2vl_ktype = {
 #define QIB_DIAGC_ATTR(N) \
 	static struct qib_diagc_attr qib_diagc_attr_##N = { \
 		.attr = { .name = __stringify(N), .mode = 0664 }, \
-		.counter = offsetof(struct qib_ibport, n_##N) \
+		.counter = offsetof(struct qib_ibport, rvp.n_##N) \
+	}
+
+#define QIB_DIAGC_ATTR_PER_CPU(N) \
+	static struct qib_diagc_attr qib_diagc_attr_##N = { \
+		.attr = { .name = __stringify(N), .mode = 0664 }, \
+		.counter = offsetof(struct qib_ibport, rvp.z_##N) \
 	}
 
 struct qib_diagc_attr {
@@ -414,10 +420,11 @@ struct qib_diagc_attr {
 	size_t counter;
 };
 
+QIB_DIAGC_ATTR_PER_CPU(rc_acks);
+QIB_DIAGC_ATTR_PER_CPU(rc_qacks);
+QIB_DIAGC_ATTR_PER_CPU(rc_delayed_comp);
+
 QIB_DIAGC_ATTR(rc_resends);
-QIB_DIAGC_ATTR(rc_acks);
-QIB_DIAGC_ATTR(rc_qacks);
-QIB_DIAGC_ATTR(rc_delayed_comp);
 QIB_DIAGC_ATTR(seq_naks);
 QIB_DIAGC_ATTR(rdma_seq);
 QIB_DIAGC_ATTR(rnr_naks);
@@ -449,6 +456,35 @@ static struct attribute *diagc_default_attributes[] = {
 	NULL
 };
 
+static u64 get_all_cpu_total(u64 __percpu *cntr)
+{
+	int cpu;
+	u64 counter = 0;
+
+	for_each_possible_cpu(cpu)
+		counter += *per_cpu_ptr(cntr, cpu);
+	return counter;
+}
+
+#define def_write_per_cpu(cntr) \
+static void write_per_cpu_##cntr(struct qib_pportdata *ppd, u32 data)	\
+{									\
+	struct qib_devdata *dd = ppd->dd;				\
+	struct qib_ibport *qibp = &ppd->ibport_data;			\
+	/*  A write can only zero the counter */			\
+	if (data == 0)							\
+		qibp->rvp.z_##cntr = get_all_cpu_total(qibp->rvp.cntr); \
+	else								\
+		qib_dev_err(dd, "Per CPU cntrs can only be zeroed");	\
+}
+
+def_write_per_cpu(rc_acks)
+def_write_per_cpu(rc_qacks)
+def_write_per_cpu(rc_delayed_comp)
+
+#define READ_PER_CPU_CNTR(cntr) (get_all_cpu_total(qibp->rvp.cntr) - \
+							qibp->rvp.z_##cntr)
+
 static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
 			       char *buf)
 {
@@ -458,7 +494,16 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
 		container_of(kobj, struct qib_pportdata, diagc_kobj);
 	struct qib_ibport *qibp = &ppd->ibport_data;
 
-	return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks));
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks));
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		return sprintf(buf, "%llu\n",
+					READ_PER_CPU_CNTR(rc_delayed_comp));
+	else
+		return sprintf(buf, "%u\n",
+				*(u32 *)((char *)qibp + dattr->counter));
 }
 
 static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -475,7 +520,15 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
-	*(u32 *)((char *) qibp + dattr->counter) = val;
+
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		write_per_cpu_rc_acks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		write_per_cpu_rc_qacks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		write_per_cpu_rc_delayed_comp(ppd, val);
+	else
+		*(u32 *)((char *)qibp + dattr->counter) = val;
 	return size;
 }
 
@@ -502,7 +555,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -511,7 +564,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -533,7 +586,7 @@ static ssize_t show_boardversion(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -545,7 +598,7 @@ static ssize_t show_localbus_info(struct device *device,
 				  struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -557,7 +610,7 @@ static ssize_t show_nctxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of user ports (contexts) available. */
@@ -572,7 +625,7 @@ static ssize_t show_nfreectxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -583,7 +636,7 @@ static ssize_t show_serial(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	buf[sizeof(dd->serial)] = '\0';
@@ -597,7 +650,7 @@ static ssize_t store_chip_reset(struct device *device,
 				size_t count)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -618,7 +671,7 @@ static ssize_t show_tempsense(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 	int idx;
@@ -778,7 +831,7 @@ bail:
  */
 int qib_verbs_register_sysfs(struct qib_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 06a564589c35..7bdbc79ceaa3 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -41,61 +41,62 @@
  * qib_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_uc_req(struct qib_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
 	int ret = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 	bth0 = 0;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_qib_state_ops[qp->state] &
-		    QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -214,15 +215,11 @@ int qib_make_uc_req(struct qib_qp *qp)
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			    qp->s_next_psn++ & QIB_PSN_MASK);
+			    qp->s_psn++ & QIB_PSN_MASK);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -240,7 +237,7 @@ unlock:
  * Called at interrupt level.
  */
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_other_headers *ohdr;
 	u32 opcode;
@@ -278,10 +275,10 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
 		} else
-			qib_put_ss(&qp->r_sge);
+			rvt_put_ss(&qp->r_sge);
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		case OP(SEND_FIRST):
@@ -328,8 +325,8 @@ inv:
 		goto inv;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;
 
@@ -346,7 +343,7 @@ inv:
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 			qp->r_sge = qp->s_rdma_read_sge;
 		else {
 			ret = qib_get_rwqe(qp, 0);
@@ -400,7 +397,7 @@ send_last:
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
 		qib_copy_sge(&qp->r_sge, data, tlen, 0);
-		qib_put_ss(&qp->s_rdma_read_sge);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -414,7 +411,7 @@ last_imm:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
@@ -438,7 +435,7 @@ rdma_first:
 			int ok;
 
 			/* Check rkey */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
 					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto drop;
@@ -483,8 +480,8 @@ rdma_last_imm:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-			qib_put_ss(&qp->s_rdma_read_sge);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+			rvt_put_ss(&qp->s_rdma_read_sge);
 		else {
 			ret = qib_get_rwqe(qp, 1);
 			if (ret < 0)
@@ -495,7 +492,7 @@ rdma_last_imm:
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
 	case OP(RDMA_WRITE_LAST):
@@ -511,7 +508,7 @@ rdma_last:
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		break;
 
 	default:
@@ -523,10 +520,10 @@ rdma_last:
 	return;
 
 rewind:
-	set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 
 op_err:
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 59193f67ea78..d9502137de62 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -32,6 +32,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
 
 #include "qib.h"
 #include "qib_mad.h"
@@ -46,22 +47,26 @@
  * Note that the receive interrupt handler may be calling qib_ud_rcv()
  * while this is being called.
  */
-static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_pportdata *ppd;
-	struct qib_qp *qp;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
-	struct qib_sge_state ssge;
-	struct qib_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
-	qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	rcu_read_lock();
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
+		rcu_read_unlock();
 		return;
 	}
 
@@ -71,12 +76,12 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 	if (dqptype != sqptype ||
-	    !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -140,8 +145,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 		int ret;
 
@@ -152,14 +157,14 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 	}
 
@@ -189,7 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -201,8 +206,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 		}
 		length -= len;
 	}
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -216,30 +221,31 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }
 
 /**
  * qib_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_ud_req(struct qib_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
@@ -248,28 +254,29 @@ int qib_make_ud_req(struct qib_qp *qp)
 	int ret = 0;
 	int next_cur;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
@@ -277,9 +284,9 @@ int qib_make_ud_req(struct qib_qp *qp)
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
-		if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+		if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
 			this_cpu_inc(ibp->pmastats->n_multicast_xmit);
 		else
 			this_cpu_inc(ibp->pmastats->n_unicast_xmit);
@@ -287,6 +294,7 @@ int qib_make_ud_req(struct qib_qp *qp)
 		this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid == ppd->lid)) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -294,11 +302,12 @@ int qib_make_ud_req(struct qib_qp *qp)
 			 * XXX Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 */
-			if (atomic_read(&qp->s_dma_busy)) {
-				qp->s_flags |= QIB_S_WAIT_DMA;
+			if (atomic_read(&priv->s_dma_busy)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -324,11 +333,11 @@ int qib_make_ud_req(struct qib_qp *qp)
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &ah_attr->grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -336,7 +345,7 @@ int qib_make_ud_req(struct qib_qp *qp)
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = QIB_LRH_BTH;
-		ohdr = &qp->s_hdr->u.oth;
+		ohdr = &priv->s_hdr->u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
@@ -349,15 +358,16 @@ int qib_make_ud_req(struct qib_qp *qp)
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
 	else
 		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 	lid = ppd->lid;
 	if (lid) {
 		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-		qp->s_hdr->lrh[3] = cpu_to_be16(lid);
+		priv->s_hdr->lrh[3] = cpu_to_be16(lid);
 	} else
-		qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
+		priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
 	bth0 |= extra_bytes << 20;
@@ -368,11 +378,11 @@ int qib_make_ud_req(struct qib_qp *qp)
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
 	 */
-	ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-		ah_attr->dlid != QIB_PERMISSIVE_LID ?
+	ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
 		cpu_to_be32(QIB_MULTICAST_QPN) :
 		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -382,13 +392,9 @@ int qib_make_ud_req(struct qib_qp *qp)
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
@@ -426,7 +432,7 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
  * Called at interrupt level.
  */
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_other_headers *ohdr;
 	int opcode;
@@ -446,7 +452,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
 	}
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 
 	/*
 	 * Get the number of bytes the message was padded by
@@ -531,8 +537,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 		int ret;
 
@@ -543,13 +549,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 	}
 	if (has_grh) {
@@ -559,8 +565,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	} else
 		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
 	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -576,15 +582,15 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
-	wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     (ohdr->bth[0] &
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 74f90b2619f6..2d2b94fd3633 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -66,8 +66,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 	}
 
 	for (got = 0; got < num_pages; got += ret) {
-		ret = get_user_pages(current, current->mm,
-				     start_page + got * PAGE_SIZE,
+		ret = get_user_pages(start_page + got * PAGE_SIZE,
 				     num_pages - got, 1, 1,
 				     p + got, NULL);
 		if (ret < 0)
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index baf1e42b6896..cbf6200e6afc 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -41,6 +41,7 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/vmalloc.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -49,8 +50,8 @@ static unsigned int ib_qib_qp_table_size = 256;
 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-unsigned int ib_qib_lkey_table_size = 16;
-module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+static unsigned int qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, qib_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
@@ -113,36 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
 /*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; qib_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = QIB_POST_RECV_OK,
-	[IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
-	    QIB_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-	[IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-};
-
-struct qib_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct qib_ucontext, ibucontext);
-}
-
-/*
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
 const enum ib_wc_opcode ib_qib_wc_opcode[] = {
@@ -166,9 +137,9 @@ __be64 ib_qib_sys_image_guid;
  * @data: the data to copy
  * @length: the length of the data
  */
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -184,11 +155,11 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -208,9 +179,9 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
  * @ss: the SGE state
  * @length: the number of bytes to skip
  */
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -225,11 +196,11 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -248,10 +219,10 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
  * Don't modify the qib_sge_state to get the count.
  * Return zero if any of the segments is not aligned.
  */
-static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sg_list = ss->sg_list;
-	struct qib_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u32 ndesc = 1;  /* count the header */
 
@@ -276,7 +247,7 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
 			if (--num_sge)
 				sge = *sg_list++;
 		} else if (sge.length == 0 && sge.mr->lkey) {
-			if (++sge.n >= QIB_SEGSZ) {
+			if (++sge.n >= RVT_SEGSZ) {
 				if (++sge.m >= sge.mr->mapsz)
 					break;
 				sge.n = 0;
@@ -294,9 +265,9 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
 /*
  * Copy from the SGEs to the data buffer.
  */
-static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -314,7 +285,7 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -330,242 +301,6 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
 }
 
 /**
- * qib_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
-	int *scheduled)
-{
-	struct qib_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	int ret;
-	unsigned long flags;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	int avoid_schedule = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
-		goto bail_inval;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge)
-		goto bail_inval;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (wr->opcode == IB_WR_REG_MR) {
-		if (qib_reg_mr(qp, reg_wr(wr)))
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			goto bail_inval;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			goto bail_inval;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		goto bail_inval;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_REG_MR)
-		memcpy(&wqe->reg_wr, reg_wr(wr),
-			sizeof(wqe->reg_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					 &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-		if (wqe->length <= qp->pmtu)
-			avoid_schedule = 1;
-	} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-				  qp->port_num - 1)->ibmtu) {
-		goto bail_inval_free;
-	} else {
-		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
-		avoid_schedule = 1;
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	ret = 0;
-	goto bail;
-
-bail_inval_free:
-	while (j) {
-		struct qib_sge *sge = &wqe->sg_list[--j];
-
-		qib_put_mr(sge->mr);
-	}
-bail_inval:
-	ret = -EINVAL;
-bail:
-	if (!ret && !wr->next && !avoid_schedule &&
-	 !qib_sdma_empty(
-	   dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
-		qib_schedule_send(qp);
-		*scheduled = 1;
-	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * qib_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int scheduled = 0;
-
-	for (; wr; wr = wr->next) {
-		err = qib_post_one_send(qp, wr, &scheduled);
-		if (err) {
-			*bad_wr = wr;
-			goto bail;
-		}
-	}
-
-	/* Try to do the send work in the caller's context. */
-	if (!scheduled)
-		qib_do_send(&qp->s_work);
-
-bail:
-	return err;
-}
-
-/**
- * qib_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			    struct ib_recv_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
  * qib_qp_rcv - processing an incoming packet on a QP
  * @rcd: the context pointer
  * @hdr: the packet header
@@ -579,15 +314,15 @@ bail:
  * Called at interrupt level.
  */
 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
 	spin_lock(&qp->r_lock);
 
 	/* Check for valid receive state. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto unlock;
 	}
 
@@ -632,8 +367,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 	struct qib_pportdata *ppd = rcd->ppd;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	struct qib_ib_header *hdr = rhdr;
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 	struct qib_other_headers *ohdr;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	u32 qp_num;
 	int lnh;
 	u8 opcode;
@@ -645,7 +382,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 
 	/* Check for a valid destination LID (see ch. 7.11.1). */
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (lid < QIB_MULTICAST_LID_BASE) {
+	if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		lid &= ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid != ppd->lid))
 			goto drop;
@@ -674,50 +411,40 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 #endif
 
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+	qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 	if (qp_num == QIB_MULTICAST_QPN) {
-		struct qib_mcast *mcast;
-		struct qib_mcast_qp *p;
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 		if (lnh != QIB_LRH_GRH)
 			goto drop;
-		mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
 		if (mcast == NULL)
 			goto drop;
 		this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
 			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 		/*
-		 * Notify qib_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 	} else {
-		if (rcd->lookaside_qp) {
-			if (rcd->lookaside_qpn != qp_num) {
-				if (atomic_dec_and_test(
-					&rcd->lookaside_qp->refcount))
-					wake_up(
-					 &rcd->lookaside_qp->wait);
-				rcd->lookaside_qp = NULL;
-			}
+		rcu_read_lock();
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+		if (!qp) {
+			rcu_read_unlock();
+			goto drop;
 		}
-		if (!rcd->lookaside_qp) {
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
-				goto drop;
-			rcd->lookaside_qp = qp;
-			rcd->lookaside_qpn = qp_num;
-		} else
-			qp = rcd->lookaside_qp;
 		this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+		rcu_read_unlock();
 	}
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 
 /*
@@ -728,23 +455,25 @@ static void mem_timer(unsigned long data)
 {
 	struct qib_ibdev *dev = (struct qib_ibdev *) data;
 	struct list_head *list = &dev->memwait;
-	struct qib_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
+	struct qib_qp_priv *priv = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	if (!list_empty(list)) {
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
 	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 	if (qp) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_KMEM) {
-			qp->s_flags &= ~QIB_S_WAIT_KMEM;
+		if (qp->s_flags & RVT_S_WAIT_KMEM) {
+			qp->s_flags &= ~RVT_S_WAIT_KMEM;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -753,9 +482,9 @@ static void mem_timer(unsigned long data)
 	}
 }
 
-static void update_sge(struct qib_sge_state *ss, u32 length)
+static void update_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	sge->vaddr += length;
 	sge->length -= length;
@@ -764,7 +493,7 @@ static void update_sge(struct qib_sge_state *ss, u32 length)
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= QIB_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 			sge->n = 0;
@@ -810,7 +539,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 }
 #endif
 
-static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 		    u32 length, unsigned flush_wc)
 {
 	u32 extra = 0;
@@ -947,30 +676,31 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
 }
 
 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
-					   struct qib_qp *qp)
+					   struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 	if (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 
 		list_del(l);
-		spin_unlock(&dev->pending_lock);
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
-		    list_empty(&qp->iowait)) {
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
+		    list_empty(&priv->iowait)) {
 			dev->n_txwait++;
-			qp->s_flags |= QIB_S_WAIT_TX;
-			list_add_tail(&qp->iowait, &dev->txwait);
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->iowait, &dev->txwait);
 		}
-		qp->s_flags &= ~QIB_S_BUSY;
-		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = ERR_PTR(-EBUSY);
 	}
@@ -978,22 +708,22 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 }
 
 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
-					 struct qib_qp *qp)
+					 struct rvt_qp *qp)
 {
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	/* assume the list non empty */
 	if (likely(!list_empty(&dev->txreq_free))) {
 		struct list_head *l = dev->txreq_free.next;
 
 		list_del(l);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
 		/* call slow path to get the extra lock */
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx =  __get_txreq(dev, qp);
 	}
 	return tx;
@@ -1002,16 +732,15 @@ static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 void qib_put_txreq(struct qib_verbs_txreq *tx)
 {
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
+	struct qib_qp_priv *priv;
 	unsigned long flags;
 
 	qp = tx->qp;
 	dev = to_idev(qp->ibqp.device);
 
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
 	if (tx->mr) {
-		qib_put_mr(tx->mr);
+		rvt_put_mr(tx->mr);
 		tx->mr = NULL;
 	}
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1022,21 +751,23 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
 		kfree(tx->align_buf);
 	}
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 
 	/* Put struct back on free list */
 	list_add(&tx->txreq.list, &dev->txreq_free);
 
 	if (!list_empty(&dev->txwait)) {
 		/* Wake up first QP wanting a free struct */
-		qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(dev->txwait.next, struct qib_qp_priv,
+				  iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_TX) {
-			qp->s_flags &= ~QIB_S_WAIT_TX;
+		if (qp->s_flags & RVT_S_WAIT_TX) {
+			qp->s_flags &= ~RVT_S_WAIT_TX;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1044,7 +775,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	} else
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 }
 
 /*
@@ -1055,36 +786,39 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
  */
 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 {
-	struct qib_qp *qp, *nqp;
-	struct qib_qp *qps[20];
+	struct rvt_qp *qp, *nqp;
+	struct qib_qp_priv *qpp, *nqpp;
+	struct rvt_qp *qps[20];
 	struct qib_ibdev *dev;
 	unsigned i, n;
 
 	n = 0;
 	dev = &ppd->dd->verbs_dev;
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 	/* Search wait list for first QP wanting DMA descriptors. */
-	list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+	list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
+		qp = qpp->owner;
+		nqp = nqpp->owner;
 		if (qp->port_num != ppd->port)
 			continue;
 		if (n == ARRAY_SIZE(qps))
 			break;
-		if (qp->s_tx->txreq.sg_count > avail)
+		if (qpp->s_tx->txreq.sg_count > avail)
 			break;
-		avail -= qp->s_tx->txreq.sg_count;
-		list_del_init(&qp->iowait);
+		avail -= qpp->s_tx->txreq.sg_count;
+		list_del_init(&qpp->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 
-	spin_unlock(&dev->pending_lock);
+	spin_unlock(&dev->rdi.pending_lock);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 		spin_lock(&qp->s_lock);
-		if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+		if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 			qib_schedule_send(qp);
 		}
 		spin_unlock(&qp->s_lock);
@@ -1100,7 +834,8 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 {
 	struct qib_verbs_txreq *tx =
 		container_of(cookie, struct qib_verbs_txreq, txreq);
-	struct qib_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
 	spin_lock(&qp->s_lock);
 	if (tx->wqe)
@@ -1117,11 +852,11 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 		}
 		qib_rc_send_complete(qp, hdr);
 	}
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+	if (atomic_dec_and_test(&priv->s_dma_busy)) {
 		if (qp->state == IB_QPS_RESET)
-			wake_up(&qp->wait_dma);
-		else if (qp->s_flags & QIB_S_WAIT_DMA) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA;
+			wake_up(&priv->wait_dma);
+		else if (qp->s_flags & RVT_S_WAIT_DMA) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA;
 			qib_schedule_send(qp);
 		}
 	}
@@ -1130,22 +865,23 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 	qib_put_txreq(tx);
 }
 
-static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= QIB_S_WAIT_KMEM;
-			list_add_tail(&qp->iowait, &dev->memwait);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->iowait, &dev->memwait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1153,10 +889,11 @@ static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
 	return ret;
 }
 
-static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1167,9 +904,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
 	u32 ndesc;
 	int ret;
 
-	tx = qp->s_tx;
+	tx = priv->s_tx;
 	if (tx) {
-		qp->s_tx = NULL;
+		priv->s_tx = NULL;
 		/* resend previously constructed packet */
 		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 		goto bail;
@@ -1182,7 +919,6 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
 	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 				       be16_to_cpu(hdr->lrh[0]) >> 12);
 	tx->qp = qp;
-	atomic_inc(&qp->refcount);
 	tx->wqe = qp->s_wqe;
 	tx->mr = qp->s_rdma_mr;
 	if (qp->s_rdma_mr)
@@ -1245,7 +981,7 @@ err_tx:
 	qib_put_txreq(tx);
 	ret = wait_kmem(dev, qp);
 unaligned:
-	ibp->n_unaligned++;
+	ibp->rvp.n_unaligned++;
 bail:
 	return ret;
 bail_tx:
@@ -1257,8 +993,9 @@ bail_tx:
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct qib_qp *qp)
+static int no_bufs_available(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd;
 	unsigned long flags;
@@ -1271,25 +1008,25 @@ static int no_bufs_available(struct qib_qp *qp)
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			dev->n_piowait++;
-			qp->s_flags |= QIB_S_WAIT_PIO;
-			list_add_tail(&qp->iowait, &dev->piowait);
+			qp->s_flags |= RVT_S_WAIT_PIO;
+			list_add_tail(&priv->iowait, &dev->piowait);
 			dd = dd_from_dev(dev);
 			dd->f_wantpiobuf_intr(dd, 1);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1370,7 +1107,7 @@ done:
 	}
 	qib_sendbuf_done(dd, pbufn);
 	if (qp->s_rdma_mr) {
-		qib_put_mr(qp->s_rdma_mr);
+		rvt_put_mr(qp->s_rdma_mr);
 		qp->s_rdma_mr = NULL;
 	}
 	if (qp->s_wqe) {
@@ -1394,10 +1131,10 @@ done:
  * @len: the length of the packet in bytes
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len)
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	u32 plen;
@@ -1529,10 +1266,11 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct qib_qp *qps[5];
-	struct qib_qp *qp;
+	struct rvt_qp *qps[5];
+	struct rvt_qp *qp;
 	unsigned long flags;
 	unsigned i, n;
+	struct qib_qp_priv *priv;
 
 	list = &dev->piowait;
 	n = 0;
@@ -1543,25 +1281,26 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
 	 * could end up with QPs on the wait list with the interrupt
 	 * disabled.
 	 */
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	while (!list_empty(list)) {
 		if (n == ARRAY_SIZE(qps))
 			goto full;
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 	dd->f_wantpiobuf_intr(dd, 0);
 full:
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_PIO) {
-			qp->s_flags &= ~QIB_S_WAIT_PIO;
+		if (qp->s_flags & RVT_S_WAIT_PIO) {
+			qp->s_flags &= ~RVT_S_WAIT_PIO;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1572,82 +1311,24 @@ full:
 	}
 }
 
-static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			    struct ib_udata *uhw)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
-	props->vendor_part_id = dd->deviceid;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_qib_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = ib_qib_max_qps;
-	props->max_qp_wr = ib_qib_max_qp_wrs;
-	props->max_sge = ib_qib_max_sges;
-	props->max_sge_rd = ib_qib_max_sges;
-	props->max_cq = ib_qib_max_cqs;
-	props->max_ah = ib_qib_max_ahs;
-	props->max_cqe = ib_qib_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = ib_qib_max_pds;
-	props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = ib_qib_max_srqs;
-	props->max_srq_wr = ib_qib_max_srq_wrs;
-	props->max_srq_sge = ib_qib_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = qib_get_npkeys(dd);
-	props->max_mcast_grp = ib_qib_max_mcast_grps;
-	props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
-}
-
-static int qib_query_port(struct ib_device *ibdev, u8 port,
+static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
 			  struct ib_port_attr *props)
 {
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 	enum ib_mtu mtu;
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	props->state = dd->f_iblink_state(ppd->lastibcstat);
 	props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = QIB_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = qib_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = ppd->link_width_active;
 	/* See rate_show() */
 	props->active_speed = ppd->link_speed_active;
 	props->max_vl_num = qib_num_vls(ppd->vls_supported);
-	props->init_type_reply = 0;
 
 	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
 	switch (ppd->ibmtu) {
@@ -1670,7 +1351,6 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
 		mtu = IB_MTU_2048;
 	}
 	props->active_mtu = mtu;
-	props->subnet_timeout = ibp->subnet_timeout;
 
 	return 0;
 }
@@ -1714,185 +1394,70 @@ bail:
 	return ret;
 }
 
-static int qib_modify_port(struct ib_device *ibdev, u8 port,
-			   int port_modify_mask, struct ib_port_modify *props)
+static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		qib_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
-	return 0;
-}
-
-static int qib_query_gid(struct ib_device *ibdev, u8 port,
-			 int index, union ib_gid *gid)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
-
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct qib_ibport *ibp = to_iport(ibdev, port);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = ppd->guid;
-		else if (index < QIB_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 
-static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_pd *pd;
-	struct ib_pd *ret;
+	qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
 
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == ib_qib_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
+	return 0;
 }
 
-static int qib_dealloc_pd(struct ib_pd *ibpd)
+static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			   int guid_index, __be64 *guid)
 {
-	struct qib_pd *pd = to_ipd(ibpd);
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
+	struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = ppd->guid;
+	else if (guid_index < QIB_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != QIB_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mult(ah_attr->static_rate) < 0)
-		goto bail;
 	if (ah_attr->sl > 15)
-		goto bail;
+		return -EINVAL;
+
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * qib_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *qib_create_ah(struct ib_pd *pd,
-				   struct ib_ah_attr *ah_attr)
+static void qib_notify_new_ah(struct ib_device *ibdev,
+			      struct ib_ah_attr *ah_attr,
+			      struct rvt_ah *ah)
 {
-	struct qib_ah *ah;
-	struct ib_ah *ret;
-	struct qib_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
+	struct qib_ibport *ibp;
+	struct qib_pportdata *ppd;
 
-	if (qib_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == ib_qib_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
-
-	ret = &ah->ibah;
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	ah->vl = ibp->sl_to_vl[ah->attr.sl];
+	ah->log_pmtu = ilog2(ppd->ibmtu);
 }
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct qib_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp0);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
@@ -1900,51 +1465,6 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
 }
 
 /**
- * qib_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int qib_destroy_ah(struct ib_ah *ibah)
-{
-	struct qib_ibdev *dev = to_idev(ibah->device);
-	struct qib_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	if (qib_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
-/**
  * qib_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the qlogic_ib device
  */
@@ -1973,75 +1493,27 @@ unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
 	return ret;
 }
 
-static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-			  u16 *pkey)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= qib_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = qib_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the QLogic_IB driver
- */
-
-static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
-					      struct ib_udata *udata)
-{
-	struct qib_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int qib_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct qib_pportdata *ppd)
 {
 	struct qib_verbs_counters cntrs;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-	ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+	ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
 		IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
 		IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
 		IB_PORT_OTHER_LOCAL_CHANGES_SUP;
 	if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
-		ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+		ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 
 	/* Snapshot current HW counters to "clear" them. */
 	qib_get_counters(ppd, &cntrs);
@@ -2061,26 +1533,55 @@ static void init_ibport(struct qib_pportdata *ppd)
 	ibp->z_excessive_buffer_overrun_errors =
 		cntrs.excessive_buffer_overrun_errors;
 	ibp->z_vl15_dropped = cntrs.vl15_dropped;
-	RCU_INIT_POINTER(ibp->qp0, NULL);
-	RCU_INIT_POINTER(ibp->qp1, NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
-static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
-			      struct ib_port_immutable *immutable)
+/**
+ * qib_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void qib_fill_device_attr(struct qib_devdata *dd)
 {
-	struct ib_port_attr attr;
-	int err;
-
-	err = qib_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
 
-	return 0;
+	rdi->dparms.props.max_pd = ib_qib_max_pds;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id =
+		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+	rdi->dparms.props.vendor_part_id = dd->deviceid;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = ib_qib_max_qps;
+	rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
+	rdi->dparms.props.max_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
+	rdi->dparms.props.max_cq = ib_qib_max_cqs;
+	rdi->dparms.props.max_cqe = ib_qib_max_cqes;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = ib_qib_max_srqs;
+	rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 
 /**
@@ -2091,68 +1592,20 @@ static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
 int qib_register_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct qib_pportdata *ppd = dd->pport;
-	unsigned i, lk_tab_size;
+	unsigned i, ctxt;
 	int ret;
 
-	dev->qp_table_size = ib_qib_qp_table_size;
 	get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
-	dev->qp_table = kmalloc_array(
-				dev->qp_table_size,
-				sizeof(*dev->qp_table),
-				GFP_KERNEL);
-	if (!dev->qp_table) {
-		ret = -ENOMEM;
-		goto err_qpt;
-	}
-	for (i = 0; i < dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_table[i], NULL);
-
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->qpt_lock);
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
-	init_timer(&dev->mem_timer);
-	dev->mem_timer.function = mem_timer;
-	dev->mem_timer.data = (unsigned long) dev;
-
-	qib_init_qpn_table(dd, &dev->qpn_table);
+	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
+
+	qpt_mask = dd->qpn_mask;
 
-	/*
-	 * The top ib_qib_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	/* insure generation is at least 4 bits see keys.c */
-	if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	dev->lk_table.max = 1 << ib_qib_lkey_table_size;
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct qib_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->piowait);
 	INIT_LIST_HEAD(&dev->dmawait);
 	INIT_LIST_HEAD(&dev->txwait);
@@ -2194,110 +1647,91 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = ppd->guid;
-	ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = qib_query_device;
 	ibdev->modify_device = qib_modify_device;
-	ibdev->query_port = qib_query_port;
-	ibdev->modify_port = qib_modify_port;
-	ibdev->query_pkey = qib_query_pkey;
-	ibdev->query_gid = qib_query_gid;
-	ibdev->alloc_ucontext = qib_alloc_ucontext;
-	ibdev->dealloc_ucontext = qib_dealloc_ucontext;
-	ibdev->alloc_pd = qib_alloc_pd;
-	ibdev->dealloc_pd = qib_dealloc_pd;
-	ibdev->create_ah = qib_create_ah;
-	ibdev->destroy_ah = qib_destroy_ah;
-	ibdev->modify_ah = qib_modify_ah;
-	ibdev->query_ah = qib_query_ah;
-	ibdev->create_srq = qib_create_srq;
-	ibdev->modify_srq = qib_modify_srq;
-	ibdev->query_srq = qib_query_srq;
-	ibdev->destroy_srq = qib_destroy_srq;
-	ibdev->create_qp = qib_create_qp;
-	ibdev->modify_qp = qib_modify_qp;
-	ibdev->query_qp = qib_query_qp;
-	ibdev->destroy_qp = qib_destroy_qp;
-	ibdev->post_send = qib_post_send;
-	ibdev->post_recv = qib_post_receive;
-	ibdev->post_srq_recv = qib_post_srq_receive;
-	ibdev->create_cq = qib_create_cq;
-	ibdev->destroy_cq = qib_destroy_cq;
-	ibdev->resize_cq = qib_resize_cq;
-	ibdev->poll_cq = qib_poll_cq;
-	ibdev->req_notify_cq = qib_req_notify_cq;
-	ibdev->get_dma_mr = qib_get_dma_mr;
-	ibdev->reg_user_mr = qib_reg_user_mr;
-	ibdev->dereg_mr = qib_dereg_mr;
-	ibdev->alloc_mr = qib_alloc_mr;
-	ibdev->map_mr_sg = qib_map_mr_sg;
-	ibdev->alloc_fmr = qib_alloc_fmr;
-	ibdev->map_phys_fmr = qib_map_phys_fmr;
-	ibdev->unmap_fmr = qib_unmap_fmr;
-	ibdev->dealloc_fmr = qib_dealloc_fmr;
-	ibdev->attach_mcast = qib_multicast_attach;
-	ibdev->detach_mcast = qib_multicast_detach;
 	ibdev->process_mad = qib_process_mad;
-	ibdev->mmap = qib_mmap;
-	ibdev->dma_ops = &qib_dma_mapping_ops;
-	ibdev->get_port_immutable = qib_port_immutable;
 
 	snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
 		 "Intel Infiniband HCA %s", init_utsname()->nodename);
 
-	ret = ib_register_device(ibdev, qib_create_port_files);
-	if (ret)
-		goto err_reg;
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
+	dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
+						qib_notify_create_mad_agent;
+	dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
+						qib_notify_free_mad_agent;
+
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
+	dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 1;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
+	dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = 1;
+	dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
+	dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+	dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "qib_cq%d", dd->unit);
+
+	qib_fill_device_attr(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++) {
+		ctxt = ppd->hw_pidx;
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      dd->rcd[ctxt]->pkeys);
+	}
 
-	ret = qib_create_agents(dev);
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_tx;
 
 	ret = qib_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	qib_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_tx:
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
@@ -2313,27 +1747,17 @@ err_tx:
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
 err_hdrs:
-	vfree(dev->lk_table.table);
-err_lk:
-	kfree(dev->qp_table);
-err_qpt:
 	qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void qib_unregister_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
-	u32 qps_inuse;
-	unsigned lk_tab_size;
 
 	qib_verbs_unregister_sysfs(dd);
 
-	qib_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->piowait))
 		qib_dev_err(dd, "piowait list not empty!\n");
@@ -2343,16 +1767,8 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
 		qib_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		qib_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		qib_dev_err(dd, "DMA MR not NULL!\n");
-
-	qps_inuse = qib_free_all_qps(dd);
-	if (qps_inuse)
-		qib_dev_err(dd, "QP memory leak! %u still in use\n",
-			    qps_inuse);
 
 	del_timer_sync(&dev->mem_timer);
-	qib_free_qpn_table(&dev->qpn_table);
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 		struct qib_verbs_txreq *tx;
@@ -2366,21 +1782,36 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
 				  dd->pport->sdma_descq_cnt *
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	vfree(dev->lk_table.table);
-	kfree(dev->qp_table);
 }
 
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
  */
-void qib_schedule_send(struct qib_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
 {
-	if (qib_send_ok(qp)) {
-		struct qib_ibport *ibp =
-			to_iport(qp->ibqp.device, qp->port_num);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_qp_priv *priv = qp->priv;
 
-		queue_work(ppd->qib_wq, &qp->s_work);
-	}
+	queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress.  The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+	if (qib_send_ok(qp))
+		_qib_schedule_send(qp);
 }
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 6c5e77753d85..4b76a8d59337 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,8 @@
 #include <linux/completion.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
 
 struct qib_ctxtdata;
 struct qib_pportdata;
@@ -53,9 +55,7 @@ struct qib_verbs_txreq;
 
 #define QIB_MAX_RDMA_ATOMIC     16
 #define QIB_GUIDS_PER_PORT	5
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define QIB_PSN_SHIFT		8
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -63,12 +63,6 @@ struct qib_verbs_txreq;
  */
 #define QIB_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -79,17 +73,6 @@ struct qib_verbs_txreq;
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_qib_state_ops[]) */
-#define QIB_POST_SEND_OK                0x01
-#define QIB_POST_RECV_OK                0x02
-#define QIB_PROCESS_RECV_OK             0x04
-#define QIB_PROCESS_SEND_OK             0x08
-#define QIB_PROCESS_NEXT_SEND_OK        0x10
-#define QIB_FLUSH_SEND			0x20
-#define QIB_FLUSH_RECV			0x40
-#define QIB_PROCESS_OR_FLUSH_SEND \
-	(QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -203,468 +186,21 @@ struct qib_pio_header {
 } __packed;
 
 /*
- * There is one struct qib_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct qib_mcast_qp.
+ * qib specific data structure that will be hidden from rvt after the queue pair
+ * is made common.
  */
-struct qib_mcast_qp {
-	struct list_head list;
-	struct qib_qp *qp;
-};
-
-struct qib_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct qib_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct qib_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by qib_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct qib_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct qib_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct qib_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct qib_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct qib_cq_wc *queue;
-	struct qib_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct qib_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of qib_segs that fit in a page. */
-#define QIB_SEGSZ     (PAGE_SIZE / sizeof(struct qib_seg))
-
-struct qib_segarray {
-	struct qib_seg segs[QIB_SEGSZ];
-};
-
-struct qib_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of qib_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	struct rcu_head list;
-	atomic_t refcount;
-	struct qib_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct qib_sge {
-	struct qib_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct qib_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	u64 *pages;
-	u32 npages;
-	struct qib_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct qib_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
-		struct ib_reg_wr reg_wr;
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct qib_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct qib_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct qib_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct qib_rwqe wq[0];
-};
-
-struct qib_rq {
-	struct qib_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	spinlock_t lock /* protect changes in this struct */
-		____cacheline_aligned_in_smp;
-};
-
-struct qib_srq {
-	struct ib_srq ibsrq;
-	struct qib_rq rq;
-	struct qib_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct qib_sge_state {
-	struct qib_sge *sg_list;      /* next SGE to be used if any */
-	struct qib_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct qib_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct qib_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct qib_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct qib_qp __rcu *next;            /* link list for QPN hash table */
-	struct qib_swqe *s_wq;  /* send work queue */
-	struct qib_mmap_info *ip;
-	struct qib_ib_header *s_hdr;     /* next packet header to send */
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-
-	u8 state;               /* QP state */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct qib_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waititing to respond */
-
-	struct qib_sge_state r_sge;     /* current receive data */
-	struct qib_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct qib_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct qib_verbs_txreq *s_tx;
-	struct qib_swqe *s_wqe;
-	struct qib_sge_state s_sge;     /* current send request data */
-	struct qib_mregion *s_rdma_mr;
-	atomic_t s_dma_busy;
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct qib_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
+struct qib_qp_priv {
+	struct qib_ib_header *s_hdr;    /* next packet header to send */
 	struct list_head iowait;        /* link for wait PIO buf */
-
+	atomic_t s_dma_busy;
+	struct qib_verbs_txreq *s_tx;
 	struct work_struct s_work;
-
 	wait_queue_head_t wait_dma;
-
-	struct qib_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct rvt_qp *owner;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define QIB_R_WRID_VALID        0
-#define QIB_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define QIB_R_REUSE_SGE 0x01
-#define QIB_R_RDMAR_SEQ 0x02
-#define QIB_R_RSP_NAK   0x04
-#define QIB_R_RSP_SEND  0x08
-#define QIB_R_COMM_EST  0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * QIB_S_BUSY - send tasklet is processing the QP
- * QIB_S_TIMER - the RC retry timer is active
- * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * QIB_S_WAIT_RNR - waiting for RNR timeout
- * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * QIB_S_WAIT_PIO - waiting for a send buffer to be available
- * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
- * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
- * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- */
-#define QIB_S_SIGNAL_REQ_WR	0x0001
-#define QIB_S_BUSY		0x0002
-#define QIB_S_TIMER		0x0004
-#define QIB_S_RESP_PENDING	0x0008
-#define QIB_S_ACK_PENDING	0x0010
-#define QIB_S_WAIT_FENCE	0x0020
-#define QIB_S_WAIT_RDMAR	0x0040
-#define QIB_S_WAIT_RNR		0x0080
-#define QIB_S_WAIT_SSN_CREDIT	0x0100
-#define QIB_S_WAIT_DMA		0x0200
-#define QIB_S_WAIT_PIO		0x0400
-#define QIB_S_WAIT_TX		0x0800
-#define QIB_S_WAIT_DMA_DESC	0x1000
-#define QIB_S_WAIT_KMEM		0x2000
-#define QIB_S_WAIT_PSN		0x4000
-#define QIB_S_WAIT_ACK		0x8000
-#define QIB_S_SEND_ONE		0x10000
-#define QIB_S_UNLIMITED_CREDIT	0x20000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
-	QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
-	QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
-	QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
-
-#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
-
 #define QIB_PSN_CREDIT  16
 
-/*
- * Since struct qib_swqe is not a fixed size, we can't simply index into
- * struct qib_qp.s_wq.  This function does the array index computation.
- */
-static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
-					      unsigned n)
-{
-	return (struct qib_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct qib_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct qib_sge)) * n);
-}
-
-/*
- * Since struct qib_rwqe is not a fixed size, we can't simply index into
- * struct qib_rwq.wq.  This function does the array index computation.
- */
-static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
-{
-	return (struct qib_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct qib_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	void *page;
-};
-
-struct qib_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u16 mask;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct qib_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct qib_mregion __rcu **table;
-};
-
 struct qib_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -682,21 +218,9 @@ struct qib_pma_counters {
 };
 
 struct qib_ibport {
-	struct qib_qp __rcu *qp0;
-	struct qib_qp __rcu *qp1;
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct qib_ah *sm_ah;
-	struct qib_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
-
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
+	struct rvt_ibport rvp;
+	struct rvt_ah *smi_ah;
 	__be64 guids[QIB_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
 	struct qib_pma_counters __percpu *pmastats;
 	u64 z_unicast_xmit;     /* starting count for PMA */
 	u64 z_unicast_rcv;      /* starting count for PMA */
@@ -715,82 +239,25 @@ struct qib_ibport {
 	u32 z_local_link_integrity_errors;      /* starting count for PMA */
 	u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
 	u32 z_vl15_dropped;                     /* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_rc_delayed_comp;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_loop_pkts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_rc_timeouts;
-	u32 n_dmawait;
-	u32 n_unaligned;
-	u32 n_rc_dupreq;
-	u32 n_rc_seqnak;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
 	u8 sl_to_vl[16];
-
 };
 
-
 struct qib_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct qib_mregion __rcu *dma_mr;
-
-	/* QP numbers are shared by all IB ports */
-	struct qib_qpn_table qpn_table;
-	struct qib_lkey_table lk_table;
+	struct rvt_dev_info rdi;
+
 	struct list_head piowait;       /* list for wait PIO buf */
 	struct list_head dmawait;	/* list for wait DMA */
 	struct list_head txwait;        /* list for wait qib_verbs_txreq */
 	struct list_head memwait;       /* list for wait kernel memory */
 	struct list_head txreq_free;
 	struct timer_list mem_timer;
-	struct qib_qp __rcu **qp_table;
 	struct qib_pio_header *pio_hdrs;
 	dma_addr_t pio_hdrs_phys;
-	/* list of QPs waiting for RNR timer */
-	spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
-	u32 qp_table_size; /* size of the hash table */
 	u32 qp_rnd; /* random bytes for hash */
-	spinlock_t qpt_lock;
 
 	u32 n_piowait;
 	u32 n_txwait;
 
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
 #ifdef CONFIG_DEBUG_FS
 	/* per HCA debugfs */
 	struct dentry *qib_ibdev_dbg;
@@ -813,56 +280,27 @@ struct qib_verbs_counters {
 	u32 vl15_dropped;
 };
 
-static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct qib_mr, ibmr);
-}
-
-static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct qib_pd, ibpd);
-}
-
-static inline struct qib_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct qib_ah, ibah);
-}
-
-static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct qib_cq, ibcq);
-}
-
-static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct qib_srq, ibsrq);
-}
-
-static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct qib_qp, ibqp);
-}
-
 static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibdev, struct qib_ibdev, ibdev);
+	struct rvt_dev_info *rdi;
+
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct qib_ibdev, rdi);
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int qib_send_ok(struct qib_qp *qp)
+static inline int qib_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
-		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
-/*
- * This must be called with s_lock held.
- */
-void qib_schedule_send(struct qib_qp *qp);
+void _qib_schedule_send(struct rvt_qp *qp);
+void qib_schedule_send(struct rvt_qp *qp);
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
@@ -878,7 +316,7 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 
 void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		   u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
-void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void qib_sys_guid_chg(struct qib_ibport *ibp);
 void qib_node_desc_chg(struct qib_ibport *ibp);
 int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -886,8 +324,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		    const struct ib_mad_hdr *in, size_t in_mad_size,
 		    struct ib_mad_hdr *out, size_t *out_mad_size,
 		    u16 *out_mad_pkey_index);
-int qib_create_agents(struct qib_ibdev *dev);
-void qib_free_agents(struct qib_ibdev *dev);
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
 
 /*
  * Compare the lower 24 bits of the two values.
@@ -898,8 +336,6 @@ static inline int qib_cmp24(u32 a, u32 b)
 	return (((int) a) - ((int) b)) << 8;
 }
 
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
-
 int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
 			  u64 *rwords, u64 *spkts, u64 *rpkts,
 			  u64 *xmit_wait);
@@ -907,35 +343,17 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
 int qib_get_counters(struct qib_pportdata *ppd,
 		     struct qib_verbs_counters *cntrs);
 
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp);
-
-__be32 qib_compute_aeth(struct qib_qp *qp);
-
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
-
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata);
-
-int qib_destroy_qp(struct ib_qp *ibqp);
-
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
-
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata);
-
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned qib_free_all_qps(struct qib_devdata *dd);
+__be32 qib_compute_aeth(struct rvt_qp *qp);
 
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
-
-void qib_free_qpn_table(struct qib_qpn_table *qpt);
+/*
+ * Functions provided by qib driver for rdmavt to use
+ */
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+void qib_notify_qp_reset(struct rvt_qp *qp);
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp);
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -949,7 +367,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
 
 #endif
 
-void qib_get_credit(struct qib_qp *qp, u32 aeth);
+void qib_get_credit(struct rvt_qp *qp, u32 aeth);
 
 unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
 
@@ -957,166 +375,66 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
 
 void qib_put_txreq(struct qib_verbs_txreq *tx);
 
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len);
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len);
 
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
 		  int release);
 
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
 void qib_rc_rnr_retry(unsigned long arg);
 
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
 
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
-int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
 
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-
-void qib_free_lkey(struct qib_mregion *mr);
-
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc);
-
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
-		u32 len, u64 vaddr, u32 rkey, int acc);
-
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr);
-
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata);
-
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata);
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int qib_destroy_srq(struct ib_srq *ibsrq);
-
-int qib_cq_init(struct qib_devdata *dd);
-
-void qib_cq_exit(struct qib_devdata *dd);
-
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
-
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata);
-
-int qib_destroy_cq(struct ib_cq *ibcq);
-
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata);
-
-int qib_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_entries);
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents);
-
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
-
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr);
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova);
-
-int qib_unmap_fmr(struct list_head *fmr_list);
-
-int qib_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void qib_get_mr(struct qib_mregion *mr)
-{
-	atomic_inc(&mr->refcount);
-}
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 void mr_rcu_callback(struct rcu_head *list);
 
-static inline void qib_put_mr(struct qib_mregion *mr)
-{
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		call_rcu(&mr->list, mr_rcu_callback);
-}
-
-static inline void qib_put_ss(struct qib_sge_state *ss)
-{
-	while (ss->num_sge) {
-		qib_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
-}
-
-
-void qib_release_mmap_info(struct kref *ref);
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
-					   struct ib_ucontext *context,
-					   void *obj);
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj);
-
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
-
-void qib_migrate_qp(struct qib_qp *qp);
+void qib_migrate_qp(struct rvt_qp *qp);
 
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0);
+		      int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 		 struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2);
 
-void qib_do_send(struct work_struct *work);
+void _qib_do_send(struct work_struct *work);
+
+void qib_do_send(struct rvt_qp *qp);
 
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status);
 
-void qib_send_rc_ack(struct qib_qp *qp);
+void qib_send_rc_ack(struct rvt_qp *qp);
 
-int qib_make_rc_req(struct qib_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp);
 
-int qib_make_uc_req(struct qib_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp);
 
-int qib_make_ud_req(struct qib_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp);
 
 int qib_register_ib_device(struct qib_devdata *);
 
@@ -1150,11 +468,11 @@ extern const enum ib_wc_opcode ib_qib_wc_opcode[];
 #define IB_PHYSPORTSTATE_CFG_ENH 0x10
 #define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
 
-extern const int ib_qib_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 extern __be64 ib_qib_sys_image_guid;    /* in network order */
 
-extern unsigned int ib_qib_lkey_table_size;
+extern unsigned int ib_rvt_lkey_table_size;
 
 extern unsigned int ib_qib_max_cqes;
 
@@ -1178,6 +496,4 @@ extern unsigned int ib_qib_max_srq_wrs;
 
 extern const u32 ib_qib_rnr_table[];
 
-extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
-
 #endif                          /* QIB_VERBS_H */
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
deleted file mode 100644
index b2fb5286dbd9..000000000000
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-
-#include "qib.h"
-
-/**
- * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
-{
-	struct qib_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
-{
-	struct qib_qp *qp = mqp->qp;
-
-	/* Notify qib_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * qib_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
-{
-	struct qib_mcast *mcast;
-
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void qib_mcast_free(struct qib_mcast *mcast)
-{
-	struct qib_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		qib_mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * qib_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct qib_mcast *mcast;
-
-	spin_lock_irqsave(&ibp->lock, flags);
-	n = ibp->mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * qib_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
-			 struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
-{
-	struct rb_node **n = &ibp->mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&ibp->lock);
-
-	while (*n) {
-		struct qib_mcast *tmcast;
-		struct qib_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct qib_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&ibp->lock);
-
-	return ret;
-}
-
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp;
-	struct qib_mcast *mcast;
-	struct qib_mcast_qp *mqp;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = qib_mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = qib_mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		break;
-
-	case EEXIST:            /* The mcast wasn't used */
-		qib_mcast_free(mcast);
-		break;
-
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct qib_mcast *mcast = NULL;
-	struct qib_mcast_qp *p, *tmp, *delp = NULL;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
-		return -EINVAL;
-
-	spin_lock_irq(&ibp->lock);
-
-	/* Find the GID in the mcast table. */
-	n = ibp->mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&ibp->lock);
-			return -EINVAL;
-		}
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-		delp = p;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&ibp->lock);
-	/* QP not attached */
-	if (!delp)
-		return -EINVAL;
-	/*
-	 * Wait for any list walkers to finish before freeing the
-	 * list element.
-	 */
-	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-	qib_mcast_qp_free(delp);
-
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		qib_mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-	return 0;
-}
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp)
-{
-	return ibp->mcast_tree.rb_node == NULL;
-}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 645a5f6e6c88..7209fbc03ccb 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -144,7 +144,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	ret = 0;
 
 	while (npages) {
-		ret = get_user_pages(current, current->mm, cur_base,
+		ret = get_user_pages(cur_base,
 					min_t(unsigned long, npages,
 					PAGE_SIZE / sizeof(struct page *)),
 					1, !writable, page_list, NULL);
diff --git a/drivers/infiniband/sw/Makefile b/drivers/infiniband/sw/Makefile
new file mode 100644
index 000000000000..988b6a0101a4
--- /dev/null
+++ b/drivers/infiniband/sw/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_INFINIBAND_RDMAVT)		+= rdmavt/
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
new file mode 100644
index 000000000000..11aa6a34bd71
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -0,0 +1,6 @@
+config INFINIBAND_RDMAVT
+	tristate "RDMA verbs transport library"
+	depends on 64BIT
+	default m
+	---help---
+	This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile
new file mode 100644
index 000000000000..ccaa7992ac97
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/Makefile
@@ -0,0 +1,13 @@
+#
+# rdmavt driver
+#
+#
+#
+# Called from the kernel module build system.
+#
+obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
+
+rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
+	trace.o
+
+CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
new file mode 100644
index 000000000000..16c446142c2a
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "ah.h"
+#include "vt.h" /* for prints */
+
+/**
+ * rvt_check_ah - validate the attributes of AH
+ * @ibdev: the ib device
+ * @ah_attr: the attributes of the AH
+ *
+ * If driver supports a more detailed check_ah function call back to it
+ * otherwise just check the basics.
+ *
+ * Return: 0 on success
+ */
+int rvt_check_ah(struct ib_device *ibdev,
+		 struct ib_ah_attr *ah_attr)
+{
+	int err;
+	struct ib_port_attr port_attr;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
+							     ah_attr->port_num);
+
+	err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+	if (err)
+		return -EINVAL;
+	if (ah_attr->port_num < 1 ||
+	    ah_attr->port_num > ibdev->phys_port_cnt)
+		return -EINVAL;
+	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
+		return -EINVAL;
+	if ((ah_attr->ah_flags & IB_AH_GRH) &&
+	    ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+		return -EINVAL;
+	if (link != IB_LINK_LAYER_ETHERNET) {
+		if (ah_attr->dlid == 0)
+			return -EINVAL;
+		if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		    ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+		    !(ah_attr->ah_flags & IB_AH_GRH))
+			return -EINVAL;
+	}
+	if (rdi->driver_f.check_ah)
+		return rdi->driver_f.check_ah(ibdev, ah_attr);
+	return 0;
+}
+EXPORT_SYMBOL(rvt_check_ah);
+
+/**
+ * rvt_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: newly allocated ah
+ */
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
+	unsigned long flags;
+
+	if (rvt_check_ah(pd->device, ah_attr))
+		return ERR_PTR(-EINVAL);
+
+	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->n_ahs_allocated++;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	ah->attr = *ah_attr;
+	atomic_set(&ah->refcount, 0);
+
+	if (dev->driver_f.notify_new_ah)
+		dev->driver_f.notify_new_ah(pd->device, ah_attr, ah);
+
+	return &ah->ibah;
+}
+
+/**
+ * rvt_destory_ah - Destory an address handle
+ * @ibah: address handle
+ *
+ * Return: 0 on success
+ */
+int rvt_destroy_ah(struct ib_ah *ibah)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+	unsigned long flags;
+
+	if (atomic_read(&ah->refcount) != 0)
+		return -EBUSY;
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	dev->n_ahs_allocated--;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	kfree(ah);
+
+	return 0;
+}
+
+/**
+ * rvt_modify_ah - modify an ah with given attrs
+ * @ibah: address handle to modify
+ * @ah_attr: attrs to apply
+ *
+ * Return: 0 on success
+ */
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	if (rvt_check_ah(ibah->device, ah_attr))
+		return -EINVAL;
+
+	ah->attr = *ah_attr;
+
+	return 0;
+}
+
+/**
+ * rvt_query_ah - return attrs for ah
+ * @ibah: address handle to query
+ * @ah_attr: return info in this
+ *
+ * Return: always 0
+ */
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	*ah_attr = ah->attr;
+
+	return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
new file mode 100644
index 000000000000..e9c36be87d79
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -0,0 +1,59 @@
+#ifndef DEF_RVTAH_H
+#define DEF_RVTAH_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr);
+int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+
+#endif          /* DEF_RVTAH_H */
diff --git a/drivers/staging/rdma/hfi1/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 4f046ffe7e60..b1ffc8b4a6c0 100644
--- a/drivers/staging/rdma/hfi1/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,25 +45,23 @@
  *
  */
 
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-
-#include "verbs.h"
-#include "hfi.h"
+#include "cq.h"
+#include "vt.h"
 
 /**
- * hfi1_cq_enter - add a new entry to the completion queue
+ * rvt_cq_enter - add a new entry to the completion queue
  * @cq: completion queue
  * @entry: work completion entry to add
- * @sig: true if @entry is a solicited entry
+ * @sig: true if @entry is solicited
  *
  * This may be called with qp->s_lock held.
  */
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 {
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq_wc *wc;
 	unsigned long flags;
 	u32 head;
 	u32 next;
@@ -79,11 +74,13 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 	 */
 	wc = cq->queue;
 	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
+	if (head >= (unsigned)cq->ibcq.cqe) {
 		head = cq->ibcq.cqe;
 		next = 0;
-	} else
+	} else {
 		next = head + 1;
+	}
+
 	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
@@ -114,8 +111,9 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 		wc->uqueue[head].port_num = entry->port_num;
 		/* Make sure entry is written before the head index. */
 		smp_wmb();
-	} else
+	} else {
 		wc->kqueue[head] = *entry;
+	}
 	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -126,10 +124,10 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		smp_read_barrier_depends(); /* see hfi1_cq_exit */
-		worker = cq->dd->worker;
+		smp_read_barrier_depends(); /* see rvt_cq_exit */
+		worker = cq->rdi->worker;
 		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
+			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
 			queue_kthread_work(worker, &cq->comptask);
 		}
@@ -137,59 +135,11 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 }
-
-/**
- * hfi1_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
+EXPORT_SYMBOL(rvt_cq_enter);
 
 static void send_complete(struct kthread_work *work)
 {
-	struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
+	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
 
 	/*
 	 * The completion handler will most likely rearm the notification
@@ -217,26 +167,25 @@ static void send_complete(struct kthread_work *work)
 }
 
 /**
- * hfi1_create_cq - create a completion queue
+ * rvt_create_cq - create a completion queue
  * @ibdev: the device this completion queue is attached to
  * @attr: creation attributes
- * @context: unused by the driver
+ * @context: unused by the QLogic_IB driver
  * @udata: user data for libibverbs.so
  *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
  * Called by ib_create_cq() in the generic verbs code.
+ *
+ * Return: pointer to the completion queue or negative errno values
+ * for failure.
  */
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata)
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_cq *cq;
-	struct hfi1_cq_wc *wc;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_cq *cq;
+	struct rvt_cq_wc *wc;
 	struct ib_cq *ret;
 	u32 sz;
 	unsigned int entries = attr->cqe;
@@ -244,11 +193,11 @@ struct ib_cq *hfi1_create_cq(
 	if (attr->flags)
 		return ERR_PTR(-EINVAL);
 
-	if (entries < 1 || entries > hfi1_max_cqes)
+	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
 		return ERR_PTR(-EINVAL);
 
 	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq)
 		return ERR_PTR(-ENOMEM);
 
@@ -272,12 +221,12 @@ struct ib_cq *hfi1_create_cq(
 
 	/*
 	 * Return the address of the WC as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		int err;
 
-		cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
+		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
 		if (!cq->ip) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_wc;
@@ -289,23 +238,22 @@ struct ib_cq *hfi1_create_cq(
 			ret = ERR_PTR(err);
 			goto bail_ip;
 		}
-	} else
-		cq->ip = NULL;
+	}
 
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == hfi1_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
+		spin_unlock(&rdi->n_cqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
 	}
 
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
+	rdi->n_cqs_allocated++;
+	spin_unlock(&rdi->n_cqs_lock);
 
 	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
 	/*
@@ -313,14 +261,11 @@ struct ib_cq *hfi1_create_cq(
 	 * The number of entries should be >= the number requested or return
 	 * an error.
 	 */
-	cq->dd = dd_from_dev(dev);
+	cq->rdi = rdi;
 	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
+	cq->notify = RVT_CQ_NONE;
 	spin_lock_init(&cq->lock);
 	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
 	cq->queue = wc;
 
 	ret = &cq->ibcq;
@@ -338,24 +283,24 @@ done:
 }
 
 /**
- * hfi1_destroy_cq - destroy a completion queue
+ * rvt_destroy_cq - destroy a completion queue
  * @ibcq: the completion queue to destroy.
  *
- * Returns 0 for success.
- *
  * Called by ib_destroy_cq() in the generic verbs code.
+ *
+ * Return: always 0
  */
-int hfi1_destroy_cq(struct ib_cq *ibcq)
+int rvt_destroy_cq(struct ib_cq *ibcq)
 {
-	struct hfi1_ibdev *dev = to_idev(ibcq->device);
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_dev_info *rdi = cq->rdi;
 
 	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	rdi->n_cqs_allocated--;
+	spin_unlock(&rdi->n_cqs_lock);
 	if (cq->ip)
-		kref_put(&cq->ip->ref, hfi1_release_mmap_info);
+		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(cq->queue);
 	kfree(cq);
@@ -364,18 +309,18 @@ int hfi1_destroy_cq(struct ib_cq *ibcq)
 }
 
 /**
- * hfi1_req_notify_cq - change the notification type for a completion queue
+ * rvt_req_notify_cq - change the notification type for a completion queue
  * @ibcq: the completion queue
  * @notify_flags: the type of notification to request
  *
- * Returns 0 for success.
- *
  * This may be called from interrupt context.  Also called by
  * ib_req_notify_cq() in the generic verbs code.
+ *
+ * Return: 0 for success.
  */
-int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 	unsigned long flags;
 	int ret = 0;
 
@@ -397,24 +342,23 @@ int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 }
 
 /**
- * hfi1_resize_cq - change the size of the CQ
+ * rvt_resize_cq - change the size of the CQ
  * @ibcq: the completion queue
  *
- * Returns 0 for success.
+ * Return: 0 for success.
  */
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *old_wc;
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *old_wc;
+	struct rvt_cq_wc *wc;
 	u32 head, tail, n;
 	int ret;
 	u32 sz;
+	struct rvt_dev_info *rdi = cq->rdi;
 
-	if (cqe < 1 || cqe > hfi1_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
+		return -EINVAL;
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
@@ -425,10 +369,8 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	else
 		sz += sizeof(struct ib_wc) * (cqe + 1);
 	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
+	if (!wc)
+		return -ENOMEM;
 
 	/* Check that we can write the offset to mmap. */
 	if (udata && udata->outlen >= sizeof(__u64)) {
@@ -446,11 +388,11 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	 */
 	old_wc = cq->queue;
 	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
+	if (head > (u32)cq->ibcq.cqe)
+		head = (u32)cq->ibcq.cqe;
 	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
 	if (head < tail)
 		n = cq->ibcq.cqe + 1 + head - tail;
 	else
@@ -464,7 +406,7 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 			wc->uqueue[n] = old_wc->uqueue[tail];
 		else
 			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
+		if (tail == (u32)cq->ibcq.cqe)
 			tail = 0;
 		else
 			tail++;
@@ -478,80 +420,131 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	vfree(old_wc);
 
 	if (cq->ip) {
-		struct hfi1_ibdev *dev = to_idev(ibcq->device);
-		struct hfi1_mmap_info *ip = cq->ip;
+		struct rvt_mmap_info *ip = cq->ip;
 
-		hfi1_update_mmap_info(dev, ip, sz, wc);
+		rvt_update_mmap_info(rdi, ip, sz, wc);
 
 		/*
 		 * Return the offset to mmap.
-		 * See hfi1_mmap() for details.
+		 * See rvt_mmap() for details.
 		 */
 		if (udata && udata->outlen >= sizeof(__u64)) {
 			ret = ib_copy_to_udata(udata, &ip->offset,
 					       sizeof(ip->offset));
 			if (ret)
-				goto bail;
+				return ret;
 		}
 
-		spin_lock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
 		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
-	ret = 0;
-	goto bail;
+	return 0;
 
 bail_unlock:
 	spin_unlock_irq(&cq->lock);
 bail_free:
 	vfree(wc);
-bail:
 	return ret;
 }
 
-int hfi1_cq_init(struct hfi1_devdata *dd)
+/**
+ * rvt_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * This may be called from interrupt context.  Also called by ib_poll_cq()
+ * in the generic verbs code.
+ *
+ * Return: the number of completion entries polled.
+ */
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *wc;
+	unsigned long flags;
+	int npolled;
+	u32 tail;
+
+	/* The kernel can only poll a kernel completion queue */
+	if (cq->ip)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	wc = cq->queue;
+	tail = wc->tail;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
+	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+		if (tail == wc->head)
+			break;
+		/* The kernel doesn't need a RMB since it has the lock. */
+		*entry = wc->kqueue[tail];
+		if (tail >= cq->ibcq.cqe)
+			tail = 0;
+		else
+			tail++;
+	}
+	wc->tail = tail;
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+/**
+ * rvt_driver_cq_init - Init cq resources on behalf of driver
+ * @rdi: rvt dev structure
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 {
 	int ret = 0;
 	int cpu;
 	struct task_struct *task;
 
-	if (dd->worker)
+	if (rdi->worker)
 		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
+	rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
+	if (!rdi->worker)
 		return -ENOMEM;
-	init_kthread_worker(dd->worker);
+	init_kthread_worker(rdi->worker);
 	task = kthread_create_on_node(
 		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"hfi1_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+		rdi->worker,
+		rdi->dparms.node,
+		"%s", rdi->dparms.cq_name);
+	if (IS_ERR(task)) {
+		kfree(rdi->worker);
+		rdi->worker = NULL;
+		return PTR_ERR(task);
+	}
+
+	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
 	kthread_bind(task, cpu);
 	wake_up_process(task);
-out:
 	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
 }
 
-void hfi1_cq_exit(struct hfi1_devdata *dd)
+/**
+ * rvt_cq_exit - tear down cq reources
+ * @rdi: rvt dev structure
+ */
+void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
 	struct kthread_worker *worker;
 
-	worker = dd->worker;
+	worker = rdi->worker;
 	if (!worker)
 		return;
 	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb(); /* See hfi1_cq_enter */
+	rdi->worker = NULL;
+	smp_wmb(); /* See rdi_cq_enter */
 	flush_kthread_worker(worker);
 	kthread_stop(worker->task);
 	kfree(worker);
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
new file mode 100644
index 000000000000..6182c29eff66
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -0,0 +1,64 @@
+#ifndef DEF_RVTCQ_H
+#define DEF_RVTCQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
+
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq);
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+int rvt_driver_cq_init(struct rvt_dev_info *rdi);
+void rvt_cq_exit(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTCQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
new file mode 100644
index 000000000000..33076a5eee2f
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+#include "dma.h"
+
+#define BAD_DMA_ADDRESS ((u64)0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
+			      size_t size, enum dma_data_direction direction)
+{
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	return (u64)cpu_addr;
+}
+
+static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+				 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
+			    unsigned long offset, size_t size,
+			    enum dma_data_direction direction)
+{
+	u64 addr;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	if (offset + size > PAGE_SIZE)
+		return BAD_DMA_ADDRESS;
+
+	addr = (u64)page_address(page);
+	if (addr)
+		addr += offset;
+
+	return addr;
+}
+
+static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+			       enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+		      int nents, enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	u64 addr;
+	int i;
+	int ret = nents;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return 0;
+
+	for_each_sg(sgl, sg, nents, i) {
+		addr = (u64)page_address(sg_page(sg));
+		if (!addr) {
+			ret = 0;
+			break;
+		}
+		sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		sg->dma_length = sg->length;
+#endif
+	}
+	return ret;
+}
+
+static void rvt_unmap_sg(struct ib_device *dev,
+			 struct scatterlist *sg, int nents,
+			 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+				    size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
+				       size_t size,
+				       enum dma_data_direction dir)
+{
+}
+
+static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
+				    u64 *dma_handle, gfp_t flag)
+{
+	struct page *p;
+	void *addr = NULL;
+
+	p = alloc_pages(flag, get_order(size));
+	if (p)
+		addr = page_address(p);
+	if (dma_handle)
+		*dma_handle = (u64)addr;
+	return addr;
+}
+
+static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
+				  void *cpu_addr, u64 dma_handle)
+{
+	free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
+	.mapping_error = rvt_mapping_error,
+	.map_single = rvt_dma_map_single,
+	.unmap_single = rvt_dma_unmap_single,
+	.map_page = rvt_dma_map_page,
+	.unmap_page = rvt_dma_unmap_page,
+	.map_sg = rvt_map_sg,
+	.unmap_sg = rvt_unmap_sg,
+	.sync_single_for_cpu = rvt_sync_single_for_cpu,
+	.sync_single_for_device = rvt_sync_single_for_device,
+	.alloc_coherent = rvt_dma_alloc_coherent,
+	.free_coherent = rvt_dma_free_coherent
+};
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h
new file mode 100644
index 000000000000..979f07e09195
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/dma.h
@@ -0,0 +1,53 @@
+#ifndef DEF_RDMAVTDMA_H
+#define DEF_RDMAVTDMA_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
+
+#endif          /* DEF_RDMAVTDMA_H */
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
new file mode 100644
index 000000000000..f6e99778d7ca
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/ib_mad.h>
+#include "mad.h"
+#include "vt.h"
+
+/**
+ * rvt_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port_num: the port number this packet came in on, 1 based from ib core
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS or error
+ */
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index)
+{
+	/*
+	 * MAD processing is quite different between hfi1 and qib. Therfore this
+	 * is expected to be provided by the driver. Other drivers in the future
+	 * may chose to implement this but it should not be made into a
+	 * requirement.
+	 */
+	if (ibport_num_to_idx(ibdev, port_num) < 0)
+		return -EINVAL;
+
+	return IB_MAD_RESULT_FAILURE;
+}
+
+static void rvt_send_mad_handler(struct ib_mad_agent *agent,
+				 struct ib_mad_send_wc *mad_send_wc)
+{
+	ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+/**
+ * rvt_create_mad_agents - create mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs to be notified of mad agent creation then call back
+ *
+ * Return 0 on success
+ */
+int rvt_create_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+	int ret;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		agent = ib_register_mad_agent(&rdi->ibdev, p + 1,
+					      IB_QPT_SMI,
+					      NULL, 0, rvt_send_mad_handler,
+					      NULL, NULL, 0);
+		if (IS_ERR(agent)) {
+			ret = PTR_ERR(agent);
+			goto err;
+		}
+
+		rvp->send_agent = agent;
+
+		if (rdi->driver_f.notify_create_mad_agent)
+			rdi->driver_f.notify_create_mad_agent(rdi, p);
+	}
+
+	return 0;
+
+err:
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+			if (rdi->driver_f.notify_free_mad_agent)
+				rdi->driver_f.notify_free_mad_agent(rdi, p);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * rvt_free_mad_agents - free up mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs notification of mad agent removal make the call back
+ */
+void rvt_free_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+		}
+		if (rvp->sm_ah) {
+			ib_destroy_ah(&rvp->sm_ah->ibah);
+			rvp->sm_ah = NULL;
+		}
+
+		if (rdi->driver_f.notify_free_mad_agent)
+			rdi->driver_f.notify_free_mad_agent(rdi, p);
+	}
+}
+
diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h
new file mode 100644
index 000000000000..a9d6eecc3723
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mad.h
@@ -0,0 +1,60 @@
+#ifndef DEF_RVTMAD_H
+#define DEF_RVTMAD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index);
+int rvt_create_mad_agents(struct rvt_dev_info *rdi);
+void rvt_free_mad_agents(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTMAD_H */
diff --git a/drivers/staging/rdma/hfi1/verbs_mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index afc6b4c61a1d..983d319ac976 100644
--- a/drivers/staging/rdma/hfi1/verbs_mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,17 +45,36 @@
  *
  */
 
+#include <linux/slab.h>
+#include <linux/sched.h>
 #include <linux/rculist.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
-#include "hfi.h"
+#include "mcast.h"
+
+/**
+ * rvt_driver_mcast - init resources for multicast
+ * @rdi: rvt dev struct
+ *
+ * This is per device that registers with rdmavt
+ */
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi)
+{
+	/*
+	 * Anything that needs setup for multicast on a per driver or per rdi
+	 * basis should be done in here.
+	 */
+	spin_lock_init(&rdi->n_mcast_grps_lock);
+}
 
 /**
  * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
  * @qp: the QP to link
  */
-static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
+static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
 {
-	struct hfi1_mcast_qp *mqp;
+	struct rvt_mcast_qp *mqp;
 
 	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
 	if (!mqp)
@@ -71,9 +87,9 @@ bail:
 	return mqp;
 }
 
-static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
+static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
 {
-	struct hfi1_qp *qp = mqp->qp;
+	struct rvt_qp *qp = mqp->qp;
 
 	/* Notify hfi1_destroy_qp() if it is waiting. */
 	if (atomic_dec_and_test(&qp->refcount))
@@ -88,11 +104,11 @@ static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
  *
  * A list of QPs will be attached to this structure.
  */
-static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
 {
-	struct hfi1_mcast *mcast;
+	struct rvt_mcast *mcast;
 
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
+	mcast = kzalloc(sizeof(*mcast), GFP_KERNEL);
 	if (!mcast)
 		goto bail;
 
@@ -100,75 +116,72 @@ static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
 	INIT_LIST_HEAD(&mcast->qp_list);
 	init_waitqueue_head(&mcast->wait);
 	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
 
 bail:
 	return mcast;
 }
 
-static void mcast_free(struct hfi1_mcast *mcast)
+static void rvt_mcast_free(struct rvt_mcast *mcast)
 {
-	struct hfi1_mcast_qp *p, *tmp;
+	struct rvt_mcast_qp *p, *tmp;
 
 	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		mcast_qp_free(p);
+		rvt_mcast_qp_free(p);
 
 	kfree(mcast);
 }
 
 /**
- * hfi1_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID
  * @ibp: the IB port structure
  * @mgid: the multicast GID to search for
  *
- * Returns NULL if not found.
- *
  * The caller is responsible for decrementing the reference count if found.
+ *
+ * Return: NULL if not found.
  */
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
 {
 	struct rb_node *n;
 	unsigned long flags;
-	struct hfi1_mcast *mcast;
+	struct rvt_mcast *found = NULL;
 
 	spin_lock_irqsave(&ibp->lock, flags);
 	n = ibp->mcast_tree.rb_node;
 	while (n) {
 		int ret;
+		struct rvt_mcast *mcast;
 
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
 
 		ret = memcmp(mgid->raw, mcast->mgid.raw,
 			     sizeof(union ib_gid));
-		if (ret < 0)
+		if (ret < 0) {
 			n = n->rb_left;
-		else if (ret > 0)
+		} else if (ret > 0) {
 			n = n->rb_right;
-		else {
+		} else {
 			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
+			found = mcast;
+			break;
 		}
 	}
 	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
+	return found;
 }
+EXPORT_SYMBOL(rvt_mcast_find);
 
 /**
  * mcast_add - insert mcast GID into table and attach QP struct
  * @mcast: the mcast GID table
  * @mqp: the QP to attach
  *
- * Return zero if both were added.  Return EEXIST if the GID was already in
+ * Return: zero if both were added.  Return EEXIST if the GID was already in
  * the table but the QP was added.  Return ESRCH if the QP was already
  * attached and neither structure was added.
  */
-static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
-		     struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
+static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
+			 struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
 {
 	struct rb_node **n = &ibp->mcast_tree.rb_node;
 	struct rb_node *pn = NULL;
@@ -177,11 +190,11 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 	spin_lock_irq(&ibp->lock);
 
 	while (*n) {
-		struct hfi1_mcast *tmcast;
-		struct hfi1_mcast_qp *p;
+		struct rvt_mcast *tmcast;
+		struct rvt_mcast_qp *p;
 
 		pn = *n;
-		tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
+		tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
 
 		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
 			     sizeof(union ib_gid));
@@ -201,7 +214,8 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 				goto bail;
 			}
 		}
-		if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
+		if (tmcast->n_attached ==
+		    rdi->dparms.props.max_mcast_qp_attach) {
 			ret = ENOMEM;
 			goto bail;
 		}
@@ -213,15 +227,15 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 		goto bail;
 	}
 
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
+	spin_lock(&rdi->n_mcast_grps_lock);
+	if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) {
+		spin_unlock(&rdi->n_mcast_grps_lock);
 		ret = ENOMEM;
 		goto bail;
 	}
 
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
+	rdi->n_mcast_grps_allocated++;
+	spin_unlock(&rdi->n_mcast_grps_lock);
 
 	mcast->n_attached++;
 
@@ -239,92 +253,98 @@ bail:
 	return ret;
 }
 
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+/**
+ * rvt_attach_mcast - attach a qp to a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp;
-	struct hfi1_mcast *mcast;
-	struct hfi1_mcast_qp *mqp;
-	int ret;
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast;
+	struct rvt_mcast_qp *mqp;
+	int ret = -ENOMEM;
 
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
 
 	/*
 	 * Allocate data structures since its better to do this outside of
 	 * spin locks and it will most likely be needed.
 	 */
-	mcast = mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		break;
+	mcast = rvt_mcast_alloc(gid);
+	if (!mcast)
+		return -ENOMEM;
 
-	case EEXIST:            /* The mcast wasn't used */
-		mcast_free(mcast);
-		break;
+	mqp = rvt_mcast_qp_alloc(qp);
+	if (!mqp)
+		goto bail_mcast;
 
+	switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) {
+	case ESRCH:
+		/* Neither was used: OK to attach the same QP twice. */
+		ret = 0;
+		goto bail_mqp;
+	case EEXIST: /* The mcast wasn't used */
+		ret = 0;
+		goto bail_mcast;
 	case ENOMEM:
 		/* Exceeded the maximum number of mcast groups. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
 		ret = -ENOMEM;
-		goto bail;
-
+		goto bail_mqp;
 	default:
 		break;
 	}
 
-	ret = 0;
+	return 0;
+
+bail_mqp:
+	rvt_mcast_qp_free(mqp);
+
+bail_mcast:
+	rvt_mcast_free(mcast);
 
-bail:
 	return ret;
 }
 
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+/**
+ * rvt_detach_mcast - remove a qp from a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct hfi1_mcast *mcast = NULL;
-	struct hfi1_mcast_qp *p, *tmp;
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast = NULL;
+	struct rvt_mcast_qp *p, *tmp, *delp = NULL;
 	struct rb_node *n;
 	int last = 0;
-	int ret;
+	int ret = 0;
 
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
 
 	spin_lock_irq(&ibp->lock);
 
 	/* Find the GID in the mcast table. */
 	n = ibp->mcast_tree.rb_node;
 	while (1) {
-		if (n == NULL) {
+		if (!n) {
 			spin_unlock_irq(&ibp->lock);
-			ret = -EINVAL;
-			goto bail;
+			return -EINVAL;
 		}
 
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
 		ret = memcmp(gid->raw, mcast->mgid.raw,
 			     sizeof(union ib_gid));
 		if (ret < 0)
@@ -345,6 +365,7 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		 */
 		list_del_rcu(&p->list);
 		mcast->n_attached--;
+		delp = p;
 
 		/* If this was the last attached QP, remove the GID too. */
 		if (list_empty(&mcast->qp_list)) {
@@ -355,31 +376,42 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	}
 
 	spin_unlock_irq(&ibp->lock);
+	/* QP not attached */
+	if (!delp)
+		return -EINVAL;
+
+	/*
+	 * Wait for any list walkers to finish before freeing the
+	 * list element.
+	 */
+	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+	rvt_mcast_qp_free(delp);
 
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		mcast_qp_free(p);
-	}
 	if (last) {
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
+		rvt_mcast_free(mcast);
+		spin_lock_irq(&rdi->n_mcast_grps_lock);
+		rdi->n_mcast_grps_allocated--;
+		spin_unlock_irq(&rdi->n_mcast_grps_lock);
 	}
 
-	ret = 0;
-
-bail:
-	return ret;
+	return 0;
 }
 
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
+/**
+ *rvt_mast_tree_empty - determine if any qps are attached to any mcast group
+ *@rdi: rvt dev struct
+ *
+ * Return: in use count
+ */
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi)
 {
-	return ibp->mcast_tree.rb_node == NULL;
+	int i;
+	int in_use = 0;
+
+	for (i = 0; i < rdi->dparms.nports; i++)
+		if (rdi->ports[i]->mcast_tree.rb_node)
+			in_use++;
+	return in_use;
 }
diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h
new file mode 100644
index 000000000000..29f579267608
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mcast.h
@@ -0,0 +1,58 @@
+#ifndef DEF_RVTMCAST_H
+#define DEF_RVTMCAST_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi);
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi);
+
+#endif          /* DEF_RVTMCAST_H */
diff --git a/drivers/staging/rdma/hfi1/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c
index 5173b1c60b3d..e202b8142759 100644
--- a/drivers/staging/rdma/hfi1/mmap.c
+++ b/drivers/infiniband/sw/rdmavt/mmap.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,68 +45,74 @@
  *
  */
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
-#include <linux/errno.h>
 #include <asm/pgtable.h>
+#include "mmap.h"
 
-#include "verbs.h"
+/**
+ * rvt_mmap_init - init link list and lock for mem map
+ * @rdi: rvt dev struct
+ */
+void rvt_mmap_init(struct rvt_dev_info *rdi)
+{
+	INIT_LIST_HEAD(&rdi->pending_mmaps);
+	spin_lock_init(&rdi->pending_lock);
+	rdi->mmap_offset = PAGE_SIZE;
+	spin_lock_init(&rdi->mmap_offset_lock);
+}
 
 /**
- * hfi1_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct hfi1_mmap_info
+ * rvt_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct rvt_mmap_info
  */
-void hfi1_release_mmap_info(struct kref *ref)
+void rvt_release_mmap_info(struct kref *ref)
 {
-	struct hfi1_mmap_info *ip =
-		container_of(ref, struct hfi1_mmap_info, ref);
-	struct hfi1_ibdev *dev = to_idev(ip->context->device);
+	struct rvt_mmap_info *ip =
+		container_of(ref, struct rvt_mmap_info, ref);
+	struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device);
 
-	spin_lock_irq(&dev->pending_lock);
+	spin_lock_irq(&rdi->pending_lock);
 	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
+	spin_unlock_irq(&rdi->pending_lock);
 
 	vfree(ip->obj);
 	kfree(ip);
 }
 
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void hfi1_vma_open(struct vm_area_struct *vma)
+static void rvt_vma_open(struct vm_area_struct *vma)
 {
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
+	struct rvt_mmap_info *ip = vma->vm_private_data;
 
 	kref_get(&ip->ref);
 }
 
-static void hfi1_vma_close(struct vm_area_struct *vma)
+static void rvt_vma_close(struct vm_area_struct *vma)
 {
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
+	struct rvt_mmap_info *ip = vma->vm_private_data;
 
-	kref_put(&ip->ref, hfi1_release_mmap_info);
+	kref_put(&ip->ref, rvt_release_mmap_info);
 }
 
-static struct vm_operations_struct hfi1_vm_ops = {
-	.open =     hfi1_vma_open,
-	.close =    hfi1_vma_close,
+static const struct vm_operations_struct rvt_vm_ops = {
+	.open = rvt_vma_open,
+	.close = rvt_vma_close,
 };
 
 /**
- * hfi1_mmap - create a new mmap region
+ * rvt_mmap - create a new mmap region
  * @context: the IB user context of the process making the mmap() call
  * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
+ *
+ * Return: zero if the mmap is OK. Otherwise, return an errno.
  */
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
-	struct hfi1_ibdev *dev = to_idev(context->device);
+	struct rvt_dev_info *rdi = ib_to_rvt(context->device);
 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long size = vma->vm_end - vma->vm_start;
-	struct hfi1_mmap_info *ip, *pp;
+	struct rvt_mmap_info *ip, *pp;
 	int ret = -EINVAL;
 
 	/*
@@ -117,53 +120,60 @@ int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	 * Normally, this list is very short since a call to create a
 	 * CQ, QP, or SRQ is soon followed by a call to mmap().
 	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+	spin_lock_irq(&rdi->pending_lock);
+	list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps,
 				 pending_mmaps) {
 		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
+		if (context != ip->context || (__u64)offset != ip->offset)
 			continue;
 		/* Don't allow a mmap larger than the object. */
 		if (size > ip->size)
 			break;
 
 		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+		spin_unlock_irq(&rdi->pending_lock);
 
 		ret = remap_vmalloc_range(vma, ip->obj, 0);
 		if (ret)
 			goto done;
-		vma->vm_ops = &hfi1_vm_ops;
+		vma->vm_ops = &rvt_vm_ops;
 		vma->vm_private_data = ip;
-		hfi1_vma_open(vma);
+		rvt_vma_open(vma);
 		goto done;
 	}
-	spin_unlock_irq(&dev->pending_lock);
+	spin_unlock_irq(&rdi->pending_lock);
 done:
 	return ret;
 }
 
-/*
- * Allocate information for hfi1_mmap
+/**
+ * rvt_create_mmap_info - allocate information for hfi1_mmap
+ * @rdi: rvt dev struct
+ * @size: size in bytes to map
+ * @context: user context
+ * @obj: opaque pointer to a cq, wq etc
+ *
+ * Return: rvt_mmap struct on success
  */
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
-					     u32 size,
-					     struct ib_ucontext *context,
-					     void *obj) {
-	struct hfi1_mmap_info *ip;
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+					   u32 size,
+					   struct ib_ucontext *context,
+					   void *obj)
+{
+	struct rvt_mmap_info *ip;
 
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+	ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node);
 	if (!ip)
-		goto bail;
+		return ip;
 
 	size = PAGE_ALIGN(size);
 
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
 
 	INIT_LIST_HEAD(&ip->pending_mmaps);
 	ip->size = size;
@@ -171,21 +181,27 @@ struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
 	ip->obj = obj;
 	kref_init(&ip->ref);
 
-bail:
 	return ip;
 }
 
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj)
+/**
+ * rvt_update_mmap_info - update a mem map
+ * @rdi: rvt dev struct
+ * @ip: mmap info pointer
+ * @size: size to grow by
+ * @obj: opaque pointer to cq, wq, etc.
+ */
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+			  u32 size, void *obj)
 {
 	size = PAGE_ALIGN(size);
 
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
 
 	ip->size = size;
 	ip->obj = obj;
diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h
new file mode 100644
index 000000000000..fab0e7b1daf9
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mmap.h
@@ -0,0 +1,63 @@
+#ifndef DEF_RDMAVTMMAP_H
+#define DEF_RDMAVTMMAP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_mmap_init(struct rvt_dev_info *rdi);
+void rvt_release_mmap_info(struct kref *ref);
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+					   u32 size,
+					   struct ib_ucontext *context,
+					   void *obj);
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+			  u32 size, void *obj);
+
+#endif          /* DEF_RDMAVTMMAP_H */
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
new file mode 100644
index 000000000000..0ff765bfd619
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+#include <rdma/rdma_vt.h>
+#include "vt.h"
+#include "mr.h"
+
+/**
+ * rvt_driver_mr_init - Init MR resources per driver
+ * @rdi: rvt dev struct
+ *
+ * Do any intilization needed when a driver registers with rdmavt.
+ *
+ * Return: 0 on success or errno on failure
+ */
+int rvt_driver_mr_init(struct rvt_dev_info *rdi)
+{
+	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
+	unsigned lk_tab_size;
+	int i;
+
+	/*
+	 * The top hfi1_lkey_table_size bits are used to index the
+	 * table.  The lower 8 bits can be owned by the user (copied from
+	 * the LKEY).  The remaining bits act as a generation number or tag.
+	 */
+	if (!lkey_table_size)
+		return -EINVAL;
+
+	spin_lock_init(&rdi->lkey_table.lock);
+
+	/* ensure generation is at least 4 bits */
+	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
+		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
+			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
+		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
+		lkey_table_size = rdi->dparms.lkey_table_size;
+	}
+	rdi->lkey_table.max = 1 << lkey_table_size;
+	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
+	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
+			       vmalloc_node(lk_tab_size, rdi->dparms.node);
+	if (!rdi->lkey_table.table)
+		return -ENOMEM;
+
+	RCU_INIT_POINTER(rdi->dma_mr, NULL);
+	for (i = 0; i < rdi->lkey_table.max; i++)
+		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
+
+	return 0;
+}
+
+/**
+ *rvt_mr_exit: clean up MR
+ *@rdi: rvt dev structure
+ *
+ * called when drivers have unregistered or perhaps failed to register with us
+ */
+void rvt_mr_exit(struct rvt_dev_info *rdi)
+{
+	if (rdi->dma_mr)
+		rvt_pr_err(rdi, "DMA MR not null!\n");
+
+	vfree(rdi->lkey_table.table);
+}
+
+static void rvt_deinit_mregion(struct rvt_mregion *mr)
+{
+	int i = mr->mapsz;
+
+	mr->mapsz = 0;
+	while (i)
+		kfree(mr->map[--i]);
+}
+
+static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
+			    int count)
+{
+	int m, i = 0;
+
+	mr->mapsz = 0;
+	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	for (; i < m; i++) {
+		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
+		if (!mr->map[i]) {
+			rvt_deinit_mregion(mr);
+			return -ENOMEM;
+		}
+		mr->mapsz++;
+	}
+	init_completion(&mr->comp);
+	/* count returning the ptr to user */
+	atomic_set(&mr->refcount, 1);
+	mr->pd = pd;
+	mr->max_segs = count;
+	return 0;
+}
+
+/**
+ * rvt_alloc_lkey - allocate an lkey
+ * @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count as required.
+ *
+ * Sets the lkey field mr for non-dma regions.
+ *
+ */
+static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
+{
+	unsigned long flags;
+	u32 r;
+	u32 n;
+	int ret = 0;
+	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+
+	rvt_get_mr(mr);
+	spin_lock_irqsave(&rkt->lock, flags);
+
+	/* special case for dma_mr lkey == 0 */
+	if (dma_region) {
+		struct rvt_mregion *tmr;
+
+		tmr = rcu_access_pointer(dev->dma_mr);
+		if (!tmr) {
+			rcu_assign_pointer(dev->dma_mr, mr);
+			mr->lkey_published = 1;
+		} else {
+			rvt_put_mr(mr);
+		}
+		goto success;
+	}
+
+	/* Find the next available LKEY */
+	r = rkt->next;
+	n = r;
+	for (;;) {
+		if (!rcu_access_pointer(rkt->table[r]))
+			break;
+		r = (r + 1) & (rkt->max - 1);
+		if (r == n)
+			goto bail;
+	}
+	rkt->next = (r + 1) & (rkt->max - 1);
+	/*
+	 * Make sure lkey is never zero which is reserved to indicate an
+	 * unrestricted LKEY.
+	 */
+	rkt->gen++;
+	/*
+	 * bits are capped to ensure enough bits for generation number
+	 */
+	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
+		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
+		 << 8);
+	if (mr->lkey == 0) {
+		mr->lkey |= 1 << 8;
+		rkt->gen++;
+	}
+	rcu_assign_pointer(rkt->table[r], mr);
+	mr->lkey_published = 1;
+success:
+	spin_unlock_irqrestore(&rkt->lock, flags);
+out:
+	return ret;
+bail:
+	rvt_put_mr(mr);
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	ret = -ENOMEM;
+	goto out;
+}
+
+/**
+ * rvt_free_lkey - free an lkey
+ * @mr: mr to free from tables
+ */
+static void rvt_free_lkey(struct rvt_mregion *mr)
+{
+	unsigned long flags;
+	u32 lkey = mr->lkey;
+	u32 r;
+	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+	int freed = 0;
+
+	spin_lock_irqsave(&rkt->lock, flags);
+	if (!mr->lkey_published)
+		goto out;
+	if (lkey == 0) {
+		RCU_INIT_POINTER(dev->dma_mr, NULL);
+	} else {
+		r = lkey >> (32 - dev->dparms.lkey_table_size);
+		RCU_INIT_POINTER(rkt->table[r], NULL);
+	}
+	mr->lkey_published = 0;
+	freed++;
+out:
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (freed) {
+		synchronize_rcu();
+		rvt_put_mr(mr);
+	}
+}
+
+static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
+{
+	struct rvt_mr *mr;
+	int rval = -ENOMEM;
+	int m;
+
+	/* Allocate struct plus pointers to first level page tables. */
+	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
+	if (!mr)
+		goto bail;
+
+	rval = rvt_init_mregion(&mr->mr, pd, count);
+	if (rval)
+		goto bail;
+	/*
+	 * ib_reg_phys_mr() will initialize mr->ibmr except for
+	 * lkey and rkey.
+	 */
+	rval = rvt_alloc_lkey(&mr->mr, 0);
+	if (rval)
+		goto bail_mregion;
+	mr->ibmr.lkey = mr->mr.lkey;
+	mr->ibmr.rkey = mr->mr.lkey;
+done:
+	return mr;
+
+bail_mregion:
+	rvt_deinit_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	mr = ERR_PTR(rval);
+	goto done;
+}
+
+static void __rvt_free_mr(struct rvt_mr *mr)
+{
+	rvt_deinit_mregion(&mr->mr);
+	rvt_free_lkey(&mr->mr);
+	vfree(mr);
+}
+
+/**
+ * rvt_get_dma_mr - get a DMA memory region
+ * @pd: protection domain for this memory region
+ * @acc: access flags
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ * Note that all DMA addresses should be created via the
+ * struct ib_dma_mapping_ops functions (see dma.c).
+ */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct rvt_mr *mr;
+	struct ib_mr *ret;
+	int rval;
+
+	if (ibpd_to_rvtpd(pd)->user)
+		return ERR_PTR(-EPERM);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	rval = rvt_init_mregion(&mr->mr, pd, 0);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail;
+	}
+
+	rval = rvt_alloc_lkey(&mr->mr, 1);
+	if (rval) {
+		ret = ERR_PTR(rval);
+		goto bail_mregion;
+	}
+
+	mr->mr.access_flags = acc;
+	ret = &mr->ibmr;
+done:
+	return ret;
+
+bail_mregion:
+	rvt_deinit_mregion(&mr->mr);
+bail:
+	kfree(mr);
+	goto done;
+}
+
+/**
+ * rvt_reg_user_mr - register a userspace memory region
+ * @pd: protection domain for this memory region
+ * @start: starting userspace address
+ * @length: length of region to register
+ * @mr_access_flags: access flags for this memory region
+ * @udata: unused by the driver
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			      u64 virt_addr, int mr_access_flags,
+			      struct ib_udata *udata)
+{
+	struct rvt_mr *mr;
+	struct ib_umem *umem;
+	struct scatterlist *sg;
+	int n, m, entry;
+	struct ib_mr *ret;
+
+	if (length == 0)
+		return ERR_PTR(-EINVAL);
+
+	umem = ib_umem_get(pd->uobject->context, start, length,
+			   mr_access_flags, 0);
+	if (IS_ERR(umem))
+		return (void *)umem;
+
+	n = umem->nmap;
+
+	mr = __rvt_alloc_mr(n, pd);
+	if (IS_ERR(mr)) {
+		ret = (struct ib_mr *)mr;
+		goto bail_umem;
+	}
+
+	mr->mr.user_base = start;
+	mr->mr.iova = virt_addr;
+	mr->mr.length = length;
+	mr->mr.offset = ib_umem_offset(umem);
+	mr->mr.access_flags = mr_access_flags;
+	mr->umem = umem;
+
+	if (is_power_of_2(umem->page_size))
+		mr->mr.page_shift = ilog2(umem->page_size);
+	m = 0;
+	n = 0;
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		void *vaddr;
+
+		vaddr = page_address(sg_page(sg));
+		if (!vaddr) {
+			ret = ERR_PTR(-EINVAL);
+			goto bail_inval;
+		}
+		mr->mr.map[m]->segs[n].vaddr = vaddr;
+		mr->mr.map[m]->segs[n].length = umem->page_size;
+		n++;
+		if (n == RVT_SEGSZ) {
+			m++;
+			n = 0;
+		}
+	}
+	return &mr->ibmr;
+
+bail_inval:
+	__rvt_free_mr(mr);
+
+bail_umem:
+	ib_umem_release(umem);
+
+	return ret;
+}
+
+/**
+ * rvt_dereg_mr - unregister and free a memory region
+ * @ibmr: the memory region to free
+ *
+ *
+ * Note that this is called to free MRs created by rvt_get_dma_mr()
+ * or rvt_reg_user_mr().
+ *
+ * Returns 0 on success.
+ */
+int rvt_dereg_mr(struct ib_mr *ibmr)
+{
+	struct rvt_mr *mr = to_imr(ibmr);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
+	int ret = 0;
+	unsigned long timeout;
+
+	rvt_free_lkey(&mr->mr);
+
+	rvt_put_mr(&mr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
+	if (!timeout) {
+		rvt_pr_err(rdi,
+			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
+			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+		rvt_get_mr(&mr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	rvt_deinit_mregion(&mr->mr);
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+	kfree(mr);
+out:
+	return ret;
+}
+
+/**
+ * rvt_alloc_mr - Allocate a memory region usable with the
+ * @pd: protection domain for this memory region
+ * @mr_type: mem region type
+ * @max_num_sg: Max number of segments allowed
+ *
+ * Return: the memory region on success, otherwise return an errno.
+ */
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+			   enum ib_mr_type mr_type,
+			   u32 max_num_sg)
+{
+	struct rvt_mr *mr;
+
+	if (mr_type != IB_MR_TYPE_MEM_REG)
+		return ERR_PTR(-EINVAL);
+
+	mr = __rvt_alloc_mr(max_num_sg, pd);
+	if (IS_ERR(mr))
+		return (struct ib_mr *)mr;
+
+	return &mr->ibmr;
+}
+
+/**
+ * rvt_alloc_fmr - allocate a fast memory region
+ * @pd: the protection domain for this memory region
+ * @mr_access_flags: access flags for this memory region
+ * @fmr_attr: fast memory region attributes
+ *
+ * Return: the memory region on success, otherwise returns an errno.
+ */
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			     struct ib_fmr_attr *fmr_attr)
+{
+	struct rvt_fmr *fmr;
+	int m;
+	struct ib_fmr *ret;
+	int rval = -ENOMEM;
+
+	/* Allocate struct plus pointers to first level page tables. */
+	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
+	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
+	if (!fmr)
+		goto bail;
+
+	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+	if (rval)
+		goto bail;
+
+	/*
+	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
+	 * rkey.
+	 */
+	rval = rvt_alloc_lkey(&fmr->mr, 0);
+	if (rval)
+		goto bail_mregion;
+	fmr->ibfmr.rkey = fmr->mr.lkey;
+	fmr->ibfmr.lkey = fmr->mr.lkey;
+	/*
+	 * Resources are allocated but no valid mapping (RKEY can't be
+	 * used).
+	 */
+	fmr->mr.access_flags = mr_access_flags;
+	fmr->mr.max_segs = fmr_attr->max_pages;
+	fmr->mr.page_shift = fmr_attr->page_shift;
+
+	ret = &fmr->ibfmr;
+done:
+	return ret;
+
+bail_mregion:
+	rvt_deinit_mregion(&fmr->mr);
+bail:
+	kfree(fmr);
+	ret = ERR_PTR(rval);
+	goto done;
+}
+
+/**
+ * rvt_map_phys_fmr - set up a fast memory region
+ * @ibmfr: the fast memory region to set up
+ * @page_list: the list of pages to associate with the fast memory region
+ * @list_len: the number of pages to associate with the fast memory region
+ * @iova: the virtual address of the start of the fast memory region
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success
+ */
+
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		     int list_len, u64 iova)
+{
+	struct rvt_fmr *fmr = to_ifmr(ibfmr);
+	struct rvt_lkey_table *rkt;
+	unsigned long flags;
+	int m, n, i;
+	u32 ps;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
+
+	i = atomic_read(&fmr->mr.refcount);
+	if (i > 2)
+		return -EBUSY;
+
+	if (list_len > fmr->mr.max_segs)
+		return -EINVAL;
+
+	rkt = &rdi->lkey_table;
+	spin_lock_irqsave(&rkt->lock, flags);
+	fmr->mr.user_base = iova;
+	fmr->mr.iova = iova;
+	ps = 1 << fmr->mr.page_shift;
+	fmr->mr.length = list_len * ps;
+	m = 0;
+	n = 0;
+	for (i = 0; i < list_len; i++) {
+		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
+		fmr->mr.map[m]->segs[n].length = ps;
+		if (++n == RVT_SEGSZ) {
+			m++;
+			n = 0;
+		}
+	}
+	spin_unlock_irqrestore(&rkt->lock, flags);
+	return 0;
+}
+
+/**
+ * rvt_unmap_fmr - unmap fast memory regions
+ * @fmr_list: the list of fast memory regions to unmap
+ *
+ * Return: 0 on success.
+ */
+int rvt_unmap_fmr(struct list_head *fmr_list)
+{
+	struct rvt_fmr *fmr;
+	struct rvt_lkey_table *rkt;
+	unsigned long flags;
+	struct rvt_dev_info *rdi;
+
+	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
+		rdi = ib_to_rvt(fmr->ibfmr.device);
+		rkt = &rdi->lkey_table;
+		spin_lock_irqsave(&rkt->lock, flags);
+		fmr->mr.user_base = 0;
+		fmr->mr.iova = 0;
+		fmr->mr.length = 0;
+		spin_unlock_irqrestore(&rkt->lock, flags);
+	}
+	return 0;
+}
+
+/**
+ * rvt_dealloc_fmr - deallocate a fast memory region
+ * @ibfmr: the fast memory region to deallocate
+ *
+ * Return: 0 on success.
+ */
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
+{
+	struct rvt_fmr *fmr = to_ifmr(ibfmr);
+	int ret = 0;
+	unsigned long timeout;
+
+	rvt_free_lkey(&fmr->mr);
+	rvt_put_mr(&fmr->mr); /* will set completion if last */
+	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
+	if (!timeout) {
+		rvt_get_mr(&fmr->mr);
+		ret = -EBUSY;
+		goto out;
+	}
+	rvt_deinit_mregion(&fmr->mr);
+	kfree(fmr);
+out:
+	return ret;
+}
+
+/**
+ * rvt_lkey_ok - check IB SGE for validity and initialize
+ * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
+ * @isge: outgoing internal SGE
+ * @sge: SGE to check
+ * @acc: access flags
+ *
+ * Check the IB SGE for validity and initialize our internal version
+ * of it.
+ *
+ * Return: 1 if valid and successful, otherwise returns 0.
+ *
+ * increments the reference count upon success
+ *
+ */
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+		struct rvt_sge *isge, struct ib_sge *sge, int acc)
+{
+	struct rvt_mregion *mr;
+	unsigned n, m;
+	size_t off;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
+	/*
+	 * We use LKEY == zero for kernel virtual addresses
+	 * (see rvt_get_dma_mr and dma.c).
+	 */
+	rcu_read_lock();
+	if (sge->lkey == 0) {
+		if (pd->user)
+			goto bail;
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
+			goto bail;
+		atomic_inc(&mr->refcount);
+		rcu_read_unlock();
+
+		isge->mr = mr;
+		isge->vaddr = (void *)sge->addr;
+		isge->length = sge->length;
+		isge->sge_length = sge->length;
+		isge->m = 0;
+		isge->n = 0;
+		goto ok;
+	}
+	mr = rcu_dereference(
+		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
+		goto bail;
+
+	off = sge->addr - mr->user_base;
+	if (unlikely(sge->addr < mr->user_base ||
+		     off + sge->length > mr->length ||
+		     (mr->access_flags & acc) != acc))
+		goto bail;
+	atomic_inc(&mr->refcount);
+	rcu_read_unlock();
+
+	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * page sizes are uniform power of 2 so no loop is necessary
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
+			off -= mr->map[m]->segs[n].length;
+			n++;
+			if (n >= RVT_SEGSZ) {
+				m++;
+				n = 0;
+			}
+		}
+	}
+	isge->mr = mr;
+	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
+	isge->length = mr->map[m]->segs[n].length - off;
+	isge->sge_length = sge->length;
+	isge->m = m;
+	isge->n = n;
+ok:
+	return 1;
+bail:
+	rcu_read_unlock();
+	return 0;
+}
+EXPORT_SYMBOL(rvt_lkey_ok);
+
+/**
+ * rvt_rkey_ok - check the IB virtual address, length, and RKEY
+ * @qp: qp for validation
+ * @sge: SGE state
+ * @len: length of data
+ * @vaddr: virtual address to place data
+ * @rkey: rkey to check
+ * @acc: access flags
+ *
+ * Return: 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
+ */
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+		u32 len, u64 vaddr, u32 rkey, int acc)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
+	struct rvt_lkey_table *rkt = &dev->lkey_table;
+	struct rvt_mregion *mr;
+	unsigned n, m;
+	size_t off;
+
+	/*
+	 * We use RKEY == zero for kernel virtual addresses
+	 * (see rvt_get_dma_mr and dma.c).
+	 */
+	rcu_read_lock();
+	if (rkey == 0) {
+		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
+		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
+
+		if (pd->user)
+			goto bail;
+		mr = rcu_dereference(rdi->dma_mr);
+		if (!mr)
+			goto bail;
+		atomic_inc(&mr->refcount);
+		rcu_read_unlock();
+
+		sge->mr = mr;
+		sge->vaddr = (void *)vaddr;
+		sge->length = len;
+		sge->sge_length = len;
+		sge->m = 0;
+		sge->n = 0;
+		goto ok;
+	}
+
+	mr = rcu_dereference(
+		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+		goto bail;
+
+	off = vaddr - mr->iova;
+	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
+		     (mr->access_flags & acc) == 0))
+		goto bail;
+	atomic_inc(&mr->refcount);
+	rcu_read_unlock();
+
+	off += mr->offset;
+	if (mr->page_shift) {
+		/*
+		 * page sizes are uniform power of 2 so no loop is necessary
+		 * entries_spanned_by_off is the number of times the loop below
+		 * would have executed.
+		*/
+		size_t entries_spanned_by_off;
+
+		entries_spanned_by_off = off >> mr->page_shift;
+		off -= (entries_spanned_by_off << mr->page_shift);
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
+	} else {
+		m = 0;
+		n = 0;
+		while (off >= mr->map[m]->segs[n].length) {
+			off -= mr->map[m]->segs[n].length;
+			n++;
+			if (n >= RVT_SEGSZ) {
+				m++;
+				n = 0;
+			}
+		}
+	}
+	sge->mr = mr;
+	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
+	sge->length = mr->map[m]->segs[n].length - off;
+	sge->sge_length = len;
+	sge->m = m;
+	sge->n = n;
+ok:
+	return 1;
+bail:
+	rcu_read_unlock();
+	return 0;
+}
+EXPORT_SYMBOL(rvt_rkey_ok);
diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
new file mode 100644
index 000000000000..69380512c6d1
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/mr.h
@@ -0,0 +1,92 @@
+#ifndef DEF_RVTMR_H
+#define DEF_RVTMR_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+struct rvt_fmr {
+	struct ib_fmr ibfmr;
+	struct rvt_mregion mr;        /* must be last */
+};
+
+struct rvt_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+	struct rvt_mregion mr;  /* must be last */
+};
+
+static inline struct rvt_fmr *to_ifmr(struct ib_fmr *ibfmr)
+{
+	return container_of(ibfmr, struct rvt_fmr, ibfmr);
+}
+
+static inline struct rvt_mr *to_imr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct rvt_mr, ibmr);
+}
+
+int rvt_driver_mr_init(struct rvt_dev_info *rdi);
+void rvt_mr_exit(struct rvt_dev_info *rdi);
+
+/* Mem Regions */
+struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			      u64 virt_addr, int mr_access_flags,
+			      struct ib_udata *udata);
+int rvt_dereg_mr(struct ib_mr *ibmr);
+struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
+			   enum ib_mr_type mr_type,
+			   u32 max_num_sg);
+struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			     struct ib_fmr_attr *fmr_attr);
+int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		     int list_len, u64 iova);
+int rvt_unmap_fmr(struct list_head *fmr_list);
+int rvt_dealloc_fmr(struct ib_fmr *ibfmr);
+
+#endif          /* DEF_RVTMR_H */
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
new file mode 100644
index 000000000000..d1292f324c67
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "pd.h"
+
+/**
+ * rvt_alloc_pd - allocate a protection domain
+ * @ibdev: ib device
+ * @context: optional user context
+ * @udata: optional user data
+ *
+ * Allocate and keep track of a PD.
+ *
+ * Return: 0 on success
+ */
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+			   struct ib_ucontext *context,
+			   struct ib_udata *udata)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(ibdev);
+	struct rvt_pd *pd;
+	struct ib_pd *ret;
+
+	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+	/*
+	 * While we could continue allocating protecetion domains, being
+	 * constrained only by system resources. The IBTA spec defines that
+	 * there is a max_pd limit that can be set and we need to check for
+	 * that.
+	 */
+
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
+
+	/* ib_alloc_pd() will initialize pd->ibpd. */
+	pd->user = udata ? 1 : 0;
+
+	ret = &pd->ibpd;
+
+bail:
+	return ret;
+}
+
+/**
+ * rvt_dealloc_pd - Free PD
+ * @ibpd: Free up PD
+ *
+ * Return: always 0
+ */
+int rvt_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+
+	spin_lock(&dev->n_pds_lock);
+	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
+
+	kfree(pd);
+
+	return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
new file mode 100644
index 000000000000..1892ca4a9746
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -0,0 +1,58 @@
+#ifndef DEF_RDMAVTPD_H
+#define DEF_RDMAVTPD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
+			   struct ib_ucontext *context,
+			   struct ib_udata *udata);
+int rvt_dealloc_pd(struct ib_pd *ibpd);
+
+#endif          /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
new file mode 100644
index 000000000000..bd82a6948dc8
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -0,0 +1,1696 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/hash.h>
+#include <linux/bitops.h>
+#include <linux/lockdep.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+#include "qp.h"
+#include "vt.h"
+#include "trace.h"
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+	[IB_QPS_RESET] = 0,
+	[IB_QPS_INIT] = RVT_POST_RECV_OK,
+	[IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+	[IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+	    RVT_PROCESS_NEXT_SEND_OK,
+	[IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+	[IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+	[IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+	    RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
+static void get_map_page(struct rvt_qpn_table *qpt,
+			 struct rvt_qpn_map *map,
+			 gfp_t gfp)
+{
+	unsigned long page = get_zeroed_page(gfp);
+
+	/*
+	 * Free the page if someone raced with us installing it.
+	 */
+
+	spin_lock(&qpt->lock);
+	if (map->page)
+		free_page(page);
+	else
+		map->page = (void *)page;
+	spin_unlock(&qpt->lock);
+}
+
+/**
+ * init_qpn_table - initialize the QP number table for a device
+ * @qpt: the QPN table
+ */
+static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
+{
+	u32 offset, i;
+	struct rvt_qpn_map *map;
+	int ret = 0;
+
+	if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
+		return -EINVAL;
+
+	spin_lock_init(&qpt->lock);
+
+	qpt->last = rdi->dparms.qpn_start;
+	qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
+
+	/*
+	 * Drivers may want some QPs beyond what we need for verbs let them use
+	 * our qpn table. No need for two. Lets go ahead and mark the bitmaps
+	 * for those. The reserved range must be *after* the range which verbs
+	 * will pick from.
+	 */
+
+	/* Figure out number of bit maps needed before reserved range */
+	qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
+
+	/* This should always be zero */
+	offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
+
+	/* Starting with the first reserved bit map */
+	map = &qpt->map[qpt->nmaps];
+
+	rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
+		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
+	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
+		if (!map->page) {
+			get_map_page(qpt, map, GFP_KERNEL);
+			if (!map->page) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+		set_bit(offset, map->page);
+		offset++;
+		if (offset == RVT_BITS_PER_PAGE) {
+			/* next page */
+			qpt->nmaps++;
+			map++;
+			offset = 0;
+		}
+	}
+	return ret;
+}
+
+/**
+ * free_qpn_table - free the QP number table for a device
+ * @qpt: the QPN table
+ */
+static void free_qpn_table(struct rvt_qpn_table *qpt)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
+		free_page((unsigned long)qpt->map[i].page);
+}
+
+/**
+ * rvt_driver_qp_init - Init driver qp resources
+ * @rdi: rvt dev strucutre
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_qp_init(struct rvt_dev_info *rdi)
+{
+	int i;
+	int ret = -ENOMEM;
+
+	if (!rdi->dparms.qp_table_size)
+		return -EINVAL;
+
+	/*
+	 * If driver is not doing any QP allocation then make sure it is
+	 * providing the necessary QP functions.
+	 */
+	if (!rdi->driver_f.free_all_qps ||
+	    !rdi->driver_f.qp_priv_alloc ||
+	    !rdi->driver_f.qp_priv_free ||
+	    !rdi->driver_f.notify_qp_reset)
+		return -EINVAL;
+
+	/* allocate parent object */
+	rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
+				   rdi->dparms.node);
+	if (!rdi->qp_dev)
+		return -ENOMEM;
+
+	/* allocate hash table */
+	rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
+	rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
+	rdi->qp_dev->qp_table =
+		kmalloc_node(rdi->qp_dev->qp_table_size *
+			     sizeof(*rdi->qp_dev->qp_table),
+			     GFP_KERNEL, rdi->dparms.node);
+	if (!rdi->qp_dev->qp_table)
+		goto no_qp_table;
+
+	for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
+		RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
+
+	spin_lock_init(&rdi->qp_dev->qpt_lock);
+
+	/* initialize qpn map */
+	if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
+		goto fail_table;
+
+	spin_lock_init(&rdi->n_qps_lock);
+
+	return 0;
+
+fail_table:
+	kfree(rdi->qp_dev->qp_table);
+	free_qpn_table(&rdi->qp_dev->qpn_table);
+
+no_qp_table:
+	kfree(rdi->qp_dev);
+
+	return ret;
+}
+
+/**
+ * free_all_qps - check for QPs still in use
+ * @qpt: the QP table to empty
+ *
+ * There should not be any QPs still in use.
+ * Free memory for table.
+ */
+static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
+{
+	unsigned long flags;
+	struct rvt_qp *qp;
+	unsigned n, qp_inuse = 0;
+	spinlock_t *ql; /* work around too long line below */
+
+	if (rdi->driver_f.free_all_qps)
+		qp_inuse = rdi->driver_f.free_all_qps(rdi);
+
+	qp_inuse += rvt_mcast_tree_empty(rdi);
+
+	if (!rdi->qp_dev)
+		return qp_inuse;
+
+	ql = &rdi->qp_dev->qpt_lock;
+	spin_lock_irqsave(ql, flags);
+	for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
+		qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
+					       lockdep_is_held(ql));
+		RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
+
+		for (; qp; qp = rcu_dereference_protected(qp->next,
+							  lockdep_is_held(ql)))
+			qp_inuse++;
+	}
+	spin_unlock_irqrestore(ql, flags);
+	synchronize_rcu();
+	return qp_inuse;
+}
+
+/**
+ * rvt_qp_exit - clean up qps on device exit
+ * @rdi: rvt dev structure
+ *
+ * Check for qp leaks and free resources.
+ */
+void rvt_qp_exit(struct rvt_dev_info *rdi)
+{
+	u32 qps_inuse = rvt_free_all_qps(rdi);
+
+	if (qps_inuse)
+		rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
+			   qps_inuse);
+	if (!rdi->qp_dev)
+		return;
+
+	kfree(rdi->qp_dev->qp_table);
+	free_qpn_table(&rdi->qp_dev->qpn_table);
+	kfree(rdi->qp_dev);
+}
+
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
+{
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
+}
+
+/**
+ * alloc_qpn - Allocate the next available qpn or zero/one for QP type
+ *	       IB_QPT_SMI/IB_QPT_GSI
+ *@rdi:	rvt device info structure
+ *@qpt: queue pair number table pointer
+ *@port_num: IB port number, 1 based, comes from core
+ *
+ * Return: The queue pair number
+ */
+static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		     enum ib_qp_type type, u8 port_num, gfp_t gfp)
+{
+	u32 i, offset, max_scan, qpn;
+	struct rvt_qpn_map *map;
+	u32 ret;
+
+	if (rdi->driver_f.alloc_qpn)
+		return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);
+
+	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
+		unsigned n;
+
+		ret = type == IB_QPT_GSI;
+		n = 1 << (ret + 2 * (port_num - 1));
+		spin_lock(&qpt->lock);
+		if (qpt->flags & n)
+			ret = -EINVAL;
+		else
+			qpt->flags |= n;
+		spin_unlock(&qpt->lock);
+		goto bail;
+	}
+
+	qpn = qpt->last + qpt->incr;
+	if (qpn >= RVT_QPN_MAX)
+		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
+	/* offset carries bit 0 */
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
+	max_scan = qpt->nmaps - !offset;
+	for (i = 0;;) {
+		if (unlikely(!map->page)) {
+			get_map_page(qpt, map, gfp);
+			if (unlikely(!map->page))
+				break;
+		}
+		do {
+			if (!test_and_set_bit(offset, map->page)) {
+				qpt->last = qpn;
+				ret = qpn;
+				goto bail;
+			}
+			offset += qpt->incr;
+			/*
+			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
+			 * That is OK.   It gets re-assigned below
+			 */
+			qpn = mk_qpn(qpt, map, offset);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
+		/*
+		 * In order to keep the number of pages allocated to a
+		 * minimum, we scan the all existing pages before increasing
+		 * the size of the bitmap table.
+		 */
+		if (++i > max_scan) {
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
+				break;
+			map = &qpt->map[qpt->nmaps++];
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else if (map < &qpt->map[qpt->nmaps]) {
+			++map;
+			/* start at incr with current bit 0 */
+			offset = qpt->incr | (offset & 1);
+		} else {
+			map = &qpt->map[0];
+			/* wrap to first map page, invert bit 0 */
+			offset = qpt->incr | ((offset & 1) ^ 1);
+		}
+		/* there can be no bits at shift and below */
+		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		qpn = mk_qpn(qpt, map, offset);
+	}
+
+	ret = -ENOMEM;
+
+bail:
+	return ret;
+}
+
+static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+	struct rvt_qpn_map *map;
+
+	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+	if (map->page)
+		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_clear_mr_refs - Drop help mr refs
+ * @qp: rvt qp data structure
+ * @clr_sends: If shoudl clear send side or not
+ */
+static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
+{
+	unsigned n;
+
+	if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+		rvt_put_ss(&qp->s_rdma_read_sge);
+
+	rvt_put_ss(&qp->r_sge);
+
+	if (clr_sends) {
+		while (qp->s_last != qp->s_head) {
+			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+			unsigned i;
+
+			for (i = 0; i < wqe->wr.num_sge; i++) {
+				struct rvt_sge *sge = &wqe->sg_list[i];
+
+				rvt_put_mr(sge->mr);
+			}
+			if (qp->ibqp.qp_type == IB_QPT_UD ||
+			    qp->ibqp.qp_type == IB_QPT_SMI ||
+			    qp->ibqp.qp_type == IB_QPT_GSI)
+				atomic_dec(&ibah_to_rvtah(
+						wqe->ud_wr.ah)->refcount);
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			smp_wmb(); /* see qp_set_savail */
+		}
+		if (qp->s_rdma_mr) {
+			rvt_put_mr(qp->s_rdma_mr);
+			qp->s_rdma_mr = NULL;
+		}
+	}
+
+	if (qp->ibqp.qp_type != IB_QPT_RC)
+		return;
+
+	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
+		struct rvt_ack_entry *e = &qp->s_ack_queue[n];
+
+		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
+		    e->rdma_sge.mr) {
+			rvt_put_mr(e->rdma_sge.mr);
+			e->rdma_sge.mr = NULL;
+		}
+	}
+}
+
+/**
+ * rvt_remove_qp - remove qp form table
+ * @rdi: rvt dev struct
+ * @qp: qp to remove
+ *
+ * Remove the QP from the table so it can't be found asynchronously by
+ * the receive routine.
+ */
+static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+	u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+	unsigned long flags;
+	int removed = 1;
+
+	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+	if (rcu_dereference_protected(rvp->qp[0],
+			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+		RCU_INIT_POINTER(rvp->qp[0], NULL);
+	} else if (rcu_dereference_protected(rvp->qp[1],
+			lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
+		RCU_INIT_POINTER(rvp->qp[1], NULL);
+	} else {
+		struct rvt_qp *q;
+		struct rvt_qp __rcu **qpp;
+
+		removed = 0;
+		qpp = &rdi->qp_dev->qp_table[n];
+		for (; (q = rcu_dereference_protected(*qpp,
+			lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
+			qpp = &q->next) {
+			if (q == qp) {
+				RCU_INIT_POINTER(*qpp,
+				     rcu_dereference_protected(qp->next,
+				     lockdep_is_held(&rdi->qp_dev->qpt_lock)));
+				removed = 1;
+				trace_rvt_qpremove(qp, n);
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+	if (removed) {
+		synchronize_rcu();
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+}
+
+/**
+ * reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ * r and s lock are required to be held by the caller
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		  enum ib_qp_type type)
+{
+	if (qp->state != IB_QPS_RESET) {
+		qp->state = IB_QPS_RESET;
+
+		/* Let drivers flush their waitlist */
+		rdi->driver_f.flush_qp_waiters(qp);
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+		spin_unlock(&qp->s_lock);
+		spin_unlock(&qp->s_hlock);
+		spin_unlock_irq(&qp->r_lock);
+
+		/* Stop the send queue and the retry timer */
+		rdi->driver_f.stop_send_queue(qp);
+
+		/* Wait for things to stop */
+		rdi->driver_f.quiesce_qp(qp);
+
+		/* take qp out the hash and wait for it to be unused */
+		rvt_remove_qp(rdi, qp);
+		wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+		/* grab the lock b/c it was locked at call time */
+		spin_lock_irq(&qp->r_lock);
+		spin_lock(&qp->s_hlock);
+		spin_lock(&qp->s_lock);
+
+		rvt_clear_mr_refs(qp, 1);
+	}
+
+	/*
+	 * Let the driver do any tear down it needs to for a qp
+	 * that has been reset
+	 */
+	rdi->driver_f.notify_qp_reset(qp);
+
+	qp->remote_qpn = 0;
+	qp->qkey = 0;
+	qp->qp_access_flags = 0;
+	qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
+	qp->s_hdrwords = 0;
+	qp->s_wqe = NULL;
+	qp->s_draining = 0;
+	qp->s_next_psn = 0;
+	qp->s_last_psn = 0;
+	qp->s_sending_psn = 0;
+	qp->s_sending_hpsn = 0;
+	qp->s_psn = 0;
+	qp->r_psn = 0;
+	qp->r_msn = 0;
+	if (type == IB_QPT_RC) {
+		qp->s_state = IB_OPCODE_RC_SEND_LAST;
+		qp->r_state = IB_OPCODE_RC_SEND_LAST;
+	} else {
+		qp->s_state = IB_OPCODE_UC_SEND_LAST;
+		qp->r_state = IB_OPCODE_UC_SEND_LAST;
+	}
+	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
+	qp->r_nak_state = 0;
+	qp->r_aflags = 0;
+	qp->r_flags = 0;
+	qp->s_head = 0;
+	qp->s_tail = 0;
+	qp->s_cur = 0;
+	qp->s_acked = 0;
+	qp->s_last = 0;
+	qp->s_ssn = 1;
+	qp->s_lsn = 0;
+	qp->s_mig_state = IB_MIG_MIGRATED;
+	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
+	qp->r_head_ack_queue = 0;
+	qp->s_tail_ack_queue = 0;
+	qp->s_num_rd_atomic = 0;
+	if (qp->r_rq.wq) {
+		qp->r_rq.wq->head = 0;
+		qp->r_rq.wq->tail = 0;
+	}
+	qp->r_sge.num_sge = 0;
+}
+
+/**
+ * rvt_create_qp - create a queue pair for a device
+ * @ibpd: the protection domain who's device we create the queue pair for
+ * @init_attr: the attributes of the queue pair
+ * @udata: user data for libibverbs.so
+ *
+ * Queue pair creation is mostly an rvt issue. However, drivers have their own
+ * unique idea of what queue pair numbers mean. For instance there is a reserved
+ * range for PSM.
+ *
+ * Return: the queue pair on success, otherwise returns an errno.
+ *
+ * Called by the ib_create_qp() core verbs function.
+ */
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata)
+{
+	struct rvt_qp *qp;
+	int err;
+	struct rvt_swqe *swq = NULL;
+	size_t sz;
+	size_t sg_list_sz;
+	struct ib_qp *ret = ERR_PTR(-ENOMEM);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	void *priv = NULL;
+	gfp_t gfp;
+
+	if (!rdi)
+		return ERR_PTR(-EINVAL);
+
+	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
+	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+		return ERR_PTR(-EINVAL);
+
+	/* GFP_NOIO is applicable to RC QP's only */
+
+	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+	    init_attr->qp_type != IB_QPT_RC)
+		return ERR_PTR(-EINVAL);
+
+	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+						GFP_NOIO : GFP_KERNEL;
+
+	/* Check receive queue parameters if no SRQ is specified. */
+	if (!init_attr->srq) {
+		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
+			return ERR_PTR(-EINVAL);
+
+		if (init_attr->cap.max_send_sge +
+		    init_attr->cap.max_send_wr +
+		    init_attr->cap.max_recv_sge +
+		    init_attr->cap.max_recv_wr == 0)
+			return ERR_PTR(-EINVAL);
+	}
+
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		if (init_attr->port_num == 0 ||
+		    init_attr->port_num > ibpd->device->phys_port_cnt)
+			return ERR_PTR(-EINVAL);
+	case IB_QPT_UC:
+	case IB_QPT_RC:
+	case IB_QPT_UD:
+		sz = sizeof(struct rvt_sge) *
+			init_attr->cap.max_send_sge +
+			sizeof(struct rvt_swqe);
+		if (gfp == GFP_NOIO)
+			swq = __vmalloc(
+				(init_attr->cap.max_send_wr + 1) * sz,
+				gfp, PAGE_KERNEL);
+		else
+			swq = vmalloc_node(
+				(init_attr->cap.max_send_wr + 1) * sz,
+				rdi->dparms.node);
+		if (!swq)
+			return ERR_PTR(-ENOMEM);
+
+		sz = sizeof(*qp);
+		sg_list_sz = 0;
+		if (init_attr->srq) {
+			struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
+
+			if (srq->rq.max_sge > 1)
+				sg_list_sz = sizeof(*qp->r_sg_list) *
+					(srq->rq.max_sge - 1);
+		} else if (init_attr->cap.max_recv_sge > 1)
+			sg_list_sz = sizeof(*qp->r_sg_list) *
+				(init_attr->cap.max_recv_sge - 1);
+		qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node);
+		if (!qp)
+			goto bail_swq;
+
+		RCU_INIT_POINTER(qp->next, NULL);
+
+		/*
+		 * Driver needs to set up it's private QP structure and do any
+		 * initialization that is needed.
+		 */
+		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
+		if (!priv)
+			goto bail_qp;
+		qp->priv = priv;
+		qp->timeout_jiffies =
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+				1000UL);
+		if (init_attr->srq) {
+			sz = 0;
+		} else {
+			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
+			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
+			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
+				sizeof(struct rvt_rwqe);
+			if (udata)
+				qp->r_rq.wq = vmalloc_user(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz);
+			else if (gfp == GFP_NOIO)
+				qp->r_rq.wq = __vmalloc(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz,
+						gfp, PAGE_KERNEL);
+			else
+				qp->r_rq.wq = vmalloc_node(
+						sizeof(struct rvt_rwq) +
+						qp->r_rq.size * sz,
+						rdi->dparms.node);
+			if (!qp->r_rq.wq)
+				goto bail_driver_priv;
+		}
+
+		/*
+		 * ib_create_qp() will initialize qp->ibqp
+		 * except for qp->ibqp.qp_num.
+		 */
+		spin_lock_init(&qp->r_lock);
+		spin_lock_init(&qp->s_hlock);
+		spin_lock_init(&qp->s_lock);
+		spin_lock_init(&qp->r_rq.lock);
+		atomic_set(&qp->refcount, 0);
+		init_waitqueue_head(&qp->wait);
+		init_timer(&qp->s_timer);
+		qp->s_timer.data = (unsigned long)qp;
+		INIT_LIST_HEAD(&qp->rspwait);
+		qp->state = IB_QPS_RESET;
+		qp->s_wq = swq;
+		qp->s_size = init_attr->cap.max_send_wr + 1;
+		qp->s_avail = init_attr->cap.max_send_wr;
+		qp->s_max_sge = init_attr->cap.max_send_sge;
+		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
+			qp->s_flags = RVT_S_SIGNAL_REQ_WR;
+
+		err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
+				init_attr->qp_type,
+				init_attr->port_num, gfp);
+		if (err < 0) {
+			ret = ERR_PTR(err);
+			goto bail_rq_wq;
+		}
+		qp->ibqp.qp_num = err;
+		qp->port_num = init_attr->port_num;
+		rvt_reset_qp(rdi, qp, init_attr->qp_type);
+		break;
+
+	default:
+		/* Don't support raw QPs */
+		return ERR_PTR(-EINVAL);
+	}
+
+	init_attr->cap.max_inline_data = 0;
+
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See rvt_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		if (!qp->r_rq.wq) {
+			__u64 offset = 0;
+
+			err = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_qpn;
+			}
+		} else {
+			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
+
+			qp->ip = rvt_create_mmap_info(rdi, s,
+						      ibpd->uobject->context,
+						      qp->r_rq.wq);
+			if (!qp->ip) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_qpn;
+			}
+
+			err = ib_copy_to_udata(udata, &qp->ip->offset,
+					       sizeof(qp->ip->offset));
+			if (err) {
+				ret = ERR_PTR(err);
+				goto bail_ip;
+			}
+		}
+		qp->pid = current->pid;
+	}
+
+	spin_lock(&rdi->n_qps_lock);
+	if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
+		spin_unlock(&rdi->n_qps_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	rdi->n_qps_allocated++;
+	/*
+	 * Maintain a busy_jiffies variable that will be added to the timeout
+	 * period in mod_retry_timer and add_retry_timer. This busy jiffies
+	 * is scaled by the number of rc qps created for the device to reduce
+	 * the number of timeouts occurring when there is a large number of
+	 * qps. busy_jiffies is incremented every rc qp scaling interval.
+	 * The scaling interval is selected based on extensive performance
+	 * evaluation of targeted workloads.
+	 */
+	if (init_attr->qp_type == IB_QPT_RC) {
+		rdi->n_rc_qps++;
+		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+	}
+	spin_unlock(&rdi->n_qps_lock);
+
+	if (qp->ip) {
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
+	}
+
+	ret = &qp->ibqp;
+
+	/*
+	 * We have our QP and its good, now keep track of what types of opcodes
+	 * can be processed on this QP. We do this by keeping track of what the
+	 * 3 high order bits of the opcode are.
+	 */
+	switch (init_attr->qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_RC:
+		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	case IB_QPT_UC:
+		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK;
+		break;
+	default:
+		ret = ERR_PTR(-EINVAL);
+		goto bail_ip;
+	}
+
+	return ret;
+
+bail_ip:
+	kref_put(&qp->ip->ref, rvt_release_mmap_info);
+
+bail_qpn:
+	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+bail_rq_wq:
+	vfree(qp->r_rq.wq);
+
+bail_driver_priv:
+	rdi->driver_f.qp_priv_free(rdi, qp);
+
+bail_qp:
+	kfree(qp);
+
+bail_swq:
+	vfree(swq);
+
+	return ret;
+}
+
+/**
+ * rvt_error_qp - put a QP into the error state
+ * @qp: the QP to put into the error state
+ * @err: the receive completion error to signal if a RWQE is active
+ *
+ * Flushes both send and receive work queues.
+ *
+ * Return: true if last WQE event should be generated.
+ * The QP r_lock and s_lock should be held and interrupts disabled.
+ * If we are already in error state, just return.
+ */
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
+{
+	struct ib_wc wc;
+	int ret = 0;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
+		goto bail;
+
+	qp->state = IB_QPS_ERR;
+
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+	}
+
+	if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
+		qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
+
+	rdi->driver_f.notify_error_qp(qp);
+
+	/* Schedule the sending tasklet to drain the send work queue. */
+	if (ACCESS_ONCE(qp->s_last) != qp->s_head)
+		rdi->driver_f.schedule_send(qp);
+
+	rvt_clear_mr_refs(qp, 0);
+
+	memset(&wc, 0, sizeof(wc));
+	wc.qp = &qp->ibqp;
+	wc.opcode = IB_WC_RECV;
+
+	if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
+		wc.wr_id = qp->r_wr_id;
+		wc.status = err;
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+	}
+	wc.status = IB_WC_WR_FLUSH_ERR;
+
+	if (qp->r_rq.wq) {
+		struct rvt_rwq *wq;
+		u32 head;
+		u32 tail;
+
+		spin_lock(&qp->r_rq.lock);
+
+		/* sanity check pointers before trusting them */
+		wq = qp->r_rq.wq;
+		head = wq->head;
+		if (head >= qp->r_rq.size)
+			head = 0;
+		tail = wq->tail;
+		if (tail >= qp->r_rq.size)
+			tail = 0;
+		while (tail != head) {
+			wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
+			if (++tail >= qp->r_rq.size)
+				tail = 0;
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+		}
+		wq->tail = tail;
+
+		spin_unlock(&qp->r_rq.lock);
+	} else if (qp->ibqp.event_handler) {
+		ret = 1;
+	}
+
+bail:
+	return ret;
+}
+EXPORT_SYMBOL(rvt_error_qp);
+
+/*
+ * Put the QP into the hash table.
+ * The hash table holds a reference to the QP.
+ */
+static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
+	unsigned long flags;
+
+	atomic_inc(&qp->refcount);
+	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
+
+	if (qp->ibqp.qp_num <= 1) {
+		rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
+	} else {
+		u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
+
+		qp->next = rdi->qp_dev->qp_table[n];
+		rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
+		trace_rvt_qpinsert(qp, n);
+	}
+
+	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
+}
+
+/**
+ * qib_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair who's attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success, otherwise returns an errno.
+ */
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	enum ib_qp_state cur_state, new_state;
+	struct ib_event ev;
+	int lastwqe = 0;
+	int mig = 0;
+	int pmtu = 0; /* for gcc warning only */
+	enum rdma_link_layer link;
+
+	link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
+
+	spin_lock_irq(&qp->r_lock);
+	spin_lock(&qp->s_hlock);
+	spin_lock(&qp->s_lock);
+
+	cur_state = attr_mask & IB_QP_CUR_STATE ?
+		attr->cur_qp_state : qp->state;
+	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+				attr_mask, link))
+		goto inval;
+
+	if (rdi->driver_f.check_modify_qp &&
+	    rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
+		goto inval;
+
+	if (attr_mask & IB_QP_AV) {
+		if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+			goto inval;
+		if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		if (attr->alt_ah_attr.dlid >=
+		    be16_to_cpu(IB_MULTICAST_LID_BASE))
+			goto inval;
+		if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
+			goto inval;
+		if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		if (attr->pkey_index >= rvt_get_npkeys(rdi))
+			goto inval;
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		if (attr->min_rnr_timer > 31)
+			goto inval;
+
+	if (attr_mask & IB_QP_PORT)
+		if (qp->ibqp.qp_type == IB_QPT_SMI ||
+		    qp->ibqp.qp_type == IB_QPT_GSI ||
+		    attr->port_num == 0 ||
+		    attr->port_num > ibqp->device->phys_port_cnt)
+			goto inval;
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		if (attr->dest_qp_num > RVT_QPN_MASK)
+			goto inval;
+
+	if (attr_mask & IB_QP_RETRY_CNT)
+		if (attr->retry_cnt > 7)
+			goto inval;
+
+	if (attr_mask & IB_QP_RNR_RETRY)
+		if (attr->rnr_retry > 7)
+			goto inval;
+
+	/*
+	 * Don't allow invalid path_mtu values.  OK to set greater
+	 * than the active mtu (or even the max_cap, if we have tuned
+	 * that to a small mtu.  We'll set qp->path_mtu
+	 * to the lesser of requested attribute mtu and active,
+	 * for packetizing messages.
+	 * Note that the QP port has to be set in INIT and MTU in RTR.
+	 */
+	if (attr_mask & IB_QP_PATH_MTU) {
+		pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
+		if (pmtu < 0)
+			goto inval;
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		if (attr->path_mig_state == IB_MIG_REARM) {
+			if (qp->s_mig_state == IB_MIG_ARMED)
+				goto inval;
+			if (new_state != IB_QPS_RTS)
+				goto inval;
+		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
+			if (qp->s_mig_state == IB_MIG_REARM)
+				goto inval;
+			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
+				goto inval;
+			if (qp->s_mig_state == IB_MIG_ARMED)
+				mig = 1;
+		} else {
+			goto inval;
+		}
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
+			goto inval;
+
+	switch (new_state) {
+	case IB_QPS_RESET:
+		if (qp->state != IB_QPS_RESET)
+			rvt_reset_qp(rdi, qp, ibqp->qp_type);
+		break;
+
+	case IB_QPS_RTR:
+		/* Allow event to re-trigger if QP set to RTR more than once */
+		qp->r_flags &= ~RVT_R_COMM_EST;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQD:
+		qp->s_draining = qp->s_last != qp->s_cur;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_SQE:
+		if (qp->ibqp.qp_type == IB_QPT_RC)
+			goto inval;
+		qp->state = new_state;
+		break;
+
+	case IB_QPS_ERR:
+		lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		break;
+
+	default:
+		qp->state = new_state;
+		break;
+	}
+
+	if (attr_mask & IB_QP_PKEY_INDEX)
+		qp->s_pkey_index = attr->pkey_index;
+
+	if (attr_mask & IB_QP_PORT)
+		qp->port_num = attr->port_num;
+
+	if (attr_mask & IB_QP_DEST_QPN)
+		qp->remote_qpn = attr->dest_qp_num;
+
+	if (attr_mask & IB_QP_SQ_PSN) {
+		qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
+		qp->s_psn = qp->s_next_psn;
+		qp->s_sending_psn = qp->s_next_psn;
+		qp->s_last_psn = qp->s_next_psn - 1;
+		qp->s_sending_hpsn = qp->s_last_psn;
+	}
+
+	if (attr_mask & IB_QP_RQ_PSN)
+		qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		qp->qp_access_flags = attr->qp_access_flags;
+
+	if (attr_mask & IB_QP_AV) {
+		qp->remote_ah_attr = attr->ah_attr;
+		qp->s_srate = attr->ah_attr.static_rate;
+		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
+	}
+
+	if (attr_mask & IB_QP_ALT_PATH) {
+		qp->alt_ah_attr = attr->alt_ah_attr;
+		qp->s_alt_pkey_index = attr->alt_pkey_index;
+	}
+
+	if (attr_mask & IB_QP_PATH_MIG_STATE) {
+		qp->s_mig_state = attr->path_mig_state;
+		if (mig) {
+			qp->remote_ah_attr = qp->alt_ah_attr;
+			qp->port_num = qp->alt_ah_attr.port_num;
+			qp->s_pkey_index = qp->s_alt_pkey_index;
+		}
+	}
+
+	if (attr_mask & IB_QP_PATH_MTU) {
+		qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
+		qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
+		qp->log_pmtu = ilog2(qp->pmtu);
+	}
+
+	if (attr_mask & IB_QP_RETRY_CNT) {
+		qp->s_retry_cnt = attr->retry_cnt;
+		qp->s_retry = attr->retry_cnt;
+	}
+
+	if (attr_mask & IB_QP_RNR_RETRY) {
+		qp->s_rnr_retry_cnt = attr->rnr_retry;
+		qp->s_rnr_retry = attr->rnr_retry;
+	}
+
+	if (attr_mask & IB_QP_MIN_RNR_TIMER)
+		qp->r_min_rnr_timer = attr->min_rnr_timer;
+
+	if (attr_mask & IB_QP_TIMEOUT) {
+		qp->timeout = attr->timeout;
+		qp->timeout_jiffies =
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+				1000UL);
+	}
+
+	if (attr_mask & IB_QP_QKEY)
+		qp->qkey = attr->qkey;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+		qp->s_max_rd_atomic = attr->max_rd_atomic;
+
+	if (rdi->driver_f.modify_qp)
+		rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
+
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		rvt_insert_qp(rdi, qp);
+
+	if (lastwqe) {
+		ev.device = qp->ibqp.device;
+		ev.element.qp = &qp->ibqp;
+		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+	}
+	if (mig) {
+		ev.device = qp->ibqp.device;
+		ev.element.qp = &qp->ibqp;
+		ev.event = IB_EVENT_PATH_MIG;
+		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+	}
+	return 0;
+
+inval:
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+	return -EINVAL;
+}
+
+/** rvt_free_qpn - Free a qpn from the bit map
+ * @qpt: QP table
+ * @qpn: queue pair number to free
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+	struct rvt_qpn_map *map;
+
+	map = qpt->map + qpn / RVT_BITS_PER_PAGE;
+	if (map->page)
+		clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
+/**
+ * rvt_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ *
+ * Return: 0 on success.
+ */
+int rvt_destroy_qp(struct ib_qp *ibqp)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	spin_lock_irq(&qp->r_lock);
+	spin_lock(&qp->s_hlock);
+	spin_lock(&qp->s_lock);
+	rvt_reset_qp(rdi, qp, ibqp->qp_type);
+	spin_unlock(&qp->s_lock);
+	spin_unlock(&qp->s_hlock);
+	spin_unlock_irq(&qp->r_lock);
+
+	/* qpn is now available for use again */
+	rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+
+	spin_lock(&rdi->n_qps_lock);
+	rdi->n_qps_allocated--;
+	if (qp->ibqp.qp_type == IB_QPT_RC) {
+		rdi->n_rc_qps--;
+		rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
+	}
+	spin_unlock(&rdi->n_qps_lock);
+
+	if (qp->ip)
+		kref_put(&qp->ip->ref, rvt_release_mmap_info);
+	else
+		vfree(qp->r_rq.wq);
+	vfree(qp->s_wq);
+	rdi->driver_f.qp_priv_free(rdi, qp);
+	kfree(qp);
+	return 0;
+}
+
+/**
+ * rvt_query_qp - query an ipbq
+ * @ibqp: IB qp to query
+ * @attr: attr struct to fill in
+ * @attr_mask: attr mask ignored
+ * @init_attr: struct to fill in
+ *
+ * Return: always 0
+ */
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	attr->qp_state = qp->state;
+	attr->cur_qp_state = attr->qp_state;
+	attr->path_mtu = qp->path_mtu;
+	attr->path_mig_state = qp->s_mig_state;
+	attr->qkey = qp->qkey;
+	attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
+	attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
+	attr->dest_qp_num = qp->remote_qpn;
+	attr->qp_access_flags = qp->qp_access_flags;
+	attr->cap.max_send_wr = qp->s_size - 1;
+	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
+	attr->cap.max_send_sge = qp->s_max_sge;
+	attr->cap.max_recv_sge = qp->r_rq.max_sge;
+	attr->cap.max_inline_data = 0;
+	attr->ah_attr = qp->remote_ah_attr;
+	attr->alt_ah_attr = qp->alt_ah_attr;
+	attr->pkey_index = qp->s_pkey_index;
+	attr->alt_pkey_index = qp->s_alt_pkey_index;
+	attr->en_sqd_async_notify = 0;
+	attr->sq_draining = qp->s_draining;
+	attr->max_rd_atomic = qp->s_max_rd_atomic;
+	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
+	attr->min_rnr_timer = qp->r_min_rnr_timer;
+	attr->port_num = qp->port_num;
+	attr->timeout = qp->timeout;
+	attr->retry_cnt = qp->s_retry_cnt;
+	attr->rnr_retry = qp->s_rnr_retry_cnt;
+	attr->alt_port_num = qp->alt_ah_attr.port_num;
+	attr->alt_timeout = qp->alt_timeout;
+
+	init_attr->event_handler = qp->ibqp.event_handler;
+	init_attr->qp_context = qp->ibqp.qp_context;
+	init_attr->send_cq = qp->ibqp.send_cq;
+	init_attr->recv_cq = qp->ibqp.recv_cq;
+	init_attr->srq = qp->ibqp.srq;
+	init_attr->cap = attr->cap;
+	if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
+		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
+	else
+		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+	init_attr->qp_type = qp->ibqp.qp_type;
+	init_attr->port_num = qp->port_num;
+	return 0;
+}
+
+/**
+ * rvt_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success otherwise errno
+ */
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		  struct ib_recv_wr **bad_wr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_rwq *wq = qp->r_rq.wq;
+	unsigned long flags;
+	int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
+				!qp->ibqp.srq;
+
+	/* Check that state is OK to post receive. */
+	if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
+	for (; wr; wr = wr->next) {
+		struct rvt_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
+			*bad_wr = wr;
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&qp->r_rq.lock, flags);
+		next = wq->head + 1;
+		if (next >= qp->r_rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+			*bad_wr = wr;
+			return -ENOMEM;
+		}
+		if (unlikely(qp_err_flush)) {
+			struct ib_wc wc;
+
+			memset(&wc, 0, sizeof(wc));
+			wc.qp = &qp->ibqp;
+			wc.opcode = IB_WC_RECV;
+			wc.wr_id = wr->wr_id;
+			wc.status = IB_WC_WR_FLUSH_ERR;
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+		} else {
+			wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
+			wqe->wr_id = wr->wr_id;
+			wqe->num_sge = wr->num_sge;
+			for (i = 0; i < wr->num_sge; i++)
+				wqe->sg_list[i] = wr->sg_list[i];
+			/*
+			 * Make sure queue entry is written
+			 * before the head index.
+			 */
+			smp_wmb();
+			wq->head = next;
+		}
+		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	}
+	return 0;
+}
+
+/**
+ * qp_get_savail - return number of avail send entries
+ *
+ * @qp - the qp
+ *
+ * This assumes the s_hlock is held but the s_last
+ * qp variable is uncontrolled.
+ */
+static inline u32 qp_get_savail(struct rvt_qp *qp)
+{
+	u32 slast;
+	u32 ret;
+
+	smp_read_barrier_depends(); /* see rc.c */
+	slast = ACCESS_ONCE(qp->s_last);
+	if (qp->s_head >= slast)
+		ret = qp->s_size - (qp->s_head - slast);
+	else
+		ret = slast - qp->s_head;
+	return ret - 1;
+}
+
+/**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp,
+			   struct ib_send_wr *wr,
+			   int *call_send)
+{
+	struct rvt_swqe *wqe;
+	u32 next;
+	int i;
+	int j;
+	int acc;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+	u8 log_pmtu;
+	int ret;
+
+	/* IB spec says that num_sge == 0 is OK. */
+	if (unlikely(wr->num_sge > qp->s_max_sge))
+		return -EINVAL;
+
+	/*
+	 * Don't allow RDMA reads or atomic operations on UC or
+	 * undefined operations.
+	 * Make sure buffer is large enough to hold the result for atomics.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_UC) {
+		if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+			return -EINVAL;
+	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
+		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+		if (wr->opcode != IB_WR_SEND &&
+		    wr->opcode != IB_WR_SEND_WITH_IMM)
+			return -EINVAL;
+		/* Check UD destination address PD */
+		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+			return -EINVAL;
+	} else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+		   (wr->num_sge == 0 ||
+		    wr->sg_list[0].length < sizeof(u64) ||
+		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+		return -EINVAL;
+	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+		return -EINVAL;
+	}
+	/* check for avail */
+	if (unlikely(!qp->s_avail)) {
+		qp->s_avail = qp_get_savail(qp);
+		if (WARN_ON(qp->s_avail > (qp->s_size - 1)))
+			rvt_pr_err(rdi,
+				   "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
+				   qp->ibqp.qp_num, qp->s_size, qp->s_avail,
+				   qp->s_head, qp->s_tail, qp->s_cur,
+				   qp->s_acked, qp->s_last);
+		if (!qp->s_avail)
+			return -ENOMEM;
+	}
+	next = qp->s_head + 1;
+	if (next >= qp->s_size)
+		next = 0;
+
+	rkt = &rdi->lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.pd);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+		 wr->opcode == IB_WR_RDMA_WRITE ||
+		 wr->opcode == IB_WR_RDMA_READ)
+		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+	else
+		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+	wqe->length = 0;
+	j = 0;
+	if (wr->num_sge) {
+		acc = wr->opcode >= IB_WR_RDMA_READ ?
+			IB_ACCESS_LOCAL_WRITE : 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			u32 length = wr->sg_list[i].length;
+			int ok;
+
+			if (length == 0)
+				continue;
+			ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+					 &wr->sg_list[i], acc);
+			if (!ok) {
+				ret = -EINVAL;
+				goto bail_inval_free;
+			}
+			wqe->length += length;
+			j++;
+		}
+		wqe->wr.num_sge = j;
+	}
+
+	/* general part of wqe valid - allow for driver checks */
+	if (rdi->driver_f.check_send_wqe) {
+		ret = rdi->driver_f.check_send_wqe(qp, wqe);
+		if (ret < 0)
+			goto bail_inval_free;
+		if (ret)
+			*call_send = ret;
+	}
+
+	log_pmtu = qp->log_pmtu;
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC) {
+		struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
+
+		log_pmtu = ah->log_pmtu;
+		atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+	}
+
+	wqe->ssn = qp->s_ssn++;
+	wqe->psn = qp->s_next_psn;
+	wqe->lpsn = wqe->psn +
+			(wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0);
+	qp->s_next_psn = wqe->lpsn + 1;
+	trace_rvt_post_one_wr(qp, wqe);
+	smp_wmb(); /* see request builders */
+	qp->s_avail--;
+	qp->s_head = next;
+
+	return 0;
+
+bail_inval_free:
+	/* release mr holds */
+	while (j) {
+		struct rvt_sge *sge = &wqe->sg_list[--j];
+
+		rvt_put_mr(sge->mr);
+	}
+	return ret;
+}
+
+/**
+ * rvt_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		  struct ib_send_wr **bad_wr)
+{
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	unsigned long flags = 0;
+	int call_send;
+	unsigned nreq = 0;
+	int err = 0;
+
+	spin_lock_irqsave(&qp->s_hlock, flags);
+
+	/*
+	 * Ensure QP state is such that we can send. If not bail out early,
+	 * there is no need to do this every time we post a send.
+	 */
+	if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+		spin_unlock_irqrestore(&qp->s_hlock, flags);
+		return -EINVAL;
+	}
+
+	/*
+	 * If the send queue is empty, and we only have a single WR then just go
+	 * ahead and kick the send engine into gear. Otherwise we will always
+	 * just schedule the send to happen later.
+	 */
+	call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+	for (; wr; wr = wr->next) {
+		err = rvt_post_one_wr(qp, wr, &call_send);
+		if (unlikely(err)) {
+			*bad_wr = wr;
+			goto bail;
+		}
+		nreq++;
+	}
+bail:
+	spin_unlock_irqrestore(&qp->s_hlock, flags);
+	if (nreq) {
+		if (call_send)
+			rdi->driver_f.schedule_send_no_lock(qp);
+		else
+			rdi->driver_f.do_send(qp);
+	}
+	return err;
+}
+
+/**
+ * rvt_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: 0 on success else errno
+ */
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_rwq *wq;
+	unsigned long flags;
+
+	for (; wr; wr = wr->next) {
+		struct rvt_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned)wr->num_sge > srq->rq.max_sge) {
+			*bad_wr = wr;
+			return -EINVAL;
+		}
+
+		spin_lock_irqsave(&srq->rq.lock, flags);
+		wq = srq->rq.wq;
+		next = wq->head + 1;
+		if (next >= srq->rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			*bad_wr = wr;
+			return -ENOMEM;
+		}
+
+		wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
+		wqe->wr_id = wr->wr_id;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
+		wq->head = next;
+		spin_unlock_irqrestore(&srq->rq.lock, flags);
+	}
+	return 0;
+}
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
new file mode 100644
index 000000000000..8409f80d5f25
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -0,0 +1,69 @@
+#ifndef DEF_RVTQP_H
+#define DEF_RVTQP_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_driver_qp_init(struct rvt_dev_info *rdi);
+void rvt_qp_exit(struct rvt_dev_info *rdi);
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata);
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata);
+int rvt_destroy_qp(struct ib_qp *ibqp);
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr);
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		  struct ib_recv_wr **bad_wr);
+int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+		  struct ib_send_wr **bad_wr);
+int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr);
+#endif          /* DEF_RVTQP_H */
diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 67786d417493..f7c48e9023de 100644
--- a/drivers/staging/rdma/hfi1/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,96 +49,50 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
-#include "verbs.h"
+#include "srq.h"
+#include "vt.h"
 
 /**
- * hfi1_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
+ * rvt_driver_srq_init - init srq resources on a per driver basis
+ * @rdi: rvt dev structure
  *
- * This may be called from interrupt context.
+ * Do any initialization needed when a driver registers with rdmavt.
  */
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+void rvt_driver_srq_init(struct rvt_dev_info *rdi)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
+	spin_lock_init(&rdi->n_srqs_lock);
+	rdi->n_srqs_allocated = 0;
 }
 
 /**
- * hfi1_create_srq - create a shared receive queue
+ * rvt_create_srq - create a shared receive queue
  * @ibpd: the protection domain of the SRQ to create
  * @srq_init_attr: the attributes of the SRQ
  * @udata: data from libibverbs when creating a user SRQ
+ *
+ * Return: Allocated srq object
  */
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata)
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata)
 {
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-	struct hfi1_srq *srq;
+	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+	struct rvt_srq *srq;
 	u32 sz;
 	struct ib_srq *ret;
 
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
 
 	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > hfi1_max_srq_sges ||
+	    srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
 	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > hfi1_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
+	    srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
+		return ERR_PTR(-EINVAL);
 
 	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
@@ -149,8 +100,8 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
 	srq->rq.size = srq_init_attr->attr.max_wr + 1;
 	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct hfi1_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) + srq->rq.size * sz);
+		sizeof(struct rvt_rwqe);
+	srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz);
 	if (!srq->rq.wq) {
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_srq;
@@ -158,15 +109,15 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
 
 	/*
 	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		int err;
-		u32 s = sizeof(struct hfi1_rwq) + srq->rq.size * sz;
+		u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
 
 		srq->ip =
-		    hfi1_create_mmap_info(dev, s, ibpd->uobject->context,
-					  srq->rq.wq);
+		    rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+					 srq->rq.wq);
 		if (!srq->ip) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_wq;
@@ -178,8 +129,9 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
 			ret = ERR_PTR(err);
 			goto bail_ip;
 		}
-	} else
+	} else {
 		srq->ip = NULL;
+	}
 
 	/*
 	 * ib_create_srq() will initialize srq->ibsrq.
@@ -190,7 +142,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
 	srq->limit = srq_init_attr->attr.srq_limit;
 
 	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == hfi1_max_srqs) {
+	if (dev->n_srqs_allocated == dev->dparms.props.max_srq) {
 		spin_unlock(&dev->n_srqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
@@ -205,8 +157,7 @@ struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
 		spin_unlock_irq(&dev->pending_lock);
 	}
 
-	ret = &srq->ibsrq;
-	goto done;
+	return &srq->ibsrq;
 
 bail_ip:
 	kfree(srq->ip);
@@ -214,46 +165,44 @@ bail_wq:
 	vfree(srq->rq.wq);
 bail_srq:
 	kfree(srq);
-done:
 	return ret;
 }
 
 /**
- * hfi1_modify_srq - modify a shared receive queue
+ * rvt_modify_srq - modify a shared receive queue
  * @ibsrq: the SRQ to modify
  * @attr: the new attributes of the SRQ
  * @attr_mask: indicates which attributes to modify
  * @udata: user data for libibverbs.so
+ *
+ * Return: 0 on success
  */
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata)
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_rwq *wq;
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
+	struct rvt_rwq *wq;
 	int ret = 0;
 
 	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct hfi1_rwq *owq;
-		struct hfi1_rwqe *p;
+		struct rvt_rwq *owq;
+		struct rvt_rwqe *p;
 		u32 sz, size, n, head, tail;
 
 		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > hfi1_max_srq_wrs) ||
+		if ((attr->max_wr > dev->dparms.props.max_srq_wr) ||
 		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
+		     attr->srq_limit : srq->limit) > attr->max_wr)
+			return -EINVAL;
 
-		sz = sizeof(struct hfi1_rwqe) +
+		sz = sizeof(struct rvt_rwqe) +
 			srq->rq.max_sge * sizeof(struct ib_sge);
 		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct hfi1_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
+		wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz);
+		if (!wq)
+			return -ENOMEM;
 
 		/* Check that we can write the offset to mmap. */
 		if (udata && udata->inlen >= sizeof(__u64)) {
@@ -264,8 +213,8 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 						 sizeof(offset_addr));
 			if (ret)
 				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
+			udata->outbuf = (void __user *)
+					(unsigned long)offset_addr;
 			ret = ib_copy_to_udata(udata, &offset,
 					       sizeof(offset));
 			if (ret)
@@ -296,16 +245,16 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		n = 0;
 		p = wq->wq;
 		while (tail != head) {
-			struct hfi1_rwqe *wqe;
+			struct rvt_rwqe *wqe;
 			int i;
 
-			wqe = get_rwqe_ptr(&srq->rq, tail);
+			wqe = rvt_get_rwqe_ptr(&srq->rq, tail);
 			p->wr_id = wqe->wr_id;
 			p->num_sge = wqe->num_sge;
 			for (i = 0; i < wqe->num_sge; i++)
 				p->sg_list[i] = wqe->sg_list[i];
 			n++;
-			p = (struct hfi1_rwqe *)((char *)p + sz);
+			p = (struct rvt_rwqe *)((char *)p + sz);
 			if (++tail >= srq->rq.size)
 				tail = 0;
 		}
@@ -320,21 +269,21 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		vfree(owq);
 
 		if (srq->ip) {
-			struct hfi1_mmap_info *ip = srq->ip;
-			struct hfi1_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct hfi1_rwq) + size * sz;
+			struct rvt_mmap_info *ip = srq->ip;
+			struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device);
+			u32 s = sizeof(struct rvt_rwq) + size * sz;
 
-			hfi1_update_mmap_info(dev, ip, s, wq);
+			rvt_update_mmap_info(dev, ip, s, wq);
 
 			/*
 			 * Return the offset to mmap.
-			 * See hfi1_mmap() for details.
+			 * See rvt_mmap() for details.
 			 */
 			if (udata && udata->inlen >= sizeof(__u64)) {
 				ret = ib_copy_to_udata(udata, &ip->offset,
 						       sizeof(ip->offset));
 				if (ret)
-					goto bail;
+					return ret;
 			}
 
 			/*
@@ -355,19 +304,24 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 			srq->limit = attr->srq_limit;
 		spin_unlock_irq(&srq->rq.lock);
 	}
-	goto bail;
+	return ret;
 
 bail_unlock:
 	spin_unlock_irq(&srq->rq.lock);
 bail_free:
 	vfree(wq);
-bail:
 	return ret;
 }
 
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+/** rvt_query_srq - query srq data
+ * @ibsrq: srq to query
+ * @attr: return info in attr
+ *
+ * Return: always 0
+ */
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
 
 	attr->max_wr = srq->rq.size - 1;
 	attr->max_sge = srq->rq.max_sge;
@@ -376,19 +330,21 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
 }
 
 /**
- * hfi1_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
+ * rvt_destroy_srq - destory an srq
+ * @ibsrq: srq object to destroy
+ *
+ * Return always 0
  */
-int hfi1_destroy_srq(struct ib_srq *ibsrq)
+int rvt_destroy_srq(struct ib_srq *ibsrq)
 {
-	struct hfi1_srq *srq = to_isrq(ibsrq);
-	struct hfi1_ibdev *dev = to_idev(ibsrq->device);
+	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
+	struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
 
 	spin_lock(&dev->n_srqs_lock);
 	dev->n_srqs_allocated--;
 	spin_unlock(&dev->n_srqs_lock);
 	if (srq->ip)
-		kref_put(&srq->ip->ref, hfi1_release_mmap_info);
+		kref_put(&srq->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(srq->rq.wq);
 	kfree(srq);
diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h
new file mode 100644
index 000000000000..bf0eaaf56465
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/srq.h
@@ -0,0 +1,62 @@
+#ifndef DEF_RVTSRQ_H
+#define DEF_RVTSRQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+void rvt_driver_srq_init(struct rvt_dev_info *rdi);
+struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata);
+int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata);
+int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+int rvt_destroy_srq(struct ib_srq *ibsrq);
+
+#endif          /* DEF_RVTSRQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c
new file mode 100644
index 000000000000..d593285a349c
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
new file mode 100644
index 000000000000..6c0457db5499
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR rdmavt
+
+#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RDMAVT_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#define RDI_DEV_ENTRY(rdi)   __string(dev, rdi->driver_f.get_card_name(rdi))
+#define RDI_DEV_ASSIGN(rdi)  __assign_str(dev, rdi->driver_f.get_card_name(rdi))
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdmavt
+
+TRACE_EVENT(rvt_dbg,
+	TP_PROTO(struct rvt_dev_info *rdi,
+		 const char *msg),
+	TP_ARGS(rdi, msg),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(rdi)
+		__string(msg, msg)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(rdi);
+		__assign_str(msg, msg);
+	),
+	TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qphash
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u32, qpn)
+		__field(u32, bucket)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->bucket = bucket;
+	),
+	TP_printk(
+		"[%s] qpn 0x%x bucket %u",
+		__get_str(dev),
+		__entry->qpn,
+		__entry->bucket
+	)
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+	TP_PROTO(struct rvt_qp *qp, u32 bucket),
+	TP_ARGS(qp, bucket));
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode  }
+#define show_wr_opcode(opcode)                             \
+__print_symbolic(opcode,                                   \
+	wr_opcode_name(RDMA_WRITE),                        \
+	wr_opcode_name(RDMA_WRITE_WITH_IMM),               \
+	wr_opcode_name(SEND),                              \
+	wr_opcode_name(SEND_WITH_IMM),                     \
+	wr_opcode_name(RDMA_READ),                         \
+	wr_opcode_name(ATOMIC_CMP_AND_SWP),                \
+	wr_opcode_name(ATOMIC_FETCH_AND_ADD),              \
+	wr_opcode_name(LSO),                               \
+	wr_opcode_name(SEND_WITH_INV),                     \
+	wr_opcode_name(RDMA_READ_WITH_INV),                \
+	wr_opcode_name(LOCAL_INV),                         \
+	wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP),         \
+	wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+	rvt_post_one_wr,
+	TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+	TP_ARGS(qp, wqe),
+	TP_STRUCT__entry(
+		RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+		__field(u64, wr_id)
+		__field(u32, qpn)
+		__field(u32, psn)
+		__field(u32, lpsn)
+		__field(u32, length)
+		__field(u32, opcode)
+		__field(u32, size)
+		__field(u32, avail)
+		__field(u32, head)
+		__field(u32, last)
+	),
+	TP_fast_assign(
+		RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+		__entry->wr_id = wqe->wr.wr_id;
+		__entry->qpn = qp->ibqp.qp_num;
+		__entry->psn = wqe->psn;
+		__entry->lpsn = wqe->lpsn;
+		__entry->length = wqe->length;
+		__entry->opcode = wqe->wr.opcode;
+		__entry->size = qp->s_size;
+		__entry->avail = qp->s_avail;
+		__entry->head = qp->s_head;
+		__entry->last = qp->s_last;
+	),
+	TP_printk(
+		POS_PRN,
+		__get_str(dev),
+		__entry->wr_id,
+		__entry->qpn,
+		__entry->psn,
+		__entry->lpsn,
+		__entry->length,
+		__entry->opcode, show_wr_opcode(__entry->opcode),
+		__entry->size,
+		__entry->avail,
+		__entry->head,
+		__entry->last
+	)
+);
+
+#endif /* __RDMAVT_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
new file mode 100644
index 000000000000..6caf5272ba1f
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -0,0 +1,873 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include "vt.h"
+#include "trace.h"
+
+#define RVT_UVERBS_ABI_VERSION 2
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("RDMA Verbs Transport Library");
+
+static int rvt_init(void)
+{
+	/*
+	 * rdmavt does not need to do anything special when it starts up. All it
+	 * needs to do is sit and wait until a driver attempts registration.
+	 */
+	return 0;
+}
+module_init(rvt_init);
+
+static void rvt_cleanup(void)
+{
+	/*
+	 * Nothing to do at exit time either. The module won't be able to be
+	 * removed until all drivers are gone which means all the dev structs
+	 * are gone so there is really nothing to do.
+	 */
+}
+module_exit(rvt_cleanup);
+
+/**
+ * rvt_alloc_device - allocate rdi
+ * @size: how big of a structure to allocate
+ * @nports: number of ports to allocate array slots for
+ *
+ * Use IB core device alloc to allocate space for the rdi which is assumed to be
+ * inside of the ib_device. Any extra space that drivers require should be
+ * included in size.
+ *
+ * We also allocate a port array based on the number of ports.
+ *
+ * Return: pointer to allocated rdi
+ */
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
+{
+	struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM);
+
+	rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+	if (!rdi)
+		return rdi;
+
+	rdi->ports = kcalloc(nports,
+			     sizeof(struct rvt_ibport **),
+			     GFP_KERNEL);
+	if (!rdi->ports)
+		ib_dealloc_device(&rdi->ibdev);
+
+	return rdi;
+}
+EXPORT_SYMBOL(rvt_alloc_device);
+
+static int rvt_query_device(struct ib_device *ibdev,
+			    struct ib_device_attr *props,
+			    struct ib_udata *uhw)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+
+	if (uhw->inlen || uhw->outlen)
+		return -EINVAL;
+	/*
+	 * Return rvt_dev_info.dparms.props contents
+	 */
+	*props = rdi->dparms.props;
+	return 0;
+}
+
+static int rvt_modify_device(struct ib_device *device,
+			     int device_modify_mask,
+			     struct ib_device_modify *device_modify)
+{
+	/*
+	 * There is currently no need to supply this based on qib and hfi1.
+	 * Future drivers may need to implement this though.
+	 */
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * rvt_query_port: Passes the query port call to the driver
+ * @ibdev: Verbs IB dev
+ * @port_num: port number, 1 based from ib core
+ * @props: structure to hold returned properties
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
+			  struct ib_port_attr *props)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_ibport *rvp;
+	int port_index = ibport_num_to_idx(ibdev, port_num);
+
+	if (port_index < 0)
+		return -EINVAL;
+
+	rvp = rdi->ports[port_index];
+	memset(props, 0, sizeof(*props));
+	props->sm_lid = rvp->sm_lid;
+	props->sm_sl = rvp->sm_sl;
+	props->port_cap_flags = rvp->port_cap_flags;
+	props->max_msg_sz = 0x80000000;
+	props->pkey_tbl_len = rvt_get_npkeys(rdi);
+	props->bad_pkey_cntr = rvp->pkey_violations;
+	props->qkey_viol_cntr = rvp->qkey_violations;
+	props->subnet_timeout = rvp->subnet_timeout;
+	props->init_type_reply = 0;
+
+	/* Populate the remaining ib_port_attr elements */
+	return rdi->driver_f.query_port_state(rdi, port_num, props);
+}
+
+/**
+ * rvt_modify_port
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @port_modify_mask: How to change the port
+ * @props: Structure to fill in
+ *
+ * Return: 0 on success
+ */
+static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
+			   int port_modify_mask, struct ib_port_modify *props)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_ibport *rvp;
+	int ret = 0;
+	int port_index = ibport_num_to_idx(ibdev, port_num);
+
+	if (port_index < 0)
+		return -EINVAL;
+
+	rvp = rdi->ports[port_index];
+	rvp->port_cap_flags |= props->set_port_cap_mask;
+	rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+
+	if (props->set_port_cap_mask || props->clr_port_cap_mask)
+		rdi->driver_f.cap_mask_chg(rdi, port_num);
+	if (port_modify_mask & IB_PORT_SHUTDOWN)
+		ret = rdi->driver_f.shut_down_port(rdi, port_num);
+	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+		rvp->qkey_violations = 0;
+
+	return ret;
+}
+
+/**
+ * rvt_query_pkey - Return a pkey from the table at a given index
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @intex: Index into pkey table
+ *
+ * Return: 0 on failure pkey otherwise
+ */
+static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
+			  u16 *pkey)
+{
+	/*
+	 * Driver will be responsible for keeping rvt_dev_info.pkey_table up to
+	 * date. This function will just return that value. There is no need to
+	 * lock, if a stale value is read and sent to the user so be it there is
+	 * no way to protect against that anyway.
+	 */
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	int port_index;
+
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	if (index >= rvt_get_npkeys(rdi))
+		return -EINVAL;
+
+	*pkey = rvt_get_pkey(rdi, port_index, index);
+	return 0;
+}
+
+/**
+ * rvt_query_gid - Return a gid from the table
+ * @ibdev: Verbs IB dev
+ * @port_num: Port number, 1 based from ib core
+ * @index: = Index in table
+ * @gid: Gid to return
+ *
+ * Return: 0 on success
+ */
+static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
+			 int guid_index, union ib_gid *gid)
+{
+	struct rvt_dev_info *rdi;
+	struct rvt_ibport *rvp;
+	int port_index;
+
+	/*
+	 * Driver is responsible for updating the guid table. Which will be used
+	 * to craft the return value. This will work similar to how query_pkey()
+	 * is being done.
+	 */
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	rdi = ib_to_rvt(ibdev);
+	rvp = rdi->ports[port_index];
+
+	gid->global.subnet_prefix = rvp->gid_prefix;
+
+	return rdi->driver_f.get_guid_be(rdi, rvp, guid_index,
+					 &gid->global.interface_id);
+}
+
+struct rvt_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
+						*ibucontext)
+{
+	return container_of(ibucontext, struct rvt_ucontext, ibucontext);
+}
+
+/**
+ * rvt_alloc_ucontext - Allocate a user context
+ * @ibdev: Vers IB dev
+ * @data: User data allocated
+ */
+static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
+					      struct ib_udata *udata)
+{
+	struct rvt_ucontext *context;
+
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+	return &context->ibucontext;
+}
+
+/**
+ *rvt_dealloc_ucontext - Free a user context
+ *@context - Free this
+ */
+static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(to_iucontext(context));
+	return 0;
+}
+
+static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+				  struct ib_port_immutable *immutable)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct ib_port_attr attr;
+	int err, port_index;
+
+	port_index = ibport_num_to_idx(ibdev, port_num);
+	if (port_index < 0)
+		return -EINVAL;
+
+	err = rvt_query_port(ibdev, port_num, &attr);
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+	immutable->max_mad_size = rdi->dparms.max_mad_size;
+
+	return 0;
+}
+
+enum {
+	MISC,
+	QUERY_DEVICE,
+	MODIFY_DEVICE,
+	QUERY_PORT,
+	MODIFY_PORT,
+	QUERY_PKEY,
+	QUERY_GID,
+	ALLOC_UCONTEXT,
+	DEALLOC_UCONTEXT,
+	GET_PORT_IMMUTABLE,
+	CREATE_QP,
+	MODIFY_QP,
+	DESTROY_QP,
+	QUERY_QP,
+	POST_SEND,
+	POST_RECV,
+	POST_SRQ_RECV,
+	CREATE_AH,
+	DESTROY_AH,
+	MODIFY_AH,
+	QUERY_AH,
+	CREATE_SRQ,
+	MODIFY_SRQ,
+	DESTROY_SRQ,
+	QUERY_SRQ,
+	ATTACH_MCAST,
+	DETACH_MCAST,
+	GET_DMA_MR,
+	REG_USER_MR,
+	DEREG_MR,
+	ALLOC_MR,
+	ALLOC_FMR,
+	MAP_PHYS_FMR,
+	UNMAP_FMR,
+	DEALLOC_FMR,
+	MMAP,
+	CREATE_CQ,
+	DESTROY_CQ,
+	POLL_CQ,
+	REQ_NOTFIY_CQ,
+	RESIZE_CQ,
+	ALLOC_PD,
+	DEALLOC_PD,
+	_VERB_IDX_MAX /* Must always be last! */
+};
+
+static inline int check_driver_override(struct rvt_dev_info *rdi,
+					size_t offset, void *func)
+{
+	if (!*(void **)((void *)&rdi->ibdev + offset)) {
+		*(void **)((void *)&rdi->ibdev + offset) = func;
+		return 0;
+	}
+
+	return 1;
+}
+
+static noinline int check_support(struct rvt_dev_info *rdi, int verb)
+{
+	switch (verb) {
+	case MISC:
+		/*
+		 * These functions are not part of verbs specifically but are
+		 * required for rdmavt to function.
+		 */
+		if ((!rdi->driver_f.port_callback) ||
+		    (!rdi->driver_f.get_card_name) ||
+		    (!rdi->driver_f.get_pci_dev))
+			return -EINVAL;
+		break;
+
+	case QUERY_DEVICE:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_device),
+						    rvt_query_device);
+		break;
+
+	case MODIFY_DEVICE:
+		/*
+		 * rdmavt does not support modify device currently drivers must
+		 * provide.
+		 */
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_device),
+					   rvt_modify_device))
+			return -EOPNOTSUPP;
+		break;
+
+	case QUERY_PORT:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 query_port),
+					   rvt_query_port))
+			if (!rdi->driver_f.query_port_state)
+				return -EINVAL;
+		break;
+
+	case MODIFY_PORT:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_port),
+					   rvt_modify_port))
+			if (!rdi->driver_f.cap_mask_chg ||
+			    !rdi->driver_f.shut_down_port)
+				return -EINVAL;
+		break;
+
+	case QUERY_PKEY:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_pkey),
+				      rvt_query_pkey);
+		break;
+
+	case QUERY_GID:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 query_gid),
+					   rvt_query_gid))
+			if (!rdi->driver_f.get_guid_be)
+				return -EINVAL;
+		break;
+
+	case ALLOC_UCONTEXT:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_ucontext),
+				      rvt_alloc_ucontext);
+		break;
+
+	case DEALLOC_UCONTEXT:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_ucontext),
+				      rvt_dealloc_ucontext);
+		break;
+
+	case GET_PORT_IMMUTABLE:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    get_port_immutable),
+				      rvt_get_port_immutable);
+		break;
+
+	case CREATE_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 create_qp),
+					   rvt_create_qp))
+			if (!rdi->driver_f.qp_priv_alloc ||
+			    !rdi->driver_f.qp_priv_free ||
+			    !rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp)
+				return -EINVAL;
+		break;
+
+	case MODIFY_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 modify_qp),
+					   rvt_modify_qp))
+			if (!rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.schedule_send ||
+			    !rdi->driver_f.get_pmtu_from_attr ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp ||
+			    !rdi->driver_f.notify_error_qp ||
+			    !rdi->driver_f.mtu_from_qp ||
+			    !rdi->driver_f.mtu_to_path_mtu ||
+			    !rdi->driver_f.shut_down_port ||
+			    !rdi->driver_f.cap_mask_chg)
+				return -EINVAL;
+		break;
+
+	case DESTROY_QP:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 destroy_qp),
+					   rvt_destroy_qp))
+			if (!rdi->driver_f.qp_priv_free ||
+			    !rdi->driver_f.notify_qp_reset ||
+			    !rdi->driver_f.flush_qp_waiters ||
+			    !rdi->driver_f.stop_send_queue ||
+			    !rdi->driver_f.quiesce_qp)
+				return -EINVAL;
+		break;
+
+	case QUERY_QP:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_qp),
+						    rvt_query_qp);
+		break;
+
+	case POST_SEND:
+		if (!check_driver_override(rdi, offsetof(struct ib_device,
+							 post_send),
+					   rvt_post_send))
+			if (!rdi->driver_f.schedule_send ||
+			    !rdi->driver_f.do_send)
+				return -EINVAL;
+		break;
+
+	case POST_RECV:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    post_recv),
+				      rvt_post_recv);
+		break;
+	case POST_SRQ_RECV:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    post_srq_recv),
+				      rvt_post_srq_recv);
+		break;
+
+	case CREATE_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_ah),
+				      rvt_create_ah);
+		break;
+
+	case DESTROY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_ah),
+				      rvt_destroy_ah);
+		break;
+
+	case MODIFY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    modify_ah),
+				      rvt_modify_ah);
+		break;
+
+	case QUERY_AH:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_ah),
+				      rvt_query_ah);
+		break;
+
+	case CREATE_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_srq),
+				      rvt_create_srq);
+		break;
+
+	case MODIFY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    modify_srq),
+				      rvt_modify_srq);
+		break;
+
+	case DESTROY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_srq),
+				      rvt_destroy_srq);
+		break;
+
+	case QUERY_SRQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    query_srq),
+				      rvt_query_srq);
+		break;
+
+	case ATTACH_MCAST:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    attach_mcast),
+				      rvt_attach_mcast);
+		break;
+
+	case DETACH_MCAST:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    detach_mcast),
+				      rvt_detach_mcast);
+		break;
+
+	case GET_DMA_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    get_dma_mr),
+				      rvt_get_dma_mr);
+		break;
+
+	case REG_USER_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    reg_user_mr),
+				      rvt_reg_user_mr);
+		break;
+
+	case DEREG_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dereg_mr),
+				      rvt_dereg_mr);
+		break;
+
+	case ALLOC_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_fmr),
+				      rvt_alloc_fmr);
+		break;
+
+	case ALLOC_MR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_mr),
+				      rvt_alloc_mr);
+		break;
+
+	case MAP_PHYS_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    map_phys_fmr),
+				      rvt_map_phys_fmr);
+		break;
+
+	case UNMAP_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    unmap_fmr),
+				      rvt_unmap_fmr);
+		break;
+
+	case DEALLOC_FMR:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_fmr),
+				      rvt_dealloc_fmr);
+		break;
+
+	case MMAP:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    mmap),
+				      rvt_mmap);
+		break;
+
+	case CREATE_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    create_cq),
+				      rvt_create_cq);
+		break;
+
+	case DESTROY_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    destroy_cq),
+				      rvt_destroy_cq);
+		break;
+
+	case POLL_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    poll_cq),
+				      rvt_poll_cq);
+		break;
+
+	case REQ_NOTFIY_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    req_notify_cq),
+				      rvt_req_notify_cq);
+		break;
+
+	case RESIZE_CQ:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    resize_cq),
+				      rvt_resize_cq);
+		break;
+
+	case ALLOC_PD:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    alloc_pd),
+				      rvt_alloc_pd);
+		break;
+
+	case DEALLOC_PD:
+		check_driver_override(rdi, offsetof(struct ib_device,
+						    dealloc_pd),
+				      rvt_dealloc_pd);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * rvt_register_device - register a driver
+ * @rdi: main dev structure for all of rdmavt operations
+ *
+ * It is up to drivers to allocate the rdi and fill in the appropriate
+ * information.
+ *
+ * Return: 0 on success otherwise an errno.
+ */
+int rvt_register_device(struct rvt_dev_info *rdi)
+{
+	int ret = 0, i;
+
+	if (!rdi)
+		return -EINVAL;
+
+	/*
+	 * Check to ensure drivers have setup the required helpers for the verbs
+	 * they want rdmavt to handle
+	 */
+	for (i = 0; i < _VERB_IDX_MAX; i++)
+		if (check_support(rdi, i)) {
+			pr_err("Driver support req not met at %d\n", i);
+			return -EINVAL;
+		}
+
+
+	/* Once we get past here we can use rvt_pr macros and tracepoints */
+	trace_rvt_dbg(rdi, "Driver attempting registration");
+	rvt_mmap_init(rdi);
+
+	/* Queue Pairs */
+	ret = rvt_driver_qp_init(rdi);
+	if (ret) {
+		pr_err("Error in driver QP init.\n");
+		return -EINVAL;
+	}
+
+	/* Address Handle */
+	spin_lock_init(&rdi->n_ahs_lock);
+	rdi->n_ahs_allocated = 0;
+
+	/* Shared Receive Queue */
+	rvt_driver_srq_init(rdi);
+
+	/* Multicast */
+	rvt_driver_mcast_init(rdi);
+
+	/* Mem Region */
+	ret = rvt_driver_mr_init(rdi);
+	if (ret) {
+		pr_err("Error in driver MR init.\n");
+		goto bail_no_mr;
+	}
+
+	/* Completion queues */
+	ret = rvt_driver_cq_init(rdi);
+	if (ret) {
+		pr_err("Error in driver CQ init.\n");
+		goto bail_mr;
+	}
+
+	/* DMA Operations */
+	rdi->ibdev.dma_ops =
+		rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+
+	/* Protection Domain */
+	spin_lock_init(&rdi->n_pds_lock);
+	rdi->n_pds_allocated = 0;
+
+	/*
+	 * There are some things which could be set by underlying drivers but
+	 * really should be up to rdmavt to set. For instance drivers can't know
+	 * exactly which functions rdmavt supports, nor do they know the ABI
+	 * version, so we do all of this sort of stuff here.
+	 */
+	rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION;
+	rdi->ibdev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
+		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
+		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
+		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
+		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+	rdi->ibdev.node_type = RDMA_NODE_IB_CA;
+	rdi->ibdev.num_comp_vectors = 1;
+
+	/* We are now good to announce we exist */
+	ret =  ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
+	if (ret) {
+		rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
+		goto bail_cq;
+	}
+
+	rvt_create_mad_agents(rdi);
+
+	rvt_pr_info(rdi, "Registration with rdmavt done.\n");
+	return ret;
+
+bail_cq:
+	rvt_cq_exit(rdi);
+
+bail_mr:
+	rvt_mr_exit(rdi);
+
+bail_no_mr:
+	rvt_qp_exit(rdi);
+
+	return ret;
+}
+EXPORT_SYMBOL(rvt_register_device);
+
+/**
+ * rvt_unregister_device - remove a driver
+ * @rdi: rvt dev struct
+ */
+void rvt_unregister_device(struct rvt_dev_info *rdi)
+{
+	trace_rvt_dbg(rdi, "Driver is unregistering.");
+	if (!rdi)
+		return;
+
+	rvt_free_mad_agents(rdi);
+
+	ib_unregister_device(&rdi->ibdev);
+	rvt_cq_exit(rdi);
+	rvt_mr_exit(rdi);
+	rvt_qp_exit(rdi);
+}
+EXPORT_SYMBOL(rvt_unregister_device);
+
+/**
+ * rvt_init_port - init internal data for driver port
+ * @rdi: rvt dev strut
+ * @port: rvt port
+ * @port_index: 0 based index of ports, different from IB core port num
+ *
+ * Keep track of a list of ports. No need to have a detach port.
+ * They persist until the driver goes away.
+ *
+ * Return: always 0
+ */
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+		  int port_index, u16 *pkey_table)
+{
+
+	rdi->ports[port_index] = port;
+	rdi->ports[port_index]->pkey_table = pkey_table;
+
+	return 0;
+}
+EXPORT_SYMBOL(rvt_init_port);
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
new file mode 100644
index 000000000000..6b01eaa4461b
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -0,0 +1,104 @@
+#ifndef DEF_RDMAVT_H
+#define DEF_RDMAVT_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <linux/pci.h>
+#include "dma.h"
+#include "pd.h"
+#include "qp.h"
+#include "ah.h"
+#include "mr.h"
+#include "srq.h"
+#include "mcast.h"
+#include "mmap.h"
+#include "cq.h"
+#include "mad.h"
+#include "mmap.h"
+
+#define rvt_pr_info(rdi, fmt, ...) \
+	__rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
+		      rdi->driver_f.get_card_name(rdi), \
+		      fmt, \
+		      ##__VA_ARGS__)
+
+#define rvt_pr_warn(rdi, fmt, ...) \
+	__rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \
+		      rdi->driver_f.get_card_name(rdi), \
+		      fmt, \
+		      ##__VA_ARGS__)
+
+#define rvt_pr_err(rdi, fmt, ...) \
+	__rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \
+		     rdi->driver_f.get_card_name(rdi), \
+		     fmt, \
+		     ##__VA_ARGS__)
+
+#define __rvt_pr_info(pdev, name, fmt, ...) \
+	dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_warn(pdev, name, fmt, ...) \
+	dev_warn(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+#define __rvt_pr_err(pdev, name, fmt, ...) \
+	dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)
+
+static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	int port_index;
+
+	port_index = port_num - 1; /* IB ports start at 1 our arrays at 0 */
+	if ((port_index < 0) || (port_index >= rdi->dparms.nports))
+		return -EINVAL;
+
+	return port_index;
+}
+
+#endif          /* DEF_RDMAVT_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 85be0de3ab26..caec8e9c4666 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -388,7 +388,7 @@ struct ipoib_dev_priv {
 	struct dentry *mcg_dentry;
 	struct dentry *path_dentry;
 #endif
-	int	hca_caps;
+	u64	hca_caps;
 	struct ipoib_ethtool_st ethtool;
 	struct timer_list poll_timer;
 	unsigned max_send_sge;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 899e6b7fb8a5..f0e55e47eb54 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -180,6 +180,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	struct sk_buff *skb;
 	u64 mapping[IPOIB_UD_RX_SG];
 	union ib_gid *dgid;
+	union ib_gid *sgid;
 
 	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -203,13 +204,6 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		return;
 	}
 
-	/*
-	 * Drop packets that this interface sent, ie multicast packets
-	 * that the HCA has replicated.
-	 */
-	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
-		goto repost;
-
 	memcpy(mapping, priv->rx_ring[wr_id].mapping,
 	       IPOIB_UD_RX_SG * sizeof *mapping);
 
@@ -239,6 +233,25 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	else
 		skb->pkt_type = PACKET_MULTICAST;
 
+	sgid = &((struct ib_grh *)skb->data)->sgid;
+
+	/*
+	 * Drop packets that this interface sent, ie multicast packets
+	 * that the HCA has replicated.
+	 */
+	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
+		int need_repost = 1;
+
+		if ((wc->wc_flags & IB_WC_GRH) &&
+		    sgid->global.interface_id != priv->local_gid.global.interface_id)
+			need_repost = 0;
+
+		if (need_repost) {
+			dev_kfree_skb_any(skb);
+			goto repost;
+		}
+	}
+
 	skb_pull(skb, IB_GRH_BYTES);
 
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 25509bbd4a05..80807d6e5c4c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -51,6 +51,7 @@
 #include <net/addrconf.h>
 #include <linux/inetdevice.h>
 #include <rdma/ib_cache.h>
+#include <linux/pci.h>
 
 #define DRV_VERSION "1.0.0"
 
@@ -1590,11 +1591,67 @@ void ipoib_dev_cleanup(struct net_device *dev)
 	priv->tx_ring = NULL;
 }
 
+static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	return ib_set_vf_link_state(priv->ca, vf, priv->port, link_state);
+}
+
+static int ipoib_get_vf_config(struct net_device *dev, int vf,
+			       struct ifla_vf_info *ivf)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = ib_get_vf_config(priv->ca, vf, priv->port, ivf);
+	if (err)
+		return err;
+
+	ivf->vf = vf;
+
+	return 0;
+}
+
+static int ipoib_set_vf_guid(struct net_device *dev, int vf, u64 guid, int type)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (type != IFLA_VF_IB_NODE_GUID && type != IFLA_VF_IB_PORT_GUID)
+		return -EINVAL;
+
+	return ib_set_vf_guid(priv->ca, vf, priv->port, guid, type);
+}
+
+static int ipoib_get_vf_stats(struct net_device *dev, int vf,
+			      struct ifla_vf_stats *vf_stats)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	return ib_get_vf_stats(priv->ca, vf, priv->port, vf_stats);
+}
+
 static const struct header_ops ipoib_header_ops = {
 	.create	= ipoib_hard_header,
 };
 
-static const struct net_device_ops ipoib_netdev_ops = {
+static const struct net_device_ops ipoib_netdev_ops_pf = {
+	.ndo_uninit		 = ipoib_uninit,
+	.ndo_open		 = ipoib_open,
+	.ndo_stop		 = ipoib_stop,
+	.ndo_change_mtu		 = ipoib_change_mtu,
+	.ndo_fix_features	 = ipoib_fix_features,
+	.ndo_start_xmit		 = ipoib_start_xmit,
+	.ndo_tx_timeout		 = ipoib_timeout,
+	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
+	.ndo_get_iflink		 = ipoib_get_iflink,
+	.ndo_set_vf_link_state	 = ipoib_set_vf_link_state,
+	.ndo_get_vf_config	 = ipoib_get_vf_config,
+	.ndo_get_vf_stats	 = ipoib_get_vf_stats,
+	.ndo_set_vf_guid	 = ipoib_set_vf_guid,
+};
+
+static const struct net_device_ops ipoib_netdev_ops_vf = {
 	.ndo_uninit		 = ipoib_uninit,
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
@@ -1610,7 +1667,11 @@ void ipoib_setup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	dev->netdev_ops		 = &ipoib_netdev_ops;
+	if (priv->hca_caps & IB_DEVICE_VIRTUAL_FUNCTION)
+		dev->netdev_ops	= &ipoib_netdev_ops_vf;
+	else
+		dev->netdev_ops	= &ipoib_netdev_ops_pf;
+
 	dev->header_ops		 = &ipoib_header_ops;
 
 	ipoib_set_ethtool_ops(dev);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f121e6129339..60b30d338a81 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -49,22 +49,25 @@ static struct workqueue_struct *isert_release_wq;
 static void
 isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
 static int
-isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
-	       struct isert_rdma_wr *wr);
+isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
 static void
 isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
 static int
-isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
-	       struct isert_rdma_wr *wr);
+isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn);
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
 static int
-isert_rdma_post_recvl(struct isert_conn *isert_conn);
+isert_login_post_recv(struct isert_conn *isert_conn);
 static int
 isert_rdma_accept(struct isert_conn *isert_conn);
 struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
 
 static void isert_release_work(struct work_struct *work);
+static void isert_wait4flush(struct isert_conn *isert_conn);
+static void isert_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void isert_send_done(struct ib_cq *cq, struct ib_wc *wc);
+static void isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc);
 
 static inline bool
 isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
@@ -177,12 +180,6 @@ err:
 	return ret;
 }
 
-static void
-isert_cq_event_callback(struct ib_event *e, void *context)
-{
-	isert_dbg("event: %d\n", e->event);
-}
-
 static int
 isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
 {
@@ -212,6 +209,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
 		rx_sg->addr = rx_desc->dma_addr;
 		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
 		rx_sg->lkey = device->pd->local_dma_lkey;
+		rx_desc->rx_cqe.done = isert_recv_done;
 	}
 
 	return 0;
@@ -250,9 +248,6 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn)
 	isert_conn->rx_descs = NULL;
 }
 
-static void isert_cq_work(struct work_struct *);
-static void isert_cq_callback(struct ib_cq *, void *);
-
 static void
 isert_free_comps(struct isert_device *device)
 {
@@ -261,10 +256,8 @@ isert_free_comps(struct isert_device *device)
 	for (i = 0; i < device->comps_used; i++) {
 		struct isert_comp *comp = &device->comps[i];
 
-		if (comp->cq) {
-			cancel_work_sync(&comp->work);
-			ib_destroy_cq(comp->cq);
-		}
+		if (comp->cq)
+			ib_free_cq(comp->cq);
 	}
 	kfree(device->comps);
 }
@@ -293,28 +286,17 @@ isert_alloc_comps(struct isert_device *device)
 	max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
 
 	for (i = 0; i < device->comps_used; i++) {
-		struct ib_cq_init_attr cq_attr = {};
 		struct isert_comp *comp = &device->comps[i];
 
 		comp->device = device;
-		INIT_WORK(&comp->work, isert_cq_work);
-		cq_attr.cqe = max_cqe;
-		cq_attr.comp_vector = i;
-		comp->cq = ib_create_cq(device->ib_device,
-					isert_cq_callback,
-					isert_cq_event_callback,
-					(void *)comp,
-					&cq_attr);
+		comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i,
+				IB_POLL_WORKQUEUE);
 		if (IS_ERR(comp->cq)) {
 			isert_err("Unable to allocate cq\n");
 			ret = PTR_ERR(comp->cq);
 			comp->cq = NULL;
 			goto out_cq;
 		}
-
-		ret = ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
-		if (ret)
-			goto out_cq;
 	}
 
 	return 0;
@@ -582,7 +564,6 @@ isert_init_conn(struct isert_conn *isert_conn)
 	INIT_LIST_HEAD(&isert_conn->node);
 	init_completion(&isert_conn->login_comp);
 	init_completion(&isert_conn->login_req_comp);
-	init_completion(&isert_conn->wait);
 	kref_init(&isert_conn->kref);
 	mutex_init(&isert_conn->mutex);
 	spin_lock_init(&isert_conn->pool_lock);
@@ -596,11 +577,13 @@ isert_free_login_buf(struct isert_conn *isert_conn)
 	struct ib_device *ib_dev = isert_conn->device->ib_device;
 
 	ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
-			    ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
+			    ISER_RX_PAYLOAD_SIZE, DMA_TO_DEVICE);
+	kfree(isert_conn->login_rsp_buf);
+
 	ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-			    ISCSI_DEF_MAX_RECV_SEG_LEN,
+			    ISER_RX_PAYLOAD_SIZE,
 			    DMA_FROM_DEVICE);
-	kfree(isert_conn->login_buf);
+	kfree(isert_conn->login_req_buf);
 }
 
 static int
@@ -609,50 +592,48 @@ isert_alloc_login_buf(struct isert_conn *isert_conn,
 {
 	int ret;
 
-	isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
-					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
-	if (!isert_conn->login_buf) {
+	isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf),
+			GFP_KERNEL);
+	if (!isert_conn->login_req_buf) {
 		isert_err("Unable to allocate isert_conn->login_buf\n");
 		return -ENOMEM;
 	}
 
-	isert_conn->login_req_buf = isert_conn->login_buf;
-	isert_conn->login_rsp_buf = isert_conn->login_buf +
-				    ISCSI_DEF_MAX_RECV_SEG_LEN;
-
-	isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
-		 isert_conn->login_buf, isert_conn->login_req_buf,
-		 isert_conn->login_rsp_buf);
-
 	isert_conn->login_req_dma = ib_dma_map_single(ib_dev,
-				(void *)isert_conn->login_req_buf,
-				ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
-
+				isert_conn->login_req_buf,
+				ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
 	if (ret) {
 		isert_err("login_req_dma mapping error: %d\n", ret);
 		isert_conn->login_req_dma = 0;
-		goto out_login_buf;
+		goto out_free_login_req_buf;
 	}
 
-	isert_conn->login_rsp_dma = ib_dma_map_single(ib_dev,
-					(void *)isert_conn->login_rsp_buf,
-					ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
+	isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
+	if (!isert_conn->login_rsp_buf) {
+		isert_err("Unable to allocate isert_conn->login_rspbuf\n");
+		goto out_unmap_login_req_buf;
+	}
 
+	isert_conn->login_rsp_dma = ib_dma_map_single(ib_dev,
+					isert_conn->login_rsp_buf,
+					ISER_RX_PAYLOAD_SIZE, DMA_TO_DEVICE);
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
 	if (ret) {
 		isert_err("login_rsp_dma mapping error: %d\n", ret);
 		isert_conn->login_rsp_dma = 0;
-		goto out_req_dma_map;
+		goto out_free_login_rsp_buf;
 	}
 
 	return 0;
 
-out_req_dma_map:
+out_free_login_rsp_buf:
+	kfree(isert_conn->login_rsp_buf);
+out_unmap_login_req_buf:
 	ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-			    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
-out_login_buf:
-	kfree(isert_conn->login_buf);
+			    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+out_free_login_req_buf:
+	kfree(isert_conn->login_req_buf);
 	return ret;
 }
 
@@ -726,7 +707,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 	if (ret)
 		goto out_conn_dev;
 
-	ret = isert_rdma_post_recvl(isert_conn);
+	ret = isert_login_post_recv(isert_conn);
 	if (ret)
 		goto out_conn_dev;
 
@@ -773,7 +754,7 @@ isert_connect_release(struct isert_conn *isert_conn)
 		ib_destroy_qp(isert_conn->qp);
 	}
 
-	if (isert_conn->login_buf)
+	if (isert_conn->login_req_buf)
 		isert_free_login_buf(isert_conn);
 
 	isert_device_put(device);
@@ -820,12 +801,30 @@ isert_put_conn(struct isert_conn *isert_conn)
 	kref_put(&isert_conn->kref, isert_release_kref);
 }
 
+static void
+isert_handle_unbound_conn(struct isert_conn *isert_conn)
+{
+	struct isert_np *isert_np = isert_conn->cm_id->context;
+
+	mutex_lock(&isert_np->mutex);
+	if (!list_empty(&isert_conn->node)) {
+		/*
+		 * This means iscsi doesn't know this connection
+		 * so schedule a cleanup ourselves
+		 */
+		list_del_init(&isert_conn->node);
+		isert_put_conn(isert_conn);
+		queue_work(isert_release_wq, &isert_conn->release_work);
+	}
+	mutex_unlock(&isert_np->mutex);
+}
+
 /**
  * isert_conn_terminate() - Initiate connection termination
  * @isert_conn: isert connection struct
  *
  * Notes:
- * In case the connection state is FULL_FEATURE, move state
+ * In case the connection state is BOUND, move state
  * to TEMINATING and start teardown sequence (rdma_disconnect).
  * In case the connection state is UP, complete flush as well.
  *
@@ -837,23 +836,16 @@ isert_conn_terminate(struct isert_conn *isert_conn)
 {
 	int err;
 
-	switch (isert_conn->state) {
-	case ISER_CONN_TERMINATING:
-		break;
-	case ISER_CONN_UP:
-	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
-		isert_info("Terminating conn %p state %d\n",
-			   isert_conn, isert_conn->state);
-		isert_conn->state = ISER_CONN_TERMINATING;
-		err = rdma_disconnect(isert_conn->cm_id);
-		if (err)
-			isert_warn("Failed rdma_disconnect isert_conn %p\n",
-				   isert_conn);
-		break;
-	default:
-		isert_warn("conn %p teminating in state %d\n",
-			   isert_conn, isert_conn->state);
-	}
+	if (isert_conn->state >= ISER_CONN_TERMINATING)
+		return;
+
+	isert_info("Terminating conn %p state %d\n",
+		   isert_conn, isert_conn->state);
+	isert_conn->state = ISER_CONN_TERMINATING;
+	err = rdma_disconnect(isert_conn->cm_id);
+	if (err)
+		isert_warn("Failed rdma_disconnect isert_conn %p\n",
+			   isert_conn);
 }
 
 static int
@@ -887,35 +879,27 @@ static int
 isert_disconnected_handler(struct rdma_cm_id *cma_id,
 			   enum rdma_cm_event_type event)
 {
-	struct isert_np *isert_np = cma_id->context;
-	struct isert_conn *isert_conn;
-	bool terminating = false;
-
-	if (isert_np->cm_id == cma_id)
-		return isert_np_cma_handler(cma_id->context, event);
-
-	isert_conn = cma_id->qp->qp_context;
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
 	mutex_lock(&isert_conn->mutex);
-	terminating = (isert_conn->state == ISER_CONN_TERMINATING);
-	isert_conn_terminate(isert_conn);
-	mutex_unlock(&isert_conn->mutex);
-
-	isert_info("conn %p completing wait\n", isert_conn);
-	complete(&isert_conn->wait);
-
-	if (terminating)
-		goto out;
-
-	mutex_lock(&isert_np->mutex);
-	if (!list_empty(&isert_conn->node)) {
-		list_del_init(&isert_conn->node);
-		isert_put_conn(isert_conn);
-		queue_work(isert_release_wq, &isert_conn->release_work);
+	switch (isert_conn->state) {
+	case ISER_CONN_TERMINATING:
+		break;
+	case ISER_CONN_UP:
+		isert_conn_terminate(isert_conn);
+		isert_wait4flush(isert_conn);
+		isert_handle_unbound_conn(isert_conn);
+		break;
+	case ISER_CONN_BOUND:
+	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
+		iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		break;
+	default:
+		isert_warn("conn %p teminating in state %d\n",
+			   isert_conn, isert_conn->state);
 	}
-	mutex_unlock(&isert_np->mutex);
+	mutex_unlock(&isert_conn->mutex);
 
-out:
 	return 0;
 }
 
@@ -934,12 +918,16 @@ isert_connect_error(struct rdma_cm_id *cma_id)
 static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
+	struct isert_np *isert_np = cma_id->context;
 	int ret = 0;
 
 	isert_info("%s (%d): status %d id %p np %p\n",
 		   rdma_event_msg(event->event), event->event,
 		   event->status, cma_id, cma_id->context);
 
+	if (isert_np->cm_id == cma_id)
+		return isert_np_cma_handler(cma_id->context, event->event);
+
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		ret = isert_connect_request(cma_id, event);
@@ -977,7 +965,8 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 
 	for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc = &isert_conn->rx_descs[i];
-		rx_wr->wr_id = (uintptr_t)rx_desc;
+
+		rx_wr->wr_cqe = &rx_desc->rx_cqe;
 		rx_wr->sg_list = &rx_desc->rx_sg;
 		rx_wr->num_sge = 1;
 		rx_wr->next = rx_wr + 1;
@@ -985,13 +974,10 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 	rx_wr--;
 	rx_wr->next = NULL; /* mark end of work requests list */
 
-	isert_conn->post_recv_buf_count += count;
 	ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
 			   &rx_wr_failed);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
-		isert_conn->post_recv_buf_count -= count;
-	}
 
 	return ret;
 }
@@ -1002,23 +988,20 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
 	struct ib_recv_wr *rx_wr_failed, rx_wr;
 	int ret;
 
-	rx_wr.wr_id = (uintptr_t)rx_desc;
+	rx_wr.wr_cqe = &rx_desc->rx_cqe;
 	rx_wr.sg_list = &rx_desc->rx_sg;
 	rx_wr.num_sge = 1;
 	rx_wr.next = NULL;
 
-	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
-		isert_conn->post_recv_buf_count--;
-	}
 
 	return ret;
 }
 
 static int
-isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
+isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
 {
 	struct ib_device *ib_dev = isert_conn->cm_id->device;
 	struct ib_send_wr send_wr, *send_wr_failed;
@@ -1027,8 +1010,10 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
 	ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr,
 				      ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
+	tx_desc->tx_cqe.done = isert_login_send_done;
+
 	send_wr.next	= NULL;
-	send_wr.wr_id	= (uintptr_t)tx_desc;
+	send_wr.wr_cqe	= &tx_desc->tx_cqe;
 	send_wr.sg_list	= tx_desc->tx_sg;
 	send_wr.num_sge	= tx_desc->num_sge;
 	send_wr.opcode	= IB_WR_SEND;
@@ -1056,7 +1041,6 @@ isert_create_send_desc(struct isert_conn *isert_conn,
 	tx_desc->iser_header.flags = ISCSI_CTRL;
 
 	tx_desc->num_sge = 1;
-	tx_desc->isert_cmd = isert_cmd;
 
 	if (tx_desc->tx_sg[0].lkey != device->pd->local_dma_lkey) {
 		tx_desc->tx_sg[0].lkey = device->pd->local_dma_lkey;
@@ -1097,8 +1081,9 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 {
 	struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
 
-	isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
-	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+	isert_cmd->iser_ib_op = ISER_IB_SEND;
+	tx_desc->tx_cqe.done = isert_send_done;
+	send_wr->wr_cqe = &tx_desc->tx_cqe;
 
 	if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
 		send_wr->opcode  = IB_WR_SEND_WITH_INV;
@@ -1113,7 +1098,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 }
 
 static int
-isert_rdma_post_recvl(struct isert_conn *isert_conn)
+isert_login_post_recv(struct isert_conn *isert_conn)
 {
 	struct ib_recv_wr rx_wr, *rx_wr_fail;
 	struct ib_sge sge;
@@ -1121,23 +1106,22 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
 
 	memset(&sge, 0, sizeof(struct ib_sge));
 	sge.addr = isert_conn->login_req_dma;
-	sge.length = ISER_RX_LOGIN_SIZE;
+	sge.length = ISER_RX_PAYLOAD_SIZE;
 	sge.lkey = isert_conn->device->pd->local_dma_lkey;
 
 	isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
 		sge.addr, sge.length, sge.lkey);
 
+	isert_conn->login_req_buf->rx_cqe.done = isert_login_recv_done;
+
 	memset(&rx_wr, 0, sizeof(struct ib_recv_wr));
-	rx_wr.wr_id = (uintptr_t)isert_conn->login_req_buf;
+	rx_wr.wr_cqe = &isert_conn->login_req_buf->rx_cqe;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
-	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
-	if (ret) {
+	if (ret)
 		isert_err("ib_post_recv() failed: %d\n", ret);
-		isert_conn->post_recv_buf_count--;
-	}
 
 	return ret;
 }
@@ -1203,12 +1187,12 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 			goto post_send;
 		}
 
-		ret = isert_rdma_post_recvl(isert_conn);
+		ret = isert_login_post_recv(isert_conn);
 		if (ret)
 			return ret;
 	}
 post_send:
-	ret = isert_post_send(isert_conn, tx_desc);
+	ret = isert_login_post_send(isert_conn, tx_desc);
 	if (ret)
 		return ret;
 
@@ -1218,7 +1202,7 @@ post_send:
 static void
 isert_rx_login_req(struct isert_conn *isert_conn)
 {
-	struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf;
+	struct iser_rx_desc *rx_desc = isert_conn->login_req_buf;
 	int rx_buflen = isert_conn->login_req_len;
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct iscsi_login *login = conn->conn_login;
@@ -1551,12 +1535,42 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 }
 
 static void
-isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
+isert_print_wc(struct ib_wc *wc, const char *type)
+{
+	if (wc->status != IB_WC_WR_FLUSH_ERR)
+		isert_err("%s failure: %s (%d) vend_err %x\n", type,
+			  ib_wc_status_msg(wc->status), wc->status,
+			  wc->vendor_err);
+	else
+		isert_dbg("%s failure: %s (%d)\n", type,
+			  ib_wc_status_msg(wc->status), wc->status);
+}
+
+static void
+isert_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct isert_conn *isert_conn = wc->qp->qp_context;
+	struct ib_device *ib_dev = isert_conn->cm_id->device;
+	struct iser_rx_desc *rx_desc = cqe_to_rx_desc(wc->wr_cqe);
+	struct iscsi_hdr *hdr = &rx_desc->iscsi_header;
 	struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
 	uint64_t read_va = 0, write_va = 0;
 	uint32_t read_stag = 0, write_stag = 0;
 
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "recv");
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		return;
+	}
+
+	ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr,
+			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+
+	isert_dbg("DMA: 0x%llx, iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
+		 rx_desc->dma_addr, hdr->opcode, hdr->itt, hdr->flags,
+		 (int)(wc->byte_len - ISER_HEADERS_LEN));
+
 	switch (iser_ctrl->flags & 0xF0) {
 	case ISCSI_CTRL:
 		if (iser_ctrl->flags & ISER_RSV) {
@@ -1584,56 +1598,40 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 
 	isert_rx_opcode(isert_conn, rx_desc,
 			read_stag, read_va, write_stag, write_va);
+
+	ib_dma_sync_single_for_device(ib_dev, rx_desc->dma_addr,
+			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
 }
 
 static void
-isert_rcv_completion(struct iser_rx_desc *desc,
-		     struct isert_conn *isert_conn,
-		     u32 xfer_len)
+isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct isert_conn *isert_conn = wc->qp->qp_context;
 	struct ib_device *ib_dev = isert_conn->cm_id->device;
-	struct iscsi_hdr *hdr;
-	u64 rx_dma;
-	int rx_buflen;
-
-	if ((char *)desc == isert_conn->login_req_buf) {
-		rx_dma = isert_conn->login_req_dma;
-		rx_buflen = ISER_RX_LOGIN_SIZE;
-		isert_dbg("login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
-			 rx_dma, rx_buflen);
-	} else {
-		rx_dma = desc->dma_addr;
-		rx_buflen = ISER_RX_PAYLOAD_SIZE;
-		isert_dbg("req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
-			 rx_dma, rx_buflen);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "login recv");
+		return;
 	}
 
-	ib_dma_sync_single_for_cpu(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE);
+	ib_dma_sync_single_for_cpu(ib_dev, isert_conn->login_req_dma,
+			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
 
-	hdr = &desc->iscsi_header;
-	isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
-		 hdr->opcode, hdr->itt, hdr->flags,
-		 (int)(xfer_len - ISER_HEADERS_LEN));
+	isert_conn->login_req_len = wc->byte_len - ISER_HEADERS_LEN;
 
-	if ((char *)desc == isert_conn->login_req_buf) {
-		isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN;
-		if (isert_conn->conn) {
-			struct iscsi_login *login = isert_conn->conn->conn_login;
+	if (isert_conn->conn) {
+		struct iscsi_login *login = isert_conn->conn->conn_login;
 
-			if (login && !login->first_request)
-				isert_rx_login_req(isert_conn);
-		}
-		mutex_lock(&isert_conn->mutex);
-		complete(&isert_conn->login_req_comp);
-		mutex_unlock(&isert_conn->mutex);
-	} else {
-		isert_rx_do_work(desc, isert_conn);
+		if (login && !login->first_request)
+			isert_rx_login_req(isert_conn);
 	}
 
-	ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen,
-				      DMA_FROM_DEVICE);
+	mutex_lock(&isert_conn->mutex);
+	complete(&isert_conn->login_req_comp);
+	mutex_unlock(&isert_conn->mutex);
 
-	isert_conn->post_recv_buf_count--;
+	ib_dma_sync_single_for_device(ib_dev, isert_conn->login_req_dma,
+				ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
 }
 
 static int
@@ -1683,54 +1681,50 @@ isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
 static void
 isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 {
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-
 	isert_dbg("Cmd %p\n", isert_cmd);
 
-	if (wr->data.sg) {
+	if (isert_cmd->data.sg) {
 		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
-		isert_unmap_data_buf(isert_conn, &wr->data);
+		isert_unmap_data_buf(isert_conn, &isert_cmd->data);
 	}
 
-	if (wr->rdma_wr) {
+	if (isert_cmd->rdma_wr) {
 		isert_dbg("Cmd %p free send_wr\n", isert_cmd);
-		kfree(wr->rdma_wr);
-		wr->rdma_wr = NULL;
+		kfree(isert_cmd->rdma_wr);
+		isert_cmd->rdma_wr = NULL;
 	}
 
-	if (wr->ib_sge) {
+	if (isert_cmd->ib_sge) {
 		isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
-		kfree(wr->ib_sge);
-		wr->ib_sge = NULL;
+		kfree(isert_cmd->ib_sge);
+		isert_cmd->ib_sge = NULL;
 	}
 }
 
 static void
 isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 {
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-
 	isert_dbg("Cmd %p\n", isert_cmd);
 
-	if (wr->fr_desc) {
-		isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, wr->fr_desc);
-		if (wr->fr_desc->ind & ISERT_PROTECTED) {
-			isert_unmap_data_buf(isert_conn, &wr->prot);
-			wr->fr_desc->ind &= ~ISERT_PROTECTED;
+	if (isert_cmd->fr_desc) {
+		isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, isert_cmd->fr_desc);
+		if (isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
+			isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
+			isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
 		}
 		spin_lock_bh(&isert_conn->pool_lock);
-		list_add_tail(&wr->fr_desc->list, &isert_conn->fr_pool);
+		list_add_tail(&isert_cmd->fr_desc->list, &isert_conn->fr_pool);
 		spin_unlock_bh(&isert_conn->pool_lock);
-		wr->fr_desc = NULL;
+		isert_cmd->fr_desc = NULL;
 	}
 
-	if (wr->data.sg) {
+	if (isert_cmd->data.sg) {
 		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
-		isert_unmap_data_buf(isert_conn, &wr->data);
+		isert_unmap_data_buf(isert_conn, &isert_cmd->data);
 	}
 
-	wr->ib_sge = NULL;
-	wr->rdma_wr = NULL;
+	isert_cmd->ib_sge = NULL;
+	isert_cmd->rdma_wr = NULL;
 }
 
 static void
@@ -1882,52 +1876,70 @@ fail_mr_status:
 }
 
 static void
-isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
-			    struct isert_cmd *isert_cmd)
+isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
-	struct se_cmd *se_cmd = &cmd->se_cmd;
-	struct isert_conn *isert_conn = isert_cmd->conn;
+	struct isert_conn *isert_conn = wc->qp->qp_context;
 	struct isert_device *device = isert_conn->device;
+	struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe);
+	struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc);
+	struct se_cmd *cmd = &isert_cmd->iscsi_cmd->se_cmd;
 	int ret = 0;
 
-	if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
-		ret = isert_check_pi_status(se_cmd,
-					    wr->fr_desc->pi_ctx->sig_mr);
-		wr->fr_desc->ind &= ~ISERT_PROTECTED;
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "rdma write");
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		isert_completion_put(desc, isert_cmd, device->ib_device, true);
+		return;
+	}
+
+	isert_dbg("Cmd %p\n", isert_cmd);
+
+	if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
+		ret = isert_check_pi_status(cmd,
+				isert_cmd->fr_desc->pi_ctx->sig_mr);
+		isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
 	}
 
 	device->unreg_rdma_mem(isert_cmd, isert_conn);
-	wr->rdma_wr_num = 0;
+	isert_cmd->rdma_wr_num = 0;
 	if (ret)
-		transport_send_check_condition_and_sense(se_cmd,
-							 se_cmd->pi_err, 0);
+		transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
 	else
-		isert_put_response(isert_conn->conn, cmd);
+		isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
 }
 
 static void
-isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
-			   struct isert_cmd *isert_cmd)
+isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+	struct isert_conn *isert_conn = wc->qp->qp_context;
+	struct isert_device *device = isert_conn->device;
+	struct iser_tx_desc *desc = cqe_to_tx_desc(wc->wr_cqe);
+	struct isert_cmd *isert_cmd = tx_desc_to_cmd(desc);
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
-	struct isert_conn *isert_conn = isert_cmd->conn;
-	struct isert_device *device = isert_conn->device;
 	int ret = 0;
 
-	if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "rdma read");
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		isert_completion_put(desc, isert_cmd, device->ib_device, true);
+		return;
+	}
+
+	isert_dbg("Cmd %p\n", isert_cmd);
+
+	if (isert_cmd->fr_desc && isert_cmd->fr_desc->ind & ISERT_PROTECTED) {
 		ret = isert_check_pi_status(se_cmd,
-					    wr->fr_desc->pi_ctx->sig_mr);
-		wr->fr_desc->ind &= ~ISERT_PROTECTED;
+					    isert_cmd->fr_desc->pi_ctx->sig_mr);
+		isert_cmd->fr_desc->ind &= ~ISERT_PROTECTED;
 	}
 
 	iscsit_stop_dataout_timer(cmd);
 	device->unreg_rdma_mem(isert_cmd, isert_conn);
-	cmd->write_data_done = wr->data.len;
-	wr->rdma_wr_num = 0;
+	cmd->write_data_done = isert_cmd->data.len;
+	isert_cmd->rdma_wr_num = 0;
 
 	isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
 	spin_lock_bh(&cmd->istate_lock);
@@ -1975,170 +1987,56 @@ isert_do_control_comp(struct work_struct *work)
 }
 
 static void
-isert_response_completion(struct iser_tx_desc *tx_desc,
-			  struct isert_cmd *isert_cmd,
-			  struct isert_conn *isert_conn,
-			  struct ib_device *ib_dev)
+isert_login_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
-
-	if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
-	    cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
-	    cmd->i_state == ISTATE_SEND_REJECT ||
-	    cmd->i_state == ISTATE_SEND_TEXTRSP) {
-		isert_unmap_tx_desc(tx_desc, ib_dev);
+	struct isert_conn *isert_conn = wc->qp->qp_context;
+	struct ib_device *ib_dev = isert_conn->cm_id->device;
+	struct iser_tx_desc *tx_desc = cqe_to_tx_desc(wc->wr_cqe);
 
-		INIT_WORK(&isert_cmd->comp_work, isert_do_control_comp);
-		queue_work(isert_comp_wq, &isert_cmd->comp_work);
-		return;
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "login send");
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
 	}
 
-	cmd->i_state = ISTATE_SENT_STATUS;
-	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
+	isert_unmap_tx_desc(tx_desc, ib_dev);
 }
 
 static void
-isert_snd_completion(struct iser_tx_desc *tx_desc,
-		      struct isert_conn *isert_conn)
+isert_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
+	struct isert_conn *isert_conn = wc->qp->qp_context;
 	struct ib_device *ib_dev = isert_conn->cm_id->device;
-	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct isert_rdma_wr *wr;
+	struct iser_tx_desc *tx_desc = cqe_to_tx_desc(wc->wr_cqe);
+	struct isert_cmd *isert_cmd = tx_desc_to_cmd(tx_desc);
 
-	if (!isert_cmd) {
-		isert_unmap_tx_desc(tx_desc, ib_dev);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		isert_print_wc(wc, "send");
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+		isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
 		return;
 	}
-	wr = &isert_cmd->rdma_wr;
 
-	isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
-	switch (wr->iser_ib_op) {
-	case ISER_IB_SEND:
-		isert_response_completion(tx_desc, isert_cmd,
-					  isert_conn, ib_dev);
-		break;
-	case ISER_IB_RDMA_WRITE:
-		isert_completion_rdma_write(tx_desc, isert_cmd);
-		break;
-	case ISER_IB_RDMA_READ:
-		isert_completion_rdma_read(tx_desc, isert_cmd);
-		break;
+	switch (isert_cmd->iscsi_cmd->i_state) {
+	case ISTATE_SEND_TASKMGTRSP:
+	case ISTATE_SEND_LOGOUTRSP:
+	case ISTATE_SEND_REJECT:
+	case ISTATE_SEND_TEXTRSP:
+		isert_unmap_tx_desc(tx_desc, ib_dev);
+
+		INIT_WORK(&isert_cmd->comp_work, isert_do_control_comp);
+		queue_work(isert_comp_wq, &isert_cmd->comp_work);
+		return;
 	default:
-		isert_err("Unknown wr->iser_ib_op: 0x%x\n", wr->iser_ib_op);
-		dump_stack();
+		isert_cmd->iscsi_cmd->i_state = ISTATE_SENT_STATUS;
+		isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
 		break;
 	}
 }
 
-/**
- * is_isert_tx_desc() - Indicate if the completion wr_id
- *     is a TX descriptor or not.
- * @isert_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
-{
-	void *start = isert_conn->rx_descs;
-	int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs);
-
-	if (wr_id >= start && wr_id < start + len)
-		return false;
-
-	return true;
-}
-
-static void
-isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
-{
-	if (wc->wr_id == ISER_BEACON_WRID) {
-		isert_info("conn %p completing wait_comp_err\n",
-			   isert_conn);
-		complete(&isert_conn->wait_comp_err);
-	} else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
-		struct ib_device *ib_dev = isert_conn->cm_id->device;
-		struct isert_cmd *isert_cmd;
-		struct iser_tx_desc *desc;
-
-		desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-		isert_cmd = desc->isert_cmd;
-		if (!isert_cmd)
-			isert_unmap_tx_desc(desc, ib_dev);
-		else
-			isert_completion_put(desc, isert_cmd, ib_dev, true);
-	} else {
-		isert_conn->post_recv_buf_count--;
-		if (!isert_conn->post_recv_buf_count)
-			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
-	}
-}
-
-static void
-isert_handle_wc(struct ib_wc *wc)
-{
-	struct isert_conn *isert_conn;
-	struct iser_tx_desc *tx_desc;
-	struct iser_rx_desc *rx_desc;
-
-	isert_conn = wc->qp->qp_context;
-	if (likely(wc->status == IB_WC_SUCCESS)) {
-		if (wc->opcode == IB_WC_RECV) {
-			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-			isert_rcv_completion(rx_desc, isert_conn, wc->byte_len);
-		} else {
-			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-			isert_snd_completion(tx_desc, isert_conn);
-		}
-	} else {
-		if (wc->status != IB_WC_WR_FLUSH_ERR)
-			isert_err("%s (%d): wr id %llx vend_err %x\n",
-				  ib_wc_status_msg(wc->status), wc->status,
-				  wc->wr_id, wc->vendor_err);
-		else
-			isert_dbg("%s (%d): wr id %llx\n",
-				  ib_wc_status_msg(wc->status), wc->status,
-				  wc->wr_id);
-
-		if (wc->wr_id != ISER_FASTREG_LI_WRID)
-			isert_cq_comp_err(isert_conn, wc);
-	}
-}
-
-static void
-isert_cq_work(struct work_struct *work)
-{
-	enum { isert_poll_budget = 65536 };
-	struct isert_comp *comp = container_of(work, struct isert_comp,
-					       work);
-	struct ib_wc *const wcs = comp->wcs;
-	int i, n, completed = 0;
-
-	while ((n = ib_poll_cq(comp->cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
-		for (i = 0; i < n; i++)
-			isert_handle_wc(&wcs[i]);
-
-		completed += n;
-		if (completed >= isert_poll_budget)
-			break;
-	}
-
-	ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
-}
-
-static void
-isert_cq_callback(struct ib_cq *cq, void *context)
-{
-	struct isert_comp *comp = context;
-
-	queue_work(isert_comp_wq, &comp->work);
-}
-
 static int
 isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
 {
@@ -2395,7 +2293,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 	page_off = offset % PAGE_SIZE;
 
 	rdma_wr->wr.sg_list = ib_sge;
-	rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
+	rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
+
 	/*
 	 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
 	 */
@@ -2428,24 +2327,23 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 }
 
 static int
-isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
-	       struct isert_rdma_wr *wr)
+isert_map_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
 {
+	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
-	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
 	struct isert_conn *isert_conn = conn->context;
-	struct isert_data_buf *data = &wr->data;
+	struct isert_data_buf *data = &isert_cmd->data;
 	struct ib_rdma_wr *rdma_wr;
 	struct ib_sge *ib_sge;
 	u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
 	int ret = 0, i, ib_sge_cnt;
 
-	isert_cmd->tx_desc.isert_cmd = isert_cmd;
-
-	offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0;
+	offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
+			cmd->write_data_done : 0;
 	ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
 				 se_cmd->t_data_nents, se_cmd->data_length,
-				 offset, wr->iser_ib_op, &wr->data);
+				 offset, isert_cmd->iser_ib_op,
+				 &isert_cmd->data);
 	if (ret)
 		return ret;
 
@@ -2458,41 +2356,44 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
-	wr->ib_sge = ib_sge;
+	isert_cmd->ib_sge = ib_sge;
 
-	wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
-	wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num,
-				GFP_KERNEL);
-	if (!wr->rdma_wr) {
-		isert_dbg("Unable to allocate wr->rdma_wr\n");
+	isert_cmd->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
+	isert_cmd->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) *
+			isert_cmd->rdma_wr_num, GFP_KERNEL);
+	if (!isert_cmd->rdma_wr) {
+		isert_dbg("Unable to allocate isert_cmd->rdma_wr\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
 
-	wr->isert_cmd = isert_cmd;
 	rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
 
-	for (i = 0; i < wr->rdma_wr_num; i++) {
-		rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i];
+	for (i = 0; i < isert_cmd->rdma_wr_num; i++) {
+		rdma_wr = &isert_cmd->rdma_wr[i];
 		data_len = min(data_left, rdma_write_max);
 
 		rdma_wr->wr.send_flags = 0;
-		if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+		if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
+			isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+
 			rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 			rdma_wr->remote_addr = isert_cmd->read_va + offset;
 			rdma_wr->rkey = isert_cmd->read_stag;
-			if (i + 1 == wr->rdma_wr_num)
+			if (i + 1 == isert_cmd->rdma_wr_num)
 				rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
 			else
-				rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
+				rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
 		} else {
+			isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
+
 			rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 			rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
 			rdma_wr->rkey = isert_cmd->write_stag;
-			if (i + 1 == wr->rdma_wr_num)
+			if (i + 1 == isert_cmd->rdma_wr_num)
 				rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
 			else
-				rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
+				rdma_wr->wr.next = &isert_cmd->rdma_wr[i + 1].wr;
 		}
 
 		ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
@@ -2517,7 +2418,7 @@ isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
 	u32 rkey;
 
 	memset(inv_wr, 0, sizeof(*inv_wr));
-	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->wr_cqe = NULL;
 	inv_wr->opcode = IB_WR_LOCAL_INV;
 	inv_wr->ex.invalidate_rkey = mr->rkey;
 
@@ -2573,7 +2474,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
 
 	reg_wr.wr.next = NULL;
 	reg_wr.wr.opcode = IB_WR_REG_MR;
-	reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
+	reg_wr.wr.wr_cqe = NULL;
 	reg_wr.wr.send_flags = 0;
 	reg_wr.wr.num_sge = 0;
 	reg_wr.mr = mr;
@@ -2660,10 +2561,10 @@ isert_set_prot_checks(u8 prot_checks)
 
 static int
 isert_reg_sig_mr(struct isert_conn *isert_conn,
-		 struct se_cmd *se_cmd,
-		 struct isert_rdma_wr *rdma_wr,
+		 struct isert_cmd *isert_cmd,
 		 struct fast_reg_descriptor *fr_desc)
 {
+	struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
 	struct ib_sig_handover_wr sig_wr;
 	struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
 	struct pi_context *pi_ctx = fr_desc->pi_ctx;
@@ -2684,14 +2585,14 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
 	sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
-	sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
-	sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA];
+	sig_wr.wr.wr_cqe = NULL;
+	sig_wr.wr.sg_list = &isert_cmd->ib_sg[DATA];
 	sig_wr.wr.num_sge = 1;
 	sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
 	sig_wr.sig_attrs = &sig_attrs;
 	sig_wr.sig_mr = pi_ctx->sig_mr;
 	if (se_cmd->t_prot_sg)
-		sig_wr.prot = &rdma_wr->ib_sg[PROT];
+		sig_wr.prot = &isert_cmd->ib_sg[PROT];
 
 	if (!wr)
 		wr = &sig_wr.wr;
@@ -2705,35 +2606,34 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
 	}
 	fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
 
-	rdma_wr->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
-	rdma_wr->ib_sg[SIG].addr = 0;
-	rdma_wr->ib_sg[SIG].length = se_cmd->data_length;
+	isert_cmd->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
+	isert_cmd->ib_sg[SIG].addr = 0;
+	isert_cmd->ib_sg[SIG].length = se_cmd->data_length;
 	if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
 	    se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
 		/*
 		 * We have protection guards on the wire
 		 * so we need to set a larget transfer
 		 */
-		rdma_wr->ib_sg[SIG].length += se_cmd->prot_length;
+		isert_cmd->ib_sg[SIG].length += se_cmd->prot_length;
 
 	isert_dbg("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
-		  rdma_wr->ib_sg[SIG].addr, rdma_wr->ib_sg[SIG].length,
-		  rdma_wr->ib_sg[SIG].lkey);
+		  isert_cmd->ib_sg[SIG].addr, isert_cmd->ib_sg[SIG].length,
+		  isert_cmd->ib_sg[SIG].lkey);
 err:
 	return ret;
 }
 
 static int
 isert_handle_prot_cmd(struct isert_conn *isert_conn,
-		      struct isert_cmd *isert_cmd,
-		      struct isert_rdma_wr *wr)
+		      struct isert_cmd *isert_cmd)
 {
 	struct isert_device *device = isert_conn->device;
 	struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
 	int ret;
 
-	if (!wr->fr_desc->pi_ctx) {
-		ret = isert_create_pi_ctx(wr->fr_desc,
+	if (!isert_cmd->fr_desc->pi_ctx) {
+		ret = isert_create_pi_ctx(isert_cmd->fr_desc,
 					  device->ib_device,
 					  device->pd);
 		if (ret) {
@@ -2748,16 +2648,20 @@ isert_handle_prot_cmd(struct isert_conn *isert_conn,
 					 se_cmd->t_prot_sg,
 					 se_cmd->t_prot_nents,
 					 se_cmd->prot_length,
-					 0, wr->iser_ib_op, &wr->prot);
+					 0,
+					 isert_cmd->iser_ib_op,
+					 &isert_cmd->prot);
 		if (ret) {
 			isert_err("conn %p failed to map protection buffer\n",
 				  isert_conn);
 			return ret;
 		}
 
-		memset(&wr->ib_sg[PROT], 0, sizeof(wr->ib_sg[PROT]));
-		ret = isert_fast_reg_mr(isert_conn, wr->fr_desc, &wr->prot,
-					ISERT_PROT_KEY_VALID, &wr->ib_sg[PROT]);
+		memset(&isert_cmd->ib_sg[PROT], 0, sizeof(isert_cmd->ib_sg[PROT]));
+		ret = isert_fast_reg_mr(isert_conn, isert_cmd->fr_desc,
+					&isert_cmd->prot,
+					ISERT_PROT_KEY_VALID,
+					&isert_cmd->ib_sg[PROT]);
 		if (ret) {
 			isert_err("conn %p failed to fast reg mr\n",
 				  isert_conn);
@@ -2765,29 +2669,28 @@ isert_handle_prot_cmd(struct isert_conn *isert_conn,
 		}
 	}
 
-	ret = isert_reg_sig_mr(isert_conn, se_cmd, wr, wr->fr_desc);
+	ret = isert_reg_sig_mr(isert_conn, isert_cmd, isert_cmd->fr_desc);
 	if (ret) {
 		isert_err("conn %p failed to fast reg mr\n",
 			  isert_conn);
 		goto unmap_prot_cmd;
 	}
-	wr->fr_desc->ind |= ISERT_PROTECTED;
+	isert_cmd->fr_desc->ind |= ISERT_PROTECTED;
 
 	return 0;
 
 unmap_prot_cmd:
 	if (se_cmd->t_prot_sg)
-		isert_unmap_data_buf(isert_conn, &wr->prot);
+		isert_unmap_data_buf(isert_conn, &isert_cmd->prot);
 
 	return ret;
 }
 
 static int
-isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
-	       struct isert_rdma_wr *wr)
+isert_reg_rdma(struct isert_cmd *isert_cmd, struct iscsi_conn *conn)
 {
+	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
-	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
 	struct isert_conn *isert_conn = conn->context;
 	struct fast_reg_descriptor *fr_desc = NULL;
 	struct ib_rdma_wr *rdma_wr;
@@ -2796,57 +2699,61 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	int ret = 0;
 	unsigned long flags;
 
-	isert_cmd->tx_desc.isert_cmd = isert_cmd;
-
-	offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0;
+	offset = isert_cmd->iser_ib_op == ISER_IB_RDMA_READ ?
+			cmd->write_data_done : 0;
 	ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
 				 se_cmd->t_data_nents, se_cmd->data_length,
-				 offset, wr->iser_ib_op, &wr->data);
+				 offset, isert_cmd->iser_ib_op,
+				 &isert_cmd->data);
 	if (ret)
 		return ret;
 
-	if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
+	if (isert_cmd->data.dma_nents != 1 ||
+	    isert_prot_cmd(isert_conn, se_cmd)) {
 		spin_lock_irqsave(&isert_conn->pool_lock, flags);
 		fr_desc = list_first_entry(&isert_conn->fr_pool,
 					   struct fast_reg_descriptor, list);
 		list_del(&fr_desc->list);
 		spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
-		wr->fr_desc = fr_desc;
+		isert_cmd->fr_desc = fr_desc;
 	}
 
-	ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data,
-				ISERT_DATA_KEY_VALID, &wr->ib_sg[DATA]);
+	ret = isert_fast_reg_mr(isert_conn, fr_desc, &isert_cmd->data,
+				ISERT_DATA_KEY_VALID, &isert_cmd->ib_sg[DATA]);
 	if (ret)
 		goto unmap_cmd;
 
 	if (isert_prot_cmd(isert_conn, se_cmd)) {
-		ret = isert_handle_prot_cmd(isert_conn, isert_cmd, wr);
+		ret = isert_handle_prot_cmd(isert_conn, isert_cmd);
 		if (ret)
 			goto unmap_cmd;
 
-		ib_sg = &wr->ib_sg[SIG];
+		ib_sg = &isert_cmd->ib_sg[SIG];
 	} else {
-		ib_sg = &wr->ib_sg[DATA];
+		ib_sg = &isert_cmd->ib_sg[DATA];
 	}
 
-	memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
-	wr->ib_sge = &wr->s_ib_sge;
-	wr->rdma_wr_num = 1;
-	memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr));
-	wr->rdma_wr = &wr->s_rdma_wr;
-	wr->isert_cmd = isert_cmd;
+	memcpy(&isert_cmd->s_ib_sge, ib_sg, sizeof(*ib_sg));
+	isert_cmd->ib_sge = &isert_cmd->s_ib_sge;
+	isert_cmd->rdma_wr_num = 1;
+	memset(&isert_cmd->s_rdma_wr, 0, sizeof(isert_cmd->s_rdma_wr));
+	isert_cmd->rdma_wr = &isert_cmd->s_rdma_wr;
 
-	rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr;
-	rdma_wr->wr.sg_list = &wr->s_ib_sge;
+	rdma_wr = &isert_cmd->s_rdma_wr;
+	rdma_wr->wr.sg_list = &isert_cmd->s_ib_sge;
 	rdma_wr->wr.num_sge = 1;
-	rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
-	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+	rdma_wr->wr.wr_cqe = &isert_cmd->tx_desc.tx_cqe;
+	if (isert_cmd->iser_ib_op == ISER_IB_RDMA_WRITE) {
+		isert_cmd->tx_desc.tx_cqe.done = isert_rdma_write_done;
+
 		rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
 		rdma_wr->remote_addr = isert_cmd->read_va;
 		rdma_wr->rkey = isert_cmd->read_stag;
 		rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
 				      0 : IB_SEND_SIGNALED;
 	} else {
+		isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
+
 		rdma_wr->wr.opcode = IB_WR_RDMA_READ;
 		rdma_wr->remote_addr = isert_cmd->write_va;
 		rdma_wr->rkey = isert_cmd->write_stag;
@@ -2861,7 +2768,7 @@ unmap_cmd:
 		list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
 		spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
 	}
-	isert_unmap_data_buf(isert_conn, &wr->data);
+	isert_unmap_data_buf(isert_conn, &isert_cmd->data);
 
 	return ret;
 }
@@ -2871,7 +2778,6 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 	struct isert_conn *isert_conn = conn->context;
 	struct isert_device *device = isert_conn->device;
 	struct ib_send_wr *wr_failed;
@@ -2880,8 +2786,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 	isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
 		 isert_cmd, se_cmd->data_length);
 
-	wr->iser_ib_op = ISER_IB_RDMA_WRITE;
-	rc = device->reg_rdma_mem(conn, cmd, wr);
+	isert_cmd->iser_ib_op = ISER_IB_RDMA_WRITE;
+	rc = device->reg_rdma_mem(isert_cmd, conn);
 	if (rc) {
 		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
@@ -2898,8 +2804,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 		isert_init_send_wr(isert_conn, isert_cmd,
 				   &isert_cmd->tx_desc.send_wr);
-		isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
-		wr->rdma_wr_num += 1;
+		isert_cmd->s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
+		isert_cmd->rdma_wr_num += 1;
 
 		rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
 		if (rc) {
@@ -2908,7 +2814,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 		}
 	}
 
-	rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
+	rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
 	if (rc)
 		isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
@@ -2927,7 +2833,6 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
 {
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 	struct isert_conn *isert_conn = conn->context;
 	struct isert_device *device = isert_conn->device;
 	struct ib_send_wr *wr_failed;
@@ -2935,14 +2840,14 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
 
 	isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
 		 isert_cmd, se_cmd->data_length, cmd->write_data_done);
-	wr->iser_ib_op = ISER_IB_RDMA_READ;
-	rc = device->reg_rdma_mem(conn, cmd, wr);
+	isert_cmd->iser_ib_op = ISER_IB_RDMA_READ;
+	rc = device->reg_rdma_mem(isert_cmd, conn);
 	if (rc) {
 		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
+	rc = ib_post_send(isert_conn->qp, &isert_cmd->rdma_wr->wr, &wr_failed);
 	if (rc)
 		isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
 
@@ -3214,6 +3119,7 @@ accept_wait:
 
 	conn->context = isert_conn;
 	isert_conn->conn = conn;
+	isert_conn->state = ISER_CONN_BOUND;
 
 	isert_set_conn_info(np, conn, isert_conn);
 
@@ -3274,8 +3180,6 @@ static void isert_release_work(struct work_struct *work)
 
 	isert_info("Starting release conn %p\n", isert_conn);
 
-	wait_for_completion(&isert_conn->wait);
-
 	mutex_lock(&isert_conn->mutex);
 	isert_conn->state = ISER_CONN_DOWN;
 	mutex_unlock(&isert_conn->mutex);
@@ -3310,14 +3214,26 @@ isert_wait4cmds(struct iscsi_conn *conn)
 }
 
 static void
+isert_beacon_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct isert_conn *isert_conn = wc->qp->qp_context;
+
+	isert_print_wc(wc, "beacon");
+
+	isert_info("conn %p completing wait_comp_err\n", isert_conn);
+	complete(&isert_conn->wait_comp_err);
+}
+
+static void
 isert_wait4flush(struct isert_conn *isert_conn)
 {
 	struct ib_recv_wr *bad_wr;
+	static struct ib_cqe cqe = { .done = isert_beacon_done };
 
 	isert_info("conn %p\n", isert_conn);
 
 	init_completion(&isert_conn->wait_comp_err);
-	isert_conn->beacon.wr_id = ISER_BEACON_WRID;
+	isert_conn->beacon.wr_cqe = &cqe;
 	/* post an indication that all flush errors were consumed */
 	if (ib_post_recv(isert_conn->qp, &isert_conn->beacon, &bad_wr)) {
 		isert_err("conn %p failed to post beacon", isert_conn);
@@ -3369,14 +3285,6 @@ static void isert_wait_conn(struct iscsi_conn *conn)
 	isert_info("Starting conn %p\n", isert_conn);
 
 	mutex_lock(&isert_conn->mutex);
-	/*
-	 * Only wait for wait_comp_err if the isert_conn made it
-	 * into full feature phase..
-	 */
-	if (isert_conn->state == ISER_CONN_INIT) {
-		mutex_unlock(&isert_conn->mutex);
-		return;
-	}
 	isert_conn_terminate(isert_conn);
 	mutex_unlock(&isert_conn->mutex);
 
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 8d50453eef66..192788a4820c 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -36,9 +36,7 @@
 /* Constant PDU lengths calculations */
 #define ISER_HEADERS_LEN	(sizeof(struct iser_ctrl) + \
 				 sizeof(struct iscsi_hdr))
-#define ISER_RECV_DATA_SEG_LEN	8192
-#define ISER_RX_PAYLOAD_SIZE	(ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE	(ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+#define ISER_RX_PAYLOAD_SIZE	(ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
 
 /* QP settings */
 /* Maximal bounds on received asynchronous PDUs */
@@ -62,12 +60,11 @@
 				ISERT_MAX_TX_MISC_PDUS	+ \
 				ISERT_MAX_RX_MISC_PDUS)
 
-#define ISER_RX_PAD_SIZE	(ISER_RECV_DATA_SEG_LEN + 4096 - \
-		(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+#define ISER_RX_PAD_SIZE	(ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \
+		(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
+		 sizeof(struct ib_cqe)))
 
 #define ISCSI_ISER_SG_TABLESIZE		256
-#define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL
-#define ISER_BEACON_WRID               0xfffffffffffffffeULL
 
 enum isert_desc_type {
 	ISCSI_TX_CONTROL,
@@ -84,6 +81,7 @@ enum iser_ib_op_code {
 enum iser_conn_state {
 	ISER_CONN_INIT,
 	ISER_CONN_UP,
+	ISER_CONN_BOUND,
 	ISER_CONN_FULL_FEATURE,
 	ISER_CONN_TERMINATING,
 	ISER_CONN_DOWN,
@@ -92,23 +90,35 @@ enum iser_conn_state {
 struct iser_rx_desc {
 	struct iser_ctrl iser_header;
 	struct iscsi_hdr iscsi_header;
-	char		data[ISER_RECV_DATA_SEG_LEN];
+	char		data[ISCSI_DEF_MAX_RECV_SEG_LEN];
 	u64		dma_addr;
 	struct ib_sge	rx_sg;
+	struct ib_cqe	rx_cqe;
 	char		pad[ISER_RX_PAD_SIZE];
 } __packed;
 
+static inline struct iser_rx_desc *cqe_to_rx_desc(struct ib_cqe *cqe)
+{
+	return container_of(cqe, struct iser_rx_desc, rx_cqe);
+}
+
 struct iser_tx_desc {
 	struct iser_ctrl iser_header;
 	struct iscsi_hdr iscsi_header;
 	enum isert_desc_type type;
 	u64		dma_addr;
 	struct ib_sge	tx_sg[2];
+	struct ib_cqe	tx_cqe;
 	int		num_sge;
-	struct isert_cmd *isert_cmd;
 	struct ib_send_wr send_wr;
 } __packed;
 
+static inline struct iser_tx_desc *cqe_to_tx_desc(struct ib_cqe *cqe)
+{
+	return container_of(cqe, struct iser_tx_desc, tx_cqe);
+}
+
+
 enum isert_indicator {
 	ISERT_PROTECTED		= 1 << 0,
 	ISERT_DATA_KEY_VALID	= 1 << 1,
@@ -144,20 +154,6 @@ enum {
 	SIG = 2,
 };
 
-struct isert_rdma_wr {
-	struct isert_cmd	*isert_cmd;
-	enum iser_ib_op_code	iser_ib_op;
-	struct ib_sge		*ib_sge;
-	struct ib_sge		s_ib_sge;
-	int			rdma_wr_num;
-	struct ib_rdma_wr	*rdma_wr;
-	struct ib_rdma_wr	s_rdma_wr;
-	struct ib_sge		ib_sg[3];
-	struct isert_data_buf	data;
-	struct isert_data_buf	prot;
-	struct fast_reg_descriptor *fr_desc;
-};
-
 struct isert_cmd {
 	uint32_t		read_stag;
 	uint32_t		write_stag;
@@ -170,22 +166,34 @@ struct isert_cmd {
 	struct iscsi_cmd	*iscsi_cmd;
 	struct iser_tx_desc	tx_desc;
 	struct iser_rx_desc	*rx_desc;
-	struct isert_rdma_wr	rdma_wr;
+	enum iser_ib_op_code	iser_ib_op;
+	struct ib_sge		*ib_sge;
+	struct ib_sge		s_ib_sge;
+	int			rdma_wr_num;
+	struct ib_rdma_wr	*rdma_wr;
+	struct ib_rdma_wr	s_rdma_wr;
+	struct ib_sge		ib_sg[3];
+	struct isert_data_buf	data;
+	struct isert_data_buf	prot;
+	struct fast_reg_descriptor *fr_desc;
 	struct work_struct	comp_work;
 	struct scatterlist	sg;
 };
 
+static inline struct isert_cmd *tx_desc_to_cmd(struct iser_tx_desc *desc)
+{
+	return container_of(desc, struct isert_cmd, tx_desc);
+}
+
 struct isert_device;
 
 struct isert_conn {
 	enum iser_conn_state	state;
-	int			post_recv_buf_count;
 	u32			responder_resources;
 	u32			initiator_depth;
 	bool			pi_support;
 	u32			max_sge;
-	char			*login_buf;
-	char			*login_req_buf;
+	struct iser_rx_desc	*login_req_buf;
 	char			*login_rsp_buf;
 	u64			login_req_dma;
 	int			login_req_len;
@@ -201,7 +209,6 @@ struct isert_conn {
 	struct ib_qp		*qp;
 	struct isert_device	*device;
 	struct mutex		mutex;
-	struct completion	wait;
 	struct completion	wait_comp_err;
 	struct kref		kref;
 	struct list_head	fr_pool;
@@ -221,17 +228,13 @@ struct isert_conn {
  *
  * @device:     pointer to device handle
  * @cq:         completion queue
- * @wcs:        work completion array
  * @active_qps: Number of active QPs attached
  *              to completion context
- * @work:       completion work handle
  */
 struct isert_comp {
 	struct isert_device     *device;
 	struct ib_cq		*cq;
-	struct ib_wc		 wcs[16];
 	int                      active_qps;
-	struct work_struct	 work;
 };
 
 struct isert_device {
@@ -243,9 +246,8 @@ struct isert_device {
 	struct isert_comp	*comps;
 	int                     comps_used;
 	struct list_head	dev_node;
-	int			(*reg_rdma_mem)(struct iscsi_conn *conn,
-						    struct iscsi_cmd *cmd,
-						    struct isert_rdma_wr *wr);
+	int			(*reg_rdma_mem)(struct isert_cmd *isert_cmd,
+						struct iscsi_conn *conn);
 	void			(*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
 						  struct isert_conn *isert_conn);
 };
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 25bdaeef2520..0bd3cb2f3c67 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -839,7 +839,7 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
 		if (srpt_set_ch_state(ch, CH_DISCONNECTED))
 			schedule_work(&ch->release_work);
 		else
-			WARN_ONCE("%s-%d\n", ch->sess_name, ch->qp->qp_num);
+			WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num);
 	}
 }
 
@@ -1264,40 +1264,26 @@ free_mem:
  */
 static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
 {
+	struct se_session *se_sess;
 	struct srpt_send_ioctx *ioctx;
-	unsigned long flags;
+	int tag;
 
 	BUG_ON(!ch);
+	se_sess = ch->sess;
 
-	ioctx = NULL;
-	spin_lock_irqsave(&ch->spinlock, flags);
-	if (!list_empty(&ch->free_list)) {
-		ioctx = list_first_entry(&ch->free_list,
-					 struct srpt_send_ioctx, free_list);
-		list_del(&ioctx->free_list);
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	if (tag < 0) {
+		pr_err("Unable to obtain tag for srpt_send_ioctx\n");
+		return NULL;
 	}
-	spin_unlock_irqrestore(&ch->spinlock, flags);
-
-	if (!ioctx)
-		return ioctx;
-
-	BUG_ON(ioctx->ch != ch);
+	ioctx = &((struct srpt_send_ioctx *)se_sess->sess_cmd_map)[tag];
+	memset(ioctx, 0, sizeof(struct srpt_send_ioctx));
+	ioctx->ch = ch;
 	spin_lock_init(&ioctx->spinlock);
 	ioctx->state = SRPT_STATE_NEW;
-	ioctx->n_rbuf = 0;
-	ioctx->rbufs = NULL;
-	ioctx->n_rdma = 0;
-	ioctx->n_rdma_wrs = 0;
-	ioctx->rdma_wrs = NULL;
-	ioctx->mapped_sg_count = 0;
 	init_completion(&ioctx->tx_done);
-	ioctx->queue_status_only = false;
-	/*
-	 * transport_init_se_cmd() does not initialize all fields, so do it
-	 * here.
-	 */
-	memset(&ioctx->cmd, 0, sizeof(ioctx->cmd));
-	memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data));
+
+	ioctx->cmd.map_tag = tag;
 
 	return ioctx;
 }
@@ -2034,9 +2020,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 	struct srp_login_rej *rej;
 	struct ib_cm_rep_param *rep_param;
 	struct srpt_rdma_ch *ch, *tmp_ch;
-	struct se_node_acl *se_acl;
 	u32 it_iu_len;
-	int i, ret = 0;
+	int ret = 0;
 	unsigned char *p;
 
 	WARN_ON_ONCE(irqs_disabled());
@@ -2158,12 +2143,6 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 	if (!ch->ioctx_ring)
 		goto free_ch;
 
-	INIT_LIST_HEAD(&ch->free_list);
-	for (i = 0; i < ch->rq_size; i++) {
-		ch->ioctx_ring[i]->ch = ch;
-		list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
-	}
-
 	ret = srpt_create_ch_ib(ch);
 	if (ret) {
 		rej->reason = cpu_to_be32(
@@ -2193,19 +2172,13 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 	pr_debug("registering session %s\n", ch->sess_name);
 	p = &ch->sess_name[0];
 
-	ch->sess = transport_init_session(TARGET_PROT_NORMAL);
-	if (IS_ERR(ch->sess)) {
-		rej->reason = cpu_to_be32(
-				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-		pr_debug("Failed to create session\n");
-		goto destroy_ib;
-	}
-
 try_again:
-	se_acl = core_tpg_get_initiator_node_acl(&sport->port_tpg_1, p);
-	if (!se_acl) {
+	ch->sess = target_alloc_session(&sport->port_tpg_1, ch->rq_size,
+					sizeof(struct srpt_send_ioctx),
+					TARGET_PROT_NORMAL, p, ch, NULL);
+	if (IS_ERR(ch->sess)) {
 		pr_info("Rejected login because no ACL has been"
-			" configured yet for initiator %s.\n", ch->sess_name);
+			" configured yet for initiator %s.\n", p);
 		/*
 		 * XXX: Hack to retry of ch->i_port_id without leading '0x'
 		 */
@@ -2213,14 +2186,11 @@ try_again:
 			p += 2;
 			goto try_again;
 		}
-		rej->reason = cpu_to_be32(
+		rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
+				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
 				SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
-		transport_free_session(ch->sess);
 		goto destroy_ib;
 	}
-	ch->sess->se_node_acl = se_acl;
-
-	transport_register_session(&sport->port_tpg_1, se_acl, ch->sess, ch);
 
 	pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess,
 		 ch->sess_name, ch->cm_id);
@@ -2911,7 +2881,7 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
 	struct srpt_send_ioctx *ioctx = container_of(se_cmd,
 				struct srpt_send_ioctx, cmd);
 	struct srpt_rdma_ch *ch = ioctx->ch;
-	unsigned long flags;
+	struct se_session *se_sess = ch->sess;
 
 	WARN_ON(ioctx->state != SRPT_STATE_DONE);
 	WARN_ON(ioctx->mapped_sg_count != 0);
@@ -2922,9 +2892,7 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
 		ioctx->n_rbuf = 0;
 	}
 
-	spin_lock_irqsave(&ch->spinlock, flags);
-	list_add(&ioctx->free_list, &ch->free_list);
-	spin_unlock_irqrestore(&ch->spinlock, flags);
+	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
 
 /**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index af9b8b527340..ca288f019315 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -179,7 +179,6 @@ struct srpt_recv_ioctx {
  * struct srpt_send_ioctx - SRPT send I/O context.
  * @ioctx:       See above.
  * @ch:          Channel pointer.
- * @free_list:   Node in srpt_rdma_ch.free_list.
  * @n_rbuf:      Number of data buffers in the received SRP command.
  * @rbufs:       Pointer to SRP data buffer array.
  * @single_rbuf: SRP data buffer if the command has only a single buffer.
@@ -202,7 +201,6 @@ struct srpt_send_ioctx {
 	struct srp_direct_buf	*rbufs;
 	struct srp_direct_buf	single_rbuf;
 	struct scatterlist	*sg;
-	struct list_head	free_list;
 	spinlock_t		spinlock;
 	enum srpt_command_state	state;
 	struct se_cmd		cmd;
diff --git a/drivers/input/input-compat.c b/drivers/input/input-compat.c
index 64ca7113ff28..d84d20b9cec0 100644
--- a/drivers/input/input-compat.c
+++ b/drivers/input/input-compat.c
@@ -17,7 +17,7 @@
 int input_event_from_user(const char __user *buffer,
 			  struct input_event *event)
 {
-	if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
+	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
 		struct input_event_compat compat_event;
 
 		if (copy_from_user(&compat_event, buffer,
@@ -41,7 +41,7 @@ int input_event_from_user(const char __user *buffer,
 int input_event_to_user(char __user *buffer,
 			const struct input_event *event)
 {
-	if (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) {
+	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
 		struct input_event_compat compat_event;
 
 		compat_event.time.tv_sec = event->time.tv_sec;
@@ -65,7 +65,7 @@ int input_event_to_user(char __user *buffer,
 int input_ff_effect_from_user(const char __user *buffer, size_t size,
 			      struct ff_effect *effect)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct ff_effect_compat *compat_effect;
 
 		if (size != sizeof(struct ff_effect_compat))
diff --git a/drivers/input/input-compat.h b/drivers/input/input-compat.h
index 148f66fe3205..1563160a7af3 100644
--- a/drivers/input/input-compat.h
+++ b/drivers/input/input-compat.h
@@ -17,18 +17,6 @@
 
 #ifdef CONFIG_COMPAT
 
-/* Note to the author of this code: did it ever occur to
-   you why the ifdefs are needed? Think about it again. -AK */
-#if defined(CONFIG_X86_64) || defined(CONFIG_TILE)
-#  define INPUT_COMPAT_TEST is_compat_task()
-#elif defined(CONFIG_S390)
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_31BIT)
-#elif defined(CONFIG_MIPS)
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT_ADDR)
-#else
-#  define INPUT_COMPAT_TEST test_thread_flag(TIF_32BIT)
-#endif
-
 struct input_event_compat {
 	struct compat_timeval time;
 	__u16 type;
@@ -67,7 +55,7 @@ struct ff_effect_compat {
 
 static inline size_t input_event_size(void)
 {
-	return (INPUT_COMPAT_TEST && !COMPAT_USE_64BIT_TIME) ?
+	return (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) ?
 		sizeof(struct input_event_compat) : sizeof(struct input_event);
 }
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 880605959aa6..b87ffbd4547d 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1015,7 +1015,7 @@ static int input_bits_to_string(char *buf, int buf_size,
 {
 	int len = 0;
 
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		u32 dword = bits >> 32;
 		if (dword || !skip_empty)
 			len += snprintf(buf, buf_size, "%x ", dword);
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index cfd58e87da26..1c5914cae853 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -817,26 +817,49 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	ar2->udev = udev;
 
+	/* Sanity check, first interface must have an endpoint */
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 0 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail1;
+	}
 	ar2->intf[0] = interface;
 	ar2->ep[0] = &alt->endpoint[0].desc;
 
+	/* Sanity check, the device must have two interfaces */
 	ar2->intf[1] = usb_ifnum_to_if(udev, 1);
+	if ((udev->actconfig->desc.bNumInterfaces < 2) || !ar2->intf[1]) {
+		dev_err(&interface->dev, "%s(): need 2 interfaces, found %d\n",
+			__func__, udev->actconfig->desc.bNumInterfaces);
+		r = -ENODEV;
+		goto fail1;
+	}
+
 	r = usb_driver_claim_interface(&ati_remote2_driver, ar2->intf[1], ar2);
 	if (r)
 		goto fail1;
+
+	/* Sanity check, second interface must have an endpoint */
 	alt = ar2->intf[1]->cur_altsetting;
+	if (alt->desc.bNumEndpoints < 1 || !alt->endpoint) {
+		dev_err(&interface->dev,
+			"%s(): interface 1 must have an endpoint\n", __func__);
+		r = -ENODEV;
+		goto fail2;
+	}
 	ar2->ep[1] = &alt->endpoint[0].desc;
 
 	r = ati_remote2_urb_init(ar2);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	ar2->channel_mask = channel_mask;
 	ar2->mode_mask = mode_mask;
 
 	r = ati_remote2_setup(ar2, ar2->channel_mask);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	usb_make_path(udev, ar2->phys, sizeof(ar2->phys));
 	strlcat(ar2->phys, "/input0", sizeof(ar2->phys));
@@ -845,11 +868,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	r = sysfs_create_group(&udev->dev.kobj, &ati_remote2_attr_group);
 	if (r)
-		goto fail2;
+		goto fail3;
 
 	r = ati_remote2_input_init(ar2);
 	if (r)
-		goto fail3;
+		goto fail4;
 
 	usb_set_intfdata(interface, ar2);
 
@@ -857,10 +880,11 @@ static int ati_remote2_probe(struct usb_interface *interface, const struct usb_d
 
 	return 0;
 
- fail3:
+ fail4:
 	sysfs_remove_group(&udev->dev.kobj, &ati_remote2_attr_group);
- fail2:
+ fail3:
 	ati_remote2_urb_cleanup(ar2);
+ fail2:
 	usb_driver_release_interface(&ati_remote2_driver, ar2->intf[1]);
  fail1:
 	kfree(ar2);
diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c
index ac1fa5f44580..9c0ea36913b4 100644
--- a/drivers/input/misc/ims-pcu.c
+++ b/drivers/input/misc/ims-pcu.c
@@ -1663,6 +1663,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
 
 	pcu->ctrl_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bMasterInterface0);
+	if (!pcu->ctrl_intf)
+		return -EINVAL;
 
 	alt = pcu->ctrl_intf->cur_altsetting;
 	pcu->ep_ctrl = &alt->endpoint[0].desc;
@@ -1670,6 +1672,8 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc
 
 	pcu->data_intf = usb_ifnum_to_if(pcu->udev,
 					 union_desc->bSlaveInterface0);
+	if (!pcu->data_intf)
+		return -EINVAL;
 
 	alt = pcu->data_intf->cur_altsetting;
 	if (alt->desc.bNumEndpoints != 2) {
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 4eb9e4d94f46..abe1a927b332 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -664,7 +664,7 @@ struct uinput_ff_upload_compat {
 static int uinput_ff_upload_to_user(char __user *buffer,
 				    const struct uinput_ff_upload *ff_up)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct uinput_ff_upload_compat ff_up_compat;
 
 		ff_up_compat.request_id = ff_up->request_id;
@@ -695,7 +695,7 @@ static int uinput_ff_upload_to_user(char __user *buffer,
 static int uinput_ff_upload_from_user(const char __user *buffer,
 				      struct uinput_ff_upload *ff_up)
 {
-	if (INPUT_COMPAT_TEST) {
+	if (in_compat_syscall()) {
 		struct uinput_ff_upload_compat ff_up_compat;
 
 		if (copy_from_user(&ff_up_compat, buffer,
diff --git a/drivers/input/mouse/byd.c b/drivers/input/mouse/byd.c
index 9425e0f6c5ce..fdc243ca93ed 100644
--- a/drivers/input/mouse/byd.c
+++ b/drivers/input/mouse/byd.c
@@ -12,10 +12,12 @@
 #include <linux/input.h>
 #include <linux/libps2.h>
 #include <linux/serio.h>
+#include <linux/slab.h>
 
 #include "psmouse.h"
 #include "byd.h"
 
+/* PS2 Bits */
 #define PS2_Y_OVERFLOW	BIT_MASK(7)
 #define PS2_X_OVERFLOW	BIT_MASK(6)
 #define PS2_Y_SIGN	BIT_MASK(5)
@@ -26,69 +28,249 @@
 #define PS2_LEFT	BIT_MASK(0)
 
 /*
- * The touchpad reports gestures in the last byte of each packet. It can take
- * any of the following values:
+ * BYD pad constants
  */
 
-/* One-finger scrolling in one of the edge scroll zones. */
-#define BYD_SCROLLUP		0xCA
-#define BYD_SCROLLDOWN		0x36
-#define BYD_SCROLLLEFT		0xCB
-#define BYD_SCROLLRIGHT		0x35
-/* Two-finger scrolling. */
-#define BYD_2DOWN		0x2B
-#define BYD_2UP			0xD5
-#define BYD_2LEFT		0xD6
-#define BYD_2RIGHT		0x2A
-/* Pinching in or out. */
-#define BYD_ZOOMOUT		0xD8
-#define BYD_ZOOMIN		0x28
-/* Three-finger swipe. */
-#define BYD_3UP			0xD3
-#define BYD_3DOWN		0x2D
-#define BYD_3LEFT		0xD4
-#define BYD_3RIGHT		0x2C
-/* Four-finger swipe. */
-#define BYD_4UP			0xCD
-#define BYD_4DOWN		0x33
+/*
+ * True device resolution is unknown, however experiments show the
+ * resolution is about 111 units/mm.
+ * Absolute coordinate packets are in the range 0-255 for both X and Y
+ * we pick ABS_X/ABS_Y dimensions which are multiples of 256 and in
+ * the right ballpark given the touchpad's physical dimensions and estimate
+ * resolution per spec sheet, device active area dimensions are
+ * 101.6 x 60.1 mm.
+ */
+#define BYD_PAD_WIDTH		11264
+#define BYD_PAD_HEIGHT		6656
+#define BYD_PAD_RESOLUTION	111
 
-int byd_detect(struct psmouse *psmouse, bool set_properties)
+/*
+ * Given the above dimensions, relative packets velocity is in multiples of
+ * 1 unit / 11 milliseconds.  We use this dt to estimate distance traveled
+ */
+#define BYD_DT			11
+/* Time in jiffies used to timeout various touch events (64 ms) */
+#define BYD_TOUCH_TIMEOUT	msecs_to_jiffies(64)
+
+/* BYD commands reverse engineered from windows driver */
+
+/*
+ * Swipe gesture from off-pad to on-pad
+ *  0 : disable
+ *  1 : enable
+ */
+#define BYD_CMD_SET_OFFSCREEN_SWIPE		0x10cc
+/*
+ * Tap and drag delay time
+ *  0 : disable
+ *  1 - 8 : least to most delay
+ */
+#define BYD_CMD_SET_TAP_DRAG_DELAY_TIME		0x10cf
+/*
+ * Physical buttons function mapping
+ *  0 : enable
+ *  4 : normal
+ *  5 : left button custom command
+ *  6 : right button custom command
+ *  8 : disable
+ */
+#define BYD_CMD_SET_PHYSICAL_BUTTONS		0x10d0
+/*
+ * Absolute mode (1 byte X/Y resolution)
+ *  0 : disable
+ *  2 : enable
+ */
+#define BYD_CMD_SET_ABSOLUTE_MODE		0x10d1
+/*
+ * Two finger scrolling
+ *  1 : vertical
+ *  2 : horizontal
+ *  3 : vertical + horizontal
+ *  4 : disable
+ */
+#define BYD_CMD_SET_TWO_FINGER_SCROLL		0x10d2
+/*
+ * Handedness
+ *  1 : right handed
+ *  2 : left handed
+ */
+#define BYD_CMD_SET_HANDEDNESS			0x10d3
+/*
+ * Tap to click
+ *  1 : enable
+ *  2 : disable
+ */
+#define BYD_CMD_SET_TAP				0x10d4
+/*
+ * Tap and drag
+ *  1 : tap and hold to drag
+ *  2 : tap and hold to drag + lock
+ *  3 : disable
+ */
+#define BYD_CMD_SET_TAP_DRAG			0x10d5
+/*
+ * Touch sensitivity
+ *  1 - 7 : least to most sensitive
+ */
+#define BYD_CMD_SET_TOUCH_SENSITIVITY		0x10d6
+/*
+ * One finger scrolling
+ *  1 : vertical
+ *  2 : horizontal
+ *  3 : vertical + horizontal
+ *  4 : disable
+ */
+#define BYD_CMD_SET_ONE_FINGER_SCROLL		0x10d7
+/*
+ * One finger scrolling function
+ *  1 : free scrolling
+ *  2 : edge motion
+ *  3 : free scrolling + edge motion
+ *  4 : disable
+ */
+#define BYD_CMD_SET_ONE_FINGER_SCROLL_FUNC	0x10d8
+/*
+ * Sliding speed
+ *  1 - 5 : slowest to fastest
+ */
+#define BYD_CMD_SET_SLIDING_SPEED		0x10da
+/*
+ * Edge motion
+ *  1 : disable
+ *  2 : enable when dragging
+ *  3 : enable when dragging and pointing
+ */
+#define BYD_CMD_SET_EDGE_MOTION			0x10db
+/*
+ * Left edge region size
+ *  0 - 7 : smallest to largest width
+ */
+#define BYD_CMD_SET_LEFT_EDGE_REGION		0x10dc
+/*
+ * Top edge region size
+ *  0 - 9 : smallest to largest height
+ */
+#define BYD_CMD_SET_TOP_EDGE_REGION		0x10dd
+/*
+ * Disregard palm press as clicks
+ *  1 - 6 : smallest to largest
+ */
+#define BYD_CMD_SET_PALM_CHECK			0x10de
+/*
+ * Right edge region size
+ *  0 - 7 : smallest to largest width
+ */
+#define BYD_CMD_SET_RIGHT_EDGE_REGION		0x10df
+/*
+ * Bottom edge region size
+ *  0 - 9 : smallest to largest height
+ */
+#define BYD_CMD_SET_BOTTOM_EDGE_REGION		0x10e1
+/*
+ * Multitouch gestures
+ *  1 : enable
+ *  2 : disable
+ */
+#define BYD_CMD_SET_MULTITOUCH			0x10e3
+/*
+ * Edge motion speed
+ *  0 : control with finger pressure
+ *  1 - 9 : slowest to fastest
+ */
+#define BYD_CMD_SET_EDGE_MOTION_SPEED		0x10e4
+/*
+ * Two finger scolling function
+ *  0 : free scrolling
+ *  1 : free scrolling (with momentum)
+ *  2 : edge motion
+ *  3 : free scrolling (with momentum) + edge motion
+ *  4 : disable
+ */
+#define BYD_CMD_SET_TWO_FINGER_SCROLL_FUNC	0x10e5
+
+/*
+ * The touchpad generates a mixture of absolute and relative packets, indicated
+ * by the the last byte of each packet being set to one of the following:
+ */
+#define BYD_PACKET_ABSOLUTE			0xf8
+#define BYD_PACKET_RELATIVE			0x00
+/* Multitouch gesture packets */
+#define BYD_PACKET_PINCH_IN			0xd8
+#define BYD_PACKET_PINCH_OUT			0x28
+#define BYD_PACKET_ROTATE_CLOCKWISE		0x29
+#define BYD_PACKET_ROTATE_ANTICLOCKWISE		0xd7
+#define BYD_PACKET_TWO_FINGER_SCROLL_RIGHT	0x2a
+#define BYD_PACKET_TWO_FINGER_SCROLL_DOWN	0x2b
+#define BYD_PACKET_TWO_FINGER_SCROLL_UP		0xd5
+#define BYD_PACKET_TWO_FINGER_SCROLL_LEFT	0xd6
+#define BYD_PACKET_THREE_FINGER_SWIPE_RIGHT	0x2c
+#define BYD_PACKET_THREE_FINGER_SWIPE_DOWN	0x2d
+#define BYD_PACKET_THREE_FINGER_SWIPE_UP	0xd3
+#define BYD_PACKET_THREE_FINGER_SWIPE_LEFT	0xd4
+#define BYD_PACKET_FOUR_FINGER_DOWN		0x33
+#define BYD_PACKET_FOUR_FINGER_UP		0xcd
+#define BYD_PACKET_REGION_SCROLL_RIGHT		0x35
+#define BYD_PACKET_REGION_SCROLL_DOWN		0x36
+#define BYD_PACKET_REGION_SCROLL_UP		0xca
+#define BYD_PACKET_REGION_SCROLL_LEFT		0xcb
+#define BYD_PACKET_RIGHT_CORNER_CLICK		0xd2
+#define BYD_PACKET_LEFT_CORNER_CLICK		0x2e
+#define BYD_PACKET_LEFT_AND_RIGHT_CORNER_CLICK	0x2f
+#define BYD_PACKET_ONTO_PAD_SWIPE_RIGHT		0x37
+#define BYD_PACKET_ONTO_PAD_SWIPE_DOWN		0x30
+#define BYD_PACKET_ONTO_PAD_SWIPE_UP		0xd0
+#define BYD_PACKET_ONTO_PAD_SWIPE_LEFT		0xc9
+
+struct byd_data {
+	struct timer_list timer;
+	s32 abs_x;
+	s32 abs_y;
+	typeof(jiffies) last_touch_time;
+	bool btn_left;
+	bool btn_right;
+	bool touch;
+};
+
+static void byd_report_input(struct psmouse *psmouse)
 {
-	struct ps2dev *ps2dev = &psmouse->ps2dev;
-	unsigned char param[4];
+	struct byd_data *priv = psmouse->private;
+	struct input_dev *dev = psmouse->dev;
 
-	param[0] = 0x03;
-	param[1] = 0x00;
-	param[2] = 0x00;
-	param[3] = 0x00;
+	input_report_key(dev, BTN_TOUCH, priv->touch);
+	input_report_key(dev, BTN_TOOL_FINGER, priv->touch);
 
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
-		return -1;
-	if (ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO))
-		return -1;
+	input_report_abs(dev, ABS_X, priv->abs_x);
+	input_report_abs(dev, ABS_Y, priv->abs_y);
+	input_report_key(dev, BTN_LEFT, priv->btn_left);
+	input_report_key(dev, BTN_RIGHT, priv->btn_right);
 
-	if (param[1] != 0x03 || param[2] != 0x64)
-		return -ENODEV;
+	input_sync(dev);
+}
 
-	psmouse_dbg(psmouse, "BYD touchpad detected\n");
+static void byd_clear_touch(unsigned long data)
+{
+	struct psmouse *psmouse = (struct psmouse *)data;
+	struct byd_data *priv = psmouse->private;
 
-	if (set_properties) {
-		psmouse->vendor = "BYD";
-		psmouse->name = "TouchPad";
-	}
+	serio_pause_rx(psmouse->ps2dev.serio);
+	priv->touch = false;
 
-	return 0;
+	byd_report_input(psmouse);
+
+	serio_continue_rx(psmouse->ps2dev.serio);
+
+	/*
+	 * Move cursor back to center of pad when we lose touch - this
+	 * specifically improves user experience when moving cursor with one
+	 * finger, and pressing a button with another.
+	 */
+	priv->abs_x = BYD_PAD_WIDTH / 2;
+	priv->abs_y = BYD_PAD_HEIGHT / 2;
 }
 
 static psmouse_ret_t byd_process_byte(struct psmouse *psmouse)
 {
-	struct input_dev *dev = psmouse->dev;
+	struct byd_data *priv = psmouse->private;
 	u8 *pkt = psmouse->packet;
 
 	if (psmouse->pktcnt > 0 && !(pkt[0] & PS2_ALWAYS_1)) {
@@ -102,53 +284,34 @@ static psmouse_ret_t byd_process_byte(struct psmouse *psmouse)
 
 	/* Otherwise, a full packet has been received */
 	switch (pkt[3]) {
-	case 0: {
+	case BYD_PACKET_ABSOLUTE:
+		/* Only use absolute packets for the start of movement. */
+		if (!priv->touch) {
+			/* needed to detect tap */
+			typeof(jiffies) tap_time =
+				priv->last_touch_time + BYD_TOUCH_TIMEOUT;
+			priv->touch = time_after(jiffies, tap_time);
+
+			/* init abs position */
+			priv->abs_x = pkt[1] * (BYD_PAD_WIDTH / 256);
+			priv->abs_y = (255 - pkt[2]) * (BYD_PAD_HEIGHT / 256);
+		}
+		break;
+	case BYD_PACKET_RELATIVE: {
 		/* Standard packet */
 		/* Sign-extend if a sign bit is set. */
-		unsigned int signx = pkt[0] & PS2_X_SIGN ? ~0xFF : 0;
-		unsigned int signy = pkt[0] & PS2_Y_SIGN ? ~0xFF : 0;
-		int dx = signx | (int) pkt[1];
-		int dy = signy | (int) pkt[2];
+		u32 signx = pkt[0] & PS2_X_SIGN ? ~0xFF : 0;
+		u32 signy = pkt[0] & PS2_Y_SIGN ? ~0xFF : 0;
+		s32 dx = signx | (int) pkt[1];
+		s32 dy = signy | (int) pkt[2];
 
-		input_report_rel(psmouse->dev, REL_X, dx);
-		input_report_rel(psmouse->dev, REL_Y, -dy);
+		/* Update position based on velocity */
+		priv->abs_x += dx * BYD_DT;
+		priv->abs_y -= dy * BYD_DT;
 
-		input_report_key(psmouse->dev, BTN_LEFT, pkt[0] & PS2_LEFT);
-		input_report_key(psmouse->dev, BTN_RIGHT, pkt[0] & PS2_RIGHT);
-		input_report_key(psmouse->dev, BTN_MIDDLE, pkt[0] & PS2_MIDDLE);
+		priv->touch = true;
 		break;
 	}
-
-	case BYD_SCROLLDOWN:
-	case BYD_2DOWN:
-		input_report_rel(dev, REL_WHEEL, -1);
-		break;
-
-	case BYD_SCROLLUP:
-	case BYD_2UP:
-		input_report_rel(dev, REL_WHEEL, 1);
-		break;
-
-	case BYD_SCROLLLEFT:
-	case BYD_2LEFT:
-		input_report_rel(dev, REL_HWHEEL, -1);
-		break;
-
-	case BYD_SCROLLRIGHT:
-	case BYD_2RIGHT:
-		input_report_rel(dev, REL_HWHEEL, 1);
-		break;
-
-	case BYD_ZOOMOUT:
-	case BYD_ZOOMIN:
-	case BYD_3UP:
-	case BYD_3DOWN:
-	case BYD_3LEFT:
-	case BYD_3RIGHT:
-	case BYD_4UP:
-	case BYD_4DOWN:
-		break;
-
 	default:
 		psmouse_warn(psmouse,
 			     "Unrecognized Z: pkt = %02x %02x %02x %02x\n",
@@ -157,134 +320,76 @@ static psmouse_ret_t byd_process_byte(struct psmouse *psmouse)
 		return PSMOUSE_BAD_DATA;
 	}
 
-	input_sync(dev);
+	priv->btn_left = pkt[0] & PS2_LEFT;
+	priv->btn_right = pkt[0] & PS2_RIGHT;
 
-	return PSMOUSE_FULL_PACKET;
-}
+	byd_report_input(psmouse);
 
-/* Send a sequence of bytes, where each is ACKed before the next is sent. */
-static int byd_send_sequence(struct psmouse *psmouse, const u8 *seq, size_t len)
-{
-	unsigned int i;
-
-	for (i = 0; i < len; ++i) {
-		if (ps2_command(&psmouse->ps2dev, NULL, seq[i]))
-			return -1;
+	/* Reset time since last touch. */
+	if (priv->touch) {
+		priv->last_touch_time = jiffies;
+		mod_timer(&priv->timer, jiffies + BYD_TOUCH_TIMEOUT);
 	}
-	return 0;
-}
-
-/* Keep scrolling after fingers are removed. */
-#define SCROLL_INERTIAL		0x01
-#define SCROLL_NO_INERTIAL	0x02
-
-/* Clicking can be done by tapping or pressing. */
-#define CLICK_BOTH		0x01
-/* Clicking can only be done by pressing. */
-#define CLICK_PRESS_ONLY	0x02
 
-static int byd_enable(struct psmouse *psmouse)
-{
-	const u8 seq1[] = { 0xE2, 0x00, 0xE0, 0x02, 0xE0 };
-	const u8 seq2[] = {
-		0xD3, 0x01,
-		0xD0, 0x00,
-		0xD0, 0x04,
-		/* Whether clicking is done by tapping or pressing. */
-		0xD4, CLICK_PRESS_ONLY,
-		0xD5, 0x01,
-		0xD7, 0x03,
-		/* Vertical and horizontal one-finger scroll zone inertia. */
-		0xD8, SCROLL_INERTIAL,
-		0xDA, 0x05,
-		0xDB, 0x02,
-		0xE4, 0x05,
-		0xD6, 0x01,
-		0xDE, 0x04,
-		0xE3, 0x01,
-		0xCF, 0x00,
-		0xD2, 0x03,
-		/* Vertical and horizontal two-finger scrolling inertia. */
-		0xE5, SCROLL_INERTIAL,
-		0xD9, 0x02,
-		0xD9, 0x07,
-		0xDC, 0x03,
-		0xDD, 0x03,
-		0xDF, 0x03,
-		0xE1, 0x03,
-		0xD1, 0x00,
-		0xCE, 0x00,
-		0xCC, 0x00,
-		0xE0, 0x00,
-		0xE2, 0x01
-	};
-	u8 param[4];
-
-	if (byd_send_sequence(psmouse, seq1, ARRAY_SIZE(seq1)))
-		return -1;
-
-	/* Send a 0x01 command, which should return 4 bytes. */
-	if (ps2_command(&psmouse->ps2dev, param, 0x0401))
-		return -1;
-
-	if (byd_send_sequence(psmouse, seq2, ARRAY_SIZE(seq2)))
-		return -1;
-
-	return 0;
+	return PSMOUSE_FULL_PACKET;
 }
 
-/*
- * Send the set of PS/2 commands required to make it identify as an
- * intellimouse with 4-byte instead of 3-byte packets.
- */
-static int byd_send_intellimouse_sequence(struct psmouse *psmouse)
+static int byd_reset_touchpad(struct psmouse *psmouse)
 {
 	struct ps2dev *ps2dev = &psmouse->ps2dev;
 	u8 param[4];
-	int i;
+	size_t i;
+
 	const struct {
 		u16 command;
 		u8 arg;
 	} seq[] = {
-		{ PSMOUSE_CMD_RESET_BAT, 0 },
-		{ PSMOUSE_CMD_RESET_BAT, 0 },
-		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_SETSCALE11, 0 },
-		{ PSMOUSE_CMD_GETINFO, 0 },
-		{ PSMOUSE_CMD_SETRES, 0x03 },
+		/*
+		 * Intellimouse initialization sequence, to get 4-byte instead
+		 * of 3-byte packets.
+		 */
 		{ PSMOUSE_CMD_SETRATE, 0xC8 },
 		{ PSMOUSE_CMD_SETRATE, 0x64 },
 		{ PSMOUSE_CMD_SETRATE, 0x50 },
 		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETRATE, 0xC8 },
-		{ PSMOUSE_CMD_SETRATE, 0xC8 },
-		{ PSMOUSE_CMD_SETRATE, 0x50 },
-		{ PSMOUSE_CMD_GETID, 0 },
-		{ PSMOUSE_CMD_SETRATE, 0x64 },
-		{ PSMOUSE_CMD_SETRES, 0x03 },
-		{ PSMOUSE_CMD_ENABLE, 0 }
+		{ PSMOUSE_CMD_ENABLE, 0 },
+		/*
+		 * BYD-specific initialization, which enables absolute mode and
+		 * (if desired), the touchpad's built-in gesture detection.
+		 */
+		{ 0x10E2, 0x00 },
+		{ 0x10E0, 0x02 },
+		/* The touchpad should reply with 4 seemingly-random bytes */
+		{ 0x14E0, 0x01 },
+		/* Pairs of parameters and values. */
+		{ BYD_CMD_SET_HANDEDNESS, 0x01 },
+		{ BYD_CMD_SET_PHYSICAL_BUTTONS, 0x04 },
+		{ BYD_CMD_SET_TAP, 0x02 },
+		{ BYD_CMD_SET_ONE_FINGER_SCROLL, 0x04 },
+		{ BYD_CMD_SET_ONE_FINGER_SCROLL_FUNC, 0x04 },
+		{ BYD_CMD_SET_EDGE_MOTION, 0x01 },
+		{ BYD_CMD_SET_PALM_CHECK, 0x00 },
+		{ BYD_CMD_SET_MULTITOUCH, 0x02 },
+		{ BYD_CMD_SET_TWO_FINGER_SCROLL, 0x04 },
+		{ BYD_CMD_SET_TWO_FINGER_SCROLL_FUNC, 0x04 },
+		{ BYD_CMD_SET_LEFT_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_TOP_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_RIGHT_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_BOTTOM_EDGE_REGION, 0x00 },
+		{ BYD_CMD_SET_ABSOLUTE_MODE, 0x02 },
+		/* Finalize initialization. */
+		{ 0x10E0, 0x00 },
+		{ 0x10E2, 0x01 },
 	};
 
-	memset(param, 0, sizeof(param));
 	for (i = 0; i < ARRAY_SIZE(seq); ++i) {
+		memset(param, 0, sizeof(param));
 		param[0] = seq[i].arg;
 		if (ps2_command(ps2dev, param, seq[i].command))
-			return -1;
+			return -EIO;
 	}
 
-	return 0;
-}
-
-static int byd_reset_touchpad(struct psmouse *psmouse)
-{
-	if (byd_send_intellimouse_sequence(psmouse))
-		return -EIO;
-
-	if (byd_enable(psmouse))
-		return -EIO;
-
+	psmouse_set_state(psmouse, PSMOUSE_ACTIVATED);
 	return 0;
 }
 
@@ -314,9 +419,50 @@ static int byd_reconnect(struct psmouse *psmouse)
 	return 0;
 }
 
+static void byd_disconnect(struct psmouse *psmouse)
+{
+	struct byd_data *priv = psmouse->private;
+
+	if (priv) {
+		del_timer(&priv->timer);
+		kfree(psmouse->private);
+		psmouse->private = NULL;
+	}
+}
+
+int byd_detect(struct psmouse *psmouse, bool set_properties)
+{
+	struct ps2dev *ps2dev = &psmouse->ps2dev;
+	u8 param[4] = {0x03, 0x00, 0x00, 0x00};
+
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_SETRES))
+		return -1;
+	if (ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO))
+		return -1;
+
+	if (param[1] != 0x03 || param[2] != 0x64)
+		return -ENODEV;
+
+	psmouse_dbg(psmouse, "BYD touchpad detected\n");
+
+	if (set_properties) {
+		psmouse->vendor = "BYD";
+		psmouse->name = "TouchPad";
+	}
+
+	return 0;
+}
+
 int byd_init(struct psmouse *psmouse)
 {
 	struct input_dev *dev = psmouse->dev;
+	struct byd_data *priv;
 
 	if (psmouse_reset(psmouse))
 		return -EIO;
@@ -324,14 +470,39 @@ int byd_init(struct psmouse *psmouse)
 	if (byd_reset_touchpad(psmouse))
 		return -EIO;
 
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	memset(priv, 0, sizeof(*priv));
+	setup_timer(&priv->timer, byd_clear_touch, (unsigned long) psmouse);
+
+	psmouse->private = priv;
+	psmouse->disconnect = byd_disconnect;
 	psmouse->reconnect = byd_reconnect;
 	psmouse->protocol_handler = byd_process_byte;
 	psmouse->pktsize = 4;
 	psmouse->resync_time = 0;
 
-	__set_bit(BTN_MIDDLE, dev->keybit);
-	__set_bit(REL_WHEEL, dev->relbit);
-	__set_bit(REL_HWHEEL, dev->relbit);
+	__set_bit(INPUT_PROP_POINTER, dev->propbit);
+	/* Touchpad */
+	__set_bit(BTN_TOUCH, dev->keybit);
+	__set_bit(BTN_TOOL_FINGER, dev->keybit);
+	/* Buttons */
+	__set_bit(BTN_LEFT, dev->keybit);
+	__set_bit(BTN_RIGHT, dev->keybit);
+	__clear_bit(BTN_MIDDLE, dev->keybit);
+
+	/* Absolute position */
+	__set_bit(EV_ABS, dev->evbit);
+	input_set_abs_params(dev, ABS_X, 0, BYD_PAD_WIDTH, 0, 0);
+	input_set_abs_params(dev, ABS_Y, 0, BYD_PAD_HEIGHT, 0, 0);
+	input_abs_set_res(dev, ABS_X, BYD_PAD_RESOLUTION);
+	input_abs_set_res(dev, ABS_Y, BYD_PAD_RESOLUTION);
+	/* No relative support */
+	__clear_bit(EV_REL, dev->evbit);
+	__clear_bit(REL_X, dev->relbit);
+	__clear_bit(REL_Y, dev->relbit);
 
 	return 0;
 }
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 39d1becd35c9..5784e20542a4 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -846,7 +846,7 @@ static const struct psmouse_protocol psmouse_protocols[] = {
 #ifdef CONFIG_MOUSE_PS2_BYD
 	{
 		.type		= PSMOUSE_BYD,
-		.name		= "BydPS/2",
+		.name		= "BYDPS/2",
 		.alias		= "byd",
 		.detect		= byd_detect,
 		.init		= byd_init,
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 6025eb430c0a..a41d8328c064 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -862,8 +862,9 @@ static void synaptics_report_ext_buttons(struct psmouse *psmouse,
 	if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap))
 		return;
 
-	/* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */
-	if (SYN_ID_FULL(priv->identity) == 0x801 &&
+	/* Bug in FW 8.1 & 8.2, buttons are reported only when ExtBit is 1 */
+	if ((SYN_ID_FULL(priv->identity) == 0x801 ||
+	     SYN_ID_FULL(priv->identity) == 0x802) &&
 	    !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02))
 		return;
 
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index da38f0ad80ed..faa295ec4f31 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -126,7 +126,7 @@ static void process_one_interrupt(struct rmi_driver_data *data,
 		return;
 
 	fh = to_rmi_function_handler(fn->dev.driver);
-	if (fn->irq_mask && fh->attention) {
+	if (fh->attention) {
 		bitmap_and(data->fn_irq_bits, data->irq_status, fn->irq_mask,
 				data->irq_count);
 		if (!bitmap_empty(data->fn_irq_bits, data->irq_count))
@@ -172,8 +172,7 @@ int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
 	 * use irq_chip.
 	 */
 	list_for_each_entry(entry, &data->function_list, node)
-		if (entry->irq_mask)
-			process_one_interrupt(data, entry);
+		process_one_interrupt(data, entry);
 
 	if (data->input)
 		input_sync(data->input);
diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c
index 892729734c51..fb5fb9140ca9 100644
--- a/drivers/input/touchscreen/melfas_mip4.c
+++ b/drivers/input/touchscreen/melfas_mip4.c
@@ -1310,8 +1310,34 @@ static ssize_t mip4_sysfs_read_fw_version(struct device *dev,
 
 static DEVICE_ATTR(fw_version, S_IRUGO, mip4_sysfs_read_fw_version, NULL);
 
+static ssize_t mip4_sysfs_read_hw_version(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct mip4_ts *ts = i2c_get_clientdata(client);
+	size_t count;
+
+	/* Take lock to prevent racing with firmware update */
+	mutex_lock(&ts->input->mutex);
+
+	/*
+	 * product_name shows the name or version of the hardware
+	 * paired with current firmware in the chip.
+	 */
+	count = snprintf(buf, PAGE_SIZE, "%.*s\n",
+		(int)sizeof(ts->product_name), ts->product_name);
+
+	mutex_unlock(&ts->input->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(hw_version, S_IRUGO, mip4_sysfs_read_hw_version, NULL);
+
 static struct attribute *mip4_attrs[] = {
 	&dev_attr_fw_version.attr,
+	&dev_attr_hw_version.attr,
 	&dev_attr_update_fw.attr,
 	NULL,
 };
@@ -1512,6 +1538,6 @@ static struct i2c_driver mip4_driver = {
 module_i2c_driver(mip4_driver);
 
 MODULE_DESCRIPTION("MELFAS MIP4 Touchscreen");
-MODULE_VERSION("2016.03.03");
+MODULE_VERSION("2016.03.12");
 MODULE_AUTHOR("Sangwon Jee <jeesw@melfas.com>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c
index b6c4d03de340..880c40b23f66 100644
--- a/drivers/input/touchscreen/sur40.c
+++ b/drivers/input/touchscreen/sur40.c
@@ -197,28 +197,34 @@ static int sur40_command(struct sur40_state *dev,
 static int sur40_init(struct sur40_state *dev)
 {
 	int result;
-	u8 buffer[24];
+	u8 *buffer;
+
+	buffer = kmalloc(24, GFP_KERNEL);
+	if (!buffer) {
+		result = -ENOMEM;
+		goto error;
+	}
 
 	/* stupidly replay the original MS driver init sequence */
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x00, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x01, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x02, buffer, 12);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_UNKNOWN2,    0x00, buffer, 24);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_UNKNOWN1,    0x00, buffer,  5);
 	if (result < 0)
-		return result;
+		goto error;
 
 	result = sur40_command(dev, SUR40_GET_VERSION, 0x03, buffer, 12);
 
@@ -226,7 +232,8 @@ static int sur40_init(struct sur40_state *dev)
 	 * Discard the result buffer - no known data inside except
 	 * some version strings, maybe extract these sometime...
 	 */
-
+error:
+	kfree(buffer);
 	return result;
 }
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index a1e75cba18e0..dd1dc39f84ff 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -39,6 +39,25 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
 
 	  If unsure, say N here.
 
+config IOMMU_IO_PGTABLE_ARMV7S
+	bool "ARMv7/v8 Short Descriptor Format"
+	select IOMMU_IO_PGTABLE
+	depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+	help
+	  Enable support for the ARM Short-descriptor pagetable format.
+	  This supports 32-bit virtual and physical addresses mapped using
+	  2-level tables with 4KB pages/1MB sections, and contiguous entries
+	  for 64KB pages/16MB supersections if indicated by the IOMMU driver.
+
+config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
+	bool "ARMv7s selftests"
+	depends on IOMMU_IO_PGTABLE_ARMV7S
+	help
+	  Enable self-tests for ARMv7s page table allocator. This performs
+	  a series of page-table consistency checks during boot.
+
+	  If unsure, say N here.
+
 endmenu
 
 config IOMMU_IOVA
@@ -51,9 +70,9 @@ config OF_IOMMU
 # IOMMU-agnostic DMA-mapping layer
 config IOMMU_DMA
 	bool
-	depends on NEED_SG_DMA_LENGTH
 	select IOMMU_API
 	select IOMMU_IOVA
+	select NEED_SG_DMA_LENGTH
 
 config FSL_PAMU
 	bool "Freescale IOMMU support"
@@ -243,7 +262,7 @@ config TEGRA_IOMMU_SMMU
 
 config EXYNOS_IOMMU
 	bool "Exynos IOMMU Support"
-	depends on ARCH_EXYNOS && ARM && MMU
+	depends on ARCH_EXYNOS && MMU
 	select IOMMU_API
 	select ARM_DMA_USE_IOMMU
 	help
@@ -266,7 +285,7 @@ config EXYNOS_IOMMU_DEBUG
 config IPMMU_VMSA
 	bool "Renesas VMSA-compatible IPMMU"
 	depends on ARM_LPAE
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
@@ -318,4 +337,21 @@ config S390_IOMMU
 	help
 	  Support for the IOMMU API for s390 PCI devices.
 
+config MTK_IOMMU
+	bool "MTK IOMMU Support"
+	depends on ARM || ARM64
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select ARM_DMA_USE_IOMMU
+	select IOMMU_API
+	select IOMMU_DMA
+	select IOMMU_IO_PGTABLE_ARMV7S
+	select MEMORY
+	select MTK_SMI
+	help
+	  Support for the M4U on certain Mediatek SOCs. M4U is MultiMedia
+	  Memory Management Unit. This option enables remapping of DMA memory
+	  accesses for the multimedia subsystem.
+
+	  If unsure, say N here.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 42fc0c25cf1a..c6edb31bf8c6 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
+obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU)	+= of_iommu.o
@@ -16,6 +17,7 @@ obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
+obj-$(CONFIG_MTK_IOMMU) += mtk_iommu.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
 obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
 obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index c865737326e1..56999d2fac07 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -526,6 +526,7 @@ static void do_fault(struct work_struct *work)
 		flags |= FAULT_FLAG_USER;
 	if (fault->flags & PPR_FAULT_WRITE)
 		flags |= FAULT_FLAG_WRITE;
+	flags |= FAULT_FLAG_REMOTE;
 
 	down_read(&mm->mmap_sem);
 	vma = find_extend_vma(mm, address);
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 20875341c865..4ff73ff64e49 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/iommu.h>
@@ -1396,7 +1397,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 		return NULL;
 
 	/*
@@ -1408,6 +1409,12 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	if (!smmu_domain)
 		return NULL;
 
+	if (type == IOMMU_DOMAIN_DMA &&
+	    iommu_get_dma_cookie(&smmu_domain->domain)) {
+		kfree(smmu_domain);
+		return NULL;
+	}
+
 	mutex_init(&smmu_domain->init_mutex);
 	spin_lock_init(&smmu_domain->pgtbl_lock);
 	return &smmu_domain->domain;
@@ -1436,6 +1443,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
+	iommu_put_dma_cookie(domain);
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
 	/* Free the CD and ASID, if we allocated them */
@@ -1630,6 +1638,17 @@ static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
 	return 0;
 }
 
+static void arm_smmu_detach_dev(struct device *dev)
+{
+	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+
+	smmu_group->ste.bypass = true;
+	if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
+		dev_warn(dev, "failed to install bypass STE\n");
+
+	smmu_group->domain = NULL;
+}
+
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	int ret = 0;
@@ -1642,7 +1661,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
 	/* Already attached to a different domain? */
 	if (smmu_group->domain && smmu_group->domain != smmu_domain)
-		return -EEXIST;
+		arm_smmu_detach_dev(dev);
 
 	smmu = smmu_group->smmu;
 	mutex_lock(&smmu_domain->init_mutex);
@@ -1668,7 +1687,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		goto out_unlock;
 
 	smmu_group->domain	= smmu_domain;
-	smmu_group->ste.bypass	= false;
+
+	/*
+	 * FIXME: This should always be "false" once we have IOMMU-backed
+	 * DMA ops for all devices behind the SMMU.
+	 */
+	smmu_group->ste.bypass	= domain->type == IOMMU_DOMAIN_DMA;
 
 	ret = arm_smmu_install_ste_for_group(smmu_group);
 	if (IS_ERR_VALUE(ret))
@@ -1679,25 +1703,6 @@ out_unlock:
 	return ret;
 }
 
-static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
-{
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
-
-	BUG_ON(!smmu_domain);
-	BUG_ON(!smmu_group);
-
-	mutex_lock(&smmu_domain->init_mutex);
-	BUG_ON(smmu_group->domain != smmu_domain);
-
-	smmu_group->ste.bypass = true;
-	if (IS_ERR_VALUE(arm_smmu_install_ste_for_group(smmu_group)))
-		dev_warn(dev, "failed to install bypass STE\n");
-
-	smmu_group->domain = NULL;
-	mutex_unlock(&smmu_domain->init_mutex);
-}
-
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
@@ -1935,7 +1940,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.domain_alloc		= arm_smmu_domain_alloc,
 	.domain_free		= arm_smmu_domain_free,
 	.attach_dev		= arm_smmu_attach_dev,
-	.detach_dev		= arm_smmu_detach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 59ee4b8a3236..2409e3bd3df2 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -29,6 +29,7 @@
 #define pr_fmt(fmt) "arm-smmu: " fmt
 
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -167,6 +168,9 @@
 #define S2CR_TYPE_BYPASS		(1 << S2CR_TYPE_SHIFT)
 #define S2CR_TYPE_FAULT			(2 << S2CR_TYPE_SHIFT)
 
+#define S2CR_PRIVCFG_SHIFT		24
+#define S2CR_PRIVCFG_UNPRIV		(2 << S2CR_PRIVCFG_SHIFT)
+
 /* Context bank attribute registers */
 #define ARM_SMMU_GR1_CBAR(n)		(0x0 + ((n) << 2))
 #define CBAR_VMID_SHIFT			0
@@ -257,9 +261,13 @@
 #define FSYNR0_WNR			(1 << 4)
 
 static int force_stage;
-module_param_named(force_stage, force_stage, int, S_IRUGO);
+module_param(force_stage, int, S_IRUGO);
 MODULE_PARM_DESC(force_stage,
 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
+static bool disable_bypass;
+module_param(disable_bypass, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_bypass,
+	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
 enum arm_smmu_arch_version {
 	ARM_SMMU_V1 = 1,
@@ -963,7 +971,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
+	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 		return NULL;
 	/*
 	 * Allocate the domain and initialise some of its data structures.
@@ -974,6 +982,12 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	if (!smmu_domain)
 		return NULL;
 
+	if (type == IOMMU_DOMAIN_DMA &&
+	    iommu_get_dma_cookie(&smmu_domain->domain)) {
+		kfree(smmu_domain);
+		return NULL;
+	}
+
 	mutex_init(&smmu_domain->init_mutex);
 	spin_lock_init(&smmu_domain->pgtbl_lock);
 
@@ -988,6 +1002,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	 * Free the domain resources. We assume that all devices have
 	 * already been detached.
 	 */
+	iommu_put_dma_cookie(domain);
 	arm_smmu_destroy_domain_context(domain);
 	kfree(smmu_domain);
 }
@@ -1079,11 +1094,18 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
 	if (ret)
 		return ret == -EEXIST ? 0 : ret;
 
+	/*
+	 * FIXME: This won't be needed once we have IOMMU-backed DMA ops
+	 * for all devices behind the SMMU.
+	 */
+	if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
+		return 0;
+
 	for (i = 0; i < cfg->num_streamids; ++i) {
 		u32 idx, s2cr;
 
 		idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-		s2cr = S2CR_TYPE_TRANS |
+		s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
 		       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
 		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
 	}
@@ -1108,14 +1130,24 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 	 */
 	for (i = 0; i < cfg->num_streamids; ++i) {
 		u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
+		u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
 
-		writel_relaxed(S2CR_TYPE_BYPASS,
-			       gr0_base + ARM_SMMU_GR0_S2CR(idx));
+		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
 	}
 
 	arm_smmu_master_free_smrs(smmu, cfg);
 }
 
+static void arm_smmu_detach_dev(struct device *dev,
+				struct arm_smmu_master_cfg *cfg)
+{
+	struct iommu_domain *domain = dev->archdata.iommu;
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+	dev->archdata.iommu = NULL;
+	arm_smmu_domain_remove_master(smmu_domain, cfg);
+}
+
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	int ret;
@@ -1129,11 +1161,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		return -ENXIO;
 	}
 
-	if (dev->archdata.iommu) {
-		dev_err(dev, "already attached to IOMMU domain\n");
-		return -EEXIST;
-	}
-
 	/* Ensure that the domain is finalised */
 	ret = arm_smmu_init_domain_context(domain, smmu);
 	if (IS_ERR_VALUE(ret))
@@ -1155,25 +1182,16 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	if (!cfg)
 		return -ENODEV;
 
+	/* Detach the dev from its current domain */
+	if (dev->archdata.iommu)
+		arm_smmu_detach_dev(dev, cfg);
+
 	ret = arm_smmu_domain_add_master(smmu_domain, cfg);
 	if (!ret)
 		dev->archdata.iommu = domain;
 	return ret;
 }
 
-static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev)
-{
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct arm_smmu_master_cfg *cfg;
-
-	cfg = find_smmu_master_cfg(dev);
-	if (!cfg)
-		return;
-
-	dev->archdata.iommu = NULL;
-	arm_smmu_domain_remove_master(smmu_domain, cfg);
-}
-
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 			phys_addr_t paddr, size_t size, int prot)
 {
@@ -1449,7 +1467,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.domain_alloc		= arm_smmu_domain_alloc,
 	.domain_free		= arm_smmu_domain_free,
 	.attach_dev		= arm_smmu_attach_dev,
-	.detach_dev		= arm_smmu_detach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
 	.map_sg			= default_iommu_map_sg,
@@ -1473,11 +1490,11 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
 
-	/* Mark all SMRn as invalid and all S2CRn as bypass */
+	/* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
+	reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
 		writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
-		writel_relaxed(S2CR_TYPE_BYPASS,
-			gr0_base + ARM_SMMU_GR0_S2CR(i));
+		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
 	}
 
 	/* Make sure all context banks are disabled and clear CB_FSR  */
@@ -1499,8 +1516,12 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	/* Disable TLB broadcasting. */
 	reg |= (sCR0_VMIDPNE | sCR0_PTM);
 
-	/* Enable client access, but bypass when no mapping is found */
-	reg &= ~(sCR0_CLIENTPD | sCR0_USFCFG);
+	/* Enable client access, handling unmatched streams as appropriate */
+	reg &= ~sCR0_CLIENTPD;
+	if (disable_bypass)
+		reg |= sCR0_USFCFG;
+	else
+		reg &= ~sCR0_USFCFG;
 
 	/* Disable forced broadcasting */
 	reg &= ~sCR0_FB;
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 97c41b8ab5d9..5ecc86cb74c8 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1,6 +1,5 @@
-/* linux/drivers/iommu/exynos_iommu.c
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+/*
+ * Copyright (c) 2011,2016 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
  * This program is free software; you can redistribute it and/or modify
@@ -25,10 +24,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
-
-#include <asm/cacheflush.h>
-#include <asm/dma-iommu.h>
-#include <asm/pgtable.h>
+#include <linux/dma-iommu.h>
 
 typedef u32 sysmmu_iova_t;
 typedef u32 sysmmu_pte_t;
@@ -58,17 +54,25 @@ typedef u32 sysmmu_pte_t;
 #define lv2ent_small(pent) ((*(pent) & 2) == 2)
 #define lv2ent_large(pent) ((*(pent) & 3) == 1)
 
-static u32 sysmmu_page_offset(sysmmu_iova_t iova, u32 size)
-{
-	return iova & (size - 1);
-}
-
-#define section_phys(sent) (*(sent) & SECT_MASK)
-#define section_offs(iova) sysmmu_page_offset((iova), SECT_SIZE)
-#define lpage_phys(pent) (*(pent) & LPAGE_MASK)
-#define lpage_offs(iova) sysmmu_page_offset((iova), LPAGE_SIZE)
-#define spage_phys(pent) (*(pent) & SPAGE_MASK)
-#define spage_offs(iova) sysmmu_page_offset((iova), SPAGE_SIZE)
+/*
+ * v1.x - v3.x SYSMMU supports 32bit physical and 32bit virtual address spaces
+ * v5.0 introduced support for 36bit physical address space by shifting
+ * all page entry values by 4 bits.
+ * All SYSMMU controllers in the system support the address spaces of the same
+ * size, so PG_ENT_SHIFT can be initialized on first SYSMMU probe to proper
+ * value (0 or 4).
+ */
+static short PG_ENT_SHIFT = -1;
+#define SYSMMU_PG_ENT_SHIFT 0
+#define SYSMMU_V5_PG_ENT_SHIFT 4
+
+#define sect_to_phys(ent) (((phys_addr_t) ent) << PG_ENT_SHIFT)
+#define section_phys(sent) (sect_to_phys(*(sent)) & SECT_MASK)
+#define section_offs(iova) (iova & (SECT_SIZE - 1))
+#define lpage_phys(pent) (sect_to_phys(*(pent)) & LPAGE_MASK)
+#define lpage_offs(iova) (iova & (LPAGE_SIZE - 1))
+#define spage_phys(pent) (sect_to_phys(*(pent)) & SPAGE_MASK)
+#define spage_offs(iova) (iova & (SPAGE_SIZE - 1))
 
 #define NUM_LV1ENTRIES 4096
 #define NUM_LV2ENTRIES (SECT_SIZE / SPAGE_SIZE)
@@ -83,16 +87,16 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
 	return (iova >> SPAGE_ORDER) & (NUM_LV2ENTRIES - 1);
 }
 
+#define LV1TABLE_SIZE (NUM_LV1ENTRIES * sizeof(sysmmu_pte_t))
 #define LV2TABLE_SIZE (NUM_LV2ENTRIES * sizeof(sysmmu_pte_t))
 
 #define SPAGES_PER_LPAGE (LPAGE_SIZE / SPAGE_SIZE)
+#define lv2table_base(sent) (sect_to_phys(*(sent) & 0xFFFFFFC0))
 
-#define lv2table_base(sent) (*(sent) & 0xFFFFFC00)
-
-#define mk_lv1ent_sect(pa) ((pa) | 2)
-#define mk_lv1ent_page(pa) ((pa) | 1)
-#define mk_lv2ent_lpage(pa) ((pa) | 1)
-#define mk_lv2ent_spage(pa) ((pa) | 2)
+#define mk_lv1ent_sect(pa) ((pa >> PG_ENT_SHIFT) | 2)
+#define mk_lv1ent_page(pa) ((pa >> PG_ENT_SHIFT) | 1)
+#define mk_lv2ent_lpage(pa) ((pa >> PG_ENT_SHIFT) | 1)
+#define mk_lv2ent_spage(pa) ((pa >> PG_ENT_SHIFT) | 2)
 
 #define CTRL_ENABLE	0x5
 #define CTRL_BLOCK	0x7
@@ -100,14 +104,23 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
 
 #define CFG_LRU		0x1
 #define CFG_QOS(n)	((n & 0xF) << 7)
-#define CFG_MASK	0x0150FFFF /* Selecting bit 0-15, 20, 22 and 24 */
 #define CFG_ACGEN	(1 << 24) /* System MMU 3.3 only */
 #define CFG_SYSSEL	(1 << 22) /* System MMU 3.2 only */
 #define CFG_FLPDCACHE	(1 << 20) /* System MMU 3.2+ only */
 
+/* common registers */
 #define REG_MMU_CTRL		0x000
 #define REG_MMU_CFG		0x004
 #define REG_MMU_STATUS		0x008
+#define REG_MMU_VERSION		0x034
+
+#define MMU_MAJ_VER(val)	((val) >> 7)
+#define MMU_MIN_VER(val)	((val) & 0x7F)
+#define MMU_RAW_VER(reg)	(((reg) >> 21) & ((1 << 11) - 1)) /* 11 bits */
+
+#define MAKE_MMU_VER(maj, min)	((((maj) & 0xF) << 7) | ((min) & 0x7F))
+
+/* v1.x - v3.x registers */
 #define REG_MMU_FLUSH		0x00C
 #define REG_MMU_FLUSH_ENTRY	0x010
 #define REG_PT_BASE_ADDR	0x014
@@ -119,21 +132,18 @@ static u32 lv2ent_offset(sysmmu_iova_t iova)
 #define REG_AR_FAULT_ADDR	0x02C
 #define REG_DEFAULT_SLAVE_ADDR	0x030
 
-#define REG_MMU_VERSION		0x034
-
-#define MMU_MAJ_VER(val)	((val) >> 7)
-#define MMU_MIN_VER(val)	((val) & 0x7F)
-#define MMU_RAW_VER(reg)	(((reg) >> 21) & ((1 << 11) - 1)) /* 11 bits */
-
-#define MAKE_MMU_VER(maj, min)	((((maj) & 0xF) << 7) | ((min) & 0x7F))
-
-#define REG_PB0_SADDR		0x04C
-#define REG_PB0_EADDR		0x050
-#define REG_PB1_SADDR		0x054
-#define REG_PB1_EADDR		0x058
+/* v5.x registers */
+#define REG_V5_PT_BASE_PFN	0x00C
+#define REG_V5_MMU_FLUSH_ALL	0x010
+#define REG_V5_MMU_FLUSH_ENTRY	0x014
+#define REG_V5_INT_STATUS	0x060
+#define REG_V5_INT_CLEAR	0x064
+#define REG_V5_FAULT_AR_VA	0x070
+#define REG_V5_FAULT_AW_VA	0x080
 
 #define has_sysmmu(dev)		(dev->archdata.iommu != NULL)
 
+static struct device *dma_dev;
 static struct kmem_cache *lv2table_kmem_cache;
 static sysmmu_pte_t *zero_lv2_table;
 #define ZERO_LV2LINK mk_lv1ent_page(virt_to_phys(zero_lv2_table))
@@ -149,40 +159,38 @@ static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova)
 				lv2table_base(sent)) + lv2ent_offset(iova);
 }
 
-enum exynos_sysmmu_inttype {
-	SYSMMU_PAGEFAULT,
-	SYSMMU_AR_MULTIHIT,
-	SYSMMU_AW_MULTIHIT,
-	SYSMMU_BUSERROR,
-	SYSMMU_AR_SECURITY,
-	SYSMMU_AR_ACCESS,
-	SYSMMU_AW_SECURITY,
-	SYSMMU_AW_PROTECTION, /* 7 */
-	SYSMMU_FAULT_UNKNOWN,
-	SYSMMU_FAULTS_NUM
+/*
+ * IOMMU fault information register
+ */
+struct sysmmu_fault_info {
+	unsigned int bit;	/* bit number in STATUS register */
+	unsigned short addr_reg; /* register to read VA fault address */
+	const char *name;	/* human readable fault name */
+	unsigned int type;	/* fault type for report_iommu_fault */
 };
 
-static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = {
-	REG_PAGE_FAULT_ADDR,
-	REG_AR_FAULT_ADDR,
-	REG_AW_FAULT_ADDR,
-	REG_DEFAULT_SLAVE_ADDR,
-	REG_AR_FAULT_ADDR,
-	REG_AR_FAULT_ADDR,
-	REG_AW_FAULT_ADDR,
-	REG_AW_FAULT_ADDR
+static const struct sysmmu_fault_info sysmmu_faults[] = {
+	{ 0, REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ },
+	{ 1, REG_AR_FAULT_ADDR, "AR MULTI-HIT", IOMMU_FAULT_READ },
+	{ 2, REG_AW_FAULT_ADDR, "AW MULTI-HIT", IOMMU_FAULT_WRITE },
+	{ 3, REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ },
+	{ 4, REG_AR_FAULT_ADDR, "AR SECURITY PROTECTION", IOMMU_FAULT_READ },
+	{ 5, REG_AR_FAULT_ADDR, "AR ACCESS PROTECTION", IOMMU_FAULT_READ },
+	{ 6, REG_AW_FAULT_ADDR, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE },
+	{ 7, REG_AW_FAULT_ADDR, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE },
 };
 
-static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
-	"PAGE FAULT",
-	"AR MULTI-HIT FAULT",
-	"AW MULTI-HIT FAULT",
-	"BUS ERROR",
-	"AR SECURITY PROTECTION FAULT",
-	"AR ACCESS PROTECTION FAULT",
-	"AW SECURITY PROTECTION FAULT",
-	"AW ACCESS PROTECTION FAULT",
-	"UNKNOWN FAULT"
+static const struct sysmmu_fault_info sysmmu_v5_faults[] = {
+	{ 0, REG_V5_FAULT_AR_VA, "AR PTW", IOMMU_FAULT_READ },
+	{ 1, REG_V5_FAULT_AR_VA, "AR PAGE", IOMMU_FAULT_READ },
+	{ 2, REG_V5_FAULT_AR_VA, "AR MULTI-HIT", IOMMU_FAULT_READ },
+	{ 3, REG_V5_FAULT_AR_VA, "AR ACCESS PROTECTION", IOMMU_FAULT_READ },
+	{ 4, REG_V5_FAULT_AR_VA, "AR SECURITY PROTECTION", IOMMU_FAULT_READ },
+	{ 16, REG_V5_FAULT_AW_VA, "AW PTW", IOMMU_FAULT_WRITE },
+	{ 17, REG_V5_FAULT_AW_VA, "AW PAGE", IOMMU_FAULT_WRITE },
+	{ 18, REG_V5_FAULT_AW_VA, "AW MULTI-HIT", IOMMU_FAULT_WRITE },
+	{ 19, REG_V5_FAULT_AW_VA, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE },
+	{ 20, REG_V5_FAULT_AW_VA, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE },
 };
 
 /*
@@ -193,6 +201,7 @@ static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = {
 */
 struct exynos_iommu_owner {
 	struct list_head controllers;	/* list of sysmmu_drvdata.owner_node */
+	struct iommu_domain *domain;	/* domain this device is attached */
 };
 
 /*
@@ -221,6 +230,8 @@ struct sysmmu_drvdata {
 	struct device *master;		/* master device (owner) */
 	void __iomem *sfrbase;		/* our registers */
 	struct clk *clk;		/* SYSMMU's clock */
+	struct clk *aclk;		/* SYSMMU's aclk clock */
+	struct clk *pclk;		/* SYSMMU's pclk clock */
 	struct clk *clk_master;		/* master's device clock */
 	int activations;		/* number of calls to sysmmu_enable */
 	spinlock_t lock;		/* lock for modyfying state */
@@ -255,70 +266,101 @@ static bool is_sysmmu_active(struct sysmmu_drvdata *data)
 	return data->activations > 0;
 }
 
-static void sysmmu_unblock(void __iomem *sfrbase)
+static void sysmmu_unblock(struct sysmmu_drvdata *data)
 {
-	__raw_writel(CTRL_ENABLE, sfrbase + REG_MMU_CTRL);
+	writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
 }
 
-static bool sysmmu_block(void __iomem *sfrbase)
+static bool sysmmu_block(struct sysmmu_drvdata *data)
 {
 	int i = 120;
 
-	__raw_writel(CTRL_BLOCK, sfrbase + REG_MMU_CTRL);
-	while ((i > 0) && !(__raw_readl(sfrbase + REG_MMU_STATUS) & 1))
+	writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
+	while ((i > 0) && !(readl(data->sfrbase + REG_MMU_STATUS) & 1))
 		--i;
 
-	if (!(__raw_readl(sfrbase + REG_MMU_STATUS) & 1)) {
-		sysmmu_unblock(sfrbase);
+	if (!(readl(data->sfrbase + REG_MMU_STATUS) & 1)) {
+		sysmmu_unblock(data);
 		return false;
 	}
 
 	return true;
 }
 
-static void __sysmmu_tlb_invalidate(void __iomem *sfrbase)
+static void __sysmmu_tlb_invalidate(struct sysmmu_drvdata *data)
 {
-	__raw_writel(0x1, sfrbase + REG_MMU_FLUSH);
+	if (MMU_MAJ_VER(data->version) < 5)
+		writel(0x1, data->sfrbase + REG_MMU_FLUSH);
+	else
+		writel(0x1, data->sfrbase + REG_V5_MMU_FLUSH_ALL);
 }
 
-static void __sysmmu_tlb_invalidate_entry(void __iomem *sfrbase,
+static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 				sysmmu_iova_t iova, unsigned int num_inv)
 {
 	unsigned int i;
 
 	for (i = 0; i < num_inv; i++) {
-		__raw_writel((iova & SPAGE_MASK) | 1,
-				sfrbase + REG_MMU_FLUSH_ENTRY);
+		if (MMU_MAJ_VER(data->version) < 5)
+			writel((iova & SPAGE_MASK) | 1,
+				     data->sfrbase + REG_MMU_FLUSH_ENTRY);
+		else
+			writel((iova & SPAGE_MASK) | 1,
+				     data->sfrbase + REG_V5_MMU_FLUSH_ENTRY);
 		iova += SPAGE_SIZE;
 	}
 }
 
-static void __sysmmu_set_ptbase(void __iomem *sfrbase,
-				       phys_addr_t pgd)
+static void __sysmmu_set_ptbase(struct sysmmu_drvdata *data, phys_addr_t pgd)
 {
-	__raw_writel(pgd, sfrbase + REG_PT_BASE_ADDR);
+	if (MMU_MAJ_VER(data->version) < 5)
+		writel(pgd, data->sfrbase + REG_PT_BASE_ADDR);
+	else
+		writel(pgd >> PAGE_SHIFT,
+			     data->sfrbase + REG_V5_PT_BASE_PFN);
 
-	__sysmmu_tlb_invalidate(sfrbase);
+	__sysmmu_tlb_invalidate(data);
 }
 
-static void show_fault_information(const char *name,
-		enum exynos_sysmmu_inttype itype,
-		phys_addr_t pgtable_base, sysmmu_iova_t fault_addr)
+static void __sysmmu_get_version(struct sysmmu_drvdata *data)
 {
-	sysmmu_pte_t *ent;
+	u32 ver;
+
+	clk_enable(data->clk_master);
+	clk_enable(data->clk);
+	clk_enable(data->pclk);
+	clk_enable(data->aclk);
+
+	ver = readl(data->sfrbase + REG_MMU_VERSION);
 
-	if ((itype >= SYSMMU_FAULTS_NUM) || (itype < SYSMMU_PAGEFAULT))
-		itype = SYSMMU_FAULT_UNKNOWN;
+	/* controllers on some SoCs don't report proper version */
+	if (ver == 0x80000001u)
+		data->version = MAKE_MMU_VER(1, 0);
+	else
+		data->version = MMU_RAW_VER(ver);
+
+	dev_dbg(data->sysmmu, "hardware version: %d.%d\n",
+		MMU_MAJ_VER(data->version), MMU_MIN_VER(data->version));
 
-	pr_err("%s occurred at %#x by %s(Page table base: %pa)\n",
-		sysmmu_fault_name[itype], fault_addr, name, &pgtable_base);
+	clk_disable(data->aclk);
+	clk_disable(data->pclk);
+	clk_disable(data->clk);
+	clk_disable(data->clk_master);
+}
 
-	ent = section_entry(phys_to_virt(pgtable_base), fault_addr);
-	pr_err("\tLv1 entry: %#x\n", *ent);
+static void show_fault_information(struct sysmmu_drvdata *data,
+				   const struct sysmmu_fault_info *finfo,
+				   sysmmu_iova_t fault_addr)
+{
+	sysmmu_pte_t *ent;
 
+	dev_err(data->sysmmu, "%s FAULT occurred at %#x (page table base: %pa)\n",
+		finfo->name, fault_addr, &data->pgtable);
+	ent = section_entry(phys_to_virt(data->pgtable), fault_addr);
+	dev_err(data->sysmmu, "\tLv1 entry: %#x\n", *ent);
 	if (lv1ent_page(ent)) {
 		ent = page_entry(ent, fault_addr);
-		pr_err("\t Lv2 entry: %#x\n", *ent);
+		dev_err(data->sysmmu, "\t Lv2 entry: %#x\n", *ent);
 	}
 }
 
@@ -326,49 +368,52 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 {
 	/* SYSMMU is in blocked state when interrupt occurred. */
 	struct sysmmu_drvdata *data = dev_id;
-	enum exynos_sysmmu_inttype itype;
-	sysmmu_iova_t addr = -1;
+	const struct sysmmu_fault_info *finfo;
+	unsigned int i, n, itype;
+	sysmmu_iova_t fault_addr = -1;
+	unsigned short reg_status, reg_clear;
 	int ret = -ENOSYS;
 
 	WARN_ON(!is_sysmmu_active(data));
 
+	if (MMU_MAJ_VER(data->version) < 5) {
+		reg_status = REG_INT_STATUS;
+		reg_clear = REG_INT_CLEAR;
+		finfo = sysmmu_faults;
+		n = ARRAY_SIZE(sysmmu_faults);
+	} else {
+		reg_status = REG_V5_INT_STATUS;
+		reg_clear = REG_V5_INT_CLEAR;
+		finfo = sysmmu_v5_faults;
+		n = ARRAY_SIZE(sysmmu_v5_faults);
+	}
+
 	spin_lock(&data->lock);
 
-	if (!IS_ERR(data->clk_master))
-		clk_enable(data->clk_master);
+	clk_enable(data->clk_master);
 
-	itype = (enum exynos_sysmmu_inttype)
-		__ffs(__raw_readl(data->sfrbase + REG_INT_STATUS));
-	if (WARN_ON(!((itype >= 0) && (itype < SYSMMU_FAULT_UNKNOWN))))
-		itype = SYSMMU_FAULT_UNKNOWN;
-	else
-		addr = __raw_readl(data->sfrbase + fault_reg_offset[itype]);
+	itype = __ffs(readl(data->sfrbase + reg_status));
+	for (i = 0; i < n; i++, finfo++)
+		if (finfo->bit == itype)
+			break;
+	/* unknown/unsupported fault */
+	BUG_ON(i == n);
 
-	if (itype == SYSMMU_FAULT_UNKNOWN) {
-		pr_err("%s: Fault is not occurred by System MMU '%s'!\n",
-			__func__, dev_name(data->sysmmu));
-		pr_err("%s: Please check if IRQ is correctly configured.\n",
-			__func__);
-		BUG();
-	} else {
-		unsigned int base =
-				__raw_readl(data->sfrbase + REG_PT_BASE_ADDR);
-		show_fault_information(dev_name(data->sysmmu),
-					itype, base, addr);
-		if (data->domain)
-			ret = report_iommu_fault(&data->domain->domain,
-					data->master, addr, itype);
-	}
+	/* print debug message */
+	fault_addr = readl(data->sfrbase + finfo->addr_reg);
+	show_fault_information(data, finfo, fault_addr);
 
+	if (data->domain)
+		ret = report_iommu_fault(&data->domain->domain,
+					data->master, fault_addr, finfo->type);
 	/* fault is not recovered by fault handler */
 	BUG_ON(ret != 0);
 
-	__raw_writel(1 << itype, data->sfrbase + REG_INT_CLEAR);
+	writel(1 << itype, data->sfrbase + reg_clear);
 
-	sysmmu_unblock(data->sfrbase);
+	sysmmu_unblock(data);
 
-	if (!IS_ERR(data->clk_master))
-		clk_disable(data->clk_master);
+	clk_disable(data->clk_master);
 
 	spin_unlock(&data->lock);
 
@@ -377,15 +422,15 @@ static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id)
 
 static void __sysmmu_disable_nocount(struct sysmmu_drvdata *data)
 {
-	if (!IS_ERR(data->clk_master))
-		clk_enable(data->clk_master);
+	clk_enable(data->clk_master);
 
-	__raw_writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
-	__raw_writel(0, data->sfrbase + REG_MMU_CFG);
+	writel(CTRL_DISABLE, data->sfrbase + REG_MMU_CTRL);
+	writel(0, data->sfrbase + REG_MMU_CFG);
 
+	clk_disable(data->aclk);
+	clk_disable(data->pclk);
 	clk_disable(data->clk);
-	if (!IS_ERR(data->clk_master))
-		clk_disable(data->clk_master);
+	clk_disable(data->clk_master);
 }
 
 static bool __sysmmu_disable(struct sysmmu_drvdata *data)
@@ -416,42 +461,34 @@ static bool __sysmmu_disable(struct sysmmu_drvdata *data)
 
 static void __sysmmu_init_config(struct sysmmu_drvdata *data)
 {
-	unsigned int cfg = CFG_LRU | CFG_QOS(15);
-	unsigned int ver;
-
-	ver = MMU_RAW_VER(__raw_readl(data->sfrbase + REG_MMU_VERSION));
-	if (MMU_MAJ_VER(ver) == 3) {
-		if (MMU_MIN_VER(ver) >= 2) {
-			cfg |= CFG_FLPDCACHE;
-			if (MMU_MIN_VER(ver) == 3) {
-				cfg |= CFG_ACGEN;
-				cfg &= ~CFG_LRU;
-			} else {
-				cfg |= CFG_SYSSEL;
-			}
-		}
-	}
+	unsigned int cfg;
 
-	__raw_writel(cfg, data->sfrbase + REG_MMU_CFG);
-	data->version = ver;
+	if (data->version <= MAKE_MMU_VER(3, 1))
+		cfg = CFG_LRU | CFG_QOS(15);
+	else if (data->version <= MAKE_MMU_VER(3, 2))
+		cfg = CFG_LRU | CFG_QOS(15) | CFG_FLPDCACHE | CFG_SYSSEL;
+	else
+		cfg = CFG_QOS(15) | CFG_FLPDCACHE | CFG_ACGEN;
+
+	writel(cfg, data->sfrbase + REG_MMU_CFG);
 }
 
 static void __sysmmu_enable_nocount(struct sysmmu_drvdata *data)
 {
-	if (!IS_ERR(data->clk_master))
-		clk_enable(data->clk_master);
+	clk_enable(data->clk_master);
 	clk_enable(data->clk);
+	clk_enable(data->pclk);
+	clk_enable(data->aclk);
 
-	__raw_writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
+	writel(CTRL_BLOCK, data->sfrbase + REG_MMU_CTRL);
 
 	__sysmmu_init_config(data);
 
-	__sysmmu_set_ptbase(data->sfrbase, data->pgtable);
+	__sysmmu_set_ptbase(data, data->pgtable);
 
-	__raw_writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
+	writel(CTRL_ENABLE, data->sfrbase + REG_MMU_CTRL);
 
-	if (!IS_ERR(data->clk_master))
-		clk_disable(data->clk_master);
+	clk_disable(data->clk_master);
 }
 
 static int __sysmmu_enable(struct sysmmu_drvdata *data, phys_addr_t pgtable,
@@ -482,28 +519,21 @@ static int __sysmmu_enable(struct sysmmu_drvdata *data, phys_addr_t pgtable,
 	return ret;
 }
 
-static void __sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
-					      sysmmu_iova_t iova)
-{
-	if (data->version == MAKE_MMU_VER(3, 3))
-		__raw_writel(iova | 0x1, data->sfrbase + REG_MMU_FLUSH_ENTRY);
-}
-
 static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data,
 					    sysmmu_iova_t iova)
 {
 	unsigned long flags;
 
-	if (!IS_ERR(data->clk_master))
-		clk_enable(data->clk_master);
+	clk_enable(data->clk_master);
 
 	spin_lock_irqsave(&data->lock, flags);
-	if (is_sysmmu_active(data))
-		__sysmmu_tlb_invalidate_flpdcache(data, iova);
+	if (is_sysmmu_active(data)) {
+		if (data->version >= MAKE_MMU_VER(3, 3))
+			__sysmmu_tlb_invalidate_entry(data, iova, 1);
+	}
 	spin_unlock_irqrestore(&data->lock, flags);
 
-	if (!IS_ERR(data->clk_master))
-		clk_disable(data->clk_master);
+	clk_disable(data->clk_master);
 }
 
 static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
@@ -515,8 +545,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 	if (is_sysmmu_active(data)) {
 		unsigned int num_inv = 1;
 
-		if (!IS_ERR(data->clk_master))
-			clk_enable(data->clk_master);
+		clk_enable(data->clk_master);
 
 		/*
 		 * L2TLB invalidation required
@@ -531,13 +560,11 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 		if (MMU_MAJ_VER(data->version) == 2)
 			num_inv = min_t(unsigned int, size / PAGE_SIZE, 64);
 
-		if (sysmmu_block(data->sfrbase)) {
-			__sysmmu_tlb_invalidate_entry(
-				data->sfrbase, iova, num_inv);
-			sysmmu_unblock(data->sfrbase);
+		if (sysmmu_block(data)) {
+			__sysmmu_tlb_invalidate_entry(data, iova, num_inv);
+			sysmmu_unblock(data);
 		}
-		if (!IS_ERR(data->clk_master))
-			clk_disable(data->clk_master);
+		clk_disable(data->clk_master);
 	} else {
 		dev_dbg(data->master,
 			"disabled. Skipping TLB invalidation @ %#x\n", iova);
@@ -575,25 +602,52 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 	}
 
 	data->clk = devm_clk_get(dev, "sysmmu");
-	if (IS_ERR(data->clk)) {
-		dev_err(dev, "Failed to get clock!\n");
-		return PTR_ERR(data->clk);
-	} else  {
+	if (!IS_ERR(data->clk)) {
 		ret = clk_prepare(data->clk);
 		if (ret) {
 			dev_err(dev, "Failed to prepare clk\n");
 			return ret;
 		}
+	} else {
+		data->clk = NULL;
+	}
+
+	data->aclk = devm_clk_get(dev, "aclk");
+	if (!IS_ERR(data->aclk)) {
+		ret = clk_prepare(data->aclk);
+		if (ret) {
+			dev_err(dev, "Failed to prepare aclk\n");
+			return ret;
+		}
+	} else {
+		data->aclk = NULL;
+	}
+
+	data->pclk = devm_clk_get(dev, "pclk");
+	if (!IS_ERR(data->pclk)) {
+		ret = clk_prepare(data->pclk);
+		if (ret) {
+			dev_err(dev, "Failed to prepare pclk\n");
+			return ret;
+		}
+	} else {
+		data->pclk = NULL;
+	}
+
+	if (!data->clk && (!data->aclk || !data->pclk)) {
+		dev_err(dev, "Failed to get device clock(s)!\n");
+		return -ENOSYS;
 	}
 
 	data->clk_master = devm_clk_get(dev, "master");
 	if (!IS_ERR(data->clk_master)) {
 		ret = clk_prepare(data->clk_master);
 		if (ret) {
-			clk_unprepare(data->clk);
 			dev_err(dev, "Failed to prepare master's clk\n");
 			return ret;
 		}
+	} else {
+		data->clk_master = NULL;
 	}
 
 	data->sysmmu = dev;
@@ -601,6 +655,14 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, data);
 
+	__sysmmu_get_version(data);
+	if (PG_ENT_SHIFT < 0) {
+		if (MMU_MAJ_VER(data->version) < 5)
+			PG_ENT_SHIFT = SYSMMU_PG_ENT_SHIFT;
+		else
+			PG_ENT_SHIFT = SYSMMU_V5_PG_ENT_SHIFT;
+	}
+
 	pm_runtime_enable(dev);
 
 	return 0;
@@ -650,28 +712,38 @@ static struct platform_driver exynos_sysmmu_driver __refdata = {
 	}
 };
 
-static inline void pgtable_flush(void *vastart, void *vaend)
+static inline void update_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
 {
-	dmac_flush_range(vastart, vaend);
-	outer_flush_range(virt_to_phys(vastart),
-				virt_to_phys(vaend));
+	dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent), sizeof(*ent),
+				DMA_TO_DEVICE);
+	*ent = val;
+	dma_sync_single_for_device(dma_dev, virt_to_phys(ent), sizeof(*ent),
+				   DMA_TO_DEVICE);
 }
 
 static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
 {
 	struct exynos_iommu_domain *domain;
+	dma_addr_t handle;
 	int i;
 
-	if (type != IOMMU_DOMAIN_UNMANAGED)
-		return NULL;
+	/* Check if correct PTE offsets are initialized */
+	BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev);
 
 	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 	if (!domain)
 		return NULL;
 
+	if (type == IOMMU_DOMAIN_DMA) {
+		if (iommu_get_dma_cookie(&domain->domain) != 0)
+			goto err_pgtable;
+	} else if (type != IOMMU_DOMAIN_UNMANAGED) {
+		goto err_pgtable;
+	}
+
 	domain->pgtable = (sysmmu_pte_t *)__get_free_pages(GFP_KERNEL, 2);
 	if (!domain->pgtable)
-		goto err_pgtable;
+		goto err_dma_cookie;
 
 	domain->lv2entcnt = (short *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
 	if (!domain->lv2entcnt)
@@ -689,7 +761,10 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
 		domain->pgtable[i + 7] = ZERO_LV2LINK;
 	}
 
-	pgtable_flush(domain->pgtable, domain->pgtable + NUM_LV1ENTRIES);
+	handle = dma_map_single(dma_dev, domain->pgtable, LV1TABLE_SIZE,
+				DMA_TO_DEVICE);
+	/* For mapping page table entries we rely on dma == phys */
+	BUG_ON(handle != virt_to_phys(domain->pgtable));
 
 	spin_lock_init(&domain->lock);
 	spin_lock_init(&domain->pgtablelock);
@@ -703,6 +778,9 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
 
 err_counter:
 	free_pages((unsigned long)domain->pgtable, 2);
+err_dma_cookie:
+	if (type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(&domain->domain);
 err_pgtable:
 	kfree(domain);
 	return NULL;
@@ -727,16 +805,62 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
+	if (iommu_domain->type == IOMMU_DOMAIN_DMA)
+		iommu_put_dma_cookie(iommu_domain);
+
+	dma_unmap_single(dma_dev, virt_to_phys(domain->pgtable), LV1TABLE_SIZE,
+			 DMA_TO_DEVICE);
+
 	for (i = 0; i < NUM_LV1ENTRIES; i++)
-		if (lv1ent_page(domain->pgtable + i))
+		if (lv1ent_page(domain->pgtable + i)) {
+			phys_addr_t base = lv2table_base(domain->pgtable + i);
+
+			dma_unmap_single(dma_dev, base, LV2TABLE_SIZE,
+					 DMA_TO_DEVICE);
 			kmem_cache_free(lv2table_kmem_cache,
-				phys_to_virt(lv2table_base(domain->pgtable + i)));
+					phys_to_virt(base));
+		}
 
 	free_pages((unsigned long)domain->pgtable, 2);
 	free_pages((unsigned long)domain->lv2entcnt, 1);
 	kfree(domain);
 }
 
+static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
+				    struct device *dev)
+{
+	struct exynos_iommu_owner *owner = dev->archdata.iommu;
+	struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
+	phys_addr_t pagetable = virt_to_phys(domain->pgtable);
+	struct sysmmu_drvdata *data, *next;
+	unsigned long flags;
+	bool found = false;
+
+	if (!has_sysmmu(dev) || owner->domain != iommu_domain)
+		return;
+
+	spin_lock_irqsave(&domain->lock, flags);
+	list_for_each_entry_safe(data, next, &domain->clients, domain_node) {
+		if (data->master == dev) {
+			if (__sysmmu_disable(data)) {
+				data->master = NULL;
+				list_del_init(&data->domain_node);
+			}
+			pm_runtime_put(data->sysmmu);
+			found = true;
+		}
+	}
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	owner->domain = NULL;
+
+	if (found)
+		dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n",
+					__func__, &pagetable);
+	else
+		dev_err(dev, "%s: No IOMMU is attached\n", __func__);
+}
+
 static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
 				   struct device *dev)
 {
@@ -750,6 +874,9 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
 	if (!has_sysmmu(dev))
 		return -ENODEV;
 
+	if (owner->domain)
+		exynos_iommu_detach_device(owner->domain, dev);
+
 	list_for_each_entry(data, &owner->controllers, owner_node) {
 		pm_runtime_get_sync(data->sysmmu);
 		ret = __sysmmu_enable(data, pagetable, domain);
@@ -768,44 +895,13 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
 		return ret;
 	}
 
+	owner->domain = iommu_domain;
 	dev_dbg(dev, "%s: Attached IOMMU with pgtable %pa %s\n",
 		__func__, &pagetable, (ret == 0) ? "" : ", again");
 
 	return ret;
 }
 
-static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
-				    struct device *dev)
-{
-	struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
-	phys_addr_t pagetable = virt_to_phys(domain->pgtable);
-	struct sysmmu_drvdata *data, *next;
-	unsigned long flags;
-	bool found = false;
-
-	if (!has_sysmmu(dev))
-		return;
-
-	spin_lock_irqsave(&domain->lock, flags);
-	list_for_each_entry_safe(data, next, &domain->clients, domain_node) {
-		if (data->master == dev) {
-			if (__sysmmu_disable(data)) {
-				data->master = NULL;
-				list_del_init(&data->domain_node);
-			}
-			pm_runtime_put(data->sysmmu);
-			found = true;
-		}
-	}
-	spin_unlock_irqrestore(&domain->lock, flags);
-
-	if (found)
-		dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n",
-					__func__, &pagetable);
-	else
-		dev_err(dev, "%s: No IOMMU is attached\n", __func__);
-}
-
 static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *domain,
 		sysmmu_pte_t *sent, sysmmu_iova_t iova, short *pgcounter)
 {
@@ -819,15 +915,14 @@ static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *domain,
 		bool need_flush_flpd_cache = lv1ent_zero(sent);
 
 		pent = kmem_cache_zalloc(lv2table_kmem_cache, GFP_ATOMIC);
-		BUG_ON((unsigned int)pent & (LV2TABLE_SIZE - 1));
+		BUG_ON((uintptr_t)pent & (LV2TABLE_SIZE - 1));
 		if (!pent)
 			return ERR_PTR(-ENOMEM);
 
-		*sent = mk_lv1ent_page(virt_to_phys(pent));
+		update_pte(sent, mk_lv1ent_page(virt_to_phys(pent)));
 		kmemleak_ignore(pent);
 		*pgcounter = NUM_LV2ENTRIES;
-		pgtable_flush(pent, pent + NUM_LV2ENTRIES);
-		pgtable_flush(sent, sent + 1);
+		dma_map_single(dma_dev, pent, LV2TABLE_SIZE, DMA_TO_DEVICE);
 
 		/*
 		 * If pre-fetched SLPD is a faulty SLPD in zero_l2_table,
@@ -880,9 +975,7 @@ static int lv1set_section(struct exynos_iommu_domain *domain,
 		*pgcnt = 0;
 	}
 
-	*sent = mk_lv1ent_sect(paddr);
-
-	pgtable_flush(sent, sent + 1);
+	update_pte(sent, mk_lv1ent_sect(paddr));
 
 	spin_lock(&domain->lock);
 	if (lv1ent_page_zero(sent)) {
@@ -906,12 +999,15 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
 		if (WARN_ON(!lv2ent_fault(pent)))
 			return -EADDRINUSE;
 
-		*pent = mk_lv2ent_spage(paddr);
-		pgtable_flush(pent, pent + 1);
+		update_pte(pent, mk_lv2ent_spage(paddr));
 		*pgcnt -= 1;
 	} else { /* size == LPAGE_SIZE */
 		int i;
+		dma_addr_t pent_base = virt_to_phys(pent);
 
+		dma_sync_single_for_cpu(dma_dev, pent_base,
+					sizeof(*pent) * SPAGES_PER_LPAGE,
+					DMA_TO_DEVICE);
 		for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
 			if (WARN_ON(!lv2ent_fault(pent))) {
 				if (i > 0)
@@ -921,7 +1017,9 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
 
 			*pent = mk_lv2ent_lpage(paddr);
 		}
-		pgtable_flush(pent - SPAGES_PER_LPAGE, pent);
+		dma_sync_single_for_device(dma_dev, pent_base,
+					   sizeof(*pent) * SPAGES_PER_LPAGE,
+					   DMA_TO_DEVICE);
 		*pgcnt -= SPAGES_PER_LPAGE;
 	}
 
@@ -1031,8 +1129,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
 		}
 
 		/* workaround for h/w bug in System MMU v3.3 */
-		*ent = ZERO_LV2LINK;
-		pgtable_flush(ent, ent + 1);
+		update_pte(ent, ZERO_LV2LINK);
 		size = SECT_SIZE;
 		goto done;
 	}
@@ -1053,9 +1150,8 @@ static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
 	}
 
 	if (lv2ent_small(ent)) {
-		*ent = 0;
+		update_pte(ent, 0);
 		size = SPAGE_SIZE;
-		pgtable_flush(ent, ent + 1);
 		domain->lv2entcnt[lv1ent_offset(iova)] += 1;
 		goto done;
 	}
@@ -1066,9 +1162,13 @@ static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
 		goto err;
 	}
 
+	dma_sync_single_for_cpu(dma_dev, virt_to_phys(ent),
+				sizeof(*ent) * SPAGES_PER_LPAGE,
+				DMA_TO_DEVICE);
 	memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
-	pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
-
+	dma_sync_single_for_device(dma_dev, virt_to_phys(ent),
+				   sizeof(*ent) * SPAGES_PER_LPAGE,
+				   DMA_TO_DEVICE);
 	size = LPAGE_SIZE;
 	domain->lv2entcnt[lv1ent_offset(iova)] += SPAGES_PER_LPAGE;
 done:
@@ -1114,28 +1214,32 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
 	return phys;
 }
 
+static struct iommu_group *get_device_iommu_group(struct device *dev)
+{
+	struct iommu_group *group;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		group = iommu_group_alloc();
+
+	return group;
+}
+
 static int exynos_iommu_add_device(struct device *dev)
 {
 	struct iommu_group *group;
-	int ret;
 
 	if (!has_sysmmu(dev))
 		return -ENODEV;
 
-	group = iommu_group_get(dev);
+	group = iommu_group_get_for_dev(dev);
 
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group)) {
-			dev_err(dev, "Failed to allocate IOMMU group\n");
-			return PTR_ERR(group);
-		}
-	}
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
-	ret = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
 
-	return ret;
+	return 0;
 }
 
 static void exynos_iommu_remove_device(struct device *dev)
@@ -1182,6 +1286,7 @@ static struct iommu_ops exynos_iommu_ops = {
 	.unmap = exynos_iommu_unmap,
 	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
+	.device_group = get_device_iommu_group,
 	.add_device = exynos_iommu_add_device,
 	.remove_device = exynos_iommu_remove_device,
 	.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
@@ -1245,6 +1350,13 @@ static int __init exynos_iommu_of_setup(struct device_node *np)
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
 
+	/*
+	 * use the first registered sysmmu device for performing
+	 * dma mapping operations on iommu page tables (cpu cache flush)
+	 */
+	if (!dma_dev)
+		dma_dev = &pdev->dev;
+
 	of_iommu_set_ops(np, &exynos_iommu_ops);
 	return 0;
 }
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
new file mode 100644
index 000000000000..9488e3c97bcb
--- /dev/null
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -0,0 +1,846 @@
+/*
+ * CPU-agnostic ARM page table allocator.
+ *
+ * ARMv7 Short-descriptor format, supporting
+ * - Basic memory attributes
+ * - Simplified access permissions (AP[2:1] model)
+ * - Backwards-compatible TEX remap
+ * - Large pages/supersections (if indicated by the caller)
+ *
+ * Not supporting:
+ * - Legacy access permissions (AP[2:0] model)
+ *
+ * Almost certainly never supporting:
+ * - PXN
+ * - Domains
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (C) 2014-2015 ARM Limited
+ * Copyright (c) 2014-2015 MediaTek Inc.
+ */
+
+#define pr_fmt(fmt)	"arm-v7s io-pgtable: " fmt
+
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/iommu.h>
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/barrier.h>
+
+#include "io-pgtable.h"
+
+/* Struct accessors */
+#define io_pgtable_to_data(x)						\
+	container_of((x), struct arm_v7s_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x)					\
+	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+/*
+ * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
+ * and 12 bits in a page. With some carefully-chosen coefficients we can
+ * hide the ugly inconsistencies behind these macros and at least let the
+ * rest of the code pretend to be somewhat sane.
+ */
+#define ARM_V7S_ADDR_BITS		32
+#define _ARM_V7S_LVL_BITS(lvl)		(16 - (lvl) * 4)
+#define ARM_V7S_LVL_SHIFT(lvl)		(ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
+#define ARM_V7S_TABLE_SHIFT		10
+
+#define ARM_V7S_PTES_PER_LVL(lvl)	(1 << _ARM_V7S_LVL_BITS(lvl))
+#define ARM_V7S_TABLE_SIZE(lvl)						\
+	(ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))
+
+#define ARM_V7S_BLOCK_SIZE(lvl)		(1UL << ARM_V7S_LVL_SHIFT(lvl))
+#define ARM_V7S_LVL_MASK(lvl)		((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
+#define ARM_V7S_TABLE_MASK		((u32)(~0U << ARM_V7S_TABLE_SHIFT))
+#define _ARM_V7S_IDX_MASK(lvl)		(ARM_V7S_PTES_PER_LVL(lvl) - 1)
+#define ARM_V7S_LVL_IDX(addr, lvl)	({				\
+	int _l = lvl;							\
+	((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
+})
+
+/*
+ * Large page/supersection entries are effectively a block of 16 page/section
+ * entries, along the lines of the LPAE contiguous hint, but all with the
+ * same output address. For want of a better common name we'll call them
+ * "contiguous" versions of their respective page/section entries here, but
+ * noting the distinction (WRT to TLB maintenance) that they represent *one*
+ * entry repeated 16 times, not 16 separate entries (as in the LPAE case).
+ */
+#define ARM_V7S_CONT_PAGES		16
+
+/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
+#define ARM_V7S_PTE_TYPE_TABLE		0x1
+#define ARM_V7S_PTE_TYPE_PAGE		0x2
+#define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1
+
+#define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
+#define ARM_V7S_PTE_IS_TABLE(pte, lvl)	(lvl == 1 && ((pte) & ARM_V7S_PTE_TYPE_TABLE))
+
+/* Page table bits */
+#define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
+#define ARM_V7S_ATTR_B			BIT(2)
+#define ARM_V7S_ATTR_C			BIT(3)
+#define ARM_V7S_ATTR_NS_TABLE		BIT(3)
+#define ARM_V7S_ATTR_NS_SECTION		BIT(19)
+
+#define ARM_V7S_CONT_SECTION		BIT(18)
+#define ARM_V7S_CONT_PAGE_XN_SHIFT	15
+
+/*
+ * The attribute bits are consistently ordered*, but occupy bits [17:10] of
+ * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual
+ * fields relative to that 8-bit block, plus a total shift relative to the PTE.
+ */
+#define ARM_V7S_ATTR_SHIFT(lvl)		(16 - (lvl) * 6)
+
+#define ARM_V7S_ATTR_MASK		0xff
+#define ARM_V7S_ATTR_AP0		BIT(0)
+#define ARM_V7S_ATTR_AP1		BIT(1)
+#define ARM_V7S_ATTR_AP2		BIT(5)
+#define ARM_V7S_ATTR_S			BIT(6)
+#define ARM_V7S_ATTR_NG			BIT(7)
+#define ARM_V7S_TEX_SHIFT		2
+#define ARM_V7S_TEX_MASK		0x7
+#define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
+
+/* *well, except for TEX on level 2 large pages, of course :( */
+#define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
+#define ARM_V7S_CONT_PAGE_TEX_MASK	(ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)
+
+/* Simplified access permissions */
+#define ARM_V7S_PTE_AF			ARM_V7S_ATTR_AP0
+#define ARM_V7S_PTE_AP_UNPRIV		ARM_V7S_ATTR_AP1
+#define ARM_V7S_PTE_AP_RDONLY		ARM_V7S_ATTR_AP2
+
+/* Register bits */
+#define ARM_V7S_RGN_NC			0
+#define ARM_V7S_RGN_WBWA		1
+#define ARM_V7S_RGN_WT			2
+#define ARM_V7S_RGN_WB			3
+
+#define ARM_V7S_PRRR_TYPE_DEVICE	1
+#define ARM_V7S_PRRR_TYPE_NORMAL	2
+#define ARM_V7S_PRRR_TR(n, type)	(((type) & 0x3) << ((n) * 2))
+#define ARM_V7S_PRRR_DS0		BIT(16)
+#define ARM_V7S_PRRR_DS1		BIT(17)
+#define ARM_V7S_PRRR_NS0		BIT(18)
+#define ARM_V7S_PRRR_NS1		BIT(19)
+#define ARM_V7S_PRRR_NOS(n)		BIT((n) + 24)
+
+#define ARM_V7S_NMRR_IR(n, attr)	(((attr) & 0x3) << ((n) * 2))
+#define ARM_V7S_NMRR_OR(n, attr)	(((attr) & 0x3) << ((n) * 2 + 16))
+
+#define ARM_V7S_TTBR_S			BIT(1)
+#define ARM_V7S_TTBR_NOS		BIT(5)
+#define ARM_V7S_TTBR_ORGN_ATTR(attr)	(((attr) & 0x3) << 3)
+#define ARM_V7S_TTBR_IRGN_ATTR(attr)					\
+	((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))
+
+#define ARM_V7S_TCR_PD1			BIT(5)
+
+typedef u32 arm_v7s_iopte;
+
+static bool selftest_running;
+
+struct arm_v7s_io_pgtable {
+	struct io_pgtable	iop;
+
+	arm_v7s_iopte		*pgd;
+	struct kmem_cache	*l2_tables;
+};
+
+static dma_addr_t __arm_v7s_dma_addr(void *pages)
+{
+	return (dma_addr_t)virt_to_phys(pages);
+}
+
+static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
+{
+	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
+		pte &= ARM_V7S_TABLE_MASK;
+	else
+		pte &= ARM_V7S_LVL_MASK(lvl);
+	return phys_to_virt(pte);
+}
+
+static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
+				   struct arm_v7s_io_pgtable *data)
+{
+	struct device *dev = data->iop.cfg.iommu_dev;
+	dma_addr_t dma;
+	size_t size = ARM_V7S_TABLE_SIZE(lvl);
+	void *table = NULL;
+
+	if (lvl == 1)
+		table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
+	else if (lvl == 2)
+		table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
+	if (table && !selftest_running) {
+		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma))
+			goto out_free;
+		/*
+		 * We depend on the IOMMU being able to work with any physical
+		 * address directly, so if the DMA layer suggests otherwise by
+		 * translating or truncating them, that bodes very badly...
+		 */
+		if (dma != virt_to_phys(table))
+			goto out_unmap;
+	}
+	kmemleak_ignore(table);
+	return table;
+
+out_unmap:
+	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
+	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
+out_free:
+	if (lvl == 1)
+		free_pages((unsigned long)table, get_order(size));
+	else
+		kmem_cache_free(data->l2_tables, table);
+	return NULL;
+}
+
+static void __arm_v7s_free_table(void *table, int lvl,
+				 struct arm_v7s_io_pgtable *data)
+{
+	struct device *dev = data->iop.cfg.iommu_dev;
+	size_t size = ARM_V7S_TABLE_SIZE(lvl);
+
+	if (!selftest_running)
+		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
+				 DMA_TO_DEVICE);
+	if (lvl == 1)
+		free_pages((unsigned long)table, get_order(size));
+	else
+		kmem_cache_free(data->l2_tables, table);
+}
+
+static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
+			       struct io_pgtable_cfg *cfg)
+{
+	if (selftest_running)
+		return;
+
+	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
+				   num_entries * sizeof(*ptep), DMA_TO_DEVICE);
+}
+static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte,
+			      int num_entries, struct io_pgtable_cfg *cfg)
+{
+	int i;
+
+	for (i = 0; i < num_entries; i++)
+		ptep[i] = pte;
+
+	__arm_v7s_pte_sync(ptep, num_entries, cfg);
+}
+
+static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
+					 struct io_pgtable_cfg *cfg)
+{
+	bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
+	arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S |
+			    ARM_V7S_ATTR_TEX(1);
+
+	if (ap) {
+		pte |= ARM_V7S_PTE_AF | ARM_V7S_PTE_AP_UNPRIV;
+		if (!(prot & IOMMU_WRITE))
+			pte |= ARM_V7S_PTE_AP_RDONLY;
+	}
+	pte <<= ARM_V7S_ATTR_SHIFT(lvl);
+
+	if ((prot & IOMMU_NOEXEC) && ap)
+		pte |= ARM_V7S_ATTR_XN(lvl);
+	if (prot & IOMMU_CACHE)
+		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;
+
+	return pte;
+}
+
+static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
+{
+	int prot = IOMMU_READ;
+
+	if (pte & (ARM_V7S_PTE_AP_RDONLY << ARM_V7S_ATTR_SHIFT(lvl)))
+		prot |= IOMMU_WRITE;
+	if (pte & ARM_V7S_ATTR_C)
+		prot |= IOMMU_CACHE;
+
+	return prot;
+}
+
+static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
+{
+	if (lvl == 1) {
+		pte |= ARM_V7S_CONT_SECTION;
+	} else if (lvl == 2) {
+		arm_v7s_iopte xn = pte & ARM_V7S_ATTR_XN(lvl);
+		arm_v7s_iopte tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK;
+
+		pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE;
+		pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) |
+		       (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) |
+		       ARM_V7S_PTE_TYPE_CONT_PAGE;
+	}
+	return pte;
+}
+
+static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
+{
+	if (lvl == 1) {
+		pte &= ~ARM_V7S_CONT_SECTION;
+	} else if (lvl == 2) {
+		arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
+		arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
+					   ARM_V7S_CONT_PAGE_TEX_SHIFT);
+
+		pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
+		pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
+		       (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
+		       ARM_V7S_PTE_TYPE_PAGE;
+	}
+	return pte;
+}
+
+static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
+{
+	if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
+		return pte & ARM_V7S_CONT_SECTION;
+	else if (lvl == 2)
+		return !(pte & ARM_V7S_PTE_TYPE_PAGE);
+	return false;
+}
+
+static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+			   size_t, int, arm_v7s_iopte *);
+
+static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
+			    unsigned long iova, phys_addr_t paddr, int prot,
+			    int lvl, int num_entries, arm_v7s_iopte *ptep)
+{
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_v7s_iopte pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
+	int i;
+
+	for (i = 0; i < num_entries; i++)
+		if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) {
+			/*
+			 * We need to unmap and free the old table before
+			 * overwriting it with a block entry.
+			 */
+			arm_v7s_iopte *tblp;
+			size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
+
+			tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
+			if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
+						    sz, lvl, tblp) != sz))
+				return -EINVAL;
+		} else if (ptep[i]) {
+			/* We require an unmap first */
+			WARN_ON(!selftest_running);
+			return -EEXIST;
+		}
+
+	pte |= ARM_V7S_PTE_TYPE_PAGE;
+	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
+		pte |= ARM_V7S_ATTR_NS_SECTION;
+
+	if (num_entries > 1)
+		pte = arm_v7s_pte_to_cont(pte, lvl);
+
+	pte |= paddr & ARM_V7S_LVL_MASK(lvl);
+
+	__arm_v7s_set_pte(ptep, pte, num_entries, cfg);
+	return 0;
+}
+
+static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
+			 phys_addr_t paddr, size_t size, int prot,
+			 int lvl, arm_v7s_iopte *ptep)
+{
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	arm_v7s_iopte pte, *cptep;
+	int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
+
+	/* Find our entry at the current level */
+	ptep += ARM_V7S_LVL_IDX(iova, lvl);
+
+	/* If we can install a leaf entry at this level, then do so */
+	if (num_entries)
+		return arm_v7s_init_pte(data, iova, paddr, prot,
+					lvl, num_entries, ptep);
+
+	/* We can't allocate tables at the final level */
+	if (WARN_ON(lvl == 2))
+		return -EINVAL;
+
+	/* Grab a pointer to the next level */
+	pte = *ptep;
+	if (!pte) {
+		cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
+		if (!cptep)
+			return -ENOMEM;
+
+		pte = virt_to_phys(cptep) | ARM_V7S_PTE_TYPE_TABLE;
+		if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
+			pte |= ARM_V7S_ATTR_NS_TABLE;
+
+		__arm_v7s_set_pte(ptep, pte, 1, cfg);
+	} else {
+		cptep = iopte_deref(pte, lvl);
+	}
+
+	/* Rinse, repeat */
+	return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep);
+}
+
+static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
+			phys_addr_t paddr, size_t size, int prot)
+{
+	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+	struct io_pgtable *iop = &data->iop;
+	int ret;
+
+	/* If no access, then nothing to do */
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+		return 0;
+
+	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
+	/*
+	 * Synchronise all PTE updates for the new mapping before there's
+	 * a chance for anything to kick off a table walk for the new iova.
+	 */
+	if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
+		io_pgtable_tlb_add_flush(iop, iova, size,
+					 ARM_V7S_BLOCK_SIZE(2), false);
+		io_pgtable_tlb_sync(iop);
+	} else {
+		wmb();
+	}
+
+	return ret;
+}
+
+static void arm_v7s_free_pgtable(struct io_pgtable *iop)
+{
+	struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
+	int i;
+
+	for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
+		arm_v7s_iopte pte = data->pgd[i];
+
+		if (ARM_V7S_PTE_IS_TABLE(pte, 1))
+			__arm_v7s_free_table(iopte_deref(pte, 1), 2, data);
+	}
+	__arm_v7s_free_table(data->pgd, 1, data);
+	kmem_cache_destroy(data->l2_tables);
+	kfree(data);
+}
+
+static void arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
+			       unsigned long iova, int idx, int lvl,
+			       arm_v7s_iopte *ptep)
+{
+	struct io_pgtable *iop = &data->iop;
+	arm_v7s_iopte pte;
+	size_t size = ARM_V7S_BLOCK_SIZE(lvl);
+	int i;
+
+	ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
+	pte = arm_v7s_cont_to_pte(*ptep, lvl);
+	for (i = 0; i < ARM_V7S_CONT_PAGES; i++) {
+		ptep[i] = pte;
+		pte += size;
+	}
+
+	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
+
+	size *= ARM_V7S_CONT_PAGES;
+	io_pgtable_tlb_add_flush(iop, iova, size, size, true);
+	io_pgtable_tlb_sync(iop);
+}
+
+static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+				   unsigned long iova, size_t size,
+				   arm_v7s_iopte *ptep)
+{
+	unsigned long blk_start, blk_end, blk_size;
+	phys_addr_t blk_paddr;
+	arm_v7s_iopte table = 0;
+	int prot = arm_v7s_pte_to_prot(*ptep, 1);
+
+	blk_size = ARM_V7S_BLOCK_SIZE(1);
+	blk_start = iova & ARM_V7S_LVL_MASK(1);
+	blk_end = blk_start + ARM_V7S_BLOCK_SIZE(1);
+	blk_paddr = *ptep & ARM_V7S_LVL_MASK(1);
+
+	for (; blk_start < blk_end; blk_start += size, blk_paddr += size) {
+		arm_v7s_iopte *tablep;
+
+		/* Unmap! */
+		if (blk_start == iova)
+			continue;
+
+		/* __arm_v7s_map expects a pointer to the start of the table */
+		tablep = &table - ARM_V7S_LVL_IDX(blk_start, 1);
+		if (__arm_v7s_map(data, blk_start, blk_paddr, size, prot, 1,
+				  tablep) < 0) {
+			if (table) {
+				/* Free the table we allocated */
+				tablep = iopte_deref(table, 1);
+				__arm_v7s_free_table(tablep, 2, data);
+			}
+			return 0; /* Bytes unmapped */
+		}
+	}
+
+	__arm_v7s_set_pte(ptep, table, 1, &data->iop.cfg);
+	iova &= ~(blk_size - 1);
+	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
+	return size;
+}
+
+static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+			    unsigned long iova, size_t size, int lvl,
+			    arm_v7s_iopte *ptep)
+{
+	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
+	struct io_pgtable *iop = &data->iop;
+	int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
+
+	/* Something went horribly wrong and we ran out of page table */
+	if (WARN_ON(lvl > 2))
+		return 0;
+
+	idx = ARM_V7S_LVL_IDX(iova, lvl);
+	ptep += idx;
+	do {
+		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(ptep[i])))
+			return 0;
+		pte[i] = ptep[i];
+	} while (++i < num_entries);
+
+	/*
+	 * If we've hit a contiguous 'large page' entry at this level, it
+	 * needs splitting first, unless we're unmapping the whole lot.
+	 */
+	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl))
+		arm_v7s_split_cont(data, iova, idx, lvl, ptep);
+
+	/* If the size matches this level, we're in the right place */
+	if (num_entries) {
+		size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);
+
+		__arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg);
+
+		for (i = 0; i < num_entries; i++) {
+			if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
+				/* Also flush any partial walks */
+				io_pgtable_tlb_add_flush(iop, iova, blk_size,
+					ARM_V7S_BLOCK_SIZE(lvl + 1), false);
+				io_pgtable_tlb_sync(iop);
+				ptep = iopte_deref(pte[i], lvl);
+				__arm_v7s_free_table(ptep, lvl + 1, data);
+			} else {
+				io_pgtable_tlb_add_flush(iop, iova, blk_size,
+							 blk_size, true);
+			}
+			iova += blk_size;
+		}
+		return size;
+	} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
+		/*
+		 * Insert a table at the next level to map the old region,
+		 * minus the part we want to unmap
+		 */
+		return arm_v7s_split_blk_unmap(data, iova, size, ptep);
+	}
+
+	/* Keep on walkin' */
+	ptep = iopte_deref(pte[0], lvl);
+	return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
+}
+
+static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			 size_t size)
+{
+	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+	size_t unmapped;
+
+	unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd);
+	if (unmapped)
+		io_pgtable_tlb_sync(&data->iop);
+
+	return unmapped;
+}
+
+static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
+					unsigned long iova)
+{
+	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+	arm_v7s_iopte *ptep = data->pgd, pte;
+	int lvl = 0;
+	u32 mask;
+
+	do {
+		pte = ptep[ARM_V7S_LVL_IDX(iova, ++lvl)];
+		ptep = iopte_deref(pte, lvl);
+	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
+
+	if (!ARM_V7S_PTE_IS_VALID(pte))
+		return 0;
+
+	mask = ARM_V7S_LVL_MASK(lvl);
+	if (arm_v7s_pte_is_cont(pte, lvl))
+		mask *= ARM_V7S_CONT_PAGES;
+	return (pte & mask) | (iova & ~mask);
+}
+
+static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
+						void *cookie)
+{
+	struct arm_v7s_io_pgtable *data;
+
+	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+		return NULL;
+
+	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
+			    IO_PGTABLE_QUIRK_NO_PERMS |
+			    IO_PGTABLE_QUIRK_TLBI_ON_MAP))
+		return NULL;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
+					    ARM_V7S_TABLE_SIZE(2),
+					    ARM_V7S_TABLE_SIZE(2),
+					    SLAB_CACHE_DMA, NULL);
+	if (!data->l2_tables)
+		goto out_free_data;
+
+	data->iop.ops = (struct io_pgtable_ops) {
+		.map		= arm_v7s_map,
+		.unmap		= arm_v7s_unmap,
+		.iova_to_phys	= arm_v7s_iova_to_phys,
+	};
+
+	/* We have to do this early for __arm_v7s_alloc_table to work... */
+	data->iop.cfg = *cfg;
+
+	/*
+	 * Unless the IOMMU driver indicates supersection support by
+	 * having SZ_16M set in the initial bitmap, they won't be used.
+	 */
+	cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
+
+	/* TCR: T0SZ=0, disable TTBR1 */
+	cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1;
+
+	/*
+	 * TEX remap: the indices used map to the closest equivalent types
+	 * under the non-TEX-remap interpretation of those attribute bits,
+	 * excepting various implementation-defined aspects of shareability.
+	 */
+	cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) |
+				ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) |
+				ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) |
+				ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 |
+				ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7);
+	cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) |
+				ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA);
+
+	/* Looking good; allocate a pgd */
+	data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data);
+	if (!data->pgd)
+		goto out_free_data;
+
+	/* Ensure the empty pgd is visible before any actual TTBR write */
+	wmb();
+
+	/* TTBRs */
+	cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
+				   ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
+				   ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
+				   ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA);
+	cfg->arm_v7s_cfg.ttbr[1] = 0;
+	return &data->iop;
+
+out_free_data:
+	kmem_cache_destroy(data->l2_tables);
+	kfree(data);
+	return NULL;
+}
+
+struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
+	.alloc	= arm_v7s_alloc_pgtable,
+	.free	= arm_v7s_free_pgtable,
+};
+
+#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
+
+static struct io_pgtable_cfg *cfg_cookie;
+
+static void dummy_tlb_flush_all(void *cookie)
+{
+	WARN_ON(cookie != cfg_cookie);
+}
+
+static void dummy_tlb_add_flush(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
+{
+	WARN_ON(cookie != cfg_cookie);
+	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
+}
+
+static void dummy_tlb_sync(void *cookie)
+{
+	WARN_ON(cookie != cfg_cookie);
+}
+
+static struct iommu_gather_ops dummy_tlb_ops = {
+	.tlb_flush_all	= dummy_tlb_flush_all,
+	.tlb_add_flush	= dummy_tlb_add_flush,
+	.tlb_sync	= dummy_tlb_sync,
+};
+
+#define __FAIL(ops)	({				\
+		WARN(1, "selftest: test failed\n");	\
+		selftest_running = false;		\
+		-EFAULT;				\
+})
+
+static int __init arm_v7s_do_selftests(void)
+{
+	struct io_pgtable_ops *ops;
+	struct io_pgtable_cfg cfg = {
+		.tlb = &dummy_tlb_ops,
+		.oas = 32,
+		.ias = 32,
+		.quirks = IO_PGTABLE_QUIRK_ARM_NS,
+		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
+	};
+	unsigned int iova, size, iova_start;
+	unsigned int i, loopnr = 0;
+
+	selftest_running = true;
+
+	cfg_cookie = &cfg;
+
+	ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg);
+	if (!ops) {
+		pr_err("selftest: failed to allocate io pgtable ops\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Initial sanity checks.
+	 * Empty page tables shouldn't provide any translations.
+	 */
+	if (ops->iova_to_phys(ops, 42))
+		return __FAIL(ops);
+
+	if (ops->iova_to_phys(ops, SZ_1G + 42))
+		return __FAIL(ops);
+
+	if (ops->iova_to_phys(ops, SZ_2G + 42))
+		return __FAIL(ops);
+
+	/*
+	 * Distinct mappings of different granule sizes.
+	 */
+	iova = 0;
+	i = find_first_bit(&cfg.pgsize_bitmap, BITS_PER_LONG);
+	while (i != BITS_PER_LONG) {
+		size = 1UL << i;
+		if (ops->map(ops, iova, iova, size, IOMMU_READ |
+						    IOMMU_WRITE |
+						    IOMMU_NOEXEC |
+						    IOMMU_CACHE))
+			return __FAIL(ops);
+
+		/* Overlapping mappings */
+		if (!ops->map(ops, iova, iova + size, size,
+			      IOMMU_READ | IOMMU_NOEXEC))
+			return __FAIL(ops);
+
+		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
+			return __FAIL(ops);
+
+		iova += SZ_16M;
+		i++;
+		i = find_next_bit(&cfg.pgsize_bitmap, BITS_PER_LONG, i);
+		loopnr++;
+	}
+
+	/* Partial unmap */
+	i = 1;
+	size = 1UL << __ffs(cfg.pgsize_bitmap);
+	while (i < loopnr) {
+		iova_start = i * SZ_16M;
+		if (ops->unmap(ops, iova_start + size, size) != size)
+			return __FAIL(ops);
+
+		/* Remap of partial unmap */
+		if (ops->map(ops, iova_start + size, size, size, IOMMU_READ))
+			return __FAIL(ops);
+
+		if (ops->iova_to_phys(ops, iova_start + size + 42)
+		    != (size + 42))
+			return __FAIL(ops);
+		i++;
+	}
+
+	/* Full unmap */
+	iova = 0;
+	i = find_first_bit(&cfg.pgsize_bitmap, BITS_PER_LONG);
+	while (i != BITS_PER_LONG) {
+		size = 1UL << i;
+
+		if (ops->unmap(ops, iova, size) != size)
+			return __FAIL(ops);
+
+		if (ops->iova_to_phys(ops, iova + 42))
+			return __FAIL(ops);
+
+		/* Remap full block */
+		if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
+			return __FAIL(ops);
+
+		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
+			return __FAIL(ops);
+
+		iova += SZ_16M;
+		i++;
+		i = find_next_bit(&cfg.pgsize_bitmap, BITS_PER_LONG, i);
+	}
+
+	free_io_pgtable_ops(ops);
+
+	selftest_running = false;
+
+	pr_info("self test ok\n");
+	return 0;
+}
+subsys_initcall(arm_v7s_do_selftests);
+#endif
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 381ca5a37a7b..f433b516098a 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -446,7 +446,6 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	unsigned long blk_start, blk_end;
 	phys_addr_t blk_paddr;
 	arm_lpae_iopte table = 0;
-	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 
 	blk_start = iova & ~(blk_size - 1);
 	blk_end = blk_start + blk_size;
@@ -472,9 +471,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 		}
 	}
 
-	__arm_lpae_set_pte(ptep, table, cfg);
+	__arm_lpae_set_pte(ptep, table, &data->iop.cfg);
 	iova &= ~(blk_size - 1);
-	cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie);
+	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
 	return size;
 }
 
@@ -483,8 +482,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			    arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte;
-	const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
-	void *cookie = data->iop.cookie;
+	struct io_pgtable *iop = &data->iop;
 	size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
 	/* Something went horribly wrong and we ran out of page table */
@@ -498,17 +496,17 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 	/* If the size matches this level, we're in the right place */
 	if (size == blk_size) {
-		__arm_lpae_set_pte(ptep, 0, &data->iop.cfg);
+		__arm_lpae_set_pte(ptep, 0, &iop->cfg);
 
 		if (!iopte_leaf(pte, lvl)) {
 			/* Also flush any partial walks */
-			tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data),
-					   false, cookie);
-			tlb->tlb_sync(cookie);
+			io_pgtable_tlb_add_flush(iop, iova, size,
+						ARM_LPAE_GRANULE(data), false);
+			io_pgtable_tlb_sync(iop);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
 		} else {
-			tlb->tlb_add_flush(iova, size, size, true, cookie);
+			io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 		}
 
 		return size;
@@ -532,13 +530,12 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 {
 	size_t unmapped;
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
-	struct io_pgtable *iop = &data->iop;
 	arm_lpae_iopte *ptep = data->pgd;
 	int lvl = ARM_LPAE_START_LVL(data);
 
 	unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep);
 	if (unmapped)
-		iop->cfg.tlb->tlb_sync(iop->cookie);
+		io_pgtable_tlb_sync(&data->iop);
 
 	return unmapped;
 }
@@ -662,8 +659,12 @@ static struct io_pgtable *
 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 {
 	u64 reg;
-	struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg);
+	struct arm_lpae_io_pgtable *data;
 
+	if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS)
+		return NULL;
+
+	data = arm_lpae_alloc_pgtable(cfg);
 	if (!data)
 		return NULL;
 
@@ -746,8 +747,13 @@ static struct io_pgtable *
 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 {
 	u64 reg, sl;
-	struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg);
+	struct arm_lpae_io_pgtable *data;
+
+	/* The NS quirk doesn't apply at stage 2 */
+	if (cfg->quirks)
+		return NULL;
 
+	data = arm_lpae_alloc_pgtable(cfg);
 	if (!data)
 		return NULL;
 
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 6f2e319d4f04..876f6a76d288 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -33,6 +33,9 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] =
 	[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
 	[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
 #endif
+#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
+	[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
+#endif
 };
 
 struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
@@ -72,6 +75,6 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
 		return;
 
 	iop = container_of(ops, struct io_pgtable, ops);
-	iop->cfg.tlb->tlb_flush_all(iop->cookie);
+	io_pgtable_tlb_flush_all(iop);
 	io_pgtable_init_table[iop->fmt]->free(iop);
 }
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 36673c83de58..d4f502742e3b 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -1,5 +1,6 @@
 #ifndef __IO_PGTABLE_H
 #define __IO_PGTABLE_H
+#include <linux/bitops.h>
 
 /*
  * Public API for use by IOMMU drivers
@@ -9,6 +10,7 @@ enum io_pgtable_fmt {
 	ARM_32_LPAE_S2,
 	ARM_64_LPAE_S1,
 	ARM_64_LPAE_S2,
+	ARM_V7S,
 	IO_PGTABLE_NUM_FMTS,
 };
 
@@ -45,8 +47,24 @@ struct iommu_gather_ops {
  *                 page table walker.
  */
 struct io_pgtable_cfg {
-	#define IO_PGTABLE_QUIRK_ARM_NS	(1 << 0)	/* Set NS bit in PTEs */
-	int				quirks;
+	/*
+	 * IO_PGTABLE_QUIRK_ARM_NS: (ARM formats) Set NS and NSTABLE bits in
+	 *	stage 1 PTEs, for hardware which insists on validating them
+	 *	even in	non-secure state where they should normally be ignored.
+	 *
+	 * IO_PGTABLE_QUIRK_NO_PERMS: Ignore the IOMMU_READ, IOMMU_WRITE and
+	 *	IOMMU_NOEXEC flags and map everything with full access, for
+	 *	hardware which does not implement the permissions of a given
+	 *	format, and/or requires some format-specific default value.
+	 *
+	 * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid
+	 *	(unmapped) entries but the hardware might do so anyway, perform
+	 *	TLB maintenance when mapping as well as when unmapping.
+	 */
+	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
+	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
+	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
+	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
 	unsigned int			ias;
 	unsigned int			oas;
@@ -65,6 +83,13 @@ struct io_pgtable_cfg {
 			u64	vttbr;
 			u64	vtcr;
 		} arm_lpae_s2_cfg;
+
+		struct {
+			u32	ttbr[2];
+			u32	tcr;
+			u32	nmrr;
+			u32	prrr;
+		} arm_v7s_cfg;
 	};
 };
 
@@ -121,18 +146,41 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops);
  * @fmt:    The page table format.
  * @cookie: An opaque token provided by the IOMMU driver and passed back to
  *          any callback routines.
+ * @tlb_sync_pending: Private flag for optimising out redundant syncs.
  * @cfg:    A copy of the page table configuration.
  * @ops:    The page table operations in use for this set of page tables.
  */
 struct io_pgtable {
 	enum io_pgtable_fmt	fmt;
 	void			*cookie;
+	bool			tlb_sync_pending;
 	struct io_pgtable_cfg	cfg;
 	struct io_pgtable_ops	ops;
 };
 
 #define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
 
+static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
+{
+	iop->cfg.tlb->tlb_flush_all(iop->cookie);
+	iop->tlb_sync_pending = true;
+}
+
+static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
+		unsigned long iova, size_t size, size_t granule, bool leaf)
+{
+	iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
+	iop->tlb_sync_pending = true;
+}
+
+static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
+{
+	if (iop->tlb_sync_pending) {
+		iop->cfg.tlb->tlb_sync(iop->cookie);
+		iop->tlb_sync_pending = false;
+	}
+}
+
 /**
  * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
  *                              particular format.
@@ -149,5 +197,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
 extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
 
 #endif /* __IO_PGTABLE_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0e3b0092ec92..bfd4f7c3b1d8 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1314,6 +1314,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 	size_t orig_size = size;
+	phys_addr_t orig_paddr = paddr;
 	int ret = 0;
 
 	if (unlikely(domain->ops->map == NULL ||
@@ -1358,7 +1359,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	if (ret)
 		iommu_unmap(domain, orig_iova, orig_size - size);
 	else
-		trace_map(orig_iova, paddr, orig_size);
+		trace_map(orig_iova, orig_paddr, orig_size);
 
 	return ret;
 }
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
new file mode 100644
index 000000000000..929a66a81b2b
--- /dev/null
+++ b/drivers/iommu/mtk_iommu.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 2015-2016 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/bug.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/of_address.h>
+#include <linux/of_iommu.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/barrier.h>
+#include <dt-bindings/memory/mt8173-larb-port.h>
+#include <soc/mediatek/smi.h>
+
+#include "io-pgtable.h"
+
+#define REG_MMU_PT_BASE_ADDR			0x000
+
+#define REG_MMU_INVALIDATE			0x020
+#define F_ALL_INVLD				0x2
+#define F_MMU_INV_RANGE				0x1
+
+#define REG_MMU_INVLD_START_A			0x024
+#define REG_MMU_INVLD_END_A			0x028
+
+#define REG_MMU_INV_SEL				0x038
+#define F_INVLD_EN0				BIT(0)
+#define F_INVLD_EN1				BIT(1)
+
+#define REG_MMU_STANDARD_AXI_MODE		0x048
+#define REG_MMU_DCM_DIS				0x050
+
+#define REG_MMU_CTRL_REG			0x110
+#define F_MMU_PREFETCH_RT_REPLACE_MOD		BIT(4)
+#define F_MMU_TF_PROTECT_SEL(prot)		(((prot) & 0x3) << 5)
+
+#define REG_MMU_IVRP_PADDR			0x114
+#define F_MMU_IVRP_PA_SET(pa)			((pa) >> 1)
+
+#define REG_MMU_INT_CONTROL0			0x120
+#define F_L2_MULIT_HIT_EN			BIT(0)
+#define F_TABLE_WALK_FAULT_INT_EN		BIT(1)
+#define F_PREETCH_FIFO_OVERFLOW_INT_EN		BIT(2)
+#define F_MISS_FIFO_OVERFLOW_INT_EN		BIT(3)
+#define F_PREFETCH_FIFO_ERR_INT_EN		BIT(5)
+#define F_MISS_FIFO_ERR_INT_EN			BIT(6)
+#define F_INT_CLR_BIT				BIT(12)
+
+#define REG_MMU_INT_MAIN_CONTROL		0x124
+#define F_INT_TRANSLATION_FAULT			BIT(0)
+#define F_INT_MAIN_MULTI_HIT_FAULT		BIT(1)
+#define F_INT_INVALID_PA_FAULT			BIT(2)
+#define F_INT_ENTRY_REPLACEMENT_FAULT		BIT(3)
+#define F_INT_TLB_MISS_FAULT			BIT(4)
+#define F_INT_MISS_TRANSACTION_FIFO_FAULT	BIT(5)
+#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT	BIT(6)
+
+#define REG_MMU_CPE_DONE			0x12C
+
+#define REG_MMU_FAULT_ST1			0x134
+
+#define REG_MMU_FAULT_VA			0x13c
+#define F_MMU_FAULT_VA_MSK			0xfffff000
+#define F_MMU_FAULT_VA_WRITE_BIT		BIT(1)
+#define F_MMU_FAULT_VA_LAYER_BIT		BIT(0)
+
+#define REG_MMU_INVLD_PA			0x140
+#define REG_MMU_INT_ID				0x150
+#define F_MMU0_INT_ID_LARB_ID(a)		(((a) >> 7) & 0x7)
+#define F_MMU0_INT_ID_PORT_ID(a)		(((a) >> 2) & 0x1f)
+
+#define MTK_PROTECT_PA_ALIGN			128
+
+struct mtk_iommu_suspend_reg {
+	u32				standard_axi_mode;
+	u32				dcm_dis;
+	u32				ctrl_reg;
+	u32				int_control0;
+	u32				int_main_control;
+};
+
+struct mtk_iommu_client_priv {
+	struct list_head		client;
+	unsigned int			mtk_m4u_id;
+	struct device			*m4udev;
+};
+
+struct mtk_iommu_domain {
+	spinlock_t			pgtlock; /* lock for page table */
+
+	struct io_pgtable_cfg		cfg;
+	struct io_pgtable_ops		*iop;
+
+	struct iommu_domain		domain;
+};
+
+struct mtk_iommu_data {
+	void __iomem			*base;
+	int				irq;
+	struct device			*dev;
+	struct clk			*bclk;
+	phys_addr_t			protect_base; /* protect memory base */
+	struct mtk_iommu_suspend_reg	reg;
+	struct mtk_iommu_domain		*m4u_dom;
+	struct iommu_group		*m4u_group;
+	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
+};
+
+static struct iommu_ops mtk_iommu_ops;
+
+static struct mtk_iommu_domain *to_mtk_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct mtk_iommu_domain, domain);
+}
+
+static void mtk_iommu_tlb_flush_all(void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
+	writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
+	wmb(); /* Make sure the tlb flush all done */
+}
+
+static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
+					   size_t granule, bool leaf,
+					   void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+
+	writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0, data->base + REG_MMU_INV_SEL);
+
+	writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
+	writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A);
+	writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
+}
+
+static void mtk_iommu_tlb_sync(void *cookie)
+{
+	struct mtk_iommu_data *data = cookie;
+	int ret;
+	u32 tmp;
+
+	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE, tmp,
+					tmp != 0, 10, 100000);
+	if (ret) {
+		dev_warn(data->dev,
+			 "Partial TLB flush timed out, falling back to full flush\n");
+		mtk_iommu_tlb_flush_all(cookie);
+	}
+	/* Clear the CPE status */
+	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
+}
+
+static const struct iommu_gather_ops mtk_iommu_gather_ops = {
+	.tlb_flush_all = mtk_iommu_tlb_flush_all,
+	.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
+	.tlb_sync = mtk_iommu_tlb_sync,
+};
+
+static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
+{
+	struct mtk_iommu_data *data = dev_id;
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+	u32 int_state, regval, fault_iova, fault_pa;
+	unsigned int fault_larb, fault_port;
+	bool layer, write;
+
+	/* Read error info from registers */
+	int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
+	fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
+	layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
+	write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
+	fault_iova &= F_MMU_FAULT_VA_MSK;
+	fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
+	regval = readl_relaxed(data->base + REG_MMU_INT_ID);
+	fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
+	fault_port = F_MMU0_INT_ID_PORT_ID(regval);
+
+	if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
+			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
+		dev_err_ratelimited(
+			data->dev,
+			"fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d layer=%d %s\n",
+			int_state, fault_iova, fault_pa, fault_larb, fault_port,
+			layer, write ? "write" : "read");
+	}
+
+	/* Interrupt clear */
+	regval = readl_relaxed(data->base + REG_MMU_INT_CONTROL0);
+	regval |= F_INT_CLR_BIT;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
+
+	mtk_iommu_tlb_flush_all(data);
+
+	return IRQ_HANDLED;
+}
+
+static void mtk_iommu_config(struct mtk_iommu_data *data,
+			     struct device *dev, bool enable)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+	struct mtk_smi_larb_iommu    *larb_mmu;
+	unsigned int                 larbid, portid;
+
+	head = dev->archdata.iommu;
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		larbid = MTK_M4U_TO_LARB(cur->mtk_m4u_id);
+		portid = MTK_M4U_TO_PORT(cur->mtk_m4u_id);
+		larb_mmu = &data->smi_imu.larb_imu[larbid];
+
+		dev_dbg(dev, "%s iommu port: %d\n",
+			enable ? "enable" : "disable", portid);
+
+		if (enable)
+			larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
+		else
+			larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid);
+	}
+}
+
+static int mtk_iommu_domain_finalise(struct mtk_iommu_data *data)
+{
+	struct mtk_iommu_domain *dom = data->m4u_dom;
+
+	spin_lock_init(&dom->pgtlock);
+
+	dom->cfg = (struct io_pgtable_cfg) {
+		.quirks = IO_PGTABLE_QUIRK_ARM_NS |
+			IO_PGTABLE_QUIRK_NO_PERMS |
+			IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
+		.ias = 32,
+		.oas = 32,
+		.tlb = &mtk_iommu_gather_ops,
+		.iommu_dev = data->dev,
+	};
+
+	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
+	if (!dom->iop) {
+		dev_err(data->dev, "Failed to alloc io pgtable\n");
+		return -EINVAL;
+	}
+
+	/* Update our support page sizes bitmap */
+	mtk_iommu_ops.pgsize_bitmap = dom->cfg.pgsize_bitmap;
+
+	writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+	       data->base + REG_MMU_PT_BASE_ADDR);
+	return 0;
+}
+
+static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
+{
+	struct mtk_iommu_domain *dom;
+
+	if (type != IOMMU_DOMAIN_DMA)
+		return NULL;
+
+	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+	if (!dom)
+		return NULL;
+
+	if (iommu_get_dma_cookie(&dom->domain)) {
+		kfree(dom);
+		return NULL;
+	}
+
+	dom->domain.geometry.aperture_start = 0;
+	dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+	dom->domain.geometry.force_aperture = true;
+
+	return &dom->domain;
+}
+
+static void mtk_iommu_domain_free(struct iommu_domain *domain)
+{
+	iommu_put_dma_cookie(domain);
+	kfree(to_mtk_domain(domain));
+}
+
+static int mtk_iommu_attach_device(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+	int ret;
+
+	if (!priv)
+		return -ENODEV;
+
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data->m4u_dom) {
+		data->m4u_dom = dom;
+		ret = mtk_iommu_domain_finalise(data);
+		if (ret) {
+			data->m4u_dom = NULL;
+			return ret;
+		}
+	} else if (data->m4u_dom != dom) {
+		/* All the client devices should be in the same m4u domain */
+		dev_err(dev, "try to attach into the error iommu domain\n");
+		return -EPERM;
+	}
+
+	mtk_iommu_config(data, dev, true);
+	return 0;
+}
+
+static void mtk_iommu_detach_device(struct iommu_domain *domain,
+				    struct device *dev)
+{
+	struct mtk_iommu_client_priv *priv = dev->archdata.iommu;
+	struct mtk_iommu_data *data;
+
+	if (!priv)
+		return;
+
+	data = dev_get_drvdata(priv->m4udev);
+	mtk_iommu_config(data, dev, false);
+}
+
+static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			 phys_addr_t paddr, size_t size, int prot)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return ret;
+}
+
+static size_t mtk_iommu_unmap(struct iommu_domain *domain,
+			      unsigned long iova, size_t size)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	size_t unmapsz;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	unmapsz = dom->iop->unmap(dom->iop, iova, size);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return unmapsz;
+}
+
+static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
+					  dma_addr_t iova)
+{
+	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+	unsigned long flags;
+	phys_addr_t pa;
+
+	spin_lock_irqsave(&dom->pgtlock, flags);
+	pa = dom->iop->iova_to_phys(dom->iop, iova);
+	spin_unlock_irqrestore(&dom->pgtlock, flags);
+
+	return pa;
+}
+
+static int mtk_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+
+	if (!dev->archdata.iommu) /* Not a iommu client device */
+		return -ENODEV;
+
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	iommu_group_put(group);
+	return 0;
+}
+
+static void mtk_iommu_remove_device(struct device *dev)
+{
+	struct mtk_iommu_client_priv *head, *cur, *next;
+
+	head = dev->archdata.iommu;
+	if (!head)
+		return;
+
+	list_for_each_entry_safe(cur, next, &head->client, client) {
+		list_del(&cur->client);
+		kfree(cur);
+	}
+	kfree(head);
+	dev->archdata.iommu = NULL;
+
+	iommu_group_remove_device(dev);
+}
+
+static struct iommu_group *mtk_iommu_device_group(struct device *dev)
+{
+	struct mtk_iommu_data *data;
+	struct mtk_iommu_client_priv *priv;
+
+	priv = dev->archdata.iommu;
+	if (!priv)
+		return ERR_PTR(-ENODEV);
+
+	/* All the client devices are in the same m4u iommu-group */
+	data = dev_get_drvdata(priv->m4udev);
+	if (!data->m4u_group) {
+		data->m4u_group = iommu_group_alloc();
+		if (IS_ERR(data->m4u_group))
+			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
+	}
+	return data->m4u_group;
+}
+
+static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	struct mtk_iommu_client_priv *head, *priv, *next;
+	struct platform_device *m4updev;
+
+	if (args->args_count != 1) {
+		dev_err(dev, "invalid #iommu-cells(%d) property for IOMMU\n",
+			args->args_count);
+		return -EINVAL;
+	}
+
+	if (!dev->archdata.iommu) {
+		/* Get the m4u device */
+		m4updev = of_find_device_by_node(args->np);
+		of_node_put(args->np);
+		if (WARN_ON(!m4updev))
+			return -EINVAL;
+
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (!head)
+			return -ENOMEM;
+
+		dev->archdata.iommu = head;
+		INIT_LIST_HEAD(&head->client);
+		head->m4udev = &m4updev->dev;
+	} else {
+		head = dev->archdata.iommu;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto err_free_mem;
+
+	priv->mtk_m4u_id = args->args[0];
+	list_add_tail(&priv->client, &head->client);
+
+	return 0;
+
+err_free_mem:
+	list_for_each_entry_safe(priv, next, &head->client, client)
+		kfree(priv);
+	kfree(head);
+	dev->archdata.iommu = NULL;
+	return -ENOMEM;
+}
+
+static struct iommu_ops mtk_iommu_ops = {
+	.domain_alloc	= mtk_iommu_domain_alloc,
+	.domain_free	= mtk_iommu_domain_free,
+	.attach_dev	= mtk_iommu_attach_device,
+	.detach_dev	= mtk_iommu_detach_device,
+	.map		= mtk_iommu_map,
+	.unmap		= mtk_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
+	.iova_to_phys	= mtk_iommu_iova_to_phys,
+	.add_device	= mtk_iommu_add_device,
+	.remove_device	= mtk_iommu_remove_device,
+	.device_group	= mtk_iommu_device_group,
+	.of_xlate	= mtk_iommu_of_xlate,
+	.pgsize_bitmap	= SZ_4K | SZ_64K | SZ_1M | SZ_16M,
+};
+
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
+{
+	u32 regval;
+	int ret;
+
+	ret = clk_prepare_enable(data->bclk);
+	if (ret) {
+		dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret);
+		return ret;
+	}
+
+	regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
+		F_MMU_TF_PROTECT_SEL(2);
+	writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
+
+	regval = F_L2_MULIT_HIT_EN |
+		F_TABLE_WALK_FAULT_INT_EN |
+		F_PREETCH_FIFO_OVERFLOW_INT_EN |
+		F_MISS_FIFO_OVERFLOW_INT_EN |
+		F_PREFETCH_FIFO_ERR_INT_EN |
+		F_MISS_FIFO_ERR_INT_EN;
+	writel_relaxed(regval, data->base + REG_MMU_INT_CONTROL0);
+
+	regval = F_INT_TRANSLATION_FAULT |
+		F_INT_MAIN_MULTI_HIT_FAULT |
+		F_INT_INVALID_PA_FAULT |
+		F_INT_ENTRY_REPLACEMENT_FAULT |
+		F_INT_TLB_MISS_FAULT |
+		F_INT_MISS_TRANSACTION_FIFO_FAULT |
+		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
+	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
+
+	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
+		       data->base + REG_MMU_IVRP_PADDR);
+
+	writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
+	writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
+
+	if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
+			     dev_name(data->dev), (void *)data)) {
+		writel_relaxed(0, data->base + REG_MMU_PT_BASE_ADDR);
+		clk_disable_unprepare(data->bclk);
+		dev_err(data->dev, "Failed @ IRQ-%d Request\n", data->irq);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int compare_of(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static int mtk_iommu_bind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+
+	return component_bind_all(dev, &data->smi_imu);
+}
+
+static void mtk_iommu_unbind(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+
+	component_unbind_all(dev, &data->smi_imu);
+}
+
+static const struct component_master_ops mtk_iommu_com_ops = {
+	.bind		= mtk_iommu_bind,
+	.unbind		= mtk_iommu_unbind,
+};
+
+static int mtk_iommu_probe(struct platform_device *pdev)
+{
+	struct mtk_iommu_data   *data;
+	struct device           *dev = &pdev->dev;
+	struct resource         *res;
+	struct component_match  *match = NULL;
+	void                    *protect;
+	int                     i, larb_nr, ret;
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	data->dev = dev;
+
+	/* Protect memory. HW will access here while translation fault.*/
+	protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
+	if (!protect)
+		return -ENOMEM;
+	data->protect_base = ALIGN(virt_to_phys(protect), MTK_PROTECT_PA_ALIGN);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(data->base))
+		return PTR_ERR(data->base);
+
+	data->irq = platform_get_irq(pdev, 0);
+	if (data->irq < 0)
+		return data->irq;
+
+	data->bclk = devm_clk_get(dev, "bclk");
+	if (IS_ERR(data->bclk))
+		return PTR_ERR(data->bclk);
+
+	larb_nr = of_count_phandle_with_args(dev->of_node,
+					     "mediatek,larbs", NULL);
+	if (larb_nr < 0)
+		return larb_nr;
+	data->smi_imu.larb_nr = larb_nr;
+
+	for (i = 0; i < larb_nr; i++) {
+		struct device_node *larbnode;
+		struct platform_device *plarbdev;
+
+		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
+		if (!larbnode)
+			return -EINVAL;
+
+		if (!of_device_is_available(larbnode))
+			continue;
+
+		plarbdev = of_find_device_by_node(larbnode);
+		of_node_put(larbnode);
+		if (!plarbdev) {
+			plarbdev = of_platform_device_create(
+						larbnode, NULL,
+						platform_bus_type.dev_root);
+			if (!plarbdev)
+				return -EPROBE_DEFER;
+		}
+		data->smi_imu.larb_imu[i].dev = &plarbdev->dev;
+
+		component_match_add(dev, &match, compare_of, larbnode);
+	}
+
+	platform_set_drvdata(pdev, data);
+
+	ret = mtk_iommu_hw_init(data);
+	if (ret)
+		return ret;
+
+	if (!iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
+
+	return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+}
+
+static int mtk_iommu_remove(struct platform_device *pdev)
+{
+	struct mtk_iommu_data *data = platform_get_drvdata(pdev);
+
+	if (iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, NULL);
+
+	free_io_pgtable_ops(data->m4u_dom->iop);
+	clk_disable_unprepare(data->bclk);
+	devm_free_irq(&pdev->dev, data->irq, data);
+	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
+	return 0;
+}
+
+static int __maybe_unused mtk_iommu_suspend(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	reg->standard_axi_mode = readl_relaxed(base +
+					       REG_MMU_STANDARD_AXI_MODE);
+	reg->dcm_dis = readl_relaxed(base + REG_MMU_DCM_DIS);
+	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
+	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
+	reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
+	return 0;
+}
+
+static int __maybe_unused mtk_iommu_resume(struct device *dev)
+{
+	struct mtk_iommu_data *data = dev_get_drvdata(dev);
+	struct mtk_iommu_suspend_reg *reg = &data->reg;
+	void __iomem *base = data->base;
+
+	writel_relaxed(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+		       base + REG_MMU_PT_BASE_ADDR);
+	writel_relaxed(reg->standard_axi_mode,
+		       base + REG_MMU_STANDARD_AXI_MODE);
+	writel_relaxed(reg->dcm_dis, base + REG_MMU_DCM_DIS);
+	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
+	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
+	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
+	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base),
+		       base + REG_MMU_IVRP_PADDR);
+	return 0;
+}
+
+const struct dev_pm_ops mtk_iommu_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
+};
+
+static const struct of_device_id mtk_iommu_of_ids[] = {
+	{ .compatible = "mediatek,mt8173-m4u", },
+	{}
+};
+
+static struct platform_driver mtk_iommu_driver = {
+	.probe	= mtk_iommu_probe,
+	.remove	= mtk_iommu_remove,
+	.driver	= {
+		.name = "mtk-iommu",
+		.of_match_table = mtk_iommu_of_ids,
+		.pm = &mtk_iommu_pm_ops,
+	}
+};
+
+static int mtk_iommu_init_fn(struct device_node *np)
+{
+	int ret;
+	struct platform_device *pdev;
+
+	pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
+	if (!pdev)
+		return -ENOMEM;
+
+	ret = platform_driver_register(&mtk_iommu_driver);
+	if (ret) {
+		pr_err("%s: Failed to register driver\n", __func__);
+		return ret;
+	}
+
+	of_iommu_set_ops(np, &mtk_iommu_ops);
+	return 0;
+}
+
+IOMMU_OF_DECLARE(mtkm4u, "mediatek,mt8173-m4u", mtk_iommu_init_fn);
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 60ba238090d9..5fea665af99d 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -110,6 +110,7 @@ void of_iommu_set_ops(struct device_node *np, struct iommu_ops *ops)
 	if (WARN_ON(!iommu))
 		return;
 
+	of_node_get(np);
 	INIT_LIST_HEAD(&iommu->list);
 	iommu->np = np;
 	iommu->ops = ops;
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index ebf0adb8e7ea..a6f593a0a29e 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -86,7 +86,8 @@ struct rk_iommu_domain {
 
 struct rk_iommu {
 	struct device *dev;
-	void __iomem *base;
+	void __iomem **bases;
+	int num_mmu;
 	int irq;
 	struct list_head node; /* entry in rk_iommu_domain.iommus */
 	struct iommu_domain *domain; /* domain to which iommu is attached */
@@ -271,47 +272,70 @@ static u32 rk_iova_page_offset(dma_addr_t iova)
 	return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT;
 }
 
-static u32 rk_iommu_read(struct rk_iommu *iommu, u32 offset)
+static u32 rk_iommu_read(void __iomem *base, u32 offset)
 {
-	return readl(iommu->base + offset);
+	return readl(base + offset);
 }
 
-static void rk_iommu_write(struct rk_iommu *iommu, u32 offset, u32 value)
+static void rk_iommu_write(void __iomem *base, u32 offset, u32 value)
 {
-	writel(value, iommu->base + offset);
+	writel(value, base + offset);
 }
 
 static void rk_iommu_command(struct rk_iommu *iommu, u32 command)
 {
-	writel(command, iommu->base + RK_MMU_COMMAND);
+	int i;
+
+	for (i = 0; i < iommu->num_mmu; i++)
+		writel(command, iommu->bases[i] + RK_MMU_COMMAND);
 }
 
+static void rk_iommu_base_command(void __iomem *base, u32 command)
+{
+	writel(command, base + RK_MMU_COMMAND);
+}
 static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
 			       size_t size)
 {
+	int i;
+
 	dma_addr_t iova_end = iova + size;
 	/*
 	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
 	 * entire iotlb rather than iterate over individual iovas.
 	 */
-	for (; iova < iova_end; iova += SPAGE_SIZE)
-		rk_iommu_write(iommu, RK_MMU_ZAP_ONE_LINE, iova);
+	for (i = 0; i < iommu->num_mmu; i++)
+		for (; iova < iova_end; iova += SPAGE_SIZE)
+			rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE, iova);
 }
 
 static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
 {
-	return rk_iommu_read(iommu, RK_MMU_STATUS) & RK_MMU_STATUS_STALL_ACTIVE;
+	bool active = true;
+	int i;
+
+	for (i = 0; i < iommu->num_mmu; i++)
+		active &= rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
+					RK_MMU_STATUS_STALL_ACTIVE;
+
+	return active;
 }
 
 static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
 {
-	return rk_iommu_read(iommu, RK_MMU_STATUS) &
-			     RK_MMU_STATUS_PAGING_ENABLED;
+	bool enable = true;
+	int i;
+
+	for (i = 0; i < iommu->num_mmu; i++)
+		enable &= rk_iommu_read(iommu->bases[i], RK_MMU_STATUS) &
+					RK_MMU_STATUS_PAGING_ENABLED;
+
+	return enable;
 }
 
 static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 {
-	int ret;
+	int ret, i;
 
 	if (rk_iommu_is_stall_active(iommu))
 		return 0;
@@ -324,15 +348,16 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 
 	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
 	if (ret)
-		dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
-			rk_iommu_read(iommu, RK_MMU_STATUS));
+		for (i = 0; i < iommu->num_mmu; i++)
+			dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
+				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));
 
 	return ret;
 }
 
 static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 {
-	int ret;
+	int ret, i;
 
 	if (!rk_iommu_is_stall_active(iommu))
 		return 0;
@@ -341,15 +366,16 @@ static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 
 	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
 	if (ret)
-		dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
-			rk_iommu_read(iommu, RK_MMU_STATUS));
+		for (i = 0; i < iommu->num_mmu; i++)
+			dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
+				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));
 
 	return ret;
 }
 
 static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 {
-	int ret;
+	int ret, i;
 
 	if (rk_iommu_is_paging_enabled(iommu))
 		return 0;
@@ -358,15 +384,16 @@ static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 
 	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
 	if (ret)
-		dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
-			rk_iommu_read(iommu, RK_MMU_STATUS));
+		for (i = 0; i < iommu->num_mmu; i++)
+			dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
+				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));
 
 	return ret;
 }
 
 static int rk_iommu_disable_paging(struct rk_iommu *iommu)
 {
-	int ret;
+	int ret, i;
 
 	if (!rk_iommu_is_paging_enabled(iommu))
 		return 0;
@@ -375,41 +402,49 @@ static int rk_iommu_disable_paging(struct rk_iommu *iommu)
 
 	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
 	if (ret)
-		dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
-			rk_iommu_read(iommu, RK_MMU_STATUS));
+		for (i = 0; i < iommu->num_mmu; i++)
+			dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
+				rk_iommu_read(iommu->bases[i], RK_MMU_STATUS));
 
 	return ret;
 }
 
 static int rk_iommu_force_reset(struct rk_iommu *iommu)
 {
-	int ret;
+	int ret, i;
 	u32 dte_addr;
 
 	/*
 	 * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
 	 * and verifying that upper 5 nybbles are read back.
 	 */
-	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
+	for (i = 0; i < iommu->num_mmu; i++) {
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
 
-	dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
-	if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
-		dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
-		return -EFAULT;
+		dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
+		if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
+			dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
+			return -EFAULT;
+		}
 	}
 
 	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
 
-	ret = rk_wait_for(rk_iommu_read(iommu, RK_MMU_DTE_ADDR) == 0x00000000,
-			  FORCE_RESET_TIMEOUT);
-	if (ret)
-		dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+	for (i = 0; i < iommu->num_mmu; i++) {
+		ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
+				  FORCE_RESET_TIMEOUT);
+		if (ret) {
+			dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+			return ret;
+		}
+	}
 
-	return ret;
+	return 0;
 }
 
-static void log_iova(struct rk_iommu *iommu, dma_addr_t iova)
+static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
 {
+	void __iomem *base = iommu->bases[index];
 	u32 dte_index, pte_index, page_offset;
 	u32 mmu_dte_addr;
 	phys_addr_t mmu_dte_addr_phys, dte_addr_phys;
@@ -425,7 +460,7 @@ static void log_iova(struct rk_iommu *iommu, dma_addr_t iova)
 	pte_index = rk_iova_pte_index(iova);
 	page_offset = rk_iova_page_offset(iova);
 
-	mmu_dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
+	mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
 	mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
 
 	dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
@@ -460,51 +495,56 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 	u32 status;
 	u32 int_status;
 	dma_addr_t iova;
+	irqreturn_t ret = IRQ_NONE;
+	int i;
 
-	int_status = rk_iommu_read(iommu, RK_MMU_INT_STATUS);
-	if (int_status == 0)
-		return IRQ_NONE;
+	for (i = 0; i < iommu->num_mmu; i++) {
+		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
+		if (int_status == 0)
+			continue;
 
-	iova = rk_iommu_read(iommu, RK_MMU_PAGE_FAULT_ADDR);
+		ret = IRQ_HANDLED;
+		iova = rk_iommu_read(iommu->bases[i], RK_MMU_PAGE_FAULT_ADDR);
 
-	if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
-		int flags;
+		if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
+			int flags;
 
-		status = rk_iommu_read(iommu, RK_MMU_STATUS);
-		flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
-				IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
+			status = rk_iommu_read(iommu->bases[i], RK_MMU_STATUS);
+			flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
+					IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
 
-		dev_err(iommu->dev, "Page fault at %pad of type %s\n",
-			&iova,
-			(flags == IOMMU_FAULT_WRITE) ? "write" : "read");
+			dev_err(iommu->dev, "Page fault at %pad of type %s\n",
+				&iova,
+				(flags == IOMMU_FAULT_WRITE) ? "write" : "read");
 
-		log_iova(iommu, iova);
+			log_iova(iommu, i, iova);
 
-		/*
-		 * Report page fault to any installed handlers.
-		 * Ignore the return code, though, since we always zap cache
-		 * and clear the page fault anyway.
-		 */
-		if (iommu->domain)
-			report_iommu_fault(iommu->domain, iommu->dev, iova,
-					   flags);
-		else
-			dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");
+			/*
+			 * Report page fault to any installed handlers.
+			 * Ignore the return code, though, since we always zap cache
+			 * and clear the page fault anyway.
+			 */
+			if (iommu->domain)
+				report_iommu_fault(iommu->domain, iommu->dev, iova,
+						   flags);
+			else
+				dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");
 
-		rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
-		rk_iommu_command(iommu, RK_MMU_CMD_PAGE_FAULT_DONE);
-	}
+			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+			rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_PAGE_FAULT_DONE);
+		}
 
-	if (int_status & RK_MMU_IRQ_BUS_ERROR)
-		dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);
+		if (int_status & RK_MMU_IRQ_BUS_ERROR)
+			dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);
 
-	if (int_status & ~RK_MMU_IRQ_MASK)
-		dev_err(iommu->dev, "unexpected int_status: %#08x\n",
-			int_status);
+		if (int_status & ~RK_MMU_IRQ_MASK)
+			dev_err(iommu->dev, "unexpected int_status: %#08x\n",
+				int_status);
 
-	rk_iommu_write(iommu, RK_MMU_INT_CLEAR, int_status);
+		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
+	}
 
-	return IRQ_HANDLED;
+	return ret;
 }
 
 static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -746,7 +786,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
-	int ret;
+	int ret, i;
 	phys_addr_t dte_addr;
 
 	/*
@@ -773,9 +813,11 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 		return ret;
 
 	dte_addr = virt_to_phys(rk_domain->dt);
-	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, dte_addr);
-	rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
-	rk_iommu_write(iommu, RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+	for (i = 0; i < iommu->num_mmu; i++) {
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
+		rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+	}
 
 	ret = rk_iommu_enable_paging(iommu);
 	if (ret)
@@ -798,6 +840,7 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
+	int i;
 
 	/* Allow 'virtual devices' (eg drm) to detach from domain */
 	iommu = rk_iommu_from_dev(dev);
@@ -811,8 +854,10 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 	/* Ignore error while disabling, just keep going */
 	rk_iommu_enable_stall(iommu);
 	rk_iommu_disable_paging(iommu);
-	rk_iommu_write(iommu, RK_MMU_INT_MASK, 0);
-	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, 0);
+	for (i = 0; i < iommu->num_mmu; i++) {
+		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+	}
 	rk_iommu_disable_stall(iommu);
 
 	devm_free_irq(dev, iommu->irq, iommu);
@@ -988,6 +1033,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct rk_iommu *iommu;
 	struct resource *res;
+	int i;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -995,11 +1041,21 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, iommu);
 	iommu->dev = dev;
+	iommu->num_mmu = 0;
+	iommu->bases = devm_kzalloc(dev, sizeof(*iommu->bases) * iommu->num_mmu,
+				    GFP_KERNEL);
+	if (!iommu->bases)
+		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	iommu->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(iommu->base))
-		return PTR_ERR(iommu->base);
+	for (i = 0; i < pdev->num_resources; i++) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		iommu->bases[i] = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(iommu->bases[i]))
+			continue;
+		iommu->num_mmu++;
+	}
+	if (iommu->num_mmu == 0)
+		return PTR_ERR(iommu->bases[0]);
 
 	iommu->irq = platform_get_irq(pdev, 0);
 	if (iommu->irq < 0) {
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7e8c441ff2de..3e124793e224 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -32,14 +32,6 @@ config ARM_GIC_V3_ITS
 	bool
 	select PCI_MSI_IRQ_DOMAIN
 
-config HISILICON_IRQ_MBIGEN
-	bool "Support mbigen interrupt controller"
-	default n
-	depends on ARM_GIC_V3 && ARM_GIC_V3_ITS && GENERIC_MSI_IRQ_DOMAIN
-	help
-	 Enable the mbigen interrupt controller used on
-	 Hisilicon platform.
-
 config ARM_NVIC
 	bool
 	select IRQ_DOMAIN
@@ -114,6 +106,12 @@ config DW_APB_ICTL
 	select GENERIC_IRQ_CHIP
 	select IRQ_DOMAIN
 
+config HISILICON_IRQ_MBIGEN
+	bool
+	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS
+	select GENERIC_MSI_IRQ_DOMAIN
+
 config IMGPDC_IRQ
 	bool
 	select GENERIC_IRQ_CHIP
diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 4dd3eb8a40b3..d67baa231c13 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -239,8 +239,11 @@ static struct irq_domain_ops mbigen_domain_ops = {
 static int mbigen_device_probe(struct platform_device *pdev)
 {
 	struct mbigen_device *mgn_chip;
-	struct resource *res;
+	struct platform_device *child;
 	struct irq_domain *domain;
+	struct device_node *np;
+	struct device *parent;
+	struct resource *res;
 	u32 num_pins;
 
 	mgn_chip = devm_kzalloc(&pdev->dev, sizeof(*mgn_chip), GFP_KERNEL);
@@ -254,23 +257,30 @@ static int mbigen_device_probe(struct platform_device *pdev)
 	if (IS_ERR(mgn_chip->base))
 		return PTR_ERR(mgn_chip->base);
 
-	if (of_property_read_u32(pdev->dev.of_node, "num-pins", &num_pins) < 0) {
-		dev_err(&pdev->dev, "No num-pins property\n");
-		return -EINVAL;
-	}
+	for_each_child_of_node(pdev->dev.of_node, np) {
+		if (!of_property_read_bool(np, "interrupt-controller"))
+			continue;
 
-	domain = platform_msi_create_device_domain(&pdev->dev, num_pins,
-							mbigen_write_msg,
-							&mbigen_domain_ops,
-							mgn_chip);
+		parent = platform_bus_type.dev_root;
+		child = of_platform_device_create(np, NULL, parent);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
 
-	if (!domain)
-		return -ENOMEM;
+		if (of_property_read_u32(child->dev.of_node, "num-pins",
+					 &num_pins) < 0) {
+			dev_err(&pdev->dev, "No num-pins property\n");
+			return -EINVAL;
+		}
+
+		domain = platform_msi_create_device_domain(&child->dev, num_pins,
+							   mbigen_write_msg,
+							   &mbigen_domain_ops,
+							   mgn_chip);
+		if (!domain)
+			return -ENOMEM;
+	}
 
 	platform_set_drvdata(pdev, mgn_chip);
-
-	dev_info(&pdev->dev, "Allocated %d MSIs\n", num_pins);
-
 	return 0;
 }
 
diff --git a/drivers/irqchip/irq-tegra.c b/drivers/irqchip/irq-tegra.c
index 121ec301372e..50be9639e27e 100644
--- a/drivers/irqchip/irq-tegra.c
+++ b/drivers/irqchip/irq-tegra.c
@@ -275,22 +275,10 @@ static int tegra_ictlr_domain_alloc(struct irq_domain *domain,
 					    &parent_fwspec);
 }
 
-static void tegra_ictlr_domain_free(struct irq_domain *domain,
-				    unsigned int virq,
-				    unsigned int nr_irqs)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_irqs; i++) {
-		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
-		irq_domain_reset_irq_data(d);
-	}
-}
-
 static const struct irq_domain_ops tegra_ictlr_domain_ops = {
 	.translate	= tegra_ictlr_domain_translate,
 	.alloc		= tegra_ictlr_domain_alloc,
-	.free		= tegra_ictlr_domain_free,
+	.free		= irq_domain_free_irqs_common,
 };
 
 static int __init tegra_ictlr_init(struct device_node *node,
diff --git a/drivers/isdn/mISDN/clock.c b/drivers/isdn/mISDN/clock.c
index 693fb7c9b59a..f8f659f1ce1b 100644
--- a/drivers/isdn/mISDN/clock.c
+++ b/drivers/isdn/mISDN/clock.c
@@ -37,6 +37,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/ktime.h>
 #include <linux/mISDNif.h>
 #include <linux/export.h>
 #include "core.h"
@@ -45,15 +46,15 @@ static u_int *debug;
 static LIST_HEAD(iclock_list);
 static DEFINE_RWLOCK(iclock_lock);
 static u16 iclock_count;		/* counter of last clock */
-static struct timeval iclock_tv;	/* time stamp of last clock */
-static int iclock_tv_valid;		/* already received one timestamp */
+static ktime_t iclock_timestamp;	/* time stamp of last clock */
+static int iclock_timestamp_valid;	/* already received one timestamp */
 static struct mISDNclock *iclock_current;
 
 void
 mISDN_init_clock(u_int *dp)
 {
 	debug = dp;
-	do_gettimeofday(&iclock_tv);
+	iclock_timestamp = ktime_get();
 }
 
 static void
@@ -86,7 +87,7 @@ select_iclock(void)
 	}
 	if (bestclock != iclock_current) {
 		/* no clock received yet */
-		iclock_tv_valid = 0;
+		iclock_timestamp_valid = 0;
 	}
 	iclock_current = bestclock;
 }
@@ -139,12 +140,11 @@ mISDN_unregister_clock(struct mISDNclock *iclock)
 EXPORT_SYMBOL(mISDN_unregister_clock);
 
 void
-mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv)
+mISDN_clock_update(struct mISDNclock *iclock, int samples, ktime_t *timestamp)
 {
 	u_long		flags;
-	struct timeval	tv_now;
-	time_t		elapsed_sec;
-	int		elapsed_8000th;
+	ktime_t		timestamp_now;
+	u16		delta;
 
 	write_lock_irqsave(&iclock_lock, flags);
 	if (iclock_current != iclock) {
@@ -156,33 +156,27 @@ mISDN_clock_update(struct mISDNclock *iclock, int samples, struct timeval *tv)
 		write_unlock_irqrestore(&iclock_lock, flags);
 		return;
 	}
-	if (iclock_tv_valid) {
+	if (iclock_timestamp_valid) {
 		/* increment sample counter by given samples */
 		iclock_count += samples;
-		if (tv) { /* tv must be set, if function call is delayed */
-			iclock_tv.tv_sec = tv->tv_sec;
-			iclock_tv.tv_usec = tv->tv_usec;
-		} else
-			do_gettimeofday(&iclock_tv);
+		if (timestamp) { /* timestamp must be set, if function call is delayed */
+			iclock_timestamp = *timestamp;
+		} else	{
+			iclock_timestamp = ktime_get();
+		}
 	} else {
 		/* calc elapsed time by system clock */
-		if (tv) { /* tv must be set, if function call is delayed */
-			tv_now.tv_sec = tv->tv_sec;
-			tv_now.tv_usec = tv->tv_usec;
-		} else
-			do_gettimeofday(&tv_now);
-		elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec;
-		elapsed_8000th = (tv_now.tv_usec / 125)
-			- (iclock_tv.tv_usec / 125);
-		if (elapsed_8000th < 0) {
-			elapsed_sec -= 1;
-			elapsed_8000th += 8000;
+		if (timestamp) { /* timestamp must be set, if function call is delayed */
+			timestamp_now = *timestamp;
+		} else {
+			timestamp_now = ktime_get();
 		}
+		delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
+				(NSEC_PER_SEC / 8000));
 		/* add elapsed time to counter and set new timestamp */
-		iclock_count += elapsed_sec * 8000 + elapsed_8000th;
-		iclock_tv.tv_sec = tv_now.tv_sec;
-		iclock_tv.tv_usec = tv_now.tv_usec;
-		iclock_tv_valid = 1;
+		iclock_count += delta;
+		iclock_timestamp = timestamp_now;
+		iclock_timestamp_valid = 1;
 		if (*debug & DEBUG_CLOCK)
 			printk("Received first clock from source '%s'.\n",
 			       iclock_current ? iclock_current->name : "nothing");
@@ -195,22 +189,17 @@ unsigned short
 mISDN_clock_get(void)
 {
 	u_long		flags;
-	struct timeval	tv_now;
-	time_t		elapsed_sec;
-	int		elapsed_8000th;
+	ktime_t		timestamp_now;
+	u16		delta;
 	u16		count;
 
 	read_lock_irqsave(&iclock_lock, flags);
 	/* calc elapsed time by system clock */
-	do_gettimeofday(&tv_now);
-	elapsed_sec = tv_now.tv_sec - iclock_tv.tv_sec;
-	elapsed_8000th = (tv_now.tv_usec / 125) - (iclock_tv.tv_usec / 125);
-	if (elapsed_8000th < 0) {
-		elapsed_sec -= 1;
-		elapsed_8000th += 8000;
-	}
+	timestamp_now = ktime_get();
+	delta = ktime_divns(ktime_sub(timestamp_now, iclock_timestamp),
+			(NSEC_PER_SEC / 8000));
 	/* add elapsed time to counter */
-	count =	iclock_count + elapsed_sec * 8000 + elapsed_8000th;
+	count =	iclock_count + delta;
 	read_unlock_irqrestore(&iclock_lock, flags);
 	return count;
 }
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 1f6415168998..225147863e02 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -443,6 +443,7 @@ config LEDS_DELL_NETBOOKS
 	tristate "External LED on Dell Business Netbooks"
 	depends on LEDS_CLASS
 	depends on X86 && ACPI_WMI
+	depends on DELL_SMBIOS
 	help
 	  This adds support for the Latitude 2100 and similar
 	  notebooks that have an external LED.
diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c
index c36acaf566a6..b3d6e9c15cf9 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/leds/dell-led.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/dell-led.h>
+#include "../platform/x86/dell-smbios.h"
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
@@ -42,120 +43,32 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
-struct app_wmi_args {
-	u16 class;
-	u16 selector;
-	u32 arg1;
-	u32 arg2;
-	u32 arg3;
-	u32 arg4;
-	u32 res1;
-	u32 res2;
-	u32 res3;
-	u32 res4;
-	char dummy[92];
-};
-
 #define GLOBAL_MIC_MUTE_ENABLE	0x364
 #define GLOBAL_MIC_MUTE_DISABLE	0x365
 
-struct dell_bios_data_token {
-	u16 tokenid;
-	u16 location;
-	u16 value;
-};
-
-struct __attribute__ ((__packed__)) dell_bios_calling_interface {
-	struct	dmi_header header;
-	u16	cmd_io_addr;
-	u8	cmd_io_code;
-	u32	supported_cmds;
-	struct	dell_bios_data_token damap[];
-};
-
-static struct dell_bios_data_token dell_mic_tokens[2];
-
-static int dell_wmi_perform_query(struct app_wmi_args *args)
-{
-	struct app_wmi_args *bios_return;
-	union acpi_object *obj;
-	struct acpi_buffer input;
-	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	acpi_status status;
-	u32 rc = -EINVAL;
-
-	input.length = 128;
-	input.pointer = args;
-
-	status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output);
-	if (!ACPI_SUCCESS(status))
-		goto err_out0;
-
-	obj = output.pointer;
-	if (!obj)
-		goto err_out0;
-
-	if (obj->type != ACPI_TYPE_BUFFER)
-		goto err_out1;
-
-	bios_return = (struct app_wmi_args *)obj->buffer.pointer;
-	rc = bios_return->res1;
-	if (rc)
-		goto err_out1;
-
-	memcpy(args, bios_return, sizeof(struct app_wmi_args));
-	rc = 0;
-
- err_out1:
-	kfree(obj);
- err_out0:
-	return rc;
-}
-
-static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy)
-{
-	struct dell_bios_calling_interface *calling_interface;
-	struct dell_bios_data_token *token;
-	int token_size = sizeof(struct dell_bios_data_token);
-	int i = 0;
-
-	if (dm->type == 0xda && dm->length > 17) {
-		calling_interface = container_of(dm,
-				struct dell_bios_calling_interface, header);
-
-		token = &calling_interface->damap[i];
-		while (token->tokenid != 0xffff) {
-			if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE)
-				memcpy(&dell_mic_tokens[0], token, token_size);
-			else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE)
-				memcpy(&dell_mic_tokens[1], token, token_size);
-
-			i++;
-			token = &calling_interface->damap[i];
-		}
-	}
-}
-
 static int dell_micmute_led_set(int state)
 {
-	struct app_wmi_args args;
-	struct dell_bios_data_token *token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 
 	if (!wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	if (state == 0 || state == 1)
-		token = &dell_mic_tokens[state];
+	if (state == 0)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_DISABLE);
+	else if (state == 1)
+		token = dell_smbios_find_token(GLOBAL_MIC_MUTE_ENABLE);
 	else
 		return -EINVAL;
 
-	memset(&args, 0, sizeof(struct app_wmi_args));
-
-	args.class = 1;
-	args.arg1 = token->location;
-	args.arg2 = token->value;
+	if (!token)
+		return -ENODEV;
 
-	dell_wmi_perform_query(&args);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	buffer->input[1] = token->value;
+	dell_smbios_send_request(1, 0);
+	dell_smbios_release_buffer();
 
 	return state;
 }
@@ -177,14 +90,6 @@ int dell_app_wmi_led_set(int whichled, int on)
 }
 EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
 
-static int __init dell_micmute_led_init(void)
-{
-	memset(dell_mic_tokens, 0, sizeof(struct dell_bios_data_token) * 2);
-	dmi_walk(find_micmute_tokens, NULL);
-
-	return 0;
-}
-
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -330,9 +235,6 @@ static int __init dell_led_init(void)
 	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	if (wmi_has_guid(DELL_APP_GUID))
-		error = dell_micmute_led_init();
-
 	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
 		error = led_off();
 		if (error != 0)
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 0d1fb6b40c46..0dc9a80adb94 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -464,8 +464,13 @@ static int nvm_core_init(struct nvm_dev *dev)
 	dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls;
 
 	dev->total_secs = dev->nr_luns * dev->sec_per_lun;
+	dev->lun_map = kcalloc(BITS_TO_LONGS(dev->nr_luns),
+					sizeof(unsigned long), GFP_KERNEL);
+	if (!dev->lun_map)
+		return -ENOMEM;
 	INIT_LIST_HEAD(&dev->online_targets);
 	mutex_init(&dev->mlock);
+	spin_lock_init(&dev->lock);
 
 	return 0;
 }
@@ -585,6 +590,7 @@ int nvm_register(struct request_queue *q, char *disk_name,
 
 	return 0;
 err_init:
+	kfree(dev->lun_map);
 	kfree(dev);
 	return ret;
 }
@@ -607,6 +613,7 @@ void nvm_unregister(char *disk_name)
 	up_write(&nvm_lock);
 
 	nvm_exit(dev);
+	kfree(dev->lun_map);
 	kfree(dev);
 }
 EXPORT_SYMBOL(nvm_unregister);
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index d65ec36a2231..72e124a3927d 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -20,6 +20,68 @@
 
 #include "gennvm.h"
 
+static int gennvm_get_area(struct nvm_dev *dev, sector_t *lba, sector_t len)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area, *prev, *next;
+	sector_t begin = 0;
+	sector_t max_sectors = (dev->sec_size * dev->total_secs) >> 9;
+
+	if (len > max_sectors)
+		return -EINVAL;
+
+	area = kmalloc(sizeof(struct gennvm_area), GFP_KERNEL);
+	if (!area)
+		return -ENOMEM;
+
+	prev = NULL;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(next, &gn->area_list, list) {
+		if (begin + len > next->begin) {
+			begin = next->end;
+			prev = next;
+			continue;
+		}
+		break;
+	}
+
+	if ((begin + len) > max_sectors) {
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return -EINVAL;
+	}
+
+	area->begin = *lba = begin;
+	area->end = begin + len;
+
+	if (prev) /* insert into sorted order */
+		list_add(&area->list, &prev->list);
+	else
+		list_add(&area->list, &gn->area_list);
+	spin_unlock(&dev->lock);
+
+	return 0;
+}
+
+static void gennvm_put_area(struct nvm_dev *dev, sector_t begin)
+{
+	struct gen_nvm *gn = dev->mp;
+	struct gennvm_area *area;
+
+	spin_lock(&dev->lock);
+	list_for_each_entry(area, &gn->area_list, list) {
+		if (area->begin != begin)
+			continue;
+
+		list_del(&area->list);
+		spin_unlock(&dev->lock);
+		kfree(area);
+		return;
+	}
+	spin_unlock(&dev->lock);
+}
+
 static void gennvm_blocks_free(struct nvm_dev *dev)
 {
 	struct gen_nvm *gn = dev->mp;
@@ -195,7 +257,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 		}
 	}
 
-	if (dev->ops->get_l2p_tbl) {
+	if ((dev->identity.dom & NVM_RSP_L2P) && dev->ops->get_l2p_tbl) {
 		ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs,
 							gennvm_block_map, dev);
 		if (ret) {
@@ -229,6 +291,7 @@ static int gennvm_register(struct nvm_dev *dev)
 
 	gn->dev = dev;
 	gn->nr_luns = dev->nr_luns;
+	INIT_LIST_HEAD(&gn->area_list);
 	dev->mp = gn;
 
 	ret = gennvm_luns_init(dev, gn);
@@ -419,10 +482,23 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
 	return nvm_erase_ppa(dev, &addr, 1);
 }
 
+static int gennvm_reserve_lun(struct nvm_dev *dev, int lunid)
+{
+	return test_and_set_bit(lunid, dev->lun_map);
+}
+
+static void gennvm_release_lun(struct nvm_dev *dev, int lunid)
+{
+	WARN_ON(!test_and_clear_bit(lunid, dev->lun_map));
+}
+
 static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
 {
 	struct gen_nvm *gn = dev->mp;
 
+	if (unlikely(lunid >= dev->nr_luns))
+		return NULL;
+
 	return &gn->luns[lunid].vlun;
 }
 
@@ -464,7 +540,13 @@ static struct nvmm_type gennvm = {
 	.erase_blk		= gennvm_erase_blk,
 
 	.get_lun		= gennvm_get_lun,
+	.reserve_lun		= gennvm_reserve_lun,
+	.release_lun		= gennvm_release_lun,
 	.lun_info_print		= gennvm_lun_info_print,
+
+	.get_area		= gennvm_get_area,
+	.put_area		= gennvm_put_area,
+
 };
 
 static int __init gennvm_module_init(void)
diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h
index 9c24b5b32dac..04d7c23cfc61 100644
--- a/drivers/lightnvm/gennvm.h
+++ b/drivers/lightnvm/gennvm.h
@@ -39,8 +39,14 @@ struct gen_nvm {
 
 	int nr_luns;
 	struct gen_lun *luns;
+	struct list_head area_list;
 };
 
+struct gennvm_area {
+	struct list_head list;
+	sector_t begin;
+	sector_t end;	/* end is excluded */
+};
 #define gennvm_for_each_lun(bm, lun, i) \
 		for ((i) = 0, lun = &(bm)->luns[0]; \
 			(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)])
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 82343783aa47..3ab6495c3fd8 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -965,25 +965,11 @@ static void rrpc_requeue(struct work_struct *work)
 
 static void rrpc_gc_free(struct rrpc *rrpc)
 {
-	struct rrpc_lun *rlun;
-	int i;
-
 	if (rrpc->krqd_wq)
 		destroy_workqueue(rrpc->krqd_wq);
 
 	if (rrpc->kgc_wq)
 		destroy_workqueue(rrpc->kgc_wq);
-
-	if (!rrpc->luns)
-		return;
-
-	for (i = 0; i < rrpc->nr_luns; i++) {
-		rlun = &rrpc->luns[i];
-
-		if (!rlun->blocks)
-			break;
-		vfree(rlun->blocks);
-	}
 }
 
 static int rrpc_gc_init(struct rrpc *rrpc)
@@ -1053,8 +1039,11 @@ static int rrpc_map_init(struct rrpc *rrpc)
 {
 	struct nvm_dev *dev = rrpc->dev;
 	sector_t i;
+	u64 slba;
 	int ret;
 
+	slba = rrpc->soffset >> (ilog2(dev->sec_size) - 9);
+
 	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
 	if (!rrpc->trans_map)
 		return -ENOMEM;
@@ -1076,7 +1065,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
 		return 0;
 
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_secs, rrpc_l2p_update,
+	ret = dev->ops->get_l2p_tbl(dev, slba, rrpc->nr_sects, rrpc_l2p_update,
 									rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
@@ -1086,7 +1075,6 @@ static int rrpc_map_init(struct rrpc *rrpc)
 	return 0;
 }
 
-
 /* Minimum pages needed within a lun */
 #define PAGE_POOL_SIZE 16
 #define ADDR_POOL_SIZE 64
@@ -1141,6 +1129,23 @@ static void rrpc_core_free(struct rrpc *rrpc)
 
 static void rrpc_luns_free(struct rrpc *rrpc)
 {
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvm_lun *lun;
+	struct rrpc_lun *rlun;
+	int i;
+
+	if (!rrpc->luns)
+		return;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		lun = rlun->parent;
+		if (!lun)
+			break;
+		dev->mt->release_lun(dev, lun->id);
+		vfree(rlun->blocks);
+	}
+
 	kfree(rrpc->luns);
 }
 
@@ -1148,7 +1153,7 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 {
 	struct nvm_dev *dev = rrpc->dev;
 	struct rrpc_lun *rlun;
-	int i, j;
+	int i, j, ret = -EINVAL;
 
 	if (dev->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
 		pr_err("rrpc: number of pages per block too high.");
@@ -1164,25 +1169,26 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 
 	/* 1:1 mapping */
 	for (i = 0; i < rrpc->nr_luns; i++) {
-		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
-
-		rlun = &rrpc->luns[i];
-		rlun->rrpc = rrpc;
-		rlun->parent = lun;
-		INIT_LIST_HEAD(&rlun->prio_list);
-		INIT_LIST_HEAD(&rlun->open_list);
-		INIT_LIST_HEAD(&rlun->closed_list);
+		int lunid = lun_begin + i;
+		struct nvm_lun *lun;
 
-		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
-		spin_lock_init(&rlun->lock);
+		if (dev->mt->reserve_lun(dev, lunid)) {
+			pr_err("rrpc: lun %u is already allocated\n", lunid);
+			goto err;
+		}
 
-		rrpc->total_blocks += dev->blks_per_lun;
-		rrpc->nr_sects += dev->sec_per_lun;
+		lun = dev->mt->get_lun(dev, lunid);
+		if (!lun)
+			goto err;
 
+		rlun = &rrpc->luns[i];
+		rlun->parent = lun;
 		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
 						rrpc->dev->blks_per_lun);
-		if (!rlun->blocks)
+		if (!rlun->blocks) {
+			ret = -ENOMEM;
 			goto err;
+		}
 
 		for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
 			struct rrpc_block *rblk = &rlun->blocks[j];
@@ -1193,11 +1199,43 @@ static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
 			INIT_LIST_HEAD(&rblk->prio);
 			spin_lock_init(&rblk->lock);
 		}
+
+		rlun->rrpc = rrpc;
+		INIT_LIST_HEAD(&rlun->prio_list);
+		INIT_LIST_HEAD(&rlun->open_list);
+		INIT_LIST_HEAD(&rlun->closed_list);
+
+		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
+		spin_lock_init(&rlun->lock);
+
+		rrpc->total_blocks += dev->blks_per_lun;
+		rrpc->nr_sects += dev->sec_per_lun;
+
 	}
 
 	return 0;
 err:
-	return -ENOMEM;
+	return ret;
+}
+
+/* returns 0 on success and stores the beginning address in *begin */
+static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+	sector_t size = rrpc->nr_sects * dev->sec_size;
+
+	size >>= 9;
+
+	return mt->get_area(dev, begin, size);
+}
+
+static void rrpc_area_free(struct rrpc *rrpc)
+{
+	struct nvm_dev *dev = rrpc->dev;
+	struct nvmm_type *mt = dev->mt;
+
+	mt->put_area(dev, rrpc->soffset);
 }
 
 static void rrpc_free(struct rrpc *rrpc)
@@ -1206,6 +1244,7 @@ static void rrpc_free(struct rrpc *rrpc)
 	rrpc_map_free(rrpc);
 	rrpc_core_free(rrpc);
 	rrpc_luns_free(rrpc);
+	rrpc_area_free(rrpc);
 
 	kfree(rrpc);
 }
@@ -1327,6 +1366,7 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	struct request_queue *bqueue = dev->q;
 	struct request_queue *tqueue = tdisk->queue;
 	struct rrpc *rrpc;
+	sector_t soffset;
 	int ret;
 
 	if (!(dev->identity.dom & NVM_RSP_L2P)) {
@@ -1352,6 +1392,13 @@ static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
 	/* simple round-robin strategy */
 	atomic_set(&rrpc->next_lun, -1);
 
+	ret = rrpc_area_init(rrpc, &soffset);
+	if (ret < 0) {
+		pr_err("nvm: rrpc: could not initialize area\n");
+		return ERR_PTR(ret);
+	}
+	rrpc->soffset = soffset;
+
 	ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
 	if (ret) {
 		pr_err("nvm: rrpc: could not initialize luns\n");
diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h
index 855f4a5ca7dd..2653484a3b40 100644
--- a/drivers/lightnvm/rrpc.h
+++ b/drivers/lightnvm/rrpc.h
@@ -97,6 +97,7 @@ struct rrpc {
 	struct nvm_dev *dev;
 	struct gendisk *disk;
 
+	sector_t soffset; /* logical sector offset */
 	u64 poffset; /* physical page offset */
 	int lun_offset;
 
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 09719843ad4a..5305923752d2 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -87,6 +87,17 @@ config STI_MBOX
 	  Mailbox implementation for STMicroelectonics family chips with
 	  hardware for interprocessor communication.
 
+config TI_MESSAGE_MANAGER
+	tristate "Texas Instruments Message Manager Driver"
+	depends on ARCH_KEYSTONE
+	help
+	  An implementation of Message Manager slave driver for Keystone
+	  architecture SoCs from Texas Instruments. Message Manager is a
+	  communication entity found on few of Texas Instrument's keystone
+	  architecture SoCs. These may be used for communication between
+	  multiple processors within the SoC. Select this driver if your
+	  platform has support for the hardware block.
+
 config HI6220_MBOX
 	tristate "Hi6220 Mailbox"
 	depends on ARCH_HISI
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 948671733aa7..0be3e742bb7d 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_BCM2835_MBOX)	+= bcm2835-mailbox.o
 
 obj-$(CONFIG_STI_MBOX)		+= mailbox-sti.o
 
+obj-$(CONFIG_TI_MESSAGE_MANAGER) += ti-msgmgr.o
+
 obj-$(CONFIG_XGENE_SLIMPRO_MBOX) += mailbox-xgene-slimpro.o
 
 obj-$(CONFIG_HI6220_MBOX)	+= hi6220-mailbox.o
diff --git a/drivers/mailbox/ti-msgmgr.c b/drivers/mailbox/ti-msgmgr.c
new file mode 100644
index 000000000000..54b9e4cb4cfa
--- /dev/null
+++ b/drivers/mailbox/ti-msgmgr.c
@@ -0,0 +1,639 @@
+/*
+ * Texas Instruments' Message Manager Driver
+ *
+ * Copyright (C) 2015-2016 Texas Instruments Incorporated - http://www.ti.com/
+ *	Nishanth Menon
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/soc/ti/ti-msgmgr.h>
+
+#define Q_DATA_OFFSET(proxy, queue, reg)	\
+		     ((0x10000 * (proxy)) + (0x80 * (queue)) + ((reg) * 4))
+#define Q_STATE_OFFSET(queue)			((queue) * 0x4)
+#define Q_STATE_ENTRY_COUNT_MASK		(0xFFF000)
+
+/**
+ * struct ti_msgmgr_valid_queue_desc - SoC valid queues meant for this processor
+ * @queue_id:	Queue Number for this path
+ * @proxy_id:	Proxy ID representing the processor in SoC
+ * @is_tx:	Is this a receive path?
+ */
+struct ti_msgmgr_valid_queue_desc {
+	u8 queue_id;
+	u8 proxy_id;
+	bool is_tx;
+};
+
+/**
+ * struct ti_msgmgr_desc - Description of message manager integration
+ * @queue_count:	Number of Queues
+ * @max_message_size:	Message size in bytes
+ * @max_messages:	Number of messages
+ * @q_slices:		Number of queue engines
+ * @q_proxies:		Number of queue proxies per page
+ * @data_first_reg:	First data register for proxy data region
+ * @data_last_reg:	Last data register for proxy data region
+ * @tx_polled:		Do I need to use polled mechanism for tx
+ * @tx_poll_timeout_ms: Timeout in ms if polled
+ * @valid_queues:	List of Valid queues that the processor can access
+ * @num_valid_queues:	Number of valid queues
+ *
+ * This structure is used in of match data to describe how integration
+ * for a specific compatible SoC is done.
+ */
+struct ti_msgmgr_desc {
+	u8 queue_count;
+	u8 max_message_size;
+	u8 max_messages;
+	u8 q_slices;
+	u8 q_proxies;
+	u8 data_first_reg;
+	u8 data_last_reg;
+	bool tx_polled;
+	int tx_poll_timeout_ms;
+	const struct ti_msgmgr_valid_queue_desc *valid_queues;
+	int num_valid_queues;
+};
+
+/**
+ * struct ti_queue_inst - Description of a queue instance
+ * @name:	Queue Name
+ * @queue_id:	Queue Identifier as mapped on SoC
+ * @proxy_id:	Proxy Identifier as mapped on SoC
+ * @irq:	IRQ for Rx Queue
+ * @is_tx:	'true' if transmit queue, else, 'false'
+ * @queue_buff_start: First register of Data Buffer
+ * @queue_buff_end: Last (or confirmation) register of Data buffer
+ * @queue_state: Queue status register
+ * @chan:	Mailbox channel
+ * @rx_buff:	Receive buffer pointer allocated at probe, max_message_size
+ */
+struct ti_queue_inst {
+	char name[30];
+	u8 queue_id;
+	u8 proxy_id;
+	int irq;
+	bool is_tx;
+	void __iomem *queue_buff_start;
+	void __iomem *queue_buff_end;
+	void __iomem *queue_state;
+	struct mbox_chan *chan;
+	u32 *rx_buff;
+};
+
+/**
+ * struct ti_msgmgr_inst - Description of a Message Manager Instance
+ * @dev:	device pointer corresponding to the Message Manager instance
+ * @desc:	Description of the SoC integration
+ * @queue_proxy_region:	Queue proxy region where queue buffers are located
+ * @queue_state_debug_region:	Queue status register regions
+ * @num_valid_queues:	Number of valid queues defined for the processor
+ *		Note: other queues are probably reserved for other processors
+ *		in the SoC.
+ * @qinsts:	Array of valid Queue Instances for the Processor
+ * @mbox:	Mailbox Controller
+ * @chans:	Array for channels corresponding to the Queue Instances.
+ */
+struct ti_msgmgr_inst {
+	struct device *dev;
+	const struct ti_msgmgr_desc *desc;
+	void __iomem *queue_proxy_region;
+	void __iomem *queue_state_debug_region;
+	u8 num_valid_queues;
+	struct ti_queue_inst *qinsts;
+	struct mbox_controller mbox;
+	struct mbox_chan *chans;
+};
+
+/**
+ * ti_msgmgr_queue_get_num_messages() - Get the number of pending messages
+ * @qinst:	Queue instance for which we check the number of pending messages
+ *
+ * Return: number of messages pending in the queue (0 == no pending messages)
+ */
+static inline int ti_msgmgr_queue_get_num_messages(struct ti_queue_inst *qinst)
+{
+	u32 val;
+
+	/*
+	 * We cannot use relaxed operation here - update may happen
+	 * real-time.
+	 */
+	val = readl(qinst->queue_state) & Q_STATE_ENTRY_COUNT_MASK;
+	val >>= __ffs(Q_STATE_ENTRY_COUNT_MASK);
+
+	return val;
+}
+
+/**
+ * ti_msgmgr_queue_rx_interrupt() - Interrupt handler for receive Queue
+ * @irq:	Interrupt number
+ * @p:		Channel Pointer
+ *
+ * Return: -EINVAL if there is no instance
+ * IRQ_NONE if the interrupt is not ours.
+ * IRQ_HANDLED if the rx interrupt was successfully handled.
+ */
+static irqreturn_t ti_msgmgr_queue_rx_interrupt(int irq, void *p)
+{
+	struct mbox_chan *chan = p;
+	struct device *dev = chan->mbox->dev;
+	struct ti_msgmgr_inst *inst = dev_get_drvdata(dev);
+	struct ti_queue_inst *qinst = chan->con_priv;
+	const struct ti_msgmgr_desc *desc;
+	int msg_count, num_words;
+	struct ti_msgmgr_message message;
+	void __iomem *data_reg;
+	u32 *word_data;
+
+	if (WARN_ON(!inst)) {
+		dev_err(dev, "no platform drv data??\n");
+		return -EINVAL;
+	}
+
+	/* Do I have an invalid interrupt source? */
+	if (qinst->is_tx) {
+		dev_err(dev, "Cannot handle rx interrupt on tx channel %s\n",
+			qinst->name);
+		return IRQ_NONE;
+	}
+
+	/* Do I actually have messages to read? */
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+	if (!msg_count) {
+		/* Shared IRQ? */
+		dev_dbg(dev, "Spurious event - 0 pending data!\n");
+		return IRQ_NONE;
+	}
+
+	/*
+	 * I have no idea about the protocol being used to communicate with the
+	 * remote producer - 0 could be valid data, so I wont make a judgement
+	 * of how many bytes I should be reading. Let the client figure this
+	 * out.. I just read the full message and pass it on..
+	 */
+	desc = inst->desc;
+	message.len = desc->max_message_size;
+	message.buf = (u8 *)qinst->rx_buff;
+
+	/*
+	 * NOTE about register access involved here:
+	 * the hardware block is implemented with 32bit access operations and no
+	 * support for data splitting.  We don't want the hardware to misbehave
+	 * with sub 32bit access - For example: if the last register read is
+	 * split into byte wise access, it can result in the queue getting
+	 * stuck or indeterminate behavior. An out of order read operation may
+	 * result in weird data results as well.
+	 * Hence, we do not use memcpy_fromio or __ioread32_copy here, instead
+	 * we depend on readl for the purpose.
+	 *
+	 * Also note that the final register read automatically marks the
+	 * queue message as read.
+	 */
+	for (data_reg = qinst->queue_buff_start, word_data = qinst->rx_buff,
+	     num_words = (desc->max_message_size / sizeof(u32));
+	     num_words; num_words--, data_reg += sizeof(u32), word_data++)
+		*word_data = readl(data_reg);
+
+	/*
+	 * Last register read automatically clears the IRQ if only 1 message
+	 * is pending - so send the data up the stack..
+	 * NOTE: Client is expected to be as optimal as possible, since
+	 * we invoke the handler in IRQ context.
+	 */
+	mbox_chan_received_data(chan, (void *)&message);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ti_msgmgr_queue_peek_data() - Peek to see if there are any rx messages.
+ * @chan:	Channel Pointer
+ *
+ * Return: 'true' if there is pending rx data, 'false' if there is none.
+ */
+static bool ti_msgmgr_queue_peek_data(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int msg_count;
+
+	if (qinst->is_tx)
+		return false;
+
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+
+	return msg_count ? true : false;
+}
+
+/**
+ * ti_msgmgr_last_tx_done() - See if all the tx messages are sent
+ * @chan:	Channel pointer
+ *
+ * Return: 'true' is no pending tx data, 'false' if there are any.
+ */
+static bool ti_msgmgr_last_tx_done(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int msg_count;
+
+	if (!qinst->is_tx)
+		return false;
+
+	msg_count = ti_msgmgr_queue_get_num_messages(qinst);
+
+	/* if we have any messages pending.. */
+	return msg_count ? false : true;
+}
+
+/**
+ * ti_msgmgr_send_data() - Send data
+ * @chan:	Channel Pointer
+ * @data:	ti_msgmgr_message * Message Pointer
+ *
+ * Return: 0 if all goes good, else appropriate error messages.
+ */
+static int ti_msgmgr_send_data(struct mbox_chan *chan, void *data)
+{
+	struct device *dev = chan->mbox->dev;
+	struct ti_msgmgr_inst *inst = dev_get_drvdata(dev);
+	const struct ti_msgmgr_desc *desc;
+	struct ti_queue_inst *qinst = chan->con_priv;
+	int num_words, trail_bytes;
+	struct ti_msgmgr_message *message = data;
+	void __iomem *data_reg;
+	u32 *word_data;
+
+	if (WARN_ON(!inst)) {
+		dev_err(dev, "no platform drv data??\n");
+		return -EINVAL;
+	}
+	desc = inst->desc;
+
+	if (desc->max_message_size < message->len) {
+		dev_err(dev, "Queue %s message length %d > max %d\n",
+			qinst->name, message->len, desc->max_message_size);
+		return -EINVAL;
+	}
+
+	/* NOTE: Constraints similar to rx path exists here as well */
+	for (data_reg = qinst->queue_buff_start,
+	     num_words = message->len / sizeof(u32),
+	     word_data = (u32 *)message->buf;
+	     num_words; num_words--, data_reg += sizeof(u32), word_data++)
+		writel(*word_data, data_reg);
+
+	trail_bytes = message->len % sizeof(u32);
+	if (trail_bytes) {
+		u32 data_trail = *word_data;
+
+		/* Ensure all unused data is 0 */
+		data_trail &= 0xFFFFFFFF >> (8 * (sizeof(u32) - trail_bytes));
+		writel(data_trail, data_reg);
+		data_reg++;
+	}
+	/*
+	 * 'data_reg' indicates next register to write. If we did not already
+	 * write on tx complete reg(last reg), we must do so for transmit
+	 */
+	if (data_reg <= qinst->queue_buff_end)
+		writel(0, qinst->queue_buff_end);
+
+	return 0;
+}
+
+/**
+ * ti_msgmgr_queue_startup() - Startup queue
+ * @chan:	Channel pointer
+ *
+ * Return: 0 if all goes good, else return corresponding error message
+ */
+static int ti_msgmgr_queue_startup(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+	struct device *dev = chan->mbox->dev;
+	int ret;
+
+	if (!qinst->is_tx) {
+		/*
+		 * With the expectation that the IRQ might be shared in SoC
+		 */
+		ret = request_irq(qinst->irq, ti_msgmgr_queue_rx_interrupt,
+				  IRQF_SHARED, qinst->name, chan);
+		if (ret) {
+			dev_err(dev, "Unable to get IRQ %d on %s(res=%d)\n",
+				qinst->irq, qinst->name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ti_msgmgr_queue_shutdown() - Shutdown the queue
+ * @chan:	Channel pointer
+ */
+static void ti_msgmgr_queue_shutdown(struct mbox_chan *chan)
+{
+	struct ti_queue_inst *qinst = chan->con_priv;
+
+	if (!qinst->is_tx)
+		free_irq(qinst->irq, chan);
+}
+
+/**
+ * ti_msgmgr_of_xlate() - Translation of phandle to queue
+ * @mbox:	Mailbox controller
+ * @p:		phandle pointer
+ *
+ * Return: Mailbox channel corresponding to the queue, else return error
+ * pointer.
+ */
+static struct mbox_chan *ti_msgmgr_of_xlate(struct mbox_controller *mbox,
+					    const struct of_phandle_args *p)
+{
+	struct ti_msgmgr_inst *inst;
+	int req_qid, req_pid;
+	struct ti_queue_inst *qinst;
+	int i;
+
+	inst = container_of(mbox, struct ti_msgmgr_inst, mbox);
+	if (WARN_ON(!inst))
+		return ERR_PTR(-EINVAL);
+
+	/* #mbox-cells is 2 */
+	if (p->args_count != 2) {
+		dev_err(inst->dev, "Invalid arguments in dt[%d] instead of 2\n",
+			p->args_count);
+		return ERR_PTR(-EINVAL);
+	}
+	req_qid = p->args[0];
+	req_pid = p->args[1];
+
+	for (qinst = inst->qinsts, i = 0; i < inst->num_valid_queues;
+	     i++, qinst++) {
+		if (req_qid == qinst->queue_id && req_pid == qinst->proxy_id)
+			return qinst->chan;
+	}
+
+	dev_err(inst->dev, "Queue ID %d, Proxy ID %d is wrong on %s\n",
+		req_qid, req_pid, p->np->name);
+	return ERR_PTR(-ENOENT);
+}
+
+/**
+ * ti_msgmgr_queue_setup() - Setup data structures for each queue instance
+ * @idx:	index of the queue
+ * @dev:	pointer to the message manager device
+ * @np:		pointer to the of node
+ * @inst:	Queue instance pointer
+ * @d:		Message Manager instance description data
+ * @qd:		Queue description data
+ * @qinst:	Queue instance pointer
+ * @chan:	pointer to mailbox channel
+ *
+ * Return: 0 if all went well, else return corresponding error
+ */
+static int ti_msgmgr_queue_setup(int idx, struct device *dev,
+				 struct device_node *np,
+				 struct ti_msgmgr_inst *inst,
+				 const struct ti_msgmgr_desc *d,
+				 const struct ti_msgmgr_valid_queue_desc *qd,
+				 struct ti_queue_inst *qinst,
+				 struct mbox_chan *chan)
+{
+	qinst->proxy_id = qd->proxy_id;
+	qinst->queue_id = qd->queue_id;
+
+	if (qinst->queue_id > d->queue_count) {
+		dev_err(dev, "Queue Data [idx=%d] queuid %d > %d\n",
+			idx, qinst->queue_id, d->queue_count);
+		return -ERANGE;
+	}
+
+	qinst->is_tx = qd->is_tx;
+	snprintf(qinst->name, sizeof(qinst->name), "%s %s_%03d_%03d",
+		 dev_name(dev), qinst->is_tx ? "tx" : "rx", qinst->queue_id,
+		 qinst->proxy_id);
+
+	if (!qinst->is_tx) {
+		char of_rx_irq_name[7];
+
+		snprintf(of_rx_irq_name, sizeof(of_rx_irq_name),
+			 "rx_%03d", qinst->queue_id);
+
+		qinst->irq = of_irq_get_byname(np, of_rx_irq_name);
+		if (qinst->irq < 0) {
+			dev_crit(dev,
+				 "[%d]QID %d PID %d:No IRQ[%s]: %d\n",
+				 idx, qinst->queue_id, qinst->proxy_id,
+				 of_rx_irq_name, qinst->irq);
+			return qinst->irq;
+		}
+		/* Allocate usage buffer for rx */
+		qinst->rx_buff = devm_kzalloc(dev,
+					      d->max_message_size, GFP_KERNEL);
+		if (!qinst->rx_buff)
+			return -ENOMEM;
+	}
+
+	qinst->queue_buff_start = inst->queue_proxy_region +
+	    Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_first_reg);
+	qinst->queue_buff_end = inst->queue_proxy_region +
+	    Q_DATA_OFFSET(qinst->proxy_id, qinst->queue_id, d->data_last_reg);
+	qinst->queue_state = inst->queue_state_debug_region +
+	    Q_STATE_OFFSET(qinst->queue_id);
+	qinst->chan = chan;
+
+	chan->con_priv = qinst;
+
+	dev_dbg(dev, "[%d] qidx=%d pidx=%d irq=%d q_s=%p q_e = %p\n",
+		idx, qinst->queue_id, qinst->proxy_id, qinst->irq,
+		qinst->queue_buff_start, qinst->queue_buff_end);
+	return 0;
+}
+
+/* Queue operations */
+static const struct mbox_chan_ops ti_msgmgr_chan_ops = {
+	.startup = ti_msgmgr_queue_startup,
+	.shutdown = ti_msgmgr_queue_shutdown,
+	.peek_data = ti_msgmgr_queue_peek_data,
+	.last_tx_done = ti_msgmgr_last_tx_done,
+	.send_data = ti_msgmgr_send_data,
+};
+
+/* Keystone K2G SoC integration details */
+static const struct ti_msgmgr_valid_queue_desc k2g_valid_queues[] = {
+	{.queue_id = 0, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 1, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 2, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 3, .proxy_id = 0, .is_tx = true,},
+	{.queue_id = 5, .proxy_id = 2, .is_tx = false,},
+	{.queue_id = 56, .proxy_id = 1, .is_tx = true,},
+	{.queue_id = 57, .proxy_id = 2, .is_tx = false,},
+	{.queue_id = 58, .proxy_id = 3, .is_tx = true,},
+	{.queue_id = 59, .proxy_id = 4, .is_tx = true,},
+	{.queue_id = 60, .proxy_id = 5, .is_tx = true,},
+	{.queue_id = 61, .proxy_id = 6, .is_tx = true,},
+};
+
+static const struct ti_msgmgr_desc k2g_desc = {
+	.queue_count = 64,
+	.max_message_size = 64,
+	.max_messages = 128,
+	.q_slices = 1,
+	.q_proxies = 1,
+	.data_first_reg = 16,
+	.data_last_reg = 31,
+	.tx_polled = false,
+	.valid_queues = k2g_valid_queues,
+	.num_valid_queues = ARRAY_SIZE(k2g_valid_queues),
+};
+
+static const struct of_device_id ti_msgmgr_of_match[] = {
+	{.compatible = "ti,k2g-message-manager", .data = &k2g_desc},
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, ti_msgmgr_of_match);
+
+static int ti_msgmgr_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id;
+	struct device_node *np;
+	struct resource *res;
+	const struct ti_msgmgr_desc *desc;
+	struct ti_msgmgr_inst *inst;
+	struct ti_queue_inst *qinst;
+	struct mbox_controller *mbox;
+	struct mbox_chan *chans;
+	int queue_count;
+	int i;
+	int ret = -EINVAL;
+	const struct ti_msgmgr_valid_queue_desc *queue_desc;
+
+	if (!dev->of_node) {
+		dev_err(dev, "no OF information\n");
+		return -EINVAL;
+	}
+	np = dev->of_node;
+
+	of_id = of_match_device(ti_msgmgr_of_match, dev);
+	if (!of_id) {
+		dev_err(dev, "OF data missing\n");
+		return -EINVAL;
+	}
+	desc = of_id->data;
+
+	inst = devm_kzalloc(dev, sizeof(*inst), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	inst->dev = dev;
+	inst->desc = desc;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "queue_proxy_region");
+	inst->queue_proxy_region = devm_ioremap_resource(dev, res);
+	if (IS_ERR(inst->queue_proxy_region))
+		return PTR_ERR(inst->queue_proxy_region);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "queue_state_debug_region");
+	inst->queue_state_debug_region = devm_ioremap_resource(dev, res);
+	if (IS_ERR(inst->queue_state_debug_region))
+		return PTR_ERR(inst->queue_state_debug_region);
+
+	dev_dbg(dev, "proxy region=%p, queue_state=%p\n",
+		inst->queue_proxy_region, inst->queue_state_debug_region);
+
+	queue_count = desc->num_valid_queues;
+	if (!queue_count || queue_count > desc->queue_count) {
+		dev_crit(dev, "Invalid Number of queues %d. Max %d\n",
+			 queue_count, desc->queue_count);
+		return -ERANGE;
+	}
+	inst->num_valid_queues = queue_count;
+
+	qinst = devm_kzalloc(dev, sizeof(*qinst) * queue_count, GFP_KERNEL);
+	if (!qinst)
+		return -ENOMEM;
+	inst->qinsts = qinst;
+
+	chans = devm_kzalloc(dev, sizeof(*chans) * queue_count, GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+	inst->chans = chans;
+
+	for (i = 0, queue_desc = desc->valid_queues;
+	     i < queue_count; i++, qinst++, chans++, queue_desc++) {
+		ret = ti_msgmgr_queue_setup(i, dev, np, inst,
+					    desc, queue_desc, qinst, chans);
+		if (ret)
+			return ret;
+	}
+
+	mbox = &inst->mbox;
+	mbox->dev = dev;
+	mbox->ops = &ti_msgmgr_chan_ops;
+	mbox->chans = inst->chans;
+	mbox->num_chans = inst->num_valid_queues;
+	mbox->txdone_irq = false;
+	mbox->txdone_poll = desc->tx_polled;
+	if (desc->tx_polled)
+		mbox->txpoll_period = desc->tx_poll_timeout_ms;
+	mbox->of_xlate = ti_msgmgr_of_xlate;
+
+	platform_set_drvdata(pdev, inst);
+	ret = mbox_controller_register(mbox);
+	if (ret)
+		dev_err(dev, "Failed to register mbox_controller(%d)\n", ret);
+
+	return ret;
+}
+
+static int ti_msgmgr_remove(struct platform_device *pdev)
+{
+	struct ti_msgmgr_inst *inst;
+
+	inst = platform_get_drvdata(pdev);
+	mbox_controller_unregister(&inst->mbox);
+
+	return 0;
+}
+
+static struct platform_driver ti_msgmgr_driver = {
+	.probe = ti_msgmgr_probe,
+	.remove = ti_msgmgr_remove,
+	.driver = {
+		   .name = "ti-msgmgr",
+		   .of_match_table = of_match_ptr(ti_msgmgr_of_match),
+	},
+};
+module_platform_driver(ti_msgmgr_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI message manager driver");
+MODULE_AUTHOR("Nishanth Menon");
+MODULE_ALIAS("platform:ti-msgmgr");
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index d80cce499a56..7df6b4f1548a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -98,7 +98,6 @@ __acquires(bitmap->lock)
 		   bitmap->bp[page].hijacked) {
 		/* somebody beat us to getting the page */
 		kfree(mappage);
-		return 0;
 	} else {
 
 		/* no page was in place and we have one, so install it */
@@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
 	sb->chunksize = cpu_to_le32(chunksize);
 
 	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
-	if (!daemon_sleep ||
-	    (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
+	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
 		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
 		daemon_sleep = 5 * HZ;
 	}
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index 7d5c3a610ca5..5e3fcd6ecf77 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -49,8 +49,8 @@
  * When we set a bit, or in the counter (to start a write), if the fields is
  * 0, we first set the disk bit and set the counter to 1.
  *
- * If the counter is 0, the on-disk bit is clear and the stipe is clean
- * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
  * and sets the on-disk bit (lazily).
  * If a periodic sweep find the counter at 2, it is decremented to 1.
  * If the sweep find the counter at 1, the on-disk bit is cleared and the
diff --git a/drivers/md/md.c b/drivers/md/md.c
index e55e6cf9ec17..c068f171b4eb 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
  */
 void mddev_suspend(struct mddev *mddev)
 {
+	WARN_ON_ONCE(current == mddev->thread->tsk);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
@@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
 		export_array(mddev);
 
 		md_clean(mddev);
-		kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 		if (mddev->hold_active == UNTIL_STOP)
 			mddev->hold_active = 0;
 	}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 0a72ab6e6c20..dd483bb2e111 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
 
-	mp_bh->bio = *bio;
+	bio_init(&mp_bh->bio);
+	__bio_clone_fast(&mp_bh->bio, bio);
+
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
 	mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4e3843f7d245..39fb21e048e6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 	if (fail) {
 		spin_lock_irq(&conf->device_lock);
 		list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
+		conf->nr_queued++;
 		spin_unlock_irq(&conf->device_lock);
 		md_wakeup_thread(conf->mddev->thread);
 	} else {
@@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {
@@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 			    !conf->fullsync &&
 			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				break;
-			BUG_ON(sync_blocks < (PAGE_SIZE>>9));
 			if ((len >> 9) > sync_blocks)
 				len = sync_blocks<<9;
 		}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1c1447dd3417..e3fd725d5c4d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		if (fail) {
 			spin_lock_irq(&conf->device_lock);
 			list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+			conf->nr_queued++;
 			spin_unlock_irq(&conf->device_lock);
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
@@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b4f02c9959f2..8ab8b65e1741 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	unsigned long do_wakeup = 0;
-	int i = 0;
+	bool do_wakeup = false;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup |= 1 << hash;
+			do_wakeup = true;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		if (do_wakeup & (1 << i))
-			wake_up(&conf->wait_for_stripe[i]);
-	}
-
 	if (do_wakeup) {
+		wake_up(&conf->wait_for_stripe);
 		if (atomic_read(&conf->active_stripes) == 0)
 			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
@@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_exclusive_cmd(
-					conf->wait_for_stripe[hash],
+				wait_event_lock_irq(
+					conf->wait_for_stripe,
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					spin_unlock_irq(conf->hash_locks + hash),
-					spin_lock_irq(conf->hash_locks + hash));
+					*(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
@@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
-	if (!list_empty(conf->inactive_list + hash))
-		wake_up(&conf->wait_for_stripe[hash]);
-
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	unsigned long cpu;
 	int err = 0;
 
+	/*
+	 * Never shrink. And mddev_suspend() could deadlock if this is called
+	 * from raid5d. In that case, scribble_disks and scribble_sectors
+	 * should equal to new_disks and new_sectors
+	 */
+	if (conf->scribble_disks >= new_disks &&
+	    conf->scribble_sectors >= new_sectors)
+		return 0;
 	mddev_suspend(conf->mddev);
 	get_online_cpus();
 	for_each_present_cpu(cpu) {
@@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	}
 	put_online_cpus();
 	mddev_resume(conf->mddev);
+	if (!err) {
+		conf->scribble_disks = new_disks;
+		conf->scribble_sectors = new_sectors;
+	}
 	return err;
 }
 
@@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
+		wait_event_cmd(conf->wait_for_stripe,
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 
 		list_del_init(&sh->batch_list);
 
-		WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+		WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
 					  (1 << STRIPE_SYNCING) |
 					  (1 << STRIPE_REPLACED) |
-					  (1 << STRIPE_PREREAD_ACTIVE) |
 					  (1 << STRIPE_DELAYED) |
 					  (1 << STRIPE_BIT_DELAY) |
 					  (1 << STRIPE_FULL_WRITE) |
@@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 					  (1 << STRIPE_DISCARD) |
 					  (1 << STRIPE_BATCH_READY) |
 					  (1 << STRIPE_BATCH_ERR) |
-					  (1 << STRIPE_BITMAP_PENDING)));
-		WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
-					      (1 << STRIPE_REPLACED)));
+					  (1 << STRIPE_BITMAP_PENDING)),
+			"stripe state: %lx\n", sh->state);
+		WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+					      (1 << STRIPE_REPLACED)),
+			"head stripe state: %lx\n", head_sh->state);
 
 		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+					    (1 << STRIPE_PREREAD_ACTIVE) |
 					    (1 << STRIPE_DEGRADED)),
 			      head_sh->state & (1 << STRIPE_INSYNC));
 
@@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
 		break;
 	default:
@@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
 	}
 	put_online_cpus();
 
+	if (!err) {
+		conf->scribble_disks = max(conf->raid_disks,
+			conf->previous_raid_disks);
+		conf->scribble_sectors = max(conf->chunk_sectors,
+			conf->prev_chunk_sectors);
+	}
 	return err;
 }
 
@@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	seqcount_init(&conf->gen_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		init_waitqueue_head(&conf->wait_for_stripe[i]);
-	}
+	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
@@ -7014,8 +7025,8 @@ static int raid5_run(struct mddev *mddev)
 		}
 
 		if (discard_supported &&
-		   mddev->queue->limits.max_discard_sectors >= stripe &&
-		   mddev->queue->limits.discard_granularity >= stripe)
+		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
+		    mddev->queue->limits.discard_granularity >= stripe)
 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
 						mddev->queue);
 		else
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index a415e1cd39b8..517d4b68a1be 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -510,6 +510,8 @@ struct r5conf {
 					      * conversions
 					      */
 	} __percpu *percpu;
+	int scribble_disks;
+	int scribble_sectors;
 #ifdef CONFIG_HOTPLUG_CPU
 	struct notifier_block	cpu_notify;
 #endif
@@ -522,7 +524,7 @@ struct r5conf {
 	atomic_t		empty_inactive_list_nr;
 	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_quiescent;
-	wait_queue_head_t	wait_for_stripe[NR_STRIPE_HASH_LOCKS];
+	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	unsigned long		cache_state;
 #define R5_INACTIVE_BLOCKED	1	/* release of inactive stripes blocked,
diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c
index 24152accc66c..4769469fe842 100644
--- a/drivers/media/pci/ivtv/ivtv-udma.c
+++ b/drivers/media/pci/ivtv/ivtv-udma.c
@@ -124,8 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
 	}
 
 	/* Get user pages for DMA Xfer */
-	err = get_user_pages_unlocked(current, current->mm,
-			user_dma.uaddr, user_dma.page_count, 0, 1, dma->map);
+	err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, 0,
+			1, dma->map);
 
 	if (user_dma.page_count != err) {
 		IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c
index 2b8e7b2f2b86..b094054cda6e 100644
--- a/drivers/media/pci/ivtv/ivtv-yuv.c
+++ b/drivers/media/pci/ivtv/ivtv-yuv.c
@@ -75,14 +75,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
 	ivtv_udma_get_page_info (&uv_dma, (unsigned long)args->uv_source, 360 * uv_decode_height);
 
 	/* Get user pages for DMA Xfer */
-	y_pages = get_user_pages_unlocked(current, current->mm,
-				y_dma.uaddr, y_dma.page_count, 0, 1,
-				&dma->map[0]);
+	y_pages = get_user_pages_unlocked(y_dma.uaddr,
+			y_dma.page_count, 0, 1, &dma->map[0]);
 	uv_pages = 0; /* silence gcc. value is set and consumed only if: */
 	if (y_pages == y_dma.page_count) {
-		uv_pages = get_user_pages_unlocked(current, current->mm,
-					uv_dma.uaddr, uv_dma.page_count, 0, 1,
-					&dma->map[y_pages]);
+		uv_pages = get_user_pages_unlocked(uv_dma.uaddr,
+				uv_dma.page_count, 0, 1, &dma->map[y_pages]);
 	}
 
 	if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) {
diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c
index 7dc27ac6bd02..1a9a58588f84 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -278,7 +278,7 @@ struct vsp1_dl *vsp1_dl_create(struct vsp1_device *vsp1)
 	dl->vsp1 = vsp1;
 	dl->size = VSP1_DL_BODY_SIZE * ARRAY_SIZE(dl->lists.all);
 
-	dl->mem = dma_alloc_writecombine(vsp1->dev, dl->size, &dl->dma,
+	dl->mem = dma_alloc_wc(vsp1->dev, dl->size, &dl->dma,
 					 GFP_KERNEL);
 	if (!dl->mem) {
 		kfree(dl);
@@ -300,6 +300,6 @@ struct vsp1_dl *vsp1_dl_create(struct vsp1_device *vsp1)
 
 void vsp1_dl_destroy(struct vsp1_dl *dl)
 {
-	dma_free_writecombine(dl->vsp1->dev, dl->size, dl->mem, dl->dma);
+	dma_free_wc(dl->vsp1->dev, dl->size, dl->mem, dl->dma);
 	kfree(dl);
 }
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index f669cedca8bd..df4c052c6bd6 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -181,8 +181,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
 		data, size, dma->nr_pages);
 
-	err = get_user_pages(current, current->mm,
-			     data & PAGE_MASK, dma->nr_pages,
+	err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
 			     rw == READ, 1, /* force */
 			     dma->pages, NULL);
 
diff --git a/drivers/memory/Kconfig b/drivers/memory/Kconfig
index 6f3154613dc7..51d5cd20c26a 100644
--- a/drivers/memory/Kconfig
+++ b/drivers/memory/Kconfig
@@ -114,6 +114,14 @@ config JZ4780_NEMC
 	  the Ingenic JZ4780. This controller is used to handle external
 	  memory devices such as NAND and SRAM.
 
+config MTK_SMI
+	bool
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	help
+	  This driver is for the Memory Controller module in MediaTek SoCs,
+	  mainly help enable/disable iommu and control the power domain and
+	  clocks for each local arbiter.
+
 source "drivers/memory/tegra/Kconfig"
 
 endif
diff --git a/drivers/memory/Makefile b/drivers/memory/Makefile
index 1c46af501610..890bdf402449 100644
--- a/drivers/memory/Makefile
+++ b/drivers/memory/Makefile
@@ -15,5 +15,6 @@ obj-$(CONFIG_FSL_IFC)		+= fsl_ifc.o
 obj-$(CONFIG_MVEBU_DEVBUS)	+= mvebu-devbus.o
 obj-$(CONFIG_TEGRA20_MC)	+= tegra20-mc.o
 obj-$(CONFIG_JZ4780_NEMC)	+= jz4780-nemc.o
+obj-$(CONFIG_MTK_SMI)		+= mtk-smi.o
 
 obj-$(CONFIG_TEGRA_MC)		+= tegra/
diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c
index acd1460cf787..2a691da8c1c7 100644
--- a/drivers/memory/fsl_ifc.c
+++ b/drivers/memory/fsl_ifc.c
@@ -260,7 +260,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev)
 
 	/* get the Controller level irq */
 	fsl_ifc_ctrl_dev->irq = irq_of_parse_and_map(dev->dev.of_node, 0);
-	if (fsl_ifc_ctrl_dev->irq == NO_IRQ) {
+	if (fsl_ifc_ctrl_dev->irq == 0) {
 		dev_err(&dev->dev, "failed to get irq resource "
 							"for IFC\n");
 		ret = -ENODEV;
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
new file mode 100644
index 000000000000..089091f5f890
--- /dev/null
+++ b/drivers/memory/mtk-smi.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2015-2016 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <soc/mediatek/smi.h>
+
+#define SMI_LARB_MMU_EN		0xf00
+
+struct mtk_smi {
+	struct device	*dev;
+	struct clk	*clk_apb, *clk_smi;
+};
+
+struct mtk_smi_larb { /* larb: local arbiter */
+	struct mtk_smi  smi;
+	void __iomem	*base;
+	struct device	*smi_common_dev;
+	u32		*mmu;
+};
+
+static int mtk_smi_enable(const struct mtk_smi *smi)
+{
+	int ret;
+
+	ret = pm_runtime_get_sync(smi->dev);
+	if (ret < 0)
+		return ret;
+
+	ret = clk_prepare_enable(smi->clk_apb);
+	if (ret)
+		goto err_put_pm;
+
+	ret = clk_prepare_enable(smi->clk_smi);
+	if (ret)
+		goto err_disable_apb;
+
+	return 0;
+
+err_disable_apb:
+	clk_disable_unprepare(smi->clk_apb);
+err_put_pm:
+	pm_runtime_put_sync(smi->dev);
+	return ret;
+}
+
+static void mtk_smi_disable(const struct mtk_smi *smi)
+{
+	clk_disable_unprepare(smi->clk_smi);
+	clk_disable_unprepare(smi->clk_apb);
+	pm_runtime_put_sync(smi->dev);
+}
+
+int mtk_smi_larb_get(struct device *larbdev)
+{
+	struct mtk_smi_larb *larb = dev_get_drvdata(larbdev);
+	struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev);
+	int ret;
+
+	/* Enable the smi-common's power and clocks */
+	ret = mtk_smi_enable(common);
+	if (ret)
+		return ret;
+
+	/* Enable the larb's power and clocks */
+	ret = mtk_smi_enable(&larb->smi);
+	if (ret) {
+		mtk_smi_disable(common);
+		return ret;
+	}
+
+	/* Configure the iommu info for this larb */
+	writel(*larb->mmu, larb->base + SMI_LARB_MMU_EN);
+
+	return 0;
+}
+
+void mtk_smi_larb_put(struct device *larbdev)
+{
+	struct mtk_smi_larb *larb = dev_get_drvdata(larbdev);
+	struct mtk_smi *common = dev_get_drvdata(larb->smi_common_dev);
+
+	/*
+	 * Don't de-configure the iommu info for this larb since there may be
+	 * several modules in this larb.
+	 * The iommu info will be reset after power off.
+	 */
+
+	mtk_smi_disable(&larb->smi);
+	mtk_smi_disable(common);
+}
+
+static int
+mtk_smi_larb_bind(struct device *dev, struct device *master, void *data)
+{
+	struct mtk_smi_larb *larb = dev_get_drvdata(dev);
+	struct mtk_smi_iommu *smi_iommu = data;
+	unsigned int         i;
+
+	for (i = 0; i < smi_iommu->larb_nr; i++) {
+		if (dev == smi_iommu->larb_imu[i].dev) {
+			/* The 'mmu' may be updated in iommu-attach/detach. */
+			larb->mmu = &smi_iommu->larb_imu[i].mmu;
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+
+static void
+mtk_smi_larb_unbind(struct device *dev, struct device *master, void *data)
+{
+	/* Do nothing as the iommu is always enabled. */
+}
+
+static const struct component_ops mtk_smi_larb_component_ops = {
+	.bind = mtk_smi_larb_bind,
+	.unbind = mtk_smi_larb_unbind,
+};
+
+static int mtk_smi_larb_probe(struct platform_device *pdev)
+{
+	struct mtk_smi_larb *larb;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct device_node *smi_node;
+	struct platform_device *smi_pdev;
+
+	if (!dev->pm_domain)
+		return -EPROBE_DEFER;
+
+	larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL);
+	if (!larb)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	larb->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(larb->base))
+		return PTR_ERR(larb->base);
+
+	larb->smi.clk_apb = devm_clk_get(dev, "apb");
+	if (IS_ERR(larb->smi.clk_apb))
+		return PTR_ERR(larb->smi.clk_apb);
+
+	larb->smi.clk_smi = devm_clk_get(dev, "smi");
+	if (IS_ERR(larb->smi.clk_smi))
+		return PTR_ERR(larb->smi.clk_smi);
+	larb->smi.dev = dev;
+
+	smi_node = of_parse_phandle(dev->of_node, "mediatek,smi", 0);
+	if (!smi_node)
+		return -EINVAL;
+
+	smi_pdev = of_find_device_by_node(smi_node);
+	of_node_put(smi_node);
+	if (smi_pdev) {
+		larb->smi_common_dev = &smi_pdev->dev;
+	} else {
+		dev_err(dev, "Failed to get the smi_common device\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_enable(dev);
+	platform_set_drvdata(pdev, larb);
+	return component_add(dev, &mtk_smi_larb_component_ops);
+}
+
+static int mtk_smi_larb_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+	component_del(&pdev->dev, &mtk_smi_larb_component_ops);
+	return 0;
+}
+
+static const struct of_device_id mtk_smi_larb_of_ids[] = {
+	{ .compatible = "mediatek,mt8173-smi-larb",},
+	{}
+};
+
+static struct platform_driver mtk_smi_larb_driver = {
+	.probe	= mtk_smi_larb_probe,
+	.remove = mtk_smi_larb_remove,
+	.driver	= {
+		.name = "mtk-smi-larb",
+		.of_match_table = mtk_smi_larb_of_ids,
+	}
+};
+
+static int mtk_smi_common_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mtk_smi *common;
+
+	if (!dev->pm_domain)
+		return -EPROBE_DEFER;
+
+	common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL);
+	if (!common)
+		return -ENOMEM;
+	common->dev = dev;
+
+	common->clk_apb = devm_clk_get(dev, "apb");
+	if (IS_ERR(common->clk_apb))
+		return PTR_ERR(common->clk_apb);
+
+	common->clk_smi = devm_clk_get(dev, "smi");
+	if (IS_ERR(common->clk_smi))
+		return PTR_ERR(common->clk_smi);
+
+	pm_runtime_enable(dev);
+	platform_set_drvdata(pdev, common);
+	return 0;
+}
+
+static int mtk_smi_common_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static const struct of_device_id mtk_smi_common_of_ids[] = {
+	{ .compatible = "mediatek,mt8173-smi-common", },
+	{}
+};
+
+static struct platform_driver mtk_smi_common_driver = {
+	.probe	= mtk_smi_common_probe,
+	.remove = mtk_smi_common_remove,
+	.driver	= {
+		.name = "mtk-smi-common",
+		.of_match_table = mtk_smi_common_of_ids,
+	}
+};
+
+static int __init mtk_smi_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&mtk_smi_common_driver);
+	if (ret != 0) {
+		pr_err("Failed to register SMI driver\n");
+		return ret;
+	}
+
+	ret = platform_driver_register(&mtk_smi_larb_driver);
+	if (ret != 0) {
+		pr_err("Failed to register SMI-LARB driver\n");
+		goto err_unreg_smi;
+	}
+	return ret;
+
+err_unreg_smi:
+	platform_driver_unregister(&mtk_smi_common_driver);
+	return ret;
+}
+subsys_initcall(mtk_smi_init);
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 6515dfc2b805..21825ddce4a3 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -541,9 +541,20 @@ static void gpmc_cs_show_timings(int cs, const char *desc)
 	GPMC_GET_TICKS(GPMC_CS_CONFIG3,  0,  3, "adv-on-ns");
 	GPMC_GET_TICKS(GPMC_CS_CONFIG3,  8, 12, "adv-rd-off-ns");
 	GPMC_GET_TICKS(GPMC_CS_CONFIG3, 16, 20, "adv-wr-off-ns");
+	if (gpmc_capability & GPMC_HAS_MUX_AAD) {
+		GPMC_GET_TICKS(GPMC_CS_CONFIG3, 4, 6, "adv-aad-mux-on-ns");
+		GPMC_GET_TICKS(GPMC_CS_CONFIG3, 24, 26,
+				"adv-aad-mux-rd-off-ns");
+		GPMC_GET_TICKS(GPMC_CS_CONFIG3, 28, 30,
+				"adv-aad-mux-wr-off-ns");
+	}
 
 	GPMC_GET_TICKS(GPMC_CS_CONFIG4,  0,  3, "oe-on-ns");
 	GPMC_GET_TICKS(GPMC_CS_CONFIG4,  8, 12, "oe-off-ns");
+	if (gpmc_capability & GPMC_HAS_MUX_AAD) {
+		GPMC_GET_TICKS(GPMC_CS_CONFIG4,  4,  6, "oe-aad-mux-on-ns");
+		GPMC_GET_TICKS(GPMC_CS_CONFIG4, 13, 15, "oe-aad-mux-off-ns");
+	}
 	GPMC_GET_TICKS(GPMC_CS_CONFIG4, 16, 19, "we-on-ns");
 	GPMC_GET_TICKS(GPMC_CS_CONFIG4, 24, 28, "we-off-ns");
 
@@ -734,9 +745,18 @@ int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t,
 	GPMC_SET_ONE(GPMC_CS_CONFIG3,  0,  3, adv_on);
 	GPMC_SET_ONE(GPMC_CS_CONFIG3,  8, 12, adv_rd_off);
 	GPMC_SET_ONE(GPMC_CS_CONFIG3, 16, 20, adv_wr_off);
+	if (gpmc_capability & GPMC_HAS_MUX_AAD) {
+		GPMC_SET_ONE(GPMC_CS_CONFIG3,  4,  6, adv_aad_mux_on);
+		GPMC_SET_ONE(GPMC_CS_CONFIG3, 24, 26, adv_aad_mux_rd_off);
+		GPMC_SET_ONE(GPMC_CS_CONFIG3, 28, 30, adv_aad_mux_wr_off);
+	}
 
 	GPMC_SET_ONE(GPMC_CS_CONFIG4,  0,  3, oe_on);
 	GPMC_SET_ONE(GPMC_CS_CONFIG4,  8, 12, oe_off);
+	if (gpmc_capability & GPMC_HAS_MUX_AAD) {
+		GPMC_SET_ONE(GPMC_CS_CONFIG4,  4,  6, oe_aad_mux_on);
+		GPMC_SET_ONE(GPMC_CS_CONFIG4, 13, 15, oe_aad_mux_off);
+	}
 	GPMC_SET_ONE(GPMC_CS_CONFIG4, 16, 19, we_on);
 	GPMC_SET_ONE(GPMC_CS_CONFIG4, 24, 28, we_off);
 
@@ -1722,6 +1742,12 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np,
 	of_property_read_u32(np, "gpmc,adv-on-ns", &gpmc_t->adv_on);
 	of_property_read_u32(np, "gpmc,adv-rd-off-ns", &gpmc_t->adv_rd_off);
 	of_property_read_u32(np, "gpmc,adv-wr-off-ns", &gpmc_t->adv_wr_off);
+	of_property_read_u32(np, "gpmc,adv-aad-mux-on-ns",
+			     &gpmc_t->adv_aad_mux_on);
+	of_property_read_u32(np, "gpmc,adv-aad-mux-rd-off-ns",
+			     &gpmc_t->adv_aad_mux_rd_off);
+	of_property_read_u32(np, "gpmc,adv-aad-mux-wr-off-ns",
+			     &gpmc_t->adv_aad_mux_wr_off);
 
 	/* WE signal timings */
 	of_property_read_u32(np, "gpmc,we-on-ns", &gpmc_t->we_on);
@@ -1730,6 +1756,10 @@ static void __maybe_unused gpmc_read_timings_dt(struct device_node *np,
 	/* OE signal timings */
 	of_property_read_u32(np, "gpmc,oe-on-ns", &gpmc_t->oe_on);
 	of_property_read_u32(np, "gpmc,oe-off-ns", &gpmc_t->oe_off);
+	of_property_read_u32(np, "gpmc,oe-aad-mux-on-ns",
+			     &gpmc_t->oe_aad_mux_on);
+	of_property_read_u32(np, "gpmc,oe-aad-mux-off-ns",
+			     &gpmc_t->oe_aad_mux_off);
 
 	/* access and cycle timings */
 	of_property_read_u32(np, "gpmc,page-burst-access-ns",
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index ef09ba0289d7..d5cfb503b9d6 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -298,8 +298,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev)
 	sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ?
 		PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 
-	if (sg_count != 1 ||
-			(sg_dma_len(&dev->req->sg) < dev->req->sg.length)) {
+	if (sg_count != 1 || sg_dma_len(&dev->req->sg) < R592_LFIFO_SIZE) {
 		message("problem in dma_map_sg");
 		return -EIO;
 	}
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 6a451bd65bf3..e0203b1a20fd 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -1394,8 +1394,6 @@ retry:
 		}
 
 		pinned_pages->nr_pages = get_user_pages(
-				current,
-				mm,
 				(u64)addr,
 				nr_pages,
 				!!(prot & SCIF_PROT_WRITE),
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index f74fc0ca2ef9..a2d97b9b17e3 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -198,8 +198,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
 #else
 	*pageshift = PAGE_SHIFT;
 #endif
-	if (get_user_pages
-	    (current, current->mm, vaddr, 1, write, 0, &page, NULL) <= 0)
+	if (get_user_pages(vaddr, 1, write, 0, &page, NULL) <= 0)
 		return -EFAULT;
 	*paddr = page_to_phys(page);
 	put_page(page);
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index fe207e542032..3bdbe50a363f 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -589,6 +589,14 @@ static int mmc_blk_ioctl_cmd(struct block_device *bdev,
 	struct mmc_card *card;
 	int err = 0, ioc_err = 0;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
 	if (IS_ERR(idata))
 		return PTR_ERR(idata);
@@ -631,6 +639,14 @@ static int mmc_blk_ioctl_multi_cmd(struct block_device *bdev,
 	int i, err = 0, ioc_err = 0;
 	__u64 num_of_cmds;
 
+	/*
+	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
+	 * whole block device, not on a partition.  This prevents overspray
+	 * between sibling partitions.
+	 */
+	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
+		return -EPERM;
+
 	if (copy_from_user(&num_of_cmds, &user->num_of_cmds,
 			   sizeof(num_of_cmds)))
 		return -EFAULT;
@@ -688,14 +704,6 @@ cmd_err:
 static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned int cmd, unsigned long arg)
 {
-	/*
-	 * The caller must have CAP_SYS_RAWIO, and must be calling this on the
-	 * whole block device, not on a partition.  This prevents overspray
-	 * between sibling partitions.
-	 */
-	if ((!capable(CAP_SYS_RAWIO)) || (bdev != bdev->bd_contains))
-		return -EPERM;
-
 	switch (cmd) {
 	case MMC_IOC_CMD:
 		return mmc_blk_ioctl_cmd(bdev,
@@ -1362,8 +1370,8 @@ static int mmc_blk_err_check(struct mmc_card *card,
 
 	if (brq->data.error) {
 		if (need_retune && !brq->retune_retry_done) {
-			pr_info("%s: retrying because a re-tune was needed\n",
-				req->rq_disk->disk_name);
+			pr_debug("%s: retrying because a re-tune was needed\n",
+				 req->rq_disk->disk_name);
 			brq->retune_retry_done = 1;
 			return MMC_BLK_RETRY;
 		}
@@ -1524,13 +1532,13 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 	}
 	if (rq_data_dir(req) == READ) {
 		brq->cmd.opcode = readcmd;
-		brq->data.flags |= MMC_DATA_READ;
+		brq->data.flags = MMC_DATA_READ;
 		if (brq->mrq.stop)
 			brq->stop.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 |
 					MMC_CMD_AC;
 	} else {
 		brq->cmd.opcode = writecmd;
-		brq->data.flags |= MMC_DATA_WRITE;
+		brq->data.flags = MMC_DATA_WRITE;
 		if (brq->mrq.stop)
 			brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B |
 					MMC_CMD_AC;
@@ -1799,7 +1807,7 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
 
 	brq->data.blksz = 512;
 	brq->data.blocks = packed->blocks + hdr_blocks;
-	brq->data.flags |= MMC_DATA_WRITE;
+	brq->data.flags = MMC_DATA_WRITE;
 
 	brq->stop.opcode = MMC_STOP_TRANSMISSION;
 	brq->stop.arg = 0;
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index 7fc9174d4619..c032eef45762 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -2829,6 +2829,7 @@ static int mtf_testlist_show(struct seq_file *sf, void *data)
 
 	mutex_lock(&mmc_test_lock);
 
+	seq_printf(sf, "0:\tRun all tests\n");
 	for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++)
 		seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name);
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f95d41ffc766..41b1e761965f 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1033,7 +1033,7 @@ static inline void mmc_set_ios(struct mmc_host *host)
 		"width %u timing %u\n",
 		 mmc_hostname(host), ios->clock, ios->bus_mode,
 		 ios->power_mode, ios->chip_select, ios->vdd,
-		 ios->bus_width, ios->timing);
+		 1 << ios->bus_width, ios->timing);
 
 	host->ops->set_ios(host, ios);
 }
@@ -1079,7 +1079,8 @@ int mmc_execute_tuning(struct mmc_card *card)
 	err = host->ops->execute_tuning(host, opcode);
 
 	if (err)
-		pr_err("%s: tuning execution failed\n", mmc_hostname(host));
+		pr_err("%s: tuning execution failed: %d\n",
+			mmc_hostname(host), err);
 	else
 		mmc_retune_enable(host);
 
@@ -1204,8 +1205,9 @@ EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
  * @np: The device node need to be parsed.
  * @mask: mask of voltages available for MMC/SD/SDIO
  *
- * 1. Return zero on success.
- * 2. Return negative errno: voltage-range is invalid.
+ * Parse the "voltage-ranges" DT property, returning zero if it is not
+ * found, negative errno if the voltage-range specification is invalid,
+ * or one if the voltage-range is specified and successfully parsed.
  */
 int mmc_of_parse_voltage(struct device_node *np, u32 *mask)
 {
@@ -1214,8 +1216,12 @@ int mmc_of_parse_voltage(struct device_node *np, u32 *mask)
 
 	voltage_ranges = of_get_property(np, "voltage-ranges", &num_ranges);
 	num_ranges = num_ranges / sizeof(*voltage_ranges) / 2;
-	if (!voltage_ranges || !num_ranges) {
-		pr_info("%s: voltage-ranges unspecified\n", np->full_name);
+	if (!voltage_ranges) {
+		pr_debug("%s: voltage-ranges unspecified\n", np->full_name);
+		return 0;
+	}
+	if (!num_ranges) {
+		pr_err("%s: voltage-ranges empty\n", np->full_name);
 		return -EINVAL;
 	}
 
@@ -1234,7 +1240,7 @@ int mmc_of_parse_voltage(struct device_node *np, u32 *mask)
 		*mask |= ocr_mask;
 	}
 
-	return 0;
+	return 1;
 }
 EXPORT_SYMBOL(mmc_of_parse_voltage);
 
@@ -2532,7 +2538,7 @@ int mmc_detect_card_removed(struct mmc_host *host)
 	if (!card)
 		return 1;
 
-	if (host->caps & MMC_CAP_NONREMOVABLE)
+	if (!mmc_card_is_removable(host))
 		return 0;
 
 	ret = mmc_card_removed(card);
@@ -2570,7 +2576,7 @@ void mmc_rescan(struct work_struct *work)
 		return;
 
 	/* If there is a non-removable card registered, only scan once */
-	if ((host->caps & MMC_CAP_NONREMOVABLE) && host->rescan_entered)
+	if (!mmc_card_is_removable(host) && host->rescan_entered)
 		return;
 	host->rescan_entered = 1;
 
@@ -2587,8 +2593,7 @@ void mmc_rescan(struct work_struct *work)
 	 * if there is a _removable_ card registered, check whether it is
 	 * still present
 	 */
-	if (host->bus_ops && !host->bus_dead
-	    && !(host->caps & MMC_CAP_NONREMOVABLE))
+	if (host->bus_ops && !host->bus_dead && mmc_card_is_removable(host))
 		host->bus_ops->detect(host);
 
 	host->detect_change = 0;
@@ -2613,7 +2618,7 @@ void mmc_rescan(struct work_struct *work)
 	mmc_bus_put(host);
 
 	mmc_claim_host(host);
-	if (!(host->caps & MMC_CAP_NONREMOVABLE) && host->ops->get_cd &&
+	if (mmc_card_is_removable(host) && host->ops->get_cd &&
 			host->ops->get_cd(host) == 0) {
 		mmc_power_off(host);
 		mmc_release_host(host);
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 65cc0ac9b82d..9382a57a5aa4 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -220,7 +220,7 @@ static int mmc_clock_opt_set(void *data, u64 val)
 	struct mmc_host *host = data;
 
 	/* We need this check due to input value is u64 */
-	if (val > host->f_max)
+	if (val != 0 && (val > host->f_max || val < host->f_min))
 		return -EINVAL;
 
 	mmc_claim_host(host);
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 0aecd5c00b86..1d94607611d8 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -339,6 +339,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
 	host->class_dev.parent = dev;
 	host->class_dev.class = &mmc_host_class;
 	device_initialize(&host->class_dev);
+	device_enable_async_suspend(&host->class_dev);
 
 	if (mmc_gpio_alloc(host)) {
 		put_device(&host->class_dev);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index bf49e44571f2..4dbe3df8024b 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -501,7 +501,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			card->ext_csd.raw_bkops_status =
 				ext_csd[EXT_CSD_BKOPS_STATUS];
 			if (!card->ext_csd.man_bkops_en)
-				pr_info("%s: MAN_BKOPS_EN bit is not set\n",
+				pr_debug("%s: MAN_BKOPS_EN bit is not set\n",
 					mmc_hostname(card->host));
 		}
 
@@ -945,7 +945,7 @@ static int mmc_select_bus_width(struct mmc_card *card)
 			break;
 		} else {
 			pr_warn("%s: switch to bus width %d failed\n",
-				mmc_hostname(host), ext_csd_bits[idx]);
+				mmc_hostname(host), 1 << bus_width);
 		}
 	}
 
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 2c90635c89af..62355bda608f 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -90,7 +90,6 @@ int mmc_send_status(struct mmc_card *card, u32 *status)
 
 static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 {
-	int err;
 	struct mmc_command cmd = {0};
 
 	BUG_ON(!host);
@@ -105,11 +104,7 @@ static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 		cmd.flags = MMC_RSP_NONE | MMC_CMD_AC;
 	}
 
-	err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
-	if (err)
-		return err;
-
-	return 0;
+	return mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
 }
 
 int mmc_select_card(struct mmc_card *card)
@@ -244,7 +239,6 @@ int mmc_all_send_cid(struct mmc_host *host, u32 *cid)
 
 int mmc_set_relative_addr(struct mmc_card *card)
 {
-	int err;
 	struct mmc_command cmd = {0};
 
 	BUG_ON(!card);
@@ -254,11 +248,7 @@ int mmc_set_relative_addr(struct mmc_card *card)
 	cmd.arg = card->rca << 16;
 	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
-	err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES);
-	if (err)
-		return err;
-
-	return 0;
+	return mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES);
 }
 
 static int
@@ -743,7 +733,7 @@ mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode,
 
 int mmc_bus_test(struct mmc_card *card, u8 bus_width)
 {
-	int err, width;
+	int width;
 
 	if (bus_width == MMC_BUS_WIDTH_8)
 		width = 8;
@@ -759,8 +749,7 @@ int mmc_bus_test(struct mmc_card *card, u8 bus_width)
 	 * is a problem.  This improves chances that the test will work.
 	 */
 	mmc_send_bus_test(card, card->host, MMC_BUS_TEST_W, width);
-	err = mmc_send_bus_test(card, card->host, MMC_BUS_TEST_R, width);
-	return err;
+	return mmc_send_bus_test(card, card->host, MMC_BUS_TEST_R, width);
 }
 
 int mmc_send_hpi_cmd(struct mmc_card *card, u32 *status)
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index aba786daebca..bc173e18b71c 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/device.h>
 #include <linux/err.h>
-#include <linux/of_gpio.h>
 #include <linux/gpio/consumer.h>
 
 #include <linux/mmc/host.h>
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index bb39a29b2db6..b95bd24d92f4 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -74,8 +74,6 @@ void mmc_decode_cid(struct mmc_card *card)
 {
 	u32 *resp = card->raw_cid;
 
-	memset(&card->cid, 0, sizeof(struct mmc_cid));
-
 	/*
 	 * SD doesn't currently have a version field so we will
 	 * have to assume we can parse this.
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 48d0c93ba25a..16b774c18e75 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -120,7 +120,6 @@ EXPORT_SYMBOL(mmc_wait_for_app_cmd);
 
 int mmc_app_set_bus_width(struct mmc_card *card, int width)
 {
-	int err;
 	struct mmc_command cmd = {0};
 
 	BUG_ON(!card);
@@ -140,11 +139,7 @@ int mmc_app_set_bus_width(struct mmc_card *card, int width)
 		return -EINVAL;
 	}
 
-	err = mmc_wait_for_app_cmd(card->host, card, &cmd, MMC_CMD_RETRIES);
-	if (err)
-		return err;
-
-	return 0;
+	return mmc_wait_for_app_cmd(card->host, card, &cmd, MMC_CMD_RETRIES);
 }
 
 int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 467b3cf80c44..bd44ba8116d1 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -106,8 +106,6 @@ static int sdio_read_cccr(struct mmc_card *card, u32 ocr)
 	unsigned char data;
 	unsigned char speed;
 
-	memset(&card->cccr, 0, sizeof(struct sdio_cccr));
-
 	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_CCCR, 0, &data);
 	if (ret)
 		goto out;
diff --git a/drivers/mmc/core/sdio_ops.c b/drivers/mmc/core/sdio_ops.c
index 62508b457c4f..34f6e8015306 100644
--- a/drivers/mmc/core/sdio_ops.c
+++ b/drivers/mmc/core/sdio_ops.c
@@ -217,7 +217,6 @@ int sdio_reset(struct mmc_host *host)
 	else
 		abort |= 0x08;
 
-	ret = mmc_io_rw_direct_host(host, 1, 0, SDIO_CCCR_ABORT, abort, NULL);
-	return ret;
+	return mmc_io_rw_direct_host(host, 1, 0, SDIO_CCCR_ABORT, abort, NULL);
 }
 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 1526b8a10b09..04feea8354cb 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -318,15 +318,15 @@ config MMC_SDHCI_F_SDH30
 	  If unsure, say N.
 
 config MMC_SDHCI_IPROC
-	tristate "SDHCI platform support for the iProc SD/MMC Controller"
-	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	tristate "SDHCI support for the BCM2835 & iProc SD/MMC Controller"
+	depends on ARCH_BCM2835 || ARCH_BCM_IPROC || COMPILE_TEST
 	depends on MMC_SDHCI_PLTFM
 	default ARCH_BCM_IPROC
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the iProc SD/MMC controller.
 
-	  If you have an IPROC platform with SD or MMC devices,
+	  If you have a BCM2835 or IPROC platform with SD or MMC devices,
 	  say Y or M here.
 
 	  If unsure, say N.
@@ -560,8 +560,8 @@ config MMC_TMIO
 
 config MMC_SDHI
 	tristate "SH-Mobile SDHI SD/SDIO controller support"
-	depends on SUPERH || ARM
-	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
+	depends on SUPERH || ARM || ARM64
+	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	select MMC_TMIO_CORE
 	help
 	  This provides support for the SDHI SD/SDIO controller found in
@@ -673,8 +673,8 @@ config MMC_DW_ROCKCHIP
 
 config MMC_SH_MMCIF
 	tristate "SuperH Internal MMCIF support"
-	depends on MMC_BLOCK && HAS_DMA
-	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
+	depends on HAS_DMA
+	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	help
 	  This selects the MMC Host Interface controller (MMCIF).
 
@@ -786,3 +786,14 @@ config MMC_MTK
 	  If you have a machine with a integrated SD/MMC card reader, say Y or M here.
 	  This is needed if support for any SD/SDIO/MMC devices is required.
 	  If unsure, say N.
+
+config MMC_SDHCI_MICROCHIP_PIC32
+        tristate "Microchip PIC32MZDA SDHCI support"
+        depends on MMC_SDHCI && PIC32MZDA && MMC_SDHCI_PLTFM
+        help
+          This selects the Secure Digital Host Controller Interface (SDHCI)
+          for PIC32MZDA platform.
+
+          If you have a controller with this interface, say Y or M here.
+
+          If unsure, say N.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 3595f83e89dd..af918d261ff9 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_MMC_SDHCI_BCM2835)		+= sdhci-bcm2835.o
 obj-$(CONFIG_MMC_SDHCI_IPROC)		+= sdhci-iproc.o
 obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
 obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
+obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)	+= sdhci-pic32.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 851ccd9ac868..9268c41a8561 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -848,9 +848,7 @@ static u32 atmci_prepare_command(struct mmc_host *mmc,
 		if (cmd->opcode == SD_IO_RW_EXTENDED) {
 			cmdr |= ATMCI_CMDR_SDIO_BLOCK;
 		} else {
-			if (data->flags & MMC_DATA_STREAM)
-				cmdr |= ATMCI_CMDR_STREAM;
-			else if (data->blocks > 1)
+			if (data->blocks > 1)
 				cmdr |= ATMCI_CMDR_MULTI_BLOCK;
 			else
 				cmdr |= ATMCI_CMDR_BLOCK;
@@ -1371,10 +1369,7 @@ static void atmci_start_request(struct atmel_mci *host,
 		host->stop_cmdr |= ATMCI_CMDR_STOP_XFER;
 		if (!(data->flags & MMC_DATA_WRITE))
 			host->stop_cmdr |= ATMCI_CMDR_TRDIR_READ;
-		if (data->flags & MMC_DATA_STREAM)
-			host->stop_cmdr |= ATMCI_CMDR_STREAM;
-		else
-			host->stop_cmdr |= ATMCI_CMDR_MULTI_BLOCK;
+		host->stop_cmdr |= ATMCI_CMDR_MULTI_BLOCK;
 	}
 
 	/*
@@ -2443,7 +2438,7 @@ static int atmci_configure_dma(struct atmel_mci *host)
 		struct mci_platform_data *pdata = host->pdev->dev.platform_data;
 		dma_cap_mask_t mask;
 
-		if (!pdata->dma_filter)
+		if (!pdata || !pdata->dma_filter)
 			return -ENODEV;
 
 		dma_cap_zero(mask);
diff --git a/drivers/mmc/host/bfin_sdh.c b/drivers/mmc/host/bfin_sdh.c
index 2b7f37e82ca9..526231e38583 100644
--- a/drivers/mmc/host/bfin_sdh.c
+++ b/drivers/mmc/host/bfin_sdh.c
@@ -126,9 +126,6 @@ static int sdh_setup_data(struct sdh_host *host, struct mmc_data *data)
 	length = data->blksz * data->blocks;
 	bfin_write_SDH_DATA_LGTH(length);
 
-	if (data->flags & MMC_DATA_STREAM)
-		data_ctl |= DTX_MODE;
-
 	if (data->flags & MMC_DATA_READ)
 		data_ctl |= DTX_DIR;
 	/* Only supports power-of-2 block size */
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index ea2a2ebc6b91..693144e7427b 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -346,10 +346,6 @@ static void mmc_davinci_start_command(struct mmc_davinci_host *host,
 	if (cmd->data)
 		cmd_reg |= MMCCMD_WDATX;
 
-	/* Setting whether stream or block transfer */
-	if (cmd->flags & MMC_DATA_STREAM)
-		cmd_reg |= MMCCMD_STRMTP;
-
 	/* Setting whether data read or write */
 	if (host->data_dir == DAVINCI_MMC_DATADIR_WRITE)
 		cmd_reg |= MMCCMD_DTRW;
@@ -568,8 +564,7 @@ mmc_davinci_prepare_data(struct mmc_davinci_host *host, struct mmc_request *req)
 		return;
 	}
 
-	dev_dbg(mmc_dev(host->mmc), "%s %s, %d blocks of %d bytes\n",
-		(data->flags & MMC_DATA_STREAM) ? "stream" : "block",
+	dev_dbg(mmc_dev(host->mmc), "%s, %d blocks of %d bytes\n",
 		(data->flags & MMC_DATA_WRITE) ? "write" : "read",
 		data->blocks, data->blksz);
 	dev_dbg(mmc_dev(host->mmc), "  DTO %d cycles + %d ns\n",
@@ -584,22 +579,18 @@ mmc_davinci_prepare_data(struct mmc_davinci_host *host, struct mmc_request *req)
 	writel(data->blksz, host->base + DAVINCI_MMCBLEN);
 
 	/* Configure the FIFO */
-	switch (data->flags & MMC_DATA_WRITE) {
-	case MMC_DATA_WRITE:
+	if (data->flags & MMC_DATA_WRITE) {
 		host->data_dir = DAVINCI_MMC_DATADIR_WRITE;
 		writel(fifo_lev | MMCFIFOCTL_FIFODIR_WR | MMCFIFOCTL_FIFORST,
 			host->base + DAVINCI_MMCFIFOCTL);
 		writel(fifo_lev | MMCFIFOCTL_FIFODIR_WR,
 			host->base + DAVINCI_MMCFIFOCTL);
-		break;
-
-	default:
+	} else {
 		host->data_dir = DAVINCI_MMC_DATADIR_READ;
 		writel(fifo_lev | MMCFIFOCTL_FIFODIR_RD | MMCFIFOCTL_FIFORST,
 			host->base + DAVINCI_MMCFIFOCTL);
 		writel(fifo_lev | MMCFIFOCTL_FIFODIR_RD,
 			host->base + DAVINCI_MMCFIFOCTL);
-		break;
 	}
 
 	host->buffer = NULL;
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 3a7e835a0033..8790f2afc057 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -145,6 +145,16 @@ static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing)
 		mci_writel(host, CLKSEL64, clksel);
 	else
 		mci_writel(host, CLKSEL, clksel);
+
+	/*
+	 * Exynos4412 and Exynos5250 extends the use of CMD register with the
+	 * use of bit 29 (which is reserved on standard MSHC controllers) for
+	 * optionally bypassing the HOLD register for command and data. The
+	 * HOLD register should be bypassed in case there is no phase shift
+	 * applied on CMD/DATA that is sent to the card.
+	 */
+	if (!SDMMC_CLKSEL_GET_DRV_WD3(clksel))
+		set_bit(DW_MMC_CARD_NO_USE_HOLD, &host->cur_slot->flags);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -202,26 +212,6 @@ static int dw_mci_exynos_resume_noirq(struct device *dev)
 #define dw_mci_exynos_resume_noirq	NULL
 #endif /* CONFIG_PM_SLEEP */
 
-static void dw_mci_exynos_prepare_command(struct dw_mci *host, u32 *cmdr)
-{
-	struct dw_mci_exynos_priv_data *priv = host->priv;
-	/*
-	 * Exynos4412 and Exynos5250 extends the use of CMD register with the
-	 * use of bit 29 (which is reserved on standard MSHC controllers) for
-	 * optionally bypassing the HOLD register for command and data. The
-	 * HOLD register should be bypassed in case there is no phase shift
-	 * applied on CMD/DATA that is sent to the card.
-	 */
-	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
-		priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU) {
-		if (SDMMC_CLKSEL_GET_DRV_WD3(mci_readl(host, CLKSEL64)))
-			*cmdr |= SDMMC_CMD_USE_HOLD_REG;
-	 } else {
-		if (SDMMC_CLKSEL_GET_DRV_WD3(mci_readl(host, CLKSEL)))
-			*cmdr |= SDMMC_CMD_USE_HOLD_REG;
-	}
-}
-
 static void dw_mci_exynos_config_hs400(struct dw_mci *host, u32 timing)
 {
 	struct dw_mci_exynos_priv_data *priv = host->priv;
@@ -500,7 +490,6 @@ static const struct dw_mci_drv_data exynos_drv_data = {
 	.caps			= exynos_dwmmc_caps,
 	.init			= dw_mci_exynos_priv_init,
 	.setup_clock		= dw_mci_exynos_setup_clock,
-	.prepare_command	= dw_mci_exynos_prepare_command,
 	.set_ios		= dw_mci_exynos_set_ios,
 	.parse_dt		= dw_mci_exynos_parse_dt,
 	.execute_tuning		= dw_mci_exynos_execute_tuning,
diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c
index 81bdeeb05a4d..c0bb0c793e84 100644
--- a/drivers/mmc/host/dw_mmc-pltfm.c
+++ b/drivers/mmc/host/dw_mmc-pltfm.c
@@ -26,19 +26,6 @@
 #include "dw_mmc.h"
 #include "dw_mmc-pltfm.h"
 
-static void dw_mci_pltfm_prepare_command(struct dw_mci *host, u32 *cmdr)
-{
-	*cmdr |= SDMMC_CMD_USE_HOLD_REG;
-}
-
-static const struct dw_mci_drv_data socfpga_drv_data = {
-	.prepare_command	= dw_mci_pltfm_prepare_command,
-};
-
-static const struct dw_mci_drv_data pistachio_drv_data = {
-	.prepare_command	= dw_mci_pltfm_prepare_command,
-};
-
 int dw_mci_pltfm_register(struct platform_device *pdev,
 			  const struct dw_mci_drv_data *drv_data)
 {
@@ -94,10 +81,8 @@ EXPORT_SYMBOL_GPL(dw_mci_pltfm_pmops);
 
 static const struct of_device_id dw_mci_pltfm_match[] = {
 	{ .compatible = "snps,dw-mshc", },
-	{ .compatible = "altr,socfpga-dw-mshc",
-		.data = &socfpga_drv_data },
-	{ .compatible = "img,pistachio-dw-mshc",
-		.data = &pistachio_drv_data },
+	{ .compatible = "altr,socfpga-dw-mshc", },
+	{ .compatible = "img,pistachio-dw-mshc", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, dw_mci_pltfm_match);
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index d9c92f31da64..84e50f3a64b6 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -26,11 +26,6 @@ struct dw_mci_rockchip_priv_data {
 	int			default_sample_phase;
 };
 
-static void dw_mci_rockchip_prepare_command(struct dw_mci *host, u32 *cmdr)
-{
-	*cmdr |= SDMMC_CMD_USE_HOLD_REG;
-}
-
 static int dw_mci_rk3288_setup_clock(struct dw_mci *host)
 {
 	host->bus_hz /= RK3288_CLKGEN_DIV;
@@ -240,12 +235,10 @@ static int dw_mci_rockchip_init(struct dw_mci *host)
 }
 
 static const struct dw_mci_drv_data rk2928_drv_data = {
-	.prepare_command        = dw_mci_rockchip_prepare_command,
 	.init			= dw_mci_rockchip_init,
 };
 
 static const struct dw_mci_drv_data rk3288_drv_data = {
-	.prepare_command        = dw_mci_rockchip_prepare_command,
 	.set_ios		= dw_mci_rk3288_set_ios,
 	.execute_tuning		= dw_mci_rk3288_execute_tuning,
 	.parse_dt		= dw_mci_rk3288_parse_dt,
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 712835177e8b..242f9a0769bd 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -234,7 +234,6 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 	struct mmc_data	*data;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
 	struct dw_mci *host = slot->host;
-	const struct dw_mci_drv_data *drv_data = slot->host->drv_data;
 	u32 cmdr;
 
 	cmd->error = -EINPROGRESS;
@@ -290,14 +289,12 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
 	data = cmd->data;
 	if (data) {
 		cmdr |= SDMMC_CMD_DAT_EXP;
-		if (data->flags & MMC_DATA_STREAM)
-			cmdr |= SDMMC_CMD_STRM_MODE;
 		if (data->flags & MMC_DATA_WRITE)
 			cmdr |= SDMMC_CMD_DAT_WR;
 	}
 
-	if (drv_data && drv_data->prepare_command)
-		drv_data->prepare_command(slot->host, &cmdr);
+	if (!test_bit(DW_MMC_CARD_NO_USE_HOLD, &slot->flags))
+		cmdr |= SDMMC_CMD_USE_HOLD_REG;
 
 	return cmdr;
 }
@@ -1450,12 +1447,11 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
 {
 	int present;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
-	struct dw_mci_board *brd = slot->host->pdata;
 	struct dw_mci *host = slot->host;
 	int gpio_cd = mmc_gpio_get_cd(mmc);
 
 	/* Use platform get_cd function, else try onboard card detect */
-	if ((brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION) ||
+	if ((mmc->caps & MMC_CAP_NEEDS_POLL) ||
 	    (mmc->caps & MMC_CAP_NONREMOVABLE))
 		present = 1;
 	else if (!IS_ERR_VALUE(gpio_cd))
@@ -1477,6 +1473,34 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
 	return present;
 }
 
+static void dw_mci_hw_reset(struct mmc_host *mmc)
+{
+	struct dw_mci_slot *slot = mmc_priv(mmc);
+	struct dw_mci *host = slot->host;
+	int reset;
+
+	if (host->use_dma == TRANS_MODE_IDMAC)
+		dw_mci_idmac_reset(host);
+
+	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_DMA_RESET |
+				     SDMMC_CTRL_FIFO_RESET))
+		return;
+
+	/*
+	 * According to eMMC spec, card reset procedure:
+	 * tRstW >= 1us:   RST_n pulse width
+	 * tRSCA >= 200us: RST_n to Command time
+	 * tRSTH >= 1us:   RST_n high period
+	 */
+	reset = mci_readl(host, RST_N);
+	reset &= ~(SDMMC_RST_HWACTIVE << slot->id);
+	mci_writel(host, RST_N, reset);
+	usleep_range(1, 2);
+	reset |= SDMMC_RST_HWACTIVE << slot->id;
+	mci_writel(host, RST_N, reset);
+	usleep_range(200, 300);
+}
+
 static void dw_mci_init_card(struct mmc_host *mmc, struct mmc_card *card)
 {
 	struct dw_mci_slot *slot = mmc_priv(mmc);
@@ -1563,6 +1587,7 @@ static const struct mmc_host_ops dw_mci_ops = {
 	.set_ios		= dw_mci_set_ios,
 	.get_ro			= dw_mci_get_ro,
 	.get_cd			= dw_mci_get_cd,
+	.hw_reset               = dw_mci_hw_reset,
 	.enable_sdio_irq	= dw_mci_enable_sdio_irq,
 	.execute_tuning		= dw_mci_execute_tuning,
 	.card_busy		= dw_mci_card_busy,
@@ -2840,23 +2865,13 @@ static void dw_mci_dto_timer(unsigned long arg)
 }
 
 #ifdef CONFIG_OF
-static struct dw_mci_of_quirks {
-	char *quirk;
-	int id;
-} of_quirks[] = {
-	{
-		.quirk	= "broken-cd",
-		.id	= DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
-	},
-};
-
 static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
 {
 	struct dw_mci_board *pdata;
 	struct device *dev = host->dev;
 	struct device_node *np = dev->of_node;
 	const struct dw_mci_drv_data *drv_data = host->drv_data;
-	int idx, ret;
+	int ret;
 	u32 clock_frequency;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
@@ -2864,17 +2879,7 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
 		return ERR_PTR(-ENOMEM);
 
 	/* find out number of slots supported */
-	if (of_property_read_u32(dev->of_node, "num-slots",
-				&pdata->num_slots)) {
-		dev_info(dev,
-			 "num-slots property not found, assuming 1 slot is available\n");
-		pdata->num_slots = 1;
-	}
-
-	/* get quirks */
-	for (idx = 0; idx < ARRAY_SIZE(of_quirks); idx++)
-		if (of_get_property(np, of_quirks[idx].quirk, NULL))
-			pdata->quirks |= of_quirks[idx].id;
+	of_property_read_u32(np, "num-slots", &pdata->num_slots);
 
 	if (of_property_read_u32(np, "fifo-depth", &pdata->fifo_depth))
 		dev_info(dev,
@@ -2908,18 +2913,19 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
 
 static void dw_mci_enable_cd(struct dw_mci *host)
 {
-	struct dw_mci_board *brd = host->pdata;
 	unsigned long irqflags;
 	u32 temp;
 	int i;
+	struct dw_mci_slot *slot;
 
-	/* No need for CD if broken card detection */
-	if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
-		return;
-
-	/* No need for CD if all slots have a non-error GPIO */
+	/*
+	 * No need for CD if all slots have a non-error GPIO
+	 * as well as broken card detection is found.
+	 */
 	for (i = 0; i < host->num_slots; i++) {
-		struct dw_mci_slot *slot = host->slot[i];
+		slot = host->slot[i];
+		if (slot->mmc->caps & MMC_CAP_NEEDS_POLL)
+			return;
 
 		if (IS_ERR_VALUE(mmc_gpio_get_cd(slot->mmc)))
 			break;
@@ -2949,12 +2955,6 @@ int dw_mci_probe(struct dw_mci *host)
 		}
 	}
 
-	if (host->pdata->num_slots < 1) {
-		dev_err(host->dev,
-			"Platform data must supply num_slots.\n");
-		return -ENODEV;
-	}
-
 	host->biu_clk = devm_clk_get(host->dev, "biu");
 	if (IS_ERR(host->biu_clk)) {
 		dev_dbg(host->dev, "biu clock not available\n");
@@ -3052,8 +3052,10 @@ int dw_mci_probe(struct dw_mci *host)
 	}
 
 	/* Reset all blocks */
-	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS))
-		return -ENODEV;
+	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
+		ret = -ENODEV;
+		goto err_clk_ciu;
+	}
 
 	host->dma_ops = host->pdata->dma_ops;
 	dw_mci_init_dma(host);
@@ -3111,13 +3113,20 @@ int dw_mci_probe(struct dw_mci *host)
 	if (host->pdata->num_slots)
 		host->num_slots = host->pdata->num_slots;
 	else
-		host->num_slots = SDMMC_GET_SLOT_NUM(mci_readl(host, HCON));
+		host->num_slots = 1;
+
+	if (host->num_slots < 1 ||
+	    host->num_slots > SDMMC_GET_SLOT_NUM(mci_readl(host, HCON))) {
+		dev_err(host->dev,
+			"Platform data must supply correct num_slots.\n");
+		ret = -ENODEV;
+		goto err_clk_ciu;
+	}
 
 	/*
 	 * Enable interrupts for command done, data over, data empty,
 	 * receive ready and error such as transmit, receive timeout, crc error
 	 */
-	mci_writel(host, RINTSTS, 0xFFFFFFFF);
 	mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
 		   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
 		   DW_MCI_ERROR_FLAGS);
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index f695b58f0613..68d5da2dfd19 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -46,6 +46,7 @@
 #define SDMMC_VERID		0x06c
 #define SDMMC_HCON		0x070
 #define SDMMC_UHS_REG		0x074
+#define SDMMC_RST_N		0x078
 #define SDMMC_BMOD		0x080
 #define SDMMC_PLDMND		0x084
 #define SDMMC_DBADDR		0x088
@@ -169,6 +170,8 @@
 #define SDMMC_IDMAC_ENABLE		BIT(7)
 #define SDMMC_IDMAC_FB			BIT(1)
 #define SDMMC_IDMAC_SWRESET		BIT(0)
+/* H/W reset */
+#define SDMMC_RST_HWACTIVE		0x1
 /* Version ID register define */
 #define SDMMC_GET_VERID(x)		((x) & 0xFFFF)
 /* Card read threshold */
@@ -265,6 +268,7 @@ struct dw_mci_slot {
 #define DW_MMC_CARD_PRESENT	0
 #define DW_MMC_CARD_NEED_INIT	1
 #define DW_MMC_CARD_NO_LOW_PWR	2
+#define DW_MMC_CARD_NO_USE_HOLD 3
 	int			id;
 	int			sdio_id;
 };
@@ -274,7 +278,6 @@ struct dw_mci_slot {
  * @caps: mmc subsystem specified capabilities of the controller(s).
  * @init: early implementation specific initialization.
  * @setup_clock: implementation specific clock configuration.
- * @prepare_command: handle CMD register extensions.
  * @set_ios: handle bus specific extensions.
  * @parse_dt: parse implementation specific device tree properties.
  * @execute_tuning: implementation specific tuning procedure.
@@ -287,7 +290,6 @@ struct dw_mci_drv_data {
 	unsigned long	*caps;
 	int		(*init)(struct dw_mci *host);
 	int		(*setup_clock)(struct dw_mci *host);
-	void		(*prepare_command)(struct dw_mci *host, u32 *cmdr);
 	void		(*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
 	int		(*parse_dt)(struct dw_mci *host);
 	int		(*execute_tuning)(struct dw_mci_slot *slot, u32 opcode);
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 76e8bce6f46e..03ddf0ecf402 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -660,8 +660,6 @@ static void jz4740_mmc_send_command(struct jz4740_mmc_host *host,
 		cmdat |= JZ_MMC_CMDAT_DATA_EN;
 		if (cmd->data->flags & MMC_DATA_WRITE)
 			cmdat |= JZ_MMC_CMDAT_WRITE;
-		if (cmd->data->flags & MMC_DATA_STREAM)
-			cmdat |= JZ_MMC_CMDAT_STREAM;
 		if (host->use_dma)
 			cmdat |= JZ_MMC_CMDAT_DMA_EN;
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 3446097a43c0..e77d79c8cd9f 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1442,6 +1442,12 @@ static int mmc_spi_probe(struct spi_device *spi)
 					     host->pdata->cd_debounce);
 		if (status != 0)
 			goto fail_add_host;
+
+		/* The platform has a CD GPIO signal that may support
+		 * interrupts, so let mmc_gpiod_request_cd_irq() decide
+		 * if polling is needed or not.
+		 */
+		mmc->caps &= ~MMC_CAP_NEEDS_POLL;
 		mmc_gpiod_request_cd_irq(mmc);
 	}
 
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 0d6ca4116f3d..2e6c96845c9a 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -40,7 +40,6 @@
 
 #include <asm/div64.h>
 #include <asm/io.h>
-#include <asm/sizes.h>
 
 #include "mmci.h"
 #include "mmci_qcom_dml.h"
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 82a97ac4e956..b17f30da97da 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -35,6 +35,7 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
 #include <linux/mmc/sdio.h>
+#include <linux/mmc/slot-gpio.h>
 
 #define MAX_BD_NUM          1024
 
@@ -1020,26 +1021,19 @@ static void msdc_set_buswidth(struct msdc_host *host, u32 width)
 static int msdc_ops_switch_volt(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct msdc_host *host = mmc_priv(mmc);
-	int min_uv, max_uv;
 	int ret = 0;
 
 	if (!IS_ERR(mmc->supply.vqmmc)) {
-		if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
-			min_uv = 3300000;
-			max_uv = 3300000;
-		} else if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) {
-			min_uv = 1800000;
-			max_uv = 1800000;
-		} else {
+		if (ios->signal_voltage != MMC_SIGNAL_VOLTAGE_330 &&
+		    ios->signal_voltage != MMC_SIGNAL_VOLTAGE_180) {
 			dev_err(host->dev, "Unsupported signal voltage!\n");
 			return -EINVAL;
 		}
 
-		ret = regulator_set_voltage(mmc->supply.vqmmc, min_uv, max_uv);
+		ret = mmc_regulator_set_vqmmc(mmc, ios);
 		if (ret) {
-			dev_err(host->dev,
-					"Regulator set error %d: %d - %d\n",
-					ret, min_uv, max_uv);
+			dev_dbg(host->dev, "Regulator set error %d (%d)\n",
+				ret, ios->signal_voltage);
 		} else {
 			/* Apply different pinctrl settings for different signal voltage */
 			if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180)
@@ -1452,6 +1446,7 @@ static struct mmc_host_ops mt_msdc_ops = {
 	.pre_req = msdc_pre_req,
 	.request = msdc_ops_request,
 	.set_ios = msdc_ops_set_ios,
+	.get_ro = mmc_gpio_get_ro,
 	.start_signal_voltage_switch = msdc_ops_switch_volt,
 	.card_busy = msdc_card_busy,
 	.execute_tuning = msdc_execute_tuning,
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index d110f9e98c4b..3d1ea5e0e549 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -307,9 +307,6 @@ static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 	enum dma_transfer_direction slave_dirn;
 	int i, nents;
 
-	if (data->flags & MMC_DATA_STREAM)
-		nob = 0xffff;
-
 	host->data = data;
 	data->bytes_xfered = 0;
 
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index 660170cd04d9..85bbebfde02e 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -74,7 +74,6 @@ struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
 	const u32 *voltage_ranges;
 	int num_ranges;
 	int i;
-	int ret = -EINVAL;
 
 	if (dev->platform_data || !np)
 		return dev->platform_data;
@@ -97,7 +96,6 @@ struct mmc_spi_platform_data *mmc_spi_get_pdata(struct spi_device *spi)
 		mask = mmc_vddrange_to_ocrmask(be32_to_cpu(voltage_ranges[j]),
 					       be32_to_cpu(voltage_ranges[j + 1]));
 		if (!mask) {
-			ret = -EINVAL;
 			dev_err(dev, "OF: voltage-range #%d is invalid\n", i);
 			goto err_ocr;
 		}
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index f6e4d9718035..f9ac3bb5d617 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -503,8 +503,11 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host)
 	host->pbias = devm_regulator_get_optional(host->dev, "pbias");
 	if (IS_ERR(host->pbias)) {
 		ret = PTR_ERR(host->pbias);
-		if ((ret != -ENODEV) && host->dev->of_node)
+		if ((ret != -ENODEV) && host->dev->of_node) {
+			dev_err(host->dev,
+			"SD card detect fail? enable CONFIG_REGULATOR_PBIAS\n");
 			return ret;
+		}
 		dev_dbg(host->dev, "unable to get pbias regulator %ld\n",
 			PTR_ERR(host->pbias));
 		host->pbias = NULL;
@@ -2159,7 +2162,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 						 &rx_req, &pdev->dev, "rx");
 
 	if (!host->rx_chan) {
-		dev_err(mmc_dev(host->mmc), "unable to obtain RX DMA engine channel %u\n", rx_req);
+		dev_err(mmc_dev(host->mmc), "unable to obtain RX DMA engine channel\n");
 		ret = -ENXIO;
 		goto err_irq;
 	}
@@ -2169,7 +2172,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 						 &tx_req, &pdev->dev, "tx");
 
 	if (!host->tx_chan) {
-		dev_err(mmc_dev(host->mmc), "unable to obtain TX DMA engine channel %u\n", tx_req);
+		dev_err(mmc_dev(host->mmc), "unable to obtain TX DMA engine channel\n");
 		ret = -ENXIO;
 		goto err_irq;
 	}
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index da824772bbb4..86fac3e86833 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -191,9 +191,6 @@ static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data)
 
 	host->data = data;
 
-	if (data->flags & MMC_DATA_STREAM)
-		nob = 0xffff;
-
 	writel(nob, host->base + MMC_NOB);
 	writel(data->blksz, host->base + MMC_BLKLEN);
 
@@ -443,9 +440,6 @@ static void pxamci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 		cmdat |= CMDAT_DATAEN | CMDAT_DMAEN;
 		if (mrq->data->flags & MMC_DATA_WRITE)
 			cmdat |= CMDAT_WRITE;
-
-		if (mrq->data->flags & MMC_DATA_STREAM)
-			cmdat |= CMDAT_STREAM;
 	}
 
 	pxamci_start_cmd(host, mrq->cmd, cmdat);
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 6291d5042ef2..39814f3dc96f 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1014,8 +1014,7 @@ static int s3cmci_setup_data(struct s3cmci_host *host, struct mmc_data *data)
 	if (host->bus_width == MMC_BUS_WIDTH_4)
 		dcon |= S3C2410_SDIDCON_WIDEBUS;
 
-	if (!(data->flags & MMC_DATA_STREAM))
-		dcon |= S3C2410_SDIDCON_BLOCKMODE;
+	dcon |= S3C2410_SDIDCON_BLOCKMODE;
 
 	if (data->flags & MMC_DATA_WRITE) {
 		dcon |= S3C2410_SDIDCON_TXAFTERRESP;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index a5cda926d38e..6839e41c6d58 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -75,7 +75,6 @@ struct sdhci_acpi_host {
 	const struct sdhci_acpi_slot	*slot;
 	struct platform_device		*pdev;
 	bool				use_runtime_pm;
-	bool				dma_setup;
 };
 
 static inline bool sdhci_acpi_flag(struct sdhci_acpi_host *c, unsigned int flag)
@@ -83,33 +82,6 @@ static inline bool sdhci_acpi_flag(struct sdhci_acpi_host *c, unsigned int flag)
 	return c->slot && (c->slot->flags & flag);
 }
 
-static int sdhci_acpi_enable_dma(struct sdhci_host *host)
-{
-	struct sdhci_acpi_host *c = sdhci_priv(host);
-	struct device *dev = &c->pdev->dev;
-	int err = -1;
-
-	if (c->dma_setup)
-		return 0;
-
-	if (host->flags & SDHCI_USE_64_BIT_DMA) {
-		if (host->quirks2 & SDHCI_QUIRK2_BROKEN_64_BIT_DMA) {
-			host->flags &= ~SDHCI_USE_64_BIT_DMA;
-		} else {
-			err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
-			if (err)
-				dev_warn(dev, "Failed to set 64-bit DMA mask\n");
-		}
-	}
-
-	if (err)
-		err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
-	c->dma_setup = !err;
-
-	return err;
-}
-
 static void sdhci_acpi_int_hw_reset(struct sdhci_host *host)
 {
 	u8 reg;
@@ -127,7 +99,6 @@ static void sdhci_acpi_int_hw_reset(struct sdhci_host *host)
 
 static const struct sdhci_ops sdhci_acpi_ops_dflt = {
 	.set_clock = sdhci_set_clock,
-	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -135,7 +106,6 @@ static const struct sdhci_ops sdhci_acpi_ops_dflt = {
 
 static const struct sdhci_ops sdhci_acpi_ops_int = {
 	.set_clock = sdhci_set_clock,
-	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -264,6 +234,17 @@ static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sd = {
 	.probe_slot	= sdhci_acpi_sd_probe_slot,
 };
 
+static const struct sdhci_acpi_slot sdhci_acpi_slot_qcom_sd_3v = {
+	.quirks  = SDHCI_QUIRK_BROKEN_CARD_DETECTION,
+	.quirks2 = SDHCI_QUIRK2_NO_1_8_V,
+	.caps    = MMC_CAP_NONREMOVABLE,
+};
+
+static const struct sdhci_acpi_slot sdhci_acpi_slot_qcom_sd = {
+	.quirks  = SDHCI_QUIRK_BROKEN_CARD_DETECTION,
+	.caps    = MMC_CAP_NONREMOVABLE,
+};
+
 struct sdhci_acpi_uid_slot {
 	const char *hid;
 	const char *uid;
@@ -284,6 +265,8 @@ static const struct sdhci_acpi_uid_slot sdhci_acpi_uids[] = {
 	{ "INT344D"  , NULL, &sdhci_acpi_slot_int_sdio },
 	{ "PNP0FFF"  , "3" , &sdhci_acpi_slot_int_sd   },
 	{ "PNP0D40"  },
+	{ "QCOM8051", NULL, &sdhci_acpi_slot_qcom_sd_3v },
+	{ "QCOM8052", NULL, &sdhci_acpi_slot_qcom_sd },
 	{ },
 };
 
@@ -298,6 +281,8 @@ static const struct acpi_device_id sdhci_acpi_ids[] = {
 	{ "INT3436"  },
 	{ "INT344D"  },
 	{ "PNP0D40"  },
+	{ "QCOM8051" },
+	{ "QCOM8052" },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids);
@@ -418,6 +403,8 @@ static int sdhci_acpi_probe(struct platform_device *pdev)
 		pm_runtime_enable(dev);
 	}
 
+	device_enable_async_suspend(dev);
+
 	return 0;
 
 err_free:
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index 1c65d4690e70..4a6a1d1386cb 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -74,7 +74,7 @@ static inline u32 bcm2835_sdhci_readl(struct sdhci_host *host, int reg)
 static void bcm2835_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct bcm2835_sdhci *bcm2835_host = pltfm_host->priv;
+	struct bcm2835_sdhci *bcm2835_host = sdhci_pltfm_priv(pltfm_host);
 	u32 oldval = (reg == SDHCI_COMMAND) ? bcm2835_host->shadow :
 		bcm2835_sdhci_readl(host, reg & ~3);
 	u32 word_num = (reg >> 1) & 1;
@@ -152,20 +152,12 @@ static int bcm2835_sdhci_probe(struct platform_device *pdev)
 	struct sdhci_pltfm_host *pltfm_host;
 	int ret;
 
-	host = sdhci_pltfm_init(pdev, &bcm2835_sdhci_pdata, 0);
+	host = sdhci_pltfm_init(pdev, &bcm2835_sdhci_pdata,
+				sizeof(*bcm2835_host));
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
-	bcm2835_host = devm_kzalloc(&pdev->dev, sizeof(*bcm2835_host),
-					GFP_KERNEL);
-	if (!bcm2835_host) {
-		dev_err(mmc_dev(host->mmc),
-			"failed to allocate bcm2835_sdhci\n");
-		return -ENOMEM;
-	}
-
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = bcm2835_host;
 
 	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pltfm_host->clk)) {
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index f25f29253595..2d300d87cda8 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -260,7 +260,7 @@ static inline void esdhc_clrset_le(struct sdhci_host *host, u32 mask, u32 val, i
 static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	u32 val = readl(host->ioaddr + reg);
 
 	if (unlikely(reg == SDHCI_PRESENT_STATE)) {
@@ -338,7 +338,7 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
 static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	u32 data;
 
 	if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)) {
@@ -388,7 +388,7 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg)
 static u16 esdhc_readw_le(struct sdhci_host *host, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	u16 ret = 0;
 	u32 val;
 
@@ -448,7 +448,7 @@ static u16 esdhc_readw_le(struct sdhci_host *host, int reg)
 static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	u32 new_val = 0;
 
 	switch (reg) {
@@ -556,7 +556,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
 static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	u32 new_val;
 	u32 mask;
 
@@ -633,7 +633,7 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
 					 unsigned int clock)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	unsigned int host_clock = pltfm_host->clock;
 	int pre_div = 2;
 	int div = 1;
@@ -692,7 +692,7 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
 static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	struct esdhc_platform_data *boarddata = &imx_data->boarddata;
 
 	switch (boarddata->wp_type) {
@@ -794,7 +794,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
 						unsigned int uhs)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	struct pinctrl_state *pinctrl;
 
 	dev_dbg(mmc_dev(host->mmc), "change pinctrl state for uhs %d\n", uhs);
@@ -864,7 +864,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
 {
 	u32 m;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	struct esdhc_platform_data *boarddata = &imx_data->boarddata;
 
 	/* disable ddr mode and disable HS400 mode */
@@ -917,7 +917,7 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask)
 static unsigned int esdhc_get_max_timeout_count(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 
 	return esdhc_is_usdhc(imx_data) ? 1 << 28 : 1 << 27;
 }
@@ -925,7 +925,7 @@ static unsigned int esdhc_get_max_timeout_count(struct sdhci_host *host)
 static void esdhc_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 
 	/* use maximum timeout counter */
 	sdhci_writeb(host, esdhc_is_usdhc(imx_data) ? 0xF : 0xE,
@@ -1100,21 +1100,17 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 	int err;
 	struct pltfm_imx_data *imx_data;
 
-	host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata, 0);
+	host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata,
+				sizeof(*imx_data));
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
 
-	imx_data = devm_kzalloc(&pdev->dev, sizeof(*imx_data), GFP_KERNEL);
-	if (!imx_data) {
-		err = -ENOMEM;
-		goto free_sdhci;
-	}
+	imx_data = sdhci_pltfm_priv(pltfm_host);
 
 	imx_data->socdata = of_id ? of_id->data : (struct esdhc_soc_data *)
 						  pdev->id_entry->driver_data;
-	pltfm_host->priv = imx_data;
 
 	imx_data->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
 	if (IS_ERR(imx_data->clk_ipg)) {
@@ -1241,7 +1237,7 @@ static int sdhci_esdhc_imx_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
 
 	pm_runtime_get_sync(&pdev->dev);
@@ -1264,7 +1260,7 @@ static int sdhci_esdhc_runtime_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = sdhci_runtime_suspend_host(host);
@@ -1282,7 +1278,7 @@ static int sdhci_esdhc_runtime_resume(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
 
 	if (!sdhci_sdio_irq_enabled(host)) {
 		clk_prepare_enable(imx_data->clk_per);
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 3b423b0ad8e7..1110f73b08aa 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -26,6 +26,7 @@ struct sdhci_iproc_data {
 	const struct sdhci_pltfm_data *pdata;
 	u32 caps;
 	u32 caps1;
+	u32 mmc_caps;
 };
 
 struct sdhci_iproc_host {
@@ -165,9 +166,25 @@ static const struct sdhci_iproc_data iproc_data = {
 	.pdata = &sdhci_iproc_pltfm_data,
 	.caps = 0x05E90000,
 	.caps1 = 0x00000064,
+	.mmc_caps = MMC_CAP_1_8V_DDR,
+};
+
+static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = {
+	.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
+		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_MISSING_CAPS,
+	.ops = &sdhci_iproc_ops,
+};
+
+static const struct sdhci_iproc_data bcm2835_data = {
+	.pdata = &sdhci_bcm2835_pltfm_data,
+	.caps = SDHCI_CAN_VDD_330,
+	.caps1 = 0x00000000,
+	.mmc_caps = 0x00000000,
 };
 
 static const struct of_device_id sdhci_iproc_of_match[] = {
+	{ .compatible = "brcm,bcm2835-sdhci", .data = &bcm2835_data },
 	{ .compatible = "brcm,sdhci-iproc-cygnus", .data = &iproc_data },
 	{ }
 };
@@ -199,32 +216,37 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 	mmc_of_parse(host->mmc);
 	sdhci_get_of_property(pdev);
 
-	/* Enable EMMC 1/8V DDR capable */
-	host->mmc->caps |= MMC_CAP_1_8V_DDR;
+	host->mmc->caps |= iproc_host->data->mmc_caps;
 
 	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pltfm_host->clk)) {
 		ret = PTR_ERR(pltfm_host->clk);
 		goto err;
 	}
+	ret = clk_prepare_enable(pltfm_host->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable host clk\n");
+		goto err;
+	}
 
 	if (iproc_host->data->pdata->quirks & SDHCI_QUIRK_MISSING_CAPS) {
 		host->caps = iproc_host->data->caps;
 		host->caps1 = iproc_host->data->caps1;
 	}
 
-	return sdhci_add_host(host);
+	ret = sdhci_add_host(host);
+	if (ret)
+		goto err_clk;
+
+	return 0;
 
+err_clk:
+	clk_disable_unprepare(pltfm_host->clk);
 err:
 	sdhci_pltfm_free(pdev);
 	return ret;
 }
 
-static int sdhci_iproc_remove(struct platform_device *pdev)
-{
-	return sdhci_pltfm_unregister(pdev);
-}
-
 static struct platform_driver sdhci_iproc_driver = {
 	.driver = {
 		.name = "sdhci-iproc",
@@ -232,7 +254,7 @@ static struct platform_driver sdhci_iproc_driver = {
 		.pm = SDHCI_PLTFM_PMOPS,
 	},
 	.probe = sdhci_iproc_probe,
-	.remove = sdhci_iproc_remove,
+	.remove = sdhci_pltfm_unregister,
 };
 module_platform_driver(sdhci_iproc_driver);
 
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 4695bee203ea..0653fe730150 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -60,7 +60,6 @@ struct sdhci_msm_host {
 	struct clk *pclk;	/* SDHC peripheral bus clock */
 	struct clk *bus_clk;	/* SDHC bus voter clock */
 	struct mmc_host *mmc;
-	struct sdhci_pltfm_data sdhci_msm_pdata;
 };
 
 /* Platform specific tuning */
@@ -418,7 +417,7 @@ static const struct of_device_id sdhci_msm_dt_match[] = {
 
 MODULE_DEVICE_TABLE(of, sdhci_msm_dt_match);
 
-static struct sdhci_ops sdhci_msm_ops = {
+static const struct sdhci_ops sdhci_msm_ops = {
 	.platform_execute_tuning = sdhci_msm_execute_tuning,
 	.reset = sdhci_reset,
 	.set_clock = sdhci_set_clock,
@@ -426,6 +425,12 @@ static struct sdhci_ops sdhci_msm_ops = {
 	.set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
+static const struct sdhci_pltfm_data sdhci_msm_pdata = {
+	.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
+		  SDHCI_QUIRK_SINGLE_POWER_WRITE,
+	.ops = &sdhci_msm_ops,
+};
+
 static int sdhci_msm_probe(struct platform_device *pdev)
 {
 	struct sdhci_host *host;
@@ -437,17 +442,12 @@ static int sdhci_msm_probe(struct platform_device *pdev)
 	u32 core_version, caps;
 	u8 core_major;
 
-	msm_host = devm_kzalloc(&pdev->dev, sizeof(*msm_host), GFP_KERNEL);
-	if (!msm_host)
-		return -ENOMEM;
-
-	msm_host->sdhci_msm_pdata.ops = &sdhci_msm_ops;
-	host = sdhci_pltfm_init(pdev, &msm_host->sdhci_msm_pdata, 0);
+	host = sdhci_pltfm_init(pdev, &sdhci_msm_pdata, sizeof(*msm_host));
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = msm_host;
+	msm_host = sdhci_pltfm_priv(pltfm_host);
 	msm_host->mmc = host->mmc;
 	msm_host->pdev = pdev;
 
@@ -522,9 +522,6 @@ static int sdhci_msm_probe(struct platform_device *pdev)
 	/* Set HC_MODE_EN bit in HC_MODE register */
 	writel_relaxed(HC_MODE_EN, (msm_host->core_mem + CORE_HC_MODE));
 
-	host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-	host->quirks |= SDHCI_QUIRK_SINGLE_POWER_WRITE;
-
 	host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
 	dev_dbg(&pdev->dev, "Host Version: 0x%x Vendor Version 0x%x\n",
 		host_version, ((host_version & SDHCI_VENDOR_VER_MASK) >>
@@ -570,16 +567,16 @@ static int sdhci_msm_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
 	int dead = (readl_relaxed(host->ioaddr + SDHCI_INT_STATUS) ==
 		    0xffffffff);
 
 	sdhci_remove_host(host, dead);
-	sdhci_pltfm_free(pdev);
 	clk_disable_unprepare(msm_host->clk);
 	clk_disable_unprepare(msm_host->pclk);
 	if (!IS_ERR(msm_host->bus_clk))
 		clk_disable_unprepare(msm_host->bus_clk);
+	sdhci_pltfm_free(pdev);
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index 75379cb0fb35..2e482b13d25e 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -21,6 +21,7 @@
 
 #include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/phy/phy.h>
 #include "sdhci-pltfm.h"
 
 #define SDHCI_ARASAN_CLK_CTRL_OFFSET	0x2c
@@ -32,9 +33,11 @@
 /**
  * struct sdhci_arasan_data
  * @clk_ahb:	Pointer to the AHB clock
+ * @phy: Pointer to the generic phy
  */
 struct sdhci_arasan_data {
 	struct clk	*clk_ahb;
+	struct phy	*phy;
 };
 
 static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
@@ -81,13 +84,22 @@ static int sdhci_arasan_suspend(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_arasan_data *sdhci_arasan = pltfm_host->priv;
+	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = sdhci_suspend_host(host);
 	if (ret)
 		return ret;
 
+	if (!IS_ERR(sdhci_arasan->phy)) {
+		ret = phy_power_off(sdhci_arasan->phy);
+		if (ret) {
+			dev_err(dev, "Cannot power off phy.\n");
+			sdhci_resume_host(host);
+			return ret;
+		}
+	}
+
 	clk_disable(pltfm_host->clk);
 	clk_disable(sdhci_arasan->clk_ahb);
 
@@ -106,7 +118,7 @@ static int sdhci_arasan_resume(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_arasan_data *sdhci_arasan = pltfm_host->priv;
+	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = clk_enable(sdhci_arasan->clk_ahb);
@@ -118,10 +130,17 @@ static int sdhci_arasan_resume(struct device *dev)
 	ret = clk_enable(pltfm_host->clk);
 	if (ret) {
 		dev_err(dev, "Cannot enable SD clock.\n");
-		clk_disable(sdhci_arasan->clk_ahb);
 		return ret;
 	}
 
+	if (!IS_ERR(sdhci_arasan->phy)) {
+		ret = phy_power_on(sdhci_arasan->phy);
+		if (ret) {
+			dev_err(dev, "Cannot power on phy.\n");
+			return ret;
+		}
+	}
+
 	return sdhci_resume_host(host);
 }
 #endif /* ! CONFIG_PM_SLEEP */
@@ -137,27 +156,32 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
 	struct sdhci_pltfm_host *pltfm_host;
 	struct sdhci_arasan_data *sdhci_arasan;
 
-	sdhci_arasan = devm_kzalloc(&pdev->dev, sizeof(*sdhci_arasan),
-			GFP_KERNEL);
-	if (!sdhci_arasan)
-		return -ENOMEM;
+	host = sdhci_pltfm_init(pdev, &sdhci_arasan_pdata,
+				sizeof(*sdhci_arasan));
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+	sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 
 	sdhci_arasan->clk_ahb = devm_clk_get(&pdev->dev, "clk_ahb");
 	if (IS_ERR(sdhci_arasan->clk_ahb)) {
 		dev_err(&pdev->dev, "clk_ahb clock not found.\n");
-		return PTR_ERR(sdhci_arasan->clk_ahb);
+		ret = PTR_ERR(sdhci_arasan->clk_ahb);
+		goto err_pltfm_free;
 	}
 
 	clk_xin = devm_clk_get(&pdev->dev, "clk_xin");
 	if (IS_ERR(clk_xin)) {
 		dev_err(&pdev->dev, "clk_xin clock not found.\n");
-		return PTR_ERR(clk_xin);
+		ret = PTR_ERR(clk_xin);
+		goto err_pltfm_free;
 	}
 
 	ret = clk_prepare_enable(sdhci_arasan->clk_ahb);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to enable AHB clock.\n");
-		return ret;
+		goto err_pltfm_free;
 	}
 
 	ret = clk_prepare_enable(clk_xin);
@@ -166,20 +190,7 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
 		goto clk_dis_ahb;
 	}
 
-	host = sdhci_pltfm_init(pdev, &sdhci_arasan_pdata, 0);
-	if (IS_ERR(host)) {
-		ret = PTR_ERR(host);
-		goto clk_disable_all;
-	}
-
-	if (of_device_is_compatible(pdev->dev.of_node, "arasan,sdhci-4.9a")) {
-		host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT;
-		host->quirks2 |= SDHCI_QUIRK2_HOST_NO_CMD23;
-	}
-
 	sdhci_get_of_property(pdev);
-	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = sdhci_arasan;
 	pltfm_host->clk = clk_xin;
 
 	ret = mmc_of_parse(host->mmc);
@@ -188,31 +199,69 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
 		goto clk_disable_all;
 	}
 
+	sdhci_arasan->phy = ERR_PTR(-ENODEV);
+	if (of_device_is_compatible(pdev->dev.of_node,
+				    "arasan,sdhci-5.1")) {
+		sdhci_arasan->phy = devm_phy_get(&pdev->dev,
+						 "phy_arasan");
+		if (IS_ERR(sdhci_arasan->phy)) {
+			ret = PTR_ERR(sdhci_arasan->phy);
+			dev_err(&pdev->dev, "No phy for arasan,sdhci-5.1.\n");
+			goto clk_disable_all;
+		}
+
+		ret = phy_init(sdhci_arasan->phy);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "phy_init err.\n");
+			goto clk_disable_all;
+		}
+
+		ret = phy_power_on(sdhci_arasan->phy);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "phy_power_on err.\n");
+			goto err_phy_power;
+		}
+	}
+
 	ret = sdhci_add_host(host);
 	if (ret)
-		goto err_pltfm_free;
+		goto err_add_host;
 
 	return 0;
 
-err_pltfm_free:
-	sdhci_pltfm_free(pdev);
+err_add_host:
+	if (!IS_ERR(sdhci_arasan->phy))
+		phy_power_off(sdhci_arasan->phy);
+err_phy_power:
+	if (!IS_ERR(sdhci_arasan->phy))
+		phy_exit(sdhci_arasan->phy);
 clk_disable_all:
 	clk_disable_unprepare(clk_xin);
 clk_dis_ahb:
 	clk_disable_unprepare(sdhci_arasan->clk_ahb);
-
+err_pltfm_free:
+	sdhci_pltfm_free(pdev);
 	return ret;
 }
 
 static int sdhci_arasan_remove(struct platform_device *pdev)
 {
+	int ret;
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_arasan_data *sdhci_arasan = pltfm_host->priv;
+	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
+	struct clk *clk_ahb = sdhci_arasan->clk_ahb;
 
-	clk_disable_unprepare(sdhci_arasan->clk_ahb);
+	if (!IS_ERR(sdhci_arasan->phy)) {
+		phy_power_off(sdhci_arasan->phy);
+		phy_exit(sdhci_arasan->phy);
+	}
 
-	return sdhci_pltfm_unregister(pdev);
+	ret = sdhci_pltfm_unregister(pdev);
+
+	clk_disable_unprepare(clk_ahb);
+
+	return ret;
 }
 
 static const struct of_device_id sdhci_arasan_of_match[] = {
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 9cb86fb25976..2703aa90d018 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -18,6 +18,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -58,7 +59,7 @@ static int sdhci_at91_runtime_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_at91_priv *priv = pltfm_host->priv;
+	struct sdhci_at91_priv *priv = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = sdhci_runtime_suspend_host(host);
@@ -74,7 +75,7 @@ static int sdhci_at91_runtime_resume(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_at91_priv *priv = pltfm_host->priv;
+	struct sdhci_at91_priv *priv = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = clk_prepare_enable(priv->mainck);
@@ -124,11 +125,12 @@ static int sdhci_at91_probe(struct platform_device *pdev)
 		return -EINVAL;
 	soc_data = match->data;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&pdev->dev, "unable to allocate private data\n");
-		return -ENOMEM;
-	}
+	host = sdhci_pltfm_init(pdev, soc_data, sizeof(*priv));
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+	priv = sdhci_pltfm_priv(pltfm_host);
 
 	priv->mainck = devm_clk_get(&pdev->dev, "baseclk");
 	if (IS_ERR(priv->mainck)) {
@@ -148,10 +150,6 @@ static int sdhci_at91_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->gck);
 	}
 
-	host = sdhci_pltfm_init(pdev, soc_data, 0);
-	if (IS_ERR(host))
-		return PTR_ERR(host);
-
 	/*
 	 * The mult clock is provided by as a generated clock by the PMC
 	 * controller. In order to set the rate of gck, we have to get the
@@ -191,9 +189,6 @@ static int sdhci_at91_probe(struct platform_device *pdev)
 	clk_prepare_enable(priv->mainck);
 	clk_prepare_enable(priv->gck);
 
-	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = priv;
-
 	ret = mmc_of_parse(host->mmc);
 	if (ret)
 		goto clocks_disable_unprepare;
@@ -210,6 +205,25 @@ static int sdhci_at91_probe(struct platform_device *pdev)
 	if (ret)
 		goto pm_runtime_disable;
 
+	/*
+	 * When calling sdhci_runtime_suspend_host(), the sdhci layer makes
+	 * the assumption that all the clocks of the controller are disabled.
+	 * It means we can't get irq from it when it is runtime suspended.
+	 * For that reason, it is not planned to wake-up on a card detect irq
+	 * from the controller.
+	 * If we want to use runtime PM and to be able to wake-up on card
+	 * insertion, we have to use a GPIO for the card detection or we can
+	 * use polling. Be aware that using polling will resume/suspend the
+	 * controller between each attempt.
+	 * Disable SDHCI_QUIRK_BROKEN_CARD_DETECTION to be sure nobody tries
+	 * to enable polling via device tree with broken-cd property.
+	 */
+	if (!(host->mmc->caps & MMC_CAP_NONREMOVABLE) &&
+	    IS_ERR_VALUE(mmc_gpio_get_cd(host->mmc))) {
+		host->mmc->caps |= MMC_CAP_NEEDS_POLL;
+		host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+	}
+
 	pm_runtime_put_autosuspend(&pdev->dev);
 
 	return 0;
@@ -231,7 +245,10 @@ static int sdhci_at91_remove(struct platform_device *pdev)
 {
 	struct sdhci_host	*host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host	*pltfm_host = sdhci_priv(host);
-	struct sdhci_at91_priv	*priv = pltfm_host->priv;
+	struct sdhci_at91_priv	*priv = sdhci_pltfm_priv(pltfm_host);
+	struct clk *gck = priv->gck;
+	struct clk *hclock = priv->hclock;
+	struct clk *mainck = priv->mainck;
 
 	pm_runtime_get_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
@@ -239,9 +256,9 @@ static int sdhci_at91_remove(struct platform_device *pdev)
 
 	sdhci_pltfm_unregister(pdev);
 
-	clk_disable_unprepare(priv->gck);
-	clk_disable_unprepare(priv->hclock);
-	clk_disable_unprepare(priv->mainck);
+	clk_disable_unprepare(gck);
+	clk_disable_unprepare(hclock);
+	clk_disable_unprepare(mainck);
 
 	return 0;
 }
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 83b1226471c1..3f34d354f1fc 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -49,7 +49,7 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host,
 				     int spec_reg, u32 value)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_esdhc *esdhc = pltfm_host->priv;
+	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
 	u32 ret;
 
 	/*
@@ -354,7 +354,7 @@ static void esdhc_le_writeb(struct sdhci_host *host, u8 val, int reg)
 static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_esdhc *esdhc = pltfm_host->priv;
+	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
 	bool applicable;
 	dma_addr_t dmastart;
 	dma_addr_t dmanow;
@@ -404,7 +404,7 @@ static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host)
 static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_esdhc *esdhc = pltfm_host->priv;
+	struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
 	int pre_div = 1;
 	int div = 1;
 	u32 temp;
@@ -569,15 +569,12 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
 	u16 host_ver;
 
 	pltfm_host = sdhci_priv(host);
-	esdhc = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_esdhc),
-			     GFP_KERNEL);
+	esdhc = sdhci_pltfm_priv(pltfm_host);
 
 	host_ver = sdhci_readw(host, SDHCI_HOST_VERSION);
 	esdhc->vendor_ver = (host_ver & SDHCI_VENDOR_VER_MASK) >>
 			     SDHCI_VENDOR_VER_SHIFT;
 	esdhc->spec_ver = host_ver & SDHCI_SPEC_VER_MASK;
-
-	pltfm_host->priv = esdhc;
 }
 
 static int sdhci_esdhc_probe(struct platform_device *pdev)
@@ -591,9 +588,11 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
 	np = pdev->dev.of_node;
 
 	if (of_get_property(np, "little-endian", NULL))
-		host = sdhci_pltfm_init(pdev, &sdhci_esdhc_le_pdata, 0);
+		host = sdhci_pltfm_init(pdev, &sdhci_esdhc_le_pdata,
+					sizeof(struct sdhci_esdhc));
 	else
-		host = sdhci_pltfm_init(pdev, &sdhci_esdhc_be_pdata, 0);
+		host = sdhci_pltfm_init(pdev, &sdhci_esdhc_be_pdata,
+					sizeof(struct sdhci_esdhc));
 
 	if (IS_ERR(host))
 		return PTR_ERR(host);
@@ -603,7 +602,7 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
 	sdhci_get_of_property(pdev);
 
 	pltfm_host = sdhci_priv(host);
-	esdhc = pltfm_host->priv;
+	esdhc = sdhci_pltfm_priv(pltfm_host);
 	if (esdhc->vendor_ver == VENDOR_V_22)
 		host->quirks2 |= SDHCI_QUIRK2_HOST_NO_CMD23;
 
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index df3b8eced8c4..62aa5d0efcee 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1302,7 +1302,6 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
 {
 	struct sdhci_pci_slot *slot;
 	struct pci_dev *pdev;
-	int ret = -1;
 
 	slot = sdhci_priv(host);
 	pdev = slot->chip->pdev;
@@ -1314,20 +1313,6 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
 			"doesn't fully claim to support it.\n");
 	}
 
-	if (host->flags & SDHCI_USE_64_BIT_DMA) {
-		if (host->quirks2 & SDHCI_QUIRK2_BROKEN_64_BIT_DMA) {
-			host->flags &= ~SDHCI_USE_64_BIT_DMA;
-		} else {
-			ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-			if (ret)
-				dev_warn(&pdev->dev, "Failed to set 64-bit DMA mask\n");
-		}
-	}
-	if (ret)
-		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (ret)
-		return ret;
-
 	pci_set_master(pdev);
 
 	return 0;
diff --git a/drivers/mmc/host/sdhci-pic32.c b/drivers/mmc/host/sdhci-pic32.c
new file mode 100644
index 000000000000..059df707a2fe
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pic32.c
@@ -0,0 +1,257 @@
+/*
+ * Support of SDHCI platform devices for Microchip PIC32.
+ *
+ * Copyright (C) 2015 Microchip
+ * Andrei Pistirica, Paul Thacker
+ *
+ * Inspired by sdhci-pltfm.c
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/mmc/host.h>
+#include <linux/io.h>
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+#include <linux/platform_data/sdhci-pic32.h>
+
+#define SDH_SHARED_BUS_CTRL		0x000000E0
+#define SDH_SHARED_BUS_NR_CLK_PINS_MASK	0x7
+#define SDH_SHARED_BUS_NR_IRQ_PINS_MASK	0x30
+#define SDH_SHARED_BUS_CLK_PINS		0x10
+#define SDH_SHARED_BUS_IRQ_PINS		0x14
+#define SDH_CAPS_SDH_SLOT_TYPE_MASK	0xC0000000
+#define SDH_SLOT_TYPE_REMOVABLE		0x0
+#define SDH_SLOT_TYPE_EMBEDDED		0x1
+#define SDH_SLOT_TYPE_SHARED_BUS	0x2
+#define SDHCI_CTRL_CDSSEL		0x80
+#define SDHCI_CTRL_CDTLVL		0x40
+
+#define ADMA_FIFO_RD_THSHLD	512
+#define ADMA_FIFO_WR_THSHLD	512
+
+struct pic32_sdhci_priv {
+	struct platform_device	*pdev;
+	struct clk *sys_clk;
+	struct clk *base_clk;
+};
+
+static unsigned int pic32_sdhci_get_max_clock(struct sdhci_host *host)
+{
+	struct pic32_sdhci_priv *sdhci_pdata = sdhci_priv(host);
+
+	return clk_get_rate(sdhci_pdata->base_clk);
+}
+
+static void pic32_sdhci_set_bus_width(struct sdhci_host *host, int width)
+{
+	u8 ctrl;
+
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+	if (width == MMC_BUS_WIDTH_8) {
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		if (host->version >= SDHCI_SPEC_300)
+			ctrl |= SDHCI_CTRL_8BITBUS;
+	} else {
+		if (host->version >= SDHCI_SPEC_300)
+			ctrl &= ~SDHCI_CTRL_8BITBUS;
+		if (width == MMC_BUS_WIDTH_4)
+			ctrl |= SDHCI_CTRL_4BITBUS;
+		else
+			ctrl &= ~SDHCI_CTRL_4BITBUS;
+	}
+
+	/* CD select and test bits must be set for errata workaround. */
+	ctrl &= ~SDHCI_CTRL_CDTLVL;
+	ctrl |= SDHCI_CTRL_CDSSEL;
+	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+}
+
+static unsigned int pic32_sdhci_get_ro(struct sdhci_host *host)
+{
+	/*
+	 * The SDHCI_WRITE_PROTECT bit is unstable on current hardware so we
+	 * can't depend on its value in any way.
+	 */
+	return 0;
+}
+
+static const struct sdhci_ops pic32_sdhci_ops = {
+	.get_max_clock = pic32_sdhci_get_max_clock,
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = pic32_sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.set_uhs_signaling = sdhci_set_uhs_signaling,
+	.get_ro = pic32_sdhci_get_ro,
+};
+
+static struct sdhci_pltfm_data sdhci_pic32_pdata = {
+	.ops = &pic32_sdhci_ops,
+	.quirks = SDHCI_QUIRK_NO_HISPD_BIT,
+	.quirks2 = SDHCI_QUIRK2_NO_1_8_V,
+};
+
+static void pic32_sdhci_shared_bus(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	u32 bus = readl(host->ioaddr + SDH_SHARED_BUS_CTRL);
+	u32 clk_pins = (bus & SDH_SHARED_BUS_NR_CLK_PINS_MASK) >> 0;
+	u32 irq_pins = (bus & SDH_SHARED_BUS_NR_IRQ_PINS_MASK) >> 4;
+
+	/* select first clock */
+	if (clk_pins & 1)
+		bus |= (1 << SDH_SHARED_BUS_CLK_PINS);
+
+	/* select first interrupt */
+	if (irq_pins & 1)
+		bus |= (1 << SDH_SHARED_BUS_IRQ_PINS);
+
+	writel(bus, host->ioaddr + SDH_SHARED_BUS_CTRL);
+}
+
+static int pic32_sdhci_probe_platform(struct platform_device *pdev,
+				      struct pic32_sdhci_priv *pdata)
+{
+	int ret = 0;
+	u32 caps_slot_type;
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+
+	/* Check card slot connected on shared bus. */
+	host->caps = readl(host->ioaddr + SDHCI_CAPABILITIES);
+	caps_slot_type = (host->caps & SDH_CAPS_SDH_SLOT_TYPE_MASK) >> 30;
+	if (caps_slot_type == SDH_SLOT_TYPE_SHARED_BUS)
+		pic32_sdhci_shared_bus(pdev);
+
+	return ret;
+}
+
+static int pic32_sdhci_probe(struct platform_device *pdev)
+{
+	struct sdhci_host *host;
+	struct sdhci_pltfm_host *pltfm_host;
+	struct pic32_sdhci_priv *sdhci_pdata;
+	struct pic32_sdhci_platform_data *plat_data;
+	int ret;
+
+	host = sdhci_pltfm_init(pdev, &sdhci_pic32_pdata,
+				sizeof(struct pic32_sdhci_priv));
+	if (IS_ERR(host)) {
+		ret = PTR_ERR(host);
+		goto err;
+	}
+
+	pltfm_host = sdhci_priv(host);
+	sdhci_pdata = sdhci_pltfm_priv(pltfm_host);
+
+	plat_data = pdev->dev.platform_data;
+	if (plat_data && plat_data->setup_dma) {
+		ret = plat_data->setup_dma(ADMA_FIFO_RD_THSHLD,
+					   ADMA_FIFO_WR_THSHLD);
+		if (ret)
+			goto err_host;
+	}
+
+	sdhci_pdata->sys_clk = devm_clk_get(&pdev->dev, "sys_clk");
+	if (IS_ERR(sdhci_pdata->sys_clk)) {
+		ret = PTR_ERR(sdhci_pdata->sys_clk);
+		dev_err(&pdev->dev, "Error getting clock\n");
+		goto err_host;
+	}
+
+	ret = clk_prepare_enable(sdhci_pdata->sys_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Error enabling clock\n");
+		goto err_host;
+	}
+
+	sdhci_pdata->base_clk = devm_clk_get(&pdev->dev, "base_clk");
+	if (IS_ERR(sdhci_pdata->base_clk)) {
+		ret = PTR_ERR(sdhci_pdata->base_clk);
+		dev_err(&pdev->dev, "Error getting clock\n");
+		goto err_sys_clk;
+	}
+
+	ret = clk_prepare_enable(sdhci_pdata->base_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Error enabling clock\n");
+		goto err_base_clk;
+	}
+
+	ret = mmc_of_parse(host->mmc);
+	if (ret)
+		goto err_base_clk;
+
+	ret = pic32_sdhci_probe_platform(pdev, sdhci_pdata);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to probe platform!\n");
+		goto err_base_clk;
+	}
+
+	ret = sdhci_add_host(host);
+	if (ret) {
+		dev_err(&pdev->dev, "error adding host\n");
+		goto err_base_clk;
+	}
+
+	dev_info(&pdev->dev, "Successfully added sdhci host\n");
+	return 0;
+
+err_base_clk:
+	clk_disable_unprepare(sdhci_pdata->base_clk);
+err_sys_clk:
+	clk_disable_unprepare(sdhci_pdata->sys_clk);
+err_host:
+	sdhci_pltfm_free(pdev);
+err:
+	dev_err(&pdev->dev, "pic32-sdhci probe failed: %d\n", ret);
+	return ret;
+}
+
+static int pic32_sdhci_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct pic32_sdhci_priv *sdhci_pdata = sdhci_priv(host);
+	u32 scratch;
+
+	scratch = readl(host->ioaddr + SDHCI_INT_STATUS);
+	sdhci_remove_host(host, scratch == (u32)~0);
+	clk_disable_unprepare(sdhci_pdata->base_clk);
+	clk_disable_unprepare(sdhci_pdata->sys_clk);
+	sdhci_pltfm_free(pdev);
+
+	return 0;
+}
+
+static const struct of_device_id pic32_sdhci_id_table[] = {
+	{ .compatible = "microchip,pic32mzda-sdhci" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, pic32_sdhci_id_table);
+
+static struct platform_driver pic32_sdhci_driver = {
+	.driver = {
+		.name	= "pic32-sdhci",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(pic32_sdhci_id_table),
+	},
+	.probe		= pic32_sdhci_probe,
+	.remove		= pic32_sdhci_remove,
+};
+
+module_platform_driver(pic32_sdhci_driver);
+
+MODULE_DESCRIPTION("Microchip PIC32 SDHCI driver");
+MODULE_AUTHOR("Pistirica Sorin Andrei & Sandeep Sheriker");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 04bc2481e5c3..d38053bf9e4d 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -23,7 +23,6 @@ struct sdhci_pltfm_data {
 
 struct sdhci_pltfm_host {
 	struct clk *clk;
-	void *priv; /* to handle quirks across io-accessor calls */
 
 	/* migrate from sdhci_of_host */
 	unsigned int clock;
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index beffd8615489..1d8dd3540636 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -177,7 +177,6 @@ static int sdhci_pxav2_probe(struct platform_device *pdev)
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = NULL;
 
 	clk = clk_get(dev, "PXA-SDHCLK");
 	if (IS_ERR(clk)) {
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index f5edf9d3a18a..aca439d3ca83 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -132,11 +132,15 @@ static int armada_38x_quirks(struct platform_device *pdev,
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 	struct resource *res;
 
 	host->quirks &= ~SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN;
 	host->quirks |= SDHCI_QUIRK_MISSING_CAPS;
+
+	host->caps = sdhci_readl(host, SDHCI_CAPABILITIES);
+	host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1);
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   "conf-sdio3");
 	if (res) {
@@ -150,7 +154,6 @@ static int armada_38x_quirks(struct platform_device *pdev,
 		 * Configuration register, if the adjustment is not done,
 		 * remove them from the capabilities.
 		 */
-		host->caps1 = sdhci_readl(host, SDHCI_CAPABILITIES_1);
 		host->caps1 &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50);
 
 		dev_warn(&pdev->dev, "conf-sdio3 register not found: disabling SDR50 and DDR50 modes.\nConsider updating your dtb\n");
@@ -161,7 +164,6 @@ static int armada_38x_quirks(struct platform_device *pdev,
 	 * controller has different capabilities than the ones shown
 	 * in its registers
 	 */
-	host->caps = sdhci_readl(host, SDHCI_CAPABILITIES);
 	if (of_property_read_bool(np, "no-1-8-v")) {
 		host->caps &= ~SDHCI_CAN_VDD_180;
 		host->mmc->caps &= ~MMC_CAP_1_8V_DDR;
@@ -201,7 +203,7 @@ static void pxav3_reset(struct sdhci_host *host, u8 mask)
 static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 	u16 tmp;
 	int count;
 
@@ -250,7 +252,7 @@ static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode)
 static void pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 	u16 ctrl_2;
 
 	/*
@@ -370,16 +372,12 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
 	const struct of_device_id *match;
 	int ret;
 
-	pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL);
-	if (!pxa)
-		return -ENOMEM;
-
-	host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0);
+	host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, sizeof(*pxa));
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
 	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = pxa;
+	pxa = sdhci_pltfm_priv(pltfm_host);
 
 	pxa->clk_io = devm_clk_get(dev, "io");
 	if (IS_ERR(pxa->clk_io))
@@ -486,7 +484,7 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 
 	pm_runtime_get_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
@@ -535,7 +533,7 @@ static int sdhci_pxav3_runtime_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 	int ret;
 
 	ret = sdhci_runtime_suspend_host(host);
@@ -553,7 +551,7 @@ static int sdhci_pxav3_runtime_resume(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
+	struct sdhci_pxa *pxa = sdhci_pltfm_priv(pltfm_host);
 
 	clk_prepare_enable(pxa->clk_io);
 	if (!IS_ERR(pxa->clk_core))
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index 969c2b0d57fd..320e1c2f8853 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -251,7 +251,7 @@ static int sdhci_st_set_dll_for_clock(struct sdhci_host *host)
 {
 	int ret = 0;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
 
 	if (host->clock > CLK_TO_CHECK_DLL_LOCK) {
 		st_mmcss_set_dll(pdata->top_ioaddr);
@@ -265,7 +265,7 @@ static void sdhci_st_set_uhs_signaling(struct sdhci_host *host,
 					unsigned int uhs)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
 	u16 ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 	int ret = 0;
 
@@ -357,10 +357,7 @@ static int sdhci_st_probe(struct platform_device *pdev)
 	int ret = 0;
 	u16 host_version;
 	struct resource *res;
-
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return -ENOMEM;
+	struct reset_control *rstc;
 
 	clk =  devm_clk_get(&pdev->dev, "mmc");
 	if (IS_ERR(clk)) {
@@ -368,19 +365,23 @@ static int sdhci_st_probe(struct platform_device *pdev)
 		return PTR_ERR(clk);
 	}
 
-	pdata->rstc = devm_reset_control_get(&pdev->dev, NULL);
-	if (IS_ERR(pdata->rstc))
-		pdata->rstc = NULL;
+	rstc = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(rstc))
+		rstc = NULL;
 	else
-		reset_control_deassert(pdata->rstc);
+		reset_control_deassert(rstc);
 
-	host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
+	host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, sizeof(*pdata));
 	if (IS_ERR(host)) {
 		dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
 		ret = PTR_ERR(host);
 		goto err_pltfm_init;
 	}
 
+	pltfm_host = sdhci_priv(host);
+	pdata = sdhci_pltfm_priv(pltfm_host);
+	pdata->rstc = rstc;
+
 	ret = mmc_of_parse(host->mmc);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed mmc_of_parse\n");
@@ -398,8 +399,6 @@ static int sdhci_st_probe(struct platform_device *pdev)
 		pdata->top_ioaddr = NULL;
 	}
 
-	pltfm_host = sdhci_priv(host);
-	pltfm_host->priv = pdata;
 	pltfm_host->clk = clk;
 
 	/* Configure the Arasan HC inside the flashSS */
@@ -427,8 +426,8 @@ err_out:
 err_of:
 	sdhci_pltfm_free(pdev);
 err_pltfm_init:
-	if (pdata->rstc)
-		reset_control_assert(pdata->rstc);
+	if (rstc)
+		reset_control_assert(rstc);
 
 	return ret;
 }
@@ -437,13 +436,14 @@ static int sdhci_st_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
+	struct reset_control *rstc = pdata->rstc;
 	int ret;
 
 	ret = sdhci_pltfm_unregister(pdev);
 
-	if (pdata->rstc)
-		reset_control_assert(pdata->rstc);
+	if (rstc)
+		reset_control_assert(rstc);
 
 	return ret;
 }
@@ -453,7 +453,7 @@ static int sdhci_st_suspend(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
 	int ret = sdhci_suspend_host(host);
 
 	if (ret)
@@ -471,7 +471,7 @@ static int sdhci_st_resume(struct device *dev)
 {
 	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct st_mmc_platform_data *pdata = pltfm_host->priv;
+	struct st_mmc_platform_data *pdata = sdhci_pltfm_priv(pltfm_host);
 	struct device_node *np = dev->of_node;
 
 	clk_prepare_enable(pltfm_host->clk);
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 83c4bf7bc16c..f8c4762bb48d 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -12,6 +12,7 @@
  *
  */
 
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -42,12 +43,17 @@
 #define SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300	0x20
 #define SDHCI_MISC_CTRL_ENABLE_DDR50		0x200
 
+#define SDHCI_TEGRA_AUTO_CAL_CONFIG		0x1e4
+#define SDHCI_AUTO_CAL_START			BIT(31)
+#define SDHCI_AUTO_CAL_ENABLE			BIT(29)
+
 #define NVQUIRK_FORCE_SDHCI_SPEC_200	BIT(0)
 #define NVQUIRK_ENABLE_BLOCK_GAP_DET	BIT(1)
 #define NVQUIRK_ENABLE_SDHCI_SPEC_300	BIT(2)
 #define NVQUIRK_ENABLE_SDR50		BIT(3)
 #define NVQUIRK_ENABLE_SDR104		BIT(4)
 #define NVQUIRK_ENABLE_DDR50		BIT(5)
+#define NVQUIRK_HAS_PADCALIB		BIT(6)
 
 struct sdhci_tegra_soc_data {
 	const struct sdhci_pltfm_data *pdata;
@@ -58,12 +64,13 @@ struct sdhci_tegra {
 	const struct sdhci_tegra_soc_data *soc_data;
 	struct gpio_desc *power_gpio;
 	bool ddr_signaling;
+	bool pad_calib_required;
 };
 
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
 	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
 
 	if (unlikely((soc_data->nvquirks & NVQUIRK_FORCE_SDHCI_SPEC_200) &&
@@ -99,7 +106,7 @@ static void tegra_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 static void tegra_sdhci_writel(struct sdhci_host *host, u32 val, int reg)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
 	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
 
 	/* Seems like we're getting spurious timeout and crc errors, so
@@ -131,7 +138,7 @@ static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host)
 static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
 	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
 	u32 misc_ctrl, clk_ctrl;
 
@@ -147,10 +154,16 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
 	/* Advertise UHS modes as supported by host */
 	if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50)
 		misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR50;
+	else
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR50;
 	if (soc_data->nvquirks & NVQUIRK_ENABLE_DDR50)
 		misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50;
+	else
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_DDR50;
 	if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104)
 		misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104;
+	else
+		misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR104;
 	sdhci_writel(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
 
 	clk_ctrl = sdhci_readl(host, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
@@ -159,6 +172,9 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
 		clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE;
 	sdhci_writel(host, clk_ctrl, SDHCI_TEGRA_VENDOR_CLOCK_CTRL);
 
+	if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB)
+		tegra_host->pad_calib_required = true;
+
 	tegra_host->ddr_signaling = false;
 }
 
@@ -181,27 +197,43 @@ static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
 	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 }
 
+static void tegra_sdhci_pad_autocalib(struct sdhci_host *host)
+{
+	u32 val;
+
+	mdelay(1);
+
+	val = sdhci_readl(host, SDHCI_TEGRA_AUTO_CAL_CONFIG);
+	val |= SDHCI_AUTO_CAL_ENABLE | SDHCI_AUTO_CAL_START;
+	sdhci_writel(host,val, SDHCI_TEGRA_AUTO_CAL_CONFIG);
+}
+
 static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
 	unsigned long host_clk;
 
 	if (!clock)
-		return;
+		return sdhci_set_clock(host, clock);
 
 	host_clk = tegra_host->ddr_signaling ? clock * 2 : clock;
 	clk_set_rate(pltfm_host->clk, host_clk);
 	host->max_clk = clk_get_rate(pltfm_host->clk);
 
-	return sdhci_set_clock(host, clock);
+	sdhci_set_clock(host, clock);
+
+	if (tegra_host->pad_calib_required) {
+		tegra_sdhci_pad_autocalib(host);
+		tegra_host->pad_calib_required = false;
+	}
 }
 
 static void tegra_sdhci_set_uhs_signaling(struct sdhci_host *host,
 					  unsigned timing)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_tegra *tegra_host = pltfm_host->priv;
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
 
 	if (timing == MMC_TIMING_UHS_DDR50)
 		tegra_host->ddr_signaling = true;
@@ -264,6 +296,16 @@ static int tegra_sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
 	return mmc_send_tuning(host->mmc, opcode, NULL);
 }
 
+static void tegra_sdhci_voltage_switch(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_tegra *tegra_host = sdhci_pltfm_priv(pltfm_host);
+	const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+
+	if (soc_data->nvquirks & NVQUIRK_HAS_PADCALIB)
+		tegra_host->pad_calib_required = true;
+}
+
 static const struct sdhci_ops tegra_sdhci_ops = {
 	.get_ro     = tegra_sdhci_get_ro,
 	.read_w     = tegra_sdhci_readw,
@@ -273,6 +315,7 @@ static const struct sdhci_ops tegra_sdhci_ops = {
 	.reset      = tegra_sdhci_reset,
 	.platform_execute_tuning = tegra_sdhci_execute_tuning,
 	.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
+	.voltage_switch = tegra_sdhci_voltage_switch,
 	.get_max_clock = tegra_sdhci_get_max_clock,
 };
 
@@ -306,7 +349,8 @@ static const struct sdhci_tegra_soc_data soc_data_tegra30 = {
 	.pdata = &sdhci_tegra30_pdata,
 	.nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
 		    NVQUIRK_ENABLE_SDR50 |
-		    NVQUIRK_ENABLE_SDR104,
+		    NVQUIRK_ENABLE_SDR104 |
+		    NVQUIRK_HAS_PADCALIB,
 };
 
 static const struct sdhci_ops tegra114_sdhci_ops = {
@@ -319,6 +363,7 @@ static const struct sdhci_ops tegra114_sdhci_ops = {
 	.reset      = tegra_sdhci_reset,
 	.platform_execute_tuning = tegra_sdhci_execute_tuning,
 	.set_uhs_signaling = tegra_sdhci_set_uhs_signaling,
+	.voltage_switch = tegra_sdhci_voltage_switch,
 	.get_max_clock = tegra_sdhci_get_max_clock,
 };
 
@@ -335,9 +380,14 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
 
 static const struct sdhci_tegra_soc_data soc_data_tegra114 = {
 	.pdata = &sdhci_tegra114_pdata,
+};
+
+static const struct sdhci_tegra_soc_data soc_data_tegra124 = {
+	.pdata = &sdhci_tegra114_pdata,
 	.nvquirks = NVQUIRK_ENABLE_SDR50 |
 		    NVQUIRK_ENABLE_DDR50 |
-		    NVQUIRK_ENABLE_SDR104,
+		    NVQUIRK_ENABLE_SDR104 |
+		    NVQUIRK_HAS_PADCALIB,
 };
 
 static const struct sdhci_pltfm_data sdhci_tegra210_pdata = {
@@ -357,7 +407,7 @@ static const struct sdhci_tegra_soc_data soc_data_tegra210 = {
 
 static const struct of_device_id sdhci_tegra_dt_match[] = {
 	{ .compatible = "nvidia,tegra210-sdhci", .data = &soc_data_tegra210 },
-	{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra114 },
+	{ .compatible = "nvidia,tegra124-sdhci", .data = &soc_data_tegra124 },
 	{ .compatible = "nvidia,tegra114-sdhci", .data = &soc_data_tegra114 },
 	{ .compatible = "nvidia,tegra30-sdhci", .data = &soc_data_tegra30 },
 	{ .compatible = "nvidia,tegra20-sdhci", .data = &soc_data_tegra20 },
@@ -380,20 +430,15 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 		return -EINVAL;
 	soc_data = match->data;
 
-	host = sdhci_pltfm_init(pdev, soc_data->pdata, 0);
+	host = sdhci_pltfm_init(pdev, soc_data->pdata, sizeof(*tegra_host));
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 	pltfm_host = sdhci_priv(host);
 
-	tegra_host = devm_kzalloc(&pdev->dev, sizeof(*tegra_host), GFP_KERNEL);
-	if (!tegra_host) {
-		dev_err(mmc_dev(host->mmc), "failed to allocate tegra_host\n");
-		rc = -ENOMEM;
-		goto err_alloc_tegra_host;
-	}
+	tegra_host = sdhci_pltfm_priv(pltfm_host);
 	tegra_host->ddr_signaling = false;
+	tegra_host->pad_calib_required = false;
 	tegra_host->soc_data = soc_data;
-	pltfm_host->priv = tegra_host;
 
 	rc = mmc_of_parse(host->mmc);
 	if (rc)
@@ -429,7 +474,6 @@ err_add_host:
 err_clk_get:
 err_power_req:
 err_parse_dt:
-err_alloc_tegra_host:
 	sdhci_pltfm_free(pdev);
 	return rc;
 }
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index add9fdfd1d8f..8670f162dec7 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -53,8 +53,6 @@ static void sdhci_finish_data(struct sdhci_host *);
 static void sdhci_finish_command(struct sdhci_host *);
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
-static int sdhci_pre_dma_transfer(struct sdhci_host *host,
-					struct mmc_data *data);
 static int sdhci_do_get_cd(struct sdhci_host *host);
 
 #ifdef CONFIG_PM
@@ -428,6 +426,31 @@ static void sdhci_transfer_pio(struct sdhci_host *host)
 	DBG("PIO transfer complete.\n");
 }
 
+static int sdhci_pre_dma_transfer(struct sdhci_host *host,
+				  struct mmc_data *data, int cookie)
+{
+	int sg_count;
+
+	/*
+	 * If the data buffers are already mapped, return the previous
+	 * dma_map_sg() result.
+	 */
+	if (data->host_cookie == COOKIE_PRE_MAPPED)
+		return data->sg_count;
+
+	sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+				data->flags & MMC_DATA_WRITE ?
+				DMA_TO_DEVICE : DMA_FROM_DEVICE);
+
+	if (sg_count == 0)
+		return -ENOSPC;
+
+	data->sg_count = sg_count;
+	data->host_cookie = cookie;
+
+	return sg_count;
+}
+
 static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
 {
 	local_irq_save(*flags);
@@ -462,41 +485,22 @@ static void sdhci_adma_mark_end(void *desc)
 	dma_desc->cmd |= cpu_to_le16(ADMA2_END);
 }
 
-static int sdhci_adma_table_pre(struct sdhci_host *host,
-	struct mmc_data *data)
+static void sdhci_adma_table_pre(struct sdhci_host *host,
+	struct mmc_data *data, int sg_count)
 {
-	int direction;
-
-	void *desc;
-	void *align;
-	dma_addr_t addr;
-	dma_addr_t align_addr;
-	int len, offset;
-
 	struct scatterlist *sg;
-	int i;
-	char *buffer;
 	unsigned long flags;
+	dma_addr_t addr, align_addr;
+	void *desc, *align;
+	char *buffer;
+	int len, offset, i;
 
 	/*
 	 * The spec does not specify endianness of descriptor table.
 	 * We currently guess that it is LE.
 	 */
 
-	if (data->flags & MMC_DATA_READ)
-		direction = DMA_FROM_DEVICE;
-	else
-		direction = DMA_TO_DEVICE;
-
-	host->align_addr = dma_map_single(mmc_dev(host->mmc),
-		host->align_buffer, host->align_buffer_sz, direction);
-	if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
-		goto fail;
-	BUG_ON(host->align_addr & SDHCI_ADMA2_MASK);
-
-	host->sg_count = sdhci_pre_dma_transfer(host, data);
-	if (host->sg_count < 0)
-		goto unmap_align;
+	host->sg_count = sg_count;
 
 	desc = host->adma_table;
 	align = host->align_buffer;
@@ -508,10 +512,9 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
 		len = sg_dma_len(sg);
 
 		/*
-		 * The SDHCI specification states that ADMA
-		 * addresses must be 32-bit aligned. If they
-		 * aren't, then we use a bounce buffer for
-		 * the (up to three) bytes that screw up the
+		 * The SDHCI specification states that ADMA addresses must
+		 * be 32-bit aligned. If they aren't, then we use a bounce
+		 * buffer for the (up to three) bytes that screw up the
 		 * alignment.
 		 */
 		offset = (SDHCI_ADMA2_ALIGN - (addr & SDHCI_ADMA2_MASK)) &
@@ -555,92 +558,56 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
 	}
 
 	if (host->quirks & SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC) {
-		/*
-		* Mark the last descriptor as the terminating descriptor
-		*/
+		/* Mark the last descriptor as the terminating descriptor */
 		if (desc != host->adma_table) {
 			desc -= host->desc_sz;
 			sdhci_adma_mark_end(desc);
 		}
 	} else {
-		/*
-		* Add a terminating entry.
-		*/
-
-		/* nop, end, valid */
+		/* Add a terminating entry - nop, end, valid */
 		sdhci_adma_write_desc(host, desc, 0, 0, ADMA2_NOP_END_VALID);
 	}
-
-	/*
-	 * Resync align buffer as we might have changed it.
-	 */
-	if (data->flags & MMC_DATA_WRITE) {
-		dma_sync_single_for_device(mmc_dev(host->mmc),
-			host->align_addr, host->align_buffer_sz, direction);
-	}
-
-	return 0;
-
-unmap_align:
-	dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
-		host->align_buffer_sz, direction);
-fail:
-	return -EINVAL;
 }
 
 static void sdhci_adma_table_post(struct sdhci_host *host,
 	struct mmc_data *data)
 {
-	int direction;
-
 	struct scatterlist *sg;
 	int i, size;
 	void *align;
 	char *buffer;
 	unsigned long flags;
-	bool has_unaligned;
-
-	if (data->flags & MMC_DATA_READ)
-		direction = DMA_FROM_DEVICE;
-	else
-		direction = DMA_TO_DEVICE;
 
-	dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
-		host->align_buffer_sz, direction);
+	if (data->flags & MMC_DATA_READ) {
+		bool has_unaligned = false;
 
-	/* Do a quick scan of the SG list for any unaligned mappings */
-	has_unaligned = false;
-	for_each_sg(data->sg, sg, host->sg_count, i)
-		if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
-			has_unaligned = true;
-			break;
-		}
+		/* Do a quick scan of the SG list for any unaligned mappings */
+		for_each_sg(data->sg, sg, host->sg_count, i)
+			if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
+				has_unaligned = true;
+				break;
+			}
 
-	if (has_unaligned && data->flags & MMC_DATA_READ) {
-		dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
-			data->sg_len, direction);
+		if (has_unaligned) {
+			dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
+					    data->sg_len, DMA_FROM_DEVICE);
 
-		align = host->align_buffer;
+			align = host->align_buffer;
 
-		for_each_sg(data->sg, sg, host->sg_count, i) {
-			if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
-				size = SDHCI_ADMA2_ALIGN -
-				       (sg_dma_address(sg) & SDHCI_ADMA2_MASK);
+			for_each_sg(data->sg, sg, host->sg_count, i) {
+				if (sg_dma_address(sg) & SDHCI_ADMA2_MASK) {
+					size = SDHCI_ADMA2_ALIGN -
+					       (sg_dma_address(sg) & SDHCI_ADMA2_MASK);
 
-				buffer = sdhci_kmap_atomic(sg, &flags);
-				memcpy(buffer, align, size);
-				sdhci_kunmap_atomic(buffer, &flags);
+					buffer = sdhci_kmap_atomic(sg, &flags);
+					memcpy(buffer, align, size);
+					sdhci_kunmap_atomic(buffer, &flags);
 
-				align += SDHCI_ADMA2_ALIGN;
+					align += SDHCI_ADMA2_ALIGN;
+				}
 			}
 		}
 	}
-
-	if (data->host_cookie == COOKIE_MAPPED) {
-		dma_unmap_sg(mmc_dev(host->mmc), data->sg,
-			data->sg_len, direction);
-		data->host_cookie = COOKIE_UNMAPPED;
-	}
 }
 
 static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
@@ -666,9 +633,20 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 	if (!data)
 		target_timeout = cmd->busy_timeout * 1000;
 	else {
-		target_timeout = data->timeout_ns / 1000;
-		if (host->clock)
-			target_timeout += data->timeout_clks / host->clock;
+		target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000);
+		if (host->clock && data->timeout_clks) {
+			unsigned long long val;
+
+			/*
+			 * data->timeout_clks is in units of clock cycles.
+			 * host->clock is in Hz.  target_timeout is in us.
+			 * Hence, us = 1000000 * cycles / Hz.  Round up.
+			 */
+			val = 1000000 * data->timeout_clks;
+			if (do_div(val, host->clock))
+				target_timeout++;
+			target_timeout += val;
+		}
 	}
 
 	/*
@@ -729,7 +707,6 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 {
 	u8 ctrl;
 	struct mmc_data *data = cmd->data;
-	int ret;
 
 	WARN_ON(host->data);
 
@@ -748,63 +725,48 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 	host->data_early = 0;
 	host->data->bytes_xfered = 0;
 
-	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))
-		host->flags |= SDHCI_REQ_USE_DMA;
-
-	/*
-	 * FIXME: This doesn't account for merging when mapping the
-	 * scatterlist.
-	 */
-	if (host->flags & SDHCI_REQ_USE_DMA) {
-		int broken, i;
+	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		struct scatterlist *sg;
+		unsigned int length_mask, offset_mask;
+		int i;
+
+		host->flags |= SDHCI_REQ_USE_DMA;
 
-		broken = 0;
+		/*
+		 * FIXME: This doesn't account for merging when mapping the
+		 * scatterlist.
+		 *
+		 * The assumption here being that alignment and lengths are
+		 * the same after DMA mapping to device address space.
+		 */
+		length_mask = 0;
+		offset_mask = 0;
 		if (host->flags & SDHCI_USE_ADMA) {
-			if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
-				broken = 1;
+			if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE) {
+				length_mask = 3;
+				/*
+				 * As we use up to 3 byte chunks to work
+				 * around alignment problems, we need to
+				 * check the offset as well.
+				 */
+				offset_mask = 3;
+			}
 		} else {
 			if (host->quirks & SDHCI_QUIRK_32BIT_DMA_SIZE)
-				broken = 1;
+				length_mask = 3;
+			if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR)
+				offset_mask = 3;
 		}
 
-		if (unlikely(broken)) {
+		if (unlikely(length_mask | offset_mask)) {
 			for_each_sg(data->sg, sg, data->sg_len, i) {
-				if (sg->length & 0x3) {
+				if (sg->length & length_mask) {
 					DBG("Reverting to PIO because of transfer size (%d)\n",
-						sg->length);
+					    sg->length);
 					host->flags &= ~SDHCI_REQ_USE_DMA;
 					break;
 				}
-			}
-		}
-	}
-
-	/*
-	 * The assumption here being that alignment is the same after
-	 * translation to device address space.
-	 */
-	if (host->flags & SDHCI_REQ_USE_DMA) {
-		int broken, i;
-		struct scatterlist *sg;
-
-		broken = 0;
-		if (host->flags & SDHCI_USE_ADMA) {
-			/*
-			 * As we use 3 byte chunks to work around
-			 * alignment problems, we need to check this
-			 * quirk.
-			 */
-			if (host->quirks & SDHCI_QUIRK_32BIT_ADMA_SIZE)
-				broken = 1;
-		} else {
-			if (host->quirks & SDHCI_QUIRK_32BIT_DMA_ADDR)
-				broken = 1;
-		}
-
-		if (unlikely(broken)) {
-			for_each_sg(data->sg, sg, data->sg_len, i) {
-				if (sg->offset & 0x3) {
+				if (sg->offset & offset_mask) {
 					DBG("Reverting to PIO because of bad alignment\n");
 					host->flags &= ~SDHCI_REQ_USE_DMA;
 					break;
@@ -814,39 +776,27 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
 	}
 
 	if (host->flags & SDHCI_REQ_USE_DMA) {
-		if (host->flags & SDHCI_USE_ADMA) {
-			ret = sdhci_adma_table_pre(host, data);
-			if (ret) {
-				/*
-				 * This only happens when someone fed
-				 * us an invalid request.
-				 */
-				WARN_ON(1);
-				host->flags &= ~SDHCI_REQ_USE_DMA;
-			} else {
-				sdhci_writel(host, host->adma_addr,
-					SDHCI_ADMA_ADDRESS);
-				if (host->flags & SDHCI_USE_64_BIT_DMA)
-					sdhci_writel(host,
-						     (u64)host->adma_addr >> 32,
-						     SDHCI_ADMA_ADDRESS_HI);
-			}
-		} else {
-			int sg_cnt;
+		int sg_cnt = sdhci_pre_dma_transfer(host, data, COOKIE_MAPPED);
 
-			sg_cnt = sdhci_pre_dma_transfer(host, data);
-			if (sg_cnt <= 0) {
-				/*
-				 * This only happens when someone fed
-				 * us an invalid request.
-				 */
-				WARN_ON(1);
-				host->flags &= ~SDHCI_REQ_USE_DMA;
-			} else {
-				WARN_ON(sg_cnt != 1);
-				sdhci_writel(host, sg_dma_address(data->sg),
-					SDHCI_DMA_ADDRESS);
-			}
+		if (sg_cnt <= 0) {
+			/*
+			 * This only happens when someone fed
+			 * us an invalid request.
+			 */
+			WARN_ON(1);
+			host->flags &= ~SDHCI_REQ_USE_DMA;
+		} else if (host->flags & SDHCI_USE_ADMA) {
+			sdhci_adma_table_pre(host, data, sg_cnt);
+
+			sdhci_writel(host, host->adma_addr, SDHCI_ADMA_ADDRESS);
+			if (host->flags & SDHCI_USE_64_BIT_DMA)
+				sdhci_writel(host,
+					     (u64)host->adma_addr >> 32,
+					     SDHCI_ADMA_ADDRESS_HI);
+		} else {
+			WARN_ON(sg_cnt != 1);
+			sdhci_writel(host, sg_dma_address(data->sg),
+				SDHCI_DMA_ADDRESS);
 		}
 	}
 
@@ -946,19 +896,9 @@ static void sdhci_finish_data(struct sdhci_host *host)
 	data = host->data;
 	host->data = NULL;
 
-	if (host->flags & SDHCI_REQ_USE_DMA) {
-		if (host->flags & SDHCI_USE_ADMA)
-			sdhci_adma_table_post(host, data);
-		else {
-			if (data->host_cookie == COOKIE_MAPPED) {
-				dma_unmap_sg(mmc_dev(host->mmc),
-					data->sg, data->sg_len,
-					(data->flags & MMC_DATA_READ) ?
-					DMA_FROM_DEVICE : DMA_TO_DEVICE);
-				data->host_cookie = COOKIE_UNMAPPED;
-			}
-		}
-	}
+	if ((host->flags & (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA)) ==
+	    (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA))
+		sdhci_adma_table_post(host, data);
 
 	/*
 	 * The specification states that the block count register must
@@ -1003,6 +943,9 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
 
 	WARN_ON(host->cmd);
 
+	/* Initially, a command has no error */
+	cmd->error = 0;
+
 	/* Wait max 10 ms */
 	timeout = 10;
 
@@ -1097,8 +1040,6 @@ static void sdhci_finish_command(struct sdhci_host *host)
 		}
 	}
 
-	host->cmd->error = 0;
-
 	/* Finished CMD23, now send actual command. */
 	if (host->cmd == host->mrq->sbc) {
 		host->cmd = NULL;
@@ -2114,39 +2055,12 @@ static void sdhci_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 	struct sdhci_host *host = mmc_priv(mmc);
 	struct mmc_data *data = mrq->data;
 
-	if (host->flags & SDHCI_REQ_USE_DMA) {
-		if (data->host_cookie == COOKIE_GIVEN ||
-				data->host_cookie == COOKIE_MAPPED)
-			dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-					 data->flags & MMC_DATA_WRITE ?
-					 DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		data->host_cookie = COOKIE_UNMAPPED;
-	}
-}
-
-static int sdhci_pre_dma_transfer(struct sdhci_host *host,
-				       struct mmc_data *data)
-{
-	int sg_count;
-
-	if (data->host_cookie == COOKIE_MAPPED) {
-		data->host_cookie = COOKIE_GIVEN;
-		return data->sg_count;
-	}
+	if (data->host_cookie != COOKIE_UNMAPPED)
+		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     data->flags & MMC_DATA_WRITE ?
+			       DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-	WARN_ON(data->host_cookie == COOKIE_GIVEN);
-
-	sg_count = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-				data->flags & MMC_DATA_WRITE ?
-				DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-	if (sg_count == 0)
-		return -ENOSPC;
-
-	data->sg_count = sg_count;
-	data->host_cookie = COOKIE_MAPPED;
-
-	return sg_count;
+	data->host_cookie = COOKIE_UNMAPPED;
 }
 
 static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
@@ -2157,7 +2071,7 @@ static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
 	mrq->data->host_cookie = COOKIE_UNMAPPED;
 
 	if (host->flags & SDHCI_REQ_USE_DMA)
-		sdhci_pre_dma_transfer(host, mrq->data);
+		sdhci_pre_dma_transfer(host, mrq->data, COOKIE_PRE_MAPPED);
 }
 
 static void sdhci_card_event(struct mmc_host *mmc)
@@ -2238,6 +2152,22 @@ static void sdhci_tasklet_finish(unsigned long param)
 	mrq = host->mrq;
 
 	/*
+	 * Always unmap the data buffers if they were mapped by
+	 * sdhci_prepare_data() whenever we finish with a request.
+	 * This avoids leaking DMA mappings on error.
+	 */
+	if (host->flags & SDHCI_REQ_USE_DMA) {
+		struct mmc_data *data = mrq->data;
+
+		if (data && data->host_cookie == COOKIE_MAPPED) {
+			dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+				     (data->flags & MMC_DATA_READ) ?
+				     DMA_FROM_DEVICE : DMA_TO_DEVICE);
+			data->host_cookie = COOKIE_UNMAPPED;
+		}
+	}
+
+	/*
 	 * The controller needs a reset of internal state machines
 	 * upon error conditions.
 	 */
@@ -2322,13 +2252,30 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
 		return;
 	}
 
-	if (intmask & SDHCI_INT_TIMEOUT)
-		host->cmd->error = -ETIMEDOUT;
-	else if (intmask & (SDHCI_INT_CRC | SDHCI_INT_END_BIT |
-			SDHCI_INT_INDEX))
-		host->cmd->error = -EILSEQ;
+	if (intmask & (SDHCI_INT_TIMEOUT | SDHCI_INT_CRC |
+		       SDHCI_INT_END_BIT | SDHCI_INT_INDEX)) {
+		if (intmask & SDHCI_INT_TIMEOUT)
+			host->cmd->error = -ETIMEDOUT;
+		else
+			host->cmd->error = -EILSEQ;
+
+		/*
+		 * If this command initiates a data phase and a response
+		 * CRC error is signalled, the card can start transferring
+		 * data - the card may have received the command without
+		 * error.  We must not terminate the mmc_request early.
+		 *
+		 * If the card did not receive the command or returned an
+		 * error which prevented it sending data, the data phase
+		 * will time out.
+		 */
+		if (host->cmd->data &&
+		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
+		     SDHCI_INT_CRC) {
+			host->cmd = NULL;
+			return;
+		}
 
-	if (host->cmd->error) {
 		tasklet_schedule(&host->finish_tasklet);
 		return;
 	}
@@ -2857,6 +2804,36 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
 
 EXPORT_SYMBOL_GPL(sdhci_alloc_host);
 
+static int sdhci_set_dma_mask(struct sdhci_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+	struct device *dev = mmc_dev(mmc);
+	int ret = -EINVAL;
+
+	if (host->quirks2 & SDHCI_QUIRK2_BROKEN_64_BIT_DMA)
+		host->flags &= ~SDHCI_USE_64_BIT_DMA;
+
+	/* Try 64-bit mask if hardware is capable  of it */
+	if (host->flags & SDHCI_USE_64_BIT_DMA) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+		if (ret) {
+			pr_warn("%s: Failed to set 64-bit DMA mask.\n",
+				mmc_hostname(mmc));
+			host->flags &= ~SDHCI_USE_64_BIT_DMA;
+		}
+	}
+
+	/* 32-bit mask as default & fallback */
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret)
+			pr_warn("%s: Failed to set 32-bit DMA mask.\n",
+				mmc_hostname(mmc));
+	}
+
+	return ret;
+}
+
 int sdhci_add_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
@@ -2928,17 +2905,21 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
 	 * implement.
 	 */
-	if (sdhci_readl(host, SDHCI_CAPABILITIES) & SDHCI_CAN_64BIT)
+	if (caps[0] & SDHCI_CAN_64BIT)
 		host->flags |= SDHCI_USE_64_BIT_DMA;
 
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
-		if (host->ops->enable_dma) {
-			if (host->ops->enable_dma(host)) {
-				pr_warn("%s: No suitable DMA available - falling back to PIO\n",
-					mmc_hostname(mmc));
-				host->flags &=
-					~(SDHCI_USE_SDMA | SDHCI_USE_ADMA);
-			}
+		ret = sdhci_set_dma_mask(host);
+
+		if (!ret && host->ops->enable_dma)
+			ret = host->ops->enable_dma(host);
+
+		if (ret) {
+			pr_warn("%s: No suitable DMA available - falling back to PIO\n",
+				mmc_hostname(mmc));
+			host->flags &= ~(SDHCI_USE_SDMA | SDHCI_USE_ADMA);
+
+			ret = 0;
 		}
 	}
 
@@ -2947,6 +2928,9 @@ int sdhci_add_host(struct sdhci_host *host)
 		host->flags &= ~SDHCI_USE_SDMA;
 
 	if (host->flags & SDHCI_USE_ADMA) {
+		dma_addr_t dma;
+		void *buf;
+
 		/*
 		 * The DMA descriptor table size is calculated as the maximum
 		 * number of segments times 2, to allow for an alignment
@@ -2962,33 +2946,27 @@ int sdhci_add_host(struct sdhci_host *host)
 					      SDHCI_ADMA2_32_DESC_SZ;
 			host->desc_sz = SDHCI_ADMA2_32_DESC_SZ;
 		}
-		host->adma_table = dma_alloc_coherent(mmc_dev(mmc),
-						      host->adma_table_sz,
-						      &host->adma_addr,
-						      GFP_KERNEL);
+
 		host->align_buffer_sz = SDHCI_MAX_SEGS * SDHCI_ADMA2_ALIGN;
-		host->align_buffer = kmalloc(host->align_buffer_sz, GFP_KERNEL);
-		if (!host->adma_table || !host->align_buffer) {
-			if (host->adma_table)
-				dma_free_coherent(mmc_dev(mmc),
-						  host->adma_table_sz,
-						  host->adma_table,
-						  host->adma_addr);
-			kfree(host->align_buffer);
+		buf = dma_alloc_coherent(mmc_dev(mmc), host->align_buffer_sz +
+					 host->adma_table_sz, &dma, GFP_KERNEL);
+		if (!buf) {
 			pr_warn("%s: Unable to allocate ADMA buffers - falling back to standard DMA\n",
 				mmc_hostname(mmc));
 			host->flags &= ~SDHCI_USE_ADMA;
-			host->adma_table = NULL;
-			host->align_buffer = NULL;
-		} else if (host->adma_addr & (SDHCI_ADMA2_DESC_ALIGN - 1)) {
+		} else if ((dma + host->align_buffer_sz) &
+			   (SDHCI_ADMA2_DESC_ALIGN - 1)) {
 			pr_warn("%s: unable to allocate aligned ADMA descriptor\n",
 				mmc_hostname(mmc));
 			host->flags &= ~SDHCI_USE_ADMA;
-			dma_free_coherent(mmc_dev(mmc), host->adma_table_sz,
-					  host->adma_table, host->adma_addr);
-			kfree(host->align_buffer);
-			host->adma_table = NULL;
-			host->align_buffer = NULL;
+			dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
+					  host->adma_table_sz, buf, dma);
+		} else {
+			host->align_buffer = buf;
+			host->align_addr = dma;
+
+			host->adma_table = buf + host->align_buffer_sz;
+			host->adma_addr = dma + host->align_buffer_sz;
 		}
 	}
 
@@ -3072,14 +3050,14 @@ int sdhci_add_host(struct sdhci_host *host)
 		if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT)
 			host->timeout_clk *= 1000;
 
+		if (override_timeout_clk)
+			host->timeout_clk = override_timeout_clk;
+
 		mmc->max_busy_timeout = host->ops->get_max_timeout_count ?
 			host->ops->get_max_timeout_count(host) : 1 << 27;
 		mmc->max_busy_timeout /= host->timeout_clk;
 	}
 
-	if (override_timeout_clk)
-		host->timeout_clk = override_timeout_clk;
-
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
@@ -3449,10 +3427,10 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 	if (!IS_ERR(mmc->supply.vqmmc))
 		regulator_disable(mmc->supply.vqmmc);
 
-	if (host->adma_table)
-		dma_free_coherent(mmc_dev(mmc), host->adma_table_sz,
-				  host->adma_table, host->adma_addr);
-	kfree(host->align_buffer);
+	if (host->align_buffer)
+		dma_free_coherent(mmc_dev(mmc), host->align_buffer_sz +
+				  host->adma_table_sz, host->align_buffer,
+				  host->align_addr);
 
 	host->adma_table = NULL;
 	host->align_buffer = NULL;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0115e9907bf8..3bd28033dbd9 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -316,8 +316,8 @@ struct sdhci_adma2_64_desc {
 
 enum sdhci_cookie {
 	COOKIE_UNMAPPED,
-	COOKIE_MAPPED,
-	COOKIE_GIVEN,
+	COOKIE_PRE_MAPPED,	/* mapped by sdhci_pre_req() */
+	COOKIE_MAPPED,		/* mapped by sdhci_prepare_data() */
 };
 
 struct sdhci_host {
diff --git a/drivers/mmc/host/sdricoh_cs.c b/drivers/mmc/host/sdricoh_cs.c
index b7e305775314..5ff26ab81eb1 100644
--- a/drivers/mmc/host/sdricoh_cs.c
+++ b/drivers/mmc/host/sdricoh_cs.c
@@ -398,10 +398,10 @@ static struct mmc_host_ops sdricoh_ops = {
 static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 			    struct pcmcia_device *pcmcia_dev)
 {
-	int result = 0;
-	void __iomem *iobase = NULL;
-	struct mmc_host *mmc = NULL;
-	struct sdricoh_host *host = NULL;
+	int result;
+	void __iomem *iobase;
+	struct mmc_host *mmc;
+	struct sdricoh_host *host;
 	struct device *dev = &pcmcia_dev->dev;
 	/* map iomem */
 	if (pci_resource_len(pci_dev, SDRICOH_PCI_REGION) !=
@@ -419,7 +419,7 @@ static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 	if (readl(iobase + R104_VERSION) != 0x4000) {
 		dev_dbg(dev, "no supported mmc controller found\n");
 		result = -ENODEV;
-		goto err;
+		goto unmap_io;
 	}
 	/* allocate privdata */
 	mmc = pcmcia_dev->priv =
@@ -427,7 +427,7 @@ static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 	if (!mmc) {
 		dev_err(dev, "mmc_alloc_host failed\n");
 		result = -ENOMEM;
-		goto err;
+		goto unmap_io;
 	}
 	host = mmc_priv(mmc);
 
@@ -451,8 +451,7 @@ static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 	if (sdricoh_reset(host)) {
 		dev_dbg(dev, "could not reset\n");
 		result = -EIO;
-		goto err;
-
+		goto free_host;
 	}
 
 	result = mmc_add_host(mmc);
@@ -461,13 +460,10 @@ static int sdricoh_init_mmc(struct pci_dev *pci_dev,
 		dev_dbg(dev, "mmc host registered\n");
 		return 0;
 	}
-
-err:
-	if (iobase)
-		pci_iounmap(pci_dev, iobase);
-	if (mmc)
-		mmc_free_host(mmc);
-
+free_host:
+	mmc_free_host(mmc);
+unmap_io:
+	pci_iounmap(pci_dev, iobase);
 	return result;
 }
 
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 6234eab38ff3..8d870ce9f944 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1395,7 +1395,7 @@ static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
 
 static void sh_mmcif_timeout_work(struct work_struct *work)
 {
-	struct delayed_work *d = container_of(work, struct delayed_work, work);
+	struct delayed_work *d = to_delayed_work(work);
 	struct sh_mmcif_host *host = container_of(d, struct sh_mmcif_host, timeout_work);
 	struct mmc_request *mrq = host->mrq;
 	struct device *dev = sh_mmcif_host_to_dev(host);
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 354f4f335ed5..9aa147959276 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -1,6 +1,8 @@
 /*
  * SuperH Mobile SDHI
  *
+ * Copyright (C) 2016 Sang Engineering, Wolfram Sang
+ * Copyright (C) 2015-16 Renesas Electronics Corporation
  * Copyright (C) 2009 Magnus Damm
  *
  * This program is free software; you can redistribute it and/or modify
@@ -43,6 +45,7 @@ struct sh_mobile_sdhi_of_data {
 	unsigned long capabilities2;
 	enum dma_slave_buswidth dma_buswidth;
 	dma_addr_t dma_rx_offset;
+	unsigned bus_shift;
 };
 
 static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = {
@@ -59,12 +62,19 @@ static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = {
 
 static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = {
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
-			  TMIO_MMC_CLK_ACTUAL,
+			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
 	.dma_buswidth	= DMA_SLAVE_BUSWIDTH_4_BYTES,
 	.dma_rx_offset	= 0x2000,
 };
 
+static const struct sh_mobile_sdhi_of_data of_rcar_gen3_compatible = {
+	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE |
+			  TMIO_MMC_CLK_ACTUAL | TMIO_MMC_FAST_CLK_CHG,
+	.capabilities	= MMC_CAP_SD_HIGHSPEED,
+	.bus_shift	= 2,
+};
+
 static const struct of_device_id sh_mobile_sdhi_of_match[] = {
 	{ .compatible = "renesas,sdhi-shmobile" },
 	{ .compatible = "renesas,sdhi-sh7372" },
@@ -78,6 +88,7 @@ static const struct of_device_id sh_mobile_sdhi_of_match[] = {
 	{ .compatible = "renesas,sdhi-r8a7792", .data = &of_rcar_gen2_compatible, },
 	{ .compatible = "renesas,sdhi-r8a7793", .data = &of_rcar_gen2_compatible, },
 	{ .compatible = "renesas,sdhi-r8a7794", .data = &of_rcar_gen2_compatible, },
+	{ .compatible = "renesas,sdhi-r8a7795", .data = &of_rcar_gen3_compatible, },
 	{},
 };
 MODULE_DEVICE_TABLE(of, sh_mobile_sdhi_of_match);
@@ -103,6 +114,15 @@ static void sh_mobile_sdhi_sdbuf_width(struct tmio_mmc_host *host, int width)
 	case 0xCB0D:
 		val = (width == 32) ? 0x0000 : 0x0001;
 		break;
+	case 0xCC10: /* Gen3, SD only */
+	case 0xCD10: /* Gen3, SD + MMC */
+		if (width == 64)
+			val = 0x0000;
+		else if (width == 32)
+			val = 0x0101;
+		else
+			val = 0x0001;
+		break;
 	default:
 		/* nothing to do */
 		return;
@@ -163,6 +183,7 @@ static int sh_mobile_sdhi_write16_hook(struct tmio_mmc_host *host, int addr)
 	case CTL_SD_MEM_CARD_OPT:
 	case CTL_TRANSACTION_CTL:
 	case CTL_DMA_ENABLE:
+	case EXT_ACC:
 		return sh_mobile_sdhi_wait_idle(host);
 	}
 
@@ -213,10 +234,8 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(struct sh_mobile_sdhi), GFP_KERNEL);
-	if (priv == NULL) {
-		dev_err(&pdev->dev, "kzalloc failed\n");
+	if (!priv)
 		return -ENOMEM;
-	}
 
 	mmc_data = &priv->mmc_data;
 	dma_priv = &priv->dma_priv;
@@ -234,16 +253,26 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 		goto eprobe;
 	}
 
+	if (of_id && of_id->data) {
+		const struct sh_mobile_sdhi_of_data *of_data = of_id->data;
+
+		mmc_data->flags |= of_data->tmio_flags;
+		mmc_data->capabilities |= of_data->capabilities;
+		mmc_data->capabilities2 |= of_data->capabilities2;
+		mmc_data->dma_rx_offset = of_data->dma_rx_offset;
+		dma_priv->dma_buswidth = of_data->dma_buswidth;
+		host->bus_shift = of_data->bus_shift;
+	}
+
 	host->dma		= dma_priv;
 	host->write16_hook	= sh_mobile_sdhi_write16_hook;
 	host->clk_enable	= sh_mobile_sdhi_clk_enable;
 	host->clk_disable	= sh_mobile_sdhi_clk_disable;
 	host->multi_io_quirk	= sh_mobile_sdhi_multi_io_quirk;
-	/* SD control register space size is 0x100, 0x200 for bus_shift=1 */
-	if (resource_size(res) > 0x100)
+
+	/* Orginally registers were 16 bit apart, could be 32 or 64 nowadays */
+	if (!host->bus_shift && resource_size(res) > 0x100) /* old way to determine the shift */
 		host->bus_shift = 1;
-	else
-		host->bus_shift = 0;
 
 	if (mmd)
 		*mmc_data = *mmd;
@@ -275,15 +304,6 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
 	 */
 	mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK;
 
-	if (of_id && of_id->data) {
-		const struct sh_mobile_sdhi_of_data *of_data = of_id->data;
-		mmc_data->flags |= of_data->tmio_flags;
-		mmc_data->capabilities |= of_data->capabilities;
-		mmc_data->capabilities2 |= of_data->capabilities2;
-		mmc_data->dma_rx_offset = of_data->dma_rx_offset;
-		dma_priv->dma_buswidth = of_data->dma_buswidth;
-	}
-
 	ret = tmio_mmc_host_probe(host, mmc_data);
 	if (ret < 0)
 		goto efree;
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 83de82bceafc..8372a413848c 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -28,6 +28,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/reset.h>
+#include <linux/regulator/consumer.h>
 
 #include <linux/of_address.h>
 #include <linux/of_gpio.h>
@@ -214,6 +215,7 @@
 #define SDXC_CLK_25M		1
 #define SDXC_CLK_50M		2
 #define SDXC_CLK_50M_DDR	3
+#define SDXC_CLK_50M_DDR_8BIT	4
 
 struct sunxi_mmc_clk_delay {
 	u32 output;
@@ -256,6 +258,9 @@ struct sunxi_mmc_host {
 	struct mmc_request *mrq;
 	struct mmc_request *manual_stop_mrq;
 	int		ferror;
+
+	/* vqmmc */
+	bool		vqmmc_enabled;
 };
 
 static int sunxi_mmc_reset_host(struct sunxi_mmc_host *host)
@@ -284,16 +289,28 @@ static int sunxi_mmc_init_host(struct mmc_host *mmc)
 	if (sunxi_mmc_reset_host(host))
 		return -EIO;
 
+	/*
+	 * Burst 8 transfers, RX trigger level: 7, TX trigger level: 8
+	 *
+	 * TODO: sun9i has a larger FIFO and supports higher trigger values
+	 */
 	mmc_writel(host, REG_FTRGL, 0x20070008);
+	/* Maximum timeout value */
 	mmc_writel(host, REG_TMOUT, 0xffffffff);
+	/* Unmask SDIO interrupt if needed */
 	mmc_writel(host, REG_IMASK, host->sdio_imask);
+	/* Clear all pending interrupts */
 	mmc_writel(host, REG_RINTR, 0xffffffff);
+	/* Debug register? undocumented */
 	mmc_writel(host, REG_DBGC, 0xdeb);
+	/* Enable CEATA support */
 	mmc_writel(host, REG_FUNS, SDXC_CEATA_ON);
+	/* Set DMA descriptor list base address */
 	mmc_writel(host, REG_DLBA, host->sg_dma);
 
 	rval = mmc_readl(host, REG_GCTRL);
 	rval |= SDXC_INTERRUPT_ENABLE_BIT;
+	/* Undocumented, but found in Allwinner code */
 	rval &= ~SDXC_ACCESS_DONE_DIRECT;
 	mmc_writel(host, REG_GCTRL, rval);
 
@@ -640,11 +657,17 @@ static int sunxi_mmc_clk_set_rate(struct sunxi_mmc_host *host,
 				  struct mmc_ios *ios)
 {
 	u32 rate, oclk_dly, rval, sclk_dly;
+	u32 clock = ios->clock;
 	int ret;
 
-	rate = clk_round_rate(host->clk_mmc, ios->clock);
+	/* 8 bit DDR requires a higher module clock */
+	if (ios->timing == MMC_TIMING_MMC_DDR52 &&
+	    ios->bus_width == MMC_BUS_WIDTH_8)
+		clock <<= 1;
+
+	rate = clk_round_rate(host->clk_mmc, clock);
 	dev_dbg(mmc_dev(host->mmc), "setting clk to %d, rounded %d\n",
-		ios->clock, rate);
+		clock, rate);
 
 	/* setting clock rate */
 	ret = clk_set_rate(host->clk_mmc, rate);
@@ -661,6 +684,12 @@ static int sunxi_mmc_clk_set_rate(struct sunxi_mmc_host *host,
 	/* clear internal divider */
 	rval = mmc_readl(host, REG_CLKCR);
 	rval &= ~0xff;
+	/* set internal divider for 8 bit eMMC DDR, so card clock is right */
+	if (ios->timing == MMC_TIMING_MMC_DDR52 &&
+	    ios->bus_width == MMC_BUS_WIDTH_8) {
+		rval |= 1;
+		rate >>= 1;
+	}
 	mmc_writel(host, REG_CLKCR, rval);
 
 	/* determine delays */
@@ -670,13 +699,17 @@ static int sunxi_mmc_clk_set_rate(struct sunxi_mmc_host *host,
 	} else if (rate <= 25000000) {
 		oclk_dly = host->clk_delays[SDXC_CLK_25M].output;
 		sclk_dly = host->clk_delays[SDXC_CLK_25M].sample;
-	} else if (rate <= 50000000) {
-		if (ios->timing == MMC_TIMING_UHS_DDR50) {
-			oclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].output;
-			sclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].sample;
-		} else {
+	} else if (rate <= 52000000) {
+		if (ios->timing != MMC_TIMING_UHS_DDR50 &&
+		    ios->timing != MMC_TIMING_MMC_DDR52) {
 			oclk_dly = host->clk_delays[SDXC_CLK_50M].output;
 			sclk_dly = host->clk_delays[SDXC_CLK_50M].sample;
+		} else if (ios->bus_width == MMC_BUS_WIDTH_8) {
+			oclk_dly = host->clk_delays[SDXC_CLK_50M_DDR_8BIT].output;
+			sclk_dly = host->clk_delays[SDXC_CLK_50M_DDR_8BIT].sample;
+		} else {
+			oclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].output;
+			sclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].sample;
 		}
 	} else {
 		return -EINVAL;
@@ -699,7 +732,20 @@ static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		break;
 
 	case MMC_POWER_UP:
-		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
+		host->ferror = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
+						     ios->vdd);
+		if (host->ferror)
+			return;
+
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			host->ferror = regulator_enable(mmc->supply.vqmmc);
+			if (host->ferror) {
+				dev_err(mmc_dev(mmc),
+					"failed to enable vqmmc\n");
+				return;
+			}
+			host->vqmmc_enabled = true;
+		}
 
 		host->ferror = sunxi_mmc_init_host(mmc);
 		if (host->ferror)
@@ -712,6 +758,9 @@ static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		dev_dbg(mmc_dev(mmc), "power off!\n");
 		sunxi_mmc_reset_host(host);
 		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
+		if (!IS_ERR(mmc->supply.vqmmc) && host->vqmmc_enabled)
+			regulator_disable(mmc->supply.vqmmc);
+		host->vqmmc_enabled = false;
 		break;
 	}
 
@@ -730,7 +779,8 @@ static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	/* set ddr mode */
 	rval = mmc_readl(host, REG_GCTRL);
-	if (ios->timing == MMC_TIMING_UHS_DDR50)
+	if (ios->timing == MMC_TIMING_UHS_DDR50 ||
+	    ios->timing == MMC_TIMING_MMC_DDR52)
 		rval |= SDXC_DDR_MODE;
 	else
 		rval &= ~SDXC_DDR_MODE;
@@ -743,6 +793,19 @@ static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	}
 }
 
+static int sunxi_mmc_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	/* vqmmc regulator is available */
+	if (!IS_ERR(mmc->supply.vqmmc))
+		return mmc_regulator_set_vqmmc(mmc, ios);
+
+	/* no vqmmc regulator, assume fixed regulator at 3/3.3V */
+	if (mmc->ios.signal_voltage == MMC_SIGNAL_VOLTAGE_330)
+		return 0;
+
+	return -EINVAL;
+}
+
 static void sunxi_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
 	struct sunxi_mmc_host *host = mmc_priv(mmc);
@@ -815,11 +878,6 @@ static void sunxi_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
 		if ((cmd->flags & MMC_CMD_MASK) == MMC_CMD_ADTC) {
 			cmd_val |= SDXC_DATA_EXPIRE | SDXC_WAIT_PRE_OVER;
-			if (cmd->data->flags & MMC_DATA_STREAM) {
-				imask |= SDXC_AUTO_COMMAND_DONE;
-				cmd_val |= SDXC_SEQUENCE_MODE |
-					   SDXC_SEND_AUTO_STOP;
-			}
 
 			if (cmd->data->stop) {
 				imask |= SDXC_AUTO_COMMAND_DONE;
@@ -894,6 +952,7 @@ static struct mmc_host_ops sunxi_mmc_ops = {
 	.get_ro		 = mmc_gpio_get_ro,
 	.get_cd		 = mmc_gpio_get_cd,
 	.enable_sdio_irq = sunxi_mmc_enable_sdio_irq,
+	.start_signal_voltage_switch = sunxi_mmc_volt_switch,
 	.hw_reset	 = sunxi_mmc_hw_reset,
 	.card_busy	 = sunxi_mmc_card_busy,
 };
@@ -903,6 +962,8 @@ static const struct sunxi_mmc_clk_delay sunxi_mmc_clk_delays[] = {
 	[SDXC_CLK_25M]		= { .output = 180, .sample =  75 },
 	[SDXC_CLK_50M]		= { .output =  90, .sample = 120 },
 	[SDXC_CLK_50M_DDR]	= { .output =  60, .sample = 120 },
+	/* Value from A83T "new timing mode". Works but might not be right. */
+	[SDXC_CLK_50M_DDR_8BIT]	= { .output =  90, .sample = 180 },
 };
 
 static const struct sunxi_mmc_clk_delay sun9i_mmc_clk_delays[] = {
@@ -910,6 +971,7 @@ static const struct sunxi_mmc_clk_delay sun9i_mmc_clk_delays[] = {
 	[SDXC_CLK_25M]		= { .output = 180, .sample =  75 },
 	[SDXC_CLK_50M]		= { .output = 150, .sample = 120 },
 	[SDXC_CLK_50M_DDR]	= { .output =  90, .sample = 120 },
+	[SDXC_CLK_50M_DDR_8BIT]	= { .output =  90, .sample = 120 },
 };
 
 static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host,
@@ -1060,10 +1122,11 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
 	mmc->max_segs		= PAGE_SIZE / sizeof(struct sunxi_idma_des);
 	mmc->max_seg_size	= (1 << host->idma_des_size_bits);
 	mmc->max_req_size	= mmc->max_seg_size * mmc->max_segs;
-	/* 400kHz ~ 50MHz */
+	/* 400kHz ~ 52MHz */
 	mmc->f_min		=   400000;
-	mmc->f_max		= 50000000;
+	mmc->f_max		= 52000000;
 	mmc->caps	       |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
+				  MMC_CAP_1_8V_DDR |
 				  MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ;
 
 	ret = mmc_of_parse(mmc);
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 4a0d6b80eaa3..675435873823 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -94,10 +94,7 @@ static void tmio_mmc_start_dma_rx(struct tmio_mmc_host *host)
 			desc = NULL;
 			ret = cookie;
 		}
-		dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-			__func__, host->sg_len, ret, cookie, host->mrq);
 	}
-
 pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
@@ -115,9 +112,6 @@ pio:
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
 	}
-
-	dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
-		desc, host->sg_len);
 }
 
 static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
@@ -174,10 +168,7 @@ static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
 			desc = NULL;
 			ret = cookie;
 		}
-		dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
-			__func__, host->sg_len, ret, cookie, host->mrq);
 	}
-
 pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
@@ -195,8 +186,6 @@ pio:
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
 	}
-
-	dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
 }
 
 void tmio_mmc_start_dma(struct tmio_mmc_host *host,
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index a10fde40b6c3..03f6e74c1906 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -1,6 +1,8 @@
 /*
  * linux/drivers/mmc/host/tmio_mmc_pio.c
  *
+ * Copyright (C) 2016 Sang Engineering, Wolfram Sang
+ * Copyright (C) 2015-16 Renesas Electronics Corporation
  * Copyright (C) 2011 Guennadi Liakhovetski
  * Copyright (C) 2007 Ian Molton
  * Copyright (C) 2004 Ian Molton
@@ -159,42 +161,44 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 
 	if (new_clock) {
 		for (clock = host->mmc->f_min, clk = 0x80000080;
-			new_clock >= (clock<<1); clk >>= 1)
+		     new_clock >= (clock << 1);
+		     clk >>= 1)
 			clock <<= 1;
 
 		/* 1/1 clock is option */
 		if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
-		    ((clk >> 22) & 0x1))
+		   ((clk >> 22) & 0x1))
 			clk |= 0xff;
 	}
 
 	if (host->set_clk_div)
-		host->set_clk_div(host->pdev, (clk>>22) & 1);
+		host->set_clk_div(host->pdev, (clk >> 22) & 1);
 
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff);
-	msleep(10);
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
+			sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & CLK_CTL_DIV_MASK);
+	if (!(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG))
+		msleep(10);
 }
 
 static void tmio_mmc_clk_stop(struct tmio_mmc_host *host)
 {
-	/* implicit BUG_ON(!res) */
 	if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
 		sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0000);
 		msleep(10);
 	}
 
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~0x0100 &
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, ~CLK_CTL_SCLKEN &
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
-	msleep(10);
+	msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 5 : 10);
 }
 
 static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
-	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, 0x0100 |
+	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
 		sd_ctrl_read16(host, CTL_SD_CARD_CLK_CTL));
-	msleep(10);
+	msleep(host->pdata->flags & TMIO_MMC_FAST_CLK_CHG ? 1 : 10);
 
-	/* implicit BUG_ON(!res) */
 	if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG) {
 		sd_ctrl_write16(host, CTL_CLK_AND_WAIT_CTL, 0x0100);
 		msleep(10);
@@ -205,7 +209,6 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host)
 {
 	/* FIXME - should we set stop clock reg here */
 	sd_ctrl_write16(host, CTL_RESET_SD, 0x0000);
-	/* implicit BUG_ON(!res) */
 	if (host->pdata->flags & TMIO_MMC_HAVE_HIGH_REG)
 		sd_ctrl_write16(host, CTL_RESET_SDIO, 0x0000);
 	msleep(10);
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index b47122d3e8d8..b2752fe711f2 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -1630,7 +1630,7 @@ static irqreturn_t usdhi6_cd(int irq, void *dev_id)
  */
 static void usdhi6_timeout_work(struct work_struct *work)
 {
-	struct delayed_work *d = container_of(work, struct delayed_work, work);
+	struct delayed_work *d = to_delayed_work(work);
 	struct usdhi6_host *host = container_of(d, struct usdhi6_host, timeout_work);
 	struct mmc_request *mrq = host->mrq;
 	struct mmc_data *data = mrq ? mrq->data : NULL;
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 42cc953309f1..e83a279f1217 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -142,7 +142,7 @@ config MTD_AR7_PARTS
 
 config MTD_BCM63XX_PARTS
 	tristate "BCM63XX CFE partitioning support"
-	depends on BCM63XX
+	depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
 	select CRC32
 	help
 	  This provides partions parsing for BCM63xx devices with CFE
diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c
index 8282f47bcf5d..845dd27d9f41 100644
--- a/drivers/mtd/bcm47xxpart.c
+++ b/drivers/mtd/bcm47xxpart.c
@@ -66,11 +66,13 @@ static const char *bcm47xxpart_trx_data_part_name(struct mtd_info *master,
 {
 	uint32_t buf;
 	size_t bytes_read;
+	int err;
 
-	if (mtd_read(master, offset, sizeof(buf), &bytes_read,
-		     (uint8_t *)&buf) < 0) {
-		pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			offset);
+	err  = mtd_read(master, offset, sizeof(buf), &bytes_read,
+			(uint8_t *)&buf);
+	if (err && !mtd_is_bitflip(err)) {
+		pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			offset, err);
 		goto out_default;
 	}
 
@@ -95,6 +97,7 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	int trx_part = -1;
 	int last_trx_part = -1;
 	int possible_nvram_sizes[] = { 0x8000, 0xF000, 0x10000, };
+	int err;
 
 	/*
 	 * Some really old flashes (like AT45DB*) had smaller erasesize-s, but
@@ -118,8 +121,8 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 	/* Parse block by block looking for magics */
 	for (offset = 0; offset <= master->size - blocksize;
 	     offset += blocksize) {
-		/* Nothing more in higher memory */
-		if (offset >= 0x2000000)
+		/* Nothing more in higher memory on BCM47XX (MIPS) */
+		if (config_enabled(CONFIG_BCM47XX) && offset >= 0x2000000)
 			break;
 
 		if (curr_part >= BCM47XXPART_MAX_PARTS) {
@@ -128,10 +131,11 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		}
 
 		/* Read beginning of the block */
-		if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ,
-			     &bytes_read, (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			       offset);
+		err = mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ,
+			       &bytes_read, (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 
@@ -254,10 +258,11 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		}
 
 		/* Read middle of the block */
-		if (mtd_read(master, offset + 0x8000, 0x4,
-			     &bytes_read, (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while parsing (offset: 0x%X)!\n",
-			       offset);
+		err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read,
+			       (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while parsing (offset: 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 
@@ -277,10 +282,11 @@ static int bcm47xxpart_parse(struct mtd_info *master,
 		}
 
 		offset = master->size - possible_nvram_sizes[i];
-		if (mtd_read(master, offset, 0x4, &bytes_read,
-			     (uint8_t *)buf) < 0) {
-			pr_err("mtd_read error while reading at offset 0x%X!\n",
-			       offset);
+		err = mtd_read(master, offset, 0x4, &bytes_read,
+			       (uint8_t *)buf);
+		if (err && !mtd_is_bitflip(err)) {
+			pr_err("mtd_read error while reading (offset 0x%X): %d\n",
+			       offset, err);
 			continue;
 		}
 
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index cec3188a170d..41d1d3149c61 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -24,6 +24,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/bcm963xx_nvram.h>
 #include <linux/bcm963xx_tag.h>
 #include <linux/crc32.h>
 #include <linux/module.h>
@@ -34,12 +35,15 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
-#include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/board_bcm963xx.h>
+#define BCM963XX_CFE_BLOCK_SIZE		SZ_64K	/* always at least 64KiB */
 
-#define BCM63XX_CFE_BLOCK_SIZE	SZ_64K		/* always at least 64KiB */
+#define BCM963XX_CFE_MAGIC_OFFSET	0x4e0
+#define BCM963XX_CFE_VERSION_OFFSET	0x570
+#define BCM963XX_NVRAM_OFFSET		0x580
 
-#define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
+/* Ensure strings read from flash structs are null terminated */
+#define STR_NULL_TERMINATE(x) \
+	do { char *_str = (x); _str[sizeof(x) - 1] = 0; } while (0)
 
 static int bcm63xx_detect_cfe(struct mtd_info *master)
 {
@@ -58,68 +62,130 @@ static int bcm63xx_detect_cfe(struct mtd_info *master)
 		return 0;
 
 	/* very old CFE's do not have the cfe-v string, so check for magic */
-	ret = mtd_read(master, BCM63XX_CFE_MAGIC_OFFSET, 8, &retlen,
+	ret = mtd_read(master, BCM963XX_CFE_MAGIC_OFFSET, 8, &retlen,
 		       (void *)buf);
 	buf[retlen] = 0;
 
 	return strncmp("CFE1CFE1", buf, 8);
 }
 
-static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
-					const struct mtd_partition **pparts,
-					struct mtd_part_parser_data *data)
+static int bcm63xx_read_nvram(struct mtd_info *master,
+	struct bcm963xx_nvram *nvram)
+{
+	u32 actual_crc, expected_crc;
+	size_t retlen;
+	int ret;
+
+	/* extract nvram data */
+	ret = mtd_read(master, BCM963XX_NVRAM_OFFSET, BCM963XX_NVRAM_V5_SIZE,
+			&retlen, (void *)nvram);
+	if (ret)
+		return ret;
+
+	ret = bcm963xx_nvram_checksum(nvram, &expected_crc, &actual_crc);
+	if (ret)
+		pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
+			expected_crc, actual_crc);
+
+	if (!nvram->psi_size)
+		nvram->psi_size = BCM963XX_DEFAULT_PSI_SIZE;
+
+	return 0;
+}
+
+static int bcm63xx_read_image_tag(struct mtd_info *master, const char *name,
+	loff_t tag_offset, struct bcm_tag *buf)
+{
+	int ret;
+	size_t retlen;
+	u32 computed_crc;
+
+	ret = mtd_read(master, tag_offset, sizeof(*buf), &retlen, (void *)buf);
+	if (ret)
+		return ret;
+
+	if (retlen != sizeof(*buf))
+		return -EIO;
+
+	computed_crc = crc32_le(IMAGETAG_CRC_START, (u8 *)buf,
+				offsetof(struct bcm_tag, header_crc));
+	if (computed_crc == buf->header_crc) {
+		STR_NULL_TERMINATE(buf->board_id);
+		STR_NULL_TERMINATE(buf->tag_version);
+
+		pr_info("%s: CFE image tag found at 0x%llx with version %s, board type %s\n",
+			name, tag_offset, buf->tag_version, buf->board_id);
+
+		return 0;
+	}
+
+	pr_warn("%s: CFE image tag at 0x%llx CRC invalid (expected %08x, actual %08x)\n",
+		name, tag_offset, buf->header_crc, computed_crc);
+	return 1;
+}
+
+static int bcm63xx_parse_cfe_nor_partitions(struct mtd_info *master,
+	const struct mtd_partition **pparts, struct bcm963xx_nvram *nvram)
 {
 	/* CFE, NVRAM and global Linux are always present */
 	int nrparts = 3, curpart = 0;
-	struct bcm_tag *buf;
+	struct bcm_tag *buf = NULL;
 	struct mtd_partition *parts;
 	int ret;
-	size_t retlen;
 	unsigned int rootfsaddr, kerneladdr, spareaddr;
 	unsigned int rootfslen, kernellen, sparelen, totallen;
 	unsigned int cfelen, nvramlen;
 	unsigned int cfe_erasesize;
 	int i;
-	u32 computed_crc;
 	bool rootfs_first = false;
 
-	if (bcm63xx_detect_cfe(master))
-		return -EINVAL;
-
 	cfe_erasesize = max_t(uint32_t, master->erasesize,
-			      BCM63XX_CFE_BLOCK_SIZE);
+			      BCM963XX_CFE_BLOCK_SIZE);
 
 	cfelen = cfe_erasesize;
-	nvramlen = bcm63xx_nvram_get_psi_size() * SZ_1K;
+	nvramlen = nvram->psi_size * SZ_1K;
 	nvramlen = roundup(nvramlen, cfe_erasesize);
 
-	/* Allocate memory for buffer */
 	buf = vmalloc(sizeof(struct bcm_tag));
 	if (!buf)
 		return -ENOMEM;
 
 	/* Get the tag */
-	ret = mtd_read(master, cfelen, sizeof(struct bcm_tag), &retlen,
-		       (void *)buf);
-
-	if (retlen != sizeof(struct bcm_tag)) {
-		vfree(buf);
-		return -EIO;
-	}
+	ret = bcm63xx_read_image_tag(master, "rootfs", cfelen, buf);
+	if (!ret) {
+		STR_NULL_TERMINATE(buf->flash_image_start);
+		if (kstrtouint(buf->flash_image_start, 10, &rootfsaddr) ||
+				rootfsaddr < BCM963XX_EXTENDED_SIZE) {
+			pr_err("invalid rootfs address: %*ph\n",
+				(int)sizeof(buf->flash_image_start),
+				buf->flash_image_start);
+			goto invalid_tag;
+		}
 
-	computed_crc = crc32_le(IMAGETAG_CRC_START, (u8 *)buf,
-				offsetof(struct bcm_tag, header_crc));
-	if (computed_crc == buf->header_crc) {
-		char *boardid = &(buf->board_id[0]);
-		char *tagversion = &(buf->tag_version[0]);
+		STR_NULL_TERMINATE(buf->kernel_address);
+		if (kstrtouint(buf->kernel_address, 10, &kerneladdr) ||
+				kerneladdr < BCM963XX_EXTENDED_SIZE) {
+			pr_err("invalid kernel address: %*ph\n",
+				(int)sizeof(buf->kernel_address),
+				buf->kernel_address);
+			goto invalid_tag;
+		}
 
-		sscanf(buf->flash_image_start, "%u", &rootfsaddr);
-		sscanf(buf->kernel_address, "%u", &kerneladdr);
-		sscanf(buf->kernel_length, "%u", &kernellen);
-		sscanf(buf->total_length, "%u", &totallen);
+		STR_NULL_TERMINATE(buf->kernel_length);
+		if (kstrtouint(buf->kernel_length, 10, &kernellen)) {
+			pr_err("invalid kernel length: %*ph\n",
+				(int)sizeof(buf->kernel_length),
+				buf->kernel_length);
+			goto invalid_tag;
+		}
 
-		pr_info("CFE boot tag found with version %s and board type %s\n",
-			tagversion, boardid);
+		STR_NULL_TERMINATE(buf->total_length);
+		if (kstrtouint(buf->total_length, 10, &totallen)) {
+			pr_err("invalid total length: %*ph\n",
+				(int)sizeof(buf->total_length),
+				buf->total_length);
+			goto invalid_tag;
+		}
 
 		kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
 		rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
@@ -134,13 +200,14 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
 			rootfsaddr = kerneladdr + kernellen;
 			rootfslen = spareaddr - rootfsaddr;
 		}
-	} else {
-		pr_warn("CFE boot tag CRC invalid (expected %08x, actual %08x)\n",
-			buf->header_crc, computed_crc);
+	} else if (ret > 0) {
+invalid_tag:
 		kernellen = 0;
 		rootfslen = 0;
 		rootfsaddr = 0;
 		spareaddr = cfelen;
+	} else {
+		goto out;
 	}
 	sparelen = master->size - spareaddr - nvramlen;
 
@@ -151,11 +218,10 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
 	if (kernellen > 0)
 		nrparts++;
 
-	/* Ask kernel for more memory */
 	parts = kzalloc(sizeof(*parts) * nrparts + 10 * nrparts, GFP_KERNEL);
 	if (!parts) {
-		vfree(buf);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	/* Start building partition list */
@@ -206,9 +272,43 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
 		sparelen);
 
 	*pparts = parts;
+	ret = 0;
+
+out:
 	vfree(buf);
 
+	if (ret)
+		return ret;
+
 	return nrparts;
+}
+
+static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
+					const struct mtd_partition **pparts,
+					struct mtd_part_parser_data *data)
+{
+	struct bcm963xx_nvram *nvram = NULL;
+	int ret;
+
+	if (bcm63xx_detect_cfe(master))
+		return -EINVAL;
+
+	nvram = vzalloc(sizeof(*nvram));
+	if (!nvram)
+		return -ENOMEM;
+
+	ret = bcm63xx_read_nvram(master, nvram);
+	if (ret)
+		goto out;
+
+	if (!mtd_type_is_nand(master))
+		ret = bcm63xx_parse_cfe_nor_partitions(master, pparts, nvram);
+	else
+		ret = -EINVAL;
+
+out:
+	vfree(nvram);
+	return ret;
 };
 
 static struct mtd_part_parser bcm63xx_cfe_parser = {
diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index c3a2695a4420..e7b2e439696c 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -72,13 +72,11 @@ MODULE_PARM_DESC(reliable_mode, "Set the docg3 mode (0=normal MLC, 1=fast, "
  * @eccbytes: 8 bytes are used (1 for Hamming ECC, 7 for BCH ECC)
  * @eccpos: ecc positions (byte 7 is Hamming ECC, byte 8-14 are BCH ECC)
  * @oobfree: free pageinfo bytes (byte 0 until byte 6, byte 15
- * @oobavail: 8 available bytes remaining after ECC toll
  */
 static struct nand_ecclayout docg3_oobinfo = {
 	.eccbytes = 8,
 	.eccpos = {7, 8, 9, 10, 11, 12, 13, 14},
 	.oobfree = {{0, 7}, {15, 1} },
-	.oobavail = 8,
 };
 
 static inline u8 doc_readb(struct docg3 *docg3, u16 reg)
@@ -1438,7 +1436,7 @@ static int doc_write_oob(struct mtd_info *mtd, loff_t ofs,
 		oobdelta = mtd->oobsize;
 		break;
 	case MTD_OPS_AUTO_OOB:
-		oobdelta = mtd->ecclayout->oobavail;
+		oobdelta = mtd->oobavail;
 		break;
 	default:
 		return -EINVAL;
@@ -1860,6 +1858,7 @@ static int __init doc_set_driver_info(int chip_id, struct mtd_info *mtd)
 	mtd->_write_oob = doc_write_oob;
 	mtd->_block_isbad = doc_block_isbad;
 	mtd->ecclayout = &docg3_oobinfo;
+	mtd->oobavail = 8;
 	mtd->ecc_strength = DOC_ECC_BCH_T;
 
 	return 0;
diff --git a/drivers/mtd/devices/mtdram.c b/drivers/mtd/devices/mtdram.c
index 627a9bc37679..cbd8547d7aad 100644
--- a/drivers/mtd/devices/mtdram.c
+++ b/drivers/mtd/devices/mtdram.c
@@ -19,6 +19,7 @@
 
 static unsigned long total_size = CONFIG_MTDRAM_TOTAL_SIZE;
 static unsigned long erase_size = CONFIG_MTDRAM_ERASE_SIZE;
+static unsigned long writebuf_size = 64;
 #define MTDRAM_TOTAL_SIZE (total_size * 1024)
 #define MTDRAM_ERASE_SIZE (erase_size * 1024)
 
@@ -27,6 +28,8 @@ module_param(total_size, ulong, 0);
 MODULE_PARM_DESC(total_size, "Total device size in KiB");
 module_param(erase_size, ulong, 0);
 MODULE_PARM_DESC(erase_size, "Device erase block size in KiB");
+module_param(writebuf_size, ulong, 0);
+MODULE_PARM_DESC(writebuf_size, "Device write buf size in Bytes (Default: 64)");
 #endif
 
 // We could store these in the mtd structure, but we only support 1 device..
@@ -123,7 +126,7 @@ int mtdram_init_device(struct mtd_info *mtd, void *mapped_address,
 	mtd->flags = MTD_CAP_RAM;
 	mtd->size = size;
 	mtd->writesize = 1;
-	mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
+	mtd->writebufsize = writebuf_size;
 	mtd->erasesize = MTDRAM_ERASE_SIZE;
 	mtd->priv = mapped_address;
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 10bf304027dd..08de4b2cf0f5 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -126,10 +126,7 @@ static int part_read_oob(struct mtd_info *mtd, loff_t from,
 	if (ops->oobbuf) {
 		size_t len, pages;
 
-		if (ops->mode == MTD_OPS_AUTO_OOB)
-			len = mtd->oobavail;
-		else
-			len = mtd->oobsize;
+		len = mtd_oobavail(mtd, ops);
 		pages = mtd_div_by_ws(mtd->size, mtd);
 		pages -= mtd_div_by_ws(from, mtd);
 		if (ops->ooboffs + ops->ooblen > pages * len)
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index fc8b3d16cce7..cb06bdd21a1b 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -346,7 +346,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 	if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset))
 		return MTDSWAP_SCANNED_BAD;
 
-	ops.ooblen = 2 * d->mtd->ecclayout->oobavail;
+	ops.ooblen = 2 * d->mtd->oobavail;
 	ops.oobbuf = d->oob_buf;
 	ops.ooboffs = 0;
 	ops.datbuf = NULL;
@@ -359,7 +359,7 @@ static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb)
 
 	data = (struct mtdswap_oobdata *)d->oob_buf;
 	data2 = (struct mtdswap_oobdata *)
-		(d->oob_buf + d->mtd->ecclayout->oobavail);
+		(d->oob_buf + d->mtd->oobavail);
 
 	if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) {
 		eb->erase_count = le32_to_cpu(data->count);
@@ -933,7 +933,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 
 	ops.mode = MTD_OPS_AUTO_OOB;
 	ops.len = mtd->writesize;
-	ops.ooblen = mtd->ecclayout->oobavail;
+	ops.ooblen = mtd->oobavail;
 	ops.ooboffs = 0;
 	ops.datbuf = d->page_buf;
 	ops.oobbuf = d->oob_buf;
@@ -945,7 +945,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 		for (i = 0; i < mtd_pages; i++) {
 			patt = mtdswap_test_patt(test + i);
 			memset(d->page_buf, patt, mtd->writesize);
-			memset(d->oob_buf, patt, mtd->ecclayout->oobavail);
+			memset(d->oob_buf, patt, mtd->oobavail);
 			ret = mtd_write_oob(mtd, pos, &ops);
 			if (ret)
 				goto error;
@@ -964,7 +964,7 @@ static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
 				if (p1[j] != patt)
 					goto error;
 
-			for (j = 0; j < mtd->ecclayout->oobavail; j++)
+			for (j = 0; j < mtd->oobavail; j++)
 				if (p2[j] != (unsigned char)patt)
 					goto error;
 
@@ -1387,7 +1387,7 @@ static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
 	if (!d->page_buf)
 		goto page_buf_fail;
 
-	d->oob_buf = kmalloc(2 * mtd->ecclayout->oobavail, GFP_KERNEL);
+	d->oob_buf = kmalloc(2 * mtd->oobavail, GFP_KERNEL);
 	if (!d->oob_buf)
 		goto oob_buf_fail;
 
@@ -1417,7 +1417,6 @@ static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 	unsigned long part;
 	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
 	uint64_t swap_size, use_size, size_limit;
-	struct nand_ecclayout *oinfo;
 	int ret;
 
 	parts = &partitions[0];
@@ -1447,17 +1446,10 @@ static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 		return;
 	}
 
-	oinfo = mtd->ecclayout;
-	if (!oinfo) {
-		printk(KERN_ERR "%s: mtd%d does not have OOB\n",
-			MTDSWAP_PREFIX, mtd->index);
-		return;
-	}
-
-	if (!mtd->oobsize || oinfo->oobavail < MTDSWAP_OOBSIZE) {
+	if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
 		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
 			"%d available, %zu needed.\n",
-			MTDSWAP_PREFIX, oinfo->oobavail, MTDSWAP_OOBSIZE);
+			MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
 		return;
 	}
 
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 20f01b3ec23d..f05e0e9eb2f7 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -74,6 +74,7 @@ config MTD_NAND_DENALI_SCRATCH_REG_ADDR
 config MTD_NAND_GPIO
 	tristate "GPIO assisted NAND Flash driver"
 	depends on GPIOLIB || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  This enables a NAND flash driver where control signals are
 	  connected to GPIO pins, and commands and data are communicated
@@ -310,6 +311,7 @@ config MTD_NAND_CAFE
 config MTD_NAND_CS553X
 	tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)"
 	depends on X86_32
+	depends on !UML && HAS_IOMEM
 	help
 	  The CS553x companion chips for the AMD Geode processor
 	  include NAND flash controllers with built-in hardware ECC
@@ -463,6 +465,7 @@ config MTD_NAND_MPC5121_NFC
 config MTD_NAND_VF610_NFC
 	tristate "Support for Freescale NFC for VF610/MPC5125"
 	depends on (SOC_VF610 || COMPILE_TEST)
+	depends on HAS_IOMEM
 	help
 	  Enables support for NAND Flash Controller on some Freescale
 	  processors like the VF610, MPC5125, MCF54418 or Kinetis K70.
@@ -553,4 +556,11 @@ config MTD_NAND_HISI504
 	help
 	  Enables support for NAND controller on Hisilicon SoC Hip04.
 
+config MTD_NAND_QCOM
+	tristate "Support for NAND on QCOM SoCs"
+	depends on ARCH_QCOM
+	help
+	  Enables support for NAND flash chips on SoCs containing the EBI2 NAND
+	  controller. This controller is found on IPQ806x SoC.
+
 endif # MTD_NAND
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 9e3623308509..f55335373f7c 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -56,5 +56,6 @@ obj-$(CONFIG_MTD_NAND_BCM47XXNFLASH)	+= bcm47xxnflash/
 obj-$(CONFIG_MTD_NAND_SUNXI)		+= sunxi_nand.o
 obj-$(CONFIG_MTD_NAND_HISI504)	        += hisi504_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)		+= brcmnand/
+obj-$(CONFIG_MTD_NAND_QCOM)		+= qcom_nandc.o
 
 nand-objs := nand_base.o nand_bbt.o nand_timings.o
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index bddcf83d6859..20cbaabb2959 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -65,6 +65,11 @@ module_param(on_flash_bbt, int, 0);
 
 struct atmel_nand_caps {
 	bool pmecc_correct_erase_page;
+	uint8_t pmecc_max_correction;
+};
+
+struct atmel_nand_nfc_caps {
+	uint32_t rb_mask;
 };
 
 /* oob layout for large page size
@@ -111,6 +116,7 @@ struct atmel_nfc {
 	/* Point to the sram bank which include readed data via NFC */
 	void			*data_in_sram;
 	bool			will_write_sram;
+	const struct atmel_nand_nfc_caps *caps;
 };
 static struct atmel_nfc	nand_nfc;
 
@@ -140,6 +146,7 @@ struct atmel_nand_host {
 	int			pmecc_cw_len;	/* Length of codeword */
 
 	void __iomem		*pmerrloc_base;
+	void __iomem		*pmerrloc_el_base;
 	void __iomem		*pmecc_rom_base;
 
 	/* lookup table for alpha_to and index_of */
@@ -468,6 +475,7 @@ static void atmel_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
  *                8-bits                13-bytes                 14-bytes
  *               12-bits                20-bytes                 21-bytes
  *               24-bits                39-bytes                 42-bytes
+ *               32-bits                52-bytes                 56-bytes
  */
 static int pmecc_get_ecc_bytes(int cap, int sector_size)
 {
@@ -813,7 +821,7 @@ static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc,
 	sector_size = host->pmecc_sector_size;
 
 	while (err_nbr) {
-		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_base, i) - 1;
+		tmp = pmerrloc_readl_el_relaxed(host->pmerrloc_el_base, i) - 1;
 		byte_pos = tmp / 8;
 		bit_pos  = tmp % 8;
 
@@ -825,7 +833,7 @@ static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc,
 			*(buf + byte_pos) ^= (1 << bit_pos);
 
 			pos = sector_num * host->pmecc_sector_size + byte_pos;
-			dev_info(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
+			dev_dbg(host->dev, "Bit flip in data area, byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
 				pos, bit_pos, err_byte, *(buf + byte_pos));
 		} else {
 			/* Bit flip in OOB area */
@@ -835,7 +843,7 @@ static void pmecc_correct_data(struct mtd_info *mtd, uint8_t *buf, uint8_t *ecc,
 			ecc[tmp] ^= (1 << bit_pos);
 
 			pos = tmp + nand_chip->ecc.layout->eccpos[0];
-			dev_info(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
+			dev_dbg(host->dev, "Bit flip in OOB, oob_byte_pos: %d, bit_pos: %d, 0x%02x -> 0x%02x\n",
 				pos, bit_pos, err_byte, ecc[tmp]);
 		}
 
@@ -1017,6 +1025,9 @@ static void atmel_pmecc_core_init(struct mtd_info *mtd)
 	case 24:
 		val = PMECC_CFG_BCH_ERR24;
 		break;
+	case 32:
+		val = PMECC_CFG_BCH_ERR32;
+		break;
 	}
 
 	if (host->pmecc_sector_size == 512)
@@ -1078,6 +1089,9 @@ static int pmecc_choose_ecc(struct atmel_nand_host *host,
 
 	/* If device tree doesn't specify, use NAND's minimum ECC parameters */
 	if (host->pmecc_corr_cap == 0) {
+		if (*cap > host->caps->pmecc_max_correction)
+			return -EINVAL;
+
 		/* use the most fitable ecc bits (the near bigger one ) */
 		if (*cap <= 2)
 			host->pmecc_corr_cap = 2;
@@ -1089,6 +1103,8 @@ static int pmecc_choose_ecc(struct atmel_nand_host *host,
 			host->pmecc_corr_cap = 12;
 		else if (*cap <= 24)
 			host->pmecc_corr_cap = 24;
+		else if (*cap <= 32)
+			host->pmecc_corr_cap = 32;
 		else
 			return -EINVAL;
 	}
@@ -1205,6 +1221,8 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
 		err_no = PTR_ERR(host->pmerrloc_base);
 		goto err;
 	}
+	host->pmerrloc_el_base = host->pmerrloc_base + ATMEL_PMERRLOC_SIGMAx +
+		(host->caps->pmecc_max_correction + 1) * 4;
 
 	if (!host->has_no_lookup_table) {
 		regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
@@ -1486,8 +1504,6 @@ static void atmel_nand_hwctl(struct mtd_info *mtd, int mode)
 		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
 }
 
-static const struct of_device_id atmel_nand_dt_ids[];
-
 static int atmel_of_init_port(struct atmel_nand_host *host,
 			      struct device_node *np)
 {
@@ -1498,7 +1514,7 @@ static int atmel_of_init_port(struct atmel_nand_host *host,
 	enum of_gpio_flags flags = 0;
 
 	host->caps = (struct atmel_nand_caps *)
-		of_match_device(atmel_nand_dt_ids, host->dev)->data;
+		of_device_get_match_data(host->dev);
 
 	if (of_property_read_u32(np, "atmel,nand-addr-offset", &val) == 0) {
 		if (val >= 32) {
@@ -1547,10 +1563,16 @@ static int atmel_of_init_port(struct atmel_nand_host *host,
 	 * them from NAND ONFI parameters.
 	 */
 	if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) {
-		if ((val != 2) && (val != 4) && (val != 8) && (val != 12) &&
-				(val != 24)) {
+		if (val > host->caps->pmecc_max_correction) {
 			dev_err(host->dev,
-				"Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n",
+				"Required ECC strength too high: %u max %u\n",
+				val, host->caps->pmecc_max_correction);
+			return -EINVAL;
+		}
+		if ((val != 2)  && (val != 4)  && (val != 8) &&
+		    (val != 12) && (val != 24) && (val != 32)) {
+			dev_err(host->dev,
+				"Required ECC strength not supported: %u\n",
 				val);
 			return -EINVAL;
 		}
@@ -1560,7 +1582,7 @@ static int atmel_of_init_port(struct atmel_nand_host *host,
 	if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) {
 		if ((val != 512) && (val != 1024)) {
 			dev_err(host->dev,
-				"Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n",
+				"Required ECC sector size not supported: %u\n",
 				val);
 			return -EINVAL;
 		}
@@ -1677,9 +1699,9 @@ static irqreturn_t hsmc_interrupt(int irq, void *dev_id)
 		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_XFR_DONE);
 		ret = IRQ_HANDLED;
 	}
-	if (pending & NFC_SR_RB_EDGE) {
+	if (pending & host->nfc->caps->rb_mask) {
 		complete(&host->nfc->comp_ready);
-		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_RB_EDGE);
+		nfc_writel(host->nfc->hsmc_regs, IDR, host->nfc->caps->rb_mask);
 		ret = IRQ_HANDLED;
 	}
 	if (pending & NFC_SR_CMD_DONE) {
@@ -1697,7 +1719,7 @@ static void nfc_prepare_interrupt(struct atmel_nand_host *host, u32 flag)
 	if (flag & NFC_SR_XFR_DONE)
 		init_completion(&host->nfc->comp_xfer_done);
 
-	if (flag & NFC_SR_RB_EDGE)
+	if (flag & host->nfc->caps->rb_mask)
 		init_completion(&host->nfc->comp_ready);
 
 	if (flag & NFC_SR_CMD_DONE)
@@ -1715,7 +1737,7 @@ static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
 	if (flag & NFC_SR_XFR_DONE)
 		comp[index++] = &host->nfc->comp_xfer_done;
 
-	if (flag & NFC_SR_RB_EDGE)
+	if (flag & host->nfc->caps->rb_mask)
 		comp[index++] = &host->nfc->comp_ready;
 
 	if (flag & NFC_SR_CMD_DONE)
@@ -1783,7 +1805,7 @@ static int nfc_device_ready(struct mtd_info *mtd)
 		dev_err(host->dev, "Lost the interrupt flags: 0x%08x\n",
 				mask & status);
 
-	return status & NFC_SR_RB_EDGE;
+	return status & host->nfc->caps->rb_mask;
 }
 
 static void nfc_select_chip(struct mtd_info *mtd, int chip)
@@ -1956,8 +1978,8 @@ static void nfc_nand_command(struct mtd_info *mtd, unsigned int command,
 		}
 		/* fall through */
 	default:
-		nfc_prepare_interrupt(host, NFC_SR_RB_EDGE);
-		nfc_wait_interrupt(host, NFC_SR_RB_EDGE);
+		nfc_prepare_interrupt(host, host->nfc->caps->rb_mask);
+		nfc_wait_interrupt(host, host->nfc->caps->rb_mask);
 	}
 }
 
@@ -2304,17 +2326,34 @@ static int atmel_nand_remove(struct platform_device *pdev)
 	return 0;
 }
 
+/*
+ * AT91RM9200 does not have PMECC or PMECC Errloc peripherals for
+ * BCH ECC. Combined with the "atmel,has-pmecc", it is used to describe
+ * devices from the SAM9 family that have those.
+ */
 static const struct atmel_nand_caps at91rm9200_caps = {
 	.pmecc_correct_erase_page = false,
+	.pmecc_max_correction = 24,
 };
 
 static const struct atmel_nand_caps sama5d4_caps = {
 	.pmecc_correct_erase_page = true,
+	.pmecc_max_correction = 24,
+};
+
+/*
+ * The PMECC Errloc controller starting in SAMA5D2 is not compatible,
+ * as the increased correction strength requires more registers.
+ */
+static const struct atmel_nand_caps sama5d2_caps = {
+	.pmecc_correct_erase_page = true,
+	.pmecc_max_correction = 32,
 };
 
 static const struct of_device_id atmel_nand_dt_ids[] = {
 	{ .compatible = "atmel,at91rm9200-nand", .data = &at91rm9200_caps },
 	{ .compatible = "atmel,sama5d4-nand", .data = &sama5d4_caps },
+	{ .compatible = "atmel,sama5d2-nand", .data = &sama5d2_caps },
 	{ /* sentinel */ }
 };
 
@@ -2354,6 +2393,11 @@ static int atmel_nand_nfc_probe(struct platform_device *pdev)
 		}
 	}
 
+	nfc->caps = (const struct atmel_nand_nfc_caps *)
+		of_device_get_match_data(&pdev->dev);
+	if (!nfc->caps)
+		return -ENODEV;
+
 	nfc_writel(nfc->hsmc_regs, IDR, 0xffffffff);
 	nfc_readl(nfc->hsmc_regs, SR);	/* clear the NFC_SR */
 
@@ -2382,8 +2426,17 @@ static int atmel_nand_nfc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct atmel_nand_nfc_caps sama5d3_nfc_caps = {
+	.rb_mask = NFC_SR_RB_EDGE0,
+};
+
+static const struct atmel_nand_nfc_caps sama5d4_nfc_caps = {
+	.rb_mask = NFC_SR_RB_EDGE3,
+};
+
 static const struct of_device_id atmel_nand_nfc_match[] = {
-	{ .compatible = "atmel,sama5d3-nfc" },
+	{ .compatible = "atmel,sama5d3-nfc", .data = &sama5d3_nfc_caps },
+	{ .compatible = "atmel,sama5d4-nfc", .data = &sama5d4_nfc_caps },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, atmel_nand_nfc_match);
diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h
index 668e7358f19b..834d694487bd 100644
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ b/drivers/mtd/nand/atmel_nand_ecc.h
@@ -43,6 +43,7 @@
 #define		PMECC_CFG_BCH_ERR8		(2 << 0)
 #define		PMECC_CFG_BCH_ERR12		(3 << 0)
 #define		PMECC_CFG_BCH_ERR24		(4 << 0)
+#define		PMECC_CFG_BCH_ERR32		(5 << 0)
 
 #define		PMECC_CFG_SECTOR512		(0 << 4)
 #define		PMECC_CFG_SECTOR1024		(1 << 4)
@@ -108,7 +109,11 @@
 #define		PMERRLOC_ERR_NUM_MASK		(0x1f << 8)
 #define		PMERRLOC_CALC_DONE		(1 << 0)
 #define ATMEL_PMERRLOC_SIGMAx		0x028	/* Error location SIGMA x */
-#define ATMEL_PMERRLOC_ELx		0x08c	/* Error location x */
+
+/*
+ * The ATMEL_PMERRLOC_ELx register location depends from the number of
+ * bits corrected by the PMECC controller. Do not use it.
+ */
 
 /* Register access macros for PMECC */
 #define pmecc_readl_relaxed(addr, reg) \
@@ -136,7 +141,7 @@
 	readl_relaxed((addr) + ATMEL_PMERRLOC_SIGMAx + ((n) * 4))
 
 #define pmerrloc_readl_el_relaxed(addr, n) \
-	readl_relaxed((addr) + ATMEL_PMERRLOC_ELx + ((n) * 4))
+	readl_relaxed((addr) + ((n) * 4))
 
 /* Galois field dimension */
 #define PMECC_GF_DIMENSION_13			13
diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h
index 4d5d26221a7e..0bbc1fa97dba 100644
--- a/drivers/mtd/nand/atmel_nand_nfc.h
+++ b/drivers/mtd/nand/atmel_nand_nfc.h
@@ -42,7 +42,8 @@
 #define		NFC_SR_UNDEF		(1 << 21)
 #define		NFC_SR_AWB		(1 << 22)
 #define		NFC_SR_ASE		(1 << 23)
-#define		NFC_SR_RB_EDGE		(1 << 24)
+#define		NFC_SR_RB_EDGE0		(1 << 24)
+#define		NFC_SR_RB_EDGE3		(1 << 27)
 
 #define ATMEL_HSMC_NFC_IER	0x0c
 #define ATMEL_HSMC_NFC_IDR	0x10
diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 844fc07d22cd..e0528397306a 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -311,6 +311,36 @@ static const u16 brcmnand_regs_v60[] = {
 	[BRCMNAND_FC_BASE]		= 0x400,
 };
 
+/* BRCMNAND v7.1 */
+static const u16 brcmnand_regs_v71[] = {
+	[BRCMNAND_CMD_START]		=  0x04,
+	[BRCMNAND_CMD_EXT_ADDRESS]	=  0x08,
+	[BRCMNAND_CMD_ADDRESS]		=  0x0c,
+	[BRCMNAND_INTFC_STATUS]		=  0x14,
+	[BRCMNAND_CS_SELECT]		=  0x18,
+	[BRCMNAND_CS_XOR]		=  0x1c,
+	[BRCMNAND_LL_OP]		=  0x20,
+	[BRCMNAND_CS0_BASE]		=  0x50,
+	[BRCMNAND_CS1_BASE]		=     0,
+	[BRCMNAND_CORR_THRESHOLD]	=  0xdc,
+	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
+	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
+	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
+	[BRCMNAND_CORR_ADDR]		= 0x110,
+	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
+	[BRCMNAND_UNCORR_ADDR]		= 0x118,
+	[BRCMNAND_SEMAPHORE]		= 0x150,
+	[BRCMNAND_ID]			= 0x194,
+	[BRCMNAND_ID_EXT]		= 0x198,
+	[BRCMNAND_LL_RDATA]		= 0x19c,
+	[BRCMNAND_OOB_READ_BASE]	= 0x200,
+	[BRCMNAND_OOB_READ_10_BASE]	=     0,
+	[BRCMNAND_OOB_WRITE_BASE]	= 0x280,
+	[BRCMNAND_OOB_WRITE_10_BASE]	=     0,
+	[BRCMNAND_FC_BASE]		= 0x400,
+};
+
 enum brcmnand_cs_reg {
 	BRCMNAND_CS_CFG_EXT = 0,
 	BRCMNAND_CS_CFG,
@@ -406,7 +436,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl)
 	}
 
 	/* Register offsets */
-	if (ctrl->nand_version >= 0x0600)
+	if (ctrl->nand_version >= 0x0701)
+		ctrl->reg_offsets = brcmnand_regs_v71;
+	else if (ctrl->nand_version >= 0x0600)
 		ctrl->reg_offsets = brcmnand_regs_v60;
 	else if (ctrl->nand_version >= 0x0500)
 		ctrl->reg_offsets = brcmnand_regs_v50;
@@ -796,7 +828,8 @@ static struct nand_ecclayout *brcmnand_create_layout(int ecc_level,
 				    idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1)
 				break;
 		}
-		goto out;
+
+		return layout;
 	}
 
 	/*
@@ -847,10 +880,7 @@ static struct nand_ecclayout *brcmnand_create_layout(int ecc_level,
 				idx2 >= MTD_MAX_OOBFREE_ENTRIES_LARGE - 1)
 			break;
 	}
-out:
-	/* Sum available OOB */
-	for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES_LARGE; i++)
-		layout->oobavail += layout->oobfree[i].length;
+
 	return layout;
 }
 
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index aa1a616b9fb6..e553aff68987 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -537,7 +537,7 @@ static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd,
 	return 0;
 }
 
-static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
 	return 0;
 }
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index f170f3c31b34..547c1002941d 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -794,7 +794,7 @@ static int doc200x_dev_ready(struct mtd_info *mtd)
 	}
 }
 
-static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
 	/* This is our last resort if we couldn't find or create a BBT.  Just
 	   pretend all blocks are good. */
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index df4165b02c62..d86a60e1bbcb 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -225,7 +225,6 @@ struct docg4_priv {
 static struct nand_ecclayout docg4_oobinfo = {
 	.eccbytes = 9,
 	.eccpos = {7, 8, 9, 10, 11, 12, 13, 14, 15},
-	.oobavail = 5,
 	.oobfree = { {.offset = 2, .length = 5} }
 };
 
@@ -1121,7 +1120,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	return ret;
 }
 
-static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs)
 {
 	/* only called when module_param ignore_badblocks is set */
 	return 0;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 235ddcb58f39..8122c699ccf2 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1,7 +1,7 @@
 /*
  * Freescale GPMI NAND Flash Driver
  *
- * Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
+ * Copyright (C) 2010-2015 Freescale Semiconductor, Inc.
  * Copyright (C) 2008 Embedded Alley Solutions, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -136,7 +136,7 @@ static inline bool gpmi_check_ecc(struct gpmi_nand_data *this)
  *
  * We may have available oob space in this case.
  */
-static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
+static int set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 {
 	struct bch_geometry *geo = &this->bch_geometry;
 	struct nand_chip *chip = &this->nand;
@@ -145,7 +145,7 @@ static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 	unsigned int block_mark_bit_offset;
 
 	if (!(chip->ecc_strength_ds > 0 && chip->ecc_step_ds > 0))
-		return false;
+		return -EINVAL;
 
 	switch (chip->ecc_step_ds) {
 	case SZ_512:
@@ -158,19 +158,19 @@ static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 		dev_err(this->dev,
 			"unsupported nand chip. ecc bits : %d, ecc size : %d\n",
 			chip->ecc_strength_ds, chip->ecc_step_ds);
-		return false;
+		return -EINVAL;
 	}
 	geo->ecc_chunk_size = chip->ecc_step_ds;
 	geo->ecc_strength = round_up(chip->ecc_strength_ds, 2);
 	if (!gpmi_check_ecc(this))
-		return false;
+		return -EINVAL;
 
 	/* Keep the C >= O */
 	if (geo->ecc_chunk_size < mtd->oobsize) {
 		dev_err(this->dev,
 			"unsupported nand chip. ecc size: %d, oob size : %d\n",
 			chip->ecc_step_ds, mtd->oobsize);
-		return false;
+		return -EINVAL;
 	}
 
 	/* The default value, see comment in the legacy_set_geometry(). */
@@ -242,7 +242,7 @@ static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 				+ ALIGN(geo->ecc_chunk_count, 4);
 
 	if (!this->swap_block_mark)
-		return true;
+		return 0;
 
 	/* For bit swap. */
 	block_mark_bit_offset = mtd->writesize * 8 -
@@ -251,7 +251,7 @@ static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
 
 	geo->block_mark_byte_offset = block_mark_bit_offset / 8;
 	geo->block_mark_bit_offset  = block_mark_bit_offset % 8;
-	return true;
+	return 0;
 }
 
 static int legacy_set_geometry(struct gpmi_nand_data *this)
@@ -285,7 +285,8 @@ static int legacy_set_geometry(struct gpmi_nand_data *this)
 	geo->ecc_strength = get_ecc_strength(this);
 	if (!gpmi_check_ecc(this)) {
 		dev_err(this->dev,
-			"required ecc strength of the NAND chip: %d is not supported by the GPMI controller (%d)\n",
+			"ecc strength: %d cannot be supported by the controller (%d)\n"
+			"try to use minimum ecc strength that NAND chip required\n",
 			geo->ecc_strength,
 			this->devdata->bch_max_ecc_strength);
 		return -EINVAL;
@@ -366,10 +367,11 @@ static int legacy_set_geometry(struct gpmi_nand_data *this)
 
 int common_nfc_set_geometry(struct gpmi_nand_data *this)
 {
-	if (of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc")
-		&& set_geometry_by_ecc_info(this))
-		return 0;
-	return legacy_set_geometry(this);
+	if ((of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc"))
+				|| legacy_set_geometry(this))
+		return set_geometry_by_ecc_info(this);
+
+	return 0;
 }
 
 struct dma_chan *get_dma_chan(struct gpmi_nand_data *this)
@@ -2033,9 +2035,54 @@ static int gpmi_nand_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int gpmi_pm_suspend(struct device *dev)
+{
+	struct gpmi_nand_data *this = dev_get_drvdata(dev);
+
+	release_dma_channels(this);
+	return 0;
+}
+
+static int gpmi_pm_resume(struct device *dev)
+{
+	struct gpmi_nand_data *this = dev_get_drvdata(dev);
+	int ret;
+
+	ret = acquire_dma_channels(this);
+	if (ret < 0)
+		return ret;
+
+	/* re-init the GPMI registers */
+	this->flags &= ~GPMI_TIMING_INIT_OK;
+	ret = gpmi_init(this);
+	if (ret) {
+		dev_err(this->dev, "Error setting GPMI : %d\n", ret);
+		return ret;
+	}
+
+	/* re-init the BCH registers */
+	ret = bch_set_geometry(this);
+	if (ret) {
+		dev_err(this->dev, "Error setting BCH : %d\n", ret);
+		return ret;
+	}
+
+	/* re-init others */
+	gpmi_extra_init(this);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops gpmi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(gpmi_pm_suspend, gpmi_pm_resume)
+};
+
 static struct platform_driver gpmi_nand_driver = {
 	.driver = {
 		.name = "gpmi-nand",
+		.pm = &gpmi_pm_ops,
 		.of_match_table = gpmi_nand_id_table,
 	},
 	.probe   = gpmi_nand_probe,
diff --git a/drivers/mtd/nand/hisi504_nand.c b/drivers/mtd/nand/hisi504_nand.c
index f8d37f36a81c..96502b624cfb 100644
--- a/drivers/mtd/nand/hisi504_nand.c
+++ b/drivers/mtd/nand/hisi504_nand.c
@@ -632,7 +632,6 @@ static void hisi_nfc_host_init(struct hinfc_host *host)
 }
 
 static struct nand_ecclayout nand_ecc_2K_16bits = {
-	.oobavail = 6,
 	.oobfree = { {2, 6} },
 };
 
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index b19d2a9a5eb9..673ceb2a0b44 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -427,9 +427,6 @@ static int jz_nand_probe(struct platform_device *pdev)
 	chip->ecc.strength	= 4;
 	chip->ecc.options	= NAND_ECC_GENERIC_ERASED_CHECK;
 
-	if (pdata)
-		chip->ecc.layout = pdata->ecc_layout;
-
 	chip->chip_delay = 50;
 	chip->cmd_ctrl = jz_nand_cmd_ctrl;
 	chip->select_chip = jz_nand_select_chip;
diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index 9bc435d72a86..d8c3e7afcc0b 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -750,7 +750,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	}
 
 	nand_chip->ecc.mode = NAND_ECC_HW;
-	nand_chip->ecc.size = mtd->writesize;
+	nand_chip->ecc.size = 512;
 	nand_chip->ecc.layout = &lpc32xx_nand_oob;
 	host->mlcsubpages = mtd->writesize / 512;
 
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 6b93e899d4e9..5d7843ffff6a 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -626,7 +626,7 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd)
 
 static int mpc5121_nfc_probe(struct platform_device *op)
 {
-	struct device_node *rootnode, *dn = op->dev.of_node;
+	struct device_node *dn = op->dev.of_node;
 	struct clk *clk;
 	struct device *dev = &op->dev;
 	struct mpc5121_nfc_prv *prv;
@@ -712,18 +712,15 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->ecc.mode = NAND_ECC_SOFT;
 
 	/* Support external chip-select logic on ADS5121 board */
-	rootnode = of_find_node_by_path("/");
-	if (of_device_is_compatible(rootnode, "fsl,mpc5121ads")) {
+	if (of_machine_is_compatible("fsl,mpc5121ads")) {
 		retval = ads5121_chipselect_init(mtd);
 		if (retval) {
 			dev_err(dev, "Chipselect init error!\n");
-			of_node_put(rootnode);
 			return retval;
 		}
 
 		chip->select_chip = ads5121_select_chip;
 	}
-	of_node_put(rootnode);
 
 	/* Enable NFC clock */
 	clk = devm_clk_get(dev, "ipg");
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f2c8ff398d6c..b6facac54fc0 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -313,13 +313,12 @@ static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
  * nand_block_bad - [DEFAULT] Read bad block marker from the chip
  * @mtd: MTD device structure
  * @ofs: offset from device start
- * @getchip: 0, if the chip is already selected
  *
  * Check, if the block is bad.
  */
-static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
+static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 {
-	int page, chipnr, res = 0, i = 0;
+	int page, res = 0, i = 0;
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	u16 bad;
 
@@ -328,15 +327,6 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
 
 	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
 
-	if (getchip) {
-		chipnr = (int)(ofs >> chip->chip_shift);
-
-		nand_get_device(mtd, FL_READING);
-
-		/* Select the NAND device */
-		chip->select_chip(mtd, chipnr);
-	}
-
 	do {
 		if (chip->options & NAND_BUSWIDTH_16) {
 			chip->cmdfunc(mtd, NAND_CMD_READOOB,
@@ -361,11 +351,6 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
 		i++;
 	} while (!res && i < 2 && (chip->bbt_options & NAND_BBT_SCAN2NDPAGE));
 
-	if (getchip) {
-		chip->select_chip(mtd, -1);
-		nand_release_device(mtd);
-	}
-
 	return res;
 }
 
@@ -503,19 +488,17 @@ static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs)
  * nand_block_checkbad - [GENERIC] Check if a block is marked bad
  * @mtd: MTD device structure
  * @ofs: offset from device start
- * @getchip: 0, if the chip is already selected
  * @allowbbt: 1, if its allowed to access the bbt area
  *
  * Check, if the block is bad. Either by reading the bad block table or
  * calling of the scan function.
  */
-static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int getchip,
-			       int allowbbt)
+static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
 	if (!chip->bbt)
-		return chip->block_bad(mtd, ofs, getchip);
+		return chip->block_bad(mtd, ofs);
 
 	/* Return info from the table */
 	return nand_isbad_bbt(mtd, ofs, allowbbt);
@@ -566,8 +549,8 @@ void nand_wait_ready(struct mtd_info *mtd)
 		cond_resched();
 	} while (time_before(jiffies, timeo));
 
-	pr_warn_ratelimited(
-		"timeout while waiting for chip to become ready\n");
+	if (!chip->dev_ready(mtd))
+		pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
 out:
 	led_trigger_event(nand_led_trigger, LED_OFF);
 }
@@ -1723,8 +1706,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 	int ret = 0;
 	uint32_t readlen = ops->len;
 	uint32_t oobreadlen = ops->ooblen;
-	uint32_t max_oobsize = ops->mode == MTD_OPS_AUTO_OOB ?
-		mtd->oobavail : mtd->oobsize;
+	uint32_t max_oobsize = mtd_oobavail(mtd, ops);
 
 	uint8_t *bufpoi, *oob, *buf;
 	int use_bufpoi;
@@ -2075,10 +2057,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 
 	stats = mtd->ecc_stats;
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		len = chip->ecc.layout->oobavail;
-	else
-		len = mtd->oobsize;
+	len = mtd_oobavail(mtd, ops);
 
 	if (unlikely(ops->ooboffs >= len)) {
 		pr_debug("%s: attempt to start read outside oob\n",
@@ -2575,8 +2554,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 	uint32_t writelen = ops->len;
 
 	uint32_t oobwritelen = ops->ooblen;
-	uint32_t oobmaxlen = ops->mode == MTD_OPS_AUTO_OOB ?
-				mtd->oobavail : mtd->oobsize;
+	uint32_t oobmaxlen = mtd_oobavail(mtd, ops);
 
 	uint8_t *oob = ops->oobbuf;
 	uint8_t *buf = ops->datbuf;
@@ -2766,10 +2744,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 	pr_debug("%s: to = 0x%08x, len = %i\n",
 			 __func__, (unsigned int)to, (int)ops->ooblen);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		len = chip->ecc.layout->oobavail;
-	else
-		len = mtd->oobsize;
+	len = mtd_oobavail(mtd, ops);
 
 	/* Do not allow write past end of page */
 	if ((ops->ooboffs + ops->ooblen) > len) {
@@ -2957,7 +2932,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 	while (len) {
 		/* Check if we have a bad block, we do not erase bad blocks! */
 		if (nand_block_checkbad(mtd, ((loff_t) page) <<
-					chip->page_shift, 0, allowbbt)) {
+					chip->page_shift, allowbbt)) {
 			pr_warn("%s: attempt to erase a bad block at page 0x%08x\n",
 				    __func__, page);
 			instr->state = MTD_ERASE_FAILED;
@@ -3044,7 +3019,20 @@ static void nand_sync(struct mtd_info *mtd)
  */
 static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 {
-	return nand_block_checkbad(mtd, offs, 1, 0);
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	int chipnr = (int)(offs >> chip->chip_shift);
+	int ret;
+
+	/* Select the NAND device */
+	nand_get_device(mtd, FL_READING);
+	chip->select_chip(mtd, chipnr);
+
+	ret = nand_block_checkbad(mtd, offs, 0);
+
+	chip->select_chip(mtd, -1);
+	nand_release_device(mtd);
+
+	return ret;
 }
 
 /**
@@ -4287,10 +4275,8 @@ int nand_scan_tail(struct mtd_info *mtd)
 		}
 
 		/* See nand_bch_init() for details. */
-		ecc->bytes = DIV_ROUND_UP(
-				ecc->strength * fls(8 * ecc->size), 8);
-		ecc->priv = nand_bch_init(mtd, ecc->size, ecc->bytes,
-					       &ecc->layout);
+		ecc->bytes = 0;
+		ecc->priv = nand_bch_init(mtd);
 		if (!ecc->priv) {
 			pr_warn("BCH ECC initialization failed!\n");
 			BUG();
@@ -4325,11 +4311,11 @@ int nand_scan_tail(struct mtd_info *mtd)
 	 * The number of bytes available for a client to place data into
 	 * the out of band area.
 	 */
-	ecc->layout->oobavail = 0;
-	for (i = 0; ecc->layout->oobfree[i].length
-			&& i < ARRAY_SIZE(ecc->layout->oobfree); i++)
-		ecc->layout->oobavail += ecc->layout->oobfree[i].length;
-	mtd->oobavail = ecc->layout->oobavail;
+	mtd->oobavail = 0;
+	if (ecc->layout) {
+		for (i = 0; ecc->layout->oobfree[i].length; i++)
+			mtd->oobavail += ecc->layout->oobfree[i].length;
+	}
 
 	/* ECC sanity check: warn if it's too weak */
 	if (!nand_ecc_strength_good(mtd))
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 4b6a7085b442..2fbb523df066 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -1373,5 +1373,3 @@ int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs)
 
 	return ret;
 }
-
-EXPORT_SYMBOL(nand_scan_bbt);
diff --git a/drivers/mtd/nand/nand_bch.c b/drivers/mtd/nand/nand_bch.c
index a87c1b628dfc..b585bae37929 100644
--- a/drivers/mtd/nand/nand_bch.c
+++ b/drivers/mtd/nand/nand_bch.c
@@ -107,9 +107,6 @@ EXPORT_SYMBOL(nand_bch_correct_data);
 /**
  * nand_bch_init - [NAND Interface] Initialize NAND BCH error correction
  * @mtd:	MTD block structure
- * @eccsize:	ecc block size in bytes
- * @eccbytes:	ecc length in bytes
- * @ecclayout:	output default layout
  *
  * Returns:
  *  a pointer to a new NAND BCH control structure, or NULL upon failure
@@ -123,14 +120,21 @@ EXPORT_SYMBOL(nand_bch_correct_data);
  * @eccsize = 512  (thus, m=13 is the smallest integer such that 2^m-1 > 512*8)
  * @eccbytes = 7   (7 bytes are required to store m*t = 13*4 = 52 bits)
  */
-struct nand_bch_control *
-nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
-	      struct nand_ecclayout **ecclayout)
+struct nand_bch_control *nand_bch_init(struct mtd_info *mtd)
 {
+	struct nand_chip *nand = mtd_to_nand(mtd);
 	unsigned int m, t, eccsteps, i;
-	struct nand_ecclayout *layout;
+	struct nand_ecclayout *layout = nand->ecc.layout;
 	struct nand_bch_control *nbc = NULL;
 	unsigned char *erased_page;
+	unsigned int eccsize = nand->ecc.size;
+	unsigned int eccbytes = nand->ecc.bytes;
+	unsigned int eccstrength = nand->ecc.strength;
+
+	if (!eccbytes && eccstrength) {
+		eccbytes = DIV_ROUND_UP(eccstrength * fls(8 * eccsize), 8);
+		nand->ecc.bytes = eccbytes;
+	}
 
 	if (!eccsize || !eccbytes) {
 		printk(KERN_WARNING "ecc parameters not supplied\n");
@@ -158,7 +162,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
 	eccsteps = mtd->writesize/eccsize;
 
 	/* if no ecc placement scheme was provided, build one */
-	if (!*ecclayout) {
+	if (!layout) {
 
 		/* handle large page devices only */
 		if (mtd->oobsize < 64) {
@@ -184,7 +188,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
 		layout->oobfree[0].offset = 2;
 		layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes;
 
-		*ecclayout = layout;
+		nand->ecc.layout = layout;
 	}
 
 	/* sanity checks */
@@ -192,7 +196,7 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
 		printk(KERN_WARNING "eccsize %u is too large\n", eccsize);
 		goto fail;
 	}
-	if ((*ecclayout)->eccbytes != (eccsteps*eccbytes)) {
+	if (layout->eccbytes != (eccsteps*eccbytes)) {
 		printk(KERN_WARNING "invalid ecc layout\n");
 		goto fail;
 	}
@@ -216,6 +220,9 @@ nand_bch_init(struct mtd_info *mtd, unsigned int eccsize, unsigned int eccbytes,
 	for (i = 0; i < eccbytes; i++)
 		nbc->eccmask[i] ^= 0xff;
 
+	if (!eccstrength)
+		nand->ecc.strength = (eccbytes * 8) / fls(8 * eccsize);
+
 	return nbc;
 fail:
 	nand_bch_free(nbc);
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index a8804a3da076..ccc05f5b2695 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -50,8 +50,8 @@ struct nand_flash_dev nand_flash_ids[] = {
 		  SZ_16K, SZ_8K, SZ_4M, 0, 6, 1280, NAND_ECC_INFO(40, SZ_1K) },
 	{"H27UCG8T2ATR-BC 64G 3.3V 8-bit",
 		{ .id = {0xad, 0xde, 0x94, 0xda, 0x74, 0xc4} },
-		  SZ_8K, SZ_8K, SZ_2M, 0, 6, 640, NAND_ECC_INFO(40, SZ_1K),
-		  4 },
+		  SZ_8K, SZ_8K, SZ_2M, NAND_NEED_SCRAMBLING, 6, 640,
+		  NAND_ECC_INFO(40, SZ_1K), 4 },
 
 	LEGACY_ID_NAND("NAND 4MiB 5V 8-bit",   0x6B, 4, SZ_8K, SP_OPTIONS),
 	LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS),
diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
index 220ddfcf29f5..dbc5b571c2bb 100644
--- a/drivers/mtd/nand/nuc900_nand.c
+++ b/drivers/mtd/nand/nuc900_nand.c
@@ -113,7 +113,7 @@ static int nuc900_check_rb(struct nuc900_nand *nand)
 {
 	unsigned int val;
 	spin_lock(&nand->lock);
-	val = __raw_readl(REG_SMISR);
+	val = __raw_readl(nand->reg + REG_SMISR);
 	val &= READYBUSY;
 	spin_unlock(&nand->lock);
 
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index c553f78ab83f..0749ca1a1456 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -1807,13 +1807,19 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto return_error;
 	}
 
+	/*
+	 * Bail out earlier to let NAND_ECC_SOFT code create its own
+	 * ecclayout instead of using ours.
+	 */
+	if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) {
+		nand_chip->ecc.mode = NAND_ECC_SOFT;
+		goto scan_tail;
+	}
+
 	/* populate MTD interface based on ECC scheme */
 	ecclayout		= &info->oobinfo;
+	nand_chip->ecc.layout	= ecclayout;
 	switch (info->ecc_opt) {
-	case OMAP_ECC_HAM1_CODE_SW:
-		nand_chip->ecc.mode = NAND_ECC_SOFT;
-		break;
-
 	case OMAP_ECC_HAM1_CODE_HW:
 		pr_info("nand: using OMAP_ECC_HAM1_CODE_HW\n");
 		nand_chip->ecc.mode             = NAND_ECC_HW;
@@ -1861,10 +1867,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 		ecclayout->oobfree->offset	= 1 +
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		/* software bch library is used for locating errors */
-		nand_chip->ecc.priv		= nand_bch_init(mtd,
-							nand_chip->ecc.size,
-							nand_chip->ecc.bytes,
-							&ecclayout);
+		nand_chip->ecc.priv		= nand_bch_init(mtd);
 		if (!nand_chip->ecc.priv) {
 			dev_err(&info->pdev->dev, "unable to use BCH library\n");
 			err = -EINVAL;
@@ -1925,10 +1928,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 		ecclayout->oobfree->offset	= 1 +
 				ecclayout->eccpos[ecclayout->eccbytes - 1] + 1;
 		/* software bch library is used for locating errors */
-		nand_chip->ecc.priv		= nand_bch_init(mtd,
-							nand_chip->ecc.size,
-							nand_chip->ecc.bytes,
-							&ecclayout);
+		nand_chip->ecc.priv		= nand_bch_init(mtd);
 		if (!nand_chip->ecc.priv) {
 			dev_err(&info->pdev->dev, "unable to use BCH library\n");
 			err = -EINVAL;
@@ -2002,9 +2002,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 		goto return_error;
 	}
 
-	if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW)
-		goto scan_tail;
-
 	/* all OOB bytes from oobfree->offset till end off OOB are free */
 	ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset;
 	/* check if NAND device's OOB is enough to store ECC signatures */
@@ -2015,7 +2012,6 @@ static int omap_nand_probe(struct platform_device *pdev)
 		err = -EINVAL;
 		goto return_error;
 	}
-	nand_chip->ecc.layout = ecclayout;
 
 scan_tail:
 	/* second phase scan */
diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index a0e26dea1424..e4e50da30444 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -73,7 +73,6 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.bbt_options |= pdata->chip.bbt_options;
 
 	data->chip.ecc.hwctl = pdata->ctrl.hwcontrol;
-	data->chip.ecc.layout = pdata->chip.ecclayout;
 	data->chip.ecc.mode = NAND_ECC_SOFT;
 
 	platform_set_drvdata(pdev, data);
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 86fc245dc71a..d6508856da99 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -131,11 +131,23 @@
 #define READ_ID_BYTES		7
 
 /* macros for registers read/write */
-#define nand_writel(info, off, val)	\
-	writel_relaxed((val), (info)->mmio_base + (off))
-
-#define nand_readl(info, off)		\
-	readl_relaxed((info)->mmio_base + (off))
+#define nand_writel(info, off, val)					\
+	do {								\
+		dev_vdbg(&info->pdev->dev,				\
+			 "%s():%d nand_writel(0x%x, 0x%04x)\n",		\
+			 __func__, __LINE__, (val), (off));		\
+		writel_relaxed((val), (info)->mmio_base + (off));	\
+	} while (0)
+
+#define nand_readl(info, off)						\
+	({								\
+		unsigned int _v;					\
+		_v = readl_relaxed((info)->mmio_base + (off));		\
+		dev_vdbg(&info->pdev->dev,				\
+			 "%s():%d nand_readl(0x%04x) = 0x%x\n",		\
+			 __func__, __LINE__, (off), _v);		\
+		_v;							\
+	})
 
 /* error code and state */
 enum {
@@ -199,7 +211,6 @@ struct pxa3xx_nand_info {
 	struct dma_chan		*dma_chan;
 	dma_cookie_t		dma_cookie;
 	int			drcmr_dat;
-	int			drcmr_cmd;
 
 	unsigned char		*data_buff;
 	unsigned char		*oob_buff;
@@ -222,15 +233,44 @@ struct pxa3xx_nand_info {
 	int			use_spare;	/* use spare ? */
 	int			need_wait;
 
-	unsigned int		data_size;	/* data to be read from FIFO */
-	unsigned int		chunk_size;	/* split commands chunk size */
-	unsigned int		oob_size;
+	/* Amount of real data per full chunk */
+	unsigned int		chunk_size;
+
+	/* Amount of spare data per full chunk */
 	unsigned int		spare_size;
+
+	/* Number of full chunks (i.e chunk_size + spare_size) */
+	unsigned int            nfullchunks;
+
+	/*
+	 * Total number of chunks. If equal to nfullchunks, then there
+	 * are only full chunks. Otherwise, there is one last chunk of
+	 * size (last_chunk_size + last_spare_size)
+	 */
+	unsigned int            ntotalchunks;
+
+	/* Amount of real data in the last chunk */
+	unsigned int		last_chunk_size;
+
+	/* Amount of spare data in the last chunk */
+	unsigned int		last_spare_size;
+
 	unsigned int		ecc_size;
 	unsigned int		ecc_err_cnt;
 	unsigned int		max_bitflips;
 	int 			retcode;
 
+	/*
+	 * Variables only valid during command
+	 * execution. step_chunk_size and step_spare_size is the
+	 * amount of real data and spare data in the current
+	 * chunk. cur_chunk is the current chunk being
+	 * read/programmed.
+	 */
+	unsigned int		step_chunk_size;
+	unsigned int		step_spare_size;
+	unsigned int            cur_chunk;
+
 	/* cached register value */
 	uint32_t		reg_ndcr;
 	uint32_t		ndtr0cs0;
@@ -526,25 +566,6 @@ static int pxa3xx_nand_init(struct pxa3xx_nand_host *host)
 	return 0;
 }
 
-/*
- * Set the data and OOB size, depending on the selected
- * spare and ECC configuration.
- * Only applicable to READ0, READOOB and PAGEPROG commands.
- */
-static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info,
-				struct mtd_info *mtd)
-{
-	int oob_enable = info->reg_ndcr & NDCR_SPARE_EN;
-
-	info->data_size = mtd->writesize;
-	if (!oob_enable)
-		return;
-
-	info->oob_size = info->spare_size;
-	if (!info->use_ecc)
-		info->oob_size += info->ecc_size;
-}
-
 /**
  * NOTE: it is a must to set ND_RUN firstly, then write
  * command buffer, otherwise, it does not work.
@@ -660,28 +681,28 @@ static void drain_fifo(struct pxa3xx_nand_info *info, void *data, int len)
 
 static void handle_data_pio(struct pxa3xx_nand_info *info)
 {
-	unsigned int do_bytes = min(info->data_size, info->chunk_size);
-
 	switch (info->state) {
 	case STATE_PIO_WRITING:
-		writesl(info->mmio_base + NDDB,
-			info->data_buff + info->data_buff_pos,
-			DIV_ROUND_UP(do_bytes, 4));
+		if (info->step_chunk_size)
+			writesl(info->mmio_base + NDDB,
+				info->data_buff + info->data_buff_pos,
+				DIV_ROUND_UP(info->step_chunk_size, 4));
 
-		if (info->oob_size > 0)
+		if (info->step_spare_size)
 			writesl(info->mmio_base + NDDB,
 				info->oob_buff + info->oob_buff_pos,
-				DIV_ROUND_UP(info->oob_size, 4));
+				DIV_ROUND_UP(info->step_spare_size, 4));
 		break;
 	case STATE_PIO_READING:
-		drain_fifo(info,
-			   info->data_buff + info->data_buff_pos,
-			   DIV_ROUND_UP(do_bytes, 4));
+		if (info->step_chunk_size)
+			drain_fifo(info,
+				   info->data_buff + info->data_buff_pos,
+				   DIV_ROUND_UP(info->step_chunk_size, 4));
 
-		if (info->oob_size > 0)
+		if (info->step_spare_size)
 			drain_fifo(info,
 				   info->oob_buff + info->oob_buff_pos,
-				   DIV_ROUND_UP(info->oob_size, 4));
+				   DIV_ROUND_UP(info->step_spare_size, 4));
 		break;
 	default:
 		dev_err(&info->pdev->dev, "%s: invalid state %d\n", __func__,
@@ -690,9 +711,8 @@ static void handle_data_pio(struct pxa3xx_nand_info *info)
 	}
 
 	/* Update buffer pointers for multi-page read/write */
-	info->data_buff_pos += do_bytes;
-	info->oob_buff_pos += info->oob_size;
-	info->data_size -= do_bytes;
+	info->data_buff_pos += info->step_chunk_size;
+	info->oob_buff_pos += info->step_spare_size;
 }
 
 static void pxa3xx_nand_data_dma_irq(void *data)
@@ -733,8 +753,9 @@ static void start_data_dma(struct pxa3xx_nand_info *info)
 				info->state);
 		BUG();
 	}
-	info->sg.length = info->data_size +
-		(info->oob_size ? info->spare_size + info->ecc_size : 0);
+	info->sg.length = info->chunk_size;
+	if (info->use_spare)
+		info->sg.length += info->spare_size + info->ecc_size;
 	dma_map_sg(info->dma_chan->device->dev, &info->sg, 1, info->dma_dir);
 
 	tx = dmaengine_prep_slave_sg(info->dma_chan, &info->sg, 1, direction,
@@ -895,9 +916,11 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)
 	/* reset data and oob column point to handle data */
 	info->buf_start		= 0;
 	info->buf_count		= 0;
-	info->oob_size		= 0;
 	info->data_buff_pos	= 0;
 	info->oob_buff_pos	= 0;
+	info->step_chunk_size   = 0;
+	info->step_spare_size   = 0;
+	info->cur_chunk         = 0;
 	info->use_ecc		= 0;
 	info->use_spare		= 1;
 	info->retcode		= ERR_NONE;
@@ -909,8 +932,6 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)
 	case NAND_CMD_READ0:
 	case NAND_CMD_PAGEPROG:
 		info->use_ecc = 1;
-	case NAND_CMD_READOOB:
-		pxa3xx_set_datasize(info, mtd);
 		break;
 	case NAND_CMD_PARAM:
 		info->use_spare = 0;
@@ -969,6 +990,14 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 		if (command == NAND_CMD_READOOB)
 			info->buf_start += mtd->writesize;
 
+		if (info->cur_chunk < info->nfullchunks) {
+			info->step_chunk_size = info->chunk_size;
+			info->step_spare_size = info->spare_size;
+		} else {
+			info->step_chunk_size = info->last_chunk_size;
+			info->step_spare_size = info->last_spare_size;
+		}
+
 		/*
 		 * Multiple page read needs an 'extended command type' field,
 		 * which is either naked-read or last-read according to the
@@ -980,8 +1009,8 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 			info->ndcb0 |= NDCB0_DBC | (NAND_CMD_READSTART << 8)
 					| NDCB0_LEN_OVRD
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type);
-			info->ndcb3 = info->chunk_size +
-				      info->oob_size;
+			info->ndcb3 = info->step_chunk_size +
+				info->step_spare_size;
 		}
 
 		set_command_address(info, mtd->writesize, column, page_addr);
@@ -1001,8 +1030,6 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 				| NDCB0_EXT_CMD_TYPE(ext_cmd_type)
 				| addr_cycle
 				| command;
-			/* No data transfer in this case */
-			info->data_size = 0;
 			exec_cmd = 1;
 		}
 		break;
@@ -1014,6 +1041,14 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 			break;
 		}
 
+		if (info->cur_chunk < info->nfullchunks) {
+			info->step_chunk_size = info->chunk_size;
+			info->step_spare_size = info->spare_size;
+		} else {
+			info->step_chunk_size = info->last_chunk_size;
+			info->step_spare_size = info->last_spare_size;
+		}
+
 		/* Second command setting for large pages */
 		if (mtd->writesize > PAGE_CHUNK_SIZE) {
 			/*
@@ -1024,14 +1059,14 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 			info->ndcb0 |= NDCB0_CMD_TYPE(0x1)
 					| NDCB0_LEN_OVRD
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type);
-			info->ndcb3 = info->chunk_size +
-				      info->oob_size;
+			info->ndcb3 = info->step_chunk_size +
+				      info->step_spare_size;
 
 			/*
 			 * This is the command dispatch that completes a chunked
 			 * page program operation.
 			 */
-			if (info->data_size == 0) {
+			if (info->cur_chunk == info->ntotalchunks) {
 				info->ndcb0 = NDCB0_CMD_TYPE(0x1)
 					| NDCB0_EXT_CMD_TYPE(ext_cmd_type)
 					| command;
@@ -1058,7 +1093,7 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 				| command;
 		info->ndcb1 = (column & 0xFF);
 		info->ndcb3 = INIT_BUFFER_SIZE;
-		info->data_size = INIT_BUFFER_SIZE;
+		info->step_chunk_size = INIT_BUFFER_SIZE;
 		break;
 
 	case NAND_CMD_READID:
@@ -1068,7 +1103,7 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 				| command;
 		info->ndcb1 = (column & 0xFF);
 
-		info->data_size = 8;
+		info->step_chunk_size = 8;
 		break;
 	case NAND_CMD_STATUS:
 		info->buf_count = 1;
@@ -1076,7 +1111,7 @@ static int prepare_set_command(struct pxa3xx_nand_info *info, int command,
 				| NDCB0_ADDR_CYC(1)
 				| command;
 
-		info->data_size = 8;
+		info->step_chunk_size = 8;
 		break;
 
 	case NAND_CMD_ERASE1:
@@ -1217,6 +1252,7 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
 	init_completion(&info->dev_ready);
 	do {
 		info->state = STATE_PREPARED;
+
 		exec_cmd = prepare_set_command(info, command, ext_cmd_type,
 					       column, page_addr);
 		if (!exec_cmd) {
@@ -1236,22 +1272,30 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
 			break;
 		}
 
+		/* Only a few commands need several steps */
+		if (command != NAND_CMD_PAGEPROG &&
+		    command != NAND_CMD_READ0    &&
+		    command != NAND_CMD_READOOB)
+			break;
+
+		info->cur_chunk++;
+
 		/* Check if the sequence is complete */
-		if (info->data_size == 0 && command != NAND_CMD_PAGEPROG)
+		if (info->cur_chunk == info->ntotalchunks && command != NAND_CMD_PAGEPROG)
 			break;
 
 		/*
 		 * After a splitted program command sequence has issued
 		 * the command dispatch, the command sequence is complete.
 		 */
-		if (info->data_size == 0 &&
+		if (info->cur_chunk == (info->ntotalchunks + 1) &&
 		    command == NAND_CMD_PAGEPROG &&
 		    ext_cmd_type == EXT_CMD_TYPE_DISPATCH)
 			break;
 
 		if (command == NAND_CMD_READ0 || command == NAND_CMD_READOOB) {
 			/* Last read: issue a 'last naked read' */
-			if (info->data_size == info->chunk_size)
+			if (info->cur_chunk == info->ntotalchunks - 1)
 				ext_cmd_type = EXT_CMD_TYPE_LAST_RW;
 			else
 				ext_cmd_type = EXT_CMD_TYPE_NAKED_RW;
@@ -1261,7 +1305,7 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
 		 * the command dispatch must be issued to complete.
 		 */
 		} else if (command == NAND_CMD_PAGEPROG &&
-			   info->data_size == 0) {
+			   info->cur_chunk == info->ntotalchunks) {
 				ext_cmd_type = EXT_CMD_TYPE_DISPATCH;
 		}
 	} while (1);
@@ -1506,6 +1550,8 @@ static int pxa_ecc_init(struct pxa3xx_nand_info *info,
 			int strength, int ecc_stepsize, int page_size)
 {
 	if (strength == 1 && ecc_stepsize == 512 && page_size == 2048) {
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 2048;
 		info->spare_size = 40;
 		info->ecc_size = 24;
@@ -1514,6 +1560,8 @@ static int pxa_ecc_init(struct pxa3xx_nand_info *info,
 		ecc->strength = 1;
 
 	} else if (strength == 1 && ecc_stepsize == 512 && page_size == 512) {
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 512;
 		info->spare_size = 8;
 		info->ecc_size = 8;
@@ -1527,6 +1575,8 @@ static int pxa_ecc_init(struct pxa3xx_nand_info *info,
 	 */
 	} else if (strength == 4 && ecc_stepsize == 512 && page_size == 2048) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 1;
+		info->ntotalchunks = 1;
 		info->chunk_size = 2048;
 		info->spare_size = 32;
 		info->ecc_size = 32;
@@ -1537,6 +1587,8 @@ static int pxa_ecc_init(struct pxa3xx_nand_info *info,
 
 	} else if (strength == 4 && ecc_stepsize == 512 && page_size == 4096) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 2;
+		info->ntotalchunks = 2;
 		info->chunk_size = 2048;
 		info->spare_size = 32;
 		info->ecc_size = 32;
@@ -1551,8 +1603,12 @@ static int pxa_ecc_init(struct pxa3xx_nand_info *info,
 	 */
 	} else if (strength == 8 && ecc_stepsize == 512 && page_size == 4096) {
 		info->ecc_bch = 1;
+		info->nfullchunks = 4;
+		info->ntotalchunks = 5;
 		info->chunk_size = 1024;
 		info->spare_size = 0;
+		info->last_chunk_size = 0;
+		info->last_spare_size = 64;
 		info->ecc_size = 32;
 		ecc->mode = NAND_ECC_HW;
 		ecc->size = info->chunk_size;
@@ -1738,7 +1794,7 @@ static int alloc_nand_resource(struct platform_device *pdev)
 	if (ret < 0)
 		return ret;
 
-	if (use_dma) {
+	if (!np && use_dma) {
 		r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
 		if (r == NULL) {
 			dev_err(&pdev->dev,
@@ -1747,15 +1803,6 @@ static int alloc_nand_resource(struct platform_device *pdev)
 			goto fail_disable_clk;
 		}
 		info->drcmr_dat = r->start;
-
-		r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (r == NULL) {
-			dev_err(&pdev->dev,
-				"no resource defined for cmd DMA\n");
-			ret = -ENXIO;
-			goto fail_disable_clk;
-		}
-		info->drcmr_cmd = r->start;
 	}
 
 	irq = platform_get_irq(pdev, 0);
diff --git a/drivers/mtd/nand/qcom_nandc.c b/drivers/mtd/nand/qcom_nandc.c
new file mode 100644
index 000000000000..f550a57e6eea
--- /dev/null
+++ b/drivers/mtd/nand/qcom_nandc.c
@@ -0,0 +1,2223 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_mtd.h>
+#include <linux/delay.h>
+
+/* NANDc reg offsets */
+#define	NAND_FLASH_CMD			0x00
+#define	NAND_ADDR0			0x04
+#define	NAND_ADDR1			0x08
+#define	NAND_FLASH_CHIP_SELECT		0x0c
+#define	NAND_EXEC_CMD			0x10
+#define	NAND_FLASH_STATUS		0x14
+#define	NAND_BUFFER_STATUS		0x18
+#define	NAND_DEV0_CFG0			0x20
+#define	NAND_DEV0_CFG1			0x24
+#define	NAND_DEV0_ECC_CFG		0x28
+#define	NAND_DEV1_ECC_CFG		0x2c
+#define	NAND_DEV1_CFG0			0x30
+#define	NAND_DEV1_CFG1			0x34
+#define	NAND_READ_ID			0x40
+#define	NAND_READ_STATUS		0x44
+#define	NAND_DEV_CMD0			0xa0
+#define	NAND_DEV_CMD1			0xa4
+#define	NAND_DEV_CMD2			0xa8
+#define	NAND_DEV_CMD_VLD		0xac
+#define	SFLASHC_BURST_CFG		0xe0
+#define	NAND_ERASED_CW_DETECT_CFG	0xe8
+#define	NAND_ERASED_CW_DETECT_STATUS	0xec
+#define	NAND_EBI2_ECC_BUF_CFG		0xf0
+#define	FLASH_BUF_ACC			0x100
+
+#define	NAND_CTRL			0xf00
+#define	NAND_VERSION			0xf08
+#define	NAND_READ_LOCATION_0		0xf20
+#define	NAND_READ_LOCATION_1		0xf24
+
+/* dummy register offsets, used by write_reg_dma */
+#define	NAND_DEV_CMD1_RESTORE		0xdead
+#define	NAND_DEV_CMD_VLD_RESTORE	0xbeef
+
+/* NAND_FLASH_CMD bits */
+#define	PAGE_ACC			BIT(4)
+#define	LAST_PAGE			BIT(5)
+
+/* NAND_FLASH_CHIP_SELECT bits */
+#define	NAND_DEV_SEL			0
+#define	DM_EN				BIT(2)
+
+/* NAND_FLASH_STATUS bits */
+#define	FS_OP_ERR			BIT(4)
+#define	FS_READY_BSY_N			BIT(5)
+#define	FS_MPU_ERR			BIT(8)
+#define	FS_DEVICE_STS_ERR		BIT(16)
+#define	FS_DEVICE_WP			BIT(23)
+
+/* NAND_BUFFER_STATUS bits */
+#define	BS_UNCORRECTABLE_BIT		BIT(8)
+#define	BS_CORRECTABLE_ERR_MSK		0x1f
+
+/* NAND_DEVn_CFG0 bits */
+#define	DISABLE_STATUS_AFTER_WRITE	4
+#define	CW_PER_PAGE			6
+#define	UD_SIZE_BYTES			9
+#define	ECC_PARITY_SIZE_BYTES_RS	19
+#define	SPARE_SIZE_BYTES		23
+#define	NUM_ADDR_CYCLES			27
+#define	STATUS_BFR_READ			30
+#define	SET_RD_MODE_AFTER_STATUS	31
+
+/* NAND_DEVn_CFG0 bits */
+#define	DEV0_CFG1_ECC_DISABLE		0
+#define	WIDE_FLASH			1
+#define	NAND_RECOVERY_CYCLES		2
+#define	CS_ACTIVE_BSY			5
+#define	BAD_BLOCK_BYTE_NUM		6
+#define	BAD_BLOCK_IN_SPARE_AREA		16
+#define	WR_RD_BSY_GAP			17
+#define	ENABLE_BCH_ECC			27
+
+/* NAND_DEV0_ECC_CFG bits */
+#define	ECC_CFG_ECC_DISABLE		0
+#define	ECC_SW_RESET			1
+#define	ECC_MODE			4
+#define	ECC_PARITY_SIZE_BYTES_BCH	8
+#define	ECC_NUM_DATA_BYTES		16
+#define	ECC_FORCE_CLK_OPEN		30
+
+/* NAND_DEV_CMD1 bits */
+#define	READ_ADDR			0
+
+/* NAND_DEV_CMD_VLD bits */
+#define	READ_START_VLD			0
+
+/* NAND_EBI2_ECC_BUF_CFG bits */
+#define	NUM_STEPS			0
+
+/* NAND_ERASED_CW_DETECT_CFG bits */
+#define	ERASED_CW_ECC_MASK		1
+#define	AUTO_DETECT_RES			0
+#define	MASK_ECC			(1 << ERASED_CW_ECC_MASK)
+#define	RESET_ERASED_DET		(1 << AUTO_DETECT_RES)
+#define	ACTIVE_ERASED_DET		(0 << AUTO_DETECT_RES)
+#define	CLR_ERASED_PAGE_DET		(RESET_ERASED_DET | MASK_ECC)
+#define	SET_ERASED_PAGE_DET		(ACTIVE_ERASED_DET | MASK_ECC)
+
+/* NAND_ERASED_CW_DETECT_STATUS bits */
+#define	PAGE_ALL_ERASED			BIT(7)
+#define	CODEWORD_ALL_ERASED		BIT(6)
+#define	PAGE_ERASED			BIT(5)
+#define	CODEWORD_ERASED			BIT(4)
+#define	ERASED_PAGE			(PAGE_ALL_ERASED | PAGE_ERASED)
+#define	ERASED_CW			(CODEWORD_ALL_ERASED | CODEWORD_ERASED)
+
+/* Version Mask */
+#define	NAND_VERSION_MAJOR_MASK		0xf0000000
+#define	NAND_VERSION_MAJOR_SHIFT	28
+#define	NAND_VERSION_MINOR_MASK		0x0fff0000
+#define	NAND_VERSION_MINOR_SHIFT	16
+
+/* NAND OP_CMDs */
+#define	PAGE_READ			0x2
+#define	PAGE_READ_WITH_ECC		0x3
+#define	PAGE_READ_WITH_ECC_SPARE	0x4
+#define	PROGRAM_PAGE			0x6
+#define	PAGE_PROGRAM_WITH_ECC		0x7
+#define	PROGRAM_PAGE_SPARE		0x9
+#define	BLOCK_ERASE			0xa
+#define	FETCH_ID			0xb
+#define	RESET_DEVICE			0xd
+
+/*
+ * the NAND controller performs reads/writes with ECC in 516 byte chunks.
+ * the driver calls the chunks 'step' or 'codeword' interchangeably
+ */
+#define	NANDC_STEP_SIZE			512
+
+/*
+ * the largest page size we support is 8K, this will have 16 steps/codewords
+ * of 512 bytes each
+ */
+#define	MAX_NUM_STEPS			(SZ_8K / NANDC_STEP_SIZE)
+
+/* we read at most 3 registers per codeword scan */
+#define	MAX_REG_RD			(3 * MAX_NUM_STEPS)
+
+/* ECC modes supported by the controller */
+#define	ECC_NONE	BIT(0)
+#define	ECC_RS_4BIT	BIT(1)
+#define	ECC_BCH_4BIT	BIT(2)
+#define	ECC_BCH_8BIT	BIT(3)
+
+struct desc_info {
+	struct list_head node;
+
+	enum dma_data_direction dir;
+	struct scatterlist sgl;
+	struct dma_async_tx_descriptor *dma_desc;
+};
+
+/*
+ * holds the current register values that we want to write. acts as a contiguous
+ * chunk of memory which we use to write the controller registers through DMA.
+ */
+struct nandc_regs {
+	__le32 cmd;
+	__le32 addr0;
+	__le32 addr1;
+	__le32 chip_sel;
+	__le32 exec;
+
+	__le32 cfg0;
+	__le32 cfg1;
+	__le32 ecc_bch_cfg;
+
+	__le32 clrflashstatus;
+	__le32 clrreadstatus;
+
+	__le32 cmd1;
+	__le32 vld;
+
+	__le32 orig_cmd1;
+	__le32 orig_vld;
+
+	__le32 ecc_buf_cfg;
+};
+
+/*
+ * NAND controller data struct
+ *
+ * @controller:			base controller structure
+ * @host_list:			list containing all the chips attached to the
+ *				controller
+ * @dev:			parent device
+ * @base:			MMIO base
+ * @base_dma:			physical base address of controller registers
+ * @core_clk:			controller clock
+ * @aon_clk:			another controller clock
+ *
+ * @chan:			dma channel
+ * @cmd_crci:			ADM DMA CRCI for command flow control
+ * @data_crci:			ADM DMA CRCI for data flow control
+ * @desc_list:			DMA descriptor list (list of desc_infos)
+ *
+ * @data_buffer:		our local DMA buffer for page read/writes,
+ *				used when we can't use the buffer provided
+ *				by upper layers directly
+ * @buf_size/count/start:	markers for chip->read_buf/write_buf functions
+ * @reg_read_buf:		local buffer for reading back registers via DMA
+ * @reg_read_pos:		marker for data read in reg_read_buf
+ *
+ * @regs:			a contiguous chunk of memory for DMA register
+ *				writes. contains the register values to be
+ *				written to controller
+ * @cmd1/vld:			some fixed controller register values
+ * @ecc_modes:			supported ECC modes by the current controller,
+ *				initialized via DT match data
+ */
+struct qcom_nand_controller {
+	struct nand_hw_control controller;
+	struct list_head host_list;
+
+	struct device *dev;
+
+	void __iomem *base;
+	dma_addr_t base_dma;
+
+	struct clk *core_clk;
+	struct clk *aon_clk;
+
+	struct dma_chan *chan;
+	unsigned int cmd_crci;
+	unsigned int data_crci;
+	struct list_head desc_list;
+
+	u8		*data_buffer;
+	int		buf_size;
+	int		buf_count;
+	int		buf_start;
+
+	__le32 *reg_read_buf;
+	int reg_read_pos;
+
+	struct nandc_regs *regs;
+
+	u32 cmd1, vld;
+	u32 ecc_modes;
+};
+
+/*
+ * NAND chip structure
+ *
+ * @chip:			base NAND chip structure
+ * @node:			list node to add itself to host_list in
+ *				qcom_nand_controller
+ *
+ * @cs:				chip select value for this chip
+ * @cw_size:			the number of bytes in a single step/codeword
+ *				of a page, consisting of all data, ecc, spare
+ *				and reserved bytes
+ * @cw_data:			the number of bytes within a codeword protected
+ *				by ECC
+ * @use_ecc:			request the controller to use ECC for the
+ *				upcoming read/write
+ * @bch_enabled:		flag to tell whether BCH ECC mode is used
+ * @ecc_bytes_hw:		ECC bytes used by controller hardware for this
+ *				chip
+ * @status:			value to be returned if NAND_CMD_STATUS command
+ *				is executed
+ * @last_command:		keeps track of last command on this chip. used
+ *				for reading correct status
+ *
+ * @cfg0, cfg1, cfg0_raw..:	NANDc register configurations needed for
+ *				ecc/non-ecc mode for the current nand flash
+ *				device
+ */
+struct qcom_nand_host {
+	struct nand_chip chip;
+	struct list_head node;
+
+	int cs;
+	int cw_size;
+	int cw_data;
+	bool use_ecc;
+	bool bch_enabled;
+	int ecc_bytes_hw;
+	int spare_bytes;
+	int bbm_size;
+	u8 status;
+	int last_command;
+
+	u32 cfg0, cfg1;
+	u32 cfg0_raw, cfg1_raw;
+	u32 ecc_buf_cfg;
+	u32 ecc_bch_cfg;
+	u32 clrflashstatus;
+	u32 clrreadstatus;
+};
+
+static inline struct qcom_nand_host *to_qcom_nand_host(struct nand_chip *chip)
+{
+	return container_of(chip, struct qcom_nand_host, chip);
+}
+
+static inline struct qcom_nand_controller *
+get_qcom_nand_controller(struct nand_chip *chip)
+{
+	return container_of(chip->controller, struct qcom_nand_controller,
+			    controller);
+}
+
+static inline u32 nandc_read(struct qcom_nand_controller *nandc, int offset)
+{
+	return ioread32(nandc->base + offset);
+}
+
+static inline void nandc_write(struct qcom_nand_controller *nandc, int offset,
+			       u32 val)
+{
+	iowrite32(val, nandc->base + offset);
+}
+
+static __le32 *offset_to_nandc_reg(struct nandc_regs *regs, int offset)
+{
+	switch (offset) {
+	case NAND_FLASH_CMD:
+		return &regs->cmd;
+	case NAND_ADDR0:
+		return &regs->addr0;
+	case NAND_ADDR1:
+		return &regs->addr1;
+	case NAND_FLASH_CHIP_SELECT:
+		return &regs->chip_sel;
+	case NAND_EXEC_CMD:
+		return &regs->exec;
+	case NAND_FLASH_STATUS:
+		return &regs->clrflashstatus;
+	case NAND_DEV0_CFG0:
+		return &regs->cfg0;
+	case NAND_DEV0_CFG1:
+		return &regs->cfg1;
+	case NAND_DEV0_ECC_CFG:
+		return &regs->ecc_bch_cfg;
+	case NAND_READ_STATUS:
+		return &regs->clrreadstatus;
+	case NAND_DEV_CMD1:
+		return &regs->cmd1;
+	case NAND_DEV_CMD1_RESTORE:
+		return &regs->orig_cmd1;
+	case NAND_DEV_CMD_VLD:
+		return &regs->vld;
+	case NAND_DEV_CMD_VLD_RESTORE:
+		return &regs->orig_vld;
+	case NAND_EBI2_ECC_BUF_CFG:
+		return &regs->ecc_buf_cfg;
+	default:
+		return NULL;
+	}
+}
+
+static void nandc_set_reg(struct qcom_nand_controller *nandc, int offset,
+			  u32 val)
+{
+	struct nandc_regs *regs = nandc->regs;
+	__le32 *reg;
+
+	reg = offset_to_nandc_reg(regs, offset);
+
+	if (reg)
+		*reg = cpu_to_le32(val);
+}
+
+/* helper to configure address register values */
+static void set_address(struct qcom_nand_host *host, u16 column, int page)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		column >>= 1;
+
+	nandc_set_reg(nandc, NAND_ADDR0, page << 16 | column);
+	nandc_set_reg(nandc, NAND_ADDR1, page >> 16 & 0xff);
+}
+
+/*
+ * update_rw_regs:	set up read/write register values, these will be
+ *			written to the NAND controller registers via DMA
+ *
+ * @num_cw:		number of steps for the read/write operation
+ * @read:		read or write operation
+ */
+static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u32 cmd, cfg0, cfg1, ecc_bch_cfg;
+
+	if (read) {
+		if (host->use_ecc)
+			cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
+		else
+			cmd = PAGE_READ | PAGE_ACC | LAST_PAGE;
+	} else {
+			cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
+	}
+
+	if (host->use_ecc) {
+		cfg0 = (host->cfg0 & ~(7U << CW_PER_PAGE)) |
+				(num_cw - 1) << CW_PER_PAGE;
+
+		cfg1 = host->cfg1;
+		ecc_bch_cfg = host->ecc_bch_cfg;
+	} else {
+		cfg0 = (host->cfg0_raw & ~(7U << CW_PER_PAGE)) |
+				(num_cw - 1) << CW_PER_PAGE;
+
+		cfg1 = host->cfg1_raw;
+		ecc_bch_cfg = 1 << ECC_CFG_ECC_DISABLE;
+	}
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, cmd);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0, cfg0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, cfg1);
+	nandc_set_reg(nandc, NAND_DEV0_ECC_CFG, ecc_bch_cfg);
+	nandc_set_reg(nandc, NAND_EBI2_ECC_BUF_CFG, host->ecc_buf_cfg);
+	nandc_set_reg(nandc, NAND_FLASH_STATUS, host->clrflashstatus);
+	nandc_set_reg(nandc, NAND_READ_STATUS, host->clrreadstatus);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+}
+
+static int prep_dma_desc(struct qcom_nand_controller *nandc, bool read,
+			 int reg_off, const void *vaddr, int size,
+			 bool flow_control)
+{
+	struct desc_info *desc;
+	struct dma_async_tx_descriptor *dma_desc;
+	struct scatterlist *sgl;
+	struct dma_slave_config slave_conf;
+	enum dma_transfer_direction dir_eng;
+	int ret;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	sgl = &desc->sgl;
+
+	sg_init_one(sgl, vaddr, size);
+
+	if (read) {
+		dir_eng = DMA_DEV_TO_MEM;
+		desc->dir = DMA_FROM_DEVICE;
+	} else {
+		dir_eng = DMA_MEM_TO_DEV;
+		desc->dir = DMA_TO_DEVICE;
+	}
+
+	ret = dma_map_sg(nandc->dev, sgl, 1, desc->dir);
+	if (ret == 0) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	memset(&slave_conf, 0x00, sizeof(slave_conf));
+
+	slave_conf.device_fc = flow_control;
+	if (read) {
+		slave_conf.src_maxburst = 16;
+		slave_conf.src_addr = nandc->base_dma + reg_off;
+		slave_conf.slave_id = nandc->data_crci;
+	} else {
+		slave_conf.dst_maxburst = 16;
+		slave_conf.dst_addr = nandc->base_dma + reg_off;
+		slave_conf.slave_id = nandc->cmd_crci;
+	}
+
+	ret = dmaengine_slave_config(nandc->chan, &slave_conf);
+	if (ret) {
+		dev_err(nandc->dev, "failed to configure dma channel\n");
+		goto err;
+	}
+
+	dma_desc = dmaengine_prep_slave_sg(nandc->chan, sgl, 1, dir_eng, 0);
+	if (!dma_desc) {
+		dev_err(nandc->dev, "failed to prepare desc\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	desc->dma_desc = dma_desc;
+
+	list_add_tail(&desc->node, &nandc->desc_list);
+
+	return 0;
+err:
+	kfree(desc);
+
+	return ret;
+}
+
+/*
+ * read_reg_dma:	prepares a descriptor to read a given number of
+ *			contiguous registers to the reg_read_buf pointer
+ *
+ * @first:		offset of the first register in the contiguous block
+ * @num_regs:		number of registers to read
+ */
+static int read_reg_dma(struct qcom_nand_controller *nandc, int first,
+			int num_regs)
+{
+	bool flow_control = false;
+	void *vaddr;
+	int size;
+
+	if (first == NAND_READ_ID || first == NAND_FLASH_STATUS)
+		flow_control = true;
+
+	size = num_regs * sizeof(u32);
+	vaddr = nandc->reg_read_buf + nandc->reg_read_pos;
+	nandc->reg_read_pos += num_regs;
+
+	return prep_dma_desc(nandc, true, first, vaddr, size, flow_control);
+}
+
+/*
+ * write_reg_dma:	prepares a descriptor to write a given number of
+ *			contiguous registers
+ *
+ * @first:		offset of the first register in the contiguous block
+ * @num_regs:		number of registers to write
+ */
+static int write_reg_dma(struct qcom_nand_controller *nandc, int first,
+			 int num_regs)
+{
+	bool flow_control = false;
+	struct nandc_regs *regs = nandc->regs;
+	void *vaddr;
+	int size;
+
+	vaddr = offset_to_nandc_reg(regs, first);
+
+	if (first == NAND_FLASH_CMD)
+		flow_control = true;
+
+	if (first == NAND_DEV_CMD1_RESTORE)
+		first = NAND_DEV_CMD1;
+
+	if (first == NAND_DEV_CMD_VLD_RESTORE)
+		first = NAND_DEV_CMD_VLD;
+
+	size = num_regs * sizeof(u32);
+
+	return prep_dma_desc(nandc, false, first, vaddr, size, flow_control);
+}
+
+/*
+ * read_data_dma:	prepares a DMA descriptor to transfer data from the
+ *			controller's internal buffer to the buffer 'vaddr'
+ *
+ * @reg_off:		offset within the controller's data buffer
+ * @vaddr:		virtual address of the buffer we want to write to
+ * @size:		DMA transaction size in bytes
+ */
+static int read_data_dma(struct qcom_nand_controller *nandc, int reg_off,
+			 const u8 *vaddr, int size)
+{
+	return prep_dma_desc(nandc, true, reg_off, vaddr, size, false);
+}
+
+/*
+ * write_data_dma:	prepares a DMA descriptor to transfer data from
+ *			'vaddr' to the controller's internal buffer
+ *
+ * @reg_off:		offset within the controller's data buffer
+ * @vaddr:		virtual address of the buffer we want to read from
+ * @size:		DMA transaction size in bytes
+ */
+static int write_data_dma(struct qcom_nand_controller *nandc, int reg_off,
+			  const u8 *vaddr, int size)
+{
+	return prep_dma_desc(nandc, false, reg_off, vaddr, size, false);
+}
+
+/*
+ * helper to prepare dma descriptors to configure registers needed for reading a
+ * codeword/step in a page
+ */
+static void config_cw_read(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 3);
+	write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1);
+
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 2);
+	read_reg_dma(nandc, NAND_ERASED_CW_DETECT_STATUS, 1);
+}
+
+/*
+ * helpers to prepare dma descriptors used to configure registers needed for
+ * writing a codeword/step in a page
+ */
+static void config_cw_write_pre(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 3);
+	write_reg_dma(nandc, NAND_EBI2_ECC_BUF_CFG, 1);
+}
+
+static void config_cw_write_post(struct qcom_nand_controller *nandc)
+{
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+	write_reg_dma(nandc, NAND_READ_STATUS, 1);
+}
+
+/*
+ * the following functions are used within chip->cmdfunc() to perform different
+ * NAND_CMD_* commands
+ */
+
+/* sets up descriptors for NAND_CMD_PARAM */
+static int nandc_param(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	/*
+	 * NAND_CMD_PARAM is called before we know much about the FLASH chip
+	 * in use. we configure the controller to perform a raw read of 512
+	 * bytes to read onfi params
+	 */
+	nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE);
+	nandc_set_reg(nandc, NAND_ADDR0, 0);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE
+					| 512 << UD_SIZE_BYTES
+					| 5 << NUM_ADDR_CYCLES
+					| 0 << SPARE_SIZE_BYTES);
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, 7 << NAND_RECOVERY_CYCLES
+					| 0 << CS_ACTIVE_BSY
+					| 17 << BAD_BLOCK_BYTE_NUM
+					| 1 << BAD_BLOCK_IN_SPARE_AREA
+					| 2 << WR_RD_BSY_GAP
+					| 0 << WIDE_FLASH
+					| 1 << DEV0_CFG1_ECC_DISABLE);
+	nandc_set_reg(nandc, NAND_EBI2_ECC_BUF_CFG, 1 << ECC_CFG_ECC_DISABLE);
+
+	/* configure CMD1 and VLD for ONFI param probing */
+	nandc_set_reg(nandc, NAND_DEV_CMD_VLD,
+		      (nandc->vld & ~(1 << READ_START_VLD))
+		      | 0 << READ_START_VLD);
+	nandc_set_reg(nandc, NAND_DEV_CMD1,
+		      (nandc->cmd1 & ~(0xFF << READ_ADDR))
+		      | NAND_CMD_PARAM << READ_ADDR);
+
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	nandc_set_reg(nandc, NAND_DEV_CMD1_RESTORE, nandc->cmd1);
+	nandc_set_reg(nandc, NAND_DEV_CMD_VLD_RESTORE, nandc->vld);
+
+	write_reg_dma(nandc, NAND_DEV_CMD_VLD, 1);
+	write_reg_dma(nandc, NAND_DEV_CMD1, 1);
+
+	nandc->buf_count = 512;
+	memset(nandc->data_buffer, 0xff, nandc->buf_count);
+
+	config_cw_read(nandc);
+
+	read_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer,
+		      nandc->buf_count);
+
+	/* restore CMD1 and VLD regs */
+	write_reg_dma(nandc, NAND_DEV_CMD1_RESTORE, 1);
+	write_reg_dma(nandc, NAND_DEV_CMD_VLD_RESTORE, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_ERASE1 */
+static int erase_block(struct qcom_nand_host *host, int page_addr)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD,
+		      BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
+	nandc_set_reg(nandc, NAND_ADDR0, page_addr);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_DEV0_CFG0,
+		      host->cfg0_raw & ~(7 << CW_PER_PAGE));
+	nandc_set_reg(nandc, NAND_DEV0_CFG1, host->cfg1_raw);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+	nandc_set_reg(nandc, NAND_FLASH_STATUS, host->clrflashstatus);
+	nandc_set_reg(nandc, NAND_READ_STATUS, host->clrreadstatus);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 3);
+	write_reg_dma(nandc, NAND_DEV0_CFG0, 2);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+	write_reg_dma(nandc, NAND_READ_STATUS, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_READID */
+static int read_id(struct qcom_nand_host *host, int column)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (column == -1)
+		return 0;
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID);
+	nandc_set_reg(nandc, NAND_ADDR0, column);
+	nandc_set_reg(nandc, NAND_ADDR1, 0);
+	nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT, DM_EN);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 4);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_READ_ID, 1);
+
+	return 0;
+}
+
+/* sets up descriptors for NAND_CMD_RESET */
+static int reset(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE);
+	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
+
+	write_reg_dma(nandc, NAND_FLASH_CMD, 1);
+	write_reg_dma(nandc, NAND_EXEC_CMD, 1);
+
+	read_reg_dma(nandc, NAND_FLASH_STATUS, 1);
+
+	return 0;
+}
+
+/* helpers to submit/free our list of dma descriptors */
+static int submit_descs(struct qcom_nand_controller *nandc)
+{
+	struct desc_info *desc;
+	dma_cookie_t cookie = 0;
+
+	list_for_each_entry(desc, &nandc->desc_list, node)
+		cookie = dmaengine_submit(desc->dma_desc);
+
+	if (dma_sync_wait(nandc->chan, cookie) != DMA_COMPLETE)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void free_descs(struct qcom_nand_controller *nandc)
+{
+	struct desc_info *desc, *n;
+
+	list_for_each_entry_safe(desc, n, &nandc->desc_list, node) {
+		list_del(&desc->node);
+		dma_unmap_sg(nandc->dev, &desc->sgl, 1, desc->dir);
+		kfree(desc);
+	}
+}
+
+/* reset the register read buffer for next NAND operation */
+static void clear_read_regs(struct qcom_nand_controller *nandc)
+{
+	nandc->reg_read_pos = 0;
+	memset(nandc->reg_read_buf, 0,
+	       MAX_REG_RD * sizeof(*nandc->reg_read_buf));
+}
+
+static void pre_command(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	nandc->buf_count = 0;
+	nandc->buf_start = 0;
+	host->use_ecc = false;
+	host->last_command = command;
+
+	clear_read_regs(nandc);
+}
+
+/*
+ * this is called after NAND_CMD_PAGEPROG and NAND_CMD_ERASE1 to set our
+ * privately maintained status byte, this status byte can be read after
+ * NAND_CMD_STATUS is called
+ */
+static void parse_erase_write_errors(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int num_cw;
+	int i;
+
+	num_cw = command == NAND_CMD_PAGEPROG ? ecc->steps : 1;
+
+	for (i = 0; i < num_cw; i++) {
+		u32 flash_status = le32_to_cpu(nandc->reg_read_buf[i]);
+
+		if (flash_status & FS_MPU_ERR)
+			host->status &= ~NAND_STATUS_WP;
+
+		if (flash_status & FS_OP_ERR || (i == (num_cw - 1) &&
+						 (flash_status &
+						  FS_DEVICE_STS_ERR)))
+			host->status |= NAND_STATUS_FAIL;
+	}
+}
+
+static void post_command(struct qcom_nand_host *host, int command)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	switch (command) {
+	case NAND_CMD_READID:
+		memcpy(nandc->data_buffer, nandc->reg_read_buf,
+		       nandc->buf_count);
+		break;
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_ERASE1:
+		parse_erase_write_errors(host, command);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Implements chip->cmdfunc. It's  only used for a limited set of commands.
+ * The rest of the commands wouldn't be called by upper layers. For example,
+ * NAND_CMD_READOOB would never be called because we have our own versions
+ * of read_oob ops for nand_ecc_ctrl.
+ */
+static void qcom_nandc_command(struct mtd_info *mtd, unsigned int command,
+			       int column, int page_addr)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	bool wait = false;
+	int ret = 0;
+
+	pre_command(host, command);
+
+	switch (command) {
+	case NAND_CMD_RESET:
+		ret = reset(host);
+		wait = true;
+		break;
+
+	case NAND_CMD_READID:
+		nandc->buf_count = 4;
+		ret = read_id(host, column);
+		wait = true;
+		break;
+
+	case NAND_CMD_PARAM:
+		ret = nandc_param(host);
+		wait = true;
+		break;
+
+	case NAND_CMD_ERASE1:
+		ret = erase_block(host, page_addr);
+		wait = true;
+		break;
+
+	case NAND_CMD_READ0:
+		/* we read the entire page for now */
+		WARN_ON(column != 0);
+
+		host->use_ecc = true;
+		set_address(host, 0, page_addr);
+		update_rw_regs(host, ecc->steps, true);
+		break;
+
+	case NAND_CMD_SEQIN:
+		WARN_ON(column != 0);
+		set_address(host, 0, page_addr);
+		break;
+
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_STATUS:
+	case NAND_CMD_NONE:
+	default:
+		break;
+	}
+
+	if (ret) {
+		dev_err(nandc->dev, "failure executing command %d\n",
+			command);
+		free_descs(nandc);
+		return;
+	}
+
+	if (wait) {
+		ret = submit_descs(nandc);
+		if (ret)
+			dev_err(nandc->dev,
+				"failure submitting descs for command %d\n",
+				command);
+	}
+
+	free_descs(nandc);
+
+	post_command(host, command);
+}
+
+/*
+ * when using BCH ECC, the HW flags an error in NAND_FLASH_STATUS if it read
+ * an erased CW, and reports an erased CW in NAND_ERASED_CW_DETECT_STATUS.
+ *
+ * when using RS ECC, the HW reports the same erros when reading an erased CW,
+ * but it notifies that it is an erased CW by placing special characters at
+ * certain offsets in the buffer.
+ *
+ * verify if the page is erased or not, and fix up the page for RS ECC by
+ * replacing the special characters with 0xff.
+ */
+static bool erased_chunk_check_and_fixup(u8 *data_buf, int data_len)
+{
+	u8 empty1, empty2;
+
+	/*
+	 * an erased page flags an error in NAND_FLASH_STATUS, check if the page
+	 * is erased by looking for 0x54s at offsets 3 and 175 from the
+	 * beginning of each codeword
+	 */
+
+	empty1 = data_buf[3];
+	empty2 = data_buf[175];
+
+	/*
+	 * if the erased codework markers, if they exist override them with
+	 * 0xffs
+	 */
+	if ((empty1 == 0x54 && empty2 == 0xff) ||
+	    (empty1 == 0xff && empty2 == 0x54)) {
+		data_buf[3] = 0xff;
+		data_buf[175] = 0xff;
+	}
+
+	/*
+	 * check if the entire chunk contains 0xffs or not. if it doesn't, then
+	 * restore the original values at the special offsets
+	 */
+	if (memchr_inv(data_buf, 0xff, data_len)) {
+		data_buf[3] = empty1;
+		data_buf[175] = empty2;
+
+		return false;
+	}
+
+	return true;
+}
+
+struct read_stats {
+	__le32 flash;
+	__le32 buffer;
+	__le32 erased_cw;
+};
+
+/*
+ * reads back status registers set by the controller to notify page read
+ * errors. this is equivalent to what 'ecc->correct()' would do.
+ */
+static int parse_read_errors(struct qcom_nand_host *host, u8 *data_buf,
+			     u8 *oob_buf)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	unsigned int max_bitflips = 0;
+	struct read_stats *buf;
+	int i;
+
+	buf = (struct read_stats *)nandc->reg_read_buf;
+
+	for (i = 0; i < ecc->steps; i++, buf++) {
+		u32 flash, buffer, erased_cw;
+		int data_len, oob_len;
+
+		if (i == (ecc->steps - 1)) {
+			data_len = ecc->size - ((ecc->steps - 1) << 2);
+			oob_len = ecc->steps << 2;
+		} else {
+			data_len = host->cw_data;
+			oob_len = 0;
+		}
+
+		flash = le32_to_cpu(buf->flash);
+		buffer = le32_to_cpu(buf->buffer);
+		erased_cw = le32_to_cpu(buf->erased_cw);
+
+		if (flash & (FS_OP_ERR | FS_MPU_ERR)) {
+			bool erased;
+
+			/* ignore erased codeword errors */
+			if (host->bch_enabled) {
+				erased = (erased_cw & ERASED_CW) == ERASED_CW ?
+					 true : false;
+			} else {
+				erased = erased_chunk_check_and_fixup(data_buf,
+								      data_len);
+			}
+
+			if (erased) {
+				data_buf += data_len;
+				if (oob_buf)
+					oob_buf += oob_len + ecc->bytes;
+				continue;
+			}
+
+			if (buffer & BS_UNCORRECTABLE_BIT) {
+				int ret, ecclen, extraooblen;
+				void *eccbuf;
+
+				eccbuf = oob_buf ? oob_buf + oob_len : NULL;
+				ecclen = oob_buf ? host->ecc_bytes_hw : 0;
+				extraooblen = oob_buf ? oob_len : 0;
+
+				/*
+				 * make sure it isn't an erased page reported
+				 * as not-erased by HW because of a few bitflips
+				 */
+				ret = nand_check_erased_ecc_chunk(data_buf,
+					data_len, eccbuf, ecclen, oob_buf,
+					extraooblen, ecc->strength);
+				if (ret < 0) {
+					mtd->ecc_stats.failed++;
+				} else {
+					mtd->ecc_stats.corrected += ret;
+					max_bitflips =
+						max_t(unsigned int, max_bitflips, ret);
+				}
+			}
+		} else {
+			unsigned int stat;
+
+			stat = buffer & BS_CORRECTABLE_ERR_MSK;
+			mtd->ecc_stats.corrected += stat;
+			max_bitflips = max(max_bitflips, stat);
+		}
+
+		data_buf += data_len;
+		if (oob_buf)
+			oob_buf += oob_len + ecc->bytes;
+	}
+
+	return max_bitflips;
+}
+
+/*
+ * helper to perform the actual page read operation, used by ecc->read_page(),
+ * ecc->read_oob()
+ */
+static int read_page_ecc(struct qcom_nand_host *host, u8 *data_buf,
+			 u8 *oob_buf)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int i, ret;
+
+	/* queue cmd descs for each codeword */
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size, oob_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size = ecc->size - ((ecc->steps - 1) << 2);
+			oob_size = (ecc->steps << 2) + host->ecc_bytes_hw +
+				   host->spare_bytes;
+		} else {
+			data_size = host->cw_data;
+			oob_size = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_read(nandc);
+
+		if (data_buf)
+			read_data_dma(nandc, FLASH_BUF_ACC, data_buf,
+				      data_size);
+
+		/*
+		 * when ecc is enabled, the controller doesn't read the real
+		 * or dummy bad block markers in each chunk. To maintain a
+		 * consistent layout across RAW and ECC reads, we just
+		 * leave the real/dummy BBM offsets empty (i.e, filled with
+		 * 0xffs)
+		 */
+		if (oob_buf) {
+			int j;
+
+			for (j = 0; j < host->bbm_size; j++)
+				*oob_buf++ = 0xff;
+
+			read_data_dma(nandc, FLASH_BUF_ACC + data_size,
+				      oob_buf, oob_size);
+		}
+
+		if (data_buf)
+			data_buf += data_size;
+		if (oob_buf)
+			oob_buf += oob_size;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to read page/oob\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/*
+ * a helper that copies the last step/codeword of a page (containing free oob)
+ * into our local buffer
+ */
+static int copy_last_cw(struct qcom_nand_host *host, int page)
+{
+	struct nand_chip *chip = &host->chip;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int size;
+	int ret;
+
+	clear_read_regs(nandc);
+
+	size = host->use_ecc ? host->cw_data : host->cw_size;
+
+	/* prepare a clean read buffer */
+	memset(nandc->data_buffer, 0xff, size);
+
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, true);
+
+	config_cw_read(nandc);
+
+	read_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer, size);
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failed to copy last codeword\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/* implements ecc->read_page() */
+static int qcom_nandc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *data_buf, *oob_buf = NULL;
+	int ret;
+
+	data_buf = buf;
+	oob_buf = oob_required ? chip->oob_poi : NULL;
+
+	ret = read_page_ecc(host, data_buf, oob_buf);
+	if (ret) {
+		dev_err(nandc->dev, "failure to read page\n");
+		return ret;
+	}
+
+	return parse_read_errors(host, data_buf, oob_buf);
+}
+
+/* implements ecc->read_page_raw() */
+static int qcom_nandc_read_page_raw(struct mtd_info *mtd,
+				    struct nand_chip *chip, uint8_t *buf,
+				    int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *data_buf, *oob_buf;
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int i, ret;
+
+	data_buf = buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = false;
+	update_rw_regs(host, ecc->steps, true);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size1, data_size2, oob_size1, oob_size2;
+		int reg_off = FLASH_BUF_ACC;
+
+		data_size1 = mtd->writesize - host->cw_size * (ecc->steps - 1);
+		oob_size1 = host->bbm_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size2 = ecc->size - data_size1 -
+				     ((ecc->steps - 1) << 2);
+			oob_size2 = (ecc->steps << 2) + host->ecc_bytes_hw +
+				    host->spare_bytes;
+		} else {
+			data_size2 = host->cw_data - data_size1;
+			oob_size2 = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_read(nandc);
+
+		read_data_dma(nandc, reg_off, data_buf, data_size1);
+		reg_off += data_size1;
+		data_buf += data_size1;
+
+		read_data_dma(nandc, reg_off, oob_buf, oob_size1);
+		reg_off += oob_size1;
+		oob_buf += oob_size1;
+
+		read_data_dma(nandc, reg_off, data_buf, data_size2);
+		reg_off += data_size2;
+		data_buf += data_size2;
+
+		read_data_dma(nandc, reg_off, oob_buf, oob_size2);
+		oob_buf += oob_size2;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to read raw page\n");
+
+	free_descs(nandc);
+
+	return 0;
+}
+
+/* implements ecc->read_oob() */
+static int qcom_nandc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
+			       int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int ret;
+
+	clear_read_regs(nandc);
+
+	host->use_ecc = true;
+	set_address(host, 0, page);
+	update_rw_regs(host, ecc->steps, true);
+
+	ret = read_page_ecc(host, NULL, chip->oob_poi);
+	if (ret)
+		dev_err(nandc->dev, "failure to read oob\n");
+
+	return ret;
+}
+
+/* implements ecc->write_page() */
+static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+				 const uint8_t *buf, int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *data_buf, *oob_buf;
+	int i, ret;
+
+	clear_read_regs(nandc);
+
+	data_buf = (u8 *)buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = true;
+	update_rw_regs(host, ecc->steps, false);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size, oob_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size = ecc->size - ((ecc->steps - 1) << 2);
+			oob_size = (ecc->steps << 2) + host->ecc_bytes_hw +
+				   host->spare_bytes;
+		} else {
+			data_size = host->cw_data;
+			oob_size = ecc->bytes;
+		}
+
+		config_cw_write_pre(nandc);
+
+		write_data_dma(nandc, FLASH_BUF_ACC, data_buf, data_size);
+
+		/*
+		 * when ECC is enabled, we don't really need to write anything
+		 * to oob for the first n - 1 codewords since these oob regions
+		 * just contain ECC bytes that's written by the controller
+		 * itself. For the last codeword, we skip the bbm positions and
+		 * write to the free oob area.
+		 */
+		if (i == (ecc->steps - 1)) {
+			oob_buf += host->bbm_size;
+
+			write_data_dma(nandc, FLASH_BUF_ACC + data_size,
+				       oob_buf, oob_size);
+		}
+
+		config_cw_write_post(nandc);
+
+		data_buf += data_size;
+		oob_buf += oob_size;
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to write page\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/* implements ecc->write_page_raw() */
+static int qcom_nandc_write_page_raw(struct mtd_info *mtd,
+				     struct nand_chip *chip, const uint8_t *buf,
+				     int oob_required, int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *data_buf, *oob_buf;
+	int i, ret;
+
+	clear_read_regs(nandc);
+
+	data_buf = (u8 *)buf;
+	oob_buf = chip->oob_poi;
+
+	host->use_ecc = false;
+	update_rw_regs(host, ecc->steps, false);
+
+	for (i = 0; i < ecc->steps; i++) {
+		int data_size1, data_size2, oob_size1, oob_size2;
+		int reg_off = FLASH_BUF_ACC;
+
+		data_size1 = mtd->writesize - host->cw_size * (ecc->steps - 1);
+		oob_size1 = host->bbm_size;
+
+		if (i == (ecc->steps - 1)) {
+			data_size2 = ecc->size - data_size1 -
+				     ((ecc->steps - 1) << 2);
+			oob_size2 = (ecc->steps << 2) + host->ecc_bytes_hw +
+				    host->spare_bytes;
+		} else {
+			data_size2 = host->cw_data - data_size1;
+			oob_size2 = host->ecc_bytes_hw + host->spare_bytes;
+		}
+
+		config_cw_write_pre(nandc);
+
+		write_data_dma(nandc, reg_off, data_buf, data_size1);
+		reg_off += data_size1;
+		data_buf += data_size1;
+
+		write_data_dma(nandc, reg_off, oob_buf, oob_size1);
+		reg_off += oob_size1;
+		oob_buf += oob_size1;
+
+		write_data_dma(nandc, reg_off, data_buf, data_size2);
+		reg_off += data_size2;
+		data_buf += data_size2;
+
+		write_data_dma(nandc, reg_off, oob_buf, oob_size2);
+		oob_buf += oob_size2;
+
+		config_cw_write_post(nandc);
+	}
+
+	ret = submit_descs(nandc);
+	if (ret)
+		dev_err(nandc->dev, "failure to write raw page\n");
+
+	free_descs(nandc);
+
+	return ret;
+}
+
+/*
+ * implements ecc->write_oob()
+ *
+ * the NAND controller cannot write only data or only oob within a codeword,
+ * since ecc is calculated for the combined codeword. we first copy the
+ * entire contents for the last codeword(data + oob), replace the old oob
+ * with the new one in chip->oob_poi, and then write the entire codeword.
+ * this read-copy-write operation results in a slight performance loss.
+ */
+static int qcom_nandc_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
+				int page)
+{
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	u8 *oob = chip->oob_poi;
+	int free_boff;
+	int data_size, oob_size;
+	int ret, status = 0;
+
+	host->use_ecc = true;
+
+	ret = copy_last_cw(host, page);
+	if (ret)
+		return ret;
+
+	clear_read_regs(nandc);
+
+	/* calculate the data and oob size for the last codeword/step */
+	data_size = ecc->size - ((ecc->steps - 1) << 2);
+	oob_size = ecc->steps << 2;
+
+	free_boff = ecc->layout->oobfree[0].offset;
+
+	/* override new oob content to last codeword */
+	memcpy(nandc->data_buffer + data_size, oob + free_boff, oob_size);
+
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, false);
+
+	config_cw_write_pre(nandc);
+	write_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer,
+		       data_size + oob_size);
+	config_cw_write_post(nandc);
+
+	ret = submit_descs(nandc);
+
+	free_descs(nandc);
+
+	if (ret) {
+		dev_err(nandc->dev, "failure to write oob\n");
+		return -EIO;
+	}
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+static int qcom_nandc_block_bad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int page, ret, bbpos, bad = 0;
+	u32 flash_status;
+
+	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+
+	/*
+	 * configure registers for a raw sub page read, the address is set to
+	 * the beginning of the last codeword, we don't care about reading ecc
+	 * portion of oob. we just want the first few bytes from this codeword
+	 * that contains the BBM
+	 */
+	host->use_ecc = false;
+
+	ret = copy_last_cw(host, page);
+	if (ret)
+		goto err;
+
+	flash_status = le32_to_cpu(nandc->reg_read_buf[0]);
+
+	if (flash_status & (FS_OP_ERR | FS_MPU_ERR)) {
+		dev_warn(nandc->dev, "error when trying to read BBM\n");
+		goto err;
+	}
+
+	bbpos = mtd->writesize - host->cw_size * (ecc->steps - 1);
+
+	bad = nandc->data_buffer[bbpos] != 0xff;
+
+	if (chip->options & NAND_BUSWIDTH_16)
+		bad = bad || (nandc->data_buffer[bbpos + 1] != 0xff);
+err:
+	return bad;
+}
+
+static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	int page, ret, status = 0;
+
+	clear_read_regs(nandc);
+
+	/*
+	 * to mark the BBM as bad, we flash the entire last codeword with 0s.
+	 * we don't care about the rest of the content in the codeword since
+	 * we aren't going to use this block again
+	 */
+	memset(nandc->data_buffer, 0x00, host->cw_size);
+
+	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
+
+	/* prepare write */
+	host->use_ecc = false;
+	set_address(host, host->cw_size * (ecc->steps - 1), page);
+	update_rw_regs(host, 1, false);
+
+	config_cw_write_pre(nandc);
+	write_data_dma(nandc, FLASH_BUF_ACC, nandc->data_buffer, host->cw_size);
+	config_cw_write_post(nandc);
+
+	ret = submit_descs(nandc);
+
+	free_descs(nandc);
+
+	if (ret) {
+		dev_err(nandc->dev, "failure to update BBM\n");
+		return -EIO;
+	}
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+
+	status = chip->waitfunc(mtd, chip);
+
+	return status & NAND_STATUS_FAIL ? -EIO : 0;
+}
+
+/*
+ * the three functions below implement chip->read_byte(), chip->read_buf()
+ * and chip->write_buf() respectively. these aren't used for
+ * reading/writing page data, they are used for smaller data like reading
+ * id, status etc
+ */
+static uint8_t qcom_nandc_read_byte(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_host *host = to_qcom_nand_host(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	u8 *buf = nandc->data_buffer;
+	u8 ret = 0x0;
+
+	if (host->last_command == NAND_CMD_STATUS) {
+		ret = host->status;
+
+		host->status = NAND_STATUS_READY | NAND_STATUS_WP;
+
+		return ret;
+	}
+
+	if (nandc->buf_start < nandc->buf_count)
+		ret = buf[nandc->buf_start++];
+
+	return ret;
+}
+
+static void qcom_nandc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
+
+	memcpy(buf, nandc->data_buffer + nandc->buf_start, real_len);
+	nandc->buf_start += real_len;
+}
+
+static void qcom_nandc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+				 int len)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
+
+	memcpy(nandc->data_buffer + nandc->buf_start, buf, real_len);
+
+	nandc->buf_start += real_len;
+}
+
+/* we support only one external chip for now */
+static void qcom_nandc_select_chip(struct mtd_info *mtd, int chipnr)
+{
+	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+
+	if (chipnr <= 0)
+		return;
+
+	dev_warn(nandc->dev, "invalid chip select\n");
+}
+
+/*
+ * NAND controller page layout info
+ *
+ * Layout with ECC enabled:
+ *
+ * |----------------------|  |---------------------------------|
+ * |           xx.......yy|  |             *********xx.......yy|
+ * |    DATA   xx..ECC..yy|  |    DATA     **SPARE**xx..ECC..yy|
+ * |   (516)   xx.......yy|  |  (516-n*4)  **(n*4)**xx.......yy|
+ * |           xx.......yy|  |             *********xx.......yy|
+ * |----------------------|  |---------------------------------|
+ *     codeword 1,2..n-1                  codeword n
+ *  <---(528/532 Bytes)-->    <-------(528/532 Bytes)--------->
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = Spare/free bytes
+ * x = Unused byte(s)
+ * y = Reserved byte(s)
+ *
+ * 2K page: n = 4, spare = 16 bytes
+ * 4K page: n = 8, spare = 32 bytes
+ * 8K page: n = 16, spare = 64 bytes
+ *
+ * the qcom nand controller operates at a sub page/codeword level. each
+ * codeword is 528 and 532 bytes for 4 bit and 8 bit ECC modes respectively.
+ * the number of ECC bytes vary based on the ECC strength and the bus width.
+ *
+ * the first n - 1 codewords contains 516 bytes of user data, the remaining
+ * 12/16 bytes consist of ECC and reserved data. The nth codeword contains
+ * both user data and spare(oobavail) bytes that sum up to 516 bytes.
+ *
+ * When we access a page with ECC enabled, the reserved bytes(s) are not
+ * accessible at all. When reading, we fill up these unreadable positions
+ * with 0xffs. When writing, the controller skips writing the inaccessible
+ * bytes.
+ *
+ * Layout with ECC disabled:
+ *
+ * |------------------------------|  |---------------------------------------|
+ * |         yy          xx.......|  |         bb          *********xx.......|
+ * |  DATA1  yy  DATA2   xx..ECC..|  |  DATA1  bb  DATA2   **SPARE**xx..ECC..|
+ * | (size1) yy (size2)  xx.......|  | (size1) bb (size2)  **(n*4)**xx.......|
+ * |         yy          xx.......|  |         bb          *********xx.......|
+ * |------------------------------|  |---------------------------------------|
+ *         codeword 1,2..n-1                        codeword n
+ *  <-------(528/532 Bytes)------>    <-----------(528/532 Bytes)----------->
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = Spare/free bytes
+ * x = Unused byte(s)
+ * y = Dummy Bad Bock byte(s)
+ * b = Real Bad Block byte(s)
+ * size1/size2 = function of codeword size and 'n'
+ *
+ * when the ECC block is disabled, one reserved byte (or two for 16 bit bus
+ * width) is now accessible. For the first n - 1 codewords, these are dummy Bad
+ * Block Markers. In the last codeword, this position contains the real BBM
+ *
+ * In order to have a consistent layout between RAW and ECC modes, we assume
+ * the following OOB layout arrangement:
+ *
+ * |-----------|  |--------------------|
+ * |yyxx.......|  |bb*********xx.......|
+ * |yyxx..ECC..|  |bb*FREEOOB*xx..ECC..|
+ * |yyxx.......|  |bb*********xx.......|
+ * |yyxx.......|  |bb*********xx.......|
+ * |-----------|  |--------------------|
+ *  first n - 1       nth OOB region
+ *  OOB regions
+ *
+ * n = Number of codewords in the page
+ * . = ECC bytes
+ * * = FREE OOB bytes
+ * y = Dummy bad block byte(s) (inaccessible when ECC enabled)
+ * x = Unused byte(s)
+ * b = Real bad block byte(s) (inaccessible when ECC enabled)
+ *
+ * This layout is read as is when ECC is disabled. When ECC is enabled, the
+ * inaccessible Bad Block byte(s) are ignored when we write to a page/oob,
+ * and assumed as 0xffs when we read a page/oob. The ECC, unused and
+ * dummy/real bad block bytes are grouped as ecc bytes in nand_ecclayout (i.e,
+ * ecc->bytes is the sum of the three).
+ */
+
+static struct nand_ecclayout *
+qcom_nand_create_layout(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct nand_ecclayout *layout;
+	int i, j, steps, pos = 0, shift = 0;
+
+	layout = devm_kzalloc(nandc->dev, sizeof(*layout), GFP_KERNEL);
+	if (!layout)
+		return NULL;
+
+	steps = mtd->writesize / ecc->size;
+	layout->eccbytes = steps * ecc->bytes;
+
+	layout->oobfree[0].offset = (steps - 1) * ecc->bytes + host->bbm_size;
+	layout->oobfree[0].length = steps << 2;
+
+	/*
+	 * the oob bytes in the first n - 1 codewords are all grouped together
+	 * in the format:
+	 * DUMMY_BBM + UNUSED + ECC
+	 */
+	for (i = 0; i < steps - 1; i++) {
+		for (j = 0; j < ecc->bytes; j++)
+			layout->eccpos[pos++] = i * ecc->bytes + j;
+	}
+
+	/*
+	 * the oob bytes in the last codeword are grouped in the format:
+	 * BBM + FREE OOB + UNUSED + ECC
+	 */
+
+	/* fill up the bbm positions */
+	for (j = 0; j < host->bbm_size; j++)
+		layout->eccpos[pos++] = i * ecc->bytes + j;
+
+	/*
+	 * fill up the ecc and reserved positions, their indices are offseted
+	 * by the free oob region
+	 */
+	shift = layout->oobfree[0].length + host->bbm_size;
+
+	for (j = 0; j < (host->ecc_bytes_hw + host->spare_bytes); j++)
+		layout->eccpos[pos++] = i * ecc->bytes + shift + j;
+
+	return layout;
+}
+
+static int qcom_nand_host_setup(struct qcom_nand_host *host)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
+	int cwperpage, bad_block_byte;
+	bool wide_bus;
+	int ecc_mode = 1;
+
+	/*
+	 * the controller requires each step consists of 512 bytes of data.
+	 * bail out if DT has populated a wrong step size.
+	 */
+	if (ecc->size != NANDC_STEP_SIZE) {
+		dev_err(nandc->dev, "invalid ecc size\n");
+		return -EINVAL;
+	}
+
+	wide_bus = chip->options & NAND_BUSWIDTH_16 ? true : false;
+
+	if (ecc->strength >= 8) {
+		/* 8 bit ECC defaults to BCH ECC on all platforms */
+		host->bch_enabled = true;
+		ecc_mode = 1;
+
+		if (wide_bus) {
+			host->ecc_bytes_hw = 14;
+			host->spare_bytes = 0;
+			host->bbm_size = 2;
+		} else {
+			host->ecc_bytes_hw = 13;
+			host->spare_bytes = 2;
+			host->bbm_size = 1;
+		}
+	} else {
+		/*
+		 * if the controller supports BCH for 4 bit ECC, the controller
+		 * uses lesser bytes for ECC. If RS is used, the ECC bytes is
+		 * always 10 bytes
+		 */
+		if (nandc->ecc_modes & ECC_BCH_4BIT) {
+			/* BCH */
+			host->bch_enabled = true;
+			ecc_mode = 0;
+
+			if (wide_bus) {
+				host->ecc_bytes_hw = 8;
+				host->spare_bytes = 2;
+				host->bbm_size = 2;
+			} else {
+				host->ecc_bytes_hw = 7;
+				host->spare_bytes = 4;
+				host->bbm_size = 1;
+			}
+		} else {
+			/* RS */
+			host->ecc_bytes_hw = 10;
+
+			if (wide_bus) {
+				host->spare_bytes = 0;
+				host->bbm_size = 2;
+			} else {
+				host->spare_bytes = 1;
+				host->bbm_size = 1;
+			}
+		}
+	}
+
+	/*
+	 * we consider ecc->bytes as the sum of all the non-data content in a
+	 * step. It gives us a clean representation of the oob area (even if
+	 * all the bytes aren't used for ECC).It is always 16 bytes for 8 bit
+	 * ECC and 12 bytes for 4 bit ECC
+	 */
+	ecc->bytes = host->ecc_bytes_hw + host->spare_bytes + host->bbm_size;
+
+	ecc->read_page		= qcom_nandc_read_page;
+	ecc->read_page_raw	= qcom_nandc_read_page_raw;
+	ecc->read_oob		= qcom_nandc_read_oob;
+	ecc->write_page		= qcom_nandc_write_page;
+	ecc->write_page_raw	= qcom_nandc_write_page_raw;
+	ecc->write_oob		= qcom_nandc_write_oob;
+
+	ecc->mode = NAND_ECC_HW;
+
+	ecc->layout = qcom_nand_create_layout(host);
+	if (!ecc->layout)
+		return -ENOMEM;
+
+	cwperpage = mtd->writesize / ecc->size;
+
+	/*
+	 * DATA_UD_BYTES varies based on whether the read/write command protects
+	 * spare data with ECC too. We protect spare data by default, so we set
+	 * it to main + spare data, which are 512 and 4 bytes respectively.
+	 */
+	host->cw_data = 516;
+
+	/*
+	 * total bytes in a step, either 528 bytes for 4 bit ECC, or 532 bytes
+	 * for 8 bit ECC
+	 */
+	host->cw_size = host->cw_data + ecc->bytes;
+
+	if (ecc->bytes * (mtd->writesize / ecc->size) > mtd->oobsize) {
+		dev_err(nandc->dev, "ecc data doesn't fit in OOB area\n");
+		return -EINVAL;
+	}
+
+	bad_block_byte = mtd->writesize - host->cw_size * (cwperpage - 1) + 1;
+
+	host->cfg0 = (cwperpage - 1) << CW_PER_PAGE
+				| host->cw_data << UD_SIZE_BYTES
+				| 0 << DISABLE_STATUS_AFTER_WRITE
+				| 5 << NUM_ADDR_CYCLES
+				| host->ecc_bytes_hw << ECC_PARITY_SIZE_BYTES_RS
+				| 0 << STATUS_BFR_READ
+				| 1 << SET_RD_MODE_AFTER_STATUS
+				| host->spare_bytes << SPARE_SIZE_BYTES;
+
+	host->cfg1 = 7 << NAND_RECOVERY_CYCLES
+				| 0 <<  CS_ACTIVE_BSY
+				| bad_block_byte << BAD_BLOCK_BYTE_NUM
+				| 0 << BAD_BLOCK_IN_SPARE_AREA
+				| 2 << WR_RD_BSY_GAP
+				| wide_bus << WIDE_FLASH
+				| host->bch_enabled << ENABLE_BCH_ECC;
+
+	host->cfg0_raw = (cwperpage - 1) << CW_PER_PAGE
+				| host->cw_size << UD_SIZE_BYTES
+				| 5 << NUM_ADDR_CYCLES
+				| 0 << SPARE_SIZE_BYTES;
+
+	host->cfg1_raw = 7 << NAND_RECOVERY_CYCLES
+				| 0 << CS_ACTIVE_BSY
+				| 17 << BAD_BLOCK_BYTE_NUM
+				| 1 << BAD_BLOCK_IN_SPARE_AREA
+				| 2 << WR_RD_BSY_GAP
+				| wide_bus << WIDE_FLASH
+				| 1 << DEV0_CFG1_ECC_DISABLE;
+
+	host->ecc_bch_cfg = host->bch_enabled << ECC_CFG_ECC_DISABLE
+				| 0 << ECC_SW_RESET
+				| host->cw_data << ECC_NUM_DATA_BYTES
+				| 1 << ECC_FORCE_CLK_OPEN
+				| ecc_mode << ECC_MODE
+				| host->ecc_bytes_hw << ECC_PARITY_SIZE_BYTES_BCH;
+
+	host->ecc_buf_cfg = 0x203 << NUM_STEPS;
+
+	host->clrflashstatus = FS_READY_BSY_N;
+	host->clrreadstatus = 0xc0;
+
+	dev_dbg(nandc->dev,
+		"cfg0 %x cfg1 %x ecc_buf_cfg %x ecc_bch cfg %x cw_size %d cw_data %d strength %d parity_bytes %d steps %d\n",
+		host->cfg0, host->cfg1, host->ecc_buf_cfg, host->ecc_bch_cfg,
+		host->cw_size, host->cw_data, ecc->strength, ecc->bytes,
+		cwperpage);
+
+	return 0;
+}
+
+static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
+{
+	int ret;
+
+	ret = dma_set_coherent_mask(nandc->dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(nandc->dev, "failed to set DMA mask\n");
+		return ret;
+	}
+
+	/*
+	 * we use the internal buffer for reading ONFI params, reading small
+	 * data like ID and status, and preforming read-copy-write operations
+	 * when writing to a codeword partially. 532 is the maximum possible
+	 * size of a codeword for our nand controller
+	 */
+	nandc->buf_size = 532;
+
+	nandc->data_buffer = devm_kzalloc(nandc->dev, nandc->buf_size,
+					GFP_KERNEL);
+	if (!nandc->data_buffer)
+		return -ENOMEM;
+
+	nandc->regs = devm_kzalloc(nandc->dev, sizeof(*nandc->regs),
+					GFP_KERNEL);
+	if (!nandc->regs)
+		return -ENOMEM;
+
+	nandc->reg_read_buf = devm_kzalloc(nandc->dev,
+				MAX_REG_RD * sizeof(*nandc->reg_read_buf),
+				GFP_KERNEL);
+	if (!nandc->reg_read_buf)
+		return -ENOMEM;
+
+	nandc->chan = dma_request_slave_channel(nandc->dev, "rxtx");
+	if (!nandc->chan) {
+		dev_err(nandc->dev, "failed to request slave channel\n");
+		return -ENODEV;
+	}
+
+	INIT_LIST_HEAD(&nandc->desc_list);
+	INIT_LIST_HEAD(&nandc->host_list);
+
+	spin_lock_init(&nandc->controller.lock);
+	init_waitqueue_head(&nandc->controller.wq);
+
+	return 0;
+}
+
+static void qcom_nandc_unalloc(struct qcom_nand_controller *nandc)
+{
+	dma_release_channel(nandc->chan);
+}
+
+/* one time setup of a few nand controller registers */
+static int qcom_nandc_setup(struct qcom_nand_controller *nandc)
+{
+	/* kill onenand */
+	nandc_write(nandc, SFLASHC_BURST_CFG, 0);
+
+	/* enable ADM DMA */
+	nandc_write(nandc, NAND_FLASH_CHIP_SELECT, DM_EN);
+
+	/* save the original values of these registers */
+	nandc->cmd1 = nandc_read(nandc, NAND_DEV_CMD1);
+	nandc->vld = nandc_read(nandc, NAND_DEV_CMD_VLD);
+
+	return 0;
+}
+
+static int qcom_nand_host_init(struct qcom_nand_controller *nandc,
+			       struct qcom_nand_host *host,
+			       struct device_node *dn)
+{
+	struct nand_chip *chip = &host->chip;
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct device *dev = nandc->dev;
+	int ret;
+
+	ret = of_property_read_u32(dn, "reg", &host->cs);
+	if (ret) {
+		dev_err(dev, "can't get chip-select\n");
+		return -ENXIO;
+	}
+
+	nand_set_flash_node(chip, dn);
+	mtd->name = devm_kasprintf(dev, GFP_KERNEL, "qcom_nand.%d", host->cs);
+	mtd->owner = THIS_MODULE;
+	mtd->dev.parent = dev;
+
+	chip->cmdfunc		= qcom_nandc_command;
+	chip->select_chip	= qcom_nandc_select_chip;
+	chip->read_byte		= qcom_nandc_read_byte;
+	chip->read_buf		= qcom_nandc_read_buf;
+	chip->write_buf		= qcom_nandc_write_buf;
+
+	/*
+	 * the bad block marker is readable only when we read the last codeword
+	 * of a page with ECC disabled. currently, the nand_base and nand_bbt
+	 * helpers don't allow us to read BB from a nand chip with ECC
+	 * disabled (MTD_OPS_PLACE_OOB is set by default). use the block_bad
+	 * and block_markbad helpers until we permanently switch to using
+	 * MTD_OPS_RAW for all drivers (with the help of badblockbits)
+	 */
+	chip->block_bad		= qcom_nandc_block_bad;
+	chip->block_markbad	= qcom_nandc_block_markbad;
+
+	chip->controller = &nandc->controller;
+	chip->options |= NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER |
+			 NAND_SKIP_BBTSCAN;
+
+	/* set up initial status value */
+	host->status = NAND_STATUS_READY | NAND_STATUS_WP;
+
+	ret = nand_scan_ident(mtd, 1, NULL);
+	if (ret)
+		return ret;
+
+	ret = qcom_nand_host_setup(host);
+	if (ret)
+		return ret;
+
+	ret = nand_scan_tail(mtd);
+	if (ret)
+		return ret;
+
+	return mtd_device_register(mtd, NULL, 0);
+}
+
+/* parse custom DT properties here */
+static int qcom_nandc_parse_dt(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc = platform_get_drvdata(pdev);
+	struct device_node *np = nandc->dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "qcom,cmd-crci", &nandc->cmd_crci);
+	if (ret) {
+		dev_err(nandc->dev, "command CRCI unspecified\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,data-crci", &nandc->data_crci);
+	if (ret) {
+		dev_err(nandc->dev, "data CRCI unspecified\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int qcom_nandc_probe(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc;
+	struct qcom_nand_host *host;
+	const void *dev_data;
+	struct device *dev = &pdev->dev;
+	struct device_node *dn = dev->of_node, *child;
+	struct resource *res;
+	int ret;
+
+	nandc = devm_kzalloc(&pdev->dev, sizeof(*nandc), GFP_KERNEL);
+	if (!nandc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, nandc);
+	nandc->dev = dev;
+
+	dev_data = of_device_get_match_data(dev);
+	if (!dev_data) {
+		dev_err(&pdev->dev, "failed to get device data\n");
+		return -ENODEV;
+	}
+
+	nandc->ecc_modes = (unsigned long)dev_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nandc->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(nandc->base))
+		return PTR_ERR(nandc->base);
+
+	nandc->base_dma = phys_to_dma(dev, (phys_addr_t)res->start);
+
+	nandc->core_clk = devm_clk_get(dev, "core");
+	if (IS_ERR(nandc->core_clk))
+		return PTR_ERR(nandc->core_clk);
+
+	nandc->aon_clk = devm_clk_get(dev, "aon");
+	if (IS_ERR(nandc->aon_clk))
+		return PTR_ERR(nandc->aon_clk);
+
+	ret = qcom_nandc_parse_dt(pdev);
+	if (ret)
+		return ret;
+
+	ret = qcom_nandc_alloc(nandc);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(nandc->core_clk);
+	if (ret)
+		goto err_core_clk;
+
+	ret = clk_prepare_enable(nandc->aon_clk);
+	if (ret)
+		goto err_aon_clk;
+
+	ret = qcom_nandc_setup(nandc);
+	if (ret)
+		goto err_setup;
+
+	for_each_available_child_of_node(dn, child) {
+		if (of_device_is_compatible(child, "qcom,nandcs")) {
+			host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
+			if (!host) {
+				of_node_put(child);
+				ret = -ENOMEM;
+				goto err_cs_init;
+			}
+
+			ret = qcom_nand_host_init(nandc, host, child);
+			if (ret) {
+				devm_kfree(dev, host);
+				continue;
+			}
+
+			list_add_tail(&host->node, &nandc->host_list);
+		}
+	}
+
+	if (list_empty(&nandc->host_list)) {
+		ret = -ENODEV;
+		goto err_cs_init;
+	}
+
+	return 0;
+
+err_cs_init:
+	list_for_each_entry(host, &nandc->host_list, node)
+		nand_release(nand_to_mtd(&host->chip));
+err_setup:
+	clk_disable_unprepare(nandc->aon_clk);
+err_aon_clk:
+	clk_disable_unprepare(nandc->core_clk);
+err_core_clk:
+	qcom_nandc_unalloc(nandc);
+
+	return ret;
+}
+
+static int qcom_nandc_remove(struct platform_device *pdev)
+{
+	struct qcom_nand_controller *nandc = platform_get_drvdata(pdev);
+	struct qcom_nand_host *host;
+
+	list_for_each_entry(host, &nandc->host_list, node)
+		nand_release(nand_to_mtd(&host->chip));
+
+	qcom_nandc_unalloc(nandc);
+
+	clk_disable_unprepare(nandc->aon_clk);
+	clk_disable_unprepare(nandc->core_clk);
+
+	return 0;
+}
+
+#define EBI2_NANDC_ECC_MODES	(ECC_RS_4BIT | ECC_BCH_8BIT)
+
+/*
+ * data will hold a struct pointer containing more differences once we support
+ * more controller variants
+ */
+static const struct of_device_id qcom_nandc_of_match[] = {
+	{	.compatible = "qcom,ipq806x-nand",
+		.data = (void *)EBI2_NANDC_ECC_MODES,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, qcom_nandc_of_match);
+
+static struct platform_driver qcom_nandc_driver = {
+	.driver = {
+		.name = "qcom-nandc",
+		.of_match_table = qcom_nandc_of_match,
+	},
+	.probe   = qcom_nandc_probe,
+	.remove  = qcom_nandc_remove,
+};
+module_platform_driver(qcom_nandc_driver);
+
+MODULE_AUTHOR("Archit Taneja <architt@codeaurora.org>");
+MODULE_DESCRIPTION("Qualcomm NAND Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index 01ac74fa3b95..9c9397b54b2c 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -861,9 +861,6 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 	chip->ecc.mode	    = NAND_ECC_SOFT;
 #endif
 
-	if (set->ecc_layout != NULL)
-		chip->ecc.layout = set->ecc_layout;
-
 	if (set->disable_ecc)
 		chip->ecc.mode	= NAND_ECC_NONE;
 
diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c
index 51e10a35fe08..1c03eee44f3d 100644
--- a/drivers/mtd/nand/sunxi_nand.c
+++ b/drivers/mtd/nand/sunxi_nand.c
@@ -60,6 +60,7 @@
 #define NFC_REG_ECC_ERR_CNT(x)	((0x0040 + (x)) & ~0x3)
 #define NFC_REG_USER_DATA(x)	(0x0050 + ((x) * 4))
 #define NFC_REG_SPARE_AREA	0x00A0
+#define NFC_REG_PAT_ID		0x00A4
 #define NFC_RAM0_BASE		0x0400
 #define NFC_RAM1_BASE		0x0800
 
@@ -538,6 +539,174 @@ static void sunxi_nfc_cmd_ctrl(struct mtd_info *mtd, int dat,
 	sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
 }
 
+/* These seed values have been extracted from Allwinner's BSP */
+static const u16 sunxi_nfc_randomizer_page_seeds[] = {
+	0x2b75, 0x0bd0, 0x5ca3, 0x62d1, 0x1c93, 0x07e9, 0x2162, 0x3a72,
+	0x0d67, 0x67f9, 0x1be7, 0x077d, 0x032f, 0x0dac, 0x2716, 0x2436,
+	0x7922, 0x1510, 0x3860, 0x5287, 0x480f, 0x4252, 0x1789, 0x5a2d,
+	0x2a49, 0x5e10, 0x437f, 0x4b4e, 0x2f45, 0x216e, 0x5cb7, 0x7130,
+	0x2a3f, 0x60e4, 0x4dc9, 0x0ef0, 0x0f52, 0x1bb9, 0x6211, 0x7a56,
+	0x226d, 0x4ea7, 0x6f36, 0x3692, 0x38bf, 0x0c62, 0x05eb, 0x4c55,
+	0x60f4, 0x728c, 0x3b6f, 0x2037, 0x7f69, 0x0936, 0x651a, 0x4ceb,
+	0x6218, 0x79f3, 0x383f, 0x18d9, 0x4f05, 0x5c82, 0x2912, 0x6f17,
+	0x6856, 0x5938, 0x1007, 0x61ab, 0x3e7f, 0x57c2, 0x542f, 0x4f62,
+	0x7454, 0x2eac, 0x7739, 0x42d4, 0x2f90, 0x435a, 0x2e52, 0x2064,
+	0x637c, 0x66ad, 0x2c90, 0x0bad, 0x759c, 0x0029, 0x0986, 0x7126,
+	0x1ca7, 0x1605, 0x386a, 0x27f5, 0x1380, 0x6d75, 0x24c3, 0x0f8e,
+	0x2b7a, 0x1418, 0x1fd1, 0x7dc1, 0x2d8e, 0x43af, 0x2267, 0x7da3,
+	0x4e3d, 0x1338, 0x50db, 0x454d, 0x764d, 0x40a3, 0x42e6, 0x262b,
+	0x2d2e, 0x1aea, 0x2e17, 0x173d, 0x3a6e, 0x71bf, 0x25f9, 0x0a5d,
+	0x7c57, 0x0fbe, 0x46ce, 0x4939, 0x6b17, 0x37bb, 0x3e91, 0x76db,
+};
+
+/*
+ * sunxi_nfc_randomizer_ecc512_seeds and sunxi_nfc_randomizer_ecc1024_seeds
+ * have been generated using
+ * sunxi_nfc_randomizer_step(seed, (step_size * 8) + 15), which is what
+ * the randomizer engine does internally before de/scrambling OOB data.
+ *
+ * Those tables are statically defined to avoid calculating randomizer state
+ * at runtime.
+ */
+static const u16 sunxi_nfc_randomizer_ecc512_seeds[] = {
+	0x3346, 0x367f, 0x1f18, 0x769a, 0x4f64, 0x068c, 0x2ef1, 0x6b64,
+	0x28a9, 0x15d7, 0x30f8, 0x3659, 0x53db, 0x7c5f, 0x71d4, 0x4409,
+	0x26eb, 0x03cc, 0x655d, 0x47d4, 0x4daa, 0x0877, 0x712d, 0x3617,
+	0x3264, 0x49aa, 0x7f9e, 0x588e, 0x4fbc, 0x7176, 0x7f91, 0x6c6d,
+	0x4b95, 0x5fb7, 0x3844, 0x4037, 0x0184, 0x081b, 0x0ee8, 0x5b91,
+	0x293d, 0x1f71, 0x0e6f, 0x402b, 0x5122, 0x1e52, 0x22be, 0x3d2d,
+	0x75bc, 0x7c60, 0x6291, 0x1a2f, 0x61d4, 0x74aa, 0x4140, 0x29ab,
+	0x472d, 0x2852, 0x017e, 0x15e8, 0x5ec2, 0x17cf, 0x7d0f, 0x06b8,
+	0x117a, 0x6b94, 0x789b, 0x3126, 0x6ac5, 0x5be7, 0x150f, 0x51f8,
+	0x7889, 0x0aa5, 0x663d, 0x77e8, 0x0b87, 0x3dcb, 0x360d, 0x218b,
+	0x512f, 0x7dc9, 0x6a4d, 0x630a, 0x3547, 0x1dd2, 0x5aea, 0x69a5,
+	0x7bfa, 0x5e4f, 0x1519, 0x6430, 0x3a0e, 0x5eb3, 0x5425, 0x0c7a,
+	0x5540, 0x3670, 0x63c1, 0x31e9, 0x5a39, 0x2de7, 0x5979, 0x2891,
+	0x1562, 0x014b, 0x5b05, 0x2756, 0x5a34, 0x13aa, 0x6cb5, 0x2c36,
+	0x5e72, 0x1306, 0x0861, 0x15ef, 0x1ee8, 0x5a37, 0x7ac4, 0x45dd,
+	0x44c4, 0x7266, 0x2f41, 0x3ccc, 0x045e, 0x7d40, 0x7c66, 0x0fa0,
+};
+
+static const u16 sunxi_nfc_randomizer_ecc1024_seeds[] = {
+	0x2cf5, 0x35f1, 0x63a4, 0x5274, 0x2bd2, 0x778b, 0x7285, 0x32b6,
+	0x6a5c, 0x70d6, 0x757d, 0x6769, 0x5375, 0x1e81, 0x0cf3, 0x3982,
+	0x6787, 0x042a, 0x6c49, 0x1925, 0x56a8, 0x40a9, 0x063e, 0x7bd9,
+	0x4dbf, 0x55ec, 0x672e, 0x7334, 0x5185, 0x4d00, 0x232a, 0x7e07,
+	0x445d, 0x6b92, 0x528f, 0x4255, 0x53ba, 0x7d82, 0x2a2e, 0x3a4e,
+	0x75eb, 0x450c, 0x6844, 0x1b5d, 0x581a, 0x4cc6, 0x0379, 0x37b2,
+	0x419f, 0x0e92, 0x6b27, 0x5624, 0x01e3, 0x07c1, 0x44a5, 0x130c,
+	0x13e8, 0x5910, 0x0876, 0x60c5, 0x54e3, 0x5b7f, 0x2269, 0x509f,
+	0x7665, 0x36fd, 0x3e9a, 0x0579, 0x6295, 0x14ef, 0x0a81, 0x1bcc,
+	0x4b16, 0x64db, 0x0514, 0x4f07, 0x0591, 0x3576, 0x6853, 0x0d9e,
+	0x259f, 0x38b7, 0x64fb, 0x3094, 0x4693, 0x6ddd, 0x29bb, 0x0bc8,
+	0x3f47, 0x490e, 0x0c0e, 0x7933, 0x3c9e, 0x5840, 0x398d, 0x3e68,
+	0x4af1, 0x71f5, 0x57cf, 0x1121, 0x64eb, 0x3579, 0x15ac, 0x584d,
+	0x5f2a, 0x47e2, 0x6528, 0x6eac, 0x196e, 0x6b96, 0x0450, 0x0179,
+	0x609c, 0x06e1, 0x4626, 0x42c7, 0x273e, 0x486f, 0x0705, 0x1601,
+	0x145b, 0x407e, 0x062b, 0x57a5, 0x53f9, 0x5659, 0x4410, 0x3ccd,
+};
+
+static u16 sunxi_nfc_randomizer_step(u16 state, int count)
+{
+	state &= 0x7fff;
+
+	/*
+	 * This loop is just a simple implementation of a Fibonacci LFSR using
+	 * the x16 + x15 + 1 polynomial.
+	 */
+	while (count--)
+		state = ((state >> 1) |
+			 (((state ^ (state >> 1)) & 1) << 14)) & 0x7fff;
+
+	return state;
+}
+
+static u16 sunxi_nfc_randomizer_state(struct mtd_info *mtd, int page, bool ecc)
+{
+	const u16 *seeds = sunxi_nfc_randomizer_page_seeds;
+	int mod = mtd_div_by_ws(mtd->erasesize, mtd);
+
+	if (mod > ARRAY_SIZE(sunxi_nfc_randomizer_page_seeds))
+		mod = ARRAY_SIZE(sunxi_nfc_randomizer_page_seeds);
+
+	if (ecc) {
+		if (mtd->ecc_step_size == 512)
+			seeds = sunxi_nfc_randomizer_ecc512_seeds;
+		else
+			seeds = sunxi_nfc_randomizer_ecc1024_seeds;
+	}
+
+	return seeds[page % mod];
+}
+
+static void sunxi_nfc_randomizer_config(struct mtd_info *mtd,
+					int page, bool ecc)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+	u32 ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL);
+	u16 state;
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL);
+	state = sunxi_nfc_randomizer_state(mtd, page, ecc);
+	ecc_ctl = readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_RANDOM_SEED_MSK;
+	writel(ecc_ctl | NFC_RANDOM_SEED(state), nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomizer_enable(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	writel(readl(nfc->regs + NFC_REG_ECC_CTL) | NFC_RANDOM_EN,
+	       nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomizer_disable(struct mtd_info *mtd)
+{
+	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
+
+	if (!(nand->options & NAND_NEED_SCRAMBLING))
+		return;
+
+	writel(readl(nfc->regs + NFC_REG_ECC_CTL) & ~NFC_RANDOM_EN,
+	       nfc->regs + NFC_REG_ECC_CTL);
+}
+
+static void sunxi_nfc_randomize_bbm(struct mtd_info *mtd, int page, u8 *bbm)
+{
+	u16 state = sunxi_nfc_randomizer_state(mtd, page, true);
+
+	bbm[0] ^= state;
+	bbm[1] ^= sunxi_nfc_randomizer_step(state, 8);
+}
+
+static void sunxi_nfc_randomizer_write_buf(struct mtd_info *mtd,
+					   const uint8_t *buf, int len,
+					   bool ecc, int page)
+{
+	sunxi_nfc_randomizer_config(mtd, page, ecc);
+	sunxi_nfc_randomizer_enable(mtd);
+	sunxi_nfc_write_buf(mtd, buf, len);
+	sunxi_nfc_randomizer_disable(mtd);
+}
+
+static void sunxi_nfc_randomizer_read_buf(struct mtd_info *mtd, uint8_t *buf,
+					  int len, bool ecc, int page)
+{
+	sunxi_nfc_randomizer_config(mtd, page, ecc);
+	sunxi_nfc_randomizer_enable(mtd);
+	sunxi_nfc_read_buf(mtd, buf, len);
+	sunxi_nfc_randomizer_disable(mtd);
+}
+
 static void sunxi_nfc_hw_ecc_enable(struct mtd_info *mtd)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
@@ -574,18 +743,20 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd,
 				       u8 *data, int data_off,
 				       u8 *oob, int oob_off,
 				       int *cur_off,
-				       unsigned int *max_bitflips)
+				       unsigned int *max_bitflips,
+				       bool bbm, int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
+	int raw_mode = 0;
 	u32 status;
 	int ret;
 
 	if (*cur_off != data_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1);
 
-	sunxi_nfc_read_buf(mtd, NULL, ecc->size);
+	sunxi_nfc_randomizer_read_buf(mtd, NULL, ecc->size, false, page);
 
 	if (data_off + ecc->size != oob_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
@@ -594,25 +765,54 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd,
 	if (ret)
 		return ret;
 
+	sunxi_nfc_randomizer_enable(mtd);
 	writel(NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD | NFC_ECC_OP,
 	       nfc->regs + NFC_REG_CMD);
 
 	ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+	sunxi_nfc_randomizer_disable(mtd);
 	if (ret)
 		return ret;
 
+	*cur_off = oob_off + ecc->bytes + 4;
+
 	status = readl(nfc->regs + NFC_REG_ECC_ST);
+	if (status & NFC_ECC_PAT_FOUND(0)) {
+		u8 pattern = 0xff;
+
+		if (unlikely(!(readl(nfc->regs + NFC_REG_PAT_ID) & 0x1)))
+			pattern = 0x0;
+
+		memset(data, pattern, ecc->size);
+		memset(oob, pattern, ecc->bytes + 4);
+
+		return 1;
+	}
+
 	ret = NFC_ECC_ERR_CNT(0, readl(nfc->regs + NFC_REG_ECC_ERR_CNT(0)));
 
 	memcpy_fromio(data, nfc->regs + NFC_RAM0_BASE, ecc->size);
 
 	nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
-	sunxi_nfc_read_buf(mtd, oob, ecc->bytes + 4);
+	sunxi_nfc_randomizer_read_buf(mtd, oob, ecc->bytes + 4, true, page);
 
 	if (status & NFC_ECC_ERR(0)) {
+		/*
+		 * Re-read the data with the randomizer disabled to identify
+		 * bitflips in erased pages.
+		 */
+		if (nand->options & NAND_NEED_SCRAMBLING) {
+			nand->cmdfunc(mtd, NAND_CMD_RNDOUT, data_off, -1);
+			nand->read_buf(mtd, data, ecc->size);
+			nand->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_off, -1);
+			nand->read_buf(mtd, oob, ecc->bytes + 4);
+		}
+
 		ret = nand_check_erased_ecc_chunk(data,	ecc->size,
 						  oob, ecc->bytes + 4,
 						  NULL, 0, ecc->strength);
+		if (ret >= 0)
+			raw_mode = 1;
 	} else {
 		/*
 		 * The engine protects 4 bytes of OOB data per chunk.
@@ -620,6 +820,10 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd,
 		 */
 		sunxi_nfc_user_data_to_buf(readl(nfc->regs + NFC_REG_USER_DATA(0)),
 					   oob);
+
+		/* De-randomize the Bad Block Marker. */
+		if (bbm && nand->options & NAND_NEED_SCRAMBLING)
+			sunxi_nfc_randomize_bbm(mtd, page, oob);
 	}
 
 	if (ret < 0) {
@@ -629,13 +833,12 @@ static int sunxi_nfc_hw_ecc_read_chunk(struct mtd_info *mtd,
 		*max_bitflips = max_t(unsigned int, *max_bitflips, ret);
 	}
 
-	*cur_off = oob_off + ecc->bytes + 4;
-
-	return 0;
+	return raw_mode;
 }
 
 static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd,
-					    u8 *oob, int *cur_off)
+					    u8 *oob, int *cur_off,
+					    bool randomize, int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
@@ -649,7 +852,11 @@ static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd,
 		nand->cmdfunc(mtd, NAND_CMD_RNDOUT,
 			      offset + mtd->writesize, -1);
 
-	sunxi_nfc_read_buf(mtd, oob + offset, len);
+	if (!randomize)
+		sunxi_nfc_read_buf(mtd, oob + offset, len);
+	else
+		sunxi_nfc_randomizer_read_buf(mtd, oob + offset, len,
+					      false, page);
 
 	*cur_off = mtd->oobsize + mtd->writesize;
 }
@@ -662,7 +869,8 @@ static inline u32 sunxi_nfc_buf_to_user_data(const u8 *buf)
 static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd,
 					const u8 *data, int data_off,
 					const u8 *oob, int oob_off,
-					int *cur_off)
+					int *cur_off, bool bbm,
+					int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
@@ -672,11 +880,20 @@ static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd,
 	if (data_off != *cur_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN, data_off, -1);
 
-	sunxi_nfc_write_buf(mtd, data, ecc->size);
+	sunxi_nfc_randomizer_write_buf(mtd, data, ecc->size, false, page);
 
 	/* Fill OOB data in */
-	writel(sunxi_nfc_buf_to_user_data(oob),
-	       nfc->regs + NFC_REG_USER_DATA(0));
+	if ((nand->options & NAND_NEED_SCRAMBLING) && bbm) {
+		u8 user_data[4];
+
+		memcpy(user_data, oob, 4);
+		sunxi_nfc_randomize_bbm(mtd, page, user_data);
+		writel(sunxi_nfc_buf_to_user_data(user_data),
+		       nfc->regs + NFC_REG_USER_DATA(0));
+	} else {
+		writel(sunxi_nfc_buf_to_user_data(oob),
+		       nfc->regs + NFC_REG_USER_DATA(0));
+	}
 
 	if (data_off + ecc->size != oob_off)
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN, oob_off, -1);
@@ -685,11 +902,13 @@ static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd,
 	if (ret)
 		return ret;
 
+	sunxi_nfc_randomizer_enable(mtd);
 	writel(NFC_DATA_TRANS | NFC_DATA_SWAP_METHOD |
 	       NFC_ACCESS_DIR | NFC_ECC_OP,
 	       nfc->regs + NFC_REG_CMD);
 
 	ret = sunxi_nfc_wait_int(nfc, NFC_CMD_INT_FLAG, 0);
+	sunxi_nfc_randomizer_disable(mtd);
 	if (ret)
 		return ret;
 
@@ -699,7 +918,8 @@ static int sunxi_nfc_hw_ecc_write_chunk(struct mtd_info *mtd,
 }
 
 static void sunxi_nfc_hw_ecc_write_extra_oob(struct mtd_info *mtd,
-					     u8 *oob, int *cur_off)
+					     u8 *oob, int *cur_off,
+					     int page)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
@@ -713,7 +933,7 @@ static void sunxi_nfc_hw_ecc_write_extra_oob(struct mtd_info *mtd,
 		nand->cmdfunc(mtd, NAND_CMD_RNDIN,
 			      offset + mtd->writesize, -1);
 
-	sunxi_nfc_write_buf(mtd, oob + offset, len);
+	sunxi_nfc_randomizer_write_buf(mtd, oob + offset, len, false, page);
 
 	*cur_off = mtd->oobsize + mtd->writesize;
 }
@@ -725,6 +945,7 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd,
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	unsigned int max_bitflips = 0;
 	int ret, i, cur_off = 0;
+	bool raw_mode = false;
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
@@ -736,13 +957,17 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd,
 
 		ret = sunxi_nfc_hw_ecc_read_chunk(mtd, data, data_off, oob,
 						  oob_off + mtd->writesize,
-						  &cur_off, &max_bitflips);
-		if (ret)
+						  &cur_off, &max_bitflips,
+						  !i, page);
+		if (ret < 0)
 			return ret;
+		else if (ret)
+			raw_mode = true;
 	}
 
 	if (oob_required)
-		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off);
+		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off,
+						!raw_mode, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -767,13 +992,14 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd,
 
 		ret = sunxi_nfc_hw_ecc_write_chunk(mtd, data, data_off, oob,
 						   oob_off + mtd->writesize,
-						   &cur_off);
+						   &cur_off, !i, page);
 		if (ret)
 			return ret;
 	}
 
-	if (oob_required)
-		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, &cur_off);
+	if (oob_required || (chip->options & NAND_NEED_SCRAMBLING))
+		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi,
+						 &cur_off, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -788,6 +1014,7 @@ static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd,
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	unsigned int max_bitflips = 0;
 	int ret, i, cur_off = 0;
+	bool raw_mode = false;
 
 	sunxi_nfc_hw_ecc_enable(mtd);
 
@@ -799,13 +1026,16 @@ static int sunxi_nfc_hw_syndrome_ecc_read_page(struct mtd_info *mtd,
 
 		ret = sunxi_nfc_hw_ecc_read_chunk(mtd, data, data_off, oob,
 						  oob_off, &cur_off,
-						  &max_bitflips);
-		if (ret)
+						  &max_bitflips, !i, page);
+		if (ret < 0)
 			return ret;
+		else if (ret)
+			raw_mode = true;
 	}
 
 	if (oob_required)
-		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off);
+		sunxi_nfc_hw_ecc_read_extra_oob(mtd, chip->oob_poi, &cur_off,
+						!raw_mode, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -829,13 +1059,15 @@ static int sunxi_nfc_hw_syndrome_ecc_write_page(struct mtd_info *mtd,
 		const u8 *oob = chip->oob_poi + (i * (ecc->bytes + 4));
 
 		ret = sunxi_nfc_hw_ecc_write_chunk(mtd, data, data_off,
-						   oob, oob_off, &cur_off);
+						   oob, oob_off, &cur_off,
+						   false, page);
 		if (ret)
 			return ret;
 	}
 
-	if (oob_required)
-		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi, &cur_off);
+	if (oob_required || (chip->options & NAND_NEED_SCRAMBLING))
+		sunxi_nfc_hw_ecc_write_extra_oob(mtd, chip->oob_poi,
+						 &cur_off, page);
 
 	sunxi_nfc_hw_ecc_disable(mtd);
 
@@ -1345,6 +1577,9 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 	if (nand->bbt_options & NAND_BBT_USE_FLASH)
 		nand->bbt_options |= NAND_BBT_NO_OOB;
 
+	if (nand->options & NAND_NEED_SCRAMBLING)
+		nand->options |= NAND_NO_SUBPAGE_WRITE;
+
 	ret = sunxi_nand_chip_init_timings(chip, np);
 	if (ret) {
 		dev_err(dev, "could not configure chip timings: %d\n", ret);
diff --git a/drivers/mtd/nand/vf610_nfc.c b/drivers/mtd/nand/vf610_nfc.c
index 034420f313d5..293feb19b0b1 100644
--- a/drivers/mtd/nand/vf610_nfc.c
+++ b/drivers/mtd/nand/vf610_nfc.c
@@ -795,8 +795,6 @@ static int vf610_nfc_probe(struct platform_device *pdev)
 			goto error;
 		}
 
-		/* propagate ecc.layout to mtd_info */
-		mtd->ecclayout = chip->ecc.layout;
 		chip->ecc.read_page = vf610_nfc_read_page;
 		chip->ecc.write_page = vf610_nfc_write_page;
 
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 43b3392ffee7..af28bb3ae7cf 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1124,11 +1124,7 @@ static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from,
 	pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from,
 			(int)len);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = from & (mtd->oobsize - 1);
 
 	/* Do not allow reads past end of device */
@@ -1229,11 +1225,7 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from,
 	pr_debug("%s: from = 0x%08x, len = %i\n", __func__, (unsigned int)from,
 			(int)len);
 
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = from & (mtd->oobsize - 1);
 
 	/* Do not allow reads past end of device */
@@ -1365,7 +1357,7 @@ static int onenand_read_oob_nolock(struct mtd_info *mtd, loff_t from,
 	ops->oobretlen = 0;
 
 	if (mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
+		oobsize = mtd->oobavail;
 	else
 		oobsize = mtd->oobsize;
 
@@ -1885,12 +1877,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 	/* Check zero length */
 	if (!len)
 		return 0;
-
-	if (ops->mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
-	else
-		oobsize = mtd->oobsize;
-
+	oobsize = mtd_oobavail(mtd, ops);
 	oobcolumn = to & (mtd->oobsize - 1);
 
 	column = to & (mtd->writesize - 1);
@@ -2063,7 +2050,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 	ops->oobretlen = 0;
 
 	if (mode == MTD_OPS_AUTO_OOB)
-		oobsize = this->ecclayout->oobavail;
+		oobsize = mtd->oobavail;
 	else
 		oobsize = mtd->oobsize;
 
@@ -2599,6 +2586,7 @@ static int onenand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
  */
 static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
+	struct onenand_chip *this = mtd->priv;
 	int ret;
 
 	ret = onenand_block_isbad(mtd, ofs);
@@ -2610,7 +2598,7 @@ static int onenand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	}
 
 	onenand_get_device(mtd, FL_WRITING);
-	ret = mtd_block_markbad(mtd, ofs);
+	ret = this->block_markbad(mtd, ofs);
 	onenand_release_device(mtd);
 	return ret;
 }
@@ -4049,12 +4037,10 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 	 * The number of bytes available for a client to place data into
 	 * the out of band area
 	 */
-	this->ecclayout->oobavail = 0;
+	mtd->oobavail = 0;
 	for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES &&
 	    this->ecclayout->oobfree[i].length; i++)
-		this->ecclayout->oobavail +=
-			this->ecclayout->oobfree[i].length;
-	mtd->oobavail = this->ecclayout->oobavail;
+		mtd->oobavail += this->ecclayout->oobfree[i].length;
 
 	mtd->ecclayout = this->ecclayout;
 	mtd->ecc_strength = 1;
diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index 08d0085f3e93..680188a88130 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c
@@ -179,7 +179,7 @@ static int onenand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt)
  * by the onenand_release function.
  *
  */
-int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd)
+static int onenand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd)
 {
 	struct onenand_chip *this = mtd->priv;
 	struct bbm_info *bbm = this->bbm;
@@ -247,6 +247,3 @@ int onenand_default_bbt(struct mtd_info *mtd)
 
 	return onenand_scan_bbt(mtd, bbm->badblock_pattern);
 }
-
-EXPORT_SYMBOL(onenand_scan_bbt);
-EXPORT_SYMBOL(onenand_default_bbt);
diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index 0dc927540b3d..d42c98e1f581 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -9,6 +9,7 @@ if MTD_SPI_NOR
 
 config MTD_MT81xx_NOR
 	tristate "Mediatek MT81xx SPI NOR flash controller"
+	depends on HAS_IOMEM
 	help
 	  This enables access to SPI NOR flash, using MT81xx SPI NOR flash
 	  controller. This controller does not support generic SPI BUS, it only
@@ -30,7 +31,7 @@ config MTD_SPI_NOR_USE_4K_SECTORS
 
 config SPI_FSL_QUADSPI
 	tristate "Freescale Quad SPI controller"
-	depends on ARCH_MXC || COMPILE_TEST
+	depends on ARCH_MXC || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  This enables support for the Quad SPI controller in master mode.
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 54640f1eb3a1..9ab2b51d54b8 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -213,6 +213,7 @@ enum fsl_qspi_devtype {
 	FSL_QUADSPI_IMX6SX,
 	FSL_QUADSPI_IMX7D,
 	FSL_QUADSPI_IMX6UL,
+	FSL_QUADSPI_LS1021A,
 };
 
 struct fsl_qspi_devtype_data {
@@ -258,6 +259,14 @@ static struct fsl_qspi_devtype_data imx6ul_data = {
 		       | QUADSPI_QUIRK_4X_INT_CLK,
 };
 
+static struct fsl_qspi_devtype_data ls1021a_data = {
+	.devtype = FSL_QUADSPI_LS1021A,
+	.rxfifo = 128,
+	.txfifo = 64,
+	.ahb_buf_size = 1024,
+	.driver_data = 0,
+};
+
 #define FSL_QSPI_MAX_CHIP	4
 struct fsl_qspi {
 	struct spi_nor nor[FSL_QSPI_MAX_CHIP];
@@ -275,6 +284,7 @@ struct fsl_qspi {
 	u32 clk_rate;
 	unsigned int chip_base_addr; /* We may support two chips. */
 	bool has_second_chip;
+	bool big_endian;
 	struct mutex lock;
 	struct pm_qos_request pm_qos_req;
 };
@@ -300,6 +310,28 @@ static inline int needs_wakeup_wait_mode(struct fsl_qspi *q)
 }
 
 /*
+ * R/W functions for big- or little-endian registers:
+ * The qSPI controller's endian is independent of the CPU core's endian.
+ * So far, although the CPU core is little-endian but the qSPI have two
+ * versions for big-endian and little-endian.
+ */
+static void qspi_writel(struct fsl_qspi *q, u32 val, void __iomem *addr)
+{
+	if (q->big_endian)
+		iowrite32be(val, addr);
+	else
+		iowrite32(val, addr);
+}
+
+static u32 qspi_readl(struct fsl_qspi *q, void __iomem *addr)
+{
+	if (q->big_endian)
+		return ioread32be(addr);
+	else
+		return ioread32(addr);
+}
+
+/*
  * An IC bug makes us to re-arrange the 32-bit data.
  * The following chips, such as IMX6SLX, have fixed this bug.
  */
@@ -310,14 +342,14 @@ static inline u32 fsl_qspi_endian_xchg(struct fsl_qspi *q, u32 a)
 
 static inline void fsl_qspi_unlock_lut(struct fsl_qspi *q)
 {
-	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
-	writel(QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_UNLOCK, q->iobase + QUADSPI_LCKCR);
 }
 
 static inline void fsl_qspi_lock_lut(struct fsl_qspi *q)
 {
-	writel(QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
-	writel(QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
+	qspi_writel(q, QUADSPI_LUTKEY_VALUE, q->iobase + QUADSPI_LUTKEY);
+	qspi_writel(q, QUADSPI_LCKER_LOCK, q->iobase + QUADSPI_LCKCR);
 }
 
 static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
@@ -326,8 +358,8 @@ static irqreturn_t fsl_qspi_irq_handler(int irq, void *dev_id)
 	u32 reg;
 
 	/* clear interrupt */
-	reg = readl(q->iobase + QUADSPI_FR);
-	writel(reg, q->iobase + QUADSPI_FR);
+	reg = qspi_readl(q, q->iobase + QUADSPI_FR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_FR);
 
 	if (reg & QUADSPI_FR_TFF_MASK)
 		complete(&q->c);
@@ -348,7 +380,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Clear all the LUT table */
 	for (i = 0; i < QUADSPI_LUT_NUM; i++)
-		writel(0, base + QUADSPI_LUT_BASE + i * 4);
+		qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
 
 	/* Quad Read */
 	lut_base = SEQID_QUAD_READ * 4;
@@ -364,14 +396,15 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 		dummy = 8;
 	}
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
-	writel(LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
+	qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
 			base + QUADSPI_LUT(lut_base + 1));
 
 	/* Write enable */
 	lut_base = SEQID_WREN * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WREN),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
@@ -385,13 +418,15 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 		addrlen = ADDR32BIT;
 	}
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
-	writel(LUT0(FSL_WRITE, PAD1, 0), base + QUADSPI_LUT(lut_base + 1));
+	qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
+			base + QUADSPI_LUT(lut_base + 1));
 
 	/* Read Status */
 	lut_base = SEQID_RDSR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(FSL_READ, PAD1, 0x1),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDSR) |
+			LUT1(FSL_READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase a sector */
@@ -400,40 +435,46 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	cmd = q->nor[0].erase_opcode;
 	addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
 
-	writel(LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+	qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase the whole chip */
 	lut_base = SEQID_CHIP_ERASE * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
 			base + QUADSPI_LUT(lut_base));
 
 	/* READ ID */
 	lut_base = SEQID_RDID * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(FSL_READ, PAD1, 0x8),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDID) |
+			LUT1(FSL_READ, PAD1, 0x8),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write Register */
 	lut_base = SEQID_WRSR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(FSL_WRITE, PAD1, 0x2),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WRSR) |
+			LUT1(FSL_WRITE, PAD1, 0x2),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Read Configuration Register */
 	lut_base = SEQID_RDCR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(FSL_READ, PAD1, 0x1),
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_RDCR) |
+			LUT1(FSL_READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write disable */
 	lut_base = SEQID_WRDI * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_WRDI),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Micron) */
 	lut_base = SEQID_EN4B * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_EN4B),
+			base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Spansion) */
 	lut_base = SEQID_BRWR * 4;
-	writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base));
+	qspi_writel(q, LUT0(CMD, PAD1, SPINOR_OP_BRWR),
+			base + QUADSPI_LUT(lut_base));
 
 	fsl_qspi_lock_lut(q);
 }
@@ -488,15 +529,16 @@ fsl_qspi_runcmd(struct fsl_qspi *q, u8 cmd, unsigned int addr, int len)
 			q->chip_base_addr, addr, len, cmd);
 
 	/* save the reg */
-	reg = readl(base + QUADSPI_MCR);
+	reg = qspi_readl(q, base + QUADSPI_MCR);
 
-	writel(q->memmap_phy + q->chip_base_addr + addr, base + QUADSPI_SFAR);
-	writel(QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
+	qspi_writel(q, q->memmap_phy + q->chip_base_addr + addr,
+			base + QUADSPI_SFAR);
+	qspi_writel(q, QUADSPI_RBCT_WMRK_MASK | QUADSPI_RBCT_RXBRD_USEIPS,
 			base + QUADSPI_RBCT);
-	writel(reg | QUADSPI_MCR_CLR_RXF_MASK, base + QUADSPI_MCR);
+	qspi_writel(q, reg | QUADSPI_MCR_CLR_RXF_MASK, base + QUADSPI_MCR);
 
 	do {
-		reg2 = readl(base + QUADSPI_SR);
+		reg2 = qspi_readl(q, base + QUADSPI_SR);
 		if (reg2 & (QUADSPI_SR_IP_ACC_MASK | QUADSPI_SR_AHB_ACC_MASK)) {
 			udelay(1);
 			dev_dbg(q->dev, "The controller is busy, 0x%x\n", reg2);
@@ -507,21 +549,22 @@ fsl_qspi_runcmd(struct fsl_qspi *q, u8 cmd, unsigned int addr, int len)
 
 	/* trigger the LUT now */
 	seqid = fsl_qspi_get_seqid(q, cmd);
-	writel((seqid << QUADSPI_IPCR_SEQID_SHIFT) | len, base + QUADSPI_IPCR);
+	qspi_writel(q, (seqid << QUADSPI_IPCR_SEQID_SHIFT) | len,
+			base + QUADSPI_IPCR);
 
 	/* Wait for the interrupt. */
 	if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000))) {
 		dev_err(q->dev,
 			"cmd 0x%.2x timeout, addr@%.8x, FR:0x%.8x, SR:0x%.8x\n",
-			cmd, addr, readl(base + QUADSPI_FR),
-			readl(base + QUADSPI_SR));
+			cmd, addr, qspi_readl(q, base + QUADSPI_FR),
+			qspi_readl(q, base + QUADSPI_SR));
 		err = -ETIMEDOUT;
 	} else {
 		err = 0;
 	}
 
 	/* restore the MCR */
-	writel(reg, base + QUADSPI_MCR);
+	qspi_writel(q, reg, base + QUADSPI_MCR);
 
 	return err;
 }
@@ -533,7 +576,7 @@ static void fsl_qspi_read_data(struct fsl_qspi *q, int len, u8 *rxbuf)
 	int i = 0;
 
 	while (len > 0) {
-		tmp = readl(q->iobase + QUADSPI_RBDR + i * 4);
+		tmp = qspi_readl(q, q->iobase + QUADSPI_RBDR + i * 4);
 		tmp = fsl_qspi_endian_xchg(q, tmp);
 		dev_dbg(q->dev, "chip addr:0x%.8x, rcv:0x%.8x\n",
 				q->chip_base_addr, tmp);
@@ -561,9 +604,9 @@ static inline void fsl_qspi_invalid(struct fsl_qspi *q)
 {
 	u32 reg;
 
-	reg = readl(q->iobase + QUADSPI_MCR);
+	reg = qspi_readl(q, q->iobase + QUADSPI_MCR);
 	reg |= QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK;
-	writel(reg, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
 
 	/*
 	 * The minimum delay : 1 AHB + 2 SFCK clocks.
@@ -572,7 +615,7 @@ static inline void fsl_qspi_invalid(struct fsl_qspi *q)
 	udelay(1);
 
 	reg &= ~(QUADSPI_MCR_SWRSTHD_MASK | QUADSPI_MCR_SWRSTSD_MASK);
-	writel(reg, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, reg, q->iobase + QUADSPI_MCR);
 }
 
 static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor,
@@ -586,20 +629,20 @@ static int fsl_qspi_nor_write(struct fsl_qspi *q, struct spi_nor *nor,
 		q->chip_base_addr, to, count);
 
 	/* clear the TX FIFO. */
-	tmp = readl(q->iobase + QUADSPI_MCR);
-	writel(tmp | QUADSPI_MCR_CLR_TXF_MASK, q->iobase + QUADSPI_MCR);
+	tmp = qspi_readl(q, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, tmp | QUADSPI_MCR_CLR_TXF_MASK, q->iobase + QUADSPI_MCR);
 
 	/* fill the TX data to the FIFO */
 	for (j = 0, i = ((count + 3) / 4); j < i; j++) {
 		tmp = fsl_qspi_endian_xchg(q, *txbuf);
-		writel(tmp, q->iobase + QUADSPI_TBDR);
+		qspi_writel(q, tmp, q->iobase + QUADSPI_TBDR);
 		txbuf++;
 	}
 
 	/* fill the TXFIFO upto 16 bytes for i.MX7d */
 	if (needs_fill_txfifo(q))
 		for (; i < 4; i++)
-			writel(tmp, q->iobase + QUADSPI_TBDR);
+			qspi_writel(q, tmp, q->iobase + QUADSPI_TBDR);
 
 	/* Trigger it */
 	ret = fsl_qspi_runcmd(q, opcode, to, count);
@@ -615,10 +658,10 @@ static void fsl_qspi_set_map_addr(struct fsl_qspi *q)
 	int nor_size = q->nor_size;
 	void __iomem *base = q->iobase;
 
-	writel(nor_size + q->memmap_phy, base + QUADSPI_SFA1AD);
-	writel(nor_size * 2 + q->memmap_phy, base + QUADSPI_SFA2AD);
-	writel(nor_size * 3 + q->memmap_phy, base + QUADSPI_SFB1AD);
-	writel(nor_size * 4 + q->memmap_phy, base + QUADSPI_SFB2AD);
+	qspi_writel(q, nor_size + q->memmap_phy, base + QUADSPI_SFA1AD);
+	qspi_writel(q, nor_size * 2 + q->memmap_phy, base + QUADSPI_SFA2AD);
+	qspi_writel(q, nor_size * 3 + q->memmap_phy, base + QUADSPI_SFB1AD);
+	qspi_writel(q, nor_size * 4 + q->memmap_phy, base + QUADSPI_SFB2AD);
 }
 
 /*
@@ -640,24 +683,26 @@ static void fsl_qspi_init_abh_read(struct fsl_qspi *q)
 	int seqid;
 
 	/* AHB configuration for access buffer 0/1/2 .*/
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF1CR);
-	writel(QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF0CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF1CR);
+	qspi_writel(q, QUADSPI_BUFXCR_INVALID_MSTRID, base + QUADSPI_BUF2CR);
 	/*
 	 * Set ADATSZ with the maximum AHB buffer size to improve the
 	 * read performance.
 	 */
-	writel(QUADSPI_BUF3CR_ALLMST_MASK | ((q->devtype_data->ahb_buf_size / 8)
-			<< QUADSPI_BUF3CR_ADATSZ_SHIFT), base + QUADSPI_BUF3CR);
+	qspi_writel(q, QUADSPI_BUF3CR_ALLMST_MASK |
+			((q->devtype_data->ahb_buf_size / 8)
+			<< QUADSPI_BUF3CR_ADATSZ_SHIFT),
+			base + QUADSPI_BUF3CR);
 
 	/* We only use the buffer3 */
-	writel(0, base + QUADSPI_BUF0IND);
-	writel(0, base + QUADSPI_BUF1IND);
-	writel(0, base + QUADSPI_BUF2IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF0IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF1IND);
+	qspi_writel(q, 0, base + QUADSPI_BUF2IND);
 
 	/* Set the default lut sequence for AHB Read. */
 	seqid = fsl_qspi_get_seqid(q, q->nor[0].read_opcode);
-	writel(seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
+	qspi_writel(q, seqid << QUADSPI_BFGENCR_SEQID_SHIFT,
 		q->iobase + QUADSPI_BFGENCR);
 }
 
@@ -713,7 +758,7 @@ static int fsl_qspi_nor_setup(struct fsl_qspi *q)
 		return ret;
 
 	/* Reset the module */
-	writel(QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
+	qspi_writel(q, QUADSPI_MCR_SWRSTSD_MASK | QUADSPI_MCR_SWRSTHD_MASK,
 		base + QUADSPI_MCR);
 	udelay(1);
 
@@ -721,24 +766,24 @@ static int fsl_qspi_nor_setup(struct fsl_qspi *q)
 	fsl_qspi_init_lut(q);
 
 	/* Disable the module */
-	writel(QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK | QUADSPI_MCR_RESERVED_MASK,
 			base + QUADSPI_MCR);
 
-	reg = readl(base + QUADSPI_SMPR);
-	writel(reg & ~(QUADSPI_SMPR_FSDLY_MASK
+	reg = qspi_readl(q, base + QUADSPI_SMPR);
+	qspi_writel(q, reg & ~(QUADSPI_SMPR_FSDLY_MASK
 			| QUADSPI_SMPR_FSPHS_MASK
 			| QUADSPI_SMPR_HSENA_MASK
 			| QUADSPI_SMPR_DDRSMP_MASK), base + QUADSPI_SMPR);
 
 	/* Enable the module */
-	writel(QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
+	qspi_writel(q, QUADSPI_MCR_RESERVED_MASK | QUADSPI_MCR_END_CFG_MASK,
 			base + QUADSPI_MCR);
 
 	/* clear all interrupt status */
-	writel(0xffffffff, q->iobase + QUADSPI_FR);
+	qspi_writel(q, 0xffffffff, q->iobase + QUADSPI_FR);
 
 	/* enable the interrupt */
-	writel(QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
+	qspi_writel(q, QUADSPI_RSER_TFIE, q->iobase + QUADSPI_RSER);
 
 	return 0;
 }
@@ -776,6 +821,7 @@ static const struct of_device_id fsl_qspi_dt_ids[] = {
 	{ .compatible = "fsl,imx6sx-qspi", .data = (void *)&imx6sx_data, },
 	{ .compatible = "fsl,imx7d-qspi", .data = (void *)&imx7d_data, },
 	{ .compatible = "fsl,imx6ul-qspi", .data = (void *)&imx6ul_data, },
+	{ .compatible = "fsl,ls1021a-qspi", .data = (void *)&ls1021a_data, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fsl_qspi_dt_ids);
@@ -954,6 +1000,7 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 	if (IS_ERR(q->iobase))
 		return PTR_ERR(q->iobase);
 
+	q->big_endian = of_property_read_bool(np, "big-endian");
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					"QuadSPI-memory");
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
@@ -1101,8 +1148,8 @@ static int fsl_qspi_remove(struct platform_device *pdev)
 	}
 
 	/* disable the hardware */
-	writel(QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
-	writel(0x0, q->iobase + QUADSPI_RSER);
+	qspi_writel(q, QUADSPI_MCR_MDIS_MASK, q->iobase + QUADSPI_MCR);
+	qspi_writel(q, 0x0, q->iobase + QUADSPI_RSER);
 
 	mutex_destroy(&q->lock);
 
diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index d5f850d035bb..8bed1a4cb79c 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -371,8 +371,8 @@ static int mt8173_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
 	return ret;
 }
 
-static int __init mtk_nor_init(struct mt8173_nor *mt8173_nor,
-			       struct device_node *flash_node)
+static int mtk_nor_init(struct mt8173_nor *mt8173_nor,
+			struct device_node *flash_node)
 {
 	int ret;
 	struct spi_nor *nor;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index ed0c19c558b5..157841dc3e99 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -61,14 +61,20 @@ struct flash_info {
 	u16		addr_width;
 
 	u16		flags;
-#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
-#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
-#define	SST_WRITE		0x04	/* use SST byte programming */
-#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
-#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
-#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
-#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
-#define	USE_FSR			0x80	/* use flag status register */
+#define SECT_4K			BIT(0)	/* SPINOR_OP_BE_4K works uniformly */
+#define SPI_NOR_NO_ERASE	BIT(1)	/* No erase command needed */
+#define SST_WRITE		BIT(2)	/* use SST byte programming */
+#define SPI_NOR_NO_FR		BIT(3)	/* Can't do fastread */
+#define SECT_4K_PMC		BIT(4)	/* SPINOR_OP_BE_4K_PMC works uniformly */
+#define SPI_NOR_DUAL_READ	BIT(5)	/* Flash supports Dual Read */
+#define SPI_NOR_QUAD_READ	BIT(6)	/* Flash supports Quad Read */
+#define USE_FSR			BIT(7)	/* use flag status register */
+#define SPI_NOR_HAS_LOCK	BIT(8)	/* Flash supports lock/unlock via SR */
+#define SPI_NOR_HAS_TB		BIT(9)	/*
+					 * Flash SR has Top/Bottom (TB) protect
+					 * bit. Must be used with
+					 * SPI_NOR_HAS_LOCK.
+					 */
 };
 
 #define JEDEC_MFR(info)	((info)->id[0])
@@ -434,32 +440,58 @@ static void stm_get_locked_range(struct spi_nor *nor, u8 sr, loff_t *ofs,
 	} else {
 		pow = ((sr & mask) ^ mask) >> shift;
 		*len = mtd->size >> pow;
-		*ofs = mtd->size - *len;
+		if (nor->flags & SNOR_F_HAS_SR_TB && sr & SR_TB)
+			*ofs = 0;
+		else
+			*ofs = mtd->size - *len;
 	}
 }
 
 /*
- * Return 1 if the entire region is locked, 0 otherwise
+ * Return 1 if the entire region is locked (if @locked is true) or unlocked (if
+ * @locked is false); 0 otherwise
  */
-static int stm_is_locked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
-			    u8 sr)
+static int stm_check_lock_status_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+				    u8 sr, bool locked)
 {
 	loff_t lock_offs;
 	uint64_t lock_len;
 
+	if (!len)
+		return 1;
+
 	stm_get_locked_range(nor, sr, &lock_offs, &lock_len);
 
-	return (ofs + len <= lock_offs + lock_len) && (ofs >= lock_offs);
+	if (locked)
+		/* Requested range is a sub-range of locked range */
+		return (ofs + len <= lock_offs + lock_len) && (ofs >= lock_offs);
+	else
+		/* Requested range does not overlap with locked range */
+		return (ofs >= lock_offs + lock_len) || (ofs + len <= lock_offs);
+}
+
+static int stm_is_locked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+			    u8 sr)
+{
+	return stm_check_lock_status_sr(nor, ofs, len, sr, true);
+}
+
+static int stm_is_unlocked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
+			      u8 sr)
+{
+	return stm_check_lock_status_sr(nor, ofs, len, sr, false);
 }
 
 /*
  * Lock a region of the flash. Compatible with ST Micro and similar flash.
- * Supports only the block protection bits BP{0,1,2} in the status register
+ * Supports the block protection bits BP{0,1,2} in the status register
  * (SR). Does not support these features found in newer SR bitfields:
- *   - TB: top/bottom protect - only handle TB=0 (top protect)
  *   - SEC: sector/block protect - only handle SEC=0 (block protect)
  *   - CMP: complement protect - only support CMP=0 (range is not complemented)
  *
+ * Support for the following is provided conditionally for some flash:
+ *   - TB: top/bottom protect
+ *
  * Sample table portion for 8MB flash (Winbond w25q64fw):
  *
  *   SEC  |  TB   |  BP2  |  BP1  |  BP0  |  Prot Length  | Protected Portion
@@ -472,6 +504,13 @@ static int stm_is_locked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
  *    0   |   0   |   1   |   0   |   1   |  2 MB         | Upper 1/4
  *    0   |   0   |   1   |   1   |   0   |  4 MB         | Upper 1/2
  *    X   |   X   |   1   |   1   |   1   |  8 MB         | ALL
+ *  ------|-------|-------|-------|-------|---------------|-------------------
+ *    0   |   1   |   0   |   0   |   1   |  128 KB       | Lower 1/64
+ *    0   |   1   |   0   |   1   |   0   |  256 KB       | Lower 1/32
+ *    0   |   1   |   0   |   1   |   1   |  512 KB       | Lower 1/16
+ *    0   |   1   |   1   |   0   |   0   |  1 MB         | Lower 1/8
+ *    0   |   1   |   1   |   0   |   1   |  2 MB         | Lower 1/4
+ *    0   |   1   |   1   |   1   |   0   |  4 MB         | Lower 1/2
  *
  * Returns negative on errors, 0 on success.
  */
@@ -481,20 +520,39 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	int status_old, status_new;
 	u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
 	u8 shift = ffs(mask) - 1, pow, val;
+	loff_t lock_len;
+	bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
+	bool use_top;
 	int ret;
 
 	status_old = read_sr(nor);
 	if (status_old < 0)
 		return status_old;
 
-	/* SPI NOR always locks to the end */
-	if (ofs + len != mtd->size) {
-		/* Does combined region extend to end? */
-		if (!stm_is_locked_sr(nor, ofs + len, mtd->size - ofs - len,
-				      status_old))
-			return -EINVAL;
-		len = mtd->size - ofs;
-	}
+	/* If nothing in our range is unlocked, we don't need to do anything */
+	if (stm_is_locked_sr(nor, ofs, len, status_old))
+		return 0;
+
+	/* If anything below us is unlocked, we can't use 'bottom' protection */
+	if (!stm_is_locked_sr(nor, 0, ofs, status_old))
+		can_be_bottom = false;
+
+	/* If anything above us is unlocked, we can't use 'top' protection */
+	if (!stm_is_locked_sr(nor, ofs + len, mtd->size - (ofs + len),
+				status_old))
+		can_be_top = false;
+
+	if (!can_be_bottom && !can_be_top)
+		return -EINVAL;
+
+	/* Prefer top, if both are valid */
+	use_top = can_be_top;
+
+	/* lock_len: length of region that should end up locked */
+	if (use_top)
+		lock_len = mtd->size - ofs;
+	else
+		lock_len = ofs + len;
 
 	/*
 	 * Need smallest pow such that:
@@ -505,7 +563,7 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	 *
 	 *   pow = ceil(log2(size / len)) = log2(size) - floor(log2(len))
 	 */
-	pow = ilog2(mtd->size) - ilog2(len);
+	pow = ilog2(mtd->size) - ilog2(lock_len);
 	val = mask - (pow << shift);
 	if (val & ~mask)
 		return -EINVAL;
@@ -513,10 +571,20 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	if (!(val & mask))
 		return -EINVAL;
 
-	status_new = (status_old & ~mask) | val;
+	status_new = (status_old & ~mask & ~SR_TB) | val;
+
+	/* Disallow further writes if WP pin is asserted */
+	status_new |= SR_SRWD;
+
+	if (!use_top)
+		status_new |= SR_TB;
+
+	/* Don't bother if they're the same */
+	if (status_new == status_old)
+		return 0;
 
 	/* Only modify protection if it will not unlock other areas */
-	if ((status_new & mask) <= (status_old & mask))
+	if ((status_new & mask) < (status_old & mask))
 		return -EINVAL;
 
 	write_enable(nor);
@@ -537,17 +605,40 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	int status_old, status_new;
 	u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
 	u8 shift = ffs(mask) - 1, pow, val;
+	loff_t lock_len;
+	bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
+	bool use_top;
 	int ret;
 
 	status_old = read_sr(nor);
 	if (status_old < 0)
 		return status_old;
 
-	/* Cannot unlock; would unlock larger region than requested */
-	if (stm_is_locked_sr(nor, ofs - mtd->erasesize, mtd->erasesize,
-			     status_old))
+	/* If nothing in our range is locked, we don't need to do anything */
+	if (stm_is_unlocked_sr(nor, ofs, len, status_old))
+		return 0;
+
+	/* If anything below us is locked, we can't use 'top' protection */
+	if (!stm_is_unlocked_sr(nor, 0, ofs, status_old))
+		can_be_top = false;
+
+	/* If anything above us is locked, we can't use 'bottom' protection */
+	if (!stm_is_unlocked_sr(nor, ofs + len, mtd->size - (ofs + len),
+				status_old))
+		can_be_bottom = false;
+
+	if (!can_be_bottom && !can_be_top)
 		return -EINVAL;
 
+	/* Prefer top, if both are valid */
+	use_top = can_be_top;
+
+	/* lock_len: length of region that should remain locked */
+	if (use_top)
+		lock_len = mtd->size - (ofs + len);
+	else
+		lock_len = ofs;
+
 	/*
 	 * Need largest pow such that:
 	 *
@@ -557,8 +648,8 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	 *
 	 *   pow = floor(log2(size / len)) = log2(size) - ceil(log2(len))
 	 */
-	pow = ilog2(mtd->size) - order_base_2(mtd->size - (ofs + len));
-	if (ofs + len == mtd->size) {
+	pow = ilog2(mtd->size) - order_base_2(lock_len);
+	if (lock_len == 0) {
 		val = 0; /* fully unlocked */
 	} else {
 		val = mask - (pow << shift);
@@ -567,10 +658,21 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 			return -EINVAL;
 	}
 
-	status_new = (status_old & ~mask) | val;
+	status_new = (status_old & ~mask & ~SR_TB) | val;
+
+	/* Don't protect status register if we're fully unlocked */
+	if (lock_len == mtd->size)
+		status_new &= ~SR_SRWD;
+
+	if (!use_top)
+		status_new |= SR_TB;
+
+	/* Don't bother if they're the same */
+	if (status_new == status_old)
+		return 0;
 
 	/* Only modify protection if it will not lock other areas */
-	if ((status_new & mask) >= (status_old & mask))
+	if ((status_new & mask) > (status_old & mask))
 		return -EINVAL;
 
 	write_enable(nor);
@@ -762,8 +864,8 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "n25q032a",	 INFO(0x20bb16, 0, 64 * 1024,   64, SPI_NOR_QUAD_READ) },
 	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q064a",    INFO(0x20bb17, 0, 64 * 1024,  128, SECT_4K | SPI_NOR_QUAD_READ) },
-	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SPI_NOR_QUAD_READ) },
-	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SPI_NOR_QUAD_READ) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K | SPI_NOR_QUAD_READ) },
 	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
 	{ "n25q512ax3",  INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) },
@@ -797,6 +899,7 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+	{ "s25fl116k",  INFO(0x014015,      0,  64 * 1024,  32, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
 	{ "s25fl132k",  INFO(0x014016,      0,  64 * 1024,  64, SECT_4K) },
 	{ "s25fl164k",  INFO(0x014017,      0,  64 * 1024, 128, SECT_4K) },
 	{ "s25fl204k",  INFO(0x014013,      0,  64 * 1024,   8, SECT_4K | SPI_NOR_DUAL_READ) },
@@ -860,11 +963,23 @@ static const struct flash_info spi_nor_ids[] = {
 	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
 	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
-	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{
+		"w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
 	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
 	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
-	{ "w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
-	{ "w25q128fw", INFO(0xef6018, 0, 64 * 1024, 256, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{
+		"w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
+	{
+		"w25q128fw", INFO(0xef6018, 0, 64 * 1024, 256,
+			SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+			SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB)
+	},
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
@@ -1100,45 +1215,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int micron_quad_enable(struct spi_nor *nor)
-{
-	int ret;
-	u8 val;
-
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-
-	write_enable(nor);
-
-	/* set EVCR, enable quad I/O */
-	nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON;
-	ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error while writing EVCR register\n");
-		return ret;
-	}
-
-	ret = spi_nor_wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* read EVCR and check it */
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-	if (val & EVCR_QUAD_EN_MICRON) {
-		dev_err(nor->dev, "Micron EVCR Quad bit not clear\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 {
 	int status;
@@ -1152,12 +1228,7 @@ static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 		}
 		return status;
 	case SNOR_MFR_MICRON:
-		status = micron_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Micron quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
+		return 0;
 	default:
 		status = spansion_quad_enable(nor);
 		if (status) {
@@ -1233,9 +1304,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 
 	if (JEDEC_MFR(info) == SNOR_MFR_ATMEL ||
 	    JEDEC_MFR(info) == SNOR_MFR_INTEL ||
-	    JEDEC_MFR(info) == SNOR_MFR_SST) {
+	    JEDEC_MFR(info) == SNOR_MFR_SST ||
+	    info->flags & SPI_NOR_HAS_LOCK) {
 		write_enable(nor);
 		write_sr(nor, 0);
+		spi_nor_wait_till_ready(nor);
 	}
 
 	if (!mtd->name)
@@ -1249,7 +1322,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 	mtd->_read = spi_nor_read;
 
 	/* NOR protection support for STmicro/Micron chips and similar */
-	if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
+	if (JEDEC_MFR(info) == SNOR_MFR_MICRON ||
+			info->flags & SPI_NOR_HAS_LOCK) {
 		nor->flash_lock = stm_lock;
 		nor->flash_unlock = stm_unlock;
 		nor->flash_is_locked = stm_is_locked;
@@ -1269,6 +1343,8 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 
 	if (info->flags & USE_FSR)
 		nor->flags |= SNOR_F_USE_FSR;
+	if (info->flags & SPI_NOR_HAS_TB)
+		nor->flags |= SNOR_F_HAS_SR_TB;
 
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */
diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c
index 31762120eb56..1cb3f7758fb6 100644
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c
@@ -215,19 +215,19 @@ static int verify_eraseblock(int ebnum)
 			pr_info("ignoring error as within bitflip_limit\n");
 		}
 
-		if (use_offset != 0 || use_len < mtd->ecclayout->oobavail) {
+		if (use_offset != 0 || use_len < mtd->oobavail) {
 			int k;
 
 			ops.mode      = MTD_OPS_AUTO_OOB;
 			ops.len       = 0;
 			ops.retlen    = 0;
-			ops.ooblen    = mtd->ecclayout->oobavail;
+			ops.ooblen    = mtd->oobavail;
 			ops.oobretlen = 0;
 			ops.ooboffs   = 0;
 			ops.datbuf    = NULL;
 			ops.oobbuf    = readbuf;
 			err = mtd_read_oob(mtd, addr, &ops);
-			if (err || ops.oobretlen != mtd->ecclayout->oobavail) {
+			if (err || ops.oobretlen != mtd->oobavail) {
 				pr_err("error: readoob failed at %#llx\n",
 						(long long)addr);
 				errcnt += 1;
@@ -244,7 +244,7 @@ static int verify_eraseblock(int ebnum)
 			/* verify post-(use_offset + use_len) area for 0xff */
 			k = use_offset + use_len;
 			bitflips += memffshow(addr, k, readbuf + k,
-					      mtd->ecclayout->oobavail - k);
+					      mtd->oobavail - k);
 
 			if (bitflips > bitflip_limit) {
 				pr_err("error: verify failed at %#llx\n",
@@ -269,8 +269,8 @@ static int verify_eraseblock_in_one_go(int ebnum)
 	struct mtd_oob_ops ops;
 	int err = 0;
 	loff_t addr = (loff_t)ebnum * mtd->erasesize;
-	size_t len = mtd->ecclayout->oobavail * pgcnt;
-	size_t oobavail = mtd->ecclayout->oobavail;
+	size_t len = mtd->oobavail * pgcnt;
+	size_t oobavail = mtd->oobavail;
 	size_t bitflips;
 	int i;
 
@@ -394,8 +394,8 @@ static int __init mtd_oobtest_init(void)
 		goto out;
 
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 0;
 
 	/* First test: write all OOB, read it back and verify */
@@ -460,8 +460,8 @@ static int __init mtd_oobtest_init(void)
 
 	/* Write all eraseblocks */
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 1;
 	prandom_seed_state(&rnd_state, 5);
 
@@ -471,8 +471,8 @@ static int __init mtd_oobtest_init(void)
 
 	/* Check all eraseblocks */
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 1;
 	prandom_seed_state(&rnd_state, 5);
 	err = verify_all_eraseblocks();
@@ -480,8 +480,8 @@ static int __init mtd_oobtest_init(void)
 		goto out;
 
 	use_offset = 0;
-	use_len = mtd->ecclayout->oobavail;
-	use_len_max = mtd->ecclayout->oobavail;
+	use_len = mtd->oobavail;
+	use_len_max = mtd->oobavail;
 	vary_offset = 0;
 
 	/* Fourth test: try to write off end of device */
@@ -501,7 +501,7 @@ static int __init mtd_oobtest_init(void)
 	ops.retlen    = 0;
 	ops.ooblen    = 1;
 	ops.oobretlen = 0;
-	ops.ooboffs   = mtd->ecclayout->oobavail;
+	ops.ooboffs   = mtd->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = writebuf;
 	pr_info("attempting to start write past end of OOB\n");
@@ -521,7 +521,7 @@ static int __init mtd_oobtest_init(void)
 	ops.retlen    = 0;
 	ops.ooblen    = 1;
 	ops.oobretlen = 0;
-	ops.ooboffs   = mtd->ecclayout->oobavail;
+	ops.ooboffs   = mtd->oobavail;
 	ops.datbuf    = NULL;
 	ops.oobbuf    = readbuf;
 	pr_info("attempting to start read past end of OOB\n");
@@ -543,7 +543,7 @@ static int __init mtd_oobtest_init(void)
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail + 1;
+		ops.ooblen    = mtd->oobavail + 1;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -563,7 +563,7 @@ static int __init mtd_oobtest_init(void)
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail + 1;
+		ops.ooblen    = mtd->oobavail + 1;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -587,7 +587,7 @@ static int __init mtd_oobtest_init(void)
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail;
+		ops.ooblen    = mtd->oobavail;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
@@ -607,7 +607,7 @@ static int __init mtd_oobtest_init(void)
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail;
+		ops.ooblen    = mtd->oobavail;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 1;
 		ops.datbuf    = NULL;
@@ -638,7 +638,7 @@ static int __init mtd_oobtest_init(void)
 	for (i = 0; i < ebcnt - 1; ++i) {
 		int cnt = 2;
 		int pg;
-		size_t sz = mtd->ecclayout->oobavail;
+		size_t sz = mtd->oobavail;
 		if (bbt[i] || bbt[i + 1])
 			continue;
 		addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize;
@@ -673,13 +673,12 @@ static int __init mtd_oobtest_init(void)
 	for (i = 0; i < ebcnt - 1; ++i) {
 		if (bbt[i] || bbt[i + 1])
 			continue;
-		prandom_bytes_state(&rnd_state, writebuf,
-					mtd->ecclayout->oobavail * 2);
+		prandom_bytes_state(&rnd_state, writebuf, mtd->oobavail * 2);
 		addr = (loff_t)(i + 1) * mtd->erasesize - mtd->writesize;
 		ops.mode      = MTD_OPS_AUTO_OOB;
 		ops.len       = 0;
 		ops.retlen    = 0;
-		ops.ooblen    = mtd->ecclayout->oobavail * 2;
+		ops.ooblen    = mtd->oobavail * 2;
 		ops.oobretlen = 0;
 		ops.ooboffs   = 0;
 		ops.datbuf    = NULL;
@@ -688,7 +687,7 @@ static int __init mtd_oobtest_init(void)
 		if (err)
 			goto out;
 		if (memcmpshow(addr, readbuf, writebuf,
-			       mtd->ecclayout->oobavail * 2)) {
+			       mtd->oobavail * 2)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
 			errcnt += 1;
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 2a45ac210b16..989036c681b8 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -153,3 +153,52 @@ int ubi_check_pattern(const void *buf, uint8_t patt, int size)
 			return 0;
 	return 1;
 }
+
+/* Normal UBI messages */
+void ubi_msg(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_notice(UBI_NAME_STR "%d: %pV\n", ubi->ubi_num, &vaf);
+
+	va_end(args);
+}
+
+/* UBI warning messages */
+void ubi_warn(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_warn(UBI_NAME_STR "%d warning: %ps: %pV\n",
+		ubi->ubi_num, __builtin_return_address(0), &vaf);
+
+	va_end(args);
+}
+
+/* UBI error messages */
+void ubi_err(const struct ubi_device *ubi, const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	pr_err(UBI_NAME_STR "%d error: %ps: %pV\n",
+	       ubi->ubi_num, __builtin_return_address(0), &vaf);
+	va_end(args);
+}
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 2974b67f6c6c..dadc6a9d5755 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -49,15 +49,19 @@
 /* UBI name used for character devices, sysfs, etc */
 #define UBI_NAME_STR "ubi"
 
+struct ubi_device;
+
 /* Normal UBI messages */
-#define ubi_msg(ubi, fmt, ...) pr_notice(UBI_NAME_STR "%d: " fmt "\n", \
-					 ubi->ubi_num, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_msg(const struct ubi_device *ubi, const char *fmt, ...);
+
 /* UBI warning messages */
-#define ubi_warn(ubi, fmt, ...) pr_warn(UBI_NAME_STR "%d warning: %s: " fmt "\n", \
-					ubi->ubi_num, __func__, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_warn(const struct ubi_device *ubi, const char *fmt, ...);
+
 /* UBI error messages */
-#define ubi_err(ubi, fmt, ...) pr_err(UBI_NAME_STR "%d error: %s: " fmt "\n", \
-				      ubi->ubi_num, __func__, ##__VA_ARGS__)
+__printf(2, 3)
+void ubi_err(const struct ubi_device *ubi, const char *fmt, ...);
 
 /* Background thread name pattern */
 #define UBI_BGT_NAME_PATTERN "ubi_bgt%dd"
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 3ce6095ced3d..6619178ed77b 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -2959,7 +2959,7 @@ static int macb_probe(struct platform_device *pdev)
 		int gpio = of_get_named_gpio(phy_node, "reset-gpios", 0);
 		if (gpio_is_valid(gpio))
 			bp->reset_gpio = gpio_to_desc(gpio);
-		gpiod_set_value(bp->reset_gpio, GPIOD_OUT_HIGH);
+		gpiod_direction_output(bp->reset_gpio, 1);
 	}
 	of_node_put(phy_node);
 
@@ -3029,7 +3029,7 @@ static int macb_remove(struct platform_device *pdev)
 		mdiobus_free(bp->mii_bus);
 
 		/* Shutdown the PHY if there is a GPIO reset */
-		gpiod_set_value(bp->reset_gpio, GPIOD_OUT_LOW);
+		gpiod_set_value(bp->reset_gpio, 0);
 
 		unregister_netdev(dev);
 		clk_disable_unprepare(bp->tx_clk);
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 4d187f22c48b..4686a85a8a22 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -96,6 +96,17 @@ config CHELSIO_T4_DCB
 
 	  If unsure, say N.
 
+config CHELSIO_T4_UWIRE
+	bool "Unified Wire Support for Chelsio T5 cards"
+	default n
+	depends on CHELSIO_T4
+	---help---
+	  Enable unified-wire offload features.
+	  Say Y here if you want to enable unified-wire over Ethernet
+	  in the driver.
+
+	  If unsure, say N.
+
 config CHELSIO_T4_FCOE
 	bool "Fibre Channel over Ethernet (FCoE) Support for Chelsio T5 cards"
 	default n
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index ace0ab98d0f1..85c92821b239 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
+cxgb4-$(CONFIG_CHELSIO_T4_UWIRE) +=  cxgb4_ppm.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1dac6c6111bf..984a3cc26f86 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -404,6 +404,9 @@ enum {
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
 	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
 	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
+
+	/* # of streaming iSCSIT Rx queues */
+	MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
 };
 
 enum {
@@ -420,8 +423,8 @@ enum {
 enum {
 	INGQ_EXTRAS = 2,        /* firmware event queue and */
 				/*   forwarded interrupts */
-	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES
-		   + MAX_RDMA_CIQS + INGQ_EXTRAS,
+	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
+		   MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
 };
 
 struct adapter;
@@ -508,6 +511,15 @@ struct pkt_gl {
 
 typedef int (*rspq_handler_t)(struct sge_rspq *q, const __be64 *rsp,
 			      const struct pkt_gl *gl);
+typedef void (*rspq_flush_handler_t)(struct sge_rspq *q);
+/* LRO related declarations for ULD */
+struct t4_lro_mgr {
+#define MAX_LRO_SESSIONS		64
+	u8 lro_session_cnt;         /* # of sessions to aggregate */
+	unsigned long lro_pkts;     /* # of LRO super packets */
+	unsigned long lro_merged;   /* # of wire packets merged by LRO */
+	struct sk_buff_head lroq;   /* list of aggregated sessions */
+};
 
 struct sge_rspq {                   /* state for an SGE response queue */
 	struct napi_struct napi;
@@ -532,6 +544,8 @@ struct sge_rspq {                   /* state for an SGE response queue */
 	struct adapter *adap;
 	struct net_device *netdev;  /* associated net device */
 	rspq_handler_t handler;
+	rspq_flush_handler_t flush_handler;
+	struct t4_lro_mgr lro_mgr;
 #ifdef CONFIG_NET_RX_BUSY_POLL
 #define CXGB_POLL_STATE_IDLE		0
 #define CXGB_POLL_STATE_NAPI		BIT(0) /* NAPI owns this poll */
@@ -641,6 +655,7 @@ struct sge {
 
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
 	struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
+	struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
 	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
 	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
@@ -652,9 +667,11 @@ struct sge {
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
 	u16 iscsiqsets;              /* # of active iSCSI queue sets */
+	u16 niscsitq;               /* # of available iSCST Rx queues */
 	u16 rdmaqs;                 /* # of available RDMA Rx queues */
 	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
 	u16 iscsi_rxq[MAX_OFLD_QSETS];
+	u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
 	u16 rdma_rxq[MAX_RDMA_QUEUES];
 	u16 rdma_ciq[MAX_RDMA_CIQS];
 	u16 timer_val[SGE_NTIMERS];
@@ -681,6 +698,7 @@ struct sge {
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
 #define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
+#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
 #define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
 #define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
 
@@ -747,6 +765,8 @@ struct adapter {
 	struct list_head rcu_node;
 	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
 
+	void *iscsi_ppm;
+
 	struct tid_info tids;
 	void **tid_release_head;
 	spinlock_t tid_release_lock;
@@ -1113,7 +1133,8 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
 int t4_ofld_send(struct adapter *adap, struct sk_buff *skb);
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		     struct net_device *dev, int intr_idx,
-		     struct sge_fl *fl, rspq_handler_t hnd, int cong);
+		     struct sge_fl *fl, rspq_handler_t hnd,
+		     rspq_flush_handler_t flush_handler, int cong);
 int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 			 struct net_device *dev, struct netdev_queue *netdevq,
 			 unsigned int iqid);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index e6a4072b494b..0bb41e9b9b1c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2334,12 +2334,14 @@ static int sge_qinfo_show(struct seq_file *seq, void *v)
 	struct adapter *adap = seq->private;
 	int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4);
+	int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4);
 	int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4);
 	int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4);
 	int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
 	int i, r = (uintptr_t)v - 1;
 	int iscsi_idx = r - eth_entries;
-	int rdma_idx = iscsi_idx - iscsi_entries;
+	int iscsit_idx = iscsi_idx - iscsi_entries;
+	int rdma_idx = iscsit_idx - iscsit_entries;
 	int ciq_idx = rdma_idx - rdma_entries;
 	int ctrl_idx =  ciq_idx - ciq_entries;
 	int fq_idx =  ctrl_idx - ctrl_entries;
@@ -2453,6 +2455,35 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
+	} else if (iscsit_idx < iscsit_entries) {
+		const struct sge_ofld_rxq *rx =
+			&adap->sge.iscsitrxq[iscsit_idx * 4];
+		int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx);
+
+		S("QType:", "iSCSIT");
+		R("RspQ ID:", rspq.abs_id);
+		R("RspQ size:", rspq.size);
+		R("RspQE size:", rspq.iqe_len);
+		R("RspQ CIDX:", rspq.cidx);
+		R("RspQ Gen:", rspq.gen);
+		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
+		S3("u", "Intr pktcnt:",
+		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
+		R("FL ID:", fl.cntxt_id);
+		R("FL size:", fl.size - 8);
+		R("FL pend:", fl.pend_cred);
+		R("FL avail:", fl.avail);
+		R("FL PIDX:", fl.pidx);
+		R("FL CIDX:", fl.cidx);
+		RL("RxPackets:", stats.pkts);
+		RL("RxImmPkts:", stats.imm);
+		RL("RxNoMem:", stats.nomem);
+		RL("FLAllocErr:", fl.alloc_failed);
+		RL("FLLrgAlcErr:", fl.large_alloc_failed);
+		RL("FLMapErr:", fl.mapping_err);
+		RL("FLLow:", fl.low);
+		RL("FLStarving:", fl.starving);
+
 	} else if (rdma_idx < rdma_entries) {
 		const struct sge_ofld_rxq *rx =
 				&adap->sge.rdmarxq[rdma_idx * 4];
@@ -2543,6 +2574,7 @@ static int sge_queue_entries(const struct adapter *adap)
 {
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
 	       DIV_ROUND_UP(adap->sge.iscsiqsets, 4) +
+	       DIV_ROUND_UP(adap->sge.niscsitq, 4) +
 	       DIV_ROUND_UP(adap->sge.rdmaqs, 4) +
 	       DIV_ROUND_UP(adap->sge.rdmaciqs, 4) +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index adad73f7c8cd..d1e3f0997d6b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -227,7 +227,7 @@ static DEFINE_MUTEX(uld_mutex);
 static LIST_HEAD(adap_rcu_list);
 static DEFINE_SPINLOCK(adap_rcu_lock);
 static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
-static const char *uld_str[] = { "RDMA", "iSCSI" };
+static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
 
 static void link_report(struct net_device *dev)
 {
@@ -664,6 +664,13 @@ out:
 	return 0;
 }
 
+/* Flush the aggregated lro sessions */
+static void uldrx_flush_handler(struct sge_rspq *q)
+{
+	if (ulds[q->uld].lro_flush)
+		ulds[q->uld].lro_flush(&q->lro_mgr);
+}
+
 /**
  *	uldrx_handler - response queue handler for ULD queues
  *	@q: the response queue that received the packet
@@ -677,6 +684,7 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 			 const struct pkt_gl *gl)
 {
 	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
+	int ret;
 
 	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
 	 */
@@ -684,10 +692,19 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
 		rsp += 2;
 
-	if (ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], rsp, gl)) {
+	if (q->flush_handler)
+		ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
+						  rsp, gl, &q->lro_mgr,
+						  &q->napi);
+	else
+		ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
+					      rsp, gl);
+
+	if (ret) {
 		rxq->stats.nomem++;
 		return -1;
 	}
+
 	if (gl == NULL)
 		rxq->stats.imm++;
 	else if (gl == CXGB4_MSG_AN)
@@ -754,6 +771,10 @@ static void name_msix_vecs(struct adapter *adap)
 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
 			 adap->port[0]->name, i);
 
+	for_each_iscsitrxq(&adap->sge, i)
+		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
+			 adap->port[0]->name, i);
+
 	for_each_rdmarxq(&adap->sge, i)
 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
 			 adap->port[0]->name, i);
@@ -767,6 +788,7 @@ static int request_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 	int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
+	int iscsitqidx = 0;
 	int msi_index = 2;
 
 	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
@@ -792,6 +814,15 @@ static int request_msix_queue_irqs(struct adapter *adap)
 			goto unwind;
 		msi_index++;
 	}
+	for_each_iscsitrxq(s, iscsitqidx) {
+		err = request_irq(adap->msix_info[msi_index].vec,
+				  t4_sge_intr_msix, 0,
+				  adap->msix_info[msi_index].desc,
+				  &s->iscsitrxq[iscsitqidx].rspq);
+		if (err)
+			goto unwind;
+		msi_index++;
+	}
 	for_each_rdmarxq(s, rdmaqidx) {
 		err = request_irq(adap->msix_info[msi_index].vec,
 				  t4_sge_intr_msix, 0,
@@ -819,6 +850,9 @@ unwind:
 	while (--rdmaqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->rdmarxq[rdmaqidx].rspq);
+	while (--iscsitqidx >= 0)
+		free_irq(adap->msix_info[--msi_index].vec,
+			 &s->iscsitrxq[iscsitqidx].rspq);
 	while (--iscsiqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->iscsirxq[iscsiqidx].rspq);
@@ -840,6 +874,9 @@ static void free_msix_queue_irqs(struct adapter *adap)
 	for_each_iscsirxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec,
 			 &s->iscsirxq[i].rspq);
+	for_each_iscsitrxq(s, i)
+		free_irq(adap->msix_info[msi_index++].vec,
+			 &s->iscsitrxq[i].rspq);
 	for_each_rdmarxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
 	for_each_rdmaciq(s, i)
@@ -984,7 +1021,7 @@ static void enable_rx(struct adapter *adap)
 
 static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
 			   unsigned int nq, unsigned int per_chan, int msi_idx,
-			   u16 *ids)
+			   u16 *ids, bool lro)
 {
 	int i, err;
 
@@ -994,7 +1031,9 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
 				       adap->port[i / per_chan],
 				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler, 0);
+				       uldrx_handler,
+				       lro ? uldrx_flush_handler : NULL,
+				       0);
 		if (err)
 			return err;
 		memset(&q->stats, 0, sizeof(q->stats));
@@ -1024,7 +1063,7 @@ static int setup_sge_queues(struct adapter *adap)
 		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
 	else {
 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
-				       NULL, NULL, -1);
+				       NULL, NULL, NULL, -1);
 		if (err)
 			return err;
 		msi_idx = -((int)s->intrq.abs_id + 1);
@@ -1044,7 +1083,7 @@ static int setup_sge_queues(struct adapter *adap)
 	 *    new/deleted queues.
 	 */
 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-			       msi_idx, NULL, fwevtq_handler, -1);
+			       msi_idx, NULL, fwevtq_handler, NULL, -1);
 	if (err) {
 freeout:	t4_free_sge_resources(adap);
 		return err;
@@ -1062,6 +1101,7 @@ freeout:	t4_free_sge_resources(adap);
 			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
 					       msi_idx, &q->fl,
 					       t4_ethrx_handler,
+					       NULL,
 					       t4_get_mps_bg_map(adap,
 								 pi->tx_chan));
 			if (err)
@@ -1087,18 +1127,19 @@ freeout:	t4_free_sge_resources(adap);
 			goto freeout;
 	}
 
-#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids) do { \
-	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids); \
+#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
+	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
 	if (err) \
 		goto freeout; \
 	if (msi_idx > 0) \
 		msi_idx += nq; \
 } while (0)
 
-	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq);
-	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
+	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
+	ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
+	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
 	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
-	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq);
+	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
 
 #undef ALLOC_OFLD_RXQS
 
@@ -2430,6 +2471,9 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
 	} else if (uld == CXGB4_ULD_ISCSI) {
 		lli.rxq_ids = adap->sge.iscsi_rxq;
 		lli.nrxq = adap->sge.iscsiqsets;
+	} else if (uld == CXGB4_ULD_ISCSIT) {
+		lli.rxq_ids = adap->sge.iscsit_rxq;
+		lli.nrxq = adap->sge.niscsitq;
 	}
 	lli.ntxq = adap->sge.iscsiqsets;
 	lli.nchan = adap->params.nports;
@@ -2437,6 +2481,10 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
 	lli.wr_cred = adap->params.ofldq_wr_cred;
 	lli.adapter_type = adap->params.chip;
 	lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
+	lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
+	lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
+	lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
+	lli.iscsi_ppm = &adap->iscsi_ppm;
 	lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
 	lli.udb_density = 1 << adap->params.sge.eq_qpp;
 	lli.ucq_density = 1 << adap->params.sge.iq_qpp;
@@ -4336,6 +4384,9 @@ static void cfg_queues(struct adapter *adap)
 		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
 				adap->params.nports;
 		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
+
+		if (!is_t4(adap->params.chip))
+			s->niscsitq = s->iscsiqsets;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -4362,6 +4413,16 @@ static void cfg_queues(struct adapter *adap)
 		r->fl.size = 72;
 	}
 
+	if (!is_t4(adap->params.chip)) {
+		for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
+			struct sge_ofld_rxq *r = &s->iscsitrxq[i];
+
+			init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
+			r->rspq.uld = CXGB4_ULD_ISCSIT;
+			r->fl.size = 72;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
 		struct sge_ofld_rxq *r = &s->rdmarxq[i];
 
@@ -4436,9 +4497,13 @@ static int enable_msix(struct adapter *adap)
 
 	want = s->max_ethqsets + EXTRA_VECS;
 	if (is_offload(adap)) {
-		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets;
+		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets	+
+			s->niscsitq;
 		/* need nchan for each possible ULD */
-		ofld_need = 3 * nchan;
+		if (is_t4(adap->params.chip))
+			ofld_need = 3 * nchan;
+		else
+			ofld_need = 4 * nchan;
 	}
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
@@ -4470,12 +4535,16 @@ static int enable_msix(struct adapter *adap)
 		if (allocated < want) {
 			s->rdmaqs = nchan;
 			s->rdmaciqs = nchan;
+
+			if (!is_t4(adap->params.chip))
+				s->niscsitq = nchan;
 		}
 
 		/* leftovers go to OFLD */
 		i = allocated - EXTRA_VECS - s->max_ethqsets -
-		    s->rdmaqs - s->rdmaciqs;
+		    s->rdmaqs - s->rdmaciqs - s->niscsitq;
 		s->iscsiqsets = (i / nchan) * nchan;  /* round down */
+
 	}
 	for (i = 0; i < allocated; ++i)
 		adap->msix_info[i].vec = entries[i].vector;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c
new file mode 100644
index 000000000000..d88a7a7b2400
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.c
@@ -0,0 +1,464 @@
+/*
+ * cxgb4_ppm.c: Chelsio common library for T4/T5 iSCSI PagePod Manager
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+
+#include "cxgb4_ppm.h"
+
+/* Direct Data Placement -
+ * Directly place the iSCSI Data-In or Data-Out PDU's payload into
+ * pre-posted final destination host-memory buffers based on the
+ * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
+ * in Data-Out PDUs. The host memory address is programmed into
+ * h/w in the format of pagepod entries. The location of the
+ * pagepod entry is encoded into ddp tag which is used as the base
+ * for ITT/TTT.
+ */
+
+/* Direct-Data Placement page size adjustment
+ */
+int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	int i;
+
+	for (i = 0; i < DDP_PGIDX_MAX; i++) {
+		if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
+					 tformat->pgsz_order[i])) {
+			pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
+				 __func__, ppm->ndev->name, pgsz, i);
+			return i;
+		}
+	}
+	pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
+	return DDP_PGIDX_MAX;
+}
+
+/* DDP setup & teardown
+ */
+static int ppm_find_unused_entries(unsigned long *bmap,
+				   unsigned int max_ppods,
+				   unsigned int start,
+				   unsigned int nr,
+				   unsigned int align_mask)
+{
+	unsigned long i;
+
+	i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
+
+	if (unlikely(i >= max_ppods) && (start > nr))
+		i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
+					       align_mask);
+	if (unlikely(i >= max_ppods))
+		return -ENOSPC;
+
+	bitmap_set(bmap, i, nr);
+	return (int)i;
+}
+
+static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
+			     unsigned long caller_data)
+{
+	struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
+
+	pdata->caller_data = caller_data;
+	pdata->npods = count;
+
+	if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
+		pdata->color = 0;
+	else
+		pdata->color++;
+}
+
+static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
+			       unsigned long caller_data)
+{
+	struct cxgbi_ppm_pool *pool;
+	unsigned int cpu;
+	int i;
+
+	cpu = get_cpu();
+	pool = per_cpu_ptr(ppm->pool, cpu);
+	spin_lock_bh(&pool->lock);
+	put_cpu();
+
+	i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
+				    pool->next, count, 0);
+	if (i < 0) {
+		pool->next = 0;
+		spin_unlock_bh(&pool->lock);
+		return -ENOSPC;
+	}
+
+	pool->next = i + count;
+	if (pool->next >= ppm->pool_index_max)
+		pool->next = 0;
+
+	spin_unlock_bh(&pool->lock);
+
+	pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
+		 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
+		pool->next);
+
+	i += cpu * ppm->pool_index_max;
+	ppm_mark_entries(ppm, i, count, caller_data);
+
+	return i;
+}
+
+static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
+			   unsigned long caller_data)
+{
+	int i;
+
+	spin_lock_bh(&ppm->map_lock);
+	i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
+				    ppm->next, count, 0);
+	if (i < 0) {
+		ppm->next = 0;
+		spin_unlock_bh(&ppm->map_lock);
+		pr_debug("ippm: NO suitable entries %u available.\n",
+			 count);
+		return -ENOSPC;
+	}
+
+	ppm->next = i + count;
+	if (ppm->next >= ppm->bmap_index_max)
+		ppm->next = 0;
+
+	spin_unlock_bh(&ppm->map_lock);
+
+	pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
+		 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
+		 caller_data);
+
+	i += ppm->pool_rsvd;
+	ppm_mark_entries(ppm, i, count, caller_data);
+
+	return i;
+}
+
+static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
+{
+	pr_debug("%s: idx %d + %d.\n", __func__, i, count);
+
+	if (i < ppm->pool_rsvd) {
+		unsigned int cpu;
+		struct cxgbi_ppm_pool *pool;
+
+		cpu = i / ppm->pool_index_max;
+		i %= ppm->pool_index_max;
+
+		pool = per_cpu_ptr(ppm->pool, cpu);
+		spin_lock_bh(&pool->lock);
+		bitmap_clear(pool->bmap, i, count);
+
+		if (i < pool->next)
+			pool->next = i;
+		spin_unlock_bh(&pool->lock);
+
+		pr_debug("%s: cpu %u, idx %d, next %u.\n",
+			 __func__, cpu, i, pool->next);
+	} else {
+		spin_lock_bh(&ppm->map_lock);
+
+		i -= ppm->pool_rsvd;
+		bitmap_clear(ppm->ppod_bmap, i, count);
+
+		if (i < ppm->next)
+			ppm->next = i;
+		spin_unlock_bh(&ppm->map_lock);
+
+		pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
+	}
+}
+
+void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
+{
+	struct cxgbi_ppod_data *pdata;
+
+	if (idx >= ppm->ppmax) {
+		pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
+		return;
+	}
+
+	pdata = ppm->ppod_data + idx;
+	if (!pdata->npods) {
+		pr_warn("ippm: idx %u, npods 0.\n", idx);
+		return;
+	}
+
+	pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
+	ppm_unmark_entries(ppm, idx, pdata->npods);
+}
+EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
+
+int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
+			    u32 per_tag_pg_idx, u32 *ppod_idx,
+			    u32 *ddp_tag, unsigned long caller_data)
+{
+	struct cxgbi_ppod_data *pdata;
+	unsigned int npods;
+	int idx = -1;
+	unsigned int hwidx;
+	u32 tag;
+
+	npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
+	if (!npods) {
+		pr_warn("%s: pages %u -> npods %u, full.\n",
+			__func__, nr_pages, npods);
+		return -EINVAL;
+	}
+
+	/* grab from cpu pool first */
+	idx = ppm_get_cpu_entries(ppm, npods, caller_data);
+	/* try the general pool */
+	if (idx < 0)
+		idx = ppm_get_entries(ppm, npods, caller_data);
+	if (idx < 0) {
+		pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
+			 nr_pages, npods, ppm->next, caller_data);
+		return idx;
+	}
+
+	pdata = ppm->ppod_data + idx;
+	hwidx = ppm->base_idx + idx;
+
+	tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
+
+	if (per_tag_pg_idx)
+		tag |= (per_tag_pg_idx << 30) & 0xC0000000;
+
+	*ppod_idx = idx;
+	*ddp_tag = tag;
+
+	pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
+		 nr_pages, tag, idx, npods, caller_data);
+
+	return npods;
+}
+EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
+
+void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
+			     unsigned int tid, unsigned int offset,
+			     unsigned int length,
+			     struct cxgbi_pagepod_hdr *hdr)
+{
+	/* The ddp tag in pagepod should be with bit 31:30 set to 0.
+	 * The ddp Tag on the wire should be with non-zero 31:30 to the peer
+	 */
+	tag &= 0x3FFFFFFF;
+
+	hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
+
+	hdr->rsvd = 0;
+	hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
+	hdr->max_offset = htonl(length);
+	hdr->page_offset = htonl(offset);
+
+	pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
+		 tag, tid, length, offset);
+}
+EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
+
+static void ppm_free(struct cxgbi_ppm *ppm)
+{
+	vfree(ppm);
+}
+
+static void ppm_destroy(struct kref *kref)
+{
+	struct cxgbi_ppm *ppm = container_of(kref,
+					     struct cxgbi_ppm,
+					     refcnt);
+	pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
+		ppm->ndev->name, ppm);
+
+	*ppm->ppm_pp = NULL;
+
+	free_percpu(ppm->pool);
+	ppm_free(ppm);
+}
+
+int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
+{
+	if (ppm) {
+		int rv;
+
+		rv = kref_put(&ppm->refcnt, ppm_destroy);
+		return rv;
+	}
+	return 1;
+}
+
+static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
+						 unsigned int *pcpu_ppmax)
+{
+	struct cxgbi_ppm_pool *pools;
+	unsigned int ppmax = (*total) / num_possible_cpus();
+	unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
+	unsigned int bmap;
+	unsigned int alloc_sz;
+	unsigned int count = 0;
+	unsigned int cpu;
+
+	/* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
+	if (ppmax > max)
+		ppmax = max;
+
+	/* pool size must be multiple of unsigned long */
+	bmap = BITS_TO_LONGS(ppmax);
+	ppmax = (bmap * sizeof(unsigned long)) << 3;
+
+	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
+	pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));
+
+	if (!pools)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);
+
+		memset(ppool, 0, alloc_sz);
+		spin_lock_init(&ppool->lock);
+		count += ppmax;
+	}
+
+	*total = count;
+	*pcpu_ppmax = ppmax;
+
+	return pools;
+}
+
+int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
+		   struct pci_dev *pdev, void *lldev,
+		   struct cxgbi_tag_format *tformat,
+		   unsigned int ppmax,
+		   unsigned int llimit,
+		   unsigned int start,
+		   unsigned int reserve_factor)
+{
+	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
+	struct cxgbi_ppm_pool *pool = NULL;
+	unsigned int ppmax_pool = 0;
+	unsigned int pool_index_max = 0;
+	unsigned int alloc_sz;
+	unsigned int ppod_bmap_size;
+
+	if (ppm) {
+		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
+			ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
+		kref_get(&ppm->refcnt);
+		return 1;
+	}
+
+	if (reserve_factor) {
+		ppmax_pool = ppmax / reserve_factor;
+		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
+
+		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
+			 ndev->name, ppmax, ppmax_pool, pool_index_max);
+	}
+
+	ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
+	alloc_sz = sizeof(struct cxgbi_ppm) +
+			ppmax * (sizeof(struct cxgbi_ppod_data)) +
+			ppod_bmap_size * sizeof(unsigned long);
+
+	ppm = vmalloc(alloc_sz);
+	if (!ppm)
+		goto release_ppm_pool;
+
+	memset(ppm, 0, alloc_sz);
+
+	ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
+
+	if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
+		unsigned int start = ppmax - ppmax_pool;
+		unsigned int end = ppod_bmap_size >> 3;
+
+		bitmap_set(ppm->ppod_bmap, ppmax, end - start);
+		pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
+			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
+			end);
+	}
+
+	spin_lock_init(&ppm->map_lock);
+	kref_init(&ppm->refcnt);
+
+	memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));
+
+	ppm->ppm_pp = ppm_pp;
+	ppm->ndev = ndev;
+	ppm->pdev = pdev;
+	ppm->lldev = lldev;
+	ppm->ppmax = ppmax;
+	ppm->next = 0;
+	ppm->llimit = llimit;
+	ppm->base_idx = start > llimit ?
+			(start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
+	ppm->bmap_index_max = ppmax - ppmax_pool;
+
+	ppm->pool = pool;
+	ppm->pool_rsvd = ppmax_pool;
+	ppm->pool_index_max = pool_index_max;
+
+	/* check one more time */
+	if (*ppm_pp) {
+		ppm_free(ppm);
+		ppm = (struct cxgbi_ppm *)(*ppm_pp);
+
+		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
+			ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);
+
+		kref_get(&ppm->refcnt);
+		return 1;
+	}
+	*ppm_pp = ppm;
+
+	ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);
+
+	pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
+		ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
+		ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
+		ppm->pool_index_max);
+
+	return 0;
+
+release_ppm_pool:
+	free_percpu(pool);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(cxgbi_ppm_init);
+
+unsigned int cxgbi_tagmask_set(unsigned int ppmax)
+{
+	unsigned int bits = fls(ppmax);
+
+	if (bits > PPOD_IDX_MAX_SIZE)
+		bits = PPOD_IDX_MAX_SIZE;
+
+	pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
+		ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
+
+	return 1 << (bits + PPOD_IDX_SHIFT);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h
new file mode 100644
index 000000000000..d48732673b75
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ppm.h
@@ -0,0 +1,310 @@
+/*
+ * cxgb4_ppm.h: Chelsio common library for T4/T5 iSCSI ddp operation
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Written by: Karen Xie (kxie@chelsio.com)
+ */
+
+#ifndef	__CXGB4PPM_H__
+#define	__CXGB4PPM_H__
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+
+struct cxgbi_pagepod_hdr {
+	u32 vld_tid;
+	u32 pgsz_tag_clr;
+	u32 max_offset;
+	u32 page_offset;
+	u64 rsvd;
+};
+
+#define PPOD_PAGES_MAX			4
+struct cxgbi_pagepod {
+	struct cxgbi_pagepod_hdr hdr;
+	u64 addr[PPOD_PAGES_MAX + 1];
+};
+
+/* ddp tag format
+ * for a 32-bit tag:
+ * bit #
+ * 31 .....   .....  0
+ *     X   Y...Y Z...Z, where
+ *     ^   ^^^^^ ^^^^
+ *     |   |      |____ when ddp bit = 0: color bits
+ *     |   |
+ *     |   |____ when ddp bit = 0: idx into the ddp memory region
+ *     |
+ *     |____ ddp bit: 0 - ddp tag, 1 - non-ddp tag
+ *
+ *  [page selector:2] [sw/free bits] [0] [idx] [color:6]
+ */
+
+#define DDP_PGIDX_MAX		4
+#define DDP_PGSZ_BASE_SHIFT	12	/* base page 4K */
+
+struct cxgbi_task_tag_info {
+	unsigned char flags;
+#define CXGBI_PPOD_INFO_FLAG_VALID	0x1
+#define CXGBI_PPOD_INFO_FLAG_MAPPED	0x2
+	unsigned char cid;
+	unsigned short pg_shift;
+	unsigned int npods;
+	unsigned int idx;
+	unsigned int tag;
+	struct cxgbi_pagepod_hdr hdr;
+	int nents;
+	int nr_pages;
+	struct scatterlist *sgl;
+};
+
+struct cxgbi_tag_format {
+	unsigned char pgsz_order[DDP_PGIDX_MAX];
+	unsigned char pgsz_idx_dflt;
+	unsigned char free_bits:4;
+	unsigned char color_bits:4;
+	unsigned char idx_bits;
+	unsigned char rsvd_bits;
+	unsigned int  no_ddp_mask;
+	unsigned int  idx_mask;
+	unsigned int  color_mask;
+	unsigned int  idx_clr_mask;
+	unsigned int  rsvd_mask;
+};
+
+struct cxgbi_ppod_data {
+	unsigned char pg_idx:2;
+	unsigned char color:6;
+	unsigned char chan_id;
+	unsigned short npods;
+	unsigned long caller_data;
+};
+
+/* per cpu ppm pool */
+struct cxgbi_ppm_pool {
+	unsigned int base;		/* base index */
+	unsigned int next;		/* next possible free index */
+	spinlock_t lock;		/* ppm pool lock */
+	unsigned long bmap[0];
+} ____cacheline_aligned_in_smp;
+
+struct cxgbi_ppm {
+	struct kref refcnt;
+	struct net_device *ndev;	/* net_device, 1st port */
+	struct pci_dev *pdev;
+	void *lldev;
+	void **ppm_pp;
+	struct cxgbi_tag_format tformat;
+	unsigned int ppmax;
+	unsigned int llimit;
+	unsigned int base_idx;
+
+	unsigned int pool_rsvd;
+	unsigned int pool_index_max;
+	struct cxgbi_ppm_pool __percpu *pool;
+	/* map lock */
+	spinlock_t map_lock;		/* ppm map lock */
+	unsigned int bmap_index_max;
+	unsigned int next;
+	unsigned long *ppod_bmap;
+	struct cxgbi_ppod_data ppod_data[0];
+};
+
+#define DDP_THRESHOLD		512
+
+#define PPOD_PAGES_SHIFT	2       /*  4 pages per pod */
+
+#define IPPOD_SIZE               sizeof(struct cxgbi_pagepod)  /*  64 */
+#define PPOD_SIZE_SHIFT         6
+
+/* page pods are allocated in groups of this size (must be power of 2) */
+#define PPOD_CLUSTER_SIZE	16U
+
+#define ULPMEM_DSGL_MAX_NPPODS	16	/*  1024/PPOD_SIZE */
+#define ULPMEM_IDATA_MAX_NPPODS	3	/* (PPOD_SIZE * 3 + ulptx hdr) < 256B */
+#define PCIE_MEMWIN_MAX_NPPODS	16	/*  1024/PPOD_SIZE */
+
+#define PPOD_COLOR_SHIFT	0
+#define PPOD_COLOR(x)		((x) << PPOD_COLOR_SHIFT)
+
+#define PPOD_IDX_SHIFT          6
+#define PPOD_IDX_MAX_SIZE       24
+
+#define PPOD_TID_SHIFT		0
+#define PPOD_TID(x)		((x) << PPOD_TID_SHIFT)
+
+#define PPOD_TAG_SHIFT		6
+#define PPOD_TAG(x)		((x) << PPOD_TAG_SHIFT)
+
+#define PPOD_VALID_SHIFT	24
+#define PPOD_VALID(x)		((x) << PPOD_VALID_SHIFT)
+#define PPOD_VALID_FLAG		PPOD_VALID(1U)
+
+#define PPOD_PI_EXTRACT_CTL_SHIFT	31
+#define PPOD_PI_EXTRACT_CTL(x)		((x) << PPOD_PI_EXTRACT_CTL_SHIFT)
+#define PPOD_PI_EXTRACT_CTL_FLAG	V_PPOD_PI_EXTRACT_CTL(1U)
+
+#define PPOD_PI_TYPE_SHIFT		29
+#define PPOD_PI_TYPE_MASK		0x3
+#define PPOD_PI_TYPE(x)			((x) << PPOD_PI_TYPE_SHIFT)
+
+#define PPOD_PI_CHECK_CTL_SHIFT		27
+#define PPOD_PI_CHECK_CTL_MASK		0x3
+#define PPOD_PI_CHECK_CTL(x)		((x) << PPOD_PI_CHECK_CTL_SHIFT)
+
+#define PPOD_PI_REPORT_CTL_SHIFT	25
+#define PPOD_PI_REPORT_CTL_MASK		0x3
+#define PPOD_PI_REPORT_CTL(x)		((x) << PPOD_PI_REPORT_CTL_SHIFT)
+
+static inline int cxgbi_ppm_is_ddp_tag(struct cxgbi_ppm *ppm, u32 tag)
+{
+	return !(tag & ppm->tformat.no_ddp_mask);
+}
+
+static inline int cxgbi_ppm_sw_tag_is_usable(struct cxgbi_ppm *ppm,
+					     u32 tag)
+{
+	/* the sw tag must be using <= 31 bits */
+	return !(tag & 0x80000000U);
+}
+
+static inline int cxgbi_ppm_make_non_ddp_tag(struct cxgbi_ppm *ppm,
+					     u32 sw_tag,
+					     u32 *final_tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+
+	if (!cxgbi_ppm_sw_tag_is_usable(ppm, sw_tag)) {
+		pr_info("sw_tag 0x%x NOT usable.\n", sw_tag);
+		return -EINVAL;
+	}
+
+	if (!sw_tag) {
+		*final_tag = tformat->no_ddp_mask;
+	} else {
+		unsigned int shift = tformat->idx_bits + tformat->color_bits;
+		u32 lower = sw_tag & tformat->idx_clr_mask;
+		u32 upper = (sw_tag >> shift) << (shift + 1);
+
+		*final_tag = upper | tformat->no_ddp_mask | lower;
+	}
+	return 0;
+}
+
+static inline u32 cxgbi_ppm_decode_non_ddp_tag(struct cxgbi_ppm *ppm,
+					       u32 tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	unsigned int shift = tformat->idx_bits + tformat->color_bits;
+	u32 lower = tag & tformat->idx_clr_mask;
+	u32 upper = (tag >> tformat->rsvd_bits) << shift;
+
+	return upper | lower;
+}
+
+static inline u32 cxgbi_ppm_ddp_tag_get_idx(struct cxgbi_ppm *ppm,
+					    u32 ddp_tag)
+{
+	u32 hw_idx = (ddp_tag >> PPOD_IDX_SHIFT) &
+			ppm->tformat.idx_mask;
+
+	return hw_idx - ppm->base_idx;
+}
+
+static inline u32 cxgbi_ppm_make_ddp_tag(unsigned int hw_idx,
+					 unsigned char color)
+{
+	return (hw_idx << PPOD_IDX_SHIFT) | ((u32)color);
+}
+
+static inline unsigned long
+cxgbi_ppm_get_tag_caller_data(struct cxgbi_ppm *ppm,
+			      u32 ddp_tag)
+{
+	u32 idx = cxgbi_ppm_ddp_tag_get_idx(ppm, ddp_tag);
+
+	return ppm->ppod_data[idx].caller_data;
+}
+
+/* sw bits are the free bits */
+static inline int cxgbi_ppm_ddp_tag_update_sw_bits(struct cxgbi_ppm *ppm,
+						   u32 val, u32 orig_tag,
+						   u32 *final_tag)
+{
+	struct cxgbi_tag_format *tformat = &ppm->tformat;
+	u32 v = val >> tformat->free_bits;
+
+	if (v) {
+		pr_info("sw_bits 0x%x too large, avail bits %u.\n",
+			val, tformat->free_bits);
+		return -EINVAL;
+	}
+	if (!cxgbi_ppm_is_ddp_tag(ppm, orig_tag))
+		return -EINVAL;
+
+	*final_tag = (val << tformat->rsvd_bits) |
+		     (orig_tag & ppm->tformat.rsvd_mask);
+	return 0;
+}
+
+static inline void cxgbi_ppm_ppod_clear(struct cxgbi_pagepod *ppod)
+{
+	ppod->hdr.vld_tid = 0U;
+}
+
+static inline void cxgbi_tagmask_check(unsigned int tagmask,
+				       struct cxgbi_tag_format *tformat)
+{
+	unsigned int bits = fls(tagmask);
+
+	/* reserve top most 2 bits for page selector */
+	tformat->free_bits = 32 - 2 - bits;
+	tformat->rsvd_bits = bits;
+	tformat->color_bits = PPOD_IDX_SHIFT;
+	tformat->idx_bits = bits - 1 - PPOD_IDX_SHIFT;
+	tformat->no_ddp_mask = 1 << (bits - 1);
+	tformat->idx_mask = (1 << tformat->idx_bits) - 1;
+	tformat->color_mask = (1 << PPOD_IDX_SHIFT) - 1;
+	tformat->idx_clr_mask = (1 << (bits - 1)) - 1;
+	tformat->rsvd_mask = (1 << bits) - 1;
+
+	pr_info("ippm: tagmask 0x%x, rsvd %u=%u+%u+1, mask 0x%x,0x%x, "
+		"pg %u,%u,%u,%u.\n",
+		tagmask, tformat->rsvd_bits, tformat->idx_bits,
+		tformat->color_bits, tformat->no_ddp_mask, tformat->rsvd_mask,
+		tformat->pgsz_order[0], tformat->pgsz_order[1],
+		tformat->pgsz_order[2], tformat->pgsz_order[3]);
+}
+
+int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz);
+void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
+			     unsigned int tid, unsigned int offset,
+			     unsigned int length,
+			     struct cxgbi_pagepod_hdr *hdr);
+void cxgbi_ppm_ppod_release(struct cxgbi_ppm *, u32 idx);
+int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *, unsigned short nr_pages,
+			    u32 per_tag_pg_idx, u32 *ppod_idx, u32 *ddp_tag,
+			    unsigned long caller_data);
+int cxgbi_ppm_init(void **ppm_pp, struct net_device *, struct pci_dev *,
+		   void *lldev, struct cxgbi_tag_format *,
+		   unsigned int ppmax, unsigned int llimit,
+		   unsigned int start,
+		   unsigned int reserve_factor);
+int cxgbi_ppm_release(struct cxgbi_ppm *ppm);
+void cxgbi_tagmask_check(unsigned int tagmask, struct cxgbi_tag_format *);
+unsigned int cxgbi_tagmask_set(unsigned int ppmax);
+
+#endif	/*__CXGB4PPM_H__*/
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index cf711d5f15be..f3c58aaa932d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -191,6 +191,7 @@ static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 enum cxgb4_uld {
 	CXGB4_ULD_RDMA,
 	CXGB4_ULD_ISCSI,
+	CXGB4_ULD_ISCSIT,
 	CXGB4_ULD_MAX
 };
 
@@ -212,6 +213,7 @@ struct l2t_data;
 struct net_device;
 struct pkt_gl;
 struct tp_tcp_stats;
+struct t4_lro_mgr;
 
 struct cxgb4_range {
 	unsigned int start;
@@ -273,6 +275,10 @@ struct cxgb4_lld_info {
 	unsigned int max_ordird_qp;          /* Max ORD/IRD depth per RDMA QP */
 	unsigned int max_ird_adapter;        /* Max IRD memory per adapter */
 	bool ulptx_memwrite_dsgl;            /* use of T5 DSGL allowed */
+	unsigned int iscsi_tagmask;	     /* iscsi ddp tag mask */
+	unsigned int iscsi_pgsz_order;	     /* iscsi ddp page size orders */
+	unsigned int iscsi_llimit;	     /* chip's iscsi region llimit */
+	void **iscsi_ppm;		     /* iscsi page pod manager */
 	int nodeid;			     /* device numa node id */
 };
 
@@ -283,6 +289,11 @@ struct cxgb4_uld_info {
 			  const struct pkt_gl *gl);
 	int (*state_change)(void *handle, enum cxgb4_state new_state);
 	int (*control)(void *handle, enum cxgb4_control control, ...);
+	int (*lro_rx_handler)(void *handle, const __be64 *rsp,
+			      const struct pkt_gl *gl,
+			      struct t4_lro_mgr *lro_mgr,
+			      struct napi_struct *napi);
+	void (*lro_flush)(struct t4_lro_mgr *);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 5b0f3ef348e9..60a26037a1c6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -48,8 +48,6 @@
 #include "t4_regs.h"
 #include "t4_values.h"
 
-#define VLAN_NONE 0xfff
-
 /* identifies sync vs async L2T_WRITE_REQs */
 #define SYNC_WR_S    12
 #define SYNC_WR_V(x) ((x) << SYNC_WR_S)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.h b/drivers/net/ethernet/chelsio/cxgb4/l2t.h
index 4e2d47ac102b..79665bd8f881 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.h
@@ -39,6 +39,8 @@
 #include <linux/if_ether.h>
 #include <linux/atomic.h>
 
+#define VLAN_NONE 0xfff
+
 enum { L2T_SIZE = 4096 };     /* # of L2T entries */
 
 enum {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index deca4a2956cc..13b144bcf725 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2157,8 +2157,11 @@ static int process_responses(struct sge_rspq *q, int budget)
 
 	while (likely(budget_left)) {
 		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
-		if (!is_new_response(rc, q))
+		if (!is_new_response(rc, q)) {
+			if (q->flush_handler)
+				q->flush_handler(q);
 			break;
+		}
 
 		dma_rmb();
 		rsp_type = RSPD_TYPE_G(rc->type_gen);
@@ -2544,7 +2547,8 @@ static void __iomem *bar2_address(struct adapter *adapter,
  */
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		     struct net_device *dev, int intr_idx,
-		     struct sge_fl *fl, rspq_handler_t hnd, int cong)
+		     struct sge_fl *fl, rspq_handler_t hnd,
+		     rspq_flush_handler_t flush_hnd, int cong)
 {
 	int ret, flsz = 0;
 	struct fw_iq_cmd c;
@@ -2648,6 +2652,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	iq->size--;                           /* subtract status entry */
 	iq->netdev = dev;
 	iq->handler = hnd;
+	iq->flush_handler = flush_hnd;
+
+	memset(&iq->lro_mgr, 0, sizeof(struct t4_lro_mgr));
+	skb_queue_head_init(&iq->lro_mgr.lroq);
 
 	/* set offset to -1 to distinguish ingress queues without FL */
 	iq->offset = fl ? 0 : -1;
@@ -2992,6 +3000,7 @@ void t4_free_sge_resources(struct adapter *adap)
 
 	/* clean up RDMA and iSCSI Rx queues */
 	t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq);
+	t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq);
 	t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
 	t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 1d2d1da40c80..80417fc564d4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -51,6 +51,7 @@ enum {
 	CPL_TX_PKT            = 0xE,
 	CPL_L2T_WRITE_REQ     = 0x12,
 	CPL_TID_RELEASE       = 0x1A,
+	CPL_TX_DATA_ISO	      = 0x1F,
 
 	CPL_CLOSE_LISTSRV_RPL = 0x20,
 	CPL_L2T_WRITE_RPL     = 0x23,
@@ -344,6 +345,87 @@ struct cpl_pass_open_rpl {
 	u8 status;
 };
 
+struct tcp_options {
+	__be16 mss;
+	__u8 wsf;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8:4;
+	__u8 unknown:1;
+	__u8:1;
+	__u8 sack:1;
+	__u8 tstamp:1;
+#else
+	__u8 tstamp:1;
+	__u8 sack:1;
+	__u8:1;
+	__u8 unknown:1;
+	__u8:4;
+#endif
+};
+
+struct cpl_pass_accept_req {
+	union opcode_tid ot;
+	__be16 rsvd;
+	__be16 len;
+	__be32 hdr_len;
+	__be16 vlan;
+	__be16 l2info;
+	__be32 tos_stid;
+	struct tcp_options tcpopt;
+};
+
+/* cpl_pass_accept_req.hdr_len fields */
+#define SYN_RX_CHAN_S    0
+#define SYN_RX_CHAN_M    0xF
+#define SYN_RX_CHAN_V(x) ((x) << SYN_RX_CHAN_S)
+#define SYN_RX_CHAN_G(x) (((x) >> SYN_RX_CHAN_S) & SYN_RX_CHAN_M)
+
+#define TCP_HDR_LEN_S    10
+#define TCP_HDR_LEN_M    0x3F
+#define TCP_HDR_LEN_V(x) ((x) << TCP_HDR_LEN_S)
+#define TCP_HDR_LEN_G(x) (((x) >> TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
+
+#define IP_HDR_LEN_S    16
+#define IP_HDR_LEN_M    0x3FF
+#define IP_HDR_LEN_V(x) ((x) << IP_HDR_LEN_S)
+#define IP_HDR_LEN_G(x) (((x) >> IP_HDR_LEN_S) & IP_HDR_LEN_M)
+
+#define ETH_HDR_LEN_S    26
+#define ETH_HDR_LEN_M    0x1F
+#define ETH_HDR_LEN_V(x) ((x) << ETH_HDR_LEN_S)
+#define ETH_HDR_LEN_G(x) (((x) >> ETH_HDR_LEN_S) & ETH_HDR_LEN_M)
+
+/* cpl_pass_accept_req.l2info fields */
+#define SYN_MAC_IDX_S    0
+#define SYN_MAC_IDX_M    0x1FF
+#define SYN_MAC_IDX_V(x) ((x) << SYN_MAC_IDX_S)
+#define SYN_MAC_IDX_G(x) (((x) >> SYN_MAC_IDX_S) & SYN_MAC_IDX_M)
+
+#define SYN_XACT_MATCH_S    9
+#define SYN_XACT_MATCH_V(x) ((x) << SYN_XACT_MATCH_S)
+#define SYN_XACT_MATCH_F    SYN_XACT_MATCH_V(1U)
+
+#define SYN_INTF_S    12
+#define SYN_INTF_M    0xF
+#define SYN_INTF_V(x) ((x) << SYN_INTF_S)
+#define SYN_INTF_G(x) (((x) >> SYN_INTF_S) & SYN_INTF_M)
+
+enum {                     /* TCP congestion control algorithms */
+	CONG_ALG_RENO,
+	CONG_ALG_TAHOE,
+	CONG_ALG_NEWRENO,
+	CONG_ALG_HIGHSPEED
+};
+
+#define CONG_CNTRL_S    14
+#define CONG_CNTRL_M    0x3
+#define CONG_CNTRL_V(x) ((x) << CONG_CNTRL_S)
+#define CONG_CNTRL_G(x) (((x) >> CONG_CNTRL_S) & CONG_CNTRL_M)
+
+#define T5_ISS_S    18
+#define T5_ISS_V(x) ((x) << T5_ISS_S)
+#define T5_ISS_F    T5_ISS_V(1U)
+
 struct cpl_pass_accept_rpl {
 	WR_HDR;
 	union opcode_tid ot;
@@ -818,6 +900,110 @@ struct cpl_iscsi_hdr {
 #define ISCSI_DDP_V(x) ((x) << ISCSI_DDP_S)
 #define ISCSI_DDP_F    ISCSI_DDP_V(1U)
 
+struct cpl_rx_data_ddp {
+	union opcode_tid ot;
+	__be16 urg;
+	__be16 len;
+	__be32 seq;
+	union {
+		__be32 nxt_seq;
+		__be32 ddp_report;
+	};
+	__be32 ulp_crc;
+	__be32 ddpvld;
+};
+
+#define cpl_rx_iscsi_ddp cpl_rx_data_ddp
+
+struct cpl_iscsi_data {
+	union opcode_tid ot;
+	__u8 rsvd0[2];
+	__be16 len;
+	__be32 seq;
+	__be16 urg;
+	__u8 rsvd1;
+	__u8 status;
+};
+
+struct cpl_tx_data_iso {
+	__be32 op_to_scsi;
+	__u8   reserved1;
+	__u8   ahs_len;
+	__be16 mpdu;
+	__be32 burst_size;
+	__be32 len;
+	__be32 reserved2_seglen_offset;
+	__be32 datasn_offset;
+	__be32 buffer_offset;
+	__be32 reserved3;
+
+	/* encapsulated CPL_TX_DATA follows here */
+};
+
+/* cpl_tx_data_iso.op_to_scsi fields */
+#define CPL_TX_DATA_ISO_OP_S	24
+#define CPL_TX_DATA_ISO_OP_M	0xff
+#define CPL_TX_DATA_ISO_OP_V(x)	((x) << CPL_TX_DATA_ISO_OP_S)
+#define CPL_TX_DATA_ISO_OP_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_OP_S) & CPL_TX_DATA_ISO_OP_M)
+
+#define CPL_TX_DATA_ISO_FIRST_S		23
+#define CPL_TX_DATA_ISO_FIRST_M		0x1
+#define CPL_TX_DATA_ISO_FIRST_V(x)	((x) << CPL_TX_DATA_ISO_FIRST_S)
+#define CPL_TX_DATA_ISO_FIRST_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_FIRST_S) & CPL_TX_DATA_ISO_FIRST_M)
+#define CPL_TX_DATA_ISO_FIRST_F	CPL_TX_DATA_ISO_FIRST_V(1U)
+
+#define CPL_TX_DATA_ISO_LAST_S		22
+#define CPL_TX_DATA_ISO_LAST_M		0x1
+#define CPL_TX_DATA_ISO_LAST_V(x)	((x) << CPL_TX_DATA_ISO_LAST_S)
+#define CPL_TX_DATA_ISO_LAST_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_LAST_S) & CPL_TX_DATA_ISO_LAST_M)
+#define CPL_TX_DATA_ISO_LAST_F	CPL_TX_DATA_ISO_LAST_V(1U)
+
+#define CPL_TX_DATA_ISO_CPLHDRLEN_S	21
+#define CPL_TX_DATA_ISO_CPLHDRLEN_M	0x1
+#define CPL_TX_DATA_ISO_CPLHDRLEN_V(x)	((x) << CPL_TX_DATA_ISO_CPLHDRLEN_S)
+#define CPL_TX_DATA_ISO_CPLHDRLEN_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_CPLHDRLEN_S) & CPL_TX_DATA_ISO_CPLHDRLEN_M)
+#define CPL_TX_DATA_ISO_CPLHDRLEN_F	CPL_TX_DATA_ISO_CPLHDRLEN_V(1U)
+
+#define CPL_TX_DATA_ISO_HDRCRC_S	20
+#define CPL_TX_DATA_ISO_HDRCRC_M	0x1
+#define CPL_TX_DATA_ISO_HDRCRC_V(x)	((x) << CPL_TX_DATA_ISO_HDRCRC_S)
+#define CPL_TX_DATA_ISO_HDRCRC_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_HDRCRC_S) & CPL_TX_DATA_ISO_HDRCRC_M)
+#define CPL_TX_DATA_ISO_HDRCRC_F	CPL_TX_DATA_ISO_HDRCRC_V(1U)
+
+#define CPL_TX_DATA_ISO_PLDCRC_S	19
+#define CPL_TX_DATA_ISO_PLDCRC_M	0x1
+#define CPL_TX_DATA_ISO_PLDCRC_V(x)	((x) << CPL_TX_DATA_ISO_PLDCRC_S)
+#define CPL_TX_DATA_ISO_PLDCRC_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_PLDCRC_S) & CPL_TX_DATA_ISO_PLDCRC_M)
+#define CPL_TX_DATA_ISO_PLDCRC_F	CPL_TX_DATA_ISO_PLDCRC_V(1U)
+
+#define CPL_TX_DATA_ISO_IMMEDIATE_S	18
+#define CPL_TX_DATA_ISO_IMMEDIATE_M	0x1
+#define CPL_TX_DATA_ISO_IMMEDIATE_V(x)	((x) << CPL_TX_DATA_ISO_IMMEDIATE_S)
+#define CPL_TX_DATA_ISO_IMMEDIATE_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_IMMEDIATE_S) & CPL_TX_DATA_ISO_IMMEDIATE_M)
+#define CPL_TX_DATA_ISO_IMMEDIATE_F	CPL_TX_DATA_ISO_IMMEDIATE_V(1U)
+
+#define CPL_TX_DATA_ISO_SCSI_S		16
+#define CPL_TX_DATA_ISO_SCSI_M		0x3
+#define CPL_TX_DATA_ISO_SCSI_V(x)	((x) << CPL_TX_DATA_ISO_SCSI_S)
+#define CPL_TX_DATA_ISO_SCSI_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_SCSI_S) & CPL_TX_DATA_ISO_SCSI_M)
+
+/* cpl_tx_data_iso.reserved2_seglen_offset fields */
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_S		0
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_M		0xffffff
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_V(x)	\
+	((x) << CPL_TX_DATA_ISO_SEGLEN_OFFSET_S)
+#define CPL_TX_DATA_ISO_SEGLEN_OFFSET_G(x)	\
+	(((x) >> CPL_TX_DATA_ISO_SEGLEN_OFFSET_S) & \
+	 CPL_TX_DATA_ISO_SEGLEN_OFFSET_M)
+
 struct cpl_rx_data {
 	union opcode_tid ot;
 	__be16 rsvd;
@@ -854,6 +1040,15 @@ struct cpl_rx_data_ack {
 #define RX_FORCE_ACK_V(x) ((x) << RX_FORCE_ACK_S)
 #define RX_FORCE_ACK_F    RX_FORCE_ACK_V(1U)
 
+#define RX_DACK_MODE_S    29
+#define RX_DACK_MODE_M    0x3
+#define RX_DACK_MODE_V(x) ((x) << RX_DACK_MODE_S)
+#define RX_DACK_MODE_G(x) (((x) >> RX_DACK_MODE_S) & RX_DACK_MODE_M)
+
+#define RX_DACK_CHANGE_S    31
+#define RX_DACK_CHANGE_V(x) ((x) << RX_DACK_CHANGE_S)
+#define RX_DACK_CHANGE_F    RX_DACK_CHANGE_V(1U)
+
 struct cpl_rx_pkt {
 	struct rss_header rsshdr;
 	u8 opcode;
@@ -1090,6 +1285,12 @@ struct cpl_fw4_ack {
 	__be64 rsvd1;
 };
 
+enum {
+	CPL_FW4_ACK_FLAGS_SEQVAL	= 0x1,	/* seqn valid */
+	CPL_FW4_ACK_FLAGS_CH		= 0x2,	/* channel change complete */
+	CPL_FW4_ACK_FLAGS_FLOWC		= 0x4,	/* fw_flowc_wr complete */
+};
+
 struct cpl_fw6_msg {
 	u8 opcode;
 	u8 type;
@@ -1115,6 +1316,17 @@ struct cpl_fw6_msg_ofld_connection_wr_rpl {
 	__u8    rsvd[2];
 };
 
+struct cpl_tx_data {
+	union opcode_tid ot;
+	__be32 len;
+	__be32 rsvd;
+	__be32 flags;
+};
+
+/* cpl_tx_data.flags field */
+#define TX_FORCE_S	13
+#define TX_FORCE_V(x)	((x) << TX_FORCE_S)
+
 enum {
 	ULP_TX_MEM_READ = 2,
 	ULP_TX_MEM_WRITE = 3,
@@ -1143,6 +1355,11 @@ struct ulptx_sgl {
 	struct ulptx_sge_pair sge[0];
 };
 
+struct ulptx_idata {
+	__be32 cmd_more;
+	__be32 len;
+};
+
 #define ULPTX_NSGE_S    0
 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index c8661c77b4e3..7ad6d4e75b2a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -101,6 +101,7 @@ enum fw_wr_opcodes {
 	FW_RI_BIND_MW_WR               = 0x18,
 	FW_RI_FR_NSMR_WR               = 0x19,
 	FW_RI_INV_LSTAG_WR             = 0x1a,
+	FW_ISCSI_TX_DATA_WR	       = 0x45,
 	FW_LASTC2E_WR                  = 0x70
 };
 
@@ -561,7 +562,12 @@ enum fw_flowc_mnem {
 	FW_FLOWC_MNEM_SNDBUF,
 	FW_FLOWC_MNEM_MSS,
 	FW_FLOWC_MNEM_TXDATAPLEN_MAX,
-	FW_FLOWC_MNEM_SCHEDCLASS = 11,
+	FW_FLOWC_MNEM_TCPSTATE,
+	FW_FLOWC_MNEM_EOSTATE,
+	FW_FLOWC_MNEM_SCHEDCLASS,
+	FW_FLOWC_MNEM_DCBPRIO,
+	FW_FLOWC_MNEM_SND_SCALE,
+	FW_FLOWC_MNEM_RCV_SCALE,
 };
 
 struct fw_flowc_mnemval {
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 79a210aaf0bb..ea83712a6d62 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -35,6 +35,7 @@
 #include "fman.h"
 #include "fman_muram.h"
 
+#include <linux/fsl/guts.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/module.h>
@@ -1871,6 +1872,90 @@ err_fm_state:
 	return -EINVAL;
 }
 
+static int fman_reset(struct fman *fman)
+{
+	u32 count;
+	int err = 0;
+
+	if (fman->state->rev_info.major < 6) {
+		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
+		/* Wait for reset completion */
+		count = 100;
+		do {
+			udelay(1);
+		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
+			 FPM_RSTC_FM_RESET) && --count);
+		if (count == 0)
+			err = -EBUSY;
+
+		goto _return;
+	} else {
+		struct device_node *guts_node;
+		struct ccsr_guts __iomem *guts_regs;
+		u32 devdisr2, reg;
+
+		/* Errata A007273 */
+		guts_node =
+			of_find_compatible_node(NULL, NULL,
+						"fsl,qoriq-device-config-2.0");
+		if (!guts_node) {
+			dev_err(fman->dev, "%s: Couldn't find guts node\n",
+				__func__);
+			goto guts_node;
+		}
+
+		guts_regs = of_iomap(guts_node, 0);
+		if (!guts_regs) {
+			dev_err(fman->dev, "%s: Couldn't map %s regs\n",
+				__func__, guts_node->full_name);
+			goto guts_regs;
+		}
+#define FMAN1_ALL_MACS_MASK	0xFCC00000
+#define FMAN2_ALL_MACS_MASK	0x000FCC00
+		/* Read current state */
+		devdisr2 = ioread32be(&guts_regs->devdisr2);
+		if (fman->dts_params.id == 0)
+			reg = devdisr2 & ~FMAN1_ALL_MACS_MASK;
+		else
+			reg = devdisr2 & ~FMAN2_ALL_MACS_MASK;
+
+		/* Enable all MACs */
+		iowrite32be(reg, &guts_regs->devdisr2);
+
+		/* Perform FMan reset */
+		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
+
+		/* Wait for reset completion */
+		count = 100;
+		do {
+			udelay(1);
+		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
+			 FPM_RSTC_FM_RESET) && --count);
+		if (count == 0) {
+			iounmap(guts_regs);
+			of_node_put(guts_node);
+			err = -EBUSY;
+			goto _return;
+		}
+
+		/* Restore devdisr2 value */
+		iowrite32be(devdisr2, &guts_regs->devdisr2);
+
+		iounmap(guts_regs);
+		of_node_put(guts_node);
+
+		goto _return;
+
+guts_regs:
+		of_node_put(guts_node);
+guts_node:
+		dev_dbg(fman->dev, "%s: Didn't perform FManV3 reset due to Errata A007273!\n",
+			__func__);
+	}
+_return:
+	return err;
+}
+
 static int fman_init(struct fman *fman)
 {
 	struct fman_cfg *cfg = NULL;
@@ -1914,22 +1999,9 @@ static int fman_init(struct fman *fman)
 		fman->liodn_base[i] = liodn_base;
 	}
 
-	/* FMan Reset (supported only for FMan V2) */
-	if (fman->state->rev_info.major >= 6) {
-		/* Errata A007273 */
-		dev_dbg(fman->dev, "%s: FManV3 reset is not supported!\n",
-			__func__);
-	} else {
-		iowrite32be(FPM_RSTC_FM_RESET, &fman->fpm_regs->fm_rstc);
-		/* Wait for reset completion */
-		count = 100;
-		do {
-			udelay(1);
-		} while (((ioread32be(&fman->fpm_regs->fm_rstc)) &
-			 FPM_RSTC_FM_RESET) && --count);
-		if (count == 0)
-			return -EBUSY;
-	}
+	err = fman_reset(fman);
+	if (err)
+		return err;
 
 	if (ioread32be(&fman->qmi_regs->fmqm_gs) & QMI_GS_HALT_NOT_BUSY) {
 		resume(fman->fpm_regs);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index 1cbcb9fa3fb5..37d0cce392be 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -147,6 +147,8 @@ enum hnae_led_state {
 
 #define HNSV2_TXD_BUFNUM_S 0
 #define HNSV2_TXD_BUFNUM_M (0x7 << HNSV2_TXD_BUFNUM_S)
+#define HNSV2_TXD_PORTID_S	4
+#define HNSV2_TXD_PORTID_M	(0X7 << HNSV2_TXD_PORTID_S)
 #define HNSV2_TXD_RI_B   1
 #define HNSV2_TXD_L4CS_B   2
 #define HNSV2_TXD_L3CS_B   3
@@ -516,6 +518,7 @@ struct hnae_handle {
 	int q_num;
 	int vf_id;
 	u32 eport_id;
+	u32 dport_id;	/* v2 tx bd should fill the dport_id */
 	enum hnae_port_type port_type;
 	struct list_head node;    /* list to hnae_ae_dev->handle_list */
 	struct hnae_buf_ops *bops; /* operation for the buffer */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index d4f92ed322d6..285c893ab135 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -175,6 +175,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev,
 	ae_handle->phy_node = vf_cb->mac_cb->phy_node;
 	ae_handle->if_support = vf_cb->mac_cb->if_support;
 	ae_handle->port_type = vf_cb->mac_cb->mac_type;
+	ae_handle->dport_id = port_idx;
 
 	return ae_handle;
 vf_id_err:
@@ -419,7 +420,10 @@ static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable)
 
 static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en)
 {
+	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
 	hns_dsaf_set_promisc_mode(hns_ae_get_dsaf_dev(handle->dev), en);
+	hns_mac_set_promisc(mac_cb, (u8)!!en);
 }
 
 static int hns_ae_get_autoneg(struct hnae_handle *handle)
@@ -787,7 +791,8 @@ static int hns_ae_get_rss(struct hnae_handle *handle, u32 *indir, u8 *key,
 		memcpy(key, ppe_cb->rss_key, HNS_PPEV2_RSS_KEY_SIZE);
 
 	/* update the current hash->queue mappings from the shadow RSS table */
-	memcpy(indir, ppe_cb->rss_indir_table, HNS_PPEV2_RSS_IND_TBL_SIZE);
+	memcpy(indir, ppe_cb->rss_indir_table,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
 	return 0;
 }
@@ -799,10 +804,11 @@ static int hns_ae_set_rss(struct hnae_handle *handle, const u32 *indir,
 
 	/* set the RSS Hash Key if specififed by the user */
 	if (key)
-		hns_ppe_set_rss_key(ppe_cb, (int *)key);
+		hns_ppe_set_rss_key(ppe_cb, (u32 *)key);
 
 	/* update the shadow RSS table with user specified qids */
-	memcpy(ppe_cb->rss_indir_table, indir, HNS_PPEV2_RSS_IND_TBL_SIZE);
+	memcpy(ppe_cb->rss_indir_table, indir,
+	       HNS_PPEV2_RSS_IND_TBL_SIZE * sizeof(*indir));
 
 	/* now update the hardware */
 	hns_ppe_set_indir_table(ppe_cb, ppe_cb->rss_indir_table);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index b8517b00e706..6e2b76ede075 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -290,6 +290,24 @@ static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed,
 	return 0;
 }
 
+static void hns_gmac_set_uc_match(void *mac_drv, u16 en)
+{
+	struct mac_driver *drv = mac_drv;
+
+	dsaf_set_dev_bit(drv, GMAC_REC_FILT_CONTROL_REG,
+			 GMAC_UC_MATCH_EN_B, !en);
+	dsaf_set_dev_bit(drv, GMAC_STATION_ADDR_HIGH_2_REG,
+			 GMAC_ADDR_EN_B, !en);
+}
+
+static void hns_gmac_set_promisc(void *mac_drv, u8 en)
+{
+	struct mac_driver *drv = mac_drv;
+
+	if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG)
+		hns_gmac_set_uc_match(mac_drv, en);
+}
+
 static void hns_gmac_init(void *mac_drv)
 {
 	u32 port;
@@ -305,6 +323,8 @@ static void hns_gmac_init(void *mac_drv)
 	mdelay(10);
 	hns_gmac_disable(mac_drv, MAC_COMM_MODE_RX_AND_TX);
 	hns_gmac_tx_loop_pkt_dis(mac_drv);
+	if (drv->mac_cb->mac_type == HNAE_PORT_DEBUG)
+		hns_gmac_set_uc_match(mac_drv, 0);
 }
 
 void hns_gmac_update_stats(void *mac_drv)
@@ -402,14 +422,17 @@ static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
-	if (drv->mac_id >= DSAF_SERVICE_NW_NUM) {
-		u32 high_val = mac_addr[1] | (mac_addr[0] << 8);
+	u32 high_val = mac_addr[1] | (mac_addr[0] << 8);
 
-		u32 low_val = mac_addr[5] | (mac_addr[4] << 8)
-			| (mac_addr[3] << 16) | (mac_addr[2] << 24);
-		dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val);
-		dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG, high_val);
-	}
+	u32 low_val = mac_addr[5] | (mac_addr[4] << 8)
+		| (mac_addr[3] << 16) | (mac_addr[2] << 24);
+
+	u32 val = dsaf_read_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG);
+	u32 sta_addr_en = dsaf_get_bit(val, GMAC_ADDR_EN_B);
+
+	dsaf_write_dev(drv, GMAC_STATION_ADDR_LOW_2_REG, low_val);
+	dsaf_write_dev(drv, GMAC_STATION_ADDR_HIGH_2_REG,
+		       high_val | (sta_addr_en << GMAC_ADDR_EN_B));
 }
 
 static int hns_gmac_config_loopback(void *mac_drv, enum hnae_loop loop_mode,
@@ -699,6 +722,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->get_sset_count = hns_gmac_get_sset_count;
 	mac_drv->get_strings = hns_gmac_get_strings;
 	mac_drv->update_stats = hns_gmac_update_stats;
+	mac_drv->set_promiscuous = hns_gmac_set_promisc;
 
 	return (void *)mac_drv;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 5ef0e96e918a..a38084a22bf2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -467,8 +467,13 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu)
 	struct mac_driver *drv = hns_mac_get_drv(mac_cb);
 	u32 buf_size = mac_cb->dsaf_dev->buf_size;
 	u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ?
+			MAC_MAX_MTU : MAC_MAX_MTU_V2;
 
-	if ((new_mtu < MAC_MIN_MTU) || (new_frm > MAC_MAX_MTU) ||
+	if (mac_cb->mac_type == HNAE_PORT_DEBUG)
+		max_frm = MAC_MAX_MTU_DBG;
+
+	if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) ||
 	    (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size))
 		return -EINVAL;
 
@@ -861,6 +866,14 @@ int hns_mac_get_sset_count(struct hns_mac_cb *mac_cb, int stringset)
 	return mac_ctrl_drv->get_sset_count(stringset);
 }
 
+void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en)
+{
+	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+	if (mac_ctrl_drv->set_promiscuous)
+		mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en);
+}
+
 int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb)
 {
 	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 0b052191d751..823b6e78c8aa 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -26,7 +26,9 @@ struct dsaf_device;
 
 #define MAC_DEFAULT_MTU	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN + ETH_DATA_LEN)
 #define MAC_MAX_MTU		9600
+#define MAC_MAX_MTU_V2		9728
 #define MAC_MIN_MTU		68
+#define MAC_MAX_MTU_DBG		MAC_DEFAULT_MTU
 
 #define MAC_DEFAULT_PAUSE_TIME 0xff
 
@@ -365,7 +367,7 @@ struct mac_driver {
 	/*config rx pause enable*/
 	void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable);
 	/* config rx mode for promiscuous*/
-	int (*set_promiscuous)(void *mac_drv, u8 enable);
+	void (*set_promiscuous)(void *mac_drv, u8 enable);
 	/* get mac id */
 	void (*mac_get_id)(void *mac_drv, u8 *mac_id);
 	void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en);
@@ -453,4 +455,6 @@ int hns_mac_get_regs_count(struct hns_mac_cb *mac_cb);
 void hns_set_led_opt(struct hns_mac_cb *mac_cb);
 int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,
 			enum hnae_led_state status);
+void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en);
+
 #endif /* _HNS_DSAF_MAC_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 38fc5be3870c..5c1ac9ba1bf2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -748,8 +748,9 @@ static void hns_dsaf_tbl_stat_en(struct dsaf_device *dsaf_dev)
  */
 static void hns_dsaf_rocee_bp_en(struct dsaf_device *dsaf_dev)
 {
-	dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG,
-			 DSAF_FC_XGE_TX_PAUSE_S, 1);
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		dsaf_set_dev_bit(dsaf_dev, DSAF_XGE_CTRL_SIG_CFG_0_REG,
+				 DSAF_FC_XGE_TX_PAUSE_S, 1);
 }
 
 /* set msk for dsaf exception irq*/
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index f302ef9073c6..5b7ae5ff43e8 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -27,7 +27,7 @@ void hns_ppe_set_tso_enable(struct hns_ppe_cb *ppe_cb, u32 value)
 void hns_ppe_set_rss_key(struct hns_ppe_cb *ppe_cb,
 			 const u32 rss_key[HNS_PPEV2_RSS_KEY_NUM])
 {
-	int key_item = 0;
+	u32 key_item;
 
 	for (key_item = 0; key_item < HNS_PPEV2_RSS_KEY_NUM; key_item++)
 		dsaf_write_dev(ppe_cb, PPEV2_RSS_KEY_REG + key_item * 0x4,
@@ -343,6 +343,9 @@ static void hns_ppe_init_hw(struct hns_ppe_cb *ppe_cb)
 	if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
 		hns_ppe_set_vlan_strip(ppe_cb, 0);
 
+		dsaf_write_dev(ppe_cb, PPE_CFG_MAX_FRAME_LEN_REG,
+			       HNS_PPEV2_MAX_FRAME_LEN);
+
 		/* set default RSS key in h/w */
 		hns_ppe_set_rss_key(ppe_cb, ppe_cb->rss_key);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index 0f5cb6962acf..e9c0ec2fa0dd 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -30,6 +30,8 @@
 #define HNS_PPEV2_RSS_KEY_SIZE 40 /* in bytes or 320 bits */
 #define HNS_PPEV2_RSS_KEY_NUM (HNS_PPEV2_RSS_KEY_SIZE / sizeof(u32))
 
+#define HNS_PPEV2_MAX_FRAME_LEN 0X980
+
 enum ppe_qid_mode {
 	PPE_QID_MODE0 = 0, /* fixed queue id mode */
 	PPE_QID_MODE1,	   /* switch:128VM non switch:6Port/4VM/4TC */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 60d695daa471..bf62687e5ea7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -922,6 +922,8 @@
 #define GMAC_LP_REG_CF2MI_LP_EN_B	2
 
 #define GMAC_MODE_CHANGE_EB_B	0
+#define GMAC_UC_MATCH_EN_B	0
+#define GMAC_ADDR_EN_B		16
 
 #define GMAC_RECV_CTRL_STRIP_PAD_EN_B	3
 #define GMAC_RECV_CTRL_RUNT_PKT_EN_B	4
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 802d55457f19..fd90f3737963 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -7,7 +7,7 @@
  * (at your option) any later version.
  */
 
-#include <asm-generic/io-64-nonatomic-hi-lo.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/of_mdio.h>
 #include "hns_dsaf_main.h"
 #include "hns_dsaf_mac.h"
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 3f77ff77abbc..71aa37b4b338 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -48,7 +48,6 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
 	struct iphdr *iphdr;
 	struct ipv6hdr *ipv6hdr;
 	struct sk_buff *skb;
-	int skb_tmp_len;
 	__be16 protocol;
 	u8 bn_pid = 0;
 	u8 rrcfv = 0;
@@ -66,10 +65,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
 	desc->addr = cpu_to_le64(dma);
 	desc->tx.send_size = cpu_to_le16((u16)size);
 
-	/*config bd buffer end */
+	/* config bd buffer end */
 	hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1);
 	hnae_set_field(bn_pid, HNSV2_TXD_BUFNUM_M, 0, buf_num - 1);
 
+	/* fill port_id in the tx bd for sending management pkts */
+	hnae_set_field(bn_pid, HNSV2_TXD_PORTID_M,
+		       HNSV2_TXD_PORTID_S, ring->q->handle->dport_id);
+
 	if (type == DESC_TYPE_SKB) {
 		skb = (struct sk_buff *)priv;
 
@@ -90,13 +93,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
 				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
 
 				/* check for tcp/udp header */
-				if (iphdr->protocol == IPPROTO_TCP) {
+				if (iphdr->protocol == IPPROTO_TCP &&
+				    skb_is_gso(skb)) {
 					hnae_set_bit(tvsvsn,
 						     HNSV2_TXD_TSE_B, 1);
-					skb_tmp_len = SKB_TMP_LEN(skb);
 					l4_len = tcp_hdrlen(skb);
-					mss = mtu - skb_tmp_len - ETH_FCS_LEN;
-					paylen = skb->len - skb_tmp_len;
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
 				}
 			} else if (skb->protocol == htons(ETH_P_IPV6)) {
 				hnae_set_bit(tvsvsn, HNSV2_TXD_IPV6_B, 1);
@@ -104,13 +107,13 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv,
 				hnae_set_bit(rrcfv, HNSV2_TXD_L4CS_B, 1);
 
 				/* check for tcp/udp header */
-				if (ipv6hdr->nexthdr == IPPROTO_TCP) {
+				if (ipv6hdr->nexthdr == IPPROTO_TCP &&
+				    skb_is_gso(skb) && skb_is_gso_v6(skb)) {
 					hnae_set_bit(tvsvsn,
 						     HNSV2_TXD_TSE_B, 1);
-					skb_tmp_len = SKB_TMP_LEN(skb);
 					l4_len = tcp_hdrlen(skb);
-					mss = mtu - skb_tmp_len - ETH_FCS_LEN;
-					paylen = skb->len - skb_tmp_len;
+					mss = skb_shinfo(skb)->gso_size;
+					paylen = skb->len - SKB_TMP_LEN(skb);
 				}
 			}
 			desc->tx.ip_offset = ip_offset;
@@ -564,6 +567,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 	struct sk_buff *skb;
 	struct hnae_desc *desc;
 	struct hnae_desc_cb *desc_cb;
+	struct ethhdr *eh;
 	unsigned char *va;
 	int bnum, length, i;
 	int pull_len;
@@ -670,6 +674,14 @@ out_bnum_err:
 		return -EFAULT;
 	}
 
+	/* filter out multicast pkt with the same src mac as this port */
+	eh = eth_hdr(skb);
+	if (unlikely(is_multicast_ether_addr(eh->h_dest) &&
+		     ether_addr_equal(ndev->dev_addr, eh->h_source))) {
+		dev_kfree_skb_any(skb);
+		return -EFAULT;
+	}
+
 	ring->stats.rx_pkts++;
 	ring->stats.rx_bytes += skb->len;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 3c4a3bc31a89..9c3ba65988e1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -1173,18 +1173,15 @@ hns_get_rss_key_size(struct net_device *netdev)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	u32 ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
 			   "RSS feature is not supported on this hardware\n");
-		return -EOPNOTSUPP;
+		return 0;
 	}
 
 	ops = priv->ae_handle->dev->ops;
-	ret = ops->get_rss_key_size(priv->ae_handle);
-
-	return ret;
+	return ops->get_rss_key_size(priv->ae_handle);
 }
 
 static u32
@@ -1192,18 +1189,15 @@ hns_get_rss_indir_size(struct net_device *netdev)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	u32 ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
 			   "RSS feature is not supported on this hardware\n");
-		return -EOPNOTSUPP;
+		return 0;
 	}
 
 	ops = priv->ae_handle->dev->ops;
-	ret = ops->get_rss_indir_size(priv->ae_handle);
-
-	return ret;
+	return ops->get_rss_indir_size(priv->ae_handle);
 }
 
 static int
@@ -1211,7 +1205,6 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	int ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
@@ -1224,9 +1217,7 @@ hns_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
 	if (!indir)
 		return 0;
 
-	ret = ops->get_rss(priv->ae_handle, indir, key, hfunc);
-
-	return 0;
+	return ops->get_rss(priv->ae_handle, indir, key, hfunc);
 }
 
 static int
@@ -1235,7 +1226,6 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key,
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_ae_ops *ops;
-	int ret;
 
 	if (AE_IS_VER1(priv->enet_ver)) {
 		netdev_err(netdev,
@@ -1252,7 +1242,22 @@ hns_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key,
 	if (!indir)
 		return 0;
 
-	ret = ops->set_rss(priv->ae_handle, indir, key, hfunc);
+	return ops->set_rss(priv->ae_handle, indir, key, hfunc);
+}
+
+static int hns_get_rxnfc(struct net_device *netdev,
+			 struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = priv->ae_handle->q_num;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
 	return 0;
 }
@@ -1280,6 +1285,7 @@ static struct ethtool_ops hns_ethtool_ops = {
 	.get_rxfh_indir_size = hns_get_rss_indir_size,
 	.get_rxfh = hns_get_rss,
 	.set_rxfh = hns_set_rss,
+	.get_rxnfc = hns_get_rxnfc,
 };
 
 void hns_ethtool_set_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
index b4729ba57c9c..3b3c63e54ed6 100644
--- a/drivers/net/ethernet/intel/i40e/Makefile
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -41,6 +41,7 @@ i40e-objs := i40e_main.o \
 	i40e_diag.o	\
 	i40e_txrx.o	\
 	i40e_ptp.o	\
+	i40e_client.o   \
 	i40e_virtchnl_pf.o
 
 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2f6210ae8ba0..1ce6e9c0427d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -58,6 +58,7 @@
 #ifdef I40E_FCOE
 #include "i40e_fcoe.h"
 #endif
+#include "i40e_client.h"
 #include "i40e_virtchnl.h"
 #include "i40e_virtchnl_pf.h"
 #include "i40e_txrx.h"
@@ -190,6 +191,7 @@ struct i40e_lump_tracking {
 	u16 search_hint;
 	u16 list[0];
 #define I40E_PILE_VALID_BIT  0x8000
+#define I40E_IWARP_IRQ_PILE_ID  (I40E_PILE_VALID_BIT - 2)
 };
 
 #define I40E_DEFAULT_ATR_SAMPLE_RATE	20
@@ -282,6 +284,8 @@ struct i40e_pf {
 #endif /* I40E_FCOE */
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
+	u16 num_iwarp_msix;        /* num of iwarp vectors for this PF */
+	int iwarp_base_vector;
 	int queues_left;           /* queues left unclaimed */
 	u16 alloc_rss_size;        /* allocated RSS queues */
 	u16 rss_size_max;          /* HW defined max RSS queues */
@@ -329,6 +333,7 @@ struct i40e_pf {
 #define I40E_FLAG_16BYTE_RX_DESC_ENABLED	BIT_ULL(13)
 #define I40E_FLAG_CLEAN_ADMINQ			BIT_ULL(14)
 #define I40E_FLAG_FILTER_SYNC			BIT_ULL(15)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED	BIT_ULL(16)
 #define I40E_FLAG_PROCESS_MDD_EVENT		BIT_ULL(17)
 #define I40E_FLAG_PROCESS_VFLR_EVENT		BIT_ULL(18)
 #define I40E_FLAG_SRIOV_ENABLED			BIT_ULL(19)
@@ -571,6 +576,8 @@ struct i40e_vsi {
 	struct kobject *kobj;  /* sysfs object */
 	bool current_isup;     /* Sync 'link up' logging */
 
+	void *priv;	/* client driver data reference. */
+
 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
 
@@ -728,6 +735,10 @@ void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 			      struct i40e_vsi_context *ctxt,
 			      u8 enabled_tc, bool is_add);
 #endif
+void i40e_service_event_schedule(struct i40e_pf *pf);
+void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
+				  u8 *msg, u16 len);
+
 int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
 int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
 struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
@@ -750,6 +761,17 @@ static inline void i40e_dbg_pf_exit(struct i40e_pf *pf) {}
 static inline void i40e_dbg_init(void) {}
 static inline void i40e_dbg_exit(void) {}
 #endif /* CONFIG_DEBUG_FS*/
+/* needed by client drivers */
+int i40e_lan_add_device(struct i40e_pf *pf);
+int i40e_lan_del_device(struct i40e_pf *pf);
+void i40e_client_subtask(struct i40e_pf *pf);
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+			   enum i40e_client_type type);
 /**
  * i40e_irq_dynamic_enable - Enable default interrupt generation settings
  * @vsi: pointer to a vsi
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
new file mode 100644
index 000000000000..0e6ac841321c
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -0,0 +1,1012 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "i40e.h"
+#include "i40e_prototype.h"
+#include "i40e_client.h"
+
+static const char i40e_client_interface_version_str[] = I40E_CLIENT_VERSION_STR;
+
+static LIST_HEAD(i40e_devices);
+static DEFINE_MUTEX(i40e_device_mutex);
+
+static LIST_HEAD(i40e_clients);
+static DEFINE_MUTEX(i40e_client_mutex);
+
+static LIST_HEAD(i40e_client_instances);
+static DEFINE_MUTEX(i40e_client_instance_mutex);
+
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len);
+
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info);
+
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level);
+
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag);
+
+static struct i40e_ops i40e_lan_ops = {
+	.virtchnl_send = i40e_client_virtchnl_send,
+	.setup_qvlist = i40e_client_setup_qvlist,
+	.request_reset = i40e_client_request_reset,
+	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
+};
+
+/**
+ * i40e_client_type_to_vsi_type - convert client type to vsi type
+ * @client_type: the i40e_client type
+ *
+ * returns the related vsi type value
+ **/
+static
+enum i40e_vsi_type i40e_client_type_to_vsi_type(enum i40e_client_type type)
+{
+	switch (type) {
+	case I40E_CLIENT_IWARP:
+		return I40E_VSI_IWARP;
+
+	case I40E_CLIENT_VMDQ2:
+		return I40E_VSI_VMDQ2;
+
+	default:
+		pr_err("i40e: Client type unknown\n");
+		return I40E_VSI_TYPE_UNKNOWN;
+	}
+}
+
+/**
+ * i40e_client_get_params - Get the params that can change at runtime
+ * @vsi: the VSI with the message
+ * @param: clinet param struct
+ *
+ **/
+static
+int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
+{
+	struct i40e_dcbx_config *dcb_cfg = &vsi->back->hw.local_dcbx_config;
+	int i = 0;
+
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		u8 tc = dcb_cfg->etscfg.prioritytable[i];
+		u16 qs_handle;
+
+		/* If TC is not enabled for VSI use TC0 for UP */
+		if (!(vsi->tc_config.enabled_tc & BIT(tc)))
+			tc = 0;
+
+		qs_handle = le16_to_cpu(vsi->info.qs_handle[tc]);
+		params->qos.prio_qos[i].tc = tc;
+		params->qos.prio_qos[i].qs_handle = qs_handle;
+		if (qs_handle == I40E_AQ_VSI_QS_HANDLE_INVALID) {
+			dev_err(&vsi->back->pdev->dev, "Invalid queue set handle for TC = %d, vsi id = %d\n",
+				tc, vsi->id);
+			return -EINVAL;
+		}
+	}
+
+	params->mtu = vsi->netdev->mtu;
+	return 0;
+}
+
+/**
+ * i40e_notify_client_of_vf_msg - call the client vf message callback
+ * @vsi: the VSI with the message
+ * @vf_id: the absolute VF id that sent the message
+ * @msg: message buffer
+ * @len: length of the message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void
+i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == vsi->back) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->virtchnl_receive) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance virtual channel receive routine\n");
+				continue;
+			}
+			cdev->client->ops->virtchnl_receive(&cdev->lan_info,
+							    cdev->client,
+							    vf_id, msg, len);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_l2_param_changes - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
+{
+	struct i40e_client_instance *cdev;
+	struct i40e_params params;
+
+	if (!vsi)
+		return;
+	memset(&params, 0, sizeof(params));
+	i40e_client_get_params(vsi, &params);
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == vsi->back) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->l2_param_change) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance l2_param_change routine\n");
+				continue;
+			}
+			cdev->lan_info.params = params;
+			cdev->client->ops->l2_param_change(&cdev->lan_info,
+							   cdev->client,
+							   &params);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_netdev_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client to this netdev, call the client with open
+ **/
+void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.netdev == vsi->netdev) {
+			if (!cdev->client ||
+			    !cdev->client->ops || !cdev->client->ops->open) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance open routine\n");
+				continue;
+			}
+			cdev->client->ops->open(&cdev->lan_info, cdev->client);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_client_release_qvlist
+ * @ldev: pointer to L2 context.
+ *
+ **/
+static void i40e_client_release_qvlist(struct i40e_info *ldev)
+{
+	struct i40e_qvlist_info *qvlist_info = ldev->qvlist_info;
+	u32 i;
+
+	if (!ldev->qvlist_info)
+		return;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct i40e_pf *pf = ldev->pf;
+		struct i40e_qv_info *qv_info;
+		u32 reg_idx;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1);
+		wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+	}
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+}
+
+/**
+ * i40e_notify_client_of_netdev_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to a reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!vsi)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.netdev == vsi->netdev) {
+			if (!cdev->client ||
+			    !cdev->client->ops || !cdev->client->ops->close) {
+				dev_dbg(&vsi->back->pdev->dev,
+					"Cannot locate client instance close routine\n");
+				continue;
+			}
+			cdev->client->ops->close(&cdev->lan_info, cdev->client,
+						 reset);
+			i40e_client_release_qvlist(&cdev->lan_info);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_reset - call the client vf reset callback
+ * @pf: PF device pointer
+ * @vf_id: asolute id of VF being reset
+ *
+ * If there is a client attached to this PF, notify when a VF is reset
+ **/
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!pf)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_reset) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF reset routine\n");
+				continue;
+			}
+			cdev->client->ops->vf_reset(&cdev->lan_info,
+						    cdev->client, vf_id);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_notify_client_of_vf_enable - call the client vf notification callback
+ * @pf: PF device pointer
+ * @num_vfs: the number of VFs currently enabled, 0 for disable
+ *
+ * If there is a client attached to this PF, call its VF notification routine
+ **/
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
+{
+	struct i40e_client_instance *cdev;
+
+	if (!pf)
+		return;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_enable) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF enable routine\n");
+				continue;
+			}
+			cdev->client->ops->vf_enable(&cdev->lan_info,
+						     cdev->client, num_vfs);
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+}
+
+/**
+ * i40e_vf_client_capable - ask the client if it likes the specified VF
+ * @pf: PF device pointer
+ * @vf_id: the VF in question
+ *
+ * If there is a client of the specified type attached to this PF, call
+ * its vf_capable routine
+ **/
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
+			   enum i40e_client_type type)
+{
+	struct i40e_client_instance *cdev;
+	int capable = false;
+
+	if (!pf)
+		return false;
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if (cdev->lan_info.pf == pf) {
+			if (!cdev->client ||
+			    !cdev->client->ops ||
+			    !cdev->client->ops->vf_capable ||
+			    !(cdev->client->type == type)) {
+				dev_dbg(&pf->pdev->dev,
+					"Cannot locate client instance VF capability routine\n");
+				continue;
+			}
+			capable = cdev->client->ops->vf_capable(&cdev->lan_info,
+								cdev->client,
+								vf_id);
+			break;
+		}
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+	return capable;
+}
+
+/**
+ * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi
+ * @pf: board private structure
+ * @type: vsi type
+ * @start_vsi: a VSI pointer from where to start the search
+ *
+ * Returns non NULL on success or NULL for failure
+ **/
+struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
+				 enum i40e_vsi_type type,
+				 struct i40e_vsi *start_vsi)
+{
+	struct i40e_vsi *vsi;
+	int i = 0;
+
+	if (start_vsi) {
+		for (i = 0; i < pf->num_alloc_vsi; i++) {
+			vsi = pf->vsi[i];
+			if (vsi == start_vsi)
+				break;
+		}
+	}
+	for (; i < pf->num_alloc_vsi; i++) {
+		vsi = pf->vsi[i];
+		if (vsi && vsi->type == type)
+			return vsi;
+	}
+
+	return NULL;
+}
+
+/**
+ * i40e_client_add_instance - add a client instance struct to the instance list
+ * @pf: pointer to the board struct
+ * @client: pointer to a client struct in the client list.
+ *
+ * Returns cdev ptr on success, NULL on failure
+ **/
+static
+struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
+						      struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev;
+	struct netdev_hw_addr *mac = NULL;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry(cdev, &i40e_client_instances, list) {
+		if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
+			cdev = NULL;
+			goto out;
+		}
+	}
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		goto out;
+
+	cdev->lan_info.pf = (void *)pf;
+	cdev->lan_info.netdev = vsi->netdev;
+	cdev->lan_info.pcidev = pf->pdev;
+	cdev->lan_info.fid = pf->hw.pf_id;
+	cdev->lan_info.ftype = I40E_CLIENT_FTYPE_PF;
+	cdev->lan_info.hw_addr = pf->hw.hw_addr;
+	cdev->lan_info.ops = &i40e_lan_ops;
+	cdev->lan_info.version.major = I40E_CLIENT_VERSION_MAJOR;
+	cdev->lan_info.version.minor = I40E_CLIENT_VERSION_MINOR;
+	cdev->lan_info.version.build = I40E_CLIENT_VERSION_BUILD;
+	cdev->lan_info.fw_maj_ver = pf->hw.aq.fw_maj_ver;
+	cdev->lan_info.fw_min_ver = pf->hw.aq.fw_min_ver;
+	cdev->lan_info.fw_build = pf->hw.aq.fw_build;
+	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
+
+	if (i40e_client_get_params(vsi, &cdev->lan_info.params)) {
+		kfree(cdev);
+		cdev = NULL;
+		goto out;
+	}
+
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cdev->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&pf->pdev->dev, "MAC address list is empty!\n");
+
+	cdev->client = client;
+	INIT_LIST_HEAD(&cdev->list);
+	list_add(&cdev->list, &i40e_client_instances);
+out:
+	mutex_unlock(&i40e_client_instance_mutex);
+	return cdev;
+}
+
+/**
+ * i40e_client_del_instance - removes a client instance from the list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+static
+int i40e_client_del_instance(struct i40e_pf *pf, struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+		if ((cdev->lan_info.pf != pf) || (cdev->client != client))
+			continue;
+
+		dev_info(&pf->pdev->dev, "Deleted instance of Client %s, of dev %d bus=0x%02x func=0x%02x)\n",
+			 client->name, pf->hw.pf_id,
+			 pf->hw.bus.device, pf->hw.bus.func);
+		list_del(&cdev->list);
+		kfree(cdev);
+		ret = 0;
+		break;
+	}
+	mutex_unlock(&i40e_client_instance_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_subtask - client maintenance work
+ * @pf: board private structure
+ **/
+void i40e_client_subtask(struct i40e_pf *pf)
+{
+	struct i40e_client_instance *cdev;
+	struct i40e_client *client;
+	int ret = 0;
+
+	if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
+		return;
+	pf->flags &= ~I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+
+	/* If we're down or resetting, just bail */
+	if (test_bit(__I40E_DOWN, &pf->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, &pf->state))
+		return;
+
+	/* Check client state and instantiate client if client registered */
+	mutex_lock(&i40e_client_mutex);
+	list_for_each_entry(client, &i40e_clients, list) {
+		/* first check client is registered */
+		if (!test_bit(__I40E_CLIENT_REGISTERED, &client->state))
+			continue;
+
+		/* Do we also need the LAN VSI to be up, to create instance */
+		if (!(client->flags & I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE)) {
+			/* check if L2 VSI is up, if not we are not ready */
+			if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
+				continue;
+		}
+
+		/* Add the client instance to the instance list */
+		cdev = i40e_client_add_instance(pf, client);
+		if (!cdev)
+			continue;
+
+		/* Also up the ref_cnt of no. of instances of this client */
+		atomic_inc(&client->ref_cnt);
+		dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+			 client->name, pf->hw.pf_id,
+			 pf->hw.bus.device, pf->hw.bus.func);
+
+		/* Send an Open request to the client */
+		atomic_inc(&cdev->ref_cnt);
+		if (client->ops && client->ops->open)
+			ret = client->ops->open(&cdev->lan_info, client);
+		atomic_dec(&cdev->ref_cnt);
+		if (!ret) {
+			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+		} else {
+			/* remove client instance */
+			i40e_client_del_instance(pf, client);
+			atomic_dec(&client->ref_cnt);
+			continue;
+		}
+	}
+	mutex_unlock(&i40e_client_mutex);
+}
+
+/**
+ * i40e_lan_add_device - add a lan device struct to the list of lan devices
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or none 0 on error
+ **/
+int i40e_lan_add_device(struct i40e_pf *pf)
+{
+	struct i40e_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry(ldev, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->pf = pf;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &i40e_devices);
+	dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n",
+		 pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
+
+	/* Since in some cases register may have happened before a device gets
+	 * added, we can schedule a subtask to go initiate the clients.
+	 */
+	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+	i40e_service_event_schedule(pf);
+
+out:
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_lan_del_device - removes a lan device from the device list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_lan_del_device(struct i40e_pf *pf)
+{
+	struct i40e_device *ldev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n",
+				 pf->hw.pf_id, pf->hw.bus.device,
+				 pf->hw.bus.func);
+			list_del(&ldev->list);
+			kfree(ldev);
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_release(struct i40e_client *client)
+{
+	struct i40e_client_instance *cdev, *tmp;
+	struct i40e_pf *pf = NULL;
+	int ret = 0;
+
+	LIST_HEAD(cdevs_tmp);
+
+	mutex_lock(&i40e_client_instance_mutex);
+	list_for_each_entry_safe(cdev, tmp, &i40e_client_instances, list) {
+		if (strncmp(cdev->client->name, client->name,
+			    I40E_CLIENT_STR_LENGTH))
+			continue;
+		if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+			if (atomic_read(&cdev->ref_cnt) > 0) {
+				ret = I40E_ERR_NOT_READY;
+				goto out;
+			}
+			pf = (struct i40e_pf *)cdev->lan_info.pf;
+			if (client->ops && client->ops->close)
+				client->ops->close(&cdev->lan_info, client,
+						   false);
+			i40e_client_release_qvlist(&cdev->lan_info);
+			clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+
+			dev_warn(&pf->pdev->dev,
+				 "Client %s instance for PF id %d closed\n",
+				 client->name, pf->hw.pf_id);
+		}
+		/* delete the client instance from the list */
+		list_del(&cdev->list);
+		list_add(&cdev->list, &cdevs_tmp);
+		atomic_dec(&client->ref_cnt);
+		dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
+			 client->name);
+	}
+out:
+	mutex_unlock(&i40e_client_instance_mutex);
+
+	/* free the client device and release its vsi */
+	list_for_each_entry_safe(cdev, tmp, &cdevs_tmp, list) {
+		kfree(cdev);
+	}
+	return ret;
+}
+
+/**
+ * i40e_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_prepare(struct i40e_client *client)
+{
+	struct i40e_device *ldev;
+	struct i40e_pf *pf;
+	int ret = 0;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry(ldev, &i40e_devices, list) {
+		pf = ldev->pf;
+		/* Start the client subtask */
+		pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+		i40e_service_event_schedule(pf);
+	}
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_virtchnl_send - TBD
+ * @ldev: pointer to L2 context
+ * @client: Client pointer
+ * @vf_id: absolute VF identifier
+ * @msg: message buffer
+ * @len: length of message buffer
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	i40e_status err;
+
+	err = i40e_aq_send_msg_to_vf(hw, vf_id, I40E_VIRTCHNL_OP_IWARP,
+				     0, msg, len, NULL);
+	if (err)
+		dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n",
+			err, hw->aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * i40e_client_setup_qvlist
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qv_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+	u32 size;
+
+	size = sizeof(struct i40e_qvlist_info) +
+	       (sizeof(struct i40e_qv_info) * (qvlist_info->num_vectors - 1));
+	ldev->qvlist_info = kzalloc(size, GFP_KERNEL);
+	ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+
+		/* Validate vector id belongs to this client */
+		if ((v_idx >= (pf->iwarp_base_vector + pf->num_iwarp_msix)) ||
+		    (v_idx < pf->iwarp_base_vector))
+			goto err;
+
+		ldev->qvlist_info->qv_info[i] = *qv_info;
+		reg_idx = I40E_PFINT_LNKLSTN(v_idx - 1);
+
+		if (qv_info->ceq_idx == I40E_QUEUE_INVALID_IDX) {
+			/* Special case - No CEQ mapped on this vector */
+			wr32(hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+		} else {
+			reg = (qv_info->ceq_idx &
+			       I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, reg_idx, reg);
+
+			reg = (I40E_PFINT_CEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) |
+			       (I40E_QUEUE_END_OF_LIST <<
+				I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_PFINT_CEQCTL(qv_info->ceq_idx), reg);
+		}
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_PFINT_AEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_PFINT_AEQCTL, reg);
+		}
+	}
+
+	return 0;
+err:
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+	return -EINVAL;
+}
+
+/**
+ * i40e_client_request_reset
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @level: reset level
+ **/
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level)
+{
+	struct i40e_pf *pf = ldev->pf;
+
+	switch (reset_level) {
+	case I40E_CLIENT_RESET_LEVEL_PF:
+		set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+		break;
+	case I40E_CLIENT_RESET_LEVEL_CORE:
+		set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+		break;
+	default:
+		dev_warn(&pf->pdev->dev,
+			 "Client %s instance for PF id %d request an unsupported reset: %d.\n",
+			 client->name, pf->hw.pf_id, reset_level);
+		break;
+	}
+
+	i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_client_update_vsi_ctxt
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @is_vf: if this for the VF
+ * @vf_id: if is_vf true this carries the vf_id
+ * @flag: Any device level setting that needs to be done for PE
+ * @valid_flag: Bits in this match up and enable changing of flag bits
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_vsi_context ctxt;
+	bool update = true;
+	i40e_status err;
+
+	/* TODO: for now do not allow setting VF's VSI setting */
+	if (is_vf)
+		return -EINVAL;
+
+	ctxt.seid = pf->main_vsi_seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	err = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+	if (err) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi config, err %s aq_err %s\n",
+			 i40e_stat_str(&pf->hw, err),
+			 i40e_aq_str(&pf->hw,
+				     pf->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+	    (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
+		  !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else {
+		update = false;
+		dev_warn(&pf->pdev->dev,
+			 "Client %s instance for PF id %d request an unsupported Config: %x.\n",
+			 client->name, pf->hw.pf_id, flag);
+	}
+
+	if (update) {
+		err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "update VSI ctxt for PE failed, err %s aq_err %s\n",
+				 i40e_stat_str(&pf->hw, err),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+		}
+	}
+	return err;
+}
+
+/**
+ * i40e_register_client - Register a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_register_client(struct i40e_client *client)
+{
+	int ret = 0;
+	enum i40e_vsi_type vsi_type;
+
+	if (!client) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (strlen(client->name) == 0) {
+		pr_info("i40e: Failed to register client with no name\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	mutex_lock(&i40e_client_mutex);
+	if (i40e_client_is_registered(client)) {
+		pr_info("i40e: Client %s has already been registered!\n",
+			client->name);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((client->version.major != I40E_CLIENT_VERSION_MAJOR) ||
+	    (client->version.minor != I40E_CLIENT_VERSION_MINOR)) {
+		pr_info("i40e: Failed to register client %s due to mismatched client interface version\n",
+			client->name);
+		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+			client->version.major, client->version.minor,
+			client->version.build,
+			i40e_client_interface_version_str);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EIO;
+		goto out;
+	}
+
+	vsi_type = i40e_client_type_to_vsi_type(client->type);
+	if (vsi_type == I40E_VSI_TYPE_UNKNOWN) {
+		pr_info("i40e: Failed to register client %s due to unknown client type %d\n",
+			client->name, client->type);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -EIO;
+		goto out;
+	}
+	list_add(&client->list, &i40e_clients);
+	set_bit(__I40E_CLIENT_REGISTERED, &client->state);
+	mutex_unlock(&i40e_client_mutex);
+
+	if (i40e_client_prepare(client)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	pr_info("i40e: Registered client %s with return code %d\n",
+		client->name, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(i40e_register_client);
+
+/**
+ * i40e_unregister_client - Unregister a i40e client driver with the L2 driver
+ * @client: pointer to the i40e_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_unregister_client(struct i40e_client *client)
+{
+	int ret = 0;
+
+	/* When a unregister request comes through we would have to send
+	 * a close for each of the client instances that were opened.
+	 * client_release function is called to handle this.
+	 */
+	if (!client || i40e_client_release(client)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* TODO: check if device is in reset, or if that matters? */
+	mutex_lock(&i40e_client_mutex);
+	if (!i40e_client_is_registered(client)) {
+		pr_info("i40e: Client %s has not been registered\n",
+			client->name);
+		mutex_unlock(&i40e_client_mutex);
+		ret = -ENODEV;
+		goto out;
+	}
+	if (atomic_read(&client->ref_cnt) == 0) {
+		clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
+		list_del(&client->list);
+		pr_info("i40e: Unregistered client %s with return code %d\n",
+			client->name, ret);
+	} else {
+		ret = I40E_ERR_NOT_READY;
+		pr_err("i40e: Client %s failed unregister - client has open instances\n",
+		       client->name);
+	}
+
+	mutex_unlock(&i40e_client_mutex);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(i40e_unregister_client);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
new file mode 100644
index 000000000000..bf6b453d93a1
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -0,0 +1,232 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_CLIENT_H_
+#define _I40E_CLIENT_H_
+
+#define I40E_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define I40E_CLIENT_VERSION_MAJOR 0
+#define I40E_CLIENT_VERSION_MINOR 01
+#define I40E_CLIENT_VERSION_BUILD 00
+#define I40E_CLIENT_VERSION_STR     \
+	XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \
+	XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \
+	XSTRINGIFY(I40E_CLIENT_VERSION_BUILD)
+
+struct i40e_client_version {
+	u8 major;
+	u8 minor;
+	u8 build;
+	u8 rsvd;
+};
+
+enum i40e_client_state {
+	__I40E_CLIENT_NULL,
+	__I40E_CLIENT_REGISTERED
+};
+
+enum i40e_client_instance_state {
+	__I40E_CLIENT_INSTANCE_NONE,
+	__I40E_CLIENT_INSTANCE_OPENED,
+};
+
+enum i40e_client_type {
+	I40E_CLIENT_IWARP,
+	I40E_CLIENT_VMDQ2
+};
+
+struct i40e_ops;
+struct i40e_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX	0xFFFF
+
+struct i40e_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_qvlist_info {
+	u32 num_vectors;
+	struct i40e_qv_info qv_info[1];
+};
+
+#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct i40e_prio_qos_params {
+	u16 qs_handle; /* qs handle for prio */
+	u8 tc; /* TC mapped to prio */
+	u8 reserved;
+};
+
+#define I40E_CLIENT_MAX_USER_PRIORITY        8
+/* Struct to hold Client QoS */
+struct i40e_qos_params {
+	struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct i40e_params {
+	struct i40e_qos_params qos;
+	u16 mtu;
+};
+
+/* Structure to hold Lan device info for a client device */
+struct i40e_info {
+	struct i40e_client_version version;
+	u8 lanmac[6];
+	struct net_device *netdev;
+	struct pci_dev *pcidev;
+	u8 __iomem *hw_addr;
+	u8 fid;	/* function id, PF id or VF id */
+#define I40E_CLIENT_FTYPE_PF 0
+#define I40E_CLIENT_FTYPE_VF 1
+	u8 ftype; /* function type, PF or VF */
+	void *pf;
+
+	/* All L2 params that could change during the life span of the PF
+	 * and needs to be communicated to the client when they change
+	 */
+	struct i40e_qvlist_info *qvlist_info;
+	struct i40e_params params;
+	struct i40e_ops *ops;
+
+	u16 msix_count;	 /* number of msix vectors*/
+	/* Array down below will be dynamically allocated based on msix_count */
+	struct msix_entry *msix_entries;
+	u16 itr_index; /* Which ITR index the PE driver is suppose to use */
+	u16 fw_maj_ver;                 /* firmware major version */
+	u16 fw_min_ver;                 /* firmware minor version */
+	u32 fw_build;                   /* firmware build number */
+};
+
+#define I40E_CLIENT_RESET_LEVEL_PF   1
+#define I40E_CLIENT_RESET_LEVEL_CORE 2
+#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE  BIT(1)
+
+struct i40e_ops {
+	/* setup_q_vector_list enables queues with a particular vector */
+	int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client,
+			    struct i40e_qvlist_info *qv_info);
+
+	int (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client,
+			     u32 vf_id, u8 *msg, u16 len);
+
+	/* If the PE Engine is unresponsive, RDMA driver can request a reset.
+	 * The level helps determine the level of reset being requested.
+	 */
+	void (*request_reset)(struct i40e_info *ldev,
+			      struct i40e_client *client, u32 level);
+
+	/* API for the RDMA driver to set certain VSI flags that control
+	 * PE Engine.
+	 */
+	int (*update_vsi_ctxt)(struct i40e_info *ldev,
+			       struct i40e_client *client,
+			       bool is_vf, u32 vf_id,
+			       u32 flag, u32 valid_flag);
+};
+
+struct i40e_client_ops {
+	/* Should be called from register_client() or whenever PF is ready
+	 * to create a specific client instance.
+	 */
+	int (*open)(struct i40e_info *ldev, struct i40e_client *client);
+
+	/* Should be called when netdev is unavailable or when unregister
+	 * call comes in. If the close is happenening due to a reset being
+	 * triggered set the reset bit to true.
+	 */
+	void (*close)(struct i40e_info *ldev, struct i40e_client *client,
+		      bool reset);
+
+	/* called when some l2 managed parameters changes - mtu */
+	void (*l2_param_change)(struct i40e_info *ldev,
+				struct i40e_client *client,
+				struct i40e_params *params);
+
+	int (*virtchnl_receive)(struct i40e_info *ldev,
+				struct i40e_client *client, u32 vf_id,
+				u8 *msg, u16 len);
+
+	/* called when a VF is reset by the PF */
+	void (*vf_reset)(struct i40e_info *ldev,
+			 struct i40e_client *client, u32 vf_id);
+
+	/* called when the number of VFs changes */
+	void (*vf_enable)(struct i40e_info *ldev,
+			  struct i40e_client *client, u32 num_vfs);
+
+	/* returns true if VF is capable of specified offload */
+	int (*vf_capable)(struct i40e_info *ldev,
+			  struct i40e_client *client, u32 vf_id);
+};
+
+/* Client device */
+struct i40e_client_instance {
+	struct list_head list;
+	struct i40e_info lan_info;
+	struct i40e_client *client;
+	unsigned long  state;
+	/* A count of all the in-progress calls to the client */
+	atomic_t ref_cnt;
+};
+
+struct i40e_client {
+	struct list_head list;		/* list of registered clients */
+	char name[I40E_CLIENT_STR_LENGTH];
+	struct i40e_client_version version;
+	unsigned long state;		/* client state */
+	atomic_t ref_cnt;  /* Count of all the client devices of this kind */
+	u32 flags;
+#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE	BIT(0)
+#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS	BIT(2)
+	enum i40e_client_type type;
+	struct i40e_client_ops *ops;	/* client ops provided by the client */
+};
+
+static inline bool i40e_client_is_registered(struct i40e_client *client)
+{
+	return test_bit(__I40E_CLIENT_REGISTERED, &client->state);
+}
+
+/* used by clients */
+int i40e_register_client(struct i40e_client *client);
+int i40e_unregister_client(struct i40e_client *client);
+
+#endif /* _I40E_CLIENT_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 70d9605a0d9e..67006431726a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -289,7 +289,7 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
  *
  * If not already scheduled, this puts the task into the work queue
  **/
-static void i40e_service_event_schedule(struct i40e_pf *pf)
+void i40e_service_event_schedule(struct i40e_pf *pf)
 {
 	if (!test_bit(__I40E_DOWN, &pf->state) &&
 	    !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
@@ -2230,7 +2230,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 	netdev->mtu = new_mtu;
 	if (netif_running(netdev))
 		i40e_vsi_reinit_locked(vsi);
-
+	i40e_notify_client_of_l2_param_changes(vsi);
 	return 0;
 }
 
@@ -4169,6 +4169,9 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
 		free_irq(pf->msix_entries[0].vector, pf);
 	}
 
+	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
+		      I40E_IWARP_IRQ_PILE_ID);
+
 	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
 	for (i = 0; i < pf->num_alloc_vsi; i++)
 		if (pf->vsi[i])
@@ -4212,12 +4215,17 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
  **/
 static void i40e_vsi_close(struct i40e_vsi *vsi)
 {
+	bool reset = false;
+
 	if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
 		i40e_down(vsi);
 	i40e_vsi_free_irq(vsi);
 	i40e_vsi_free_tx_resources(vsi);
 	i40e_vsi_free_rx_resources(vsi);
 	vsi->current_netdev_flags = 0;
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
+		reset = true;
+	i40e_notify_client_of_netdev_close(vsi, reset);
 }
 
 /**
@@ -4850,6 +4858,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
 	ctxt.info = vsi->info;
 	i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
 
+	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+		ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	}
+
 	/* Update the VSI after updating the VSI queue-mapping information */
 	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
 	if (ret) {
@@ -4993,6 +5007,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
 			if (pf->vsi[v]->netdev)
 				i40e_dcbnl_set_all(pf->vsi[v]);
 		}
+		i40e_notify_client_of_l2_param_changes(pf->vsi[v]);
 	}
 }
 
@@ -5191,6 +5206,11 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
 		}
 		i40e_fdir_filter_restore(vsi);
 	}
+
+	/* On the next run of the service_task, notify any clients of the new
+	 * opened netdev
+	 */
+	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
 	i40e_service_event_schedule(pf);
 
 	return 0;
@@ -5379,6 +5399,8 @@ int i40e_open(struct net_device *netdev)
 		geneve_get_rx_port(netdev);
 #endif
 
+	i40e_notify_client_of_netdev_open(vsi);
+
 	return 0;
 }
 
@@ -6043,6 +6065,7 @@ static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
 	case I40E_VSI_SRIOV:
 	case I40E_VSI_VMDQ2:
 	case I40E_VSI_CTRL:
+	case I40E_VSI_IWARP:
 	case I40E_VSI_MIRROR:
 	default:
 		/* there is no notification for other VSIs */
@@ -7148,6 +7171,7 @@ static void i40e_service_task(struct work_struct *work)
 	i40e_vc_process_vflr_event(pf);
 	i40e_watchdog_subtask(pf);
 	i40e_fdir_reinit_subtask(pf);
+	i40e_client_subtask(pf);
 	i40e_sync_filters_subtask(pf);
 	i40e_sync_udp_filters_subtask(pf);
 	i40e_clean_adminq_subtask(pf);
@@ -7550,6 +7574,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
 	int vectors_left;
 	int v_budget, i;
 	int v_actual;
+	int iwarp_requested = 0;
 
 	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
 		return -ENODEV;
@@ -7563,6 +7588,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
 	 *		is governed by number of cpus in the system.
 	 *	- assumes symmetric Tx/Rx pairing
 	 *   - The number of VMDq pairs
+	 *   - The CPU count within the NUMA node if iWARP is enabled
 #ifdef I40E_FCOE
 	 *   - The number of FCOE qps.
 #endif
@@ -7609,6 +7635,16 @@ static int i40e_init_msix(struct i40e_pf *pf)
 	}
 
 #endif
+	/* can we reserve enough for iWARP? */
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		if (!vectors_left)
+			pf->num_iwarp_msix = 0;
+		else if (vectors_left < pf->num_iwarp_msix)
+			pf->num_iwarp_msix = 1;
+		v_budget += pf->num_iwarp_msix;
+		vectors_left -= pf->num_iwarp_msix;
+	}
+
 	/* any vectors left over go for VMDq support */
 	if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
 		int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
@@ -7643,6 +7679,8 @@ static int i40e_init_msix(struct i40e_pf *pf)
 		 * of these features based on the policy and at the end disable
 		 * the features that did not get any vectors.
 		 */
+		iwarp_requested = pf->num_iwarp_msix;
+		pf->num_iwarp_msix = 0;
 #ifdef I40E_FCOE
 		pf->num_fcoe_qps = 0;
 		pf->num_fcoe_msix = 0;
@@ -7681,17 +7719,33 @@ static int i40e_init_msix(struct i40e_pf *pf)
 			pf->num_lan_msix = 1;
 			break;
 		case 3:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_lan_msix = 1;
+				pf->num_iwarp_msix = 1;
+			} else {
+				pf->num_lan_msix = 2;
+			}
 #ifdef I40E_FCOE
 			/* give one vector to FCoE */
 			if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
 				pf->num_lan_msix = 1;
 				pf->num_fcoe_msix = 1;
 			}
-#else
-			pf->num_lan_msix = 2;
 #endif
 			break;
 		default:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_iwarp_msix = min_t(int, (vec / 3),
+						 iwarp_requested);
+				pf->num_vmdq_vsis = min_t(int, (vec / 3),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			} else {
+				pf->num_vmdq_vsis = min_t(int, (vec / 2),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			}
+			pf->num_lan_msix = min_t(int,
+			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
+							      pf->num_lan_msix);
 #ifdef I40E_FCOE
 			/* give one vector to FCoE */
 			if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
@@ -7699,8 +7753,6 @@ static int i40e_init_msix(struct i40e_pf *pf)
 				vec--;
 			}
 #endif
-			/* give the rest to the PF */
-			pf->num_lan_msix = min_t(int, vec, pf->num_lan_qps);
 			break;
 		}
 	}
@@ -7710,6 +7762,12 @@ static int i40e_init_msix(struct i40e_pf *pf)
 		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
 		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
 	}
+
+	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+	    (pf->num_iwarp_msix == 0)) {
+		dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+	}
 #ifdef I40E_FCOE
 
 	if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
@@ -7801,6 +7859,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 		vectors = i40e_init_msix(pf);
 		if (vectors < 0) {
 			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
+				       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 				       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -8474,6 +8533,12 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
 	}
 
+	if (pf->hw.func_caps.iwarp) {
+		pf->flags |= I40E_FLAG_IWARP_ENABLED;
+		/* IWARP needs one extra vector for CQP just like MISC.*/
+		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
+	}
+
 #ifdef I40E_FCOE
 	i40e_init_pf_fcoe(pf);
 
@@ -9328,6 +9393,13 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
 		}
 
+		if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+			ctxt.info.queueing_opt_flags |=
+						I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+		}
+
 		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
 		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
 		if (pf->vf[vsi->vf_id].spoofchk) {
@@ -9351,6 +9423,10 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		break;
 
 #endif /* I40E_FCOE */
+	case I40E_VSI_IWARP:
+		/* send down message to iWARP */
+		break;
+
 	default:
 		return -ENODEV;
 	}
@@ -10467,6 +10543,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 
 		/* make sure all the fancies are disabled */
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 			       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -10484,6 +10561,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
 		queues_left -= pf->num_lan_qps;
 
 		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
 #ifdef I40E_FCOE
 			       I40E_FLAG_FCOE_ENABLED	|
 #endif
@@ -11059,7 +11137,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 #endif /* CONFIG_PCI_IOV */
 
-	pfs_found++;
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
+						      pf->num_iwarp_msix,
+						      I40E_IWARP_IRQ_PILE_ID);
+		if (pf->iwarp_base_vector < 0) {
+			dev_info(&pdev->dev,
+				 "failed to get tracking for %d vectors for IWARP err=%d\n",
+				 pf->num_iwarp_msix, pf->iwarp_base_vector);
+			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+		}
+	}
 
 	i40e_dbg_pf_init(pf);
 
@@ -11070,6 +11158,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mod_timer(&pf->service_timer,
 		  round_jiffies(jiffies + pf->service_timer_period));
 
+	/* add this PF to client device list and launch a client service task */
+	err = i40e_lan_add_device(pf);
+	if (err)
+		dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
+			 err);
+
 #ifdef I40E_FCOE
 	/* create FCoE interface */
 	i40e_fcoe_vsi_setup(pf);
@@ -11245,6 +11339,13 @@ static void i40e_remove(struct pci_dev *pdev)
 	if (pf->vsi[pf->lan_vsi])
 		i40e_vsi_release(pf->vsi[pf->lan_vsi]);
 
+	/* remove attached clients */
+	ret_code = i40e_lan_del_device(pf);
+	if (ret_code) {
+		dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+			 ret_code);
+	}
+
 	/* shutdown and destroy the HMC */
 	if (hw->hmc.hmc_obj) {
 		ret_code = i40e_shutdown_lan_hmc(hw);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 0a0baf71041b..3335f9d13374 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -78,7 +78,7 @@ enum i40e_debug_mask {
 	I40E_DEBUG_DCB			= 0x00000400,
 	I40E_DEBUG_DIAG			= 0x00000800,
 	I40E_DEBUG_FD			= 0x00001000,
-
+	I40E_DEBUG_IWARP		= 0x00F00000,
 	I40E_DEBUG_AQ_MESSAGE		= 0x01000000,
 	I40E_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
 	I40E_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
@@ -160,6 +160,7 @@ enum i40e_vsi_type {
 	I40E_VSI_MIRROR	= 5,
 	I40E_VSI_SRIOV	= 6,
 	I40E_VSI_FDIR	= 7,
+	I40E_VSI_IWARP	= 8,
 	I40E_VSI_TYPE_UNKNOWN
 };
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index 3226946bf3d4..ab866cf3dc18 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -81,6 +81,9 @@ enum i40e_virtchnl_ops {
 	I40E_VIRTCHNL_OP_GET_STATS = 15,
 	I40E_VIRTCHNL_OP_FCOE = 16,
 	I40E_VIRTCHNL_OP_EVENT = 17,
+	I40E_VIRTCHNL_OP_IWARP = 20,
+	I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
+	I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
 };
 
 /* Virtual channel message descriptor. This overlays the admin queue
@@ -348,6 +351,37 @@ struct i40e_virtchnl_pf_event {
 	int severity;
 };
 
+/* I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP
+ * VF uses this message to request PF to map IWARP vectors to IWARP queues.
+ * The request for this originates from the VF IWARP driver through
+ * a client interface between VF LAN and VF IWARP driver.
+ * A vector could have an AEQ and CEQ attached to it although
+ * there is a single AEQ per VF IWARP instance in which case
+ * most vectors will have an INVALID_IDX for aeq and valid idx for ceq.
+ * There will never be a case where there will be multiple CEQs attached
+ * to a single vector.
+ * PF configures interrupt mapping and returns status.
+ */
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+*/
+#define I40E_QUEUE_TYPE_PE_AEQ  0x80
+#define I40E_QUEUE_INVALID_IDX  0xFFFF
+
+struct i40e_virtchnl_iwarp_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct i40e_virtchnl_iwarp_qvlist_info {
+	u32 num_vectors;
+	struct i40e_virtchnl_iwarp_qv_info qv_info[1];
+};
+
 /* VF reset states - these are written into the RSTAT register:
  * I40E_VFGEN_RSTAT1 on the PF
  * I40E_VFGEN_RSTAT on the VF
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index acd2693a4e97..816c6bbf7093 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -352,6 +352,136 @@ irq_list_done:
 }
 
 /**
+ * i40e_release_iwarp_qvlist
+ * @vf: pointer to the VF.
+ *
+ **/
+static void i40e_release_iwarp_qvlist(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info = vf->qvlist_info;
+	u32 msix_vf;
+	u32 i;
+
+	if (!vf->qvlist_info)
+		return;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct i40e_virtchnl_iwarp_qv_info *qv_info;
+		u32 next_q_index, next_q_type;
+		struct i40e_hw *hw = &pf->hw;
+		u32 v_idx, reg_idx, reg;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			/* Figure out the queue after CEQ and make that the
+			 * first queue.
+			 */
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx));
+			next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT;
+			next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT;
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (next_q_index &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (next_q_type <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+	}
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+}
+
+/**
+ * i40e_config_iwarp_qvlist
+ * @vf: pointer to the VF info
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
+				    struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_virtchnl_iwarp_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+	u32 next_q_idx, next_q_type;
+	u32 msix_vf, size;
+
+	size = sizeof(struct i40e_virtchnl_iwarp_qvlist_info) +
+	       (sizeof(struct i40e_virtchnl_iwarp_qv_info) *
+						(qvlist_info->num_vectors - 1));
+	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+
+		/* Validate vector id belongs to this vf */
+		if (!i40e_vc_isvalid_vector_id(vf, v_idx))
+			goto err;
+
+		vf->qvlist_info->qv_info[i] = *qv_info;
+
+		reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+		/* We might be sharing the interrupt, so get the first queue
+		 * index and type, push it down the list by adding the new
+		 * queue on top. Also link it with the new queue in CEQCTL.
+		 */
+		reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx));
+		next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT);
+		next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) |
+			(next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) |
+			(next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg);
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (qv_info->ceq_idx &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg);
+		}
+	}
+
+	return 0;
+err:
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+	return -EINVAL;
+}
+
+/**
  * i40e_config_vsi_tx_queue
  * @vf: pointer to the VF info
  * @vsi_id: id of VSI as provided by the FW
@@ -850,9 +980,11 @@ complete_reset:
 	/* reallocate VF resources to reset the VSI state */
 	i40e_free_vf_res(vf);
 	if (!i40e_alloc_vf_res(vf)) {
+		int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
 		i40e_enable_vf_mappings(vf);
 		set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
 		clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+		i40e_notify_client_of_vf_reset(pf, abs_vf_id);
 	}
 	/* tell the VF the reset is done */
 	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@ -877,11 +1009,7 @@ void i40e_free_vfs(struct i40e_pf *pf)
 	while (test_and_set_bit(__I40E_VF_DISABLE, &pf->state))
 		usleep_range(1000, 2000);
 
-	for (i = 0; i < pf->num_alloc_vfs; i++)
-		if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
-			i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
-					       false);
-
+	i40e_notify_client_of_vf_enable(pf, 0);
 	for (i = 0; i < pf->num_alloc_vfs; i++)
 		if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
 			i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
@@ -953,6 +1081,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
 			goto err_iov;
 		}
 	}
+	i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
 	/* allocate memory */
 	vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
 	if (!vfs) {
@@ -1206,6 +1335,13 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!vsi->info.pvid)
 		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	if (i40e_vf_client_capable(pf, vf->vf_id, I40E_CLIENT_IWARP) &&
+	    (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_IWARP)) {
+		vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_IWARP;
+		set_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states);
+	}
+
 	if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) {
 		if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ)
 			vfres->vf_offload_flags |=
@@ -1827,6 +1963,72 @@ error_param:
 }
 
 /**
+ * i40e_vc_iwarp_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+	struct i40e_pf *pf = vf->pf;
+	int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
+	i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id,
+				     msg, msglen);
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_IWARP,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_iwarp_qvmap_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @config: config qvmap or release it
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen,
+				   bool config)
+{
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info =
+				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+	i40e_status aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STAT_IWARPENA, &vf->vf_states)) {
+		aq_ret = I40E_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (config) {
+		if (i40e_config_iwarp_qvlist(vf, qvlist_info))
+			aq_ret = I40E_ERR_PARAM;
+	} else {
+		i40e_release_iwarp_qvlist(vf);
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf,
+			       config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP :
+			       I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+			       aq_ret);
+}
+
+/**
  * i40e_vc_validate_vf_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -1921,6 +2123,32 @@ static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
 	case I40E_VIRTCHNL_OP_GET_STATS:
 		valid_len = sizeof(struct i40e_virtchnl_queue_select);
 		break;
+	case I40E_VIRTCHNL_OP_IWARP:
+		/* These messages are opaque to us and will be validated in
+		 * the RDMA client code. We just need to check for nonzero
+		 * length. The firmware will enforce max length restrictions.
+		 */
+		if (msglen)
+			valid_len = msglen;
+		else
+			err_msg_format = true;
+		break;
+	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+		valid_len = 0;
+		break;
+	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		valid_len = sizeof(struct i40e_virtchnl_iwarp_qvlist_info);
+		if (msglen >= valid_len) {
+			struct i40e_virtchnl_iwarp_qvlist_info *qv =
+				(struct i40e_virtchnl_iwarp_qvlist_info *)msg;
+			if (qv->num_vectors == 0) {
+				err_msg_format = true;
+				break;
+			}
+			valid_len += ((qv->num_vectors - 1) *
+				sizeof(struct i40e_virtchnl_iwarp_qv_info));
+		}
+		break;
 	/* These are always errors coming from the VF. */
 	case I40E_VIRTCHNL_OP_EVENT:
 	case I40E_VIRTCHNL_OP_UNKNOWN:
@@ -2010,6 +2238,15 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode,
 	case I40E_VIRTCHNL_OP_GET_STATS:
 		ret = i40e_vc_get_stats_msg(vf, msg, msglen);
 		break;
+	case I40E_VIRTCHNL_OP_IWARP:
+		ret = i40e_vc_iwarp_msg(vf, msg, msglen);
+		break;
+	case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP:
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, true);
+		break;
+	case I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP:
+		ret = i40e_vc_iwarp_qvmap_msg(vf, msg, msglen, false);
+		break;
 	case I40E_VIRTCHNL_OP_UNKNOWN:
 	default:
 		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index e74642a0c42e..e7b2fba0309e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -58,6 +58,7 @@ enum i40e_queue_ctrl {
 enum i40e_vf_states {
 	I40E_VF_STAT_INIT = 0,
 	I40E_VF_STAT_ACTIVE,
+	I40E_VF_STAT_IWARPENA,
 	I40E_VF_STAT_FCOEENA,
 	I40E_VF_STAT_DISABLED,
 };
@@ -66,6 +67,7 @@ enum i40e_vf_states {
 enum i40e_vf_capabilities {
 	I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
 	I40E_VIRTCHNL_VF_CAP_L2,
+	I40E_VIRTCHNL_VF_CAP_IWARP,
 };
 
 /* VF information structure */
@@ -106,6 +108,8 @@ struct i40e_vf {
 	bool link_forced;
 	bool link_up;		/* only valid if VF link is forced */
 	bool spoofchk;
+	/* RDMA Client */
+	struct i40e_virtchnl_iwarp_qvlist_info *qvlist_info;
 };
 
 void i40e_free_vfs(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 7f2126b6a179..e0b68afea56e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1690,8 +1690,8 @@ static int mtk_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	eth->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!eth->base)
-		return -EADDRNOTAVAIL;
+	if (IS_ERR(eth->base))
+		return PTR_ERR(eth->base);
 
 	spin_lock_init(&eth->page_lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 42d8de892bfe..6aa73972d478 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -39,8 +39,6 @@
 
 #include "mlx4.h"
 
-static const u8 zero_gid[16];	/* automatically initialized to 0 */
-
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
 {
 	return 1 << dev->oper_log_mgm_entry_size;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 97f5114fc113..eb926e1ee71c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -407,6 +407,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 const char *mlx5_command_str(int command)
 {
 	switch (command) {
+	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+		return "QUERY_HCA_VPORT_CONTEXT";
+
+	case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT:
+		return "MODIFY_HCA_VPORT_CONTEXT";
+
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
 		return "QUERY_HCA_CAP";
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index aa1ab4702385..75c7ae6a5cc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -98,88 +98,55 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 {
 	int err;
 
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
-	if (err)
-		return err;
-
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
+	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
 	if (err)
 		return err;
 
 	if (MLX5_CAP_GEN(dev, eth_net_offloads)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, pg)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, atomic)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, roce)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
 	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_CUR);
-		if (err)
-			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE,
-					 HCA_CAP_OPMOD_GET_MAX);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
-					 HCA_CAP_OPMOD_GET_CUR);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
 		if (err)
 			return err;
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH,
-					 HCA_CAP_OPMOD_GET_MAX);
+	}
+
+	if (MLX5_CAP_GEN(dev, vector_calc)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_VECTOR_CALC);
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 72a94e72ee25..3f3b2fae4991 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -341,8 +341,9 @@ static u16 to_fw_pkey_sz(u32 size)
 	}
 }
 
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type,
-		       enum mlx5_cap_mode cap_mode)
+static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
+				   enum mlx5_cap_type cap_type,
+				   enum mlx5_cap_mode cap_mode)
 {
 	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
@@ -392,6 +393,16 @@ query_ex:
 	return err;
 }
 
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
+{
+	int ret;
+
+	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
+	if (ret)
+		return ret;
+	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
 	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
@@ -419,8 +430,7 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
 	int err;
 
 	if (MLX5_CAP_GEN(dev, atomic)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
-					 HCA_CAP_OPMOD_GET_CUR);
+		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
 		if (err)
 			return err;
 	} else {
@@ -462,11 +472,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	if (!set_ctx)
 		goto query_ex;
 
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_MAX);
-	if (err)
-		goto query_ex;
-
-	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL, HCA_CAP_OPMOD_GET_CUR);
+	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
 	if (err)
 		goto query_ex;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 90ab09e375b8..bd518405859e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -852,7 +852,8 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
 
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
-				  u8 port_num, void *out, size_t out_sz)
+				  int vf, u8 port_num, void *out,
+				  size_t out_sz)
 {
 	int	in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
 	int	is_group_manager;
@@ -871,7 +872,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
 	if (other_vport) {
 		if (is_group_manager) {
 			MLX5_SET(query_vport_counter_in, in, other_vport, 1);
-			MLX5_SET(query_vport_counter_in, in, vport_number, 0);
+			MLX5_SET(query_vport_counter_in, in, vport_number, vf + 1);
 		} else {
 			err = -EPERM;
 			goto free;
@@ -890,3 +891,70 @@ free:
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
+
+int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
+				       u8 other_vport, u8 port_num,
+				       int vf,
+				       struct mlx5_hca_vport_context *req)
+{
+	int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
+	u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
+	int is_group_manager;
+	void *in;
+	int err;
+	void *ctx;
+
+	mlx5_core_dbg(dev, "vf %d\n", vf);
+	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
+	in = kzalloc(in_sz, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	memset(out, 0, sizeof(out));
+	MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
+	if (other_vport) {
+		if (is_group_manager) {
+			MLX5_SET(modify_hca_vport_context_in, in, other_vport, 1);
+			MLX5_SET(modify_hca_vport_context_in, in, vport_number, vf);
+		} else {
+			err = -EPERM;
+			goto ex;
+		}
+	}
+
+	if (MLX5_CAP_GEN(dev, num_ports) > 1)
+		MLX5_SET(modify_hca_vport_context_in, in, port_num, port_num);
+
+	ctx = MLX5_ADDR_OF(modify_hca_vport_context_in, in, hca_vport_context);
+	MLX5_SET(hca_vport_context, ctx, field_select, req->field_select);
+	MLX5_SET(hca_vport_context, ctx, sm_virt_aware, req->sm_virt_aware);
+	MLX5_SET(hca_vport_context, ctx, has_smi, req->has_smi);
+	MLX5_SET(hca_vport_context, ctx, has_raw, req->has_raw);
+	MLX5_SET(hca_vport_context, ctx, vport_state_policy, req->policy);
+	MLX5_SET(hca_vport_context, ctx, port_physical_state, req->phys_state);
+	MLX5_SET(hca_vport_context, ctx, vport_state, req->vport_state);
+	MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid);
+	MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid);
+	MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
+	MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, req->cap_mask1_perm);
+	MLX5_SET(hca_vport_context, ctx, cap_mask2, req->cap_mask2);
+	MLX5_SET(hca_vport_context, ctx, cap_mask2_field_select, req->cap_mask2_perm);
+	MLX5_SET(hca_vport_context, ctx, lid, req->lid);
+	MLX5_SET(hca_vport_context, ctx, init_type_reply, req->init_type_reply);
+	MLX5_SET(hca_vport_context, ctx, lmc, req->lmc);
+	MLX5_SET(hca_vport_context, ctx, subnet_timeout, req->subnet_timeout);
+	MLX5_SET(hca_vport_context, ctx, sm_lid, req->sm_lid);
+	MLX5_SET(hca_vport_context, ctx, sm_sl, req->sm_sl);
+	MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
+	MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+	if (err)
+		goto ex;
+
+	err = mlx5_cmd_status_to_err_v2(out);
+
+ex:
+	kfree(in);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index ab264e1bccd0..75683fb26734 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -45,7 +45,7 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <asm-generic/io-64-nonatomic-hi-lo.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 
 #include "nfp_net_ctrl.h"
 
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 3f5711061432..a733868a43aa 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1791,9 +1791,11 @@ static int smc911x_probe(struct net_device *dev)
 	unsigned int val, chip_id, revision;
 	const char *version_string;
 	unsigned long irq_flags;
+#ifdef SMC_USE_DMA
 	struct dma_slave_config	config;
 	dma_cap_mask_t mask;
 	struct pxad_param param;
+#endif
 
 	DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__);
 
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 192631a345df..bc168894bda3 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -843,8 +843,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 	if (info) {
 		fl6->daddr = info->key.u.ipv6.dst;
 		fl6->saddr = info->key.u.ipv6.src;
-		fl6->flowi6_tos = RT_TOS(info->key.tos);
-		fl6->flowlabel = info->key.label;
+		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
+						   info->key.label);
 		dst_cache = &info->dst_cache;
 	} else {
 		prio = geneve->tos;
@@ -855,8 +855,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 			use_cache = false;
 		}
 
-		fl6->flowi6_tos = RT_TOS(prio);
-		fl6->flowlabel = geneve->label;
+		fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
+						   geneve->label);
 		fl6->daddr = geneve->remote.sin6.sin6_addr;
 		dst_cache = &geneve->dst_cache;
 	}
@@ -1049,7 +1049,8 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
 		if (unlikely(err))
 			goto err;
 
-		prio = ip_tunnel_ecn_encap(fl6.flowi6_tos, iip, skb);
+		prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
+					   iip, skb);
 		ttl = geneve->ttl;
 		if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
 			ttl = 1;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b4c68783dfc3..8b3bd8ecd1c4 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -619,6 +619,7 @@ struct nvsp_message {
 #define NETVSC_PACKET_SIZE                      4096
 
 #define VRSS_SEND_TAB_SIZE 16
+#define VRSS_CHANNEL_MAX 64
 
 #define RNDIS_MAX_PKT_DEFAULT 8
 #define RNDIS_PKT_ALIGN_DEFAULT 8
@@ -700,13 +701,13 @@ struct netvsc_device {
 
 	struct net_device *ndev;
 
-	struct vmbus_channel *chn_table[NR_CPUS];
+	struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
 	u32 send_table[VRSS_SEND_TAB_SIZE];
 	u32 max_chn;
 	u32 num_chn;
 	spinlock_t sc_lock; /* Protects num_sc_offered variable */
 	u32 num_sc_offered;
-	atomic_t queue_sends[NR_CPUS];
+	atomic_t queue_sends[VRSS_CHANNEL_MAX];
 
 	/* Holds rndis device info */
 	void *extension;
@@ -718,7 +719,7 @@ struct netvsc_device {
 	/* The sub channel callback buffer */
 	unsigned char *sub_cb_buf;
 
-	struct multi_send_data msd[NR_CPUS];
+	struct multi_send_data msd[VRSS_CHANNEL_MAX];
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 08608499fa17..b8121eba33ff 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -858,6 +858,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
 	struct netvsc_device_info device_info;
 	int limit = ETH_DATA_LEN;
+	u32 num_chn;
 	int ret = 0;
 
 	if (nvdev == NULL || nvdev->destroy)
@@ -873,6 +874,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	if (ret)
 		goto out;
 
+	num_chn = nvdev->num_chn;
+
 	nvdev->start_remove = true;
 	rndis_filter_device_remove(hdev);
 
@@ -883,7 +886,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
-	device_info.num_chn = nvdev->num_chn;
+	device_info.num_chn = num_chn;
 	device_info.max_num_vrss_chns = max_num_vrss_chns;
 	rndis_filter_device_add(hdev, &device_info);
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 47d07c576a34..c4e1e0408433 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -986,12 +986,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 
 	nvscdev = hv_get_drvdata(new_sc->primary_channel->device_obj);
 
-	spin_lock_irqsave(&nvscdev->sc_lock, flags);
-	nvscdev->num_sc_offered--;
-	spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
-	if (nvscdev->num_sc_offered == 0)
-		complete(&nvscdev->channel_init_wait);
-
 	if (chn_index >= nvscdev->num_chn)
 		return;
 
@@ -1004,6 +998,12 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 
 	if (ret == 0)
 		nvscdev->chn_table[chn_index] = new_sc;
+
+	spin_lock_irqsave(&nvscdev->sc_lock, flags);
+	nvscdev->num_sc_offered--;
+	spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
+	if (nvscdev->num_sc_offered == 0)
+		complete(&nvscdev->channel_init_wait);
 }
 
 int rndis_filter_device_add(struct hv_device *dev,
@@ -1113,9 +1113,9 @@ int rndis_filter_device_add(struct hv_device *dev,
 	if (ret || rsscap.num_recv_que < 2)
 		goto out;
 
-	num_rss_qs = min(device_info->max_num_vrss_chns, rsscap.num_recv_que);
+	net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
 
-	net_device->max_chn = rsscap.num_recv_que;
+	num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
 
 	/*
 	 * We will limit the VRSS channels to the number CPUs in the NUMA node
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 1e901c7cfaac..b3ffaee30858 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -277,12 +277,16 @@ static int at803x_probe(struct phy_device *phydev)
 	if (!priv)
 		return -ENOMEM;
 
-	gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (phydev->drv->phy_id != ATH8030_PHY_ID)
+		goto does_not_require_reset_workaround;
+
+	gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
 	if (IS_ERR(gpiod_reset))
 		return PTR_ERR(gpiod_reset);
 
 	priv->gpiod_reset = gpiod_reset;
 
+does_not_require_reset_workaround:
 	phydev->priv = priv;
 
 	return 0;
@@ -362,10 +366,10 @@ static void at803x_link_change_notify(struct phy_device *phydev)
 
 				at803x_context_save(phydev, &context);
 
-				gpiod_set_value(priv->gpiod_reset, 0);
-				msleep(1);
 				gpiod_set_value(priv->gpiod_reset, 1);
 				msleep(1);
+				gpiod_set_value(priv->gpiod_reset, 0);
+				msleep(1);
 
 				at803x_context_restore(phydev, &context);
 
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index f70522c35163..135296508a7e 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -122,6 +122,7 @@ static int sun4i_mdio_probe(struct platform_device *pdev)
 			return -EPROBE_DEFER;
 
 		dev_info(&pdev->dev, "no regulator found\n");
+		data->regulator = NULL;
 	} else {
 		ret = regulator_enable(data->regulator);
 		if (ret)
@@ -137,7 +138,8 @@ static int sun4i_mdio_probe(struct platform_device *pdev)
 	return 0;
 
 err_out_disable_regulator:
-	regulator_disable(data->regulator);
+	if (data->regulator)
+		regulator_disable(data->regulator);
 err_out_free_mdiobus:
 	mdiobus_free(bus);
 	return ret;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 4fd861063ed4..f572b31a2b20 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2307,7 +2307,7 @@ int ppp_register_net_channel(struct net *net, struct ppp_channel *chan)
 
 	pch->ppp = NULL;
 	pch->chan = chan;
-	pch->chan_net = net;
+	pch->chan_net = get_net(net);
 	chan->ppp = pch;
 	init_ppp_file(&pch->file, CHANNEL);
 	pch->file.hdrlen = chan->hdrlen;
@@ -2404,6 +2404,8 @@ ppp_unregister_channel(struct ppp_channel *chan)
 	spin_lock_bh(&pn->all_channels_lock);
 	list_del(&pch->list);
 	spin_unlock_bh(&pn->all_channels_lock);
+	put_net(pch->chan_net);
+	pch->chan_net = NULL;
 
 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 01f08a7751f7..9cfe6aeac84e 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -24,6 +24,7 @@
 #include <linux/skbuff.h>
 #include <linux/crc32.h>
 #include <linux/ethtool.h>
+#include <linux/reboot.h>
 
 #define DRV_NAME        "rionet"
 #define DRV_VERSION     "0.3"
@@ -48,6 +49,8 @@ MODULE_LICENSE("GPL");
 #define RIONET_TX_RING_SIZE	CONFIG_RIONET_TX_SIZE
 #define RIONET_RX_RING_SIZE	CONFIG_RIONET_RX_SIZE
 #define RIONET_MAX_NETS		8
+#define RIONET_MSG_SIZE         RIO_MAX_MSG_SIZE
+#define RIONET_MAX_MTU          (RIONET_MSG_SIZE - ETH_HLEN)
 
 struct rionet_private {
 	struct rio_mport *mport;
@@ -60,6 +63,7 @@ struct rionet_private {
 	spinlock_t lock;
 	spinlock_t tx_lock;
 	u32 msg_enable;
+	bool open;
 };
 
 struct rionet_peer {
@@ -71,6 +75,7 @@ struct rionet_peer {
 struct rionet_net {
 	struct net_device *ndev;
 	struct list_head peers;
+	spinlock_t lock;	/* net info access lock */
 	struct rio_dev **active;
 	int nact;	/* number of active peers */
 };
@@ -232,26 +237,32 @@ static void rionet_dbell_event(struct rio_mport *mport, void *dev_id, u16 sid, u
 	struct net_device *ndev = dev_id;
 	struct rionet_private *rnet = netdev_priv(ndev);
 	struct rionet_peer *peer;
+	unsigned char netid = rnet->mport->id;
 
 	if (netif_msg_intr(rnet))
 		printk(KERN_INFO "%s: doorbell sid %4.4x tid %4.4x info %4.4x",
 		       DRV_NAME, sid, tid, info);
 	if (info == RIONET_DOORBELL_JOIN) {
-		if (!nets[rnet->mport->id].active[sid]) {
-			list_for_each_entry(peer,
-					   &nets[rnet->mport->id].peers, node) {
+		if (!nets[netid].active[sid]) {
+			spin_lock(&nets[netid].lock);
+			list_for_each_entry(peer, &nets[netid].peers, node) {
 				if (peer->rdev->destid == sid) {
-					nets[rnet->mport->id].active[sid] =
-								peer->rdev;
-					nets[rnet->mport->id].nact++;
+					nets[netid].active[sid] = peer->rdev;
+					nets[netid].nact++;
 				}
 			}
+			spin_unlock(&nets[netid].lock);
+
 			rio_mport_send_doorbell(mport, sid,
 						RIONET_DOORBELL_JOIN);
 		}
 	} else if (info == RIONET_DOORBELL_LEAVE) {
-		nets[rnet->mport->id].active[sid] = NULL;
-		nets[rnet->mport->id].nact--;
+		spin_lock(&nets[netid].lock);
+		if (nets[netid].active[sid]) {
+			nets[netid].active[sid] = NULL;
+			nets[netid].nact--;
+		}
+		spin_unlock(&nets[netid].lock);
 	} else {
 		if (netif_msg_intr(rnet))
 			printk(KERN_WARNING "%s: unhandled doorbell\n",
@@ -280,7 +291,7 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo
 	struct net_device *ndev = dev_id;
 	struct rionet_private *rnet = netdev_priv(ndev);
 
-	spin_lock(&rnet->lock);
+	spin_lock(&rnet->tx_lock);
 
 	if (netif_msg_intr(rnet))
 		printk(KERN_INFO
@@ -299,14 +310,16 @@ static void rionet_outb_msg_event(struct rio_mport *mport, void *dev_id, int mbo
 	if (rnet->tx_cnt < RIONET_TX_RING_SIZE)
 		netif_wake_queue(ndev);
 
-	spin_unlock(&rnet->lock);
+	spin_unlock(&rnet->tx_lock);
 }
 
 static int rionet_open(struct net_device *ndev)
 {
 	int i, rc = 0;
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
 	struct rionet_private *rnet = netdev_priv(ndev);
+	unsigned char netid = rnet->mport->id;
+	unsigned long flags;
 
 	if (netif_msg_ifup(rnet))
 		printk(KERN_INFO "%s: open\n", DRV_NAME);
@@ -345,20 +358,13 @@ static int rionet_open(struct net_device *ndev)
 	netif_carrier_on(ndev);
 	netif_start_queue(ndev);
 
-	list_for_each_entry_safe(peer, tmp,
-				 &nets[rnet->mport->id].peers, node) {
-		if (!(peer->res = rio_request_outb_dbell(peer->rdev,
-							 RIONET_DOORBELL_JOIN,
-							 RIONET_DOORBELL_LEAVE)))
-		{
-			printk(KERN_ERR "%s: error requesting doorbells\n",
-			       DRV_NAME);
-			continue;
-		}
-
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
 		/* Send a join message */
 		rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
+	rnet->open = true;
 
       out:
 	return rc;
@@ -367,7 +373,9 @@ static int rionet_open(struct net_device *ndev)
 static int rionet_close(struct net_device *ndev)
 {
 	struct rionet_private *rnet = netdev_priv(ndev);
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
+	unsigned char netid = rnet->mport->id;
+	unsigned long flags;
 	int i;
 
 	if (netif_msg_ifup(rnet))
@@ -375,18 +383,21 @@ static int rionet_close(struct net_device *ndev)
 
 	netif_stop_queue(ndev);
 	netif_carrier_off(ndev);
+	rnet->open = false;
 
 	for (i = 0; i < RIONET_RX_RING_SIZE; i++)
 		kfree_skb(rnet->rx_skb[i]);
 
-	list_for_each_entry_safe(peer, tmp,
-				 &nets[rnet->mport->id].peers, node) {
-		if (nets[rnet->mport->id].active[peer->rdev->destid]) {
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
+		if (nets[netid].active[peer->rdev->destid]) {
 			rio_send_doorbell(peer->rdev, RIONET_DOORBELL_LEAVE);
-			nets[rnet->mport->id].active[peer->rdev->destid] = NULL;
+			nets[netid].active[peer->rdev->destid] = NULL;
 		}
-		rio_release_outb_dbell(peer->rdev, peer->res);
+		if (peer->res)
+			rio_release_outb_dbell(peer->rdev, peer->res);
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
 
 	rio_release_inb_dbell(rnet->mport, RIONET_DOORBELL_JOIN,
 			      RIONET_DOORBELL_LEAVE);
@@ -400,22 +411,38 @@ static void rionet_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
 	struct rio_dev *rdev = to_rio_dev(dev);
 	unsigned char netid = rdev->net->hport->id;
-	struct rionet_peer *peer, *tmp;
+	struct rionet_peer *peer;
+	int state, found = 0;
+	unsigned long flags;
 
-	if (dev_rionet_capable(rdev)) {
-		list_for_each_entry_safe(peer, tmp, &nets[netid].peers, node) {
-			if (peer->rdev == rdev) {
-				if (nets[netid].active[rdev->destid]) {
-					nets[netid].active[rdev->destid] = NULL;
-					nets[netid].nact--;
+	if (!dev_rionet_capable(rdev))
+		return;
+
+	spin_lock_irqsave(&nets[netid].lock, flags);
+	list_for_each_entry(peer, &nets[netid].peers, node) {
+		if (peer->rdev == rdev) {
+			list_del(&peer->node);
+			if (nets[netid].active[rdev->destid]) {
+				state = atomic_read(&rdev->state);
+				if (state != RIO_DEVICE_GONE &&
+				    state != RIO_DEVICE_INITIALIZING) {
+					rio_send_doorbell(rdev,
+							RIONET_DOORBELL_LEAVE);
 				}
-
-				list_del(&peer->node);
-				kfree(peer);
-				break;
+				nets[netid].active[rdev->destid] = NULL;
+				nets[netid].nact--;
 			}
+			found = 1;
+			break;
 		}
 	}
+	spin_unlock_irqrestore(&nets[netid].lock, flags);
+
+	if (found) {
+		if (peer->res)
+			rio_release_outb_dbell(rdev, peer->res);
+		kfree(peer);
+	}
 }
 
 static void rionet_get_drvinfo(struct net_device *ndev,
@@ -443,6 +470,17 @@ static void rionet_set_msglevel(struct net_device *ndev, u32 value)
 	rnet->msg_enable = value;
 }
 
+static int rionet_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) {
+		printk(KERN_ERR "%s: Invalid MTU size %d\n",
+		       ndev->name, new_mtu);
+		return -EINVAL;
+	}
+	ndev->mtu = new_mtu;
+	return 0;
+}
+
 static const struct ethtool_ops rionet_ethtool_ops = {
 	.get_drvinfo = rionet_get_drvinfo,
 	.get_msglevel = rionet_get_msglevel,
@@ -454,7 +492,7 @@ static const struct net_device_ops rionet_netdev_ops = {
 	.ndo_open		= rionet_open,
 	.ndo_stop		= rionet_close,
 	.ndo_start_xmit		= rionet_start_xmit,
-	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_change_mtu		= rionet_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
@@ -478,6 +516,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	/* Set up private area */
 	rnet = netdev_priv(ndev);
 	rnet->mport = mport;
+	rnet->open = false;
 
 	/* Set the default MAC address */
 	device_id = rio_local_get_device_id(mport);
@@ -489,7 +528,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	ndev->dev_addr[5] = device_id & 0xff;
 
 	ndev->netdev_ops = &rionet_netdev_ops;
-	ndev->mtu = RIO_MAX_MSG_SIZE - 14;
+	ndev->mtu = RIONET_MAX_MTU;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
 	ndev->ethtool_ops = &rionet_ethtool_ops;
@@ -500,8 +539,11 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	rnet->msg_enable = RIONET_DEFAULT_MSGLEVEL;
 
 	rc = register_netdev(ndev);
-	if (rc != 0)
+	if (rc != 0) {
+		free_pages((unsigned long)nets[mport->id].active,
+			   get_order(rionet_active_bytes));
 		goto out;
+	}
 
 	printk(KERN_INFO "%s: %s %s Version %s, MAC %pM, %s\n",
 	       ndev->name,
@@ -515,8 +557,6 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	return rc;
 }
 
-static unsigned long net_table[RIONET_MAX_NETS/sizeof(unsigned long) + 1];
-
 static int rionet_add_dev(struct device *dev, struct subsys_interface *sif)
 {
 	int rc = -ENODEV;
@@ -525,19 +565,16 @@ static int rionet_add_dev(struct device *dev, struct subsys_interface *sif)
 	struct net_device *ndev = NULL;
 	struct rio_dev *rdev = to_rio_dev(dev);
 	unsigned char netid = rdev->net->hport->id;
-	int oldnet;
 
 	if (netid >= RIONET_MAX_NETS)
 		return rc;
 
-	oldnet = test_and_set_bit(netid, net_table);
-
 	/*
 	 * If first time through this net, make sure local device is rionet
 	 * capable and setup netdev (this step will be skipped in later probes
 	 * on the same net).
 	 */
-	if (!oldnet) {
+	if (!nets[netid].ndev) {
 		rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR,
 					 &lsrc_ops);
 		rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR,
@@ -555,30 +592,56 @@ static int rionet_add_dev(struct device *dev, struct subsys_interface *sif)
 			rc = -ENOMEM;
 			goto out;
 		}
-		nets[netid].ndev = ndev;
+
 		rc = rionet_setup_netdev(rdev->net->hport, ndev);
 		if (rc) {
 			printk(KERN_ERR "%s: failed to setup netdev (rc=%d)\n",
 			       DRV_NAME, rc);
+			free_netdev(ndev);
 			goto out;
 		}
 
 		INIT_LIST_HEAD(&nets[netid].peers);
+		spin_lock_init(&nets[netid].lock);
 		nets[netid].nact = 0;
-	} else if (nets[netid].ndev == NULL)
-		goto out;
+		nets[netid].ndev = ndev;
+	}
 
 	/*
 	 * If the remote device has mailbox/doorbell capabilities,
 	 * add it to the peer list.
 	 */
 	if (dev_rionet_capable(rdev)) {
-		if (!(peer = kmalloc(sizeof(struct rionet_peer), GFP_KERNEL))) {
+		struct rionet_private *rnet;
+		unsigned long flags;
+
+		rnet = netdev_priv(nets[netid].ndev);
+
+		peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+		if (!peer) {
 			rc = -ENOMEM;
 			goto out;
 		}
 		peer->rdev = rdev;
+		peer->res = rio_request_outb_dbell(peer->rdev,
+						RIONET_DOORBELL_JOIN,
+						RIONET_DOORBELL_LEAVE);
+		if (!peer->res) {
+			pr_err("%s: error requesting doorbells\n", DRV_NAME);
+			kfree(peer);
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		spin_lock_irqsave(&nets[netid].lock, flags);
 		list_add_tail(&peer->node, &nets[netid].peers);
+		spin_unlock_irqrestore(&nets[netid].lock, flags);
+		pr_debug("%s: %s add peer %s\n",
+			 DRV_NAME, __func__, rio_name(rdev));
+
+		/* If netdev is already opened, send join request to new peer */
+		if (rnet->open)
+			rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN);
 	}
 
 	return 0;
@@ -586,6 +649,61 @@ out:
 	return rc;
 }
 
+static int rionet_shutdown(struct notifier_block *nb, unsigned long code,
+			   void *unused)
+{
+	struct rionet_peer *peer;
+	unsigned long flags;
+	int i;
+
+	pr_debug("%s: %s\n", DRV_NAME, __func__);
+
+	for (i = 0; i < RIONET_MAX_NETS; i++) {
+		if (!nets[i].ndev)
+			continue;
+
+		spin_lock_irqsave(&nets[i].lock, flags);
+		list_for_each_entry(peer, &nets[i].peers, node) {
+			if (nets[i].active[peer->rdev->destid]) {
+				rio_send_doorbell(peer->rdev,
+						  RIONET_DOORBELL_LEAVE);
+				nets[i].active[peer->rdev->destid] = NULL;
+			}
+		}
+		spin_unlock_irqrestore(&nets[i].lock, flags);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void rionet_remove_mport(struct device *dev,
+				struct class_interface *class_intf)
+{
+	struct rio_mport *mport = to_rio_mport(dev);
+	struct net_device *ndev;
+	int id = mport->id;
+
+	pr_debug("%s %s\n", __func__, mport->name);
+
+	WARN(nets[id].nact, "%s called when connected to %d peers\n",
+	     __func__, nets[id].nact);
+	WARN(!nets[id].ndev, "%s called for mport without NDEV\n",
+	     __func__);
+
+	if (nets[id].ndev) {
+		ndev = nets[id].ndev;
+		netif_stop_queue(ndev);
+		unregister_netdev(ndev);
+
+		free_pages((unsigned long)nets[id].active,
+			   get_order(sizeof(void *) *
+			   RIO_MAX_ROUTE_ENTRIES(mport->sys_size)));
+		nets[id].active = NULL;
+		free_netdev(ndev);
+		nets[id].ndev = NULL;
+	}
+}
+
 #ifdef MODULE
 static struct rio_device_id rionet_id_table[] = {
 	{RIO_DEVICE(RIO_ANY_ID, RIO_ANY_ID)},
@@ -602,40 +720,43 @@ static struct subsys_interface rionet_interface = {
 	.remove_dev	= rionet_remove_dev,
 };
 
+static struct notifier_block rionet_notifier = {
+	.notifier_call = rionet_shutdown,
+};
+
+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class = &rio_mport_class,
+	.add_dev = NULL,
+	.remove_dev = rionet_remove_mport,
+};
+
 static int __init rionet_init(void)
 {
+	int ret;
+
+	ret = register_reboot_notifier(&rionet_notifier);
+	if (ret) {
+		pr_err("%s: failed to register reboot notifier (err=%d)\n",
+		       DRV_NAME, ret);
+		return ret;
+	}
+
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		pr_err("%s: class_interface_register error: %d\n",
+		       DRV_NAME, ret);
+		return ret;
+	}
+
 	return subsys_interface_register(&rionet_interface);
 }
 
 static void __exit rionet_exit(void)
 {
-	struct rionet_private *rnet;
-	struct net_device *ndev;
-	struct rionet_peer *peer, *tmp;
-	int i;
-
-	for (i = 0; i < RIONET_MAX_NETS; i++) {
-		if (nets[i].ndev != NULL) {
-			ndev = nets[i].ndev;
-			rnet = netdev_priv(ndev);
-			unregister_netdev(ndev);
-
-			list_for_each_entry_safe(peer,
-						 tmp, &nets[i].peers, node) {
-				list_del(&peer->node);
-				kfree(peer);
-			}
-
-			free_pages((unsigned long)nets[i].active,
-				 get_order(sizeof(void *) *
-				 RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size)));
-			nets[i].active = NULL;
-
-			free_netdev(ndev);
-		}
-	}
-
+	unregister_reboot_notifier(&rionet_notifier);
 	subsys_interface_unregister(&rionet_interface);
+	class_interface_unregister(&rio_mport_interface);
 }
 
 late_initcall(rionet_init);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d36d5ebf37f3..f20890ee03f3 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -3261,54 +3261,6 @@ void lan78xx_tx_timeout(struct net_device *net)
 	tasklet_schedule(&dev->bh);
 }
 
-struct rtnl_link_stats64 *lan78xx_get_stats64(struct net_device *netdev,
-					      struct rtnl_link_stats64 *storage)
-{
-	struct lan78xx_net *dev = netdev_priv(netdev);
-	struct lan78xx_statstage64 stats;
-
-	/* curr_stat is updated by timer.
-	 * periodic reading from HW will prevent from entering USB auto suspend.
-	 * if autosuspend is disabled, read from HW.
-	 */
-	if (!dev->udev->dev.power.runtime_auto)
-		lan78xx_update_stats(dev);
-
-	mutex_lock(&dev->stats.access_lock);
-	memcpy(&stats, &dev->stats.curr_stat, sizeof(stats));
-	mutex_unlock(&dev->stats.access_lock);
-
-	/* calc by driver */
-	storage->rx_packets = (__u64)netdev->stats.rx_packets;
-	storage->tx_packets = (__u64)netdev->stats.tx_packets;
-	storage->rx_bytes = (__u64)netdev->stats.rx_bytes;
-	storage->tx_bytes = (__u64)netdev->stats.tx_bytes;
-
-	/* use counter */
-	storage->rx_length_errors = stats.rx_undersize_frame_errors +
-				    stats.rx_oversize_frame_errors;
-	storage->rx_crc_errors = stats.rx_fcs_errors;
-	storage->rx_frame_errors = stats.rx_alignment_errors;
-	storage->rx_fifo_errors = stats.rx_dropped_frames;
-	storage->rx_over_errors = stats.rx_oversize_frame_errors;
-	storage->rx_errors = stats.rx_fcs_errors +
-			     stats.rx_alignment_errors +
-			     stats.rx_fragment_errors +
-			     stats.rx_jabber_errors +
-			     stats.rx_undersize_frame_errors +
-			     stats.rx_oversize_frame_errors +
-			     stats.rx_dropped_frames;
-
-	storage->tx_carrier_errors = stats.tx_carrier_errors;
-	storage->tx_errors = stats.tx_fcs_errors +
-			     stats.tx_excess_deferral_errors +
-			     stats.tx_carrier_errors;
-
-	storage->multicast = stats.rx_multicast_frames;
-
-	return storage;
-}
-
 static const struct net_device_ops lan78xx_netdev_ops = {
 	.ndo_open		= lan78xx_open,
 	.ndo_stop		= lan78xx_stop,
@@ -3322,7 +3274,6 @@ static const struct net_device_ops lan78xx_netdev_ops = {
 	.ndo_set_features	= lan78xx_set_features,
 	.ndo_vlan_rx_add_vid	= lan78xx_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= lan78xx_vlan_rx_kill_vid,
-	.ndo_get_stats64	= lan78xx_get_stats64,
 };
 
 static void lan78xx_stat_monitor(unsigned long param)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 800106a7246c..1c0fa364323e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1143,7 +1143,7 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
 static bool vxlan_remcsum(struct vxlanhdr *unparsed,
 			  struct sk_buff *skb, u32 vxflags)
 {
-	size_t start, offset, plen;
+	size_t start, offset;
 
 	if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
 		goto out;
@@ -1151,9 +1151,7 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed,
 	start = vxlan_rco_start(unparsed->vx_vni);
 	offset = start + vxlan_rco_offset(unparsed->vx_vni);
 
-	plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
-
-	if (!pskb_may_pull(skb, plen))
+	if (!pskb_may_pull(skb, offset + sizeof(u16)))
 		return false;
 
 	skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
@@ -1810,10 +1808,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
-	fl6.flowi6_tos = RT_TOS(tos);
 	fl6.daddr = *daddr;
 	fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
-	fl6.flowlabel = label;
+	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = IPPROTO_UDP;
 
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index 588803ad6847..6ccba0d862df 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -357,20 +357,6 @@ static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
 	return 0;
 }
 
-static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
-				phys_addr_t *db_addr,
-				resource_size_t *db_size)
-{
-	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-	if (db_addr)
-		*db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
-	if (db_size)
-		*db_size = sizeof(u32);
-
-	return 0;
-}
-
 static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -415,20 +401,6 @@ static int amd_ntb_spad_write(struct ntb_dev *ntb,
 	return 0;
 }
 
-static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
-				  phys_addr_t *spad_addr)
-{
-	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
-
-	if (idx < 0 || idx >= ndev->spad_count)
-		return -EINVAL;
-
-	if (spad_addr)
-		*spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
-					   ndev->peer_spad + (idx << 2));
-	return 0;
-}
-
 static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
 {
 	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
@@ -472,12 +444,10 @@ static const struct ntb_dev_ops amd_ntb_ops = {
 	.db_clear		= amd_ntb_db_clear,
 	.db_set_mask		= amd_ntb_db_set_mask,
 	.db_clear_mask		= amd_ntb_db_clear_mask,
-	.peer_db_addr		= amd_ntb_peer_db_addr,
 	.peer_db_set		= amd_ntb_peer_db_set,
 	.spad_count		= amd_ntb_spad_count,
 	.spad_read		= amd_ntb_spad_read,
 	.spad_write		= amd_ntb_spad_write,
-	.peer_spad_addr		= amd_ntb_peer_spad_addr,
 	.peer_spad_read		= amd_ntb_peer_spad_read,
 	.peer_spad_write	= amd_ntb_peer_spad_write,
 };
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index ec4775f0ec16..2ef9d9130864 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -124,6 +124,7 @@ struct ntb_transport_qp {
 
 	bool client_ready;
 	bool link_is_up;
+	bool active;
 
 	u8 qp_num;	/* Only 64 QP's are allowed.  0-63 */
 	u64 qp_bit;
@@ -719,6 +720,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
 {
 	qp->link_is_up = false;
+	qp->active = false;
 
 	qp->tx_index = 0;
 	qp->rx_index = 0;
@@ -827,7 +829,7 @@ static void ntb_transport_link_work(struct work_struct *work)
 	struct pci_dev *pdev = ndev->pdev;
 	resource_size_t size;
 	u32 val;
-	int rc, i, spad;
+	int rc = 0, i, spad;
 
 	/* send the local info, in the opposite order of the way we read it */
 	for (i = 0; i < nt->mw_count; i++) {
@@ -897,6 +899,13 @@ static void ntb_transport_link_work(struct work_struct *work)
 out1:
 	for (i = 0; i < nt->mw_count; i++)
 		ntb_free_mw(nt, i);
+
+	/* if there's an actual failure, we should just bail */
+	if (rc < 0) {
+		ntb_link_disable(ndev);
+		return;
+	}
+
 out:
 	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
 		schedule_delayed_work(&nt->link_work,
@@ -926,11 +935,13 @@ static void ntb_qp_link_work(struct work_struct *work)
 	if (val & BIT(qp->qp_num)) {
 		dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
 		qp->link_is_up = true;
+		qp->active = true;
 
 		if (qp->event_handler)
 			qp->event_handler(qp->cb_data, qp->link_is_up);
 
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	} else if (nt->link_is_up)
 		schedule_delayed_work(&qp->link_work,
 				      msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
@@ -1411,7 +1422,8 @@ static void ntb_transport_rxc_db(unsigned long data)
 
 	if (i == qp->rx_max_entry) {
 		/* there is more work to do */
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	} else if (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) {
 		/* the doorbell bit is set: clear it */
 		ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num));
@@ -1422,7 +1434,8 @@ static void ntb_transport_rxc_db(unsigned long data)
 		 * ntb_process_rxc and clearing the doorbell bit:
 		 * there might be some more work to do.
 		 */
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 	}
 }
 
@@ -1760,6 +1773,8 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
 
 	pdev = qp->ndev->pdev;
 
+	qp->active = false;
+
 	if (qp->tx_dma_chan) {
 		struct dma_chan *chan = qp->tx_dma_chan;
 		/* Putting the dma_chan to NULL will force any new traffic to be
@@ -1793,7 +1808,7 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
 	qp_bit = BIT_ULL(qp->qp_num);
 
 	ntb_db_set_mask(qp->ndev, qp_bit);
-	tasklet_disable(&qp->rxc_db_work);
+	tasklet_kill(&qp->rxc_db_work);
 
 	cancel_delayed_work_sync(&qp->link_work);
 
@@ -1886,7 +1901,8 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
 
 	ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q);
 
-	tasklet_schedule(&qp->rxc_db_work);
+	if (qp->active)
+		tasklet_schedule(&qp->rxc_db_work);
 
 	return 0;
 }
@@ -2069,7 +2085,8 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
 		qp_num = __ffs(db_bits);
 		qp = &nt->qp_vec[qp_num];
 
-		tasklet_schedule(&qp->rxc_db_work);
+		if (qp->active)
+			tasklet_schedule(&qp->rxc_db_work);
 
 		db_bits &= ~BIT_ULL(qp_num);
 	}
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index c8a37ba4b4f9..8dfce9c9aad0 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -178,7 +178,7 @@ static void perf_copy_callback(void *data)
 	atomic_dec(&pctx->dma_sync);
 }
 
-static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
 			 char *src, size_t size)
 {
 	struct perf_ctx *perf = pctx->perf;
@@ -189,7 +189,8 @@ static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
 	dma_cookie_t cookie;
 	size_t src_off, dst_off;
 	struct perf_mw *mw = &perf->mw;
-	u64 vbase, dst_vaddr;
+	void __iomem *vbase;
+	void __iomem *dst_vaddr;
 	dma_addr_t dst_phys;
 	int retries = 0;
 
@@ -204,14 +205,14 @@ static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
 	}
 
 	device = chan->device;
-	src_off = (size_t)src & ~PAGE_MASK;
-	dst_off = (size_t)dst & ~PAGE_MASK;
+	src_off = (uintptr_t)src & ~PAGE_MASK;
+	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;
 
 	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
 		return -ENODEV;
 
-	vbase = (u64)(u64 *)mw->vbase;
-	dst_vaddr = (u64)(u64 *)dst;
+	vbase = mw->vbase;
+	dst_vaddr = dst;
 	dst_phys = mw->phys_addr + (dst_vaddr - vbase);
 
 	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
@@ -261,13 +262,13 @@ err_get_unmap:
 	return 0;
 }
 
-static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
 			  u64 buf_size, u64 win_size, u64 total)
 {
 	int chunks, total_chunks, i;
 	int copied_chunks = 0;
 	u64 copied = 0, result;
-	char *tmp = dst;
+	char __iomem *tmp = dst;
 	u64 perf, diff_us;
 	ktime_t kstart, kstop, kdiff;
 
@@ -324,7 +325,7 @@ static int ntb_perf_thread(void *data)
 	struct perf_ctx *perf = pctx->perf;
 	struct pci_dev *pdev = perf->ntb->pdev;
 	struct perf_mw *mw = &perf->mw;
-	char *dst;
+	char __iomem *dst;
 	u64 win_size, buf_size, total;
 	void *src;
 	int rc, node, i;
@@ -364,7 +365,7 @@ static int ntb_perf_thread(void *data)
 	if (buf_size > MAX_TEST_SIZE)
 		buf_size = MAX_TEST_SIZE;
 
-	dst = (char *)mw->vbase;
+	dst = (char __iomem *)mw->vbase;
 
 	atomic_inc(&perf->tsync);
 	while (atomic_read(&perf->tsync) != perf->perf_threads)
@@ -424,6 +425,7 @@ static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
 {
 	struct perf_mw *mw = &perf->mw;
 	size_t xlat_size, buf_size;
+	int rc;
 
 	if (!size)
 		return -EINVAL;
@@ -447,6 +449,13 @@ static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
 		mw->buf_size = 0;
 	}
 
+	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
+		perf_free_mw(perf);
+		return -EIO;
+	}
+
 	return 0;
 }
 
@@ -541,6 +550,8 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 		return 0;
 
 	buf = kmalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
 	out_offset = snprintf(buf, 64, "%d\n", perf->run);
 	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
 	kfree(buf);
@@ -548,6 +559,21 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
 	return ret;
 }
 
+static void threads_cleanup(struct perf_ctx *perf)
+{
+	struct pthr_ctx *pctx;
+	int i;
+
+	perf->run = false;
+	for (i = 0; i < MAX_THREADS; i++) {
+		pctx = &perf->pthr_ctx[i];
+		if (pctx->thread) {
+			kthread_stop(pctx->thread);
+			pctx->thread = NULL;
+		}
+	}
+}
+
 static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 				 size_t count, loff_t *offp)
 {
@@ -563,17 +589,9 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 	if (atomic_read(&perf->tsync) == 0)
 		perf->run = false;
 
-	if (perf->run) {
-		/* lets stop the threads */
-		perf->run = false;
-		for (i = 0; i < MAX_THREADS; i++) {
-			if (perf->pthr_ctx[i].thread) {
-				kthread_stop(perf->pthr_ctx[i].thread);
-				perf->pthr_ctx[i].thread = NULL;
-			} else
-				break;
-		}
-	} else {
+	if (perf->run)
+		threads_cleanup(perf);
+	else {
 		perf->run = true;
 
 		if (perf->perf_threads > MAX_THREADS) {
@@ -604,17 +622,11 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 				kthread_create_on_node(ntb_perf_thread,
 						       (void *)pctx,
 						       node, "ntb_perf %d", i);
-			if (pctx->thread)
+			if (IS_ERR(pctx->thread)) {
+				pctx->thread = NULL;
+				goto err;
+			} else
 				wake_up_process(pctx->thread);
-			else {
-				perf->run = false;
-				for (i = 0; i < MAX_THREADS; i++) {
-					if (pctx->thread) {
-						kthread_stop(pctx->thread);
-						pctx->thread = NULL;
-					}
-				}
-			}
 
 			if (perf->run == false)
 				return -ENXIO;
@@ -623,6 +635,10 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 	}
 
 	return count;
+
+err:
+	threads_cleanup(perf);
+	return -ENXIO;
 }
 
 static const struct file_operations ntb_perf_debugfs_run = {
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 42a01a931989..9461dd639acd 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -146,6 +146,14 @@ struct nvme_nvm_command {
 	};
 };
 
+struct nvme_nvm_completion {
+	__le64	result;		/* Used by LightNVM to return ppa completions */
+	__le16	sq_head;	/* how much of this queue may be reclaimed */
+	__le16	sq_id;		/* submission queue that generated this entry */
+	__u16	command_id;	/* of the command which completed */
+	__le16	status;		/* did the command fail, and if so, why? */
+};
+
 #define NVME_NVM_LP_MLC_PAIRS 886
 struct nvme_nvm_lp_mlc {
 	__u16			num_pairs;
@@ -507,6 +515,10 @@ static inline void nvme_nvm_rqtocmd(struct request *rq, struct nvm_rq *rqd,
 static void nvme_nvm_end_io(struct request *rq, int error)
 {
 	struct nvm_rq *rqd = rq->end_io_data;
+	struct nvme_nvm_completion *cqe = rq->special;
+
+	if (cqe)
+		rqd->ppa_status = le64_to_cpu(cqe->result);
 
 	nvm_end_io(rqd, error);
 
@@ -526,7 +538,8 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (IS_ERR(rq))
 		return -ENOMEM;
 
-	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
+	cmd = kzalloc(sizeof(struct nvme_nvm_command) +
+				sizeof(struct nvme_nvm_completion), GFP_KERNEL);
 	if (!cmd) {
 		blk_mq_free_request(rq);
 		return -ENOMEM;
@@ -545,7 +558,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	rq->cmd = (unsigned char *)cmd;
 	rq->cmd_len = sizeof(struct nvme_nvm_command);
-	rq->special = (void *)0;
+	rq->special = cmd + 1;
 
 	rq->end_io_data = rqd;
 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f8db70ae172d..24ccda303efb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -723,6 +723,13 @@ static void nvme_complete_rq(struct request *req)
 	blk_mq_end_request(req, error);
 }
 
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
+		u16 phase)
+{
+	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+}
+
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
 	u16 head, phase;
@@ -730,13 +737,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 	head = nvmeq->cq_head;
 	phase = nvmeq->cq_phase;
 
-	for (;;) {
+	while (nvme_cqe_valid(nvmeq, head, phase)) {
 		struct nvme_completion cqe = nvmeq->cqes[head];
-		u16 status = le16_to_cpu(cqe.status);
 		struct request *req;
 
-		if ((status & 1) != phase)
-			break;
 		if (++head == nvmeq->q_depth) {
 			head = 0;
 			phase = !phase;
@@ -767,7 +771,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
 			memcpy(req->special, &cqe, sizeof(cqe));
-		blk_mq_complete_request(req, status >> 1);
+		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
 
@@ -808,18 +812,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
-	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
-		return IRQ_NONE;
-	return IRQ_WAKE_THREAD;
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
-	    nvmeq->cq_phase) {
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
 		spin_unlock_irq(&nvmeq->q_lock);
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index a1f37db745ab..209292e067d2 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -128,4 +128,5 @@ config PCI_HYPERV
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
 
+source "drivers/pci/hotplug/Kconfig"
 source "drivers/pci/host/Kconfig"
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index c5014bf95a20..7a0780d56d2d 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -5,7 +5,6 @@ config PCI_DRA7XX
 	bool "TI DRA7xx PCIe controller"
 	select PCIE_DW
 	depends on OF && HAS_IOMEM && TI_PIPE3
-	depends on BROKEN
 	help
 	 Enables support for the PCIe controller in the DRA7xx SoC.  There
 	 are two instances of PCIe controller in DRA7xx.  This controller can
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 166637f2917c..32346b5a8a11 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -13,6 +13,7 @@
 
 #include <linux/bitmap.h>
 #include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/of_device.h>
@@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
 	return NOTIFY_OK;
 }
 
+#ifdef CONFIG_CPU_PM
+static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
+{
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct perf_event *event;
+	int idx;
+
+	for (idx = 0; idx < armpmu->num_events; idx++) {
+		/*
+		 * If the counter is not used skip it, there is no
+		 * need of stopping/restarting it.
+		 */
+		if (!test_bit(idx, hw_events->used_mask))
+			continue;
+
+		event = hw_events->events[idx];
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/*
+			 * Stop and update the counter
+			 */
+			armpmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			 /* Restore and enable the counter */
+			armpmu_start(event, PERF_EF_RELOAD);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+			     void *v)
+{
+	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return NOTIFY_DONE;
+
+	/*
+	 * Always reset the PMU registers on power-up even if
+	 * there are no events running.
+	 */
+	if (cmd == CPU_PM_EXIT && armpmu->reset)
+		armpmu->reset(armpmu);
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		armpmu->stop(armpmu);
+		cpu_pm_pmu_setup(armpmu, cmd);
+		break;
+	case CPU_PM_EXIT:
+		cpu_pm_pmu_setup(armpmu, cmd);
+	case CPU_PM_ENTER_FAILED:
+		armpmu->start(armpmu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
+	return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
+}
+
+static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
+}
+#else
+static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
+static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
+#endif
+
 static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int err;
@@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	if (err)
 		goto out_hw_events;
 
+	err = cpu_pm_pmu_register(cpu_pmu);
+	if (err)
+		goto out_unregister;
+
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
@@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
 	return 0;
 
+out_unregister:
+	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
 out_hw_events:
 	free_percpu(cpu_hw_events);
 	return err;
@@ -753,6 +847,7 @@ out_hw_events:
 
 static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 {
+	cpu_pm_pmu_unregister(cpu_pmu);
 	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
 	free_percpu(cpu_pmu->hw_events);
 }
@@ -889,6 +984,15 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
 		init_fn = of_id->data;
 
+		pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
+							   "secure-reg-access");
+
+		/* arm64 systems boot only as non-secure */
+		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
+			pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
+			pmu->secure_access = false;
+		}
+
 		ret = of_pmu_irq_cfg(pmu);
 		if (!ret)
 			ret = init_fn(pmu);
@@ -898,7 +1002,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 	}
 
 	if (ret) {
-		pr_info("failed to probe PMU!\n");
+		pr_info("%s: failed to probe PMU!\n", of_node_full_name(node));
 		goto out_free;
 	}
 
@@ -918,7 +1022,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 out_destroy:
 	cpu_pmu_destroy(pmu);
 out_free:
-	pr_info("failed to register PMU devices!\n");
+	pr_info("%s: failed to register PMU devices!\n",
+		of_node_full_name(node));
 	kfree(pmu);
 	return ret;
 }
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 69f93a576e45..ed2004be13cf 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -91,10 +91,21 @@ config ASUS_LAPTOP
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config DELL_SMBIOS
+	tristate "Dell SMBIOS Support"
+	depends on DCDBAS
+	default n
+	---help---
+	This module provides common functions for kernel modules using
+	Dell SMBIOS.
+
+	If you have a Dell laptop, say Y or M here.
+
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
 	depends on X86
-	depends on DCDBAS
+	depends on DELL_SMBIOS
+	depends on DMI
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on RFKILL || RFKILL = n
@@ -110,8 +121,10 @@ config DELL_LAPTOP
 config DELL_WMI
 	tristate "Dell WMI extras"
 	depends on ACPI_WMI
+	depends on DMI
 	depends on INPUT
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
+	depends on DELL_SMBIOS
 	select INPUT_SPARSEKMAP
 	---help---
 	  Say Y here if you want to support WMI-based hotkeys on Dell laptops.
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 40574e7390f3..448443c3baba 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_EEEPC_WMI)		+= eeepc-wmi.o
 obj-$(CONFIG_MSI_LAPTOP)	+= msi-laptop.o
 obj-$(CONFIG_ACPI_CMPC)		+= classmate-laptop.o
 obj-$(CONFIG_COMPAL_LAPTOP)	+= compal-laptop.o
+obj-$(CONFIG_DELL_SMBIOS)	+= dell-smbios.o
 obj-$(CONFIG_DELL_LAPTOP)	+= dell-laptop.o
 obj-$(CONFIG_DELL_WMI)		+= dell-wmi.o
 obj-$(CONFIG_DELL_WMI_AIO)	+= dell-wmi-aio.o
diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c
index 1e1e59423889..005629447b0c 100644
--- a/drivers/platform/x86/alienware-wmi.c
+++ b/drivers/platform/x86/alienware-wmi.c
@@ -33,6 +33,9 @@
 #define WMAX_METHOD_BRIGHTNESS		0x3
 #define WMAX_METHOD_ZONE_CONTROL	0x4
 #define WMAX_METHOD_HDMI_CABLE		0x5
+#define WMAX_METHOD_AMPLIFIER_CABLE	0x6
+#define WMAX_METHOD_DEEP_SLEEP_CONTROL	0x0B
+#define WMAX_METHOD_DEEP_SLEEP_STATUS	0x0C
 
 MODULE_AUTHOR("Mario Limonciello <mario_limonciello@dell.com>");
 MODULE_DESCRIPTION("Alienware special feature control");
@@ -60,6 +63,8 @@ enum WMAX_CONTROL_STATES {
 struct quirk_entry {
 	u8 num_zones;
 	u8 hdmi_mux;
+	u8 amplifier;
+	u8 deepslp;
 };
 
 static struct quirk_entry *quirks;
@@ -67,16 +72,43 @@ static struct quirk_entry *quirks;
 static struct quirk_entry quirk_unknown = {
 	.num_zones = 2,
 	.hdmi_mux = 0,
+	.amplifier = 0,
+	.deepslp = 0,
 };
 
-static struct quirk_entry quirk_x51_family = {
+static struct quirk_entry quirk_x51_r1_r2 = {
 	.num_zones = 3,
-	.hdmi_mux = 0.
+	.hdmi_mux = 0,
+	.amplifier = 0,
+	.deepslp = 0,
+};
+
+static struct quirk_entry quirk_x51_r3 = {
+	.num_zones = 4,
+	.hdmi_mux = 0,
+	.amplifier = 1,
+	.deepslp = 0,
 };
 
 static struct quirk_entry quirk_asm100 = {
 	.num_zones = 2,
 	.hdmi_mux = 1,
+	.amplifier = 0,
+	.deepslp = 0,
+};
+
+static struct quirk_entry quirk_asm200 = {
+	.num_zones = 2,
+	.hdmi_mux = 1,
+	.amplifier = 0,
+	.deepslp = 1,
+};
+
+static struct quirk_entry quirk_asm201 = {
+	.num_zones = 2,
+	.hdmi_mux = 1,
+	.amplifier = 1,
+	.deepslp = 1,
 };
 
 static int __init dmi_matched(const struct dmi_system_id *dmi)
@@ -88,12 +120,12 @@ static int __init dmi_matched(const struct dmi_system_id *dmi)
 static const struct dmi_system_id alienware_quirks[] __initconst = {
 	{
 	 .callback = dmi_matched,
-	 .ident = "Alienware X51 R1",
+	 .ident = "Alienware X51 R3",
 	 .matches = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51 R3"),
 		     },
-	 .driver_data = &quirk_x51_family,
+	 .driver_data = &quirk_x51_r3,
 	 },
 	{
 	 .callback = dmi_matched,
@@ -102,17 +134,44 @@ static const struct dmi_system_id alienware_quirks[] __initconst = {
 		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
 		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51 R2"),
 		     },
-	 .driver_data = &quirk_x51_family,
+	 .driver_data = &quirk_x51_r1_r2,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware X51 R1",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "Alienware X51"),
+		     },
+	 .driver_data = &quirk_x51_r1_r2,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM100",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM100"),
+		     },
+	 .driver_data = &quirk_asm100,
+	 },
+	{
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM200",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM200"),
+		     },
+	 .driver_data = &quirk_asm200,
 	 },
 	{
-		.callback = dmi_matched,
-		.ident = "Alienware ASM100",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "ASM100"),
-		},
-		.driver_data = &quirk_asm100,
-	},
+	 .callback = dmi_matched,
+	 .ident = "Alienware ASM201",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR, "Alienware"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "ASM201"),
+		     },
+	 .driver_data = &quirk_asm201,
+	 },
 	{}
 };
 
@@ -133,7 +192,7 @@ struct wmax_brightness_args {
 	u32 percentage;
 };
 
-struct hdmi_args {
+struct wmax_basic_args {
 	u8 arg;
 };
 
@@ -170,7 +229,7 @@ static u8 global_brightness;
 
 /*
  * Helpers used for zone control
-*/
+ */
 static int parse_rgb(const char *buf, struct platform_zone *zone)
 {
 	long unsigned int rgb;
@@ -210,7 +269,7 @@ static struct platform_zone *match_zone(struct device_attribute *attr)
 
 /*
  * Individual RGB zone control
-*/
+ */
 static int alienware_update_led(struct platform_zone *zone)
 {
 	int method_id;
@@ -218,16 +277,16 @@ static int alienware_update_led(struct platform_zone *zone)
 	char *guid;
 	struct acpi_buffer input;
 	struct legacy_led_args legacy_args;
-	struct wmax_led_args wmax_args;
+	struct wmax_led_args wmax_basic_args;
 	if (interface == WMAX) {
-		wmax_args.led_mask = 1 << zone->location;
-		wmax_args.colors = zone->colors;
-		wmax_args.state = lighting_control_state;
+		wmax_basic_args.led_mask = 1 << zone->location;
+		wmax_basic_args.colors = zone->colors;
+		wmax_basic_args.state = lighting_control_state;
 		guid = WMAX_CONTROL_GUID;
 		method_id = WMAX_METHOD_ZONE_CONTROL;
 
-		input.length = (acpi_size) sizeof(wmax_args);
-		input.pointer = &wmax_args;
+		input.length = (acpi_size) sizeof(wmax_basic_args);
+		input.pointer = &wmax_basic_args;
 	} else {
 		legacy_args.colors = zone->colors;
 		legacy_args.brightness = global_brightness;
@@ -283,7 +342,7 @@ static ssize_t zone_set(struct device *dev, struct device_attribute *attr,
 
 /*
  * LED Brightness (Global)
-*/
+ */
 static int wmax_brightness(int brightness)
 {
 	acpi_status status;
@@ -327,7 +386,7 @@ static struct led_classdev global_led = {
 
 /*
  * Lighting control state device attribute (Global)
-*/
+ */
 static ssize_t show_control_state(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
@@ -435,11 +494,7 @@ static void alienware_zone_exit(struct platform_device *dev)
 	kfree(zone_attrs);
 }
 
-/*
-	The HDMI mux sysfs node indicates the status of the HDMI input mux.
-	It can toggle between standard system GPU output and HDMI input.
-*/
-static acpi_status alienware_hdmi_command(struct hdmi_args *in_args,
+static acpi_status alienware_wmax_command(struct wmax_basic_args *in_args,
 					  u32 command, int *out_data)
 {
 	acpi_status status;
@@ -467,16 +522,20 @@ static acpi_status alienware_hdmi_command(struct hdmi_args *in_args,
 
 }
 
+/*
+ *	The HDMI mux sysfs node indicates the status of the HDMI input mux.
+ *	It can toggle between standard system GPU output and HDMI input.
+ */
 static ssize_t show_hdmi_cable(struct device *dev,
 			       struct device_attribute *attr, char *buf)
 {
 	acpi_status status;
 	u32 out_data;
-	struct hdmi_args in_args = {
+	struct wmax_basic_args in_args = {
 		.arg = 0,
 	};
 	status =
-	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_CABLE,
+	    alienware_wmax_command(&in_args, WMAX_METHOD_HDMI_CABLE,
 				   (u32 *) &out_data);
 	if (ACPI_SUCCESS(status)) {
 		if (out_data == 0)
@@ -495,11 +554,11 @@ static ssize_t show_hdmi_source(struct device *dev,
 {
 	acpi_status status;
 	u32 out_data;
-	struct hdmi_args in_args = {
+	struct wmax_basic_args in_args = {
 		.arg = 0,
 	};
 	status =
-	    alienware_hdmi_command(&in_args, WMAX_METHOD_HDMI_STATUS,
+	    alienware_wmax_command(&in_args, WMAX_METHOD_HDMI_STATUS,
 				   (u32 *) &out_data);
 
 	if (ACPI_SUCCESS(status)) {
@@ -519,7 +578,7 @@ static ssize_t toggle_hdmi_source(struct device *dev,
 				  const char *buf, size_t count)
 {
 	acpi_status status;
-	struct hdmi_args args;
+	struct wmax_basic_args args;
 	if (strcmp(buf, "gpu\n") == 0)
 		args.arg = 1;
 	else if (strcmp(buf, "input\n") == 0)
@@ -528,7 +587,7 @@ static ssize_t toggle_hdmi_source(struct device *dev,
 		args.arg = 3;
 	pr_debug("alienware-wmi: setting hdmi to %d : %s", args.arg, buf);
 
-	status = alienware_hdmi_command(&args, WMAX_METHOD_HDMI_SOURCE, NULL);
+	status = alienware_wmax_command(&args, WMAX_METHOD_HDMI_SOURCE, NULL);
 
 	if (ACPI_FAILURE(status))
 		pr_err("alienware-wmi: HDMI toggle failed: results: %u\n",
@@ -563,11 +622,144 @@ static int create_hdmi(struct platform_device *dev)
 
 	ret = sysfs_create_group(&dev->dev.kobj, &hdmi_attribute_group);
 	if (ret)
-		goto error_create_hdmi;
-	return 0;
+		remove_hdmi(dev);
+	return ret;
+}
 
-error_create_hdmi:
-	remove_hdmi(dev);
+/*
+ * Alienware GFX amplifier support
+ * - Currently supports reading cable status
+ * - Leaving expansion room to possibly support dock/undock events later
+ */
+static ssize_t show_amplifier_status(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	u32 out_data;
+	struct wmax_basic_args in_args = {
+		.arg = 0,
+	};
+	status =
+	    alienware_wmax_command(&in_args, WMAX_METHOD_AMPLIFIER_CABLE,
+				   (u32 *) &out_data);
+	if (ACPI_SUCCESS(status)) {
+		if (out_data == 0)
+			return scnprintf(buf, PAGE_SIZE,
+					 "[unconnected] connected unknown\n");
+		else if (out_data == 1)
+			return scnprintf(buf, PAGE_SIZE,
+					 "unconnected [connected] unknown\n");
+	}
+	pr_err("alienware-wmi: unknown amplifier cable status: %d\n", status);
+	return scnprintf(buf, PAGE_SIZE, "unconnected connected [unknown]\n");
+}
+
+static DEVICE_ATTR(status, S_IRUGO, show_amplifier_status, NULL);
+
+static struct attribute *amplifier_attrs[] = {
+	&dev_attr_status.attr,
+	NULL,
+};
+
+static struct attribute_group amplifier_attribute_group = {
+	.name = "amplifier",
+	.attrs = amplifier_attrs,
+};
+
+static void remove_amplifier(struct platform_device *dev)
+{
+	if (quirks->amplifier > 0)
+		sysfs_remove_group(&dev->dev.kobj, &amplifier_attribute_group);
+}
+
+static int create_amplifier(struct platform_device *dev)
+{
+	int ret;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &amplifier_attribute_group);
+	if (ret)
+		remove_amplifier(dev);
+	return ret;
+}
+
+/*
+ * Deep Sleep Control support
+ * - Modifies BIOS setting for deep sleep control allowing extra wakeup events
+ */
+static ssize_t show_deepsleep_status(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	acpi_status status;
+	u32 out_data;
+	struct wmax_basic_args in_args = {
+		.arg = 0,
+	};
+	status = alienware_wmax_command(&in_args, WMAX_METHOD_DEEP_SLEEP_STATUS,
+					(u32 *) &out_data);
+	if (ACPI_SUCCESS(status)) {
+		if (out_data == 0)
+			return scnprintf(buf, PAGE_SIZE,
+					 "[disabled] s5 s5_s4\n");
+		else if (out_data == 1)
+			return scnprintf(buf, PAGE_SIZE,
+					 "disabled [s5] s5_s4\n");
+		else if (out_data == 2)
+			return scnprintf(buf, PAGE_SIZE,
+					 "disabled s5 [s5_s4]\n");
+	}
+	pr_err("alienware-wmi: unknown deep sleep status: %d\n", status);
+	return scnprintf(buf, PAGE_SIZE, "disabled s5 s5_s4 [unknown]\n");
+}
+
+static ssize_t toggle_deepsleep(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	acpi_status status;
+	struct wmax_basic_args args;
+
+	if (strcmp(buf, "disabled\n") == 0)
+		args.arg = 0;
+	else if (strcmp(buf, "s5\n") == 0)
+		args.arg = 1;
+	else
+		args.arg = 2;
+	pr_debug("alienware-wmi: setting deep sleep to %d : %s", args.arg, buf);
+
+	status = alienware_wmax_command(&args, WMAX_METHOD_DEEP_SLEEP_CONTROL,
+					NULL);
+
+	if (ACPI_FAILURE(status))
+		pr_err("alienware-wmi: deep sleep control failed: results: %u\n",
+			status);
+	return count;
+}
+
+static DEVICE_ATTR(deepsleep, S_IRUGO | S_IWUSR, show_deepsleep_status, toggle_deepsleep);
+
+static struct attribute *deepsleep_attrs[] = {
+	&dev_attr_deepsleep.attr,
+	NULL,
+};
+
+static struct attribute_group deepsleep_attribute_group = {
+	.name = "deepsleep",
+	.attrs = deepsleep_attrs,
+};
+
+static void remove_deepsleep(struct platform_device *dev)
+{
+	if (quirks->deepslp > 0)
+		sysfs_remove_group(&dev->dev.kobj, &deepsleep_attribute_group);
+}
+
+static int create_deepsleep(struct platform_device *dev)
+{
+	int ret;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &deepsleep_attribute_group);
+	if (ret)
+		remove_deepsleep(dev);
 	return ret;
 }
 
@@ -606,6 +798,18 @@ static int __init alienware_wmi_init(void)
 			goto fail_prep_hdmi;
 	}
 
+	if (quirks->amplifier > 0) {
+		ret = create_amplifier(platform_device);
+		if (ret)
+			goto fail_prep_amplifier;
+	}
+
+	if (quirks->deepslp > 0) {
+		ret = create_deepsleep(platform_device);
+		if (ret)
+			goto fail_prep_deepsleep;
+	}
+
 	ret = alienware_zone_init(platform_device);
 	if (ret)
 		goto fail_prep_zones;
@@ -614,6 +818,8 @@ static int __init alienware_wmi_init(void)
 
 fail_prep_zones:
 	alienware_zone_exit(platform_device);
+fail_prep_deepsleep:
+fail_prep_amplifier:
 fail_prep_hdmi:
 	platform_device_del(platform_device);
 fail_platform_device2:
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index f236250ac106..4034d2d4c507 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -19,6 +19,7 @@
 #include <linux/acpi.h>
 #include <linux/pnp.h>
 #include <linux/apple_bl.h>
+#include <linux/apple-gmux.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
@@ -57,7 +58,9 @@ struct apple_gmux_data {
 	/* switcheroo data */
 	acpi_handle dhandle;
 	int gpe;
-	enum vga_switcheroo_client_id resume_client_id;
+	enum vga_switcheroo_client_id switch_state_display;
+	enum vga_switcheroo_client_id switch_state_ddc;
+	enum vga_switcheroo_client_id switch_state_external;
 	enum vga_switcheroo_state power_state;
 	struct completion powerchange_done;
 };
@@ -368,19 +371,70 @@ static const struct backlight_ops gmux_bl_ops = {
  * for the selected GPU.
  */
 
+static void gmux_read_switch_state(struct apple_gmux_data *gmux_data)
+{
+	if (gmux_read8(gmux_data, GMUX_PORT_SWITCH_DDC) == 1)
+		gmux_data->switch_state_ddc = VGA_SWITCHEROO_IGD;
+	else
+		gmux_data->switch_state_ddc = VGA_SWITCHEROO_DIS;
+
+	if (gmux_read8(gmux_data, GMUX_PORT_SWITCH_DISPLAY) == 2)
+		gmux_data->switch_state_display = VGA_SWITCHEROO_IGD;
+	else
+		gmux_data->switch_state_display = VGA_SWITCHEROO_DIS;
+
+	if (gmux_read8(gmux_data, GMUX_PORT_SWITCH_EXTERNAL) == 2)
+		gmux_data->switch_state_external = VGA_SWITCHEROO_IGD;
+	else
+		gmux_data->switch_state_external = VGA_SWITCHEROO_DIS;
+}
+
+static void gmux_write_switch_state(struct apple_gmux_data *gmux_data)
+{
+	if (gmux_data->switch_state_ddc == VGA_SWITCHEROO_IGD)
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_DDC, 1);
+	else
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_DDC, 2);
+
+	if (gmux_data->switch_state_display == VGA_SWITCHEROO_IGD)
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_DISPLAY, 2);
+	else
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_DISPLAY, 3);
+
+	if (gmux_data->switch_state_external == VGA_SWITCHEROO_IGD)
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_EXTERNAL, 2);
+	else
+		gmux_write8(gmux_data, GMUX_PORT_SWITCH_EXTERNAL, 3);
+}
+
 static int gmux_switchto(enum vga_switcheroo_client_id id)
 {
-	if (id == VGA_SWITCHEROO_IGD) {
+	apple_gmux_data->switch_state_ddc = id;
+	apple_gmux_data->switch_state_display = id;
+	apple_gmux_data->switch_state_external = id;
+
+	gmux_write_switch_state(apple_gmux_data);
+
+	return 0;
+}
+
+static int gmux_switch_ddc(enum vga_switcheroo_client_id id)
+{
+	enum vga_switcheroo_client_id old_ddc_owner =
+		apple_gmux_data->switch_state_ddc;
+
+	if (id == old_ddc_owner)
+		return id;
+
+	pr_debug("Switching DDC from %d to %d\n", old_ddc_owner, id);
+	apple_gmux_data->switch_state_ddc = id;
+
+	if (id == VGA_SWITCHEROO_IGD)
 		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_DDC, 1);
-		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_DISPLAY, 2);
-		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_EXTERNAL, 2);
-	} else {
+	else
 		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_DDC, 2);
-		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_DISPLAY, 3);
-		gmux_write8(apple_gmux_data, GMUX_PORT_SWITCH_EXTERNAL, 3);
-	}
 
-	return 0;
+	return old_ddc_owner;
 }
 
 /**
@@ -440,17 +494,15 @@ static int gmux_get_client_id(struct pci_dev *pdev)
 		return VGA_SWITCHEROO_DIS;
 }
 
-static enum vga_switcheroo_client_id
-gmux_active_client(struct apple_gmux_data *gmux_data)
-{
-	if (gmux_read8(gmux_data, GMUX_PORT_SWITCH_DISPLAY) == 2)
-		return VGA_SWITCHEROO_IGD;
-
-	return VGA_SWITCHEROO_DIS;
-}
+static const struct vga_switcheroo_handler gmux_handler_indexed = {
+	.switchto = gmux_switchto,
+	.power_state = gmux_set_power_state,
+	.get_client_id = gmux_get_client_id,
+};
 
-static const struct vga_switcheroo_handler gmux_handler = {
+static const struct vga_switcheroo_handler gmux_handler_classic = {
 	.switchto = gmux_switchto,
+	.switch_ddc = gmux_switch_ddc,
 	.power_state = gmux_set_power_state,
 	.get_client_id = gmux_get_client_id,
 };
@@ -513,7 +565,6 @@ static int gmux_suspend(struct device *dev)
 	struct pnp_dev *pnp = to_pnp_dev(dev);
 	struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp);
 
-	gmux_data->resume_client_id = gmux_active_client(gmux_data);
 	gmux_disable_interrupts(gmux_data);
 	return 0;
 }
@@ -524,7 +575,7 @@ static int gmux_resume(struct device *dev)
 	struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp);
 
 	gmux_enable_interrupts(gmux_data);
-	gmux_switchto(gmux_data->resume_client_id);
+	gmux_write_switch_state(gmux_data);
 	if (gmux_data->power_state == VGA_SWITCHEROO_OFF)
 		gmux_set_discrete_state(gmux_data, gmux_data->power_state);
 	return 0;
@@ -704,9 +755,23 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 	apple_gmux_data = gmux_data;
 	init_completion(&gmux_data->powerchange_done);
 	gmux_enable_interrupts(gmux_data);
+	gmux_read_switch_state(gmux_data);
 
-	if (vga_switcheroo_register_handler(&gmux_handler)) {
-		ret = -ENODEV;
+	/*
+	 * Retina MacBook Pros cannot switch the panel's AUX separately
+	 * and need eDP pre-calibration. They are distinguishable from
+	 * pre-retinas by having an "indexed" gmux.
+	 *
+	 * Pre-retina MacBook Pros can switch the panel's DDC separately.
+	 */
+	if (gmux_data->indexed)
+		ret = vga_switcheroo_register_handler(&gmux_handler_indexed,
+					      VGA_SWITCHEROO_NEEDS_EDP_CONFIG);
+	else
+		ret = vga_switcheroo_register_handler(&gmux_handler_classic,
+					      VGA_SWITCHEROO_CAN_SWITCH_DDC);
+	if (ret) {
+		pr_err("Failed to register vga_switcheroo handler\n");
 		goto err_register_handler;
 	}
 
@@ -764,7 +829,7 @@ static void gmux_remove(struct pnp_dev *pnp)
 }
 
 static const struct pnp_device_id gmux_device_ids[] = {
-	{"APP000B", 0},
+	{GMUX_ACPI_HID, 0},
 	{"", 0}
 };
 
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 131fee2b093e..091ca7ada8fc 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -272,6 +272,15 @@ static const struct dmi_system_id asus_quirks[] = {
 	},
 	{
 		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X75VD",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X75VD"),
+		},
+		.driver_data = &quirk_asus_wapf4,
+	},
+	{
+		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. 1015E",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index aaeeae81e3a9..2c2f02b2e08a 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -28,12 +28,11 @@
 #include <linux/acpi.h>
 #include <linux/mm.h>
 #include <linux/i8042.h>
-#include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <acpi/video.h>
-#include "../../firmware/dcdbas.h"
 #include "dell-rbtn.h"
+#include "dell-smbios.h"
 
 #define BRIGHTNESS_TOKEN 0x7d
 #define KBD_LED_OFF_TOKEN 0x01E1
@@ -44,33 +43,6 @@
 #define KBD_LED_AUTO_75_TOKEN 0x02EC
 #define KBD_LED_AUTO_100_TOKEN 0x02F6
 
-/* This structure will be modified by the firmware when we enter
- * system management mode, hence the volatiles */
-
-struct calling_interface_buffer {
-	u16 class;
-	u16 select;
-	volatile u32 input[4];
-	volatile u32 output[4];
-} __packed;
-
-struct calling_interface_token {
-	u16 tokenID;
-	u16 location;
-	union {
-		u16 value;
-		u16 stringlength;
-	};
-};
-
-struct calling_interface_structure {
-	struct dmi_header header;
-	u16 cmdIOAddress;
-	u8 cmdIOCode;
-	u32 supportedCmds;
-	struct calling_interface_token tokens[];
-} __packed;
-
 struct quirk_entry {
 	u8 touchpad_led;
 
@@ -103,11 +75,6 @@ static struct quirk_entry quirk_dell_xps13_9333 = {
 	.kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
 };
 
-static int da_command_address;
-static int da_command_code;
-static int da_num_tokens;
-static struct calling_interface_token *da_tokens;
-
 static struct platform_driver platform_driver = {
 	.driver = {
 		.name = "dell-laptop",
@@ -306,126 +273,6 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
 	{ }
 };
 
-static struct calling_interface_buffer *buffer;
-static DEFINE_MUTEX(buffer_mutex);
-
-static void clear_buffer(void)
-{
-	memset(buffer, 0, sizeof(struct calling_interface_buffer));
-}
-
-static void get_buffer(void)
-{
-	mutex_lock(&buffer_mutex);
-	clear_buffer();
-}
-
-static void release_buffer(void)
-{
-	mutex_unlock(&buffer_mutex);
-}
-
-static void __init parse_da_table(const struct dmi_header *dm)
-{
-	/* Final token is a terminator, so we don't want to copy it */
-	int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
-	struct calling_interface_token *new_da_tokens;
-	struct calling_interface_structure *table =
-		container_of(dm, struct calling_interface_structure, header);
-
-	/* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
-	   6 bytes of entry */
-
-	if (dm->length < 17)
-		return;
-
-	da_command_address = table->cmdIOAddress;
-	da_command_code = table->cmdIOCode;
-
-	new_da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
-				 sizeof(struct calling_interface_token),
-				 GFP_KERNEL);
-
-	if (!new_da_tokens)
-		return;
-	da_tokens = new_da_tokens;
-
-	memcpy(da_tokens+da_num_tokens, table->tokens,
-	       sizeof(struct calling_interface_token) * tokens);
-
-	da_num_tokens += tokens;
-}
-
-static void __init find_tokens(const struct dmi_header *dm, void *dummy)
-{
-	switch (dm->type) {
-	case 0xd4: /* Indexed IO */
-	case 0xd5: /* Protected Area Type 1 */
-	case 0xd6: /* Protected Area Type 2 */
-		break;
-	case 0xda: /* Calling interface */
-		parse_da_table(dm);
-		break;
-	}
-}
-
-static int find_token_id(int tokenid)
-{
-	int i;
-
-	for (i = 0; i < da_num_tokens; i++) {
-		if (da_tokens[i].tokenID == tokenid)
-			return i;
-	}
-
-	return -1;
-}
-
-static int find_token_location(int tokenid)
-{
-	int id;
-
-	id = find_token_id(tokenid);
-	if (id == -1)
-		return -1;
-
-	return da_tokens[id].location;
-}
-
-static struct calling_interface_buffer *
-dell_send_request(struct calling_interface_buffer *buffer, int class,
-		  int select)
-{
-	struct smi_cmd command;
-
-	command.magic = SMI_CMD_MAGIC;
-	command.command_address = da_command_address;
-	command.command_code = da_command_code;
-	command.ebx = virt_to_phys(buffer);
-	command.ecx = 0x42534931;
-
-	buffer->class = class;
-	buffer->select = select;
-
-	dcdbas_smi_request(&command);
-
-	return buffer;
-}
-
-static inline int dell_smi_error(int value)
-{
-	switch (value) {
-	case 0: /* Completed successfully */
-		return 0;
-	case -1: /* Completed with error */
-		return -EIO;
-	case -2: /* Function not supported */
-		return -ENXIO;
-	default: /* Unknown error */
-		return -EINVAL;
-	}
-}
-
 /*
  * Derived from information in smbios-wireless-ctl:
  *
@@ -548,6 +395,7 @@ static inline int dell_smi_error(int value)
 
 static int dell_rfkill_set(void *data, bool blocked)
 {
+	struct calling_interface_buffer *buffer;
 	int disable = blocked ? 1 : 0;
 	unsigned long radio = (unsigned long)data;
 	int hwswitch_bit = (unsigned long)data - 1;
@@ -555,19 +403,19 @@ static int dell_rfkill_set(void *data, bool blocked)
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0)
 		goto out;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	hwswitch = buffer->output[1];
 
@@ -577,27 +425,28 @@ static int dell_rfkill_set(void *data, bool blocked)
 	    (status & BIT(0)) && !(status & BIT(16)))
 		disable = 1;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = (1 | (radio<<8) | (disable << 16));
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 
  out:
-	release_buffer();
-	return dell_smi_error(ret);
+	dell_smbios_release_buffer();
+	return dell_smbios_error(ret);
 }
 
 /* Must be called with the buffer held */
 static void dell_rfkill_update_sw_state(struct rfkill *rfkill, int radio,
-					int status)
+					int status,
+					struct calling_interface_buffer *buffer)
 {
 	if (status & BIT(0)) {
 		/* Has hw-switch, sync sw_state to BIOS */
 		int block = rfkill_blocked(rfkill);
-		clear_buffer();
+		dell_smbios_clear_buffer();
 		buffer->input[0] = (1 | (radio << 8) | (block << 16));
-		dell_send_request(buffer, 17, 11);
+		dell_smbios_send_request(17, 11);
 	} else {
 		/* No hw-switch, sync BIOS state to sw_state */
 		rfkill_set_sw_state(rfkill, !!(status & BIT(radio + 16)));
@@ -613,30 +462,31 @@ static void dell_rfkill_update_hw_state(struct rfkill *rfkill, int radio,
 
 static void dell_rfkill_query(struct rfkill *rfkill, void *data)
 {
+	struct calling_interface_buffer *buffer;
 	int radio = ((unsigned long)data & 0xF);
 	int hwswitch;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0 || !(status & BIT(0))) {
-		release_buffer();
+		dell_smbios_release_buffer();
 		return;
 	}
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	hwswitch = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	if (ret != 0)
 		return;
@@ -653,25 +503,26 @@ static struct dentry *dell_laptop_dir;
 
 static int dell_debugfs_show(struct seq_file *s, void *data)
 {
+	struct calling_interface_buffer *buffer;
 	int hwswitch_state;
 	int hwswitch_ret;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	hwswitch_ret = buffer->output[0];
 	hwswitch_state = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	seq_printf(s, "return:\t%d\n", ret);
 	seq_printf(s, "status:\t0x%X\n", status);
@@ -752,23 +603,24 @@ static const struct file_operations dell_debugfs_fops = {
 
 static void dell_update_rfkill(struct work_struct *ignored)
 {
+	struct calling_interface_buffer *buffer;
 	int hwswitch = 0;
 	int status;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
 
 	if (ret != 0)
 		goto out;
 
-	clear_buffer();
+	dell_smbios_clear_buffer();
 
 	buffer->input[0] = 0x2;
-	dell_send_request(buffer, 17, 11);
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 
 	if (ret == 0 && (status & BIT(0)))
@@ -776,20 +628,21 @@ static void dell_update_rfkill(struct work_struct *ignored)
 
 	if (wifi_rfkill) {
 		dell_rfkill_update_hw_state(wifi_rfkill, 1, status, hwswitch);
-		dell_rfkill_update_sw_state(wifi_rfkill, 1, status);
+		dell_rfkill_update_sw_state(wifi_rfkill, 1, status, buffer);
 	}
 	if (bluetooth_rfkill) {
 		dell_rfkill_update_hw_state(bluetooth_rfkill, 2, status,
 					    hwswitch);
-		dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status);
+		dell_rfkill_update_sw_state(bluetooth_rfkill, 2, status,
+					    buffer);
 	}
 	if (wwan_rfkill) {
 		dell_rfkill_update_hw_state(wwan_rfkill, 3, status, hwswitch);
-		dell_rfkill_update_sw_state(wwan_rfkill, 3, status);
+		dell_rfkill_update_sw_state(wwan_rfkill, 3, status, buffer);
 	}
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 }
 static DECLARE_DELAYED_WORK(dell_rfkill_work, dell_update_rfkill);
 
@@ -833,6 +686,7 @@ static struct notifier_block dell_laptop_rbtn_notifier = {
 
 static int __init dell_setup_rfkill(void)
 {
+	struct calling_interface_buffer *buffer;
 	int status, ret, whitelisted;
 	const char *product;
 
@@ -848,11 +702,11 @@ static int __init dell_setup_rfkill(void)
 	if (!force_rfkill && !whitelisted)
 		return 0;
 
-	get_buffer();
-	dell_send_request(buffer, 17, 11);
+	buffer = dell_smbios_get_buffer();
+	dell_smbios_send_request(17, 11);
 	ret = buffer->output[0];
 	status = buffer->output[1];
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	/* dell wireless info smbios call is not supported */
 	if (ret != 0)
@@ -1005,51 +859,53 @@ static void dell_cleanup_rfkill(void)
 
 static int dell_send_intensity(struct backlight_device *bd)
 {
-	int token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token == -1)
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (!token)
 		return -ENODEV;
 
-	get_buffer();
-	buffer->input[0] = token;
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
 	buffer->input[1] = bd->props.brightness;
 
 	if (power_supply_is_system_supplied() > 0)
-		dell_send_request(buffer, 1, 2);
+		dell_smbios_send_request(1, 2);
 	else
-		dell_send_request(buffer, 1, 1);
+		dell_smbios_send_request(1, 1);
 
-	ret = dell_smi_error(buffer->output[0]);
+	ret = dell_smbios_error(buffer->output[0]);
 
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
 static int dell_get_intensity(struct backlight_device *bd)
 {
-	int token;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token == -1)
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (!token)
 		return -ENODEV;
 
-	get_buffer();
-	buffer->input[0] = token;
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
 
 	if (power_supply_is_system_supplied() > 0)
-		dell_send_request(buffer, 0, 2);
+		dell_smbios_send_request(0, 2);
 	else
-		dell_send_request(buffer, 0, 1);
+		dell_smbios_send_request(0, 1);
 
 	if (buffer->output[0])
-		ret = dell_smi_error(buffer->output[0]);
+		ret = dell_smbios_error(buffer->output[0]);
 	else
 		ret = buffer->output[1];
 
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
@@ -1293,17 +1149,18 @@ static bool kbd_led_present;
 
 static int kbd_get_info(struct kbd_info *info)
 {
+	struct calling_interface_buffer *buffer;
 	u8 units;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
 	buffer->input[0] = 0x0;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
 
 	if (ret) {
-		ret = dell_smi_error(ret);
+		ret = dell_smbios_error(ret);
 		goto out;
 	}
 
@@ -1323,7 +1180,7 @@ static int kbd_get_info(struct kbd_info *info)
 		info->days = (buffer->output[3] >> 24) & 0xFF;
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
@@ -1382,16 +1239,17 @@ static int kbd_set_level(struct kbd_state *state, u8 level)
 
 static int kbd_get_state(struct kbd_state *state)
 {
+	struct calling_interface_buffer *buffer;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 
 	buffer->input[0] = 0x1;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
 
 	if (ret) {
-		ret = dell_smi_error(ret);
+		ret = dell_smbios_error(ret);
 		goto out;
 	}
 
@@ -1407,15 +1265,16 @@ static int kbd_get_state(struct kbd_state *state)
 	state->level = (buffer->output[2] >> 16) & 0xFF;
 
  out:
-	release_buffer();
+	dell_smbios_release_buffer();
 	return ret;
 }
 
 static int kbd_set_state(struct kbd_state *state)
 {
+	struct calling_interface_buffer *buffer;
 	int ret;
 
-	get_buffer();
+	buffer = dell_smbios_get_buffer();
 	buffer->input[0] = 0x2;
 	buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
 	buffer->input[1] |= (state->triggers & 0xFF) << 16;
@@ -1423,11 +1282,11 @@ static int kbd_set_state(struct kbd_state *state)
 	buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
 	buffer->input[2] = state->als_setting & 0xFF;
 	buffer->input[2] |= (state->level & 0xFF) << 16;
-	dell_send_request(buffer, 4, 11);
+	dell_smbios_send_request(4, 11);
 	ret = buffer->output[0];
-	release_buffer();
+	dell_smbios_release_buffer();
 
-	return dell_smi_error(ret);
+	return dell_smbios_error(ret);
 }
 
 static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
@@ -1452,50 +1311,52 @@ static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
 
 static int kbd_set_token_bit(u8 bit)
 {
-	int id;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 
 	if (bit >= ARRAY_SIZE(kbd_tokens))
 		return -EINVAL;
 
-	id = find_token_id(kbd_tokens[bit]);
-	if (id == -1)
+	token = dell_smbios_find_token(kbd_tokens[bit]);
+	if (!token)
 		return -EINVAL;
 
-	get_buffer();
-	buffer->input[0] = da_tokens[id].location;
-	buffer->input[1] = da_tokens[id].value;
-	dell_send_request(buffer, 1, 0);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	buffer->input[1] = token->value;
+	dell_smbios_send_request(1, 0);
 	ret = buffer->output[0];
-	release_buffer();
+	dell_smbios_release_buffer();
 
-	return dell_smi_error(ret);
+	return dell_smbios_error(ret);
 }
 
 static int kbd_get_token_bit(u8 bit)
 {
-	int id;
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int ret;
 	int val;
 
 	if (bit >= ARRAY_SIZE(kbd_tokens))
 		return -EINVAL;
 
-	id = find_token_id(kbd_tokens[bit]);
-	if (id == -1)
+	token = dell_smbios_find_token(kbd_tokens[bit]);
+	if (!token)
 		return -EINVAL;
 
-	get_buffer();
-	buffer->input[0] = da_tokens[id].location;
-	dell_send_request(buffer, 0, 0);
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = token->location;
+	dell_smbios_send_request(0, 0);
 	ret = buffer->output[0];
 	val = buffer->output[1];
-	release_buffer();
+	dell_smbios_release_buffer();
 
 	if (ret)
-		return dell_smi_error(ret);
+		return dell_smbios_error(ret);
 
-	return (val == da_tokens[id].value);
+	return (val == token->value);
 }
 
 static int kbd_get_first_active_token_bit(void)
@@ -1597,7 +1458,7 @@ static inline void kbd_init_tokens(void)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i)
-		if (find_token_id(kbd_tokens[i]) != -1)
+		if (dell_smbios_find_token(kbd_tokens[i]))
 			kbd_token_bits |= BIT(i);
 }
 
@@ -2111,8 +1972,9 @@ static void kbd_led_exit(void)
 
 static int __init dell_init(void)
 {
+	struct calling_interface_buffer *buffer;
+	struct calling_interface_token *token;
 	int max_intensity = 0;
-	int token;
 	int ret;
 
 	if (!dmi_check_system(dell_device_table))
@@ -2122,13 +1984,6 @@ static int __init dell_init(void)
 	/* find if this machine support other functions */
 	dmi_check_system(dell_quirks);
 
-	dmi_walk(find_tokens, NULL);
-
-	if (!da_tokens)  {
-		pr_info("Unable to find dmi tokens\n");
-		return -ENODEV;
-	}
-
 	ret = platform_driver_register(&platform_driver);
 	if (ret)
 		goto fail_platform_driver;
@@ -2141,16 +1996,6 @@ static int __init dell_init(void)
 	if (ret)
 		goto fail_platform_device2;
 
-	/*
-	 * Allocate buffer below 4GB for SMI data--only 32-bit physical addr
-	 * is passed to SMI handler.
-	 */
-	buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32);
-	if (!buffer) {
-		ret = -ENOMEM;
-		goto fail_buffer;
-	}
-
 	ret = dell_setup_rfkill();
 
 	if (ret) {
@@ -2171,14 +2016,14 @@ static int __init dell_init(void)
 	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
 		return 0;
 
-	token = find_token_location(BRIGHTNESS_TOKEN);
-	if (token != -1) {
-		get_buffer();
-		buffer->input[0] = token;
-		dell_send_request(buffer, 0, 2);
+	token = dell_smbios_find_token(BRIGHTNESS_TOKEN);
+	if (token) {
+		buffer = dell_smbios_get_buffer();
+		buffer->input[0] = token->location;
+		dell_smbios_send_request(0, 2);
 		if (buffer->output[0] == 0)
 			max_intensity = buffer->output[3];
-		release_buffer();
+		dell_smbios_release_buffer();
 	}
 
 	if (max_intensity) {
@@ -2208,15 +2053,12 @@ static int __init dell_init(void)
 fail_backlight:
 	dell_cleanup_rfkill();
 fail_rfkill:
-	free_page((unsigned long)buffer);
-fail_buffer:
 	platform_device_del(platform_device);
 fail_platform_device2:
 	platform_device_put(platform_device);
 fail_platform_device1:
 	platform_driver_unregister(&platform_driver);
 fail_platform_driver:
-	kfree(da_tokens);
 	return ret;
 }
 
@@ -2232,8 +2074,6 @@ static void __exit dell_exit(void)
 		platform_device_unregister(platform_device);
 		platform_driver_unregister(&platform_driver);
 	}
-	kfree(da_tokens);
-	free_page((unsigned long)buffer);
 }
 
 /* dell-rbtn.c driver export functions which will not work correctly (and could
diff --git a/drivers/platform/x86/dell-rbtn.c b/drivers/platform/x86/dell-rbtn.c
index cd410e392550..b51a2008d782 100644
--- a/drivers/platform/x86/dell-rbtn.c
+++ b/drivers/platform/x86/dell-rbtn.c
@@ -217,6 +217,21 @@ static void rbtn_notify(struct acpi_device *device, u32 event);
 static const struct acpi_device_id rbtn_ids[] = {
 	{ "DELRBTN", 0 },
 	{ "DELLABCE", 0 },
+
+	/*
+	 * This driver can also handle the "DELLABC6" device that
+	 * appears on the XPS 13 9350, but that device is disabled
+	 * by the DSDT unless booted with acpi_osi="!Windows 2012"
+	 * acpi_osi="!Windows 2013".  Even if we boot that and bind
+	 * the driver, we seem to have inconsistent behavior in
+	 * which NetworkManager can get out of sync with the rfkill
+	 * state.
+	 *
+	 * On the XPS 13 9350 and similar laptops, we're not supposed to
+	 * use DELLABC6 at all.  Instead, we handle the rfkill button
+	 * via the intel-hid driver.
+	 */
+
 	{ "", 0 },
 };
 
diff --git a/drivers/platform/x86/dell-smbios.c b/drivers/platform/x86/dell-smbios.c
new file mode 100644
index 000000000000..d2412ab097da
--- /dev/null
+++ b/drivers/platform/x86/dell-smbios.c
@@ -0,0 +1,193 @@
+/*
+ *  Common functions for kernel modules using Dell SMBIOS
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include "../../firmware/dcdbas.h"
+#include "dell-smbios.h"
+
+struct calling_interface_structure {
+	struct dmi_header header;
+	u16 cmdIOAddress;
+	u8 cmdIOCode;
+	u32 supportedCmds;
+	struct calling_interface_token tokens[];
+} __packed;
+
+static struct calling_interface_buffer *buffer;
+static DEFINE_MUTEX(buffer_mutex);
+
+static int da_command_address;
+static int da_command_code;
+static int da_num_tokens;
+static struct calling_interface_token *da_tokens;
+
+int dell_smbios_error(int value)
+{
+	switch (value) {
+	case 0: /* Completed successfully */
+		return 0;
+	case -1: /* Completed with error */
+		return -EIO;
+	case -2: /* Function not supported */
+		return -ENXIO;
+	default: /* Unknown error */
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(dell_smbios_error);
+
+struct calling_interface_buffer *dell_smbios_get_buffer(void)
+{
+	mutex_lock(&buffer_mutex);
+	dell_smbios_clear_buffer();
+	return buffer;
+}
+EXPORT_SYMBOL_GPL(dell_smbios_get_buffer);
+
+void dell_smbios_clear_buffer(void)
+{
+	memset(buffer, 0, sizeof(struct calling_interface_buffer));
+}
+EXPORT_SYMBOL_GPL(dell_smbios_clear_buffer);
+
+void dell_smbios_release_buffer(void)
+{
+	mutex_unlock(&buffer_mutex);
+}
+EXPORT_SYMBOL_GPL(dell_smbios_release_buffer);
+
+void dell_smbios_send_request(int class, int select)
+{
+	struct smi_cmd command;
+
+	command.magic = SMI_CMD_MAGIC;
+	command.command_address = da_command_address;
+	command.command_code = da_command_code;
+	command.ebx = virt_to_phys(buffer);
+	command.ecx = 0x42534931;
+
+	buffer->class = class;
+	buffer->select = select;
+
+	dcdbas_smi_request(&command);
+}
+EXPORT_SYMBOL_GPL(dell_smbios_send_request);
+
+struct calling_interface_token *dell_smbios_find_token(int tokenid)
+{
+	int i;
+
+	for (i = 0; i < da_num_tokens; i++) {
+		if (da_tokens[i].tokenID == tokenid)
+			return &da_tokens[i];
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dell_smbios_find_token);
+
+static void __init parse_da_table(const struct dmi_header *dm)
+{
+	/* Final token is a terminator, so we don't want to copy it */
+	int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1;
+	struct calling_interface_token *new_da_tokens;
+	struct calling_interface_structure *table =
+		container_of(dm, struct calling_interface_structure, header);
+
+	/* 4 bytes of table header, plus 7 bytes of Dell header, plus at least
+	   6 bytes of entry */
+
+	if (dm->length < 17)
+		return;
+
+	da_command_address = table->cmdIOAddress;
+	da_command_code = table->cmdIOCode;
+
+	new_da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) *
+				 sizeof(struct calling_interface_token),
+				 GFP_KERNEL);
+
+	if (!new_da_tokens)
+		return;
+	da_tokens = new_da_tokens;
+
+	memcpy(da_tokens+da_num_tokens, table->tokens,
+	       sizeof(struct calling_interface_token) * tokens);
+
+	da_num_tokens += tokens;
+}
+
+static void __init find_tokens(const struct dmi_header *dm, void *dummy)
+{
+	switch (dm->type) {
+	case 0xd4: /* Indexed IO */
+	case 0xd5: /* Protected Area Type 1 */
+	case 0xd6: /* Protected Area Type 2 */
+		break;
+	case 0xda: /* Calling interface */
+		parse_da_table(dm);
+		break;
+	}
+}
+
+static int __init dell_smbios_init(void)
+{
+	int ret;
+
+	dmi_walk(find_tokens, NULL);
+
+	if (!da_tokens)  {
+		pr_info("Unable to find dmi tokens\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * Allocate buffer below 4GB for SMI data--only 32-bit physical addr
+	 * is passed to SMI handler.
+	 */
+	buffer = (void *)__get_free_page(GFP_KERNEL | GFP_DMA32);
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto fail_buffer;
+	}
+
+	return 0;
+
+fail_buffer:
+	kfree(da_tokens);
+	return ret;
+}
+
+static void __exit dell_smbios_exit(void)
+{
+	kfree(da_tokens);
+	free_page((unsigned long)buffer);
+}
+
+subsys_initcall(dell_smbios_init);
+module_exit(dell_smbios_exit);
+
+MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
+MODULE_DESCRIPTION("Common functions for kernel modules using Dell SMBIOS");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-smbios.h b/drivers/platform/x86/dell-smbios.h
new file mode 100644
index 000000000000..ec7d40ae5e6e
--- /dev/null
+++ b/drivers/platform/x86/dell-smbios.h
@@ -0,0 +1,46 @@
+/*
+ *  Common functions for kernel modules using Dell SMBIOS
+ *
+ *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#ifndef _DELL_SMBIOS_H_
+#define _DELL_SMBIOS_H_
+
+/* This structure will be modified by the firmware when we enter
+ * system management mode, hence the volatiles */
+
+struct calling_interface_buffer {
+	u16 class;
+	u16 select;
+	volatile u32 input[4];
+	volatile u32 output[4];
+} __packed;
+
+struct calling_interface_token {
+	u16 tokenID;
+	u16 location;
+	union {
+		u16 value;
+		u16 stringlength;
+	};
+};
+
+int dell_smbios_error(int value);
+
+struct calling_interface_buffer *dell_smbios_get_buffer(void);
+void dell_smbios_clear_buffer(void);
+void dell_smbios_release_buffer(void);
+void dell_smbios_send_request(int class, int select);
+
+struct calling_interface_token *dell_smbios_find_token(int tokenid);
+#endif
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 368e193c2741..15c6f1191aec 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -37,6 +37,7 @@
 #include <linux/string.h>
 #include <linux/dmi.h>
 #include <acpi/video.h>
+#include "dell-smbios.h"
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
 MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
@@ -47,10 +48,37 @@ MODULE_LICENSE("GPL");
 #define DELL_DESCRIPTOR_GUID "8D9DDCBC-A997-11DA-B012-B622A1EF5492"
 
 static u32 dell_wmi_interface_version;
+static bool wmi_requires_smbios_request;
 
 MODULE_ALIAS("wmi:"DELL_EVENT_GUID);
 MODULE_ALIAS("wmi:"DELL_DESCRIPTOR_GUID);
 
+static int __init dmi_matched(const struct dmi_system_id *dmi)
+{
+	wmi_requires_smbios_request = 1;
+	return 1;
+}
+
+static const struct dmi_system_id dell_wmi_smbios_list[] __initconst = {
+	{
+		.callback = dmi_matched,
+		.ident = "Dell Inspiron M5110",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron M5110"),
+		},
+	},
+	{
+		.callback = dmi_matched,
+		.ident = "Dell Vostro V131",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Vostro V131"),
+		},
+	},
+	{ }
+};
+
 /*
  * Certain keys are flagged as KE_IGNORE. All of these are either
  * notifications (rather than requests for change) or are also sent
@@ -90,8 +118,11 @@ static const struct key_entry dell_wmi_legacy_keymap[] __initconst = {
 
 	{ KE_IGNORE, 0xe020, { KEY_MUTE } },
 
-	/* Shortcut and audio panel keys */
-	{ KE_IGNORE, 0xe025, { KEY_RESERVED } },
+	/* Dell Instant Launch key */
+	{ KE_KEY, 0xe025, { KEY_PROG4 } },
+	{ KE_KEY, 0xe029, { KEY_PROG4 } },
+
+	/* Audio panel key */
 	{ KE_IGNORE, 0xe026, { KEY_RESERVED } },
 
 	{ KE_IGNORE, 0xe02e, { KEY_VOLUMEDOWN } },
@@ -120,7 +151,10 @@ struct dell_bios_hotkey_table {
 
 };
 
-static const struct dell_bios_hotkey_table *dell_bios_hotkey_table;
+struct dell_dmi_results {
+	int err;
+	struct key_entry *keymap;
+};
 
 /* Uninitialized entries here are KEY_RESERVED == 0. */
 static const u16 bios_to_linux_keycode[256] __initconst = {
@@ -166,6 +200,30 @@ static const u16 bios_to_linux_keycode[256] __initconst = {
 	[255]	= KEY_PROG3,
 };
 
+/*
+ * These are applied if the 0xB2 DMI hotkey table is present and doesn't
+ * override them.
+ */
+static const struct key_entry dell_wmi_extra_keymap[] __initconst = {
+	/* Fn-lock */
+	{ KE_IGNORE, 0x151, { KEY_RESERVED } },
+
+	/* Change keyboard illumination */
+	{ KE_IGNORE, 0x152, { KEY_KBDILLUMTOGGLE } },
+
+	/*
+	 * Radio disable (notify only -- there is no model for which the
+	 * WMI event is supposed to trigger an action).
+	 */
+	{ KE_IGNORE, 0x153, { KEY_RFKILL } },
+
+	/* RGB keyboard backlight control */
+	{ KE_IGNORE, 0x154, { KEY_RESERVED } },
+
+	/* Stealth mode toggle */
+	{ KE_IGNORE, 0x155, { KEY_RESERVED } },
+};
+
 static struct input_dev *dell_wmi_input_dev;
 
 static void dell_wmi_process_key(int reported_key)
@@ -188,6 +246,9 @@ static void dell_wmi_process_key(int reported_key)
 	    acpi_video_handles_brightness_key_presses())
 		return;
 
+	if (reported_key == 0xe025 && !wmi_requires_smbios_request)
+		return;
+
 	sparse_keymap_report_entry(dell_wmi_input_dev, key, 1, true);
 }
 
@@ -337,20 +398,60 @@ static void dell_wmi_notify(u32 value, void *context)
 	kfree(obj);
 }
 
-static const struct key_entry * __init dell_wmi_prepare_new_keymap(void)
+static bool have_scancode(u32 scancode, const struct key_entry *keymap, int len)
 {
-	int hotkey_num = (dell_bios_hotkey_table->header.length - 4) /
-				sizeof(struct dell_bios_keymap_entry);
-	struct key_entry *keymap;
 	int i;
 
-	keymap = kcalloc(hotkey_num + 1, sizeof(struct key_entry), GFP_KERNEL);
-	if (!keymap)
-		return NULL;
+	for (i = 0; i < len; i++)
+		if (keymap[i].code == scancode)
+			return true;
+
+	return false;
+}
+
+static void __init handle_dmi_entry(const struct dmi_header *dm,
+
+				    void *opaque)
+
+{
+	struct dell_dmi_results *results = opaque;
+	struct dell_bios_hotkey_table *table;
+	int hotkey_num, i, pos = 0;
+	struct key_entry *keymap;
+	int num_bios_keys;
+
+	if (results->err || results->keymap)
+		return;		/* We already found the hotkey table. */
+
+	if (dm->type != 0xb2)
+		return;
+
+	table = container_of(dm, struct dell_bios_hotkey_table, header);
+
+	hotkey_num = (table->header.length -
+		      sizeof(struct dell_bios_hotkey_table)) /
+				sizeof(struct dell_bios_keymap_entry);
+	if (hotkey_num < 1) {
+		/*
+		 * Historically, dell-wmi would ignore a DMI entry of
+		 * fewer than 7 bytes.  Sizes between 4 and 8 bytes are
+		 * nonsensical (both the header and all entries are 4
+		 * bytes), so we approximate the old behavior by
+		 * ignoring tables with fewer than one entry.
+		 */
+		return;
+	}
+
+	keymap = kcalloc(hotkey_num + ARRAY_SIZE(dell_wmi_extra_keymap) + 1,
+			 sizeof(struct key_entry), GFP_KERNEL);
+	if (!keymap) {
+		results->err = -ENOMEM;
+		return;
+	}
 
 	for (i = 0; i < hotkey_num; i++) {
 		const struct dell_bios_keymap_entry *bios_entry =
-					&dell_bios_hotkey_table->keymap[i];
+					&table->keymap[i];
 
 		/* Uninitialized entries are 0 aka KEY_RESERVED. */
 		u16 keycode = (bios_entry->keycode <
@@ -370,20 +471,39 @@ static const struct key_entry * __init dell_wmi_prepare_new_keymap(void)
 		}
 
 		if (keycode == KEY_KBDILLUMTOGGLE)
-			keymap[i].type = KE_IGNORE;
+			keymap[pos].type = KE_IGNORE;
 		else
-			keymap[i].type = KE_KEY;
-		keymap[i].code = bios_entry->scancode;
-		keymap[i].keycode = keycode;
+			keymap[pos].type = KE_KEY;
+		keymap[pos].code = bios_entry->scancode;
+		keymap[pos].keycode = keycode;
+
+		pos++;
+	}
+
+	num_bios_keys = pos;
+
+	for (i = 0; i < ARRAY_SIZE(dell_wmi_extra_keymap); i++) {
+		const struct key_entry *entry = &dell_wmi_extra_keymap[i];
+
+		/*
+		 * Check if we've already found this scancode.  This takes
+		 * quadratic time, but it doesn't matter unless the list
+		 * of extra keys gets very long.
+		 */
+		if (!have_scancode(entry->code, keymap, num_bios_keys)) {
+			keymap[pos] = *entry;
+			pos++;
+		}
 	}
 
-	keymap[hotkey_num].type = KE_END;
+	keymap[pos].type = KE_END;
 
-	return keymap;
+	results->keymap = keymap;
 }
 
 static int __init dell_wmi_input_setup(void)
 {
+	struct dell_dmi_results dmi_results = {};
 	int err;
 
 	dell_wmi_input_dev = input_allocate_device();
@@ -394,20 +514,31 @@ static int __init dell_wmi_input_setup(void)
 	dell_wmi_input_dev->phys = "wmi/input0";
 	dell_wmi_input_dev->id.bustype = BUS_HOST;
 
-	if (dell_new_hk_type) {
-		const struct key_entry *keymap = dell_wmi_prepare_new_keymap();
-		if (!keymap) {
-			err = -ENOMEM;
-			goto err_free_dev;
-		}
+	if (dmi_walk(handle_dmi_entry, &dmi_results)) {
+		/*
+		 * Historically, dell-wmi ignored dmi_walk errors.  A failure
+		 * is certainly surprising, but it probably just indicates
+		 * a very old laptop.
+		 */
+		pr_warn("no DMI; using the old-style hotkey interface\n");
+	}
 
-		err = sparse_keymap_setup(dell_wmi_input_dev, keymap, NULL);
+	if (dmi_results.err) {
+		err = dmi_results.err;
+		goto err_free_dev;
+	}
+
+	if (dmi_results.keymap) {
+		dell_new_hk_type = true;
+
+		err = sparse_keymap_setup(dell_wmi_input_dev,
+					  dmi_results.keymap, NULL);
 
 		/*
 		 * Sparse keymap library makes a copy of keymap so we
 		 * don't need the original one that was allocated.
 		 */
-		kfree(keymap);
+		kfree(dmi_results.keymap);
 	} else {
 		err = sparse_keymap_setup(dell_wmi_input_dev,
 					  dell_wmi_legacy_keymap, NULL);
@@ -434,15 +565,6 @@ static void dell_wmi_input_destroy(void)
 	input_unregister_device(dell_wmi_input_dev);
 }
 
-static void __init find_hk_type(const struct dmi_header *dm, void *dummy)
-{
-	if (dm->type == 0xb2 && dm->length > 6) {
-		dell_new_hk_type = true;
-		dell_bios_hotkey_table =
-			container_of(dm, struct dell_bios_hotkey_table, header);
-	}
-}
-
 /*
  * Descriptor buffer is 128 byte long and contains:
  *
@@ -509,6 +631,38 @@ static int __init dell_wmi_check_descriptor_buffer(void)
 	return 0;
 }
 
+/*
+ * According to Dell SMBIOS documentation:
+ *
+ * 17  3  Application Program Registration
+ *
+ *     cbArg1 Application ID 1 = 0x00010000
+ *     cbArg2 Application ID 2
+ *            QUICKSET/DCP = 0x51534554 "QSET"
+ *            ALS Driver   = 0x416c7353 "AlsS"
+ *            Latitude ON  = 0x4c6f6e52 "LonR"
+ *     cbArg3 Application version or revision number
+ *     cbArg4 0 = Unregister application
+ *            1 = Register application
+ *     cbRes1 Standard return codes (0, -1, -2)
+ */
+
+static int dell_wmi_events_set_enabled(bool enable)
+{
+	struct calling_interface_buffer *buffer;
+	int ret;
+
+	buffer = dell_smbios_get_buffer();
+	buffer->input[0] = 0x10000;
+	buffer->input[1] = 0x51534554;
+	buffer->input[3] = enable;
+	dell_smbios_send_request(17, 3);
+	ret = buffer->output[0];
+	dell_smbios_release_buffer();
+
+	return dell_smbios_error(ret);
+}
+
 static int __init dell_wmi_init(void)
 {
 	int err;
@@ -524,8 +678,6 @@ static int __init dell_wmi_init(void)
 	if (err)
 		return err;
 
-	dmi_walk(find_hk_type, NULL);
-
 	err = dell_wmi_input_setup();
 	if (err)
 		return err;
@@ -538,12 +690,26 @@ static int __init dell_wmi_init(void)
 		return -ENODEV;
 	}
 
+	dmi_check_system(dell_wmi_smbios_list);
+
+	if (wmi_requires_smbios_request) {
+		err = dell_wmi_events_set_enabled(true);
+		if (err) {
+			pr_err("Failed to enable WMI events\n");
+			wmi_remove_notify_handler(DELL_EVENT_GUID);
+			dell_wmi_input_destroy();
+			return err;
+		}
+	}
+
 	return 0;
 }
 module_init(dell_wmi_init);
 
 static void __exit dell_wmi_exit(void)
 {
+	if (wmi_requires_smbios_request)
+		dell_wmi_events_set_enabled(false);
 	wmi_remove_notify_handler(DELL_EVENT_GUID);
 	dell_wmi_input_destroy();
 }
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 1c62caff93fd..ffc84cc7b1c7 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -114,6 +114,7 @@
 #define KEY2_CODE	0x411
 #define KEY3_CODE	0x412
 #define KEY4_CODE	0x413
+#define KEY5_CODE	0x420
 
 #define MAX_HOTKEY_RINGBUFFER_SIZE 100
 #define RINGBUFFERSIZE 40
@@ -149,7 +150,7 @@ struct fujitsu_t {
 	char phys[32];
 	struct backlight_device *bl_device;
 	struct platform_device *pf_device;
-	int keycode1, keycode2, keycode3, keycode4;
+	int keycode1, keycode2, keycode3, keycode4, keycode5;
 
 	unsigned int max_brightness;
 	unsigned int brightness_changed;
@@ -823,6 +824,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device)
 	set_bit(fujitsu->keycode2, input->keybit);
 	set_bit(fujitsu->keycode3, input->keybit);
 	set_bit(fujitsu->keycode4, input->keybit);
+	set_bit(fujitsu->keycode5, input->keybit);
 	set_bit(KEY_UNKNOWN, input->keybit);
 
 	error = input_register_device(input);
@@ -962,6 +964,9 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event)
 			case KEY4_CODE:
 				keycode = fujitsu->keycode4;
 				break;
+			case KEY5_CODE:
+				keycode = fujitsu->keycode5;
+				break;
 			case 0:
 				keycode = 0;
 				break;
@@ -1072,6 +1077,7 @@ static int __init fujitsu_init(void)
 	fujitsu->keycode2 = KEY_PROG2;
 	fujitsu->keycode3 = KEY_PROG3;
 	fujitsu->keycode4 = KEY_PROG4;
+	fujitsu->keycode5 = KEY_RFKILL;
 	dmi_check_system(fujitsu_dmi_table);
 
 	result = acpi_bus_register_driver(&acpi_fujitsu_driver);
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index fb4dd7b3ee71..6f145f2d004d 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -157,7 +157,6 @@ static struct platform_device *hp_wmi_platform_dev;
 static struct rfkill *wifi_rfkill;
 static struct rfkill *bluetooth_rfkill;
 static struct rfkill *wwan_rfkill;
-static struct rfkill *gps_rfkill;
 
 struct rfkill2_device {
 	u8 id;
@@ -613,10 +612,6 @@ static void hp_wmi_notify(u32 value, void *context)
 			rfkill_set_states(wwan_rfkill,
 					  hp_wmi_get_sw_state(HPWMI_WWAN),
 					  hp_wmi_get_hw_state(HPWMI_WWAN));
-		if (gps_rfkill)
-			rfkill_set_states(gps_rfkill,
-					  hp_wmi_get_sw_state(HPWMI_GPS),
-					  hp_wmi_get_hw_state(HPWMI_GPS));
 		break;
 	case HPWMI_CPU_BATTERY_THROTTLE:
 		pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n");
@@ -746,7 +741,7 @@ static int __init hp_wmi_rfkill_setup(struct platform_device *device)
 						(void *) HPWMI_BLUETOOTH);
 		if (!bluetooth_rfkill) {
 			err = -ENOMEM;
-			goto register_wifi_error;
+			goto register_bluetooth_error;
 		}
 		rfkill_init_sw_state(bluetooth_rfkill,
 				     hp_wmi_get_sw_state(HPWMI_BLUETOOTH));
@@ -764,7 +759,7 @@ static int __init hp_wmi_rfkill_setup(struct platform_device *device)
 					   (void *) HPWMI_WWAN);
 		if (!wwan_rfkill) {
 			err = -ENOMEM;
-			goto register_bluetooth_error;
+			goto register_wwan_error;
 		}
 		rfkill_init_sw_state(wwan_rfkill,
 				     hp_wmi_get_sw_state(HPWMI_WWAN));
@@ -775,35 +770,13 @@ static int __init hp_wmi_rfkill_setup(struct platform_device *device)
 			goto register_wwan_error;
 	}
 
-	if (wireless & 0x8) {
-		gps_rfkill = rfkill_alloc("hp-gps", &device->dev,
-						RFKILL_TYPE_GPS,
-						&hp_wmi_rfkill_ops,
-						(void *) HPWMI_GPS);
-		if (!gps_rfkill) {
-			err = -ENOMEM;
-			goto register_wwan_error;
-		}
-		rfkill_init_sw_state(gps_rfkill,
-				     hp_wmi_get_sw_state(HPWMI_GPS));
-		rfkill_set_hw_state(gps_rfkill,
-				    hp_wmi_get_hw_state(HPWMI_GPS));
-		err = rfkill_register(gps_rfkill);
-		if (err)
-			goto register_gps_error;
-	}
-
 	return 0;
-register_gps_error:
-	rfkill_destroy(gps_rfkill);
-	gps_rfkill = NULL;
-	if (bluetooth_rfkill)
-		rfkill_unregister(bluetooth_rfkill);
+
 register_wwan_error:
 	rfkill_destroy(wwan_rfkill);
 	wwan_rfkill = NULL;
-	if (gps_rfkill)
-		rfkill_unregister(gps_rfkill);
+	if (bluetooth_rfkill)
+		rfkill_unregister(bluetooth_rfkill);
 register_bluetooth_error:
 	rfkill_destroy(bluetooth_rfkill);
 	bluetooth_rfkill = NULL;
@@ -907,7 +880,6 @@ static int __init hp_wmi_bios_setup(struct platform_device *device)
 	wifi_rfkill = NULL;
 	bluetooth_rfkill = NULL;
 	wwan_rfkill = NULL;
-	gps_rfkill = NULL;
 	rfkill2_count = 0;
 
 	if (hp_wmi_bios_2009_later() || hp_wmi_rfkill_setup(device))
@@ -960,10 +932,6 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device)
 		rfkill_unregister(wwan_rfkill);
 		rfkill_destroy(wwan_rfkill);
 	}
-	if (gps_rfkill) {
-		rfkill_unregister(gps_rfkill);
-		rfkill_destroy(gps_rfkill);
-	}
 
 	return 0;
 }
@@ -999,10 +967,6 @@ static int hp_wmi_resume_handler(struct device *device)
 		rfkill_set_states(wwan_rfkill,
 				  hp_wmi_get_sw_state(HPWMI_WWAN),
 				  hp_wmi_get_hw_state(HPWMI_WWAN));
-	if (gps_rfkill)
-		rfkill_set_states(gps_rfkill,
-				  hp_wmi_get_sw_state(HPWMI_GPS),
-				  hp_wmi_get_hw_state(HPWMI_GPS));
 
 	return 0;
 }
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d78ee151c9e4..be3bc2f4edd4 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -865,6 +865,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
 		},
 	},
 	{
+		.ident = "Lenovo ideapad Y700-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-15ISK"),
+		},
+	},
+	{
+		.ident = "Lenovo ideapad Y700 Touch-15ISK",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700 Touch-15ISK"),
+		},
+	},
+	{
 		.ident = "Lenovo ideapad Y700-17ISK",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index e20f23e04c24..f93abc8c1424 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -180,8 +180,7 @@ static int intel_hid_probe(struct platform_device *device)
 		return -ENODEV;
 	}
 
-	priv = devm_kzalloc(&device->dev,
-			    sizeof(struct intel_hid_priv *), GFP_KERNEL);
+	priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 	dev_set_drvdata(&device->dev, priv);
diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
index 092519e37de6..3fb1d85c70a8 100644
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ b/drivers/platform/x86/intel_pmc_ipc.c
@@ -67,7 +67,8 @@
 /* exported resources from IFWI */
 #define PLAT_RESOURCE_IPC_INDEX		0
 #define PLAT_RESOURCE_IPC_SIZE		0x1000
-#define PLAT_RESOURCE_GCR_SIZE		0x1000
+#define PLAT_RESOURCE_GCR_OFFSET	0x1008
+#define PLAT_RESOURCE_GCR_SIZE		0x4
 #define PLAT_RESOURCE_BIOS_DATA_INDEX	1
 #define PLAT_RESOURCE_BIOS_IFACE_INDEX	2
 #define PLAT_RESOURCE_TELEM_SSRAM_INDEX	3
@@ -766,7 +767,7 @@ static int ipc_plat_get_res(struct platform_device *pdev)
 	}
 	ipcdev.ipc_base = addr;
 
-	ipcdev.gcr_base = res->start + size;
+	ipcdev.gcr_base = res->start + PLAT_RESOURCE_GCR_OFFSET;
 	ipcdev.gcr_size = PLAT_RESOURCE_GCR_SIZE;
 	dev_info(&pdev->dev, "ipc res: %pR\n", res);
 
@@ -824,7 +825,8 @@ static int ipc_plat_probe(struct platform_device *pdev)
 		goto err_device;
 	}
 
-	if (request_irq(ipcdev.irq, ioc, 0, "intel_pmc_ipc", &ipcdev)) {
+	if (request_irq(ipcdev.irq, ioc, IRQF_NO_SUSPEND,
+			"intel_pmc_ipc", &ipcdev)) {
 		dev_err(&pdev->dev, "Failed to request irq\n");
 		ret = -EBUSY;
 		goto err_irq;
diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c
index f94b730540e2..e81daff65f62 100644
--- a/drivers/platform/x86/intel_scu_ipc.c
+++ b/drivers/platform/x86/intel_scu_ipc.c
@@ -24,7 +24,6 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/sfi.h>
-#include <linux/module.h>
 #include <asm/intel-mid.h>
 #include <asm/intel_scu_ipc.h>
 
@@ -611,28 +610,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 }
 
-/**
- *	ipc_remove	-	remove a bound IPC device
- *	@pdev: PCI device
- *
- *	In practice the SCU is not removable but this function is also
- *	called for each device on a module unload or cleanup which is the
- *	path that will get used.
- *
- *	Free up the mappings and release the PCI resources
- */
-static void ipc_remove(struct pci_dev *pdev)
-{
-	struct intel_scu_ipc_dev *scu = pci_get_drvdata(pdev);
-
-	mutex_lock(&ipclock);
-	scu->dev = NULL;
-	mutex_unlock(&ipclock);
-
-	iounmap(scu->i2c_base);
-	intel_scu_devices_destroy();
-}
-
 static const struct pci_device_id pci_ids[] = {
 	{
 		PCI_VDEVICE(INTEL, PCI_DEVICE_ID_LINCROFT),
@@ -650,17 +627,13 @@ static const struct pci_device_id pci_ids[] = {
 		0,
 	}
 };
-MODULE_DEVICE_TABLE(pci, pci_ids);
 
 static struct pci_driver ipc_driver = {
+	.driver = {
+		.suppress_bind_attrs = true,
+	},
 	.name = "intel_scu_ipc",
 	.id_table = pci_ids,
 	.probe = ipc_probe,
-	.remove = ipc_remove,
 };
-
-module_pci_driver(ipc_driver);
-
-MODULE_AUTHOR("Sreedhara DS <sreedhara.ds@intel.com>");
-MODULE_DESCRIPTION("Intel SCU IPC driver");
-MODULE_LICENSE("GPL");
+builtin_pci_driver(ipc_driver);
diff --git a/drivers/platform/x86/intel_telemetry_pltdrv.c b/drivers/platform/x86/intel_telemetry_pltdrv.c
index f97019b0106f..397119f83e82 100644
--- a/drivers/platform/x86/intel_telemetry_pltdrv.c
+++ b/drivers/platform/x86/intel_telemetry_pltdrv.c
@@ -1030,8 +1030,19 @@ static int telemetry_plt_set_trace_verbosity(enum telemetry_unit telem_unit,
 	switch (telem_unit) {
 	case TELEM_PSS:
 		ret = intel_punit_ipc_command(
+				IPC_PUNIT_BIOS_READ_TELE_TRACE_CTRL,
+				0, 0, NULL, &temp);
+		if (ret) {
+			pr_err("PSS TRACE_CTRL Read Failed\n");
+			goto out;
+		}
+
+		TELEM_CLEAR_VERBOSITY_BITS(temp);
+		TELEM_SET_VERBOSITY_BITS(temp, verbosity);
+
+		ret = intel_punit_ipc_command(
 				IPC_PUNIT_BIOS_WRITE_TELE_TRACE_CTRL,
-				0, 0, &verbosity, NULL);
+				0, 0, &temp, NULL);
 		if (ret) {
 			pr_err("PSS TRACE_CTRL Verbosity Set Failed\n");
 			goto out;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index a268a7abf8ab..e305ab541a22 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -6653,18 +6653,16 @@ static void __init tpacpi_detect_brightness_capabilities(void)
 	switch (b) {
 	case 16:
 		bright_maxlvl = 15;
-		pr_info("detected a 16-level brightness capable ThinkPad\n");
 		break;
 	case 8:
 	case 0:
 		bright_maxlvl = 7;
-		pr_info("detected a 8-level brightness capable ThinkPad\n");
 		break;
 	default:
-		pr_info("Unsupported brightness interface\n");
 		tp_features.bright_unkfw = 1;
 		bright_maxlvl = b - 1;
 	}
+	pr_debug("detected %u brightness levels\n", bright_maxlvl + 1);
 }
 
 static int __init brightness_init(struct ibm_init_struct *iibm)
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index 73833079bac8..df1f1a76a862 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -36,6 +36,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/proc_fs.h>
@@ -117,6 +118,7 @@ MODULE_LICENSE("GPL");
 #define HCI_LCD_BRIGHTNESS		0x002a
 #define HCI_WIRELESS			0x0056
 #define HCI_ACCELEROMETER		0x006d
+#define HCI_COOLING_METHOD		0x007f
 #define HCI_KBD_ILLUMINATION		0x0095
 #define HCI_ECO_MODE			0x0097
 #define HCI_ACCELEROMETER2		0x00a6
@@ -186,6 +188,7 @@ struct toshiba_acpi_dev {
 	int usbsc_bat_level;
 	int usbsc_mode_base;
 	int hotkey_event_type;
+	int max_cooling_method;
 
 	unsigned int illumination_supported:1;
 	unsigned int video_supported:1;
@@ -205,6 +208,7 @@ struct toshiba_acpi_dev {
 	unsigned int panel_power_on_supported:1;
 	unsigned int usb_three_supported:1;
 	unsigned int wwan_supported:1;
+	unsigned int cooling_method_supported:1;
 	unsigned int sysfs_created:1;
 	unsigned int special_functions;
 
@@ -217,6 +221,10 @@ struct toshiba_acpi_dev {
 
 static struct toshiba_acpi_dev *toshiba_acpi;
 
+static bool disable_hotkeys;
+module_param(disable_hotkeys, bool, 0444);
+MODULE_PARM_DESC(disable_hotkeys, "Disables the hotkeys activation");
+
 static const struct acpi_device_id toshiba_device_ids[] = {
 	{"TOS6200", 0},
 	{"TOS6207", 0},
@@ -1194,6 +1202,53 @@ static int toshiba_wwan_set(struct toshiba_acpi_dev *dev, u32 state)
 	return out[0] == TOS_SUCCESS ? 0 : -EIO;
 }
 
+/* Cooling Method */
+static void toshiba_cooling_method_available(struct toshiba_acpi_dev *dev)
+{
+	u32 in[TCI_WORDS] = { HCI_GET, HCI_COOLING_METHOD, 0, 0, 0, 0 };
+	u32 out[TCI_WORDS];
+	acpi_status status;
+
+	dev->cooling_method_supported = 0;
+	dev->max_cooling_method = 0;
+
+	status = tci_raw(dev, in, out);
+	if (ACPI_FAILURE(status))
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (out[0] != TOS_SUCCESS && out[0] != TOS_SUCCESS2)
+		return;
+
+	dev->cooling_method_supported = 1;
+	dev->max_cooling_method = out[3];
+}
+
+static int toshiba_cooling_method_get(struct toshiba_acpi_dev *dev, u32 *state)
+{
+	u32 result = hci_read(dev, HCI_COOLING_METHOD, state);
+
+	if (result == TOS_FAILURE)
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (result == TOS_NOT_SUPPORTED)
+		return -ENODEV;
+
+	return (result == TOS_SUCCESS || result == TOS_SUCCESS2) ? 0 : -EIO;
+}
+
+static int toshiba_cooling_method_set(struct toshiba_acpi_dev *dev, u32 state)
+{
+	u32 result = hci_write(dev, HCI_COOLING_METHOD, state);
+
+	if (result == TOS_FAILURE)
+		pr_err("ACPI call to get Cooling Method failed\n");
+
+	if (result == TOS_NOT_SUPPORTED)
+		return -ENODEV;
+
+	return (result == TOS_SUCCESS || result == TOS_SUCCESS2) ? 0 : -EIO;
+}
+
 /* Transflective Backlight */
 static int get_tr_backlight_status(struct toshiba_acpi_dev *dev, u32 *status)
 {
@@ -2239,6 +2294,54 @@ static ssize_t usb_three_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(usb_three);
 
+static ssize_t cooling_method_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev);
+	int state;
+	int ret;
+
+	ret = toshiba_cooling_method_get(toshiba, &state);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%d %d\n", state, toshiba->max_cooling_method);
+}
+
+static ssize_t cooling_method_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct toshiba_acpi_dev *toshiba = dev_get_drvdata(dev);
+	int state;
+	int ret;
+
+	ret = kstrtoint(buf, 0, &state);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check for supported values
+	 * Depending on the laptop model, some only support these two:
+	 * 0 - Maximum Performance
+	 * 1 - Battery Optimized
+	 *
+	 * While some others support all three methods:
+	 * 0 - Maximum Performance
+	 * 1 - Performance
+	 * 2 - Battery Optimized
+	 */
+	if (state < 0 || state > toshiba->max_cooling_method)
+		return -EINVAL;
+
+	ret = toshiba_cooling_method_set(toshiba, state);
+	if (ret)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR_RW(cooling_method);
+
 static struct attribute *toshiba_attributes[] = {
 	&dev_attr_version.attr,
 	&dev_attr_fan.attr,
@@ -2255,6 +2358,7 @@ static struct attribute *toshiba_attributes[] = {
 	&dev_attr_kbd_function_keys.attr,
 	&dev_attr_panel_power_on.attr,
 	&dev_attr_usb_three.attr,
+	&dev_attr_cooling_method.attr,
 	NULL,
 };
 
@@ -2289,6 +2393,8 @@ static umode_t toshiba_sysfs_is_visible(struct kobject *kobj,
 		exists = (drv->panel_power_on_supported) ? true : false;
 	else if (attr == &dev_attr_usb_three.attr)
 		exists = (drv->usb_three_supported) ? true : false;
+	else if (attr == &dev_attr_cooling_method.attr)
+		exists = (drv->cooling_method_supported) ? true : false;
 
 	return exists ? attr->mode : 0;
 }
@@ -2591,6 +2697,11 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 	acpi_handle ec_handle;
 	int error;
 
+	if (disable_hotkeys) {
+		pr_info("Hotkeys disabled by module parameter\n");
+		return 0;
+	}
+
 	if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID)) {
 		pr_info("WMI event detected, hotkeys will not be monitored\n");
 		return 0;
@@ -2779,6 +2890,8 @@ static void print_supported_features(struct toshiba_acpi_dev *dev)
 		pr_cont(" usb3");
 	if (dev->wwan_supported)
 		pr_cont(" wwan");
+	if (dev->cooling_method_supported)
+		pr_cont(" cooling-method");
 
 	pr_cont("\n");
 }
@@ -2963,6 +3076,8 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
 	if (dev->wwan_supported)
 		toshiba_acpi_setup_wwan_rfkill(dev);
 
+	toshiba_cooling_method_available(dev);
+
 	print_supported_features(dev);
 
 	ret = sysfs_create_group(&dev->acpi_dev->dev.kobj,
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index 80994566a1c8..8986382718dd 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -47,6 +47,10 @@
 #define RK3368_SOC_CON15_FLASH0		BIT(14)
 #define RK3368_SOC_FLASH_SUPPLY_NUM	2
 
+#define RK3399_PMUGRF_CON0		0x180
+#define RK3399_PMUGRF_CON0_VSEL		BIT(8)
+#define RK3399_PMUGRF_VSEL_SUPPLY_NUM	9
+
 struct rockchip_iodomain;
 
 /**
@@ -181,6 +185,25 @@ static void rk3368_iodomain_init(struct rockchip_iodomain *iod)
 		dev_warn(iod->dev, "couldn't update flash0 ctrl\n");
 }
 
+static void rk3399_pmu_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no pmu io supply we should leave things alone */
+	if (!iod->supplies[RK3399_PMUGRF_VSEL_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set pmu io iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = RK3399_PMUGRF_CON0_VSEL | (RK3399_PMUGRF_CON0_VSEL << 16);
+	ret = regmap_write(iod->grf, RK3399_PMUGRF_CON0, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update pmu io iodomain ctrl\n");
+}
+
 /*
  * On the rk3188 the io-domains are handled by a shared register with the
  * lower 8 bits being still being continuing drive-strength settings.
@@ -252,6 +275,33 @@ static const struct rockchip_iodomain_soc_data soc_data_rk3368_pmu = {
 	},
 };
 
+static const struct rockchip_iodomain_soc_data soc_data_rk3399 = {
+	.grf_offset = 0xe640,
+	.supply_names = {
+		"bt656",		/* APIO2_VDD */
+		"audio",		/* APIO5_VDD */
+		"sdmmc",		/* SDMMC0_VDD */
+		"gpio1830",		/* APIO4_VDD */
+	},
+};
+
+static const struct rockchip_iodomain_soc_data soc_data_rk3399_pmu = {
+	.grf_offset = 0x180,
+	.supply_names = {
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		"pmu1830",		/* PMUIO2_VDD */
+	},
+	.init = rk3399_pmu_iodomain_init,
+};
+
 static const struct of_device_id rockchip_iodomain_match[] = {
 	{
 		.compatible = "rockchip,rk3188-io-voltage-domain",
@@ -269,6 +319,14 @@ static const struct of_device_id rockchip_iodomain_match[] = {
 		.compatible = "rockchip,rk3368-pmu-io-voltage-domain",
 		.data = (void *)&soc_data_rk3368_pmu
 	},
+	{
+		.compatible = "rockchip,rk3399-io-voltage-domain",
+		.data = (void *)&soc_data_rk3399
+	},
+	{
+		.compatible = "rockchip,rk3399-pmu-io-voltage-domain",
+		.data = (void *)&soc_data_rk3399_pmu
+	},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, rockchip_iodomain_match);
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 8cf0dae78555..c182efc62c7b 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -316,7 +316,7 @@ config PWM_RCAR
 
 config PWM_RENESAS_TPU
 	tristate "Renesas TPU PWM support"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  This driver exposes the Timer Pulse Unit (TPU) PWM controller found
diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index 423ce087cd9c..5d5adee16886 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c
@@ -274,8 +274,8 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	p->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!p->base) {
-		ret = -ENOMEM;
+	if (IS_ERR(p->base)) {
+		ret = PTR_ERR(p->base);
 		goto out_clk;
 	}
 
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index 8a029f9bc18c..2fb30deee345 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -237,6 +237,11 @@ static int img_pwm_probe(struct platform_device *pdev)
 	}
 
 	clk_rate = clk_get_rate(pwm->pwm_clk);
+	if (!clk_rate) {
+		dev_err(&pdev->dev, "pwm clock has no frequency\n");
+		ret = -EINVAL;
+		goto disable_pwmclk;
+	}
 
 	/* The maximum input clock divider is 512 */
 	val = (u64)NSEC_PER_SEC * 512 * pwm->data->max_timebase;
diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c
index 9163085101bc..9861fed4e67d 100644
--- a/drivers/pwm/pwm-lpc18xx-sct.c
+++ b/drivers/pwm/pwm-lpc18xx-sct.c
@@ -360,6 +360,11 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev)
 	}
 
 	lpc18xx_pwm->clk_rate = clk_get_rate(lpc18xx_pwm->pwm_clk);
+	if (!lpc18xx_pwm->clk_rate) {
+		dev_err(&pdev->dev, "pwm clock has no frequency\n");
+		ret = -EINVAL;
+		goto disable_pwmclk;
+	}
 
 	mutex_init(&lpc18xx_pwm->res_lock);
 	mutex_init(&lpc18xx_pwm->period_lock);
diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
index 826634ec0d5c..b7e6ecba7d5c 100644
--- a/drivers/pwm/pwm-omap-dmtimer.c
+++ b/drivers/pwm/pwm-omap-dmtimer.c
@@ -31,6 +31,7 @@
 #include <linux/time.h>
 
 #define DM_TIMER_LOAD_MIN 0xfffffffe
+#define DM_TIMER_MAX      0xffffffff
 
 struct pwm_omap_dmtimer_chip {
 	struct pwm_chip chip;
@@ -46,13 +47,9 @@ to_pwm_omap_dmtimer_chip(struct pwm_chip *chip)
 	return container_of(chip, struct pwm_omap_dmtimer_chip, chip);
 }
 
-static int pwm_omap_dmtimer_calc_value(unsigned long clk_rate, int ns)
+static u32 pwm_omap_dmtimer_get_clock_cycles(unsigned long clk_rate, int ns)
 {
-	u64 c = (u64)clk_rate * ns;
-
-	do_div(c, NSEC_PER_SEC);
-
-	return DM_TIMER_LOAD_MIN - c;
+	return DIV_ROUND_CLOSEST_ULL((u64)clk_rate * ns, NSEC_PER_SEC);
 }
 
 static void pwm_omap_dmtimer_start(struct pwm_omap_dmtimer_chip *omap)
@@ -99,12 +96,14 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip,
 				   int duty_ns, int period_ns)
 {
 	struct pwm_omap_dmtimer_chip *omap = to_pwm_omap_dmtimer_chip(chip);
-	int load_value, match_value;
+	u32 period_cycles, duty_cycles;
+	u32 load_value, match_value;
 	struct clk *fclk;
 	unsigned long clk_rate;
 	bool timer_active;
 
-	dev_dbg(chip->dev, "duty cycle: %d, period %d\n", duty_ns, period_ns);
+	dev_dbg(chip->dev, "requested duty cycle: %d ns, period: %d ns\n",
+		duty_ns, period_ns);
 
 	mutex_lock(&omap->mutex);
 	if (duty_ns == pwm_get_duty_cycle(pwm) &&
@@ -117,15 +116,13 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip,
 	fclk = omap->pdata->get_fclk(omap->dm_timer);
 	if (!fclk) {
 		dev_err(chip->dev, "invalid pmtimer fclk\n");
-		mutex_unlock(&omap->mutex);
-		return -EINVAL;
+		goto err_einval;
 	}
 
 	clk_rate = clk_get_rate(fclk);
 	if (!clk_rate) {
 		dev_err(chip->dev, "invalid pmtimer fclk rate\n");
-		mutex_unlock(&omap->mutex);
-		return -EINVAL;
+		goto err_einval;
 	}
 
 	dev_dbg(chip->dev, "clk rate: %luHz\n", clk_rate);
@@ -133,11 +130,51 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip,
 	/*
 	 * Calculate the appropriate load and match values based on the
 	 * specified period and duty cycle. The load value determines the
-	 * cycle time and the match value determines the duty cycle.
+	 * period time and the match value determines the duty time.
+	 *
+	 * The period lasts for (DM_TIMER_MAX-load_value+1) clock cycles.
+	 * Similarly, the active time lasts (match_value-load_value+1) cycles.
+	 * The non-active time is the remainder: (DM_TIMER_MAX-match_value)
+	 * clock cycles.
+	 *
+	 * NOTE: It is required that: load_value <= match_value < DM_TIMER_MAX
+	 *
+	 * References:
+	 *   OMAP4430/60/70 TRM sections 22.2.4.10 and 22.2.4.11
+	 *   AM335x Sitara TRM sections 20.1.3.5 and 20.1.3.6
 	 */
-	load_value = pwm_omap_dmtimer_calc_value(clk_rate, period_ns);
-	match_value = pwm_omap_dmtimer_calc_value(clk_rate,
-						  period_ns - duty_ns);
+	period_cycles = pwm_omap_dmtimer_get_clock_cycles(clk_rate, period_ns);
+	duty_cycles = pwm_omap_dmtimer_get_clock_cycles(clk_rate, duty_ns);
+
+	if (period_cycles < 2) {
+		dev_info(chip->dev,
+			 "period %d ns too short for clock rate %lu Hz\n",
+			 period_ns, clk_rate);
+		goto err_einval;
+	}
+
+	if (duty_cycles < 1) {
+		dev_dbg(chip->dev,
+			"duty cycle %d ns is too short for clock rate %lu Hz\n",
+			duty_ns, clk_rate);
+		dev_dbg(chip->dev, "using minimum of 1 clock cycle\n");
+		duty_cycles = 1;
+	} else if (duty_cycles >= period_cycles) {
+		dev_dbg(chip->dev,
+			"duty cycle %d ns is too long for period %d ns at clock rate %lu Hz\n",
+			duty_ns, period_ns, clk_rate);
+		dev_dbg(chip->dev, "using maximum of 1 clock cycle less than period\n");
+		duty_cycles = period_cycles - 1;
+	}
+
+	dev_dbg(chip->dev, "effective duty cycle: %lld ns, period: %lld ns\n",
+		DIV_ROUND_CLOSEST_ULL((u64)NSEC_PER_SEC * duty_cycles,
+				      clk_rate),
+		DIV_ROUND_CLOSEST_ULL((u64)NSEC_PER_SEC * period_cycles,
+				      clk_rate));
+
+	load_value = (DM_TIMER_MAX - period_cycles) + 1;
+	match_value = load_value + duty_cycles - 1;
 
 	/*
 	 * We MUST stop the associated dual-mode timer before attempting to
@@ -166,6 +203,11 @@ static int pwm_omap_dmtimer_config(struct pwm_chip *chip,
 	mutex_unlock(&omap->mutex);
 
 	return 0;
+
+err_einval:
+	mutex_unlock(&omap->mutex);
+
+	return -EINVAL;
 }
 
 static int pwm_omap_dmtimer_set_polarity(struct pwm_chip *chip,
diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index 3e3be57e9a1a..b5a10d3c92c7 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -67,6 +67,14 @@ config RAPIDIO_ENUM_BASIC
 
 endchoice
 
+config RAPIDIO_MPORT_CDEV
+	tristate "RapidIO /dev mport device driver"
+	depends on RAPIDIO
+	help
+	  This option includes generic RapidIO mport device driver which
+	  allows to user space applications to perform RapidIO-specific
+	  operations through selected RapidIO mport.
+
 menu "RapidIO Switch drivers"
 	depends on RAPIDIO
 
diff --git a/drivers/rapidio/devices/Makefile b/drivers/rapidio/devices/Makefile
index 9432c494cf57..927dbf89592b 100644
--- a/drivers/rapidio/devices/Makefile
+++ b/drivers/rapidio/devices/Makefile
@@ -5,3 +5,4 @@
 obj-$(CONFIG_RAPIDIO_TSI721)	+= tsi721_mport.o
 tsi721_mport-y			:= tsi721.o
 tsi721_mport-$(CONFIG_RAPIDIO_DMA_ENGINE) += tsi721_dma.o
+obj-$(CONFIG_RAPIDIO_MPORT_CDEV) += rio_mport_cdev.o
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
new file mode 100644
index 000000000000..9607bc826460
--- /dev/null
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -0,0 +1,2720 @@
+/*
+ * RapidIO mport character device
+ *
+ * Copyright 2014-2015 Integrated Device Technology, Inc.
+ *    Alexandre Bounine <alexandre.bounine@idt.com>
+ * Copyright 2014-2015 Prodrive Technologies
+ *    Andre van Herk <andre.van.herk@prodrive-technologies.com>
+ *    Jerry Jacobs <jerry.jacobs@prodrive-technologies.com>
+ * Copyright (C) 2014 Texas Instruments Incorporated
+ *    Aurelien Jacquiot <a-jacquiot@ti.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/cdev.h>
+#include <linux/ioctl.h>
+#include <linux/uaccess.h>
+#include <linux/list.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/net.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/kfifo.h>
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/mman.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#include <linux/dmaengine.h>
+#endif
+
+#include <linux/rio.h>
+#include <linux/rio_ids.h>
+#include <linux/rio_drv.h>
+#include <linux/rio_mport_cdev.h>
+
+#include "../rio.h"
+
+#define DRV_NAME	"rio_mport"
+#define DRV_PREFIX	DRV_NAME ": "
+#define DEV_NAME	"rio_mport"
+#define DRV_VERSION     "1.0.0"
+
+/* Debug output filtering masks */
+enum {
+	DBG_NONE	= 0,
+	DBG_INIT	= BIT(0), /* driver init */
+	DBG_EXIT	= BIT(1), /* driver exit */
+	DBG_MPORT	= BIT(2), /* mport add/remove */
+	DBG_RDEV	= BIT(3), /* RapidIO device add/remove */
+	DBG_DMA		= BIT(4), /* DMA transfer messages */
+	DBG_MMAP	= BIT(5), /* mapping messages */
+	DBG_IBW		= BIT(6), /* inbound window */
+	DBG_EVENT	= BIT(7), /* event handling messages */
+	DBG_OBW		= BIT(8), /* outbound window messages */
+	DBG_DBELL	= BIT(9), /* doorbell messages */
+	DBG_ALL		= ~0,
+};
+
+#ifdef DEBUG
+#define rmcd_debug(level, fmt, arg...)		\
+	do {					\
+		if (DBG_##level & dbg_level)	\
+			pr_debug(DRV_PREFIX "%s: " fmt "\n", __func__, ##arg); \
+	} while (0)
+#else
+#define rmcd_debug(level, fmt, arg...) \
+		no_printk(KERN_DEBUG pr_fmt(DRV_PREFIX fmt "\n"), ##arg)
+#endif
+
+#define rmcd_warn(fmt, arg...) \
+	pr_warn(DRV_PREFIX "%s WARNING " fmt "\n", __func__, ##arg)
+
+#define rmcd_error(fmt, arg...) \
+	pr_err(DRV_PREFIX "%s ERROR " fmt "\n", __func__, ##arg)
+
+MODULE_AUTHOR("Jerry Jacobs <jerry.jacobs@prodrive-technologies.com>");
+MODULE_AUTHOR("Aurelien Jacquiot <a-jacquiot@ti.com>");
+MODULE_AUTHOR("Alexandre Bounine <alexandre.bounine@idt.com>");
+MODULE_AUTHOR("Andre van Herk <andre.van.herk@prodrive-technologies.com>");
+MODULE_DESCRIPTION("RapidIO mport character device driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static int dma_timeout = 3000; /* DMA transfer timeout in msec */
+module_param(dma_timeout, int, S_IRUGO);
+MODULE_PARM_DESC(dma_timeout, "DMA Transfer Timeout in msec (default: 3000)");
+
+#ifdef DEBUG
+static u32 dbg_level = DBG_NONE;
+module_param(dbg_level, uint, S_IWUSR | S_IWGRP | S_IRUGO);
+MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
+#endif
+
+/*
+ * An internal DMA coherent buffer
+ */
+struct mport_dma_buf {
+	void		*ib_base;
+	dma_addr_t	ib_phys;
+	u32		ib_size;
+	u64		ib_rio_base;
+	bool		ib_map;
+	struct file	*filp;
+};
+
+/*
+ * Internal memory mapping structure
+ */
+enum rio_mport_map_dir {
+	MAP_INBOUND,
+	MAP_OUTBOUND,
+	MAP_DMA,
+};
+
+struct rio_mport_mapping {
+	struct list_head node;
+	struct mport_dev *md;
+	enum rio_mport_map_dir dir;
+	u32 rioid;
+	u64 rio_addr;
+	dma_addr_t phys_addr; /* for mmap */
+	void *virt_addr; /* kernel address, for dma_free_coherent */
+	u64 size;
+	struct kref ref; /* refcount of vmas sharing the mapping */
+	struct file *filp;
+};
+
+struct rio_mport_dma_map {
+	int valid;
+	uint64_t length;
+	void *vaddr;
+	dma_addr_t paddr;
+};
+
+#define MPORT_MAX_DMA_BUFS	16
+#define MPORT_EVENT_DEPTH	10
+
+/*
+ * mport_dev  driver-specific structure that represents mport device
+ * @active    mport device status flag
+ * @node      list node to maintain list of registered mports
+ * @cdev      character device
+ * @dev       associated device object
+ * @mport     associated subsystem's master port device object
+ * @buf_mutex lock for buffer handling
+ * @file_mutex - lock for open files list
+ * @file_list  - list of open files on given mport
+ * @properties properties of this mport
+ * @portwrites queue of inbound portwrites
+ * @pw_lock    lock for port write queue
+ * @mappings   queue for memory mappings
+ * @dma_chan   DMA channels associated with this device
+ * @dma_ref:
+ * @comp:
+ */
+struct mport_dev {
+	atomic_t		active;
+	struct list_head	node;
+	struct cdev		cdev;
+	struct device		dev;
+	struct rio_mport	*mport;
+	struct mutex		buf_mutex;
+	struct mutex		file_mutex;
+	struct list_head	file_list;
+	struct rio_mport_properties	properties;
+	struct list_head		doorbells;
+	spinlock_t			db_lock;
+	struct list_head		portwrites;
+	spinlock_t			pw_lock;
+	struct list_head	mappings;
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_chan *dma_chan;
+	struct kref	dma_ref;
+	struct completion comp;
+#endif
+};
+
+/*
+ * mport_cdev_priv - data structure specific to individual file object
+ *                   associated with an open device
+ * @md    master port character device object
+ * @async_queue - asynchronous notification queue
+ * @list - file objects tracking list
+ * @db_filters    inbound doorbell filters for this descriptor
+ * @pw_filters    portwrite filters for this descriptor
+ * @event_fifo    event fifo for this descriptor
+ * @event_rx_wait wait queue for this descriptor
+ * @fifo_lock     lock for event_fifo
+ * @event_mask    event mask for this descriptor
+ * @dmach DMA engine channel allocated for specific file object
+ */
+struct mport_cdev_priv {
+	struct mport_dev	*md;
+	struct fasync_struct	*async_queue;
+	struct list_head	list;
+	struct list_head	db_filters;
+	struct list_head        pw_filters;
+	struct kfifo            event_fifo;
+	wait_queue_head_t       event_rx_wait;
+	spinlock_t              fifo_lock;
+	unsigned int            event_mask; /* RIO_DOORBELL, RIO_PORTWRITE */
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_chan		*dmach;
+	struct list_head	async_list;
+	struct list_head	pend_list;
+	spinlock_t              req_lock;
+	struct mutex		dma_lock;
+	struct kref		dma_ref;
+	struct completion	comp;
+#endif
+};
+
+/*
+ * rio_mport_pw_filter - structure to describe a portwrite filter
+ * md_node   node in mport device's list
+ * priv_node node in private file object's list
+ * priv      reference to private data
+ * filter    actual portwrite filter
+ */
+struct rio_mport_pw_filter {
+	struct list_head md_node;
+	struct list_head priv_node;
+	struct mport_cdev_priv *priv;
+	struct rio_pw_filter filter;
+};
+
+/*
+ * rio_mport_db_filter - structure to describe a doorbell filter
+ * @data_node reference to device node
+ * @priv_node node in private data
+ * @priv      reference to private data
+ * @filter    actual doorbell filter
+ */
+struct rio_mport_db_filter {
+	struct list_head data_node;
+	struct list_head priv_node;
+	struct mport_cdev_priv *priv;
+	struct rio_doorbell_filter filter;
+};
+
+static LIST_HEAD(mport_devs);
+static DEFINE_MUTEX(mport_devs_lock);
+
+#if (0) /* used by commented out portion of poll function : FIXME */
+static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
+#endif
+
+static struct class *dev_class;
+static dev_t dev_number;
+
+static struct workqueue_struct *dma_wq;
+
+static void mport_release_mapping(struct kref *ref);
+
+static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
+			      int local)
+{
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_mport_maint_io maint_io;
+	u32 *buffer;
+	u32 offset;
+	size_t length;
+	int ret, i;
+
+	if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io))))
+		return -EFAULT;
+
+	if ((maint_io.offset % 4) ||
+	    (maint_io.length == 0) || (maint_io.length % 4))
+		return -EINVAL;
+
+	buffer = vmalloc(maint_io.length);
+	if (buffer == NULL)
+		return -ENOMEM;
+	length = maint_io.length/sizeof(u32);
+	offset = maint_io.offset;
+
+	for (i = 0; i < length; i++) {
+		if (local)
+			ret = __rio_local_read_config_32(mport,
+				offset, &buffer[i]);
+		else
+			ret = rio_mport_read_config_32(mport, maint_io.rioid,
+				maint_io.hopcount, offset, &buffer[i]);
+		if (ret)
+			goto out;
+
+		offset += 4;
+	}
+
+	if (unlikely(copy_to_user(maint_io.buffer, buffer, maint_io.length)))
+		ret = -EFAULT;
+out:
+	vfree(buffer);
+	return ret;
+}
+
+static int rio_mport_maint_wr(struct mport_cdev_priv *priv, void __user *arg,
+			      int local)
+{
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_mport_maint_io maint_io;
+	u32 *buffer;
+	u32 offset;
+	size_t length;
+	int ret = -EINVAL, i;
+
+	if (unlikely(copy_from_user(&maint_io, arg, sizeof(maint_io))))
+		return -EFAULT;
+
+	if ((maint_io.offset % 4) ||
+	    (maint_io.length == 0) || (maint_io.length % 4))
+		return -EINVAL;
+
+	buffer = vmalloc(maint_io.length);
+	if (buffer == NULL)
+		return -ENOMEM;
+	length = maint_io.length;
+
+	if (unlikely(copy_from_user(buffer, maint_io.buffer, length))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	offset = maint_io.offset;
+	length /= sizeof(u32);
+
+	for (i = 0; i < length; i++) {
+		if (local)
+			ret = __rio_local_write_config_32(mport,
+							  offset, buffer[i]);
+		else
+			ret = rio_mport_write_config_32(mport, maint_io.rioid,
+							maint_io.hopcount,
+							offset, buffer[i]);
+		if (ret)
+			goto out;
+
+		offset += 4;
+	}
+
+out:
+	vfree(buffer);
+	return ret;
+}
+
+
+/*
+ * Inbound/outbound memory mapping functions
+ */
+static int
+rio_mport_create_outbound_mapping(struct mport_dev *md, struct file *filp,
+				  u32 rioid, u64 raddr, u32 size,
+				  dma_addr_t *paddr)
+{
+	struct rio_mport *mport = md->mport;
+	struct rio_mport_mapping *map;
+	int ret;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%x", rioid, raddr, size);
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	ret = rio_map_outb_region(mport, rioid, raddr, size, 0, paddr);
+	if (ret < 0)
+		goto err_map_outb;
+
+	map->dir = MAP_OUTBOUND;
+	map->rioid = rioid;
+	map->rio_addr = raddr;
+	map->size = size;
+	map->phys_addr = *paddr;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	list_add_tail(&map->node, &md->mappings);
+	return 0;
+err_map_outb:
+	kfree(map);
+	return ret;
+}
+
+static int
+rio_mport_get_outbound_mapping(struct mport_dev *md, struct file *filp,
+			       u32 rioid, u64 raddr, u32 size,
+			       dma_addr_t *paddr)
+{
+	struct rio_mport_mapping *map;
+	int err = -ENOMEM;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (map->dir != MAP_OUTBOUND)
+			continue;
+		if (rioid == map->rioid &&
+		    raddr == map->rio_addr && size == map->size) {
+			*paddr = map->phys_addr;
+			err = 0;
+			break;
+		} else if (rioid == map->rioid &&
+			   raddr < (map->rio_addr + map->size - 1) &&
+			   (raddr + size) > map->rio_addr) {
+			err = -EBUSY;
+			break;
+		}
+	}
+
+	/* If not found, create new */
+	if (err == -ENOMEM)
+		err = rio_mport_create_outbound_mapping(md, filp, rioid, raddr,
+						size, paddr);
+	mutex_unlock(&md->buf_mutex);
+	return err;
+}
+
+static int rio_mport_obw_map(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *data = priv->md;
+	struct rio_mmap map;
+	dma_addr_t paddr;
+	int ret;
+
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap))))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "did=%d ra=0x%llx sz=0x%llx",
+		   map.rioid, map.rio_addr, map.length);
+
+	ret = rio_mport_get_outbound_mapping(data, filp, map.rioid,
+					     map.rio_addr, map.length, &paddr);
+	if (ret < 0) {
+		rmcd_error("Failed to set OBW err= %d", ret);
+		return ret;
+	}
+
+	map.handle = paddr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap))))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * rio_mport_obw_free() - unmap an OutBound Window from RapidIO address space
+ *
+ * @priv: driver private data
+ * @arg:  buffer handle returned by allocation routine
+ */
+static int rio_mport_obw_free(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	struct rio_mport_mapping *map, *_map;
+
+	if (!md->mport->ops->unmap_outb)
+		return -EPROTONOSUPPORT;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+
+	rmcd_debug(OBW, "h=0x%llx", handle);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_OUTBOUND && map->phys_addr == handle) {
+			if (map->filp == filp) {
+				rmcd_debug(OBW, "kref_put h=0x%llx", handle);
+				map->filp = NULL;
+				kref_put(&map->ref, mport_release_mapping);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	return 0;
+}
+
+/*
+ * maint_hdid_set() - Set the host Device ID
+ * @priv: driver private data
+ * @arg:	Device Id
+ */
+static int maint_hdid_set(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint16_t hdid;
+
+	if (copy_from_user(&hdid, arg, sizeof(uint16_t)))
+		return -EFAULT;
+
+	md->mport->host_deviceid = hdid;
+	md->properties.hdid = hdid;
+	rio_local_set_device_id(md->mport, hdid);
+
+	rmcd_debug(MPORT, "Set host device Id to %d", hdid);
+
+	return 0;
+}
+
+/*
+ * maint_comptag_set() - Set the host Component Tag
+ * @priv: driver private data
+ * @arg:	Component Tag
+ */
+static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint32_t comptag;
+
+	if (copy_from_user(&comptag, arg, sizeof(uint32_t)))
+		return -EFAULT;
+
+	rio_local_write_config_32(md->mport, RIO_COMPONENT_TAG_CSR, comptag);
+
+	rmcd_debug(MPORT, "Set host Component Tag to %d", comptag);
+
+	return 0;
+}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+struct mport_dma_req {
+	struct list_head node;
+	struct file *filp;
+	struct mport_cdev_priv *priv;
+	enum rio_transfer_sync sync;
+	struct sg_table sgt;
+	struct page **page_list;
+	unsigned int nr_pages;
+	struct rio_mport_mapping *map;
+	struct dma_chan *dmach;
+	enum dma_data_direction dir;
+	dma_cookie_t cookie;
+	enum dma_status	status;
+	struct completion req_comp;
+};
+
+struct mport_faf_work {
+	struct work_struct work;
+	struct mport_dma_req *req;
+};
+
+static void mport_release_def_dma(struct kref *dma_ref)
+{
+	struct mport_dev *md =
+			container_of(dma_ref, struct mport_dev, dma_ref);
+
+	rmcd_debug(EXIT, "DMA_%d", md->dma_chan->chan_id);
+	rio_release_dma(md->dma_chan);
+	md->dma_chan = NULL;
+}
+
+static void mport_release_dma(struct kref *dma_ref)
+{
+	struct mport_cdev_priv *priv =
+			container_of(dma_ref, struct mport_cdev_priv, dma_ref);
+
+	rmcd_debug(EXIT, "DMA_%d", priv->dmach->chan_id);
+	complete(&priv->comp);
+}
+
+static void dma_req_free(struct mport_dma_req *req)
+{
+	struct mport_cdev_priv *priv = req->priv;
+	unsigned int i;
+
+	dma_unmap_sg(req->dmach->device->dev,
+		     req->sgt.sgl, req->sgt.nents, req->dir);
+	sg_free_table(&req->sgt);
+	if (req->page_list) {
+		for (i = 0; i < req->nr_pages; i++)
+			put_page(req->page_list[i]);
+		kfree(req->page_list);
+	}
+
+	if (req->map) {
+		mutex_lock(&req->map->md->buf_mutex);
+		kref_put(&req->map->ref, mport_release_mapping);
+		mutex_unlock(&req->map->md->buf_mutex);
+	}
+
+	kref_put(&priv->dma_ref, mport_release_dma);
+
+	kfree(req);
+}
+
+static void dma_xfer_callback(void *param)
+{
+	struct mport_dma_req *req = (struct mport_dma_req *)param;
+	struct mport_cdev_priv *priv = req->priv;
+
+	req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
+					       NULL, NULL);
+	complete(&req->req_comp);
+}
+
+static void dma_faf_cleanup(struct work_struct *_work)
+{
+	struct mport_faf_work *work = container_of(_work,
+						struct mport_faf_work, work);
+	struct mport_dma_req *req = work->req;
+
+	dma_req_free(req);
+	kfree(work);
+}
+
+static void dma_faf_callback(void *param)
+{
+	struct mport_dma_req *req = (struct mport_dma_req *)param;
+	struct mport_faf_work *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->work, dma_faf_cleanup);
+	work->req = req;
+	queue_work(dma_wq, &work->work);
+}
+
+/*
+ * prep_dma_xfer() - Configure and send request to DMAengine to prepare DMA
+ *                   transfer object.
+ * Returns pointer to DMA transaction descriptor allocated by DMA driver on
+ * success or ERR_PTR (and/or NULL) if failed. Caller must check returned
+ * non-NULL pointer using IS_ERR macro.
+ */
+static struct dma_async_tx_descriptor
+*prep_dma_xfer(struct dma_chan *chan, struct rio_transfer_io *transfer,
+	struct sg_table *sgt, int nents, enum dma_transfer_direction dir,
+	enum dma_ctrl_flags flags)
+{
+	struct rio_dma_data tx_data;
+
+	tx_data.sg = sgt->sgl;
+	tx_data.sg_len = nents;
+	tx_data.rio_addr_u = 0;
+	tx_data.rio_addr = transfer->rio_addr;
+	if (dir == DMA_MEM_TO_DEV) {
+		switch (transfer->method) {
+		case RIO_EXCHANGE_NWRITE:
+			tx_data.wr_type = RDW_ALL_NWRITE;
+			break;
+		case RIO_EXCHANGE_NWRITE_R_ALL:
+			tx_data.wr_type = RDW_ALL_NWRITE_R;
+			break;
+		case RIO_EXCHANGE_NWRITE_R:
+			tx_data.wr_type = RDW_LAST_NWRITE_R;
+			break;
+		case RIO_EXCHANGE_DEFAULT:
+			tx_data.wr_type = RDW_DEFAULT;
+			break;
+		default:
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	return rio_dma_prep_xfer(chan, transfer->rioid, &tx_data, dir, flags);
+}
+
+/* Request DMA channel associated with this mport device.
+ * Try to request DMA channel for every new process that opened given
+ * mport. If a new DMA channel is not available use default channel
+ * which is the first DMA channel opened on mport device.
+ */
+static int get_dma_channel(struct mport_cdev_priv *priv)
+{
+	mutex_lock(&priv->dma_lock);
+	if (!priv->dmach) {
+		priv->dmach = rio_request_mport_dma(priv->md->mport);
+		if (!priv->dmach) {
+			/* Use default DMA channel if available */
+			if (priv->md->dma_chan) {
+				priv->dmach = priv->md->dma_chan;
+				kref_get(&priv->md->dma_ref);
+			} else {
+				rmcd_error("Failed to get DMA channel");
+				mutex_unlock(&priv->dma_lock);
+				return -ENODEV;
+			}
+		} else if (!priv->md->dma_chan) {
+			/* Register default DMA channel if we do not have one */
+			priv->md->dma_chan = priv->dmach;
+			kref_init(&priv->md->dma_ref);
+			rmcd_debug(DMA, "Register DMA_chan %d as default",
+				   priv->dmach->chan_id);
+		}
+
+		kref_init(&priv->dma_ref);
+		init_completion(&priv->comp);
+	}
+
+	kref_get(&priv->dma_ref);
+	mutex_unlock(&priv->dma_lock);
+	return 0;
+}
+
+static void put_dma_channel(struct mport_cdev_priv *priv)
+{
+	kref_put(&priv->dma_ref, mport_release_dma);
+}
+
+/*
+ * DMA transfer functions
+ */
+static int do_dma_request(struct mport_dma_req *req,
+			  struct rio_transfer_io *xfer,
+			  enum rio_transfer_sync sync, int nents)
+{
+	struct mport_cdev_priv *priv;
+	struct sg_table *sgt;
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	unsigned long tmo = msecs_to_jiffies(dma_timeout);
+	enum dma_transfer_direction dir;
+	long wret;
+	int ret = 0;
+
+	priv = req->priv;
+	sgt = &req->sgt;
+
+	chan = priv->dmach;
+	dir = (req->dir == DMA_FROM_DEVICE) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+
+	rmcd_debug(DMA, "%s(%d) uses %s for DMA_%s",
+		   current->comm, task_pid_nr(current),
+		   dev_name(&chan->dev->device),
+		   (dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
+
+	/* Initialize DMA transaction request */
+	tx = prep_dma_xfer(chan, xfer, sgt, nents, dir,
+			   DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+
+	if (!tx) {
+		rmcd_debug(DMA, "prep error for %s A:0x%llx L:0x%llx",
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			xfer->rio_addr, xfer->length);
+		ret = -EIO;
+		goto err_out;
+	} else if (IS_ERR(tx)) {
+		ret = PTR_ERR(tx);
+		rmcd_debug(DMA, "prep error %d for %s A:0x%llx L:0x%llx", ret,
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			xfer->rio_addr, xfer->length);
+		goto err_out;
+	}
+
+	if (sync == RIO_TRANSFER_FAF)
+		tx->callback = dma_faf_callback;
+	else
+		tx->callback = dma_xfer_callback;
+	tx->callback_param = req;
+
+	req->dmach = chan;
+	req->sync = sync;
+	req->status = DMA_IN_PROGRESS;
+	init_completion(&req->req_comp);
+
+	cookie = dmaengine_submit(tx);
+	req->cookie = cookie;
+
+	rmcd_debug(DMA, "pid=%d DMA_%s tx_cookie = %d", task_pid_nr(current),
+		   (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+
+	if (dma_submit_error(cookie)) {
+		rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
+			   cookie, xfer->rio_addr, xfer->length);
+		ret = -EIO;
+		goto err_out;
+	}
+
+	dma_async_issue_pending(chan);
+
+	if (sync == RIO_TRANSFER_ASYNC) {
+		spin_lock(&priv->req_lock);
+		list_add_tail(&req->node, &priv->async_list);
+		spin_unlock(&priv->req_lock);
+		return cookie;
+	} else if (sync == RIO_TRANSFER_FAF)
+		return 0;
+
+	wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo);
+
+	if (wret == 0) {
+		/* Timeout on wait occurred */
+		rmcd_error("%s(%d) timed out waiting for DMA_%s %d",
+		       current->comm, task_pid_nr(current),
+		       (dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+		return -ETIMEDOUT;
+	} else if (wret == -ERESTARTSYS) {
+		/* Wait_for_completion was interrupted by a signal but DMA may
+		 * be in progress
+		 */
+		rmcd_error("%s(%d) wait for DMA_%s %d was interrupted",
+			current->comm, task_pid_nr(current),
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE", cookie);
+		return -EINTR;
+	}
+
+	if (req->status != DMA_COMPLETE) {
+		/* DMA transaction completion was signaled with error */
+		rmcd_error("%s(%d) DMA_%s %d completed with status %d (ret=%d)",
+			current->comm, task_pid_nr(current),
+			(dir == DMA_DEV_TO_MEM)?"READ":"WRITE",
+			cookie, req->status, ret);
+		ret = -EIO;
+	}
+
+err_out:
+	return ret;
+}
+
+/*
+ * rio_dma_transfer() - Perform RapidIO DMA data transfer to/from
+ *                      the remote RapidIO device
+ * @filp: file pointer associated with the call
+ * @transfer_mode: DMA transfer mode
+ * @sync: synchronization mode
+ * @dir: DMA transfer direction (DMA_MEM_TO_DEV = write OR
+ *                               DMA_DEV_TO_MEM = read)
+ * @xfer: data transfer descriptor structure
+ */
+static int
+rio_dma_transfer(struct file *filp, uint32_t transfer_mode,
+		 enum rio_transfer_sync sync, enum dma_data_direction dir,
+		 struct rio_transfer_io *xfer)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	unsigned long nr_pages = 0;
+	struct page **page_list = NULL;
+	struct mport_dma_req *req;
+	struct mport_dev *md = priv->md;
+	struct dma_chan *chan;
+	int i, ret;
+	int nents;
+
+	if (xfer->length == 0)
+		return -EINVAL;
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	ret = get_dma_channel(priv);
+	if (ret) {
+		kfree(req);
+		return ret;
+	}
+
+	/*
+	 * If parameter loc_addr != NULL, we are transferring data from/to
+	 * data buffer allocated in user-space: lock in memory user-space
+	 * buffer pages and build an SG table for DMA transfer request
+	 *
+	 * Otherwise (loc_addr == NULL) contiguous kernel-space buffer is
+	 * used for DMA data transfers: build single entry SG table using
+	 * offset within the internal buffer specified by handle parameter.
+	 */
+	if (xfer->loc_addr) {
+		unsigned long offset;
+		long pinned;
+
+		offset = (unsigned long)xfer->loc_addr & ~PAGE_MASK;
+		nr_pages = PAGE_ALIGN(xfer->length + offset) >> PAGE_SHIFT;
+
+		page_list = kmalloc_array(nr_pages,
+					  sizeof(*page_list), GFP_KERNEL);
+		if (page_list == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		down_read(&current->mm->mmap_sem);
+		pinned = get_user_pages(current, current->mm,
+				(unsigned long)xfer->loc_addr & PAGE_MASK,
+				nr_pages, dir == DMA_FROM_DEVICE, 0,
+				page_list, NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (pinned != nr_pages) {
+			if (pinned < 0) {
+				rmcd_error("get_user_pages err=%ld", pinned);
+				nr_pages = 0;
+			} else
+				rmcd_error("pinned %ld out of %ld pages",
+					   pinned, nr_pages);
+			ret = -EFAULT;
+			goto err_pg;
+		}
+
+		ret = sg_alloc_table_from_pages(&req->sgt, page_list, nr_pages,
+					offset, xfer->length, GFP_KERNEL);
+		if (ret) {
+			rmcd_error("sg_alloc_table failed with err=%d", ret);
+			goto err_pg;
+		}
+
+		req->page_list = page_list;
+		req->nr_pages = nr_pages;
+	} else {
+		dma_addr_t baddr;
+		struct rio_mport_mapping *map;
+
+		baddr = (dma_addr_t)xfer->handle;
+
+		mutex_lock(&md->buf_mutex);
+		list_for_each_entry(map, &md->mappings, node) {
+			if (baddr >= map->phys_addr &&
+			    baddr < (map->phys_addr + map->size)) {
+				kref_get(&map->ref);
+				req->map = map;
+				break;
+			}
+		}
+		mutex_unlock(&md->buf_mutex);
+
+		if (req->map == NULL) {
+			ret = -ENOMEM;
+			goto err_req;
+		}
+
+		if (xfer->length + xfer->offset > map->size) {
+			ret = -EINVAL;
+			goto err_req;
+		}
+
+		ret = sg_alloc_table(&req->sgt, 1, GFP_KERNEL);
+		if (unlikely(ret)) {
+			rmcd_error("sg_alloc_table failed for internal buf");
+			goto err_req;
+		}
+
+		sg_set_buf(req->sgt.sgl,
+			   map->virt_addr + (baddr - map->phys_addr) +
+				xfer->offset, xfer->length);
+	}
+
+	req->dir = dir;
+	req->filp = filp;
+	req->priv = priv;
+	chan = priv->dmach;
+
+	nents = dma_map_sg(chan->device->dev,
+			   req->sgt.sgl, req->sgt.nents, dir);
+	if (nents == -EFAULT) {
+		rmcd_error("Failed to map SG list");
+		return -EFAULT;
+	}
+
+	ret = do_dma_request(req, xfer, sync, nents);
+
+	if (ret >= 0) {
+		if (sync == RIO_TRANSFER_SYNC)
+			goto sync_out;
+		return ret; /* return ASYNC cookie */
+	}
+
+	if (ret == -ETIMEDOUT || ret == -EINTR) {
+		/*
+		 * This can happen only in case of SYNC transfer.
+		 * Do not free unfinished request structure immediately.
+		 * Place it into pending list and deal with it later
+		 */
+		spin_lock(&priv->req_lock);
+		list_add_tail(&req->node, &priv->pend_list);
+		spin_unlock(&priv->req_lock);
+		return ret;
+	}
+
+
+	rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
+sync_out:
+	dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
+	sg_free_table(&req->sgt);
+err_pg:
+	if (page_list) {
+		for (i = 0; i < nr_pages; i++)
+			put_page(page_list[i]);
+		kfree(page_list);
+	}
+err_req:
+	if (req->map) {
+		mutex_lock(&md->buf_mutex);
+		kref_put(&req->map->ref, mport_release_mapping);
+		mutex_unlock(&md->buf_mutex);
+	}
+	put_dma_channel(priv);
+	kfree(req);
+	return ret;
+}
+
+static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct rio_transaction transaction;
+	struct rio_transfer_io *transfer;
+	enum dma_data_direction dir;
+	int i, ret = 0;
+
+	if (unlikely(copy_from_user(&transaction, arg, sizeof(transaction))))
+		return -EFAULT;
+
+	if (transaction.count != 1)
+		return -EINVAL;
+
+	if ((transaction.transfer_mode &
+	     priv->md->properties.transfer_mode) == 0)
+		return -ENODEV;
+
+	transfer = vmalloc(transaction.count * sizeof(struct rio_transfer_io));
+	if (!transfer)
+		return -ENOMEM;
+
+	if (unlikely(copy_from_user(transfer, transaction.block,
+	      transaction.count * sizeof(struct rio_transfer_io)))) {
+		ret = -EFAULT;
+		goto out_free;
+	}
+
+	dir = (transaction.dir == RIO_TRANSFER_DIR_READ) ?
+					DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	for (i = 0; i < transaction.count && ret == 0; i++)
+		ret = rio_dma_transfer(filp, transaction.transfer_mode,
+			transaction.sync, dir, &transfer[i]);
+
+	if (unlikely(copy_to_user(transaction.block, transfer,
+	      transaction.count * sizeof(struct rio_transfer_io))))
+		ret = -EFAULT;
+
+out_free:
+	vfree(transfer);
+
+	return ret;
+}
+
+static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv;
+	struct mport_dev *md;
+	struct rio_async_tx_wait w_param;
+	struct mport_dma_req *req;
+	dma_cookie_t cookie;
+	unsigned long tmo;
+	long wret;
+	int found = 0;
+	int ret;
+
+	priv = (struct mport_cdev_priv *)filp->private_data;
+	md = priv->md;
+
+	if (unlikely(copy_from_user(&w_param, arg, sizeof(w_param))))
+		return -EFAULT;
+
+	cookie = w_param.token;
+	if (w_param.timeout)
+		tmo = msecs_to_jiffies(w_param.timeout);
+	else /* Use default DMA timeout */
+		tmo = msecs_to_jiffies(dma_timeout);
+
+	spin_lock(&priv->req_lock);
+	list_for_each_entry(req, &priv->async_list, node) {
+		if (req->cookie == cookie) {
+			list_del(&req->node);
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&priv->req_lock);
+
+	if (!found)
+		return -EAGAIN;
+
+	wret = wait_for_completion_interruptible_timeout(&req->req_comp, tmo);
+
+	if (wret == 0) {
+		/* Timeout on wait occurred */
+		rmcd_error("%s(%d) timed out waiting for ASYNC DMA_%s",
+		       current->comm, task_pid_nr(current),
+		       (req->dir == DMA_FROM_DEVICE)?"READ":"WRITE");
+		ret = -ETIMEDOUT;
+		goto err_tmo;
+	} else if (wret == -ERESTARTSYS) {
+		/* Wait_for_completion was interrupted by a signal but DMA may
+		 * be still in progress
+		 */
+		rmcd_error("%s(%d) wait for ASYNC DMA_%s was interrupted",
+			current->comm, task_pid_nr(current),
+			(req->dir == DMA_FROM_DEVICE)?"READ":"WRITE");
+		ret = -EINTR;
+		goto err_tmo;
+	}
+
+	if (req->status != DMA_COMPLETE) {
+		/* DMA transaction completion signaled with transfer error */
+		rmcd_error("%s(%d) ASYNC DMA_%s completion with status %d",
+			current->comm, task_pid_nr(current),
+			(req->dir == DMA_FROM_DEVICE)?"READ":"WRITE",
+			req->status);
+		ret = -EIO;
+	} else
+		ret = 0;
+
+	if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
+		dma_req_free(req);
+
+	return ret;
+
+err_tmo:
+	/* Return request back into async queue */
+	spin_lock(&priv->req_lock);
+	list_add_tail(&req->node, &priv->async_list);
+	spin_unlock(&priv->req_lock);
+	return ret;
+}
+
+static int rio_mport_create_dma_mapping(struct mport_dev *md, struct file *filp,
+			uint64_t size, struct rio_mport_mapping **mapping)
+{
+	struct rio_mport_mapping *map;
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->virt_addr = dma_alloc_coherent(md->mport->dev.parent, size,
+					    &map->phys_addr, GFP_KERNEL);
+	if (map->virt_addr == NULL) {
+		kfree(map);
+		return -ENOMEM;
+	}
+
+	map->dir = MAP_DMA;
+	map->size = size;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	mutex_lock(&md->buf_mutex);
+	list_add_tail(&map->node, &md->mappings);
+	mutex_unlock(&md->buf_mutex);
+	*mapping = map;
+
+	return 0;
+}
+
+static int rio_mport_alloc_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	struct rio_dma_mem map;
+	struct rio_mport_mapping *mapping = NULL;
+	int ret;
+
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_dma_mem))))
+		return -EFAULT;
+
+	ret = rio_mport_create_dma_mapping(md, filp, map.length, &mapping);
+	if (ret)
+		return ret;
+
+	map.dma_handle = mapping->phys_addr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_dma_mem)))) {
+		mutex_lock(&md->buf_mutex);
+		kref_put(&mapping->ref, mport_release_mapping);
+		mutex_unlock(&md->buf_mutex);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int rio_mport_free_dma(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	int ret = -EFAULT;
+	struct rio_mport_mapping *map, *_map;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+	rmcd_debug(EXIT, "filp=%p", filp);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_DMA && map->phys_addr == handle &&
+		    map->filp == filp) {
+			kref_put(&map->ref, mport_release_mapping);
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (ret == -EFAULT) {
+		rmcd_debug(DMA, "ERR no matching mapping");
+		return ret;
+	}
+
+	return 0;
+}
+#else
+static int rio_mport_transfer_ioctl(struct file *filp, void *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_alloc_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+
+static int rio_mport_free_dma(struct file *filp, void __user *arg)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
+/*
+ * Inbound/outbound memory mapping functions
+ */
+
+static int
+rio_mport_create_inbound_mapping(struct mport_dev *md, struct file *filp,
+				u64 raddr, u32 size,
+				struct rio_mport_mapping **mapping)
+{
+	struct rio_mport *mport = md->mport;
+	struct rio_mport_mapping *map;
+	int ret;
+
+	map = kzalloc(sizeof(struct rio_mport_mapping), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	map->virt_addr = dma_alloc_coherent(mport->dev.parent, size,
+					    &map->phys_addr, GFP_KERNEL);
+	if (map->virt_addr == NULL) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	if (raddr == RIO_MAP_ANY_ADDR)
+		raddr = map->phys_addr;
+	ret = rio_map_inb_region(mport, map->phys_addr, raddr, size, 0);
+	if (ret < 0)
+		goto err_map_inb;
+
+	map->dir = MAP_INBOUND;
+	map->rio_addr = raddr;
+	map->size = size;
+	map->filp = filp;
+	map->md = md;
+	kref_init(&map->ref);
+	mutex_lock(&md->buf_mutex);
+	list_add_tail(&map->node, &md->mappings);
+	mutex_unlock(&md->buf_mutex);
+	*mapping = map;
+	return 0;
+
+err_map_inb:
+	dma_free_coherent(mport->dev.parent, size,
+			  map->virt_addr, map->phys_addr);
+err_dma_alloc:
+	kfree(map);
+	return ret;
+}
+
+static int
+rio_mport_get_inbound_mapping(struct mport_dev *md, struct file *filp,
+			      u64 raddr, u32 size,
+			      struct rio_mport_mapping **mapping)
+{
+	struct rio_mport_mapping *map;
+	int err = -ENOMEM;
+
+	if (raddr == RIO_MAP_ANY_ADDR)
+		goto get_new;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (map->dir != MAP_INBOUND)
+			continue;
+		if (raddr == map->rio_addr && size == map->size) {
+			/* allow exact match only */
+			*mapping = map;
+			err = 0;
+			break;
+		} else if (raddr < (map->rio_addr + map->size - 1) &&
+			   (raddr + size) > map->rio_addr) {
+			err = -EBUSY;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (err != -ENOMEM)
+		return err;
+get_new:
+	/* not found, create new */
+	return rio_mport_create_inbound_mapping(md, filp, raddr, size, mapping);
+}
+
+static int rio_mport_map_inbound(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	struct rio_mmap map;
+	struct rio_mport_mapping *mapping = NULL;
+	int ret;
+
+	if (!md->mport->ops->map_inb)
+		return -EPROTONOSUPPORT;
+	if (unlikely(copy_from_user(&map, arg, sizeof(struct rio_mmap))))
+		return -EFAULT;
+
+	rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	ret = rio_mport_get_inbound_mapping(md, filp, map.rio_addr,
+					    map.length, &mapping);
+	if (ret)
+		return ret;
+
+	map.handle = mapping->phys_addr;
+	map.rio_addr = mapping->rio_addr;
+
+	if (unlikely(copy_to_user(arg, &map, sizeof(struct rio_mmap)))) {
+		/* Delete mapping if it was created by this request */
+		if (ret == 0 && mapping->filp == filp) {
+			mutex_lock(&md->buf_mutex);
+			kref_put(&mapping->ref, mport_release_mapping);
+			mutex_unlock(&md->buf_mutex);
+		}
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * rio_mport_inbound_free() - unmap from RapidIO address space and free
+ *                    previously allocated inbound DMA coherent buffer
+ * @priv: driver private data
+ * @arg:  buffer handle returned by allocation routine
+ */
+static int rio_mport_inbound_free(struct file *filp, void __user *arg)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md = priv->md;
+	u64 handle;
+	struct rio_mport_mapping *map, *_map;
+
+	rmcd_debug(IBW, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	if (!md->mport->ops->unmap_inb)
+		return -EPROTONOSUPPORT;
+
+	if (copy_from_user(&handle, arg, sizeof(u64)))
+		return -EFAULT;
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		if (map->dir == MAP_INBOUND && map->phys_addr == handle) {
+			if (map->filp == filp) {
+				map->filp = NULL;
+				kref_put(&map->ref, mport_release_mapping);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	return 0;
+}
+
+/*
+ * maint_port_idx_get() - Get the port index of the mport instance
+ * @priv: driver private data
+ * @arg:  port index
+ */
+static int maint_port_idx_get(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	uint32_t port_idx = md->mport->index;
+
+	rmcd_debug(MPORT, "port_index=%d", port_idx);
+
+	if (copy_to_user(arg, &port_idx, sizeof(port_idx)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int rio_mport_add_event(struct mport_cdev_priv *priv,
+			       struct rio_event *event)
+{
+	int overflow;
+
+	if (!(priv->event_mask & event->header))
+		return -EACCES;
+
+	spin_lock(&priv->fifo_lock);
+	overflow = kfifo_avail(&priv->event_fifo) < sizeof(*event)
+		|| kfifo_in(&priv->event_fifo, (unsigned char *)event,
+			sizeof(*event)) != sizeof(*event);
+	spin_unlock(&priv->fifo_lock);
+
+	wake_up_interruptible(&priv->event_rx_wait);
+
+	if (overflow) {
+		dev_warn(&priv->md->dev, DRV_NAME ": event fifo overflow\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void rio_mport_doorbell_handler(struct rio_mport *mport, void *dev_id,
+				       u16 src, u16 dst, u16 info)
+{
+	struct mport_dev *data = dev_id;
+	struct mport_cdev_priv *priv;
+	struct rio_mport_db_filter *db_filter;
+	struct rio_event event;
+	int handled;
+
+	event.header = RIO_DOORBELL;
+	event.u.doorbell.rioid = src;
+	event.u.doorbell.payload = info;
+
+	handled = 0;
+	spin_lock(&data->db_lock);
+	list_for_each_entry(db_filter, &data->doorbells, data_node) {
+		if (((db_filter->filter.rioid == 0xffffffff ||
+		      db_filter->filter.rioid == src)) &&
+		      info >= db_filter->filter.low &&
+		      info <= db_filter->filter.high) {
+			priv = db_filter->priv;
+			rio_mport_add_event(priv, &event);
+			handled = 1;
+		}
+	}
+	spin_unlock(&data->db_lock);
+
+	if (!handled)
+		dev_warn(&data->dev,
+			"%s: spurious DB received from 0x%x, info=0x%04x\n",
+			__func__, src, info);
+}
+
+static int rio_mport_add_db_filter(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_db_filter *db_filter;
+	struct rio_doorbell_filter filter;
+	unsigned long flags;
+	int ret;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	if (filter.low > filter.high)
+		return -EINVAL;
+
+	ret = rio_request_inb_dbell(md->mport, md, filter.low, filter.high,
+				    rio_mport_doorbell_handler);
+	if (ret) {
+		rmcd_error("%s failed to register IBDB, err=%d",
+			   dev_name(&md->dev), ret);
+		return ret;
+	}
+
+	db_filter = kzalloc(sizeof(*db_filter), GFP_KERNEL);
+	if (db_filter == NULL) {
+		rio_release_inb_dbell(md->mport, filter.low, filter.high);
+		return -ENOMEM;
+	}
+
+	db_filter->filter = filter;
+	db_filter->priv = priv;
+	spin_lock_irqsave(&md->db_lock, flags);
+	list_add_tail(&db_filter->priv_node, &priv->db_filters);
+	list_add_tail(&db_filter->data_node, &md->doorbells);
+	spin_unlock_irqrestore(&md->db_lock, flags);
+
+	return 0;
+}
+
+static void rio_mport_delete_db_filter(struct rio_mport_db_filter *db_filter)
+{
+	list_del(&db_filter->data_node);
+	list_del(&db_filter->priv_node);
+	kfree(db_filter);
+}
+
+static int rio_mport_remove_db_filter(struct mport_cdev_priv *priv,
+				      void __user *arg)
+{
+	struct rio_mport_db_filter *db_filter;
+	struct rio_doorbell_filter filter;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&priv->md->db_lock, flags);
+	list_for_each_entry(db_filter, &priv->db_filters, priv_node) {
+		if (db_filter->filter.rioid == filter.rioid &&
+		    db_filter->filter.low == filter.low &&
+		    db_filter->filter.high == filter.high) {
+			rio_mport_delete_db_filter(db_filter);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&priv->md->db_lock, flags);
+
+	if (!ret)
+		rio_release_inb_dbell(priv->md->mport, filter.low, filter.high);
+
+	return ret;
+}
+
+static int rio_mport_match_pw(union rio_pw_msg *msg,
+			      struct rio_pw_filter *filter)
+{
+	if ((msg->em.comptag & filter->mask) < filter->low ||
+		(msg->em.comptag & filter->mask) > filter->high)
+		return 0;
+	return 1;
+}
+
+static int rio_mport_pw_handler(struct rio_mport *mport, void *context,
+				union rio_pw_msg *msg, int step)
+{
+	struct mport_dev *md = context;
+	struct mport_cdev_priv *priv;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_event event;
+	int handled;
+
+	event.header = RIO_PORTWRITE;
+	memcpy(event.u.portwrite.payload, msg->raw, RIO_PW_MSG_SIZE);
+
+	handled = 0;
+	spin_lock(&md->pw_lock);
+	list_for_each_entry(pw_filter, &md->portwrites, md_node) {
+		if (rio_mport_match_pw(msg, &pw_filter->filter)) {
+			priv = pw_filter->priv;
+			rio_mport_add_event(priv, &event);
+			handled = 1;
+		}
+	}
+	spin_unlock(&md->pw_lock);
+
+	if (!handled) {
+		printk_ratelimited(KERN_WARNING DRV_NAME
+			": mport%d received spurious PW from 0x%08x\n",
+			mport->id, msg->em.comptag);
+	}
+
+	return 0;
+}
+
+static int rio_mport_add_pw_filter(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int hadd = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	pw_filter = kzalloc(sizeof(*pw_filter), GFP_KERNEL);
+	if (pw_filter == NULL)
+		return -ENOMEM;
+
+	pw_filter->filter = filter;
+	pw_filter->priv = priv;
+	spin_lock_irqsave(&md->pw_lock, flags);
+	if (list_empty(&md->portwrites))
+		hadd = 1;
+	list_add_tail(&pw_filter->priv_node, &priv->pw_filters);
+	list_add_tail(&pw_filter->md_node, &md->portwrites);
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hadd) {
+		int ret;
+
+		ret = rio_add_mport_pw_handler(md->mport, md,
+					       rio_mport_pw_handler);
+		if (ret) {
+			dev_err(&md->dev,
+				"%s: failed to add IB_PW handler, err=%d\n",
+				__func__, ret);
+			return ret;
+		}
+		rio_pw_enable(md->mport, 1);
+	}
+
+	return 0;
+}
+
+static void rio_mport_delete_pw_filter(struct rio_mport_pw_filter *pw_filter)
+{
+	list_del(&pw_filter->md_node);
+	list_del(&pw_filter->priv_node);
+	kfree(pw_filter);
+}
+
+static int rio_mport_match_pw_filter(struct rio_pw_filter *a,
+				     struct rio_pw_filter *b)
+{
+	if ((a->mask == b->mask) && (a->low == b->low) && (a->high == b->high))
+		return 1;
+	return 0;
+}
+
+static int rio_mport_remove_pw_filter(struct mport_cdev_priv *priv,
+				      void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_mport_pw_filter *pw_filter;
+	struct rio_pw_filter filter;
+	unsigned long flags;
+	int ret = -EINVAL;
+	int hdel = 0;
+
+	if (copy_from_user(&filter, arg, sizeof(filter)))
+		return -EFAULT;
+
+	spin_lock_irqsave(&md->pw_lock, flags);
+	list_for_each_entry(pw_filter, &priv->pw_filters, priv_node) {
+		if (rio_mport_match_pw_filter(&pw_filter->filter, &filter)) {
+			rio_mport_delete_pw_filter(pw_filter);
+			ret = 0;
+			break;
+		}
+	}
+
+	if (list_empty(&md->portwrites))
+		hdel = 1;
+	spin_unlock_irqrestore(&md->pw_lock, flags);
+
+	if (hdel) {
+		rio_del_mport_pw_handler(md->mport, priv->md,
+					 rio_mport_pw_handler);
+		rio_pw_enable(md->mport, 0);
+	}
+
+	return ret;
+}
+
+/*
+ * rio_release_dev - release routine for kernel RIO device object
+ * @dev: kernel device object associated with a RIO device structure
+ *
+ * Frees a RIO device struct associated a RIO device struct.
+ * The RIO device struct is freed.
+ */
+static void rio_release_dev(struct device *dev)
+{
+	struct rio_dev *rdev;
+
+	rdev = to_rio_dev(dev);
+	pr_info(DRV_PREFIX "%s: %s\n", __func__, rio_name(rdev));
+	kfree(rdev);
+}
+
+
+static void rio_release_net(struct device *dev)
+{
+	struct rio_net *net;
+
+	net = to_rio_net(dev);
+	rmcd_debug(RDEV, "net_%d", net->id);
+	kfree(net);
+}
+
+
+/*
+ * rio_mport_add_riodev - creates a kernel RIO device object
+ *
+ * Allocates a RIO device data structure and initializes required fields based
+ * on device's configuration space contents.
+ * If the device has switch capabilities, then a switch specific portion is
+ * allocated and configured.
+ */
+static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
+				   void __user *arg)
+{
+	struct mport_dev *md = priv->md;
+	struct rio_rdev_info dev_info;
+	struct rio_dev *rdev;
+	struct rio_switch *rswitch = NULL;
+	struct rio_mport *mport;
+	size_t size;
+	u32 rval;
+	u32 swpinfo = 0;
+	u16 destid;
+	u8 hopcount;
+	int err;
+
+	if (copy_from_user(&dev_info, arg, sizeof(dev_info)))
+		return -EFAULT;
+
+	rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name,
+		   dev_info.comptag, dev_info.destid, dev_info.hopcount);
+
+	if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) {
+		rmcd_debug(RDEV, "device %s already exists", dev_info.name);
+		return -EEXIST;
+	}
+
+	size = sizeof(struct rio_dev);
+	mport = md->mport;
+	destid = (u16)dev_info.destid;
+	hopcount = (u8)dev_info.hopcount;
+
+	if (rio_mport_read_config_32(mport, destid, hopcount,
+				     RIO_PEF_CAR, &rval))
+		return -EIO;
+
+	if (rval & RIO_PEF_SWITCH) {
+		rio_mport_read_config_32(mport, destid, hopcount,
+					 RIO_SWP_INFO_CAR, &swpinfo);
+		size += (RIO_GET_TOTAL_PORTS(swpinfo) *
+			 sizeof(rswitch->nextdev[0])) + sizeof(*rswitch);
+	}
+
+	rdev = kzalloc(size, GFP_KERNEL);
+	if (rdev == NULL)
+		return -ENOMEM;
+
+	if (mport->net == NULL) {
+		struct rio_net *net;
+
+		net = rio_alloc_net(mport);
+		if (!net) {
+			err = -ENOMEM;
+			rmcd_debug(RDEV, "failed to allocate net object");
+			goto cleanup;
+		}
+
+		net->id = mport->id;
+		net->hport = mport;
+		dev_set_name(&net->dev, "rnet_%d", net->id);
+		net->dev.parent = &mport->dev;
+		net->dev.release = rio_release_net;
+		err = rio_add_net(net);
+		if (err) {
+			rmcd_debug(RDEV, "failed to register net, err=%d", err);
+			kfree(net);
+			goto cleanup;
+		}
+	}
+
+	rdev->net = mport->net;
+	rdev->pef = rval;
+	rdev->swpinfo = swpinfo;
+	rio_mport_read_config_32(mport, destid, hopcount,
+				 RIO_DEV_ID_CAR, &rval);
+	rdev->did = rval >> 16;
+	rdev->vid = rval & 0xffff;
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_DEV_INFO_CAR,
+				 &rdev->device_rev);
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_ID_CAR,
+				 &rval);
+	rdev->asm_did = rval >> 16;
+	rdev->asm_vid = rval & 0xffff;
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_ASM_INFO_CAR,
+				 &rval);
+	rdev->asm_rev = rval >> 16;
+
+	if (rdev->pef & RIO_PEF_EXT_FEATURES) {
+		rdev->efptr = rval & 0xffff;
+		rdev->phys_efptr = rio_mport_get_physefb(mport, 0, destid,
+							 hopcount);
+
+		rdev->em_efptr = rio_mport_get_feature(mport, 0, destid,
+						hopcount, RIO_EFB_ERR_MGMNT);
+	}
+
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_SRC_OPS_CAR,
+				 &rdev->src_ops);
+	rio_mport_read_config_32(mport, destid, hopcount, RIO_DST_OPS_CAR,
+				 &rdev->dst_ops);
+
+	rdev->comp_tag = dev_info.comptag;
+	rdev->destid = destid;
+	/* hopcount is stored as specified by a caller, regardles of EP or SW */
+	rdev->hopcount = hopcount;
+
+	if (rdev->pef & RIO_PEF_SWITCH) {
+		rswitch = rdev->rswitch;
+		rswitch->route_table = NULL;
+	}
+
+	if (strlen(dev_info.name))
+		dev_set_name(&rdev->dev, "%s", dev_info.name);
+	else if (rdev->pef & RIO_PEF_SWITCH)
+		dev_set_name(&rdev->dev, "%02x:s:%04x", mport->id,
+			     rdev->comp_tag & RIO_CTAG_UDEVID);
+	else
+		dev_set_name(&rdev->dev, "%02x:e:%04x", mport->id,
+			     rdev->comp_tag & RIO_CTAG_UDEVID);
+
+	INIT_LIST_HEAD(&rdev->net_list);
+	rdev->dev.parent = &mport->net->dev;
+	rio_attach_device(rdev);
+	rdev->dev.release = rio_release_dev;
+
+	if (rdev->dst_ops & RIO_DST_OPS_DOORBELL)
+		rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE],
+				   0, 0xffff);
+	err = rio_add_device(rdev);
+	if (err)
+		goto cleanup;
+	rio_dev_get(rdev);
+
+	return 0;
+cleanup:
+	kfree(rdev);
+	return err;
+}
+
+static int rio_mport_del_riodev(struct mport_cdev_priv *priv, void __user *arg)
+{
+	struct rio_rdev_info dev_info;
+	struct rio_dev *rdev = NULL;
+	struct device  *dev;
+	struct rio_mport *mport;
+	struct rio_net *net;
+
+	if (copy_from_user(&dev_info, arg, sizeof(dev_info)))
+		return -EFAULT;
+
+	mport = priv->md->mport;
+
+	/* If device name is specified, removal by name has priority */
+	if (strlen(dev_info.name)) {
+		dev = bus_find_device_by_name(&rio_bus_type, NULL,
+					      dev_info.name);
+		if (dev)
+			rdev = to_rio_dev(dev);
+	} else {
+		do {
+			rdev = rio_get_comptag(dev_info.comptag, rdev);
+			if (rdev && rdev->dev.parent == &mport->net->dev &&
+			    rdev->destid == (u16)dev_info.destid &&
+			    rdev->hopcount == (u8)dev_info.hopcount)
+				break;
+		} while (rdev);
+	}
+
+	if (!rdev) {
+		rmcd_debug(RDEV,
+			"device name:%s ct:0x%x did:0x%x hc:0x%x not found",
+			dev_info.name, dev_info.comptag, dev_info.destid,
+			dev_info.hopcount);
+		return -ENODEV;
+	}
+
+	net = rdev->net;
+	rio_dev_put(rdev);
+	rio_del_device(rdev, RIO_DEVICE_SHUTDOWN);
+
+	if (list_empty(&net->devices)) {
+		rio_free_net(net);
+		mport->net = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Mport cdev management
+ */
+
+/*
+ * mport_cdev_open() - Open character device (mport)
+ */
+static int mport_cdev_open(struct inode *inode, struct file *filp)
+{
+	int ret;
+	int minor = iminor(inode);
+	struct mport_dev *chdev;
+	struct mport_cdev_priv *priv;
+
+	/* Test for valid device */
+	if (minor >= RIO_MAX_MPORTS) {
+		rmcd_error("Invalid minor device number");
+		return -EINVAL;
+	}
+
+	chdev = container_of(inode->i_cdev, struct mport_dev, cdev);
+
+	rmcd_debug(INIT, "%s filp=%p", dev_name(&chdev->dev), filp);
+
+	if (atomic_read(&chdev->active) == 0)
+		return -ENODEV;
+
+	get_device(&chdev->dev);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		put_device(&chdev->dev);
+		return -ENOMEM;
+	}
+
+	priv->md = chdev;
+
+	mutex_lock(&chdev->file_mutex);
+	list_add_tail(&priv->list, &chdev->file_list);
+	mutex_unlock(&chdev->file_mutex);
+
+	INIT_LIST_HEAD(&priv->db_filters);
+	INIT_LIST_HEAD(&priv->pw_filters);
+	spin_lock_init(&priv->fifo_lock);
+	init_waitqueue_head(&priv->event_rx_wait);
+	ret = kfifo_alloc(&priv->event_fifo,
+			  sizeof(struct rio_event) * MPORT_EVENT_DEPTH,
+			  GFP_KERNEL);
+	if (ret < 0) {
+		dev_err(&chdev->dev, DRV_NAME ": kfifo_alloc failed\n");
+		ret = -ENOMEM;
+		goto err_fifo;
+	}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	INIT_LIST_HEAD(&priv->async_list);
+	INIT_LIST_HEAD(&priv->pend_list);
+	spin_lock_init(&priv->req_lock);
+	mutex_init(&priv->dma_lock);
+#endif
+
+	filp->private_data = priv;
+	goto out;
+err_fifo:
+	kfree(priv);
+out:
+	return ret;
+}
+
+static int mport_cdev_fasync(int fd, struct file *filp, int mode)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+
+	return fasync_helper(fd, filp, mode, &priv->async_queue);
+}
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+static void mport_cdev_release_dma(struct file *filp)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md;
+	struct mport_dma_req *req, *req_next;
+	unsigned long tmo = msecs_to_jiffies(dma_timeout);
+	long wret;
+	LIST_HEAD(list);
+
+	rmcd_debug(EXIT, "from filp=%p %s(%d)",
+		   filp, current->comm, task_pid_nr(current));
+
+	if (!priv->dmach) {
+		rmcd_debug(EXIT, "No DMA channel for filp=%p", filp);
+		return;
+	}
+
+	md = priv->md;
+
+	flush_workqueue(dma_wq);
+
+	spin_lock(&priv->req_lock);
+	if (!list_empty(&priv->async_list)) {
+		rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		list_splice_init(&priv->async_list, &list);
+	}
+	spin_unlock(&priv->req_lock);
+
+	if (!list_empty(&list)) {
+		rmcd_debug(EXIT, "temp list not empty");
+		list_for_each_entry_safe(req, req_next, &list, node) {
+			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
+				   req->filp, req->cookie,
+				   completion_done(&req->req_comp)?"yes":"no");
+			list_del(&req->node);
+			dma_req_free(req);
+		}
+	}
+
+	if (!list_empty(&priv->pend_list)) {
+		rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		list_for_each_entry_safe(req,
+					 req_next, &priv->pend_list, node) {
+			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
+				   req->filp, req->cookie,
+				   completion_done(&req->req_comp)?"yes":"no");
+			list_del(&req->node);
+			dma_req_free(req);
+		}
+	}
+
+	put_dma_channel(priv);
+	wret = wait_for_completion_interruptible_timeout(&priv->comp, tmo);
+
+	if (wret <= 0) {
+		rmcd_error("%s(%d) failed waiting for DMA release err=%ld",
+			current->comm, task_pid_nr(current), wret);
+	}
+
+	spin_lock(&priv->req_lock);
+
+	if (!list_empty(&priv->pend_list)) {
+		rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+	}
+
+	spin_unlock(&priv->req_lock);
+
+	if (priv->dmach != priv->md->dma_chan) {
+		rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
+			   filp, current->comm, task_pid_nr(current));
+		rio_release_dma(priv->dmach);
+	} else {
+		rmcd_debug(EXIT, "Adjust default DMA channel refcount");
+		kref_put(&md->dma_ref, mport_release_def_dma);
+	}
+
+	priv->dmach = NULL;
+}
+#else
+#define mport_cdev_release_dma(priv) do {} while (0)
+#endif
+
+/*
+ * mport_cdev_release() - Release character device
+ */
+static int mport_cdev_release(struct inode *inode, struct file *filp)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *chdev;
+	struct rio_mport_pw_filter *pw_filter, *pw_filter_next;
+	struct rio_mport_db_filter *db_filter, *db_filter_next;
+	struct rio_mport_mapping *map, *_map;
+	unsigned long flags;
+
+	rmcd_debug(EXIT, "%s filp=%p", dev_name(&priv->md->dev), filp);
+
+	chdev = priv->md;
+	mport_cdev_release_dma(filp);
+
+	priv->event_mask = 0;
+
+	spin_lock_irqsave(&chdev->pw_lock, flags);
+	if (!list_empty(&priv->pw_filters)) {
+		list_for_each_entry_safe(pw_filter, pw_filter_next,
+					 &priv->pw_filters, priv_node)
+			rio_mport_delete_pw_filter(pw_filter);
+	}
+	spin_unlock_irqrestore(&chdev->pw_lock, flags);
+
+	spin_lock_irqsave(&chdev->db_lock, flags);
+	list_for_each_entry_safe(db_filter, db_filter_next,
+				 &priv->db_filters, priv_node) {
+		rio_mport_delete_db_filter(db_filter);
+	}
+	spin_unlock_irqrestore(&chdev->db_lock, flags);
+
+	kfifo_free(&priv->event_fifo);
+
+	mutex_lock(&chdev->buf_mutex);
+	list_for_each_entry_safe(map, _map, &chdev->mappings, node) {
+		if (map->filp == filp) {
+			rmcd_debug(EXIT, "release mapping %p filp=%p",
+				   map->virt_addr, filp);
+			kref_put(&map->ref, mport_release_mapping);
+		}
+	}
+	mutex_unlock(&chdev->buf_mutex);
+
+	mport_cdev_fasync(-1, filp, 0);
+	filp->private_data = NULL;
+	mutex_lock(&chdev->file_mutex);
+	list_del(&priv->list);
+	mutex_unlock(&chdev->file_mutex);
+	put_device(&chdev->dev);
+	kfree(priv);
+	return 0;
+}
+
+/*
+ * mport_cdev_ioctl() - IOCTLs for character device
+ */
+static long mport_cdev_ioctl(struct file *filp,
+		unsigned int cmd, unsigned long arg)
+{
+	int err = -EINVAL;
+	struct mport_cdev_priv *data = filp->private_data;
+	struct mport_dev *md = data->md;
+
+	if (atomic_read(&md->active) == 0)
+		return -ENODEV;
+
+	switch (cmd) {
+	case RIO_MPORT_MAINT_READ_LOCAL:
+		return rio_mport_maint_rd(data, (void __user *)arg, 1);
+	case RIO_MPORT_MAINT_WRITE_LOCAL:
+		return rio_mport_maint_wr(data, (void __user *)arg, 1);
+	case RIO_MPORT_MAINT_READ_REMOTE:
+		return rio_mport_maint_rd(data, (void __user *)arg, 0);
+	case RIO_MPORT_MAINT_WRITE_REMOTE:
+		return rio_mport_maint_wr(data, (void __user *)arg, 0);
+	case RIO_MPORT_MAINT_HDID_SET:
+		return maint_hdid_set(data, (void __user *)arg);
+	case RIO_MPORT_MAINT_COMPTAG_SET:
+		return maint_comptag_set(data, (void __user *)arg);
+	case RIO_MPORT_MAINT_PORT_IDX_GET:
+		return maint_port_idx_get(data, (void __user *)arg);
+	case RIO_MPORT_GET_PROPERTIES:
+		md->properties.hdid = md->mport->host_deviceid;
+		if (copy_to_user((void __user *)arg, &(data->md->properties),
+				 sizeof(data->md->properties)))
+			return -EFAULT;
+		return 0;
+	case RIO_ENABLE_DOORBELL_RANGE:
+		return rio_mport_add_db_filter(data, (void __user *)arg);
+	case RIO_DISABLE_DOORBELL_RANGE:
+		return rio_mport_remove_db_filter(data, (void __user *)arg);
+	case RIO_ENABLE_PORTWRITE_RANGE:
+		return rio_mport_add_pw_filter(data, (void __user *)arg);
+	case RIO_DISABLE_PORTWRITE_RANGE:
+		return rio_mport_remove_pw_filter(data, (void __user *)arg);
+	case RIO_SET_EVENT_MASK:
+		data->event_mask = arg;
+		return 0;
+	case RIO_GET_EVENT_MASK:
+		if (copy_to_user((void __user *)arg, &data->event_mask,
+				    sizeof(data->event_mask)))
+			return -EFAULT;
+		return 0;
+	case RIO_MAP_OUTBOUND:
+		return rio_mport_obw_map(filp, (void __user *)arg);
+	case RIO_MAP_INBOUND:
+		return rio_mport_map_inbound(filp, (void __user *)arg);
+	case RIO_UNMAP_OUTBOUND:
+		return rio_mport_obw_free(filp, (void __user *)arg);
+	case RIO_UNMAP_INBOUND:
+		return rio_mport_inbound_free(filp, (void __user *)arg);
+	case RIO_ALLOC_DMA:
+		return rio_mport_alloc_dma(filp, (void __user *)arg);
+	case RIO_FREE_DMA:
+		return rio_mport_free_dma(filp, (void __user *)arg);
+	case RIO_WAIT_FOR_ASYNC:
+		return rio_mport_wait_for_async_dma(filp, (void __user *)arg);
+	case RIO_TRANSFER:
+		return rio_mport_transfer_ioctl(filp, (void __user *)arg);
+	case RIO_DEV_ADD:
+		return rio_mport_add_riodev(data, (void __user *)arg);
+	case RIO_DEV_DEL:
+		return rio_mport_del_riodev(data, (void __user *)arg);
+	default:
+		break;
+	}
+
+	return err;
+}
+
+/*
+ * mport_release_mapping - free mapping resources and info structure
+ * @ref: a pointer to the kref within struct rio_mport_mapping
+ *
+ * NOTE: Shall be called while holding buf_mutex.
+ */
+static void mport_release_mapping(struct kref *ref)
+{
+	struct rio_mport_mapping *map =
+			container_of(ref, struct rio_mport_mapping, ref);
+	struct rio_mport *mport = map->md->mport;
+
+	rmcd_debug(MMAP, "type %d mapping @ %p (phys = %pad) for %s",
+		   map->dir, map->virt_addr,
+		   &map->phys_addr, mport->name);
+
+	list_del(&map->node);
+
+	switch (map->dir) {
+	case MAP_INBOUND:
+		rio_unmap_inb_region(mport, map->phys_addr);
+	case MAP_DMA:
+		dma_free_coherent(mport->dev.parent, map->size,
+				  map->virt_addr, map->phys_addr);
+		break;
+	case MAP_OUTBOUND:
+		rio_unmap_outb_region(mport, map->rioid, map->rio_addr);
+		break;
+	}
+	kfree(map);
+}
+
+static void mport_mm_open(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	kref_get(&map->ref);
+}
+
+static void mport_mm_close(struct vm_area_struct *vma)
+{
+	struct rio_mport_mapping *map = vma->vm_private_data;
+
+rmcd_debug(MMAP, "0x%pad", &map->phys_addr);
+	mutex_lock(&map->md->buf_mutex);
+	kref_put(&map->ref, mport_release_mapping);
+	mutex_unlock(&map->md->buf_mutex);
+}
+
+static const struct vm_operations_struct vm_ops = {
+	.open =	mport_mm_open,
+	.close = mport_mm_close,
+};
+
+static int mport_cdev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct mport_dev *md;
+	size_t size = vma->vm_end - vma->vm_start;
+	dma_addr_t baddr;
+	unsigned long offset;
+	int found = 0, ret;
+	struct rio_mport_mapping *map;
+
+	rmcd_debug(MMAP, "0x%x bytes at offset 0x%lx",
+		   (unsigned int)size, vma->vm_pgoff);
+
+	md = priv->md;
+	baddr = ((dma_addr_t)vma->vm_pgoff << PAGE_SHIFT);
+
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry(map, &md->mappings, node) {
+		if (baddr >= map->phys_addr &&
+		    baddr < (map->phys_addr + map->size)) {
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!found)
+		return -ENOMEM;
+
+	offset = baddr - map->phys_addr;
+
+	if (size + offset > map->size)
+		return -EINVAL;
+
+	vma->vm_pgoff = offset >> PAGE_SHIFT;
+	rmcd_debug(MMAP, "MMAP adjusted offset = 0x%lx", vma->vm_pgoff);
+
+	if (map->dir == MAP_INBOUND || map->dir == MAP_DMA)
+		ret = dma_mmap_coherent(md->mport->dev.parent, vma,
+				map->virt_addr, map->phys_addr, map->size);
+	else if (map->dir == MAP_OUTBOUND) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		ret = vm_iomap_memory(vma, map->phys_addr, map->size);
+	} else {
+		rmcd_error("Attempt to mmap unsupported mapping type");
+		ret = -EIO;
+	}
+
+	if (!ret) {
+		vma->vm_private_data = map;
+		vma->vm_ops = &vm_ops;
+		mport_mm_open(vma);
+	} else {
+		rmcd_error("MMAP exit with err=%d", ret);
+	}
+
+	return ret;
+}
+
+static unsigned int mport_cdev_poll(struct file *filp, poll_table *wait)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+
+	poll_wait(filp, &priv->event_rx_wait, wait);
+	if (kfifo_len(&priv->event_fifo))
+		return POLLIN | POLLRDNORM;
+
+	return 0;
+}
+
+static ssize_t mport_read(struct file *filp, char __user *buf, size_t count,
+			loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	int copied;
+	ssize_t ret;
+
+	if (!count)
+		return 0;
+
+	if (kfifo_is_empty(&priv->event_fifo) &&
+	    (filp->f_flags & O_NONBLOCK))
+		return -EAGAIN;
+
+	if (count % sizeof(struct rio_event))
+		return -EINVAL;
+
+	ret = wait_event_interruptible(priv->event_rx_wait,
+					kfifo_len(&priv->event_fifo) != 0);
+	if (ret)
+		return ret;
+
+	while (ret < count) {
+		if (kfifo_to_user(&priv->event_fifo, buf,
+		      sizeof(struct rio_event), &copied))
+			return -EFAULT;
+		ret += copied;
+		buf += copied;
+	}
+
+	return ret;
+}
+
+static ssize_t mport_write(struct file *filp, const char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	struct mport_cdev_priv *priv = filp->private_data;
+	struct rio_mport *mport = priv->md->mport;
+	struct rio_event event;
+	int len, ret;
+
+	if (!count)
+		return 0;
+
+	if (count % sizeof(event))
+		return -EINVAL;
+
+	len = 0;
+	while ((count - len) >= (int)sizeof(event)) {
+		if (copy_from_user(&event, buf, sizeof(event)))
+			return -EFAULT;
+
+		if (event.header != RIO_DOORBELL)
+			return -EINVAL;
+
+		ret = rio_mport_send_doorbell(mport,
+					      (u16)event.u.doorbell.rioid,
+					      event.u.doorbell.payload);
+		if (ret < 0)
+			return ret;
+
+		len += sizeof(event);
+		buf += sizeof(event);
+	}
+
+	return len;
+}
+
+static const struct file_operations mport_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mport_cdev_open,
+	.release	= mport_cdev_release,
+	.poll		= mport_cdev_poll,
+	.read		= mport_read,
+	.write		= mport_write,
+	.mmap		= mport_cdev_mmap,
+	.fasync		= mport_cdev_fasync,
+	.unlocked_ioctl = mport_cdev_ioctl
+};
+
+/*
+ * Character device management
+ */
+
+static void mport_device_release(struct device *dev)
+{
+	struct mport_dev *md;
+
+	rmcd_debug(EXIT, "%s", dev_name(dev));
+	md = container_of(dev, struct mport_dev, dev);
+	kfree(md);
+}
+
+/*
+ * mport_cdev_add() - Create mport_dev from rio_mport
+ * @mport:	RapidIO master port
+ */
+static struct mport_dev *mport_cdev_add(struct rio_mport *mport)
+{
+	int ret = 0;
+	struct mport_dev *md;
+	struct rio_mport_attr attr;
+
+	md = kzalloc(sizeof(struct mport_dev), GFP_KERNEL);
+	if (!md) {
+		rmcd_error("Unable allocate a device object");
+		return NULL;
+	}
+
+	md->mport = mport;
+	mutex_init(&md->buf_mutex);
+	mutex_init(&md->file_mutex);
+	INIT_LIST_HEAD(&md->file_list);
+	cdev_init(&md->cdev, &mport_fops);
+	md->cdev.owner = THIS_MODULE;
+	ret = cdev_add(&md->cdev, MKDEV(MAJOR(dev_number), mport->id), 1);
+	if (ret < 0) {
+		kfree(md);
+		rmcd_error("Unable to register a device, err=%d", ret);
+		return NULL;
+	}
+
+	md->dev.devt = md->cdev.dev;
+	md->dev.class = dev_class;
+	md->dev.parent = &mport->dev;
+	md->dev.release = mport_device_release;
+	dev_set_name(&md->dev, DEV_NAME "%d", mport->id);
+	atomic_set(&md->active, 1);
+
+	ret = device_register(&md->dev);
+	if (ret) {
+		rmcd_error("Failed to register mport %d (err=%d)",
+		       mport->id, ret);
+		goto err_cdev;
+	}
+
+	get_device(&md->dev);
+
+	INIT_LIST_HEAD(&md->doorbells);
+	spin_lock_init(&md->db_lock);
+	INIT_LIST_HEAD(&md->portwrites);
+	spin_lock_init(&md->pw_lock);
+	INIT_LIST_HEAD(&md->mappings);
+
+	md->properties.id = mport->id;
+	md->properties.sys_size = mport->sys_size;
+	md->properties.hdid = mport->host_deviceid;
+	md->properties.index = mport->index;
+
+	/* The transfer_mode property will be returned through mport query
+	 * interface
+	 */
+#ifdef CONFIG_PPC /* for now: only on Freescale's SoCs */
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_MAPPED;
+#else
+	md->properties.transfer_mode |= RIO_TRANSFER_MODE_TRANSFER;
+#endif
+	ret = rio_query_mport(mport, &attr);
+	if (!ret) {
+		md->properties.flags = attr.flags;
+		md->properties.link_speed = attr.link_speed;
+		md->properties.link_width = attr.link_width;
+		md->properties.dma_max_sge = attr.dma_max_sge;
+		md->properties.dma_max_size = attr.dma_max_size;
+		md->properties.dma_align = attr.dma_align;
+		md->properties.cap_sys_size = 0;
+		md->properties.cap_transfer_mode = 0;
+		md->properties.cap_addr_size = 0;
+	} else
+		pr_info(DRV_PREFIX "Failed to obtain info for %s cdev(%d:%d)\n",
+			mport->name, MAJOR(dev_number), mport->id);
+
+	mutex_lock(&mport_devs_lock);
+	list_add_tail(&md->node, &mport_devs);
+	mutex_unlock(&mport_devs_lock);
+
+	pr_info(DRV_PREFIX "Added %s cdev(%d:%d)\n",
+		mport->name, MAJOR(dev_number), mport->id);
+
+	return md;
+
+err_cdev:
+	cdev_del(&md->cdev);
+	kfree(md);
+	return NULL;
+}
+
+/*
+ * mport_cdev_terminate_dma() - Stop all active DMA data transfers and release
+ *                              associated DMA channels.
+ */
+static void mport_cdev_terminate_dma(struct mport_dev *md)
+{
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct mport_cdev_priv *client;
+
+	rmcd_debug(DMA, "%s", dev_name(&md->dev));
+
+	mutex_lock(&md->file_mutex);
+	list_for_each_entry(client, &md->file_list, list) {
+		if (client->dmach) {
+			dmaengine_terminate_all(client->dmach);
+			rio_release_dma(client->dmach);
+		}
+	}
+	mutex_unlock(&md->file_mutex);
+
+	if (md->dma_chan) {
+		dmaengine_terminate_all(md->dma_chan);
+		rio_release_dma(md->dma_chan);
+		md->dma_chan = NULL;
+	}
+#endif
+}
+
+
+/*
+ * mport_cdev_kill_fasync() - Send SIGIO signal to all processes with open
+ *                            mport_cdev files.
+ */
+static int mport_cdev_kill_fasync(struct mport_dev *md)
+{
+	unsigned int files = 0;
+	struct mport_cdev_priv *client;
+
+	mutex_lock(&md->file_mutex);
+	list_for_each_entry(client, &md->file_list, list) {
+		if (client->async_queue)
+			kill_fasync(&client->async_queue, SIGIO, POLL_HUP);
+		files++;
+	}
+	mutex_unlock(&md->file_mutex);
+	return files;
+}
+
+/*
+ * mport_cdev_remove() - Remove mport character device
+ * @dev:	Mport device to remove
+ */
+static void mport_cdev_remove(struct mport_dev *md)
+{
+	struct rio_mport_mapping *map, *_map;
+
+	rmcd_debug(EXIT, "Remove %s cdev", md->mport->name);
+	atomic_set(&md->active, 0);
+	mport_cdev_terminate_dma(md);
+	rio_del_mport_pw_handler(md->mport, md, rio_mport_pw_handler);
+	cdev_del(&(md->cdev));
+	mport_cdev_kill_fasync(md);
+
+	flush_workqueue(dma_wq);
+
+	/* TODO: do we need to give clients some time to close file
+	 * descriptors? Simple wait for XX, or kref?
+	 */
+
+	/*
+	 * Release DMA buffers allocated for the mport device.
+	 * Disable associated inbound Rapidio requests mapping if applicable.
+	 */
+	mutex_lock(&md->buf_mutex);
+	list_for_each_entry_safe(map, _map, &md->mappings, node) {
+		kref_put(&map->ref, mport_release_mapping);
+	}
+	mutex_unlock(&md->buf_mutex);
+
+	if (!list_empty(&md->mappings))
+		rmcd_warn("WARNING: %s pending mappings on removal",
+			  md->mport->name);
+
+	rio_release_inb_dbell(md->mport, 0, 0x0fff);
+
+	device_unregister(&md->dev);
+	put_device(&md->dev);
+}
+
+/*
+ * RIO rio_mport_interface driver
+ */
+
+/*
+ * mport_add_mport() - Add rio_mport from LDM device struct
+ * @dev:		Linux device model struct
+ * @class_intf:	Linux class_interface
+ */
+static int mport_add_mport(struct device *dev,
+		struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev = NULL;
+
+	mport = to_rio_mport(dev);
+	if (!mport)
+		return -ENODEV;
+
+	chdev = mport_cdev_add(mport);
+	if (!chdev)
+		return -ENODEV;
+
+	return 0;
+}
+
+/*
+ * mport_remove_mport() - Remove rio_mport from global list
+ * TODO remove device from global mport_dev list
+ */
+static void mport_remove_mport(struct device *dev,
+		struct class_interface *class_intf)
+{
+	struct rio_mport *mport = NULL;
+	struct mport_dev *chdev;
+	int found = 0;
+
+	mport = to_rio_mport(dev);
+	rmcd_debug(EXIT, "Remove %s", mport->name);
+
+	mutex_lock(&mport_devs_lock);
+	list_for_each_entry(chdev, &mport_devs, node) {
+		if (chdev->mport->id == mport->id) {
+			atomic_set(&chdev->active, 0);
+			list_del(&chdev->node);
+			found = 1;
+			break;
+		}
+	}
+	mutex_unlock(&mport_devs_lock);
+
+	if (found)
+		mport_cdev_remove(chdev);
+}
+
+/* the rio_mport_interface is used to handle local mport devices */
+static struct class_interface rio_mport_interface __refdata = {
+	.class		= &rio_mport_class,
+	.add_dev	= mport_add_mport,
+	.remove_dev	= mport_remove_mport,
+};
+
+/*
+ * Linux kernel module
+ */
+
+/*
+ * mport_init - Driver module loading
+ */
+static int __init mport_init(void)
+{
+	int ret;
+
+	/* Create device class needed by udev */
+	dev_class = class_create(THIS_MODULE, DRV_NAME);
+	if (!dev_class) {
+		rmcd_error("Unable to create " DRV_NAME " class");
+		return -EINVAL;
+	}
+
+	ret = alloc_chrdev_region(&dev_number, 0, RIO_MAX_MPORTS, DRV_NAME);
+	if (ret < 0)
+		goto err_chr;
+
+	rmcd_debug(INIT, "Registered class with major=%d", MAJOR(dev_number));
+
+	/* Register to rio_mport_interface */
+	ret = class_interface_register(&rio_mport_interface);
+	if (ret) {
+		rmcd_error("class_interface_register() failed, err=%d", ret);
+		goto err_cli;
+	}
+
+	dma_wq = create_singlethread_workqueue("dma_wq");
+	if (!dma_wq) {
+		rmcd_error("failed to create DMA work queue");
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
+	return 0;
+
+err_wq:
+	class_interface_unregister(&rio_mport_interface);
+err_cli:
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+err_chr:
+	class_destroy(dev_class);
+	return ret;
+}
+
+/**
+ * mport_exit - Driver module unloading
+ */
+static void __exit mport_exit(void)
+{
+	class_interface_unregister(&rio_mport_interface);
+	class_destroy(dev_class);
+	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
+	destroy_workqueue(dma_wq);
+}
+
+module_init(mport_init);
+module_exit(mport_exit);
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index eeca70ddbf61..b5b455614f8a 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -36,7 +36,11 @@
 
 #include "tsi721.h"
 
-#define DEBUG_PW	/* Inbound Port-Write debugging */
+#ifdef DEBUG
+u32 dbg_level = DBG_INIT | DBG_EXIT;
+module_param(dbg_level, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)");
+#endif
 
 static void tsi721_omsg_handler(struct tsi721_device *priv, int ch);
 static void tsi721_imsg_handler(struct tsi721_device *priv, int ch);
@@ -143,9 +147,9 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 							& TSI721_DMAC_STS_RUN) {
 		udelay(1);
 		if (++i >= 5000000) {
-			dev_dbg(&priv->pdev->dev,
-				"%s : DMA[%d] read timeout ch_status=%x\n",
-				__func__, priv->mdma.ch_id, ch_stat);
+			tsi_debug(MAINT, &priv->pdev->dev,
+				"DMA[%d] read timeout ch_status=%x",
+				priv->mdma.ch_id, ch_stat);
 			if (!do_wr)
 				*data = 0xffffffff;
 			err = -EIO;
@@ -157,10 +161,12 @@ static int tsi721_maint_dma(struct tsi721_device *priv, u32 sys_size,
 		/* If DMA operation aborted due to error,
 		 * reinitialize DMA channel
 		 */
-		dev_dbg(&priv->pdev->dev, "%s : DMA ABORT ch_stat=%x\n",
-			__func__, ch_stat);
-		dev_dbg(&priv->pdev->dev, "OP=%d : destid=%x hc=%x off=%x\n",
-			do_wr ? MAINT_WR : MAINT_RD, destid, hopcount, offset);
+		tsi_debug(MAINT, &priv->pdev->dev, "DMA ABORT ch_stat=%x",
+			  ch_stat);
+		tsi_debug(MAINT, &priv->pdev->dev,
+			  "OP=%d : destid=%x hc=%x off=%x",
+			  do_wr ? MAINT_WR : MAINT_RD,
+			  destid, hopcount, offset);
 		iowrite32(TSI721_DMAC_INT_ALL, regs + TSI721_DMAC_INT);
 		iowrite32(TSI721_DMAC_CTL_INIT, regs + TSI721_DMAC_CTL);
 		udelay(10);
@@ -236,16 +242,15 @@ static int tsi721_cwrite_dma(struct rio_mport *mport, int index, u16 destid,
 
 /**
  * tsi721_pw_handler - Tsi721 inbound port-write interrupt handler
- * @mport: RapidIO master port structure
+ * @priv:  tsi721 device private structure
  *
  * Handles inbound port-write interrupts. Copies PW message from an internal
  * buffer into PW message FIFO and schedules deferred routine to process
  * queued messages.
  */
 static int
-tsi721_pw_handler(struct rio_mport *mport)
+tsi721_pw_handler(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	u32 pw_stat;
 	u32 pw_buf[TSI721_RIO_PW_MSG_SIZE/sizeof(u32)];
 
@@ -283,30 +288,15 @@ static void tsi721_pw_dpc(struct work_struct *work)
 {
 	struct tsi721_device *priv = container_of(work, struct tsi721_device,
 						    pw_work);
-	u32 msg_buffer[RIO_PW_MSG_SIZE/sizeof(u32)]; /* Use full size PW message
-							buffer for RIO layer */
+	union rio_pw_msg pwmsg;
 
 	/*
 	 * Process port-write messages
 	 */
-	while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)msg_buffer,
+	while (kfifo_out_spinlocked(&priv->pw_fifo, (unsigned char *)&pwmsg,
 			 TSI721_RIO_PW_MSG_SIZE, &priv->pw_fifo_lock)) {
-		/* Process one message */
-#ifdef DEBUG_PW
-		{
-		u32 i;
-		pr_debug("%s : Port-Write Message:", __func__);
-		for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32); ) {
-			pr_debug("0x%02x: %08x %08x %08x %08x", i*4,
-				msg_buffer[i], msg_buffer[i + 1],
-				msg_buffer[i + 2], msg_buffer[i + 3]);
-			i += 4;
-		}
-		pr_debug("\n");
-		}
-#endif
 		/* Pass the port-write message to RIO core for processing */
-		rio_inb_pwrite_handler((union rio_pw_msg *)msg_buffer);
+		rio_inb_pwrite_handler(&priv->mport, &pwmsg);
 	}
 }
 
@@ -354,8 +344,8 @@ static int tsi721_dsend(struct rio_mport *mport, int index,
 	offset = (((mport->sys_size) ? RIO_TT_CODE_16 : RIO_TT_CODE_8) << 18) |
 		 (destid << 2);
 
-	dev_dbg(&priv->pdev->dev,
-		"Send Doorbell 0x%04x to destID 0x%x\n", data, destid);
+	tsi_debug(DBELL, &priv->pdev->dev,
+		  "Send Doorbell 0x%04x to destID 0x%x", data, destid);
 	iowrite16be(data, priv->odb_base + offset);
 
 	return 0;
@@ -363,16 +353,15 @@ static int tsi721_dsend(struct rio_mport *mport, int index,
 
 /**
  * tsi721_dbell_handler - Tsi721 doorbell interrupt handler
- * @mport: RapidIO master port structure
+ * @priv: tsi721 device-specific data structure
  *
  * Handles inbound doorbell interrupts. Copies doorbell entry from an internal
  * buffer into DB message FIFO and schedules deferred  routine to process
  * queued DBs.
  */
 static int
-tsi721_dbell_handler(struct rio_mport *mport)
+tsi721_dbell_handler(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	u32 regval;
 
 	/* Disable IDB interrupts */
@@ -404,7 +393,7 @@ static void tsi721_db_dpc(struct work_struct *work)
 	/*
 	 * Process queued inbound doorbells
 	 */
-	mport = priv->mport;
+	mport = &priv->mport;
 
 	wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
 	rd_ptr = ioread32(priv->regs + TSI721_IDQ_RP(IDB_QUEUE)) % IDB_QSIZE;
@@ -430,10 +419,10 @@ static void tsi721_db_dpc(struct work_struct *work)
 			dbell->dinb(mport, dbell->dev_id, DBELL_SID(idb.bytes),
 				    DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
 		} else {
-			dev_dbg(&priv->pdev->dev,
-				"spurious inb doorbell, sid %2.2x tid %2.2x"
-				" info %4.4x\n", DBELL_SID(idb.bytes),
-				DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
+			tsi_debug(DBELL, &priv->pdev->dev,
+				  "spurious IDB sid %2.2x tid %2.2x info %4.4x",
+				  DBELL_SID(idb.bytes), DBELL_TID(idb.bytes),
+				  DBELL_INF(idb.bytes));
 		}
 
 		wr_ptr = ioread32(priv->regs +
@@ -457,15 +446,14 @@ static void tsi721_db_dpc(struct work_struct *work)
 /**
  * tsi721_irqhandler - Tsi721 interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts signaled using MSI and INTA. Checks reported
  * interrupt events and calls an event-specific handler(s).
  */
 static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 {
-	struct rio_mport *mport = (struct rio_mport *)ptr;
-	struct tsi721_device *priv = mport->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 dev_int;
 	u32 dev_ch_int;
 	u32 intval;
@@ -488,10 +476,10 @@ static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 			intval = ioread32(priv->regs +
 						TSI721_SR_CHINT(IDB_QUEUE));
 			if (intval & TSI721_SR_CHINT_IDBQRCV)
-				tsi721_dbell_handler(mport);
+				tsi721_dbell_handler(priv);
 			else
-				dev_info(&priv->pdev->dev,
-					"Unsupported SR_CH_INT %x\n", intval);
+				tsi_info(&priv->pdev->dev,
+					"Unsupported SR_CH_INT %x", intval);
 
 			/* Clear interrupts */
 			iowrite32(intval,
@@ -545,7 +533,7 @@ static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 		/* Service SRIO MAC interrupts */
 		intval = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
 		if (intval & TSI721_RIO_EM_INT_STAT_PW_RX)
-			tsi721_pw_handler(mport);
+			tsi721_pw_handler(priv);
 	}
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
@@ -553,8 +541,8 @@ static irqreturn_t tsi721_irqhandler(int irq, void *ptr)
 		int ch;
 
 		if (dev_ch_int & TSI721_INT_BDMA_CHAN_M) {
-			dev_dbg(&priv->pdev->dev,
-				"IRQ from DMA channel 0x%08x\n", dev_ch_int);
+			tsi_debug(DMA, &priv->pdev->dev,
+				  "IRQ from DMA channel 0x%08x", dev_ch_int);
 
 			for (ch = 0; ch < TSI721_DMA_MAXCH; ch++) {
 				if (!(dev_ch_int & TSI721_INT_BDMA_CHAN(ch)))
@@ -613,13 +601,13 @@ static void tsi721_interrupts_init(struct tsi721_device *priv)
 /**
  * tsi721_omsg_msix - MSI-X interrupt handler for outbound messaging
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles outbound messaging interrupts signaled using MSI-X.
  */
 static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	int mbox;
 
 	mbox = (irq - priv->msix[TSI721_VECT_OMB0_DONE].vector) % RIO_MAX_MBOX;
@@ -630,13 +618,13 @@ static irqreturn_t tsi721_omsg_msix(int irq, void *ptr)
 /**
  * tsi721_imsg_msix - MSI-X interrupt handler for inbound messaging
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles inbound messaging interrupts signaled using MSI-X.
  */
 static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	int mbox;
 
 	mbox = (irq - priv->msix[TSI721_VECT_IMB0_RCV].vector) % RIO_MAX_MBOX;
@@ -647,19 +635,19 @@ static irqreturn_t tsi721_imsg_msix(int irq, void *ptr)
 /**
  * tsi721_srio_msix - Tsi721 MSI-X SRIO MAC interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts from SRIO MAC.
  */
 static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 srio_int;
 
 	/* Service SRIO MAC interrupts */
 	srio_int = ioread32(priv->regs + TSI721_RIO_EM_INT_STAT);
 	if (srio_int & TSI721_RIO_EM_INT_STAT_PW_RX)
-		tsi721_pw_handler((struct rio_mport *)ptr);
+		tsi721_pw_handler(priv);
 
 	return IRQ_HANDLED;
 }
@@ -667,7 +655,7 @@ static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
 /**
  * tsi721_sr2pc_ch_msix - Tsi721 MSI-X SR2PC Channel interrupt handler
  * @irq: Linux interrupt number
- * @ptr: Pointer to interrupt-specific data (mport structure)
+ * @ptr: Pointer to interrupt-specific data (tsi721_device structure)
  *
  * Handles Tsi721 interrupts from SR2PC Channel.
  * NOTE: At this moment services only one SR2PC channel associated with inbound
@@ -675,13 +663,13 @@ static irqreturn_t tsi721_srio_msix(int irq, void *ptr)
  */
 static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
 {
-	struct tsi721_device *priv = ((struct rio_mport *)ptr)->priv;
+	struct tsi721_device *priv = (struct tsi721_device *)ptr;
 	u32 sr_ch_int;
 
 	/* Service Inbound DB interrupt from SR2PC channel */
 	sr_ch_int = ioread32(priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
 	if (sr_ch_int & TSI721_SR_CHINT_IDBQRCV)
-		tsi721_dbell_handler((struct rio_mport *)ptr);
+		tsi721_dbell_handler(priv);
 
 	/* Clear interrupts */
 	iowrite32(sr_ch_int, priv->regs + TSI721_SR_CHINT(IDB_QUEUE));
@@ -693,32 +681,31 @@ static irqreturn_t tsi721_sr2pc_ch_msix(int irq, void *ptr)
 
 /**
  * tsi721_request_msix - register interrupt service for MSI-X mode.
- * @mport: RapidIO master port structure
+ * @priv: tsi721 device-specific data structure
  *
  * Registers MSI-X interrupt service routines for interrupts that are active
  * immediately after mport initialization. Messaging interrupt service routines
  * should be registered during corresponding open requests.
  */
-static int tsi721_request_msix(struct rio_mport *mport)
+static int tsi721_request_msix(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	int err = 0;
 
 	err = request_irq(priv->msix[TSI721_VECT_IDB].vector,
 			tsi721_sr2pc_ch_msix, 0,
-			priv->msix[TSI721_VECT_IDB].irq_name, (void *)mport);
+			priv->msix[TSI721_VECT_IDB].irq_name, (void *)priv);
 	if (err)
-		goto out;
+		return err;
 
 	err = request_irq(priv->msix[TSI721_VECT_PWRX].vector,
 			tsi721_srio_msix, 0,
-			priv->msix[TSI721_VECT_PWRX].irq_name, (void *)mport);
-	if (err)
-		free_irq(
-			priv->msix[TSI721_VECT_IDB].vector,
-			(void *)mport);
-out:
-	return err;
+			priv->msix[TSI721_VECT_PWRX].irq_name, (void *)priv);
+	if (err) {
+		free_irq(priv->msix[TSI721_VECT_IDB].vector, (void *)priv);
+		return err;
+	}
+
+	return 0;
 }
 
 /**
@@ -770,8 +757,8 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
 
 	err = pci_enable_msix_exact(priv->pdev, entries, ARRAY_SIZE(entries));
 	if (err) {
-		dev_err(&priv->pdev->dev,
-			"Failed to enable MSI-X (err=%d)\n", err);
+		tsi_err(&priv->pdev->dev,
+			"Failed to enable MSI-X (err=%d)", err);
 		return err;
 	}
 
@@ -831,27 +818,209 @@ static int tsi721_enable_msix(struct tsi721_device *priv)
 }
 #endif /* CONFIG_PCI_MSI */
 
-static int tsi721_request_irq(struct rio_mport *mport)
+static int tsi721_request_irq(struct tsi721_device *priv)
 {
-	struct tsi721_device *priv = mport->priv;
 	int err;
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX)
-		err = tsi721_request_msix(mport);
+		err = tsi721_request_msix(priv);
 	else
 #endif
 		err = request_irq(priv->pdev->irq, tsi721_irqhandler,
 			  (priv->flags & TSI721_USING_MSI) ? 0 : IRQF_SHARED,
-			  DRV_NAME, (void *)mport);
+			  DRV_NAME, (void *)priv);
 
 	if (err)
-		dev_err(&priv->pdev->dev,
-			"Unable to allocate interrupt, Error: %d\n", err);
+		tsi_err(&priv->pdev->dev,
+			"Unable to allocate interrupt, err=%d", err);
 
 	return err;
 }
 
+static void tsi721_free_irq(struct tsi721_device *priv)
+{
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX) {
+		free_irq(priv->msix[TSI721_VECT_IDB].vector, (void *)priv);
+		free_irq(priv->msix[TSI721_VECT_PWRX].vector, (void *)priv);
+	} else
+#endif
+	free_irq(priv->pdev->irq, (void *)priv);
+}
+
+static int
+tsi721_obw_alloc(struct tsi721_device *priv, struct tsi721_obw_bar *pbar,
+		 u32 size, int *win_id)
+{
+	u64 win_base;
+	u64 bar_base;
+	u64 bar_end;
+	u32 align;
+	struct tsi721_ob_win *win;
+	struct tsi721_ob_win *new_win = NULL;
+	int new_win_idx = -1;
+	int i = 0;
+
+	bar_base = pbar->base;
+	bar_end =  bar_base + pbar->size;
+	win_base = bar_base;
+	align = size/TSI721_PC2SR_ZONES;
+
+	while (i < TSI721_IBWIN_NUM) {
+		for (i = 0; i < TSI721_IBWIN_NUM; i++) {
+			if (!priv->ob_win[i].active) {
+				if (new_win == NULL) {
+					new_win = &priv->ob_win[i];
+					new_win_idx = i;
+				}
+				continue;
+			}
+
+			/*
+			 * If this window belongs to the current BAR check it
+			 * for overlap
+			 */
+			win = &priv->ob_win[i];
+
+			if (win->base >= bar_base && win->base < bar_end) {
+				if (win_base < (win->base + win->size) &&
+						(win_base + size) > win->base) {
+					/* Overlap detected */
+					win_base = win->base + win->size;
+					win_base = ALIGN(win_base, align);
+					break;
+				}
+			}
+		}
+	}
+
+	if (win_base + size > bar_end)
+		return -ENOMEM;
+
+	if (!new_win) {
+		tsi_err(&priv->pdev->dev, "OBW count tracking failed");
+		return -EIO;
+	}
+
+	new_win->active = true;
+	new_win->base = win_base;
+	new_win->size = size;
+	new_win->pbar = pbar;
+	priv->obwin_cnt--;
+	pbar->free -= size;
+	*win_id = new_win_idx;
+	return 0;
+}
+
+static int tsi721_map_outb_win(struct rio_mport *mport, u16 destid, u64 rstart,
+			u32 size, u32 flags, dma_addr_t *laddr)
+{
+	struct tsi721_device *priv = mport->priv;
+	int i;
+	struct tsi721_obw_bar *pbar;
+	struct tsi721_ob_win *ob_win;
+	int obw = -1;
+	u32 rval;
+	u64 rio_addr;
+	u32 zsize;
+	int ret = -ENOMEM;
+
+	tsi_debug(OBW, &priv->pdev->dev,
+		  "did=%d ra=0x%llx sz=0x%x", destid, rstart, size);
+
+	if (!is_power_of_2(size) || (size < 0x8000) || (rstart & (size - 1)))
+		return -EINVAL;
+
+	if (priv->obwin_cnt == 0)
+		return -EBUSY;
+
+	for (i = 0; i < 2; i++) {
+		if (priv->p2r_bar[i].free >= size) {
+			pbar = &priv->p2r_bar[i];
+			ret = tsi721_obw_alloc(priv, pbar, size, &obw);
+			if (!ret)
+				break;
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	WARN_ON(obw == -1);
+	ob_win = &priv->ob_win[obw];
+	ob_win->destid = destid;
+	ob_win->rstart = rstart;
+	tsi_debug(OBW, &priv->pdev->dev,
+		  "allocated OBW%d @%llx", obw, ob_win->base);
+
+	/*
+	 * Configure Outbound Window
+	 */
+
+	zsize = size/TSI721_PC2SR_ZONES;
+	rio_addr = rstart;
+
+	/*
+	 * Program Address Translation Zones:
+	 *  This implementation uses all 8 zones associated wit window.
+	 */
+	for (i = 0; i < TSI721_PC2SR_ZONES; i++) {
+
+		while (ioread32(priv->regs + TSI721_ZONE_SEL) &
+			TSI721_ZONE_SEL_GO) {
+			udelay(1);
+		}
+
+		rval = (u32)(rio_addr & TSI721_LUT_DATA0_ADD) |
+			TSI721_LUT_DATA0_NREAD | TSI721_LUT_DATA0_NWR;
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA0);
+		rval = (u32)(rio_addr >> 32);
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA1);
+		rval = destid;
+		iowrite32(rval, priv->regs + TSI721_LUT_DATA2);
+
+		rval = TSI721_ZONE_SEL_GO | (obw << 3) | i;
+		iowrite32(rval, priv->regs + TSI721_ZONE_SEL);
+
+		rio_addr += zsize;
+	}
+
+	iowrite32(TSI721_OBWIN_SIZE(size) << 8,
+		  priv->regs + TSI721_OBWINSZ(obw));
+	iowrite32((u32)(ob_win->base >> 32), priv->regs + TSI721_OBWINUB(obw));
+	iowrite32((u32)(ob_win->base & TSI721_OBWINLB_BA) | TSI721_OBWINLB_WEN,
+		  priv->regs + TSI721_OBWINLB(obw));
+
+	*laddr = ob_win->base;
+	return 0;
+}
+
+static void tsi721_unmap_outb_win(struct rio_mport *mport,
+				  u16 destid, u64 rstart)
+{
+	struct tsi721_device *priv = mport->priv;
+	struct tsi721_ob_win *ob_win;
+	int i;
+
+	tsi_debug(OBW, &priv->pdev->dev, "did=%d ra=0x%llx", destid, rstart);
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++) {
+		ob_win = &priv->ob_win[i];
+
+		if (ob_win->active &&
+		    ob_win->destid == destid && ob_win->rstart == rstart) {
+			tsi_debug(OBW, &priv->pdev->dev,
+				  "free OBW%d @%llx", i, ob_win->base);
+			ob_win->active = false;
+			iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+			ob_win->pbar->free += ob_win->size;
+			priv->obwin_cnt++;
+			break;
+		}
+	}
+}
+
 /**
  * tsi721_init_pc2sr_mapping - initializes outbound (PCIe->SRIO)
  * translation regions.
@@ -861,11 +1030,41 @@ static int tsi721_request_irq(struct rio_mport *mport)
  */
 static void tsi721_init_pc2sr_mapping(struct tsi721_device *priv)
 {
-	int i;
+	int i, z;
+	u32 rval;
 
 	/* Disable all PC2SR translation windows */
 	for (i = 0; i < TSI721_OBWIN_NUM; i++)
 		iowrite32(0, priv->regs + TSI721_OBWINLB(i));
+
+	/* Initialize zone lookup tables to avoid ECC errors on reads */
+	iowrite32(0, priv->regs + TSI721_LUT_DATA0);
+	iowrite32(0, priv->regs + TSI721_LUT_DATA1);
+	iowrite32(0, priv->regs + TSI721_LUT_DATA2);
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++) {
+		for (z = 0; z < TSI721_PC2SR_ZONES; z++) {
+			while (ioread32(priv->regs + TSI721_ZONE_SEL) &
+				TSI721_ZONE_SEL_GO) {
+				udelay(1);
+			}
+			rval = TSI721_ZONE_SEL_GO | (i << 3) | z;
+			iowrite32(rval, priv->regs + TSI721_ZONE_SEL);
+		}
+	}
+
+	if (priv->p2r_bar[0].size == 0 && priv->p2r_bar[1].size == 0) {
+		priv->obwin_cnt = 0;
+		return;
+	}
+
+	priv->p2r_bar[0].free = priv->p2r_bar[0].size;
+	priv->p2r_bar[1].free = priv->p2r_bar[1].size;
+
+	for (i = 0; i < TSI721_OBWIN_NUM; i++)
+		priv->ob_win[i].active = false;
+
+	priv->obwin_cnt = TSI721_OBWIN_NUM;
 }
 
 /**
@@ -885,45 +1084,148 @@ static int tsi721_rio_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart,
 		u64 rstart, u32 size, u32 flags)
 {
 	struct tsi721_device *priv = mport->priv;
-	int i;
+	int i, avail = -1;
 	u32 regval;
+	struct tsi721_ib_win *ib_win;
+	bool direct = (lstart == rstart);
+	u64 ibw_size;
+	dma_addr_t loc_start;
+	u64 ibw_start;
+	struct tsi721_ib_win_mapping *map = NULL;
+	int ret = -EBUSY;
+
+	if (direct) {
+		/* Calculate minimal acceptable window size and base address */
+
+		ibw_size = roundup_pow_of_two(size);
+		ibw_start = lstart & ~(ibw_size - 1);
+
+		tsi_debug(IBW, &priv->pdev->dev,
+			"Direct (RIO_0x%llx -> PCIe_0x%pad), size=0x%x, ibw_start = 0x%llx",
+			rstart, &lstart, size, ibw_start);
+
+		while ((lstart + size) > (ibw_start + ibw_size)) {
+			ibw_size *= 2;
+			ibw_start = lstart & ~(ibw_size - 1);
+			if (ibw_size > 0x80000000) { /* Limit max size to 2GB */
+				return -EBUSY;
+			}
+		}
 
-	if (!is_power_of_2(size) || size < 0x1000 ||
-	    ((u64)lstart & (size - 1)) || (rstart & (size - 1)))
-		return -EINVAL;
+		loc_start = ibw_start;
+
+		map = kzalloc(sizeof(struct tsi721_ib_win_mapping), GFP_ATOMIC);
+		if (map == NULL)
+			return -ENOMEM;
+
+	} else {
+		tsi_debug(IBW, &priv->pdev->dev,
+			"Translated (RIO_0x%llx -> PCIe_0x%pad), size=0x%x",
+			rstart, &lstart, size);
+
+		if (!is_power_of_2(size) || size < 0x1000 ||
+		    ((u64)lstart & (size - 1)) || (rstart & (size - 1)))
+			return -EINVAL;
+		if (priv->ibwin_cnt == 0)
+			return -EBUSY;
+		ibw_start = rstart;
+		ibw_size = size;
+		loc_start = lstart;
+	}
 
-	/* Search for free inbound translation window */
+	/*
+	 * Scan for overlapping with active regions and mark the first available
+	 * IB window at the same time.
+	 */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
-		regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
-		if (!(regval & TSI721_IBWIN_LB_WEN))
+		ib_win = &priv->ib_win[i];
+
+		if (!ib_win->active) {
+			if (avail == -1) {
+				avail = i;
+				ret = 0;
+			}
+		} else if (ibw_start < (ib_win->rstart + ib_win->size) &&
+			   (ibw_start + ibw_size) > ib_win->rstart) {
+			/* Return error if address translation involved */
+			if (direct && ib_win->xlat) {
+				ret = -EFAULT;
+				break;
+			}
+
+			/*
+			 * Direct mappings usually are larger than originally
+			 * requested fragments - check if this new request fits
+			 * into it.
+			 */
+			if (rstart >= ib_win->rstart &&
+			    (rstart + size) <= (ib_win->rstart +
+							ib_win->size)) {
+				/* We are in - no further mapping required */
+				map->lstart = lstart;
+				list_add_tail(&map->node, &ib_win->mappings);
+				return 0;
+			}
+
+			ret = -EFAULT;
 			break;
+		}
 	}
 
-	if (i >= TSI721_IBWIN_NUM) {
-		dev_err(&priv->pdev->dev,
-			"Unable to find free inbound window\n");
-		return -EBUSY;
+	if (ret)
+		goto out;
+	i = avail;
+
+	/* Sanity check: available IB window must be disabled at this point */
+	regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
+	if (WARN_ON(regval & TSI721_IBWIN_LB_WEN)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	ib_win = &priv->ib_win[i];
+	ib_win->active = true;
+	ib_win->rstart = ibw_start;
+	ib_win->lstart = loc_start;
+	ib_win->size = ibw_size;
+	ib_win->xlat = (lstart != rstart);
+	INIT_LIST_HEAD(&ib_win->mappings);
+
+	/*
+	 * When using direct IBW mapping and have larger than requested IBW size
+	 * we can have multiple local memory blocks mapped through the same IBW
+	 * To handle this situation we maintain list of "clients" for such IBWs.
+	 */
+	if (direct) {
+		map->lstart = lstart;
+		list_add_tail(&map->node, &ib_win->mappings);
 	}
 
-	iowrite32(TSI721_IBWIN_SIZE(size) << 8,
+	iowrite32(TSI721_IBWIN_SIZE(ibw_size) << 8,
 			priv->regs + TSI721_IBWIN_SZ(i));
 
-	iowrite32(((u64)lstart >> 32), priv->regs + TSI721_IBWIN_TUA(i));
-	iowrite32(((u64)lstart & TSI721_IBWIN_TLA_ADD),
+	iowrite32(((u64)loc_start >> 32), priv->regs + TSI721_IBWIN_TUA(i));
+	iowrite32(((u64)loc_start & TSI721_IBWIN_TLA_ADD),
 		  priv->regs + TSI721_IBWIN_TLA(i));
 
-	iowrite32(rstart >> 32, priv->regs + TSI721_IBWIN_UB(i));
-	iowrite32((rstart & TSI721_IBWIN_LB_BA) | TSI721_IBWIN_LB_WEN,
+	iowrite32(ibw_start >> 32, priv->regs + TSI721_IBWIN_UB(i));
+	iowrite32((ibw_start & TSI721_IBWIN_LB_BA) | TSI721_IBWIN_LB_WEN,
 		priv->regs + TSI721_IBWIN_LB(i));
-	dev_dbg(&priv->pdev->dev,
-		"Configured IBWIN%d mapping (RIO_0x%llx -> PCIe_0x%llx)\n",
-		i, rstart, (unsigned long long)lstart);
+
+	priv->ibwin_cnt--;
+
+	tsi_debug(IBW, &priv->pdev->dev,
+		"Configured IBWIN%d (RIO_0x%llx -> PCIe_0x%pad), size=0x%llx",
+		i, ibw_start, &loc_start, ibw_size);
 
 	return 0;
+out:
+	kfree(map);
+	return ret;
 }
 
 /**
- * fsl_rio_unmap_inb_mem -- Unmapping inbound memory region.
+ * tsi721_rio_unmap_inb_mem -- Unmapping inbound memory region.
  * @mport: RapidIO master port
  * @lstart: Local memory space start address.
  */
@@ -931,25 +1233,56 @@ static void tsi721_rio_unmap_inb_mem(struct rio_mport *mport,
 				dma_addr_t lstart)
 {
 	struct tsi721_device *priv = mport->priv;
+	struct tsi721_ib_win *ib_win;
 	int i;
-	u64 addr;
-	u32 regval;
+
+	tsi_debug(IBW, &priv->pdev->dev,
+		"Unmap IBW mapped to PCIe_0x%pad", &lstart);
 
 	/* Search for matching active inbound translation window */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
-		regval = ioread32(priv->regs + TSI721_IBWIN_LB(i));
-		if (regval & TSI721_IBWIN_LB_WEN) {
-			regval = ioread32(priv->regs + TSI721_IBWIN_TUA(i));
-			addr = (u64)regval << 32;
-			regval = ioread32(priv->regs + TSI721_IBWIN_TLA(i));
-			addr |= regval & TSI721_IBWIN_TLA_ADD;
-
-			if (addr == (u64)lstart) {
-				iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
-				break;
+		ib_win = &priv->ib_win[i];
+
+		/* Address translating IBWs must to be an exact march */
+		if (!ib_win->active ||
+		    (ib_win->xlat && lstart != ib_win->lstart))
+			continue;
+
+		if (lstart >= ib_win->lstart &&
+		    lstart < (ib_win->lstart + ib_win->size)) {
+
+			if (!ib_win->xlat) {
+				struct tsi721_ib_win_mapping *map;
+				int found = 0;
+
+				list_for_each_entry(map,
+						    &ib_win->mappings, node) {
+					if (map->lstart == lstart) {
+						list_del(&map->node);
+						kfree(map);
+						found = 1;
+						break;
+					}
+				}
+
+				if (!found)
+					continue;
+
+				if (!list_empty(&ib_win->mappings))
+					break;
 			}
+
+			tsi_debug(IBW, &priv->pdev->dev, "Disable IBWIN_%d", i);
+			iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+			ib_win->active = false;
+			priv->ibwin_cnt++;
+			break;
 		}
 	}
+
+	if (i == TSI721_IBWIN_NUM)
+		tsi_debug(IBW, &priv->pdev->dev,
+			"IB window mapped to %pad not found", &lstart);
 }
 
 /**
@@ -966,6 +1299,27 @@ static void tsi721_init_sr2pc_mapping(struct tsi721_device *priv)
 	/* Disable all SR2PC inbound windows */
 	for (i = 0; i < TSI721_IBWIN_NUM; i++)
 		iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+	priv->ibwin_cnt = TSI721_IBWIN_NUM;
+}
+
+/*
+ * tsi721_close_sr2pc_mapping - closes all active inbound (SRIO->PCIe)
+ * translation regions.
+ * @priv: pointer to tsi721 device private data
+ */
+static void tsi721_close_sr2pc_mapping(struct tsi721_device *priv)
+{
+	struct tsi721_ib_win *ib_win;
+	int i;
+
+	/* Disable all active SR2PC inbound windows */
+	for (i = 0; i < TSI721_IBWIN_NUM; i++) {
+		ib_win = &priv->ib_win[i];
+		if (ib_win->active) {
+			iowrite32(0, priv->regs + TSI721_IBWIN_LB(i));
+			ib_win->active = false;
+		}
+	}
 }
 
 /**
@@ -982,7 +1336,7 @@ static int tsi721_port_write_init(struct tsi721_device *priv)
 	spin_lock_init(&priv->pw_fifo_lock);
 	if (kfifo_alloc(&priv->pw_fifo,
 			TSI721_RIO_PW_MSG_SIZE * 32, GFP_KERNEL)) {
-		dev_err(&priv->pdev->dev, "PW FIFO allocation failed\n");
+		tsi_err(&priv->pdev->dev, "PW FIFO allocation failed");
 		return -ENOMEM;
 	}
 
@@ -991,6 +1345,11 @@ static int tsi721_port_write_init(struct tsi721_device *priv)
 	return 0;
 }
 
+static void tsi721_port_write_free(struct tsi721_device *priv)
+{
+	kfifo_free(&priv->pw_fifo);
+}
+
 static int tsi721_doorbell_init(struct tsi721_device *priv)
 {
 	/* Outbound Doorbells do not require any setup.
@@ -1009,8 +1368,9 @@ static int tsi721_doorbell_init(struct tsi721_device *priv)
 	if (!priv->idb_base)
 		return -ENOMEM;
 
-	dev_dbg(&priv->pdev->dev, "Allocated IDB buffer @ %p (phys = %llx)\n",
-		priv->idb_base, (unsigned long long)priv->idb_dma);
+	tsi_debug(DBELL, &priv->pdev->dev,
+		  "Allocated IDB buffer @ %p (phys = %pad)",
+		  priv->idb_base, &priv->idb_dma);
 
 	iowrite32(TSI721_IDQ_SIZE_VAL(IDB_QSIZE),
 		priv->regs + TSI721_IDQ_SIZE(IDB_QUEUE));
@@ -1056,9 +1416,8 @@ static int tsi721_bdma_maint_init(struct tsi721_device *priv)
 	int		bd_num = 2;
 	void __iomem	*regs;
 
-	dev_dbg(&priv->pdev->dev,
-		"Init Block DMA Engine for Maintenance requests, CH%d\n",
-		TSI721_DMACH_MAINT);
+	tsi_debug(MAINT, &priv->pdev->dev,
+		  "Init BDMA_%d Maintenance requests", TSI721_DMACH_MAINT);
 
 	/*
 	 * Initialize DMA channel for maintenance requests
@@ -1078,8 +1437,8 @@ static int tsi721_bdma_maint_init(struct tsi721_device *priv)
 	priv->mdma.bd_phys = bd_phys;
 	priv->mdma.bd_base = bd_ptr;
 
-	dev_dbg(&priv->pdev->dev, "DMA descriptors @ %p (phys = %llx)\n",
-		bd_ptr, (unsigned long long)bd_phys);
+	tsi_debug(MAINT, &priv->pdev->dev, "DMA descriptors @ %p (phys = %pad)",
+		  bd_ptr, &bd_phys);
 
 	/* Allocate space for descriptor status FIFO */
 	sts_size = (bd_num >= TSI721_DMA_MINSTSSZ) ?
@@ -1101,9 +1460,9 @@ static int tsi721_bdma_maint_init(struct tsi721_device *priv)
 	priv->mdma.sts_base = sts_ptr;
 	priv->mdma.sts_size = sts_size;
 
-	dev_dbg(&priv->pdev->dev,
-		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
-		sts_ptr, (unsigned long long)sts_phys, sts_size);
+	tsi_debug(MAINT, &priv->pdev->dev,
+		"desc status FIFO @ %p (phys = %pad) size=0x%x",
+		sts_ptr, &sts_phys, sts_size);
 
 	/* Initialize DMA descriptors ring */
 	bd_ptr[bd_num - 1].type_id = cpu_to_le32(DTYPE3 << 29);
@@ -1304,11 +1663,14 @@ tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
 	struct tsi721_device *priv = mport->priv;
 	struct tsi721_omsg_desc *desc;
 	u32 tx_slot;
+	unsigned long flags;
 
 	if (!priv->omsg_init[mbox] ||
 	    len > TSI721_MSG_MAX_SIZE || len < 8)
 		return -EINVAL;
 
+	spin_lock_irqsave(&priv->omsg_ring[mbox].lock, flags);
+
 	tx_slot = priv->omsg_ring[mbox].tx_slot;
 
 	/* Copy copy message into transfer buffer */
@@ -1320,9 +1682,11 @@ tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
 	/* Build descriptor associated with buffer */
 	desc = priv->omsg_ring[mbox].omd_base;
 	desc[tx_slot].type_id = cpu_to_le32((DTYPE4 << 29) | rdev->destid);
+#ifdef TSI721_OMSG_DESC_INT
+	/* Request IOF_DONE interrupt generation for each N-th frame in queue */
 	if (tx_slot % 4 == 0)
 		desc[tx_slot].type_id |= cpu_to_le32(TSI721_OMD_IOF);
-
+#endif
 	desc[tx_slot].msg_info =
 		cpu_to_le32((mport->sys_size << 26) | (mbox << 22) |
 			    (0xe << 12) | (len & 0xff8));
@@ -1348,6 +1712,8 @@ tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
 		priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 	ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 
+	spin_unlock_irqrestore(&priv->omsg_ring[mbox].lock, flags);
+
 	return 0;
 }
 
@@ -1361,20 +1727,23 @@ tsi721_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
 static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
 {
 	u32 omsg_int;
+	struct rio_mport *mport = &priv->mport;
+	void *dev_id = NULL;
+	u32 tx_slot = 0xffffffff;
+	int do_callback = 0;
 
 	spin_lock(&priv->omsg_ring[ch].lock);
 
 	omsg_int = ioread32(priv->regs + TSI721_OBDMAC_INT(ch));
 
 	if (omsg_int & TSI721_OBDMAC_INT_ST_FULL)
-		dev_info(&priv->pdev->dev,
-			"OB MBOX%d: Status FIFO is full\n", ch);
+		tsi_info(&priv->pdev->dev,
+			"OB MBOX%d: Status FIFO is full", ch);
 
 	if (omsg_int & (TSI721_OBDMAC_INT_DONE | TSI721_OBDMAC_INT_IOF_DONE)) {
 		u32 srd_ptr;
 		u64 *sts_ptr, last_ptr = 0, prev_ptr = 0;
 		int i, j;
-		u32 tx_slot;
 
 		/*
 		 * Find last successfully processed descriptor
@@ -1402,7 +1771,7 @@ static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
 		priv->omsg_ring[ch].sts_rdptr = srd_ptr;
 		iowrite32(srd_ptr, priv->regs + TSI721_OBDMAC_DSRP(ch));
 
-		if (!priv->mport->outb_msg[ch].mcback)
+		if (!mport->outb_msg[ch].mcback)
 			goto no_sts_update;
 
 		/* Inform upper layer about transfer completion */
@@ -1424,14 +1793,19 @@ static void tsi721_omsg_handler(struct tsi721_device *priv, int ch)
 				goto no_sts_update;
 		}
 
+		if (tx_slot >= priv->omsg_ring[ch].size)
+			tsi_debug(OMSG, &priv->pdev->dev,
+				  "OB_MSG tx_slot=%x > size=%x",
+				  tx_slot, priv->omsg_ring[ch].size);
+		WARN_ON(tx_slot >= priv->omsg_ring[ch].size);
+
 		/* Move slot index to the next message to be sent */
 		++tx_slot;
 		if (tx_slot == priv->omsg_ring[ch].size)
 			tx_slot = 0;
-		BUG_ON(tx_slot >= priv->omsg_ring[ch].size);
-		priv->mport->outb_msg[ch].mcback(priv->mport,
-				priv->omsg_ring[ch].dev_id, ch,
-				tx_slot);
+
+		dev_id = priv->omsg_ring[ch].dev_id;
+		do_callback = 1;
 	}
 
 no_sts_update:
@@ -1442,20 +1816,20 @@ no_sts_update:
 		* reinitialize OB MSG channel
 		*/
 
-		dev_dbg(&priv->pdev->dev, "OB MSG ABORT ch_stat=%x\n",
-			ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
+		tsi_debug(OMSG, &priv->pdev->dev, "OB MSG ABORT ch_stat=%x",
+			  ioread32(priv->regs + TSI721_OBDMAC_STS(ch)));
 
 		iowrite32(TSI721_OBDMAC_INT_ERROR,
 				priv->regs + TSI721_OBDMAC_INT(ch));
-		iowrite32(TSI721_OBDMAC_CTL_INIT,
+		iowrite32(TSI721_OBDMAC_CTL_RETRY_THR | TSI721_OBDMAC_CTL_INIT,
 				priv->regs + TSI721_OBDMAC_CTL(ch));
 		ioread32(priv->regs + TSI721_OBDMAC_CTL(ch));
 
 		/* Inform upper level to clear all pending tx slots */
-		if (priv->mport->outb_msg[ch].mcback)
-			priv->mport->outb_msg[ch].mcback(priv->mport,
-					priv->omsg_ring[ch].dev_id, ch,
-					priv->omsg_ring[ch].tx_slot);
+		dev_id = priv->omsg_ring[ch].dev_id;
+		tx_slot = priv->omsg_ring[ch].tx_slot;
+		do_callback = 1;
+
 		/* Synch tx_slot tracking */
 		iowrite32(priv->omsg_ring[ch].tx_slot,
 			priv->regs + TSI721_OBDMAC_DRDCNT(ch));
@@ -1477,6 +1851,9 @@ no_sts_update:
 	}
 
 	spin_unlock(&priv->omsg_ring[ch].lock);
+
+	if (mport->outb_msg[ch].mcback && do_callback)
+		mport->outb_msg[ch].mcback(mport, dev_id, ch, tx_slot);
 }
 
 /**
@@ -1514,9 +1891,8 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
 				&priv->omsg_ring[mbox].omq_phys[i],
 				GFP_KERNEL);
 		if (priv->omsg_ring[mbox].omq_base[i] == NULL) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate OB MSG data buffer for"
-				" MBOX%d\n", mbox);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				  "ENOMEM for OB_MSG_%d data buffer", mbox);
 			rc = -ENOMEM;
 			goto out_buf;
 		}
@@ -1528,9 +1904,8 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
 				(entries + 1) * sizeof(struct tsi721_omsg_desc),
 				&priv->omsg_ring[mbox].omd_phys, GFP_KERNEL);
 	if (priv->omsg_ring[mbox].omd_base == NULL) {
-		dev_dbg(&priv->pdev->dev,
-			"Unable to allocate OB MSG descriptor memory "
-			"for MBOX%d\n", mbox);
+		tsi_debug(OMSG, &priv->pdev->dev,
+			"ENOMEM for OB_MSG_%d descriptor memory", mbox);
 		rc = -ENOMEM;
 		goto out_buf;
 	}
@@ -1544,9 +1919,8 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
 						sizeof(struct tsi721_dma_sts),
 			&priv->omsg_ring[mbox].sts_phys, GFP_KERNEL);
 	if (priv->omsg_ring[mbox].sts_base == NULL) {
-		dev_dbg(&priv->pdev->dev,
-			"Unable to allocate OB MSG descriptor status FIFO "
-			"for MBOX%d\n", mbox);
+		tsi_debug(OMSG, &priv->pdev->dev,
+			"ENOMEM for OB_MSG_%d status FIFO", mbox);
 		rc = -ENOMEM;
 		goto out_desc;
 	}
@@ -1575,32 +1949,28 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
+		int idx = TSI721_VECT_OMB0_DONE + mbox;
+
 		/* Request interrupt service if we are in MSI-X mode */
-		rc = request_irq(
-			priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-			tsi721_omsg_msix, 0,
-			priv->msix[TSI721_VECT_OMB0_DONE + mbox].irq_name,
-			(void *)mport);
+		rc = request_irq(priv->msix[idx].vector, tsi721_omsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"OBOX%d-DONE\n", mbox);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for OBOX%d-DONE",
+				mbox);
 			goto out_stat;
 		}
 
-		rc = request_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
-			tsi721_omsg_msix, 0,
-			priv->msix[TSI721_VECT_OMB0_INT + mbox].irq_name,
-			(void *)mport);
+		idx = TSI721_VECT_OMB0_INT + mbox;
+		rc = request_irq(priv->msix[idx].vector, tsi721_omsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc)	{
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"MBOX%d-INT\n", mbox);
-			free_irq(
-				priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-				(void *)mport);
+			tsi_debug(OMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for MBOX%d-INT", mbox);
+			idx = TSI721_VECT_OMB0_DONE + mbox;
+			free_irq(priv->msix[idx].vector, (void *)priv);
 			goto out_stat;
 		}
 	}
@@ -1621,7 +1991,8 @@ static int tsi721_open_outb_mbox(struct rio_mport *mport, void *dev_id,
 	mb();
 
 	/* Initialize Outbound Message engine */
-	iowrite32(TSI721_OBDMAC_CTL_INIT, priv->regs + TSI721_OBDMAC_CTL(mbox));
+	iowrite32(TSI721_OBDMAC_CTL_RETRY_THR | TSI721_OBDMAC_CTL_INIT,
+		  priv->regs + TSI721_OBDMAC_CTL(mbox));
 	ioread32(priv->regs + TSI721_OBDMAC_DWRCNT(mbox));
 	udelay(10);
 
@@ -1684,9 +2055,9 @@ static void tsi721_close_outb_mbox(struct rio_mport *mport, int mbox)
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
 		free_irq(priv->msix[TSI721_VECT_OMB0_DONE + mbox].vector,
-			 (void *)mport);
+			 (void *)priv);
 		free_irq(priv->msix[TSI721_VECT_OMB0_INT + mbox].vector,
-			 (void *)mport);
+			 (void *)priv);
 	}
 #endif /* CONFIG_PCI_MSI */
 
@@ -1731,30 +2102,28 @@ static void tsi721_imsg_handler(struct tsi721_device *priv, int ch)
 {
 	u32 mbox = ch - 4;
 	u32 imsg_int;
+	struct rio_mport *mport = &priv->mport;
 
 	spin_lock(&priv->imsg_ring[mbox].lock);
 
 	imsg_int = ioread32(priv->regs + TSI721_IBDMAC_INT(ch));
 
 	if (imsg_int & TSI721_IBDMAC_INT_SRTO)
-		dev_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout\n",
-			mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d SRIO timeout", mbox);
 
 	if (imsg_int & TSI721_IBDMAC_INT_PC_ERROR)
-		dev_info(&priv->pdev->dev, "IB MBOX%d PCIe error\n",
-			mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d PCIe error", mbox);
 
 	if (imsg_int & TSI721_IBDMAC_INT_FQ_LOW)
-		dev_info(&priv->pdev->dev,
-			"IB MBOX%d IB free queue low\n", mbox);
+		tsi_info(&priv->pdev->dev, "IB MBOX%d IB free queue low", mbox);
 
 	/* Clear IB channel interrupts */
 	iowrite32(imsg_int, priv->regs + TSI721_IBDMAC_INT(ch));
 
 	/* If an IB Msg is received notify the upper layer */
 	if (imsg_int & TSI721_IBDMAC_INT_DQ_RCV &&
-		priv->mport->inb_msg[mbox].mcback)
-		priv->mport->inb_msg[mbox].mcback(priv->mport,
+		mport->inb_msg[mbox].mcback)
+		mport->inb_msg[mbox].mcback(mport,
 				priv->imsg_ring[mbox].dev_id, mbox, -1);
 
 	if (!(priv->flags & TSI721_USING_MSIX)) {
@@ -1810,8 +2179,8 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
 				   GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].buf_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate buffers for IB MBOX%d\n", mbox);
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate buffers for IB MBOX%d", mbox);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -1824,8 +2193,8 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
 				   GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].imfq_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate free queue for IB MBOX%d\n", mbox);
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate free queue for IB MBOX%d", mbox);
 		rc = -ENOMEM;
 		goto out_buf;
 	}
@@ -1837,8 +2206,8 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
 				   &priv->imsg_ring[mbox].imd_phys, GFP_KERNEL);
 
 	if (priv->imsg_ring[mbox].imd_base == NULL) {
-		dev_err(&priv->pdev->dev,
-			"Failed to allocate descriptor memory for IB MBOX%d\n",
+		tsi_err(&priv->pdev->dev,
+			"Failed to allocate descriptor memory for IB MBOX%d",
 			mbox);
 		rc = -ENOMEM;
 		goto out_dma;
@@ -1859,7 +2228,7 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
 	 * once when first inbound mailbox is requested.
 	 */
 	if (!(priv->flags & TSI721_IMSGID_SET)) {
-		iowrite32((u32)priv->mport->host_deviceid,
+		iowrite32((u32)priv->mport.host_deviceid,
 			priv->regs + TSI721_IB_DEVID);
 		priv->flags |= TSI721_IMSGID_SET;
 	}
@@ -1890,31 +2259,29 @@ static int tsi721_open_inb_mbox(struct rio_mport *mport, void *dev_id,
 
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
+		int idx = TSI721_VECT_IMB0_RCV + mbox;
+
 		/* Request interrupt service if we are in MSI-X mode */
-		rc = request_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-			tsi721_imsg_msix, 0,
-			priv->msix[TSI721_VECT_IMB0_RCV + mbox].irq_name,
-			(void *)mport);
+		rc = request_irq(priv->msix[idx].vector, tsi721_imsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc) {
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"IBOX%d-DONE\n", mbox);
+			tsi_debug(IMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for IBOX%d-DONE",
+				mbox);
 			goto out_desc;
 		}
 
-		rc = request_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
-			tsi721_imsg_msix, 0,
-			priv->msix[TSI721_VECT_IMB0_INT + mbox].irq_name,
-			(void *)mport);
+		idx = TSI721_VECT_IMB0_INT + mbox;
+		rc = request_irq(priv->msix[idx].vector, tsi721_imsg_msix, 0,
+				 priv->msix[idx].irq_name, (void *)priv);
 
 		if (rc)	{
-			dev_dbg(&priv->pdev->dev,
-				"Unable to allocate MSI-X interrupt for "
-				"IBOX%d-INT\n", mbox);
+			tsi_debug(IMSG, &priv->pdev->dev,
+				"Unable to get MSI-X IRQ for IBOX%d-INT", mbox);
 			free_irq(
 				priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 			goto out_desc;
 		}
 	}
@@ -1985,9 +2352,9 @@ static void tsi721_close_inb_mbox(struct rio_mport *mport, int mbox)
 #ifdef CONFIG_PCI_MSI
 	if (priv->flags & TSI721_USING_MSIX) {
 		free_irq(priv->msix[TSI721_VECT_IMB0_RCV + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 		free_irq(priv->msix[TSI721_VECT_IMB0_INT + mbox].vector,
-				(void *)mport);
+				(void *)priv);
 	}
 #endif /* CONFIG_PCI_MSI */
 
@@ -2034,8 +2401,8 @@ static int tsi721_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
 
 	rx_slot = priv->imsg_ring[mbox].rx_slot;
 	if (priv->imsg_ring[mbox].imq_base[rx_slot]) {
-		dev_err(&priv->pdev->dev,
-			"Error adding inbound buffer %d, buffer exists\n",
+		tsi_err(&priv->pdev->dev,
+			"Error adding inbound buffer %d, buffer exists",
 			rx_slot);
 		rc = -EINVAL;
 		goto out;
@@ -2153,6 +2520,39 @@ static int tsi721_messages_init(struct tsi721_device *priv)
 }
 
 /**
+ * tsi721_query_mport - Fetch inbound message from the Tsi721 MSG Queue
+ * @mport: Master port implementing the Inbound Messaging Engine
+ * @mbox: Inbound mailbox number
+ *
+ * Returns pointer to the message on success or NULL on failure.
+ */
+static int tsi721_query_mport(struct rio_mport *mport,
+			      struct rio_mport_attr *attr)
+{
+	struct tsi721_device *priv = mport->priv;
+	u32 rval;
+
+	rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_ERR_STS_CSR(0)));
+	if (rval & RIO_PORT_N_ERR_STS_PORT_OK) {
+		rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL2_CSR(0)));
+		attr->link_speed = (rval & RIO_PORT_N_CTL2_SEL_BAUD) >> 28;
+		rval = ioread32(priv->regs + (0x100 + RIO_PORT_N_CTL_CSR(0)));
+		attr->link_width = (rval & RIO_PORT_N_CTL_IPW) >> 27;
+	} else
+		attr->link_speed = RIO_LINK_DOWN;
+
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	attr->flags = RIO_MPORT_DMA | RIO_MPORT_DMA_SG;
+	attr->dma_max_sge = 0;
+	attr->dma_max_size = TSI721_BDMA_MAX_BCOUNT;
+	attr->dma_align = 0;
+#else
+	attr->flags = 0;
+#endif
+	return 0;
+}
+
+/**
  * tsi721_disable_ints - disables all device interrupts
  * @priv: pointer to tsi721 private data
  */
@@ -2203,6 +2603,34 @@ static void tsi721_disable_ints(struct tsi721_device *priv)
 	iowrite32(0, priv->regs + TSI721_RIO_EM_DEV_INT_EN);
 }
 
+static struct rio_ops tsi721_rio_ops = {
+	.lcread			= tsi721_lcread,
+	.lcwrite		= tsi721_lcwrite,
+	.cread			= tsi721_cread_dma,
+	.cwrite			= tsi721_cwrite_dma,
+	.dsend			= tsi721_dsend,
+	.open_inb_mbox		= tsi721_open_inb_mbox,
+	.close_inb_mbox		= tsi721_close_inb_mbox,
+	.open_outb_mbox		= tsi721_open_outb_mbox,
+	.close_outb_mbox	= tsi721_close_outb_mbox,
+	.add_outb_message	= tsi721_add_outb_message,
+	.add_inb_buffer		= tsi721_add_inb_buffer,
+	.get_inb_message	= tsi721_get_inb_message,
+	.map_inb		= tsi721_rio_map_inb_mem,
+	.unmap_inb		= tsi721_rio_unmap_inb_mem,
+	.pwenable		= tsi721_pw_enable,
+	.query_mport		= tsi721_query_mport,
+	.map_outb		= tsi721_map_outb_win,
+	.unmap_outb		= tsi721_unmap_outb_win,
+};
+
+static void tsi721_mport_release(struct device *dev)
+{
+	struct rio_mport *mport = to_rio_mport(dev);
+
+	tsi_debug(EXIT, dev, "%s id=%d", mport->name, mport->id);
+}
+
 /**
  * tsi721_setup_mport - Setup Tsi721 as RapidIO subsystem master port
  * @priv: pointer to tsi721 private data
@@ -2213,46 +2641,20 @@ static int tsi721_setup_mport(struct tsi721_device *priv)
 {
 	struct pci_dev *pdev = priv->pdev;
 	int err = 0;
-	struct rio_ops *ops;
-
-	struct rio_mport *mport;
+	struct rio_mport *mport = &priv->mport;
 
-	ops = kzalloc(sizeof(struct rio_ops), GFP_KERNEL);
-	if (!ops) {
-		dev_dbg(&pdev->dev, "Unable to allocate memory for rio_ops\n");
-		return -ENOMEM;
-	}
-
-	ops->lcread = tsi721_lcread;
-	ops->lcwrite = tsi721_lcwrite;
-	ops->cread = tsi721_cread_dma;
-	ops->cwrite = tsi721_cwrite_dma;
-	ops->dsend = tsi721_dsend;
-	ops->open_inb_mbox = tsi721_open_inb_mbox;
-	ops->close_inb_mbox = tsi721_close_inb_mbox;
-	ops->open_outb_mbox = tsi721_open_outb_mbox;
-	ops->close_outb_mbox = tsi721_close_outb_mbox;
-	ops->add_outb_message = tsi721_add_outb_message;
-	ops->add_inb_buffer = tsi721_add_inb_buffer;
-	ops->get_inb_message = tsi721_get_inb_message;
-	ops->map_inb = tsi721_rio_map_inb_mem;
-	ops->unmap_inb = tsi721_rio_unmap_inb_mem;
-
-	mport = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
-	if (!mport) {
-		kfree(ops);
-		dev_dbg(&pdev->dev, "Unable to allocate memory for mport\n");
-		return -ENOMEM;
-	}
+	err = rio_mport_initialize(mport);
+	if (err)
+		return err;
 
-	mport->ops = ops;
+	mport->ops = &tsi721_rio_ops;
 	mport->index = 0;
 	mport->sys_size = 0; /* small system */
 	mport->phy_type = RIO_PHY_SERIAL;
 	mport->priv = (void *)priv;
 	mport->phys_efptr = 0x100;
 	mport->dev.parent = &pdev->dev;
-	priv->mport = mport;
+	mport->dev.release = tsi721_mport_release;
 
 	INIT_LIST_HEAD(&mport->dbells);
 
@@ -2270,31 +2672,28 @@ static int tsi721_setup_mport(struct tsi721_device *priv)
 	else if (!pci_enable_msi(pdev))
 		priv->flags |= TSI721_USING_MSI;
 	else
-		dev_info(&pdev->dev,
-			 "MSI/MSI-X is not available. Using legacy INTx.\n");
+		tsi_debug(MPORT, &pdev->dev,
+			 "MSI/MSI-X is not available. Using legacy INTx.");
 #endif /* CONFIG_PCI_MSI */
 
-	err = tsi721_request_irq(mport);
+	err = tsi721_request_irq(priv);
 
-	if (!err) {
-		tsi721_interrupts_init(priv);
-		ops->pwenable = tsi721_pw_enable;
-	} else {
-		dev_err(&pdev->dev, "Unable to get assigned PCI IRQ "
-			"vector %02X err=0x%x\n", pdev->irq, err);
-		goto err_exit;
+	if (err) {
+		tsi_err(&pdev->dev, "Unable to get PCI IRQ %02X (err=0x%x)",
+			pdev->irq, err);
+		return err;
 	}
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
-	tsi721_register_dma(priv);
+	err = tsi721_register_dma(priv);
+	if (err)
+		goto err_exit;
 #endif
 	/* Enable SRIO link */
 	iowrite32(ioread32(priv->regs + TSI721_DEVCTL) |
 		  TSI721_DEVCTL_SRBOOT_CMPL,
 		  priv->regs + TSI721_DEVCTL);
 
-	rio_register_mport(mport);
-
 	if (mport->host_deviceid >= 0)
 		iowrite32(RIO_PORT_GEN_HOST | RIO_PORT_GEN_MASTER |
 			  RIO_PORT_GEN_DISCOVERED,
@@ -2302,11 +2701,16 @@ static int tsi721_setup_mport(struct tsi721_device *priv)
 	else
 		iowrite32(0, priv->regs + (0x100 + RIO_PORT_GEN_CTL_CSR));
 
+	err = rio_register_mport(mport);
+	if (err) {
+		tsi721_unregister_dma(priv);
+		goto err_exit;
+	}
+
 	return 0;
 
 err_exit:
-	kfree(mport);
-	kfree(ops);
+	tsi721_free_irq(priv);
 	return err;
 }
 
@@ -2317,15 +2721,14 @@ static int tsi721_probe(struct pci_dev *pdev,
 	int err;
 
 	priv = kzalloc(sizeof(struct tsi721_device), GFP_KERNEL);
-	if (priv == NULL) {
-		dev_err(&pdev->dev, "Failed to allocate memory for device\n");
+	if (!priv) {
 		err = -ENOMEM;
 		goto err_exit;
 	}
 
 	err = pci_enable_device(pdev);
 	if (err) {
-		dev_err(&pdev->dev, "Failed to enable PCI device\n");
+		tsi_err(&pdev->dev, "Failed to enable PCI device");
 		goto err_clean;
 	}
 
@@ -2333,13 +2736,12 @@ static int tsi721_probe(struct pci_dev *pdev,
 
 #ifdef DEBUG
 	{
-	int i;
-	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
-		dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
-			i, (unsigned long long)pci_resource_start(pdev, i),
-			(unsigned long)pci_resource_len(pdev, i),
-			pci_resource_flags(pdev, i));
-	}
+		int i;
+
+		for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+			tsi_debug(INIT, &pdev->dev, "res%d %pR",
+				  i, &pdev->resource[i]);
+		}
 	}
 #endif
 	/*
@@ -2350,8 +2752,7 @@ static int tsi721_probe(struct pci_dev *pdev,
 	if (!(pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM) ||
 	    pci_resource_flags(pdev, BAR_0) & IORESOURCE_MEM_64 ||
 	    pci_resource_len(pdev, BAR_0) < TSI721_REG_SPACE_SIZE) {
-		dev_err(&pdev->dev,
-			"Missing or misconfigured CSR BAR0, aborting.\n");
+		tsi_err(&pdev->dev, "Missing or misconfigured CSR BAR0");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -2360,8 +2761,7 @@ static int tsi721_probe(struct pci_dev *pdev,
 	if (!(pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM) ||
 	    pci_resource_flags(pdev, BAR_1) & IORESOURCE_MEM_64 ||
 	    pci_resource_len(pdev, BAR_1) < TSI721_DB_WIN_SIZE) {
-		dev_err(&pdev->dev,
-			"Missing or misconfigured Doorbell BAR1, aborting.\n");
+		tsi_err(&pdev->dev, "Missing or misconfigured Doorbell BAR1");
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -2373,20 +2773,32 @@ static int tsi721_probe(struct pci_dev *pdev,
 	 * It may be a good idea to keep them disabled using HW configuration
 	 * to save PCI memory space.
 	 */
-	if ((pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM) &&
-	    (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64)) {
-		dev_info(&pdev->dev, "Outbound BAR2 is not used but enabled.\n");
+
+	priv->p2r_bar[0].size = priv->p2r_bar[1].size = 0;
+
+	if (pci_resource_flags(pdev, BAR_2) & IORESOURCE_MEM_64) {
+		if (pci_resource_flags(pdev, BAR_2) & IORESOURCE_PREFETCH)
+			tsi_debug(INIT, &pdev->dev,
+				 "Prefetchable OBW BAR2 will not be used");
+		else {
+			priv->p2r_bar[0].base = pci_resource_start(pdev, BAR_2);
+			priv->p2r_bar[0].size = pci_resource_len(pdev, BAR_2);
+		}
 	}
 
-	if ((pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM) &&
-	    (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64)) {
-		dev_info(&pdev->dev, "Outbound BAR4 is not used but enabled.\n");
+	if (pci_resource_flags(pdev, BAR_4) & IORESOURCE_MEM_64) {
+		if (pci_resource_flags(pdev, BAR_4) & IORESOURCE_PREFETCH)
+			tsi_debug(INIT, &pdev->dev,
+				 "Prefetchable OBW BAR4 will not be used");
+		else {
+			priv->p2r_bar[1].base = pci_resource_start(pdev, BAR_4);
+			priv->p2r_bar[1].size = pci_resource_len(pdev, BAR_4);
+		}
 	}
 
 	err = pci_request_regions(pdev, DRV_NAME);
 	if (err) {
-		dev_err(&pdev->dev, "Cannot obtain PCI resources, "
-			"aborting.\n");
+		tsi_err(&pdev->dev, "Unable to obtain PCI resources");
 		goto err_disable_pdev;
 	}
 
@@ -2394,16 +2806,14 @@ static int tsi721_probe(struct pci_dev *pdev,
 
 	priv->regs = pci_ioremap_bar(pdev, BAR_0);
 	if (!priv->regs) {
-		dev_err(&pdev->dev,
-			"Unable to map device registers space, aborting\n");
+		tsi_err(&pdev->dev, "Unable to map device registers space");
 		err = -ENOMEM;
 		goto err_free_res;
 	}
 
 	priv->odb_base = pci_ioremap_bar(pdev, BAR_1);
 	if (!priv->odb_base) {
-		dev_err(&pdev->dev,
-			"Unable to map outbound doorbells space, aborting\n");
+		tsi_err(&pdev->dev, "Unable to map outbound doorbells space");
 		err = -ENOMEM;
 		goto err_unmap_bars;
 	}
@@ -2412,25 +2822,23 @@ static int tsi721_probe(struct pci_dev *pdev,
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (err) {
-			dev_info(&pdev->dev, "Unable to set DMA mask\n");
+			tsi_err(&pdev->dev, "Unable to set DMA mask");
 			goto err_unmap_bars;
 		}
 
 		if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)))
-			dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+			tsi_info(&pdev->dev, "Unable to set consistent DMA mask");
 	} else {
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err)
-			dev_info(&pdev->dev, "Unable to set consistent DMA mask\n");
+			tsi_info(&pdev->dev, "Unable to set consistent DMA mask");
 	}
 
 	BUG_ON(!pci_is_pcie(pdev));
 
-	/* Clear "no snoop" and "relaxed ordering" bits, use default MRRS. */
+	/* Clear "no snoop" and "relaxed ordering" bits. */
 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL,
-		PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_RELAX_EN |
-		PCI_EXP_DEVCTL_NOSNOOP_EN,
-		PCI_EXP_DEVCTL_READRQ_512B);
+		PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN, 0);
 
 	/* Adjust PCIe completion timeout. */
 	pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL2, 0xf, 0x2);
@@ -2452,7 +2860,7 @@ static int tsi721_probe(struct pci_dev *pdev,
 	tsi721_init_sr2pc_mapping(priv);
 
 	if (tsi721_bdma_maint_init(priv)) {
-		dev_err(&pdev->dev, "BDMA initialization failed, aborting\n");
+		tsi_err(&pdev->dev, "BDMA initialization failed");
 		err = -ENOMEM;
 		goto err_unmap_bars;
 	}
@@ -2471,9 +2879,13 @@ static int tsi721_probe(struct pci_dev *pdev,
 	if (err)
 		goto err_free_consistent;
 
+	pci_set_drvdata(pdev, priv);
+	tsi721_interrupts_init(priv);
+
 	return 0;
 
 err_free_consistent:
+	tsi721_port_write_free(priv);
 	tsi721_doorbell_free(priv);
 err_free_bdma:
 	tsi721_bdma_maint_free(priv);
@@ -2493,6 +2905,53 @@ err_exit:
 	return err;
 }
 
+static void tsi721_remove(struct pci_dev *pdev)
+{
+	struct tsi721_device *priv = pci_get_drvdata(pdev);
+
+	tsi_debug(EXIT, &pdev->dev, "enter");
+
+	tsi721_disable_ints(priv);
+	tsi721_free_irq(priv);
+	flush_scheduled_work();
+	rio_unregister_mport(&priv->mport);
+
+	tsi721_unregister_dma(priv);
+	tsi721_bdma_maint_free(priv);
+	tsi721_doorbell_free(priv);
+	tsi721_port_write_free(priv);
+	tsi721_close_sr2pc_mapping(priv);
+
+	if (priv->regs)
+		iounmap(priv->regs);
+	if (priv->odb_base)
+		iounmap(priv->odb_base);
+#ifdef CONFIG_PCI_MSI
+	if (priv->flags & TSI721_USING_MSIX)
+		pci_disable_msix(priv->pdev);
+	else if (priv->flags & TSI721_USING_MSI)
+		pci_disable_msi(priv->pdev);
+#endif
+	pci_release_regions(pdev);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+	kfree(priv);
+	tsi_debug(EXIT, &pdev->dev, "exit");
+}
+
+static void tsi721_shutdown(struct pci_dev *pdev)
+{
+	struct tsi721_device *priv = pci_get_drvdata(pdev);
+
+	tsi_debug(EXIT, &pdev->dev, "enter");
+
+	tsi721_disable_ints(priv);
+	tsi721_dma_stop_all(priv);
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+}
+
 static const struct pci_device_id tsi721_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_IDT, PCI_DEVICE_ID_TSI721) },
 	{ 0, }	/* terminate list */
@@ -2504,14 +2963,11 @@ static struct pci_driver tsi721_driver = {
 	.name		= "tsi721",
 	.id_table	= tsi721_pci_tbl,
 	.probe		= tsi721_probe,
+	.remove		= tsi721_remove,
+	.shutdown	= tsi721_shutdown,
 };
 
-static int __init tsi721_init(void)
-{
-	return pci_register_driver(&tsi721_driver);
-}
-
-device_initcall(tsi721_init);
+module_pci_driver(tsi721_driver);
 
 MODULE_DESCRIPTION("IDT Tsi721 PCIExpress-to-SRIO bridge driver");
 MODULE_AUTHOR("Integrated Device Technology, Inc.");
diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h
index 9d2502543ef6..5456dbddc929 100644
--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -21,6 +21,46 @@
 #ifndef __TSI721_H
 #define __TSI721_H
 
+/* Debug output filtering masks */
+enum {
+	DBG_NONE	= 0,
+	DBG_INIT	= BIT(0), /* driver init */
+	DBG_EXIT	= BIT(1), /* driver exit */
+	DBG_MPORT	= BIT(2), /* mport add/remove */
+	DBG_MAINT	= BIT(3), /* maintenance ops messages */
+	DBG_DMA		= BIT(4), /* DMA transfer messages */
+	DBG_DMAV	= BIT(5), /* verbose DMA transfer messages */
+	DBG_IBW		= BIT(6), /* inbound window */
+	DBG_EVENT	= BIT(7), /* event handling messages */
+	DBG_OBW		= BIT(8), /* outbound window messages */
+	DBG_DBELL	= BIT(9), /* doorbell messages */
+	DBG_OMSG	= BIT(10), /* doorbell messages */
+	DBG_IMSG	= BIT(11), /* doorbell messages */
+	DBG_ALL		= ~0,
+};
+
+#ifdef DEBUG
+extern u32 dbg_level;
+
+#define tsi_debug(level, dev, fmt, arg...)				\
+	do {								\
+		if (DBG_##level & dbg_level)				\
+			dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg);	\
+	} while (0)
+#else
+#define tsi_debug(level, dev, fmt, arg...) \
+		no_printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##arg)
+#endif
+
+#define tsi_info(dev, fmt, arg...) \
+	dev_info(dev, "%s: " fmt "\n", __func__, ##arg)
+
+#define tsi_warn(dev, fmt, arg...) \
+	dev_warn(dev, "%s: WARNING " fmt "\n", __func__, ##arg)
+
+#define tsi_err(dev, fmt, arg...) \
+	dev_err(dev, "%s: ERROR " fmt "\n", __func__, ##arg)
+
 #define DRV_NAME	"tsi721"
 
 #define DEFAULT_HOPCOUNT	0xff
@@ -674,7 +714,7 @@ struct tsi721_bdma_chan {
 	struct dma_chan		dchan;
 	struct tsi721_tx_desc	*tx_desc;
 	spinlock_t		lock;
-	struct list_head	active_list;
+	struct tsi721_tx_desc	*active_tx;
 	struct list_head	queue;
 	struct list_head	free_list;
 	struct tasklet_struct	tasklet;
@@ -808,9 +848,38 @@ struct msix_irq {
 };
 #endif /* CONFIG_PCI_MSI */
 
+struct tsi721_ib_win_mapping {
+	struct list_head node;
+	dma_addr_t	lstart;
+};
+
+struct tsi721_ib_win {
+	u64		rstart;
+	u32		size;
+	dma_addr_t	lstart;
+	bool		active;
+	bool		xlat;
+	struct list_head mappings;
+};
+
+struct tsi721_obw_bar {
+	u64		base;
+	u64		size;
+	u64		free;
+};
+
+struct tsi721_ob_win {
+	u64		base;
+	u32		size;
+	u16		destid;
+	u64		rstart;
+	bool		active;
+	struct tsi721_obw_bar *pbar;
+};
+
 struct tsi721_device {
 	struct pci_dev	*pdev;
-	struct rio_mport *mport;
+	struct rio_mport mport;
 	u32		flags;
 	void __iomem	*regs;
 #ifdef CONFIG_PCI_MSI
@@ -843,11 +912,25 @@ struct tsi721_device {
 	/* Outbound Messaging */
 	int		omsg_init[TSI721_OMSG_CHNUM];
 	struct tsi721_omsg_ring	omsg_ring[TSI721_OMSG_CHNUM];
+
+	/* Inbound Mapping Windows */
+	struct tsi721_ib_win ib_win[TSI721_IBWIN_NUM];
+	int		ibwin_cnt;
+
+	/* Outbound Mapping Windows */
+	struct tsi721_obw_bar p2r_bar[2];
+	struct tsi721_ob_win  ob_win[TSI721_OBWIN_NUM];
+	int		obwin_cnt;
 };
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 extern void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan);
 extern int tsi721_register_dma(struct tsi721_device *priv);
+extern void tsi721_unregister_dma(struct tsi721_device *priv);
+extern void tsi721_dma_stop_all(struct tsi721_device *priv);
+#else
+#define tsi721_dma_stop_all(priv) do {} while (0)
+#define tsi721_unregister_dma(priv) do {} while (0)
 #endif
 
 #endif
diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c
index 47295940a868..155cae1e62de 100644
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -30,6 +30,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/kfifo.h>
+#include <linux/sched.h>
 #include <linux/delay.h>
 #include "../../dma/dmaengine.h"
 
@@ -63,14 +64,6 @@ struct tsi721_tx_desc *to_tsi721_desc(struct dma_async_tx_descriptor *txd)
 	return container_of(txd, struct tsi721_tx_desc, txd);
 }
 
-static inline
-struct tsi721_tx_desc *tsi721_dma_first_active(
-				struct tsi721_bdma_chan *bdma_chan)
-{
-	return list_first_entry(&bdma_chan->active_list,
-				struct tsi721_tx_desc, desc_node);
-}
-
 static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 {
 	struct tsi721_dma_desc *bd_ptr;
@@ -83,7 +76,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 	struct tsi721_device *priv = to_tsi721(bdma_chan->dchan.device);
 #endif
 
-	dev_dbg(dev, "Init Block DMA Engine, CH%d\n", bdma_chan->id);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d", bdma_chan->id);
 
 	/*
 	 * Allocate space for DMA descriptors
@@ -91,7 +84,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 	 */
 	bd_ptr = dma_zalloc_coherent(dev,
 				(bd_num + 1) * sizeof(struct tsi721_dma_desc),
-				&bd_phys, GFP_KERNEL);
+				&bd_phys, GFP_ATOMIC);
 	if (!bd_ptr)
 		return -ENOMEM;
 
@@ -99,8 +92,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 	bdma_chan->bd_phys = bd_phys;
 	bdma_chan->bd_base = bd_ptr;
 
-	dev_dbg(dev, "DMA descriptors @ %p (phys = %llx)\n",
-		bd_ptr, (unsigned long long)bd_phys);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+		  "DMAC%d descriptors @ %p (phys = %pad)",
+		  bdma_chan->id, bd_ptr, &bd_phys);
 
 	/* Allocate space for descriptor status FIFO */
 	sts_size = ((bd_num + 1) >= TSI721_DMA_MINSTSSZ) ?
@@ -108,7 +102,7 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 	sts_size = roundup_pow_of_two(sts_size);
 	sts_ptr = dma_zalloc_coherent(dev,
 				     sts_size * sizeof(struct tsi721_dma_sts),
-				     &sts_phys, GFP_KERNEL);
+				     &sts_phys, GFP_ATOMIC);
 	if (!sts_ptr) {
 		/* Free space allocated for DMA descriptors */
 		dma_free_coherent(dev,
@@ -122,9 +116,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 	bdma_chan->sts_base = sts_ptr;
 	bdma_chan->sts_size = sts_size;
 
-	dev_dbg(dev,
-		"desc status FIFO @ %p (phys = %llx) size=0x%x\n",
-		sts_ptr, (unsigned long long)sts_phys, sts_size);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+		"DMAC%d desc status FIFO @ %p (phys = %pad) size=0x%x",
+		bdma_chan->id, sts_ptr, &sts_phys, sts_size);
 
 	/* Initialize DMA descriptors ring using added link descriptor */
 	bd_ptr[bd_num].type_id = cpu_to_le32(DTYPE3 << 29);
@@ -163,8 +157,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 				 priv->msix[idx].irq_name, (void *)bdma_chan);
 
 		if (rc) {
-			dev_dbg(dev, "Unable to get MSI-X for BDMA%d-DONE\n",
-				bdma_chan->id);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				  "Unable to get MSI-X for DMAC%d-DONE",
+				  bdma_chan->id);
 			goto err_out;
 		}
 
@@ -174,8 +169,9 @@ static int tsi721_bdma_ch_init(struct tsi721_bdma_chan *bdma_chan, int bd_num)
 				priv->msix[idx].irq_name, (void *)bdma_chan);
 
 		if (rc)	{
-			dev_dbg(dev, "Unable to get MSI-X for BDMA%d-INT\n",
-				bdma_chan->id);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				  "Unable to get MSI-X for DMAC%d-INT",
+				  bdma_chan->id);
 			free_irq(
 				priv->msix[TSI721_VECT_DMA0_DONE +
 					    bdma_chan->id].vector,
@@ -286,7 +282,7 @@ void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan)
 	/* Disable BDMA channel interrupts */
 	iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE);
 	if (bdma_chan->active)
-		tasklet_schedule(&bdma_chan->tasklet);
+		tasklet_hi_schedule(&bdma_chan->tasklet);
 }
 
 #ifdef CONFIG_PCI_MSI
@@ -301,7 +297,8 @@ static irqreturn_t tsi721_bdma_msix(int irq, void *ptr)
 {
 	struct tsi721_bdma_chan *bdma_chan = ptr;
 
-	tsi721_bdma_handler(bdma_chan);
+	if (bdma_chan->active)
+		tasklet_hi_schedule(&bdma_chan->tasklet);
 	return IRQ_HANDLED;
 }
 #endif /* CONFIG_PCI_MSI */
@@ -310,20 +307,22 @@ static irqreturn_t tsi721_bdma_msix(int irq, void *ptr)
 static void tsi721_start_dma(struct tsi721_bdma_chan *bdma_chan)
 {
 	if (!tsi721_dma_is_idle(bdma_chan)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to start non-idle channel\n");
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d Attempt to start non-idle channel",
+			bdma_chan->id);
 		return;
 	}
 
 	if (bdma_chan->wr_count == bdma_chan->wr_count_next) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to start DMA with no BDs ready\n");
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d Attempt to start DMA with no BDs ready %d",
+			bdma_chan->id, task_pid_nr(current));
 		return;
 	}
 
-	dev_dbg(bdma_chan->dchan.device->dev,
-		"%s: chan_%d (wrc=%d)\n", __func__, bdma_chan->id,
-		bdma_chan->wr_count_next);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d (wrc=%d) %d",
+		  bdma_chan->id, bdma_chan->wr_count_next,
+		  task_pid_nr(current));
 
 	iowrite32(bdma_chan->wr_count_next,
 		bdma_chan->regs + TSI721_DMAC_DWRCNT);
@@ -425,10 +424,11 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 	struct tsi721_dma_desc *bd_ptr = NULL;
 	u32 idx, rd_idx;
 	u32 add_count = 0;
+	struct device *ch_dev = &dchan->dev->device;
 
 	if (!tsi721_dma_is_idle(bdma_chan)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"BUG: Attempt to use non-idle channel\n");
+		tsi_err(ch_dev, "DMAC%d ERR: Attempt to use non-idle channel",
+			bdma_chan->id);
 		return -EIO;
 	}
 
@@ -439,7 +439,7 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 	rio_addr = desc->rio_addr;
 	next_addr = -1;
 	bcount = 0;
-	sys_size = dma_to_mport(bdma_chan->dchan.device)->sys_size;
+	sys_size = dma_to_mport(dchan->device)->sys_size;
 
 	rd_idx = ioread32(bdma_chan->regs + TSI721_DMAC_DRDCNT);
 	rd_idx %= (bdma_chan->bd_num + 1);
@@ -451,18 +451,18 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 		add_count++;
 	}
 
-	dev_dbg(dchan->device->dev, "%s: BD ring status: rdi=%d wri=%d\n",
-		__func__, rd_idx, idx);
+	tsi_debug(DMA, ch_dev, "DMAC%d BD ring status: rdi=%d wri=%d",
+		  bdma_chan->id, rd_idx, idx);
 
 	for_each_sg(desc->sg, sg, desc->sg_len, i) {
 
-		dev_dbg(dchan->device->dev, "sg%d/%d addr: 0x%llx len: %d\n",
-			i, desc->sg_len,
+		tsi_debug(DMAV, ch_dev, "DMAC%d sg%d/%d addr: 0x%llx len: %d",
+			bdma_chan->id, i, desc->sg_len,
 			(unsigned long long)sg_dma_address(sg), sg_dma_len(sg));
 
 		if (sg_dma_len(sg) > TSI721_BDMA_MAX_BCOUNT) {
-			dev_err(dchan->device->dev,
-				"%s: SG entry %d is too large\n", __func__, i);
+			tsi_err(ch_dev, "DMAC%d SG entry %d is too large",
+				bdma_chan->id, i);
 			err = -EINVAL;
 			break;
 		}
@@ -479,17 +479,16 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 		} else if (next_addr != -1) {
 			/* Finalize descriptor using total byte count value */
 			tsi721_desc_fill_end(bd_ptr, bcount, 0);
-			dev_dbg(dchan->device->dev,
-				"%s: prev desc final len: %d\n",
-				__func__, bcount);
+			tsi_debug(DMAV, ch_dev,	"DMAC%d prev desc final len: %d",
+				  bdma_chan->id, bcount);
 		}
 
 		desc->rio_addr = rio_addr;
 
 		if (i && idx == rd_idx) {
-			dev_dbg(dchan->device->dev,
-				"%s: HW descriptor ring is full @ %d\n",
-				__func__, i);
+			tsi_debug(DMAV, ch_dev,
+				  "DMAC%d HW descriptor ring is full @ %d",
+				  bdma_chan->id, i);
 			desc->sg = sg;
 			desc->sg_len -= i;
 			break;
@@ -498,13 +497,12 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 		bd_ptr = &((struct tsi721_dma_desc *)bdma_chan->bd_base)[idx];
 		err = tsi721_desc_fill_init(desc, bd_ptr, sg, sys_size);
 		if (err) {
-			dev_err(dchan->device->dev,
-				"Failed to build desc: err=%d\n", err);
+			tsi_err(ch_dev, "Failed to build desc: err=%d", err);
 			break;
 		}
 
-		dev_dbg(dchan->device->dev, "bd_ptr = %p did=%d raddr=0x%llx\n",
-			bd_ptr, desc->destid, desc->rio_addr);
+		tsi_debug(DMAV, ch_dev, "DMAC%d bd_ptr = %p did=%d raddr=0x%llx",
+			  bdma_chan->id, bd_ptr, desc->destid, desc->rio_addr);
 
 		next_addr = sg_dma_address(sg);
 		bcount = sg_dma_len(sg);
@@ -519,8 +517,9 @@ static int tsi721_submit_sg(struct tsi721_tx_desc *desc)
 entry_done:
 		if (sg_is_last(sg)) {
 			tsi721_desc_fill_end(bd_ptr, bcount, 0);
-			dev_dbg(dchan->device->dev, "%s: last desc final len: %d\n",
-				__func__, bcount);
+			tsi_debug(DMAV, ch_dev,
+				  "DMAC%d last desc final len: %d",
+				  bdma_chan->id, bcount);
 			desc->sg_len = 0;
 		} else {
 			rio_addr += sg_dma_len(sg);
@@ -534,35 +533,43 @@ entry_done:
 	return err;
 }
 
-static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan)
+static void tsi721_advance_work(struct tsi721_bdma_chan *bdma_chan,
+				struct tsi721_tx_desc *desc)
 {
-	struct tsi721_tx_desc *desc;
 	int err;
 
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: Enter\n", __func__);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d", bdma_chan->id);
+
+	if (!tsi721_dma_is_idle(bdma_chan))
+		return;
 
 	/*
-	 * If there are any new transactions in the queue add them
-	 * into the processing list
-	 */
-	if (!list_empty(&bdma_chan->queue))
-		list_splice_init(&bdma_chan->queue, &bdma_chan->active_list);
+	 * If there is no data transfer in progress, fetch new descriptor from
+	 * the pending queue.
+	*/
+
+	if (desc == NULL && bdma_chan->active_tx == NULL &&
+					!list_empty(&bdma_chan->queue)) {
+		desc = list_first_entry(&bdma_chan->queue,
+					struct tsi721_tx_desc, desc_node);
+		list_del_init((&desc->desc_node));
+		bdma_chan->active_tx = desc;
+	}
 
-	/* Start new transaction (if available) */
-	if (!list_empty(&bdma_chan->active_list)) {
-		desc = tsi721_dma_first_active(bdma_chan);
+	if (desc) {
 		err = tsi721_submit_sg(desc);
 		if (!err)
 			tsi721_start_dma(bdma_chan);
 		else {
 			tsi721_dma_tx_err(bdma_chan, desc);
-			dev_dbg(bdma_chan->dchan.device->dev,
-				"ERR: tsi721_submit_sg failed with err=%d\n",
-				err);
+			tsi_debug(DMA, &bdma_chan->dchan.dev->device,
+				"DMAC%d ERR: tsi721_submit_sg failed with err=%d",
+				bdma_chan->id, err);
 		}
 	}
 
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: Exit\n", __func__);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d Exit",
+		  bdma_chan->id);
 }
 
 static void tsi721_dma_tasklet(unsigned long data)
@@ -571,22 +578,84 @@ static void tsi721_dma_tasklet(unsigned long data)
 	u32 dmac_int, dmac_sts;
 
 	dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
-	dev_dbg(bdma_chan->dchan.device->dev, "%s: DMAC%d_INT = 0x%x\n",
-		__func__, bdma_chan->id, dmac_int);
+	tsi_debug(DMA, &bdma_chan->dchan.dev->device, "DMAC%d_INT = 0x%x",
+		  bdma_chan->id, dmac_int);
 	/* Clear channel interrupts */
 	iowrite32(dmac_int, bdma_chan->regs + TSI721_DMAC_INT);
 
 	if (dmac_int & TSI721_DMAC_INT_ERR) {
+		int i = 10000;
+		struct tsi721_tx_desc *desc;
+
+		desc = bdma_chan->active_tx;
 		dmac_sts = ioread32(bdma_chan->regs + TSI721_DMAC_STS);
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: DMA ERROR - DMAC%d_STS = 0x%x\n",
-			__func__, bdma_chan->id, dmac_sts);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d_STS = 0x%x did=%d raddr=0x%llx",
+			bdma_chan->id, dmac_sts, desc->destid, desc->rio_addr);
+
+		/* Re-initialize DMA channel if possible */
+
+		if ((dmac_sts & TSI721_DMAC_STS_ABORT) == 0)
+			goto err_out;
+
+		tsi721_clr_stat(bdma_chan);
+
+		spin_lock(&bdma_chan->lock);
+
+		/* Put DMA channel into init state */
+		iowrite32(TSI721_DMAC_CTL_INIT,
+			  bdma_chan->regs + TSI721_DMAC_CTL);
+		do {
+			udelay(1);
+			dmac_sts = ioread32(bdma_chan->regs + TSI721_DMAC_STS);
+			i--;
+		} while ((dmac_sts & TSI721_DMAC_STS_ABORT) && i);
+
+		if (dmac_sts & TSI721_DMAC_STS_ABORT) {
+			tsi_err(&bdma_chan->dchan.dev->device,
+				"Failed to re-initiate DMAC%d",	bdma_chan->id);
+			spin_unlock(&bdma_chan->lock);
+			goto err_out;
+		}
+
+		/* Setup DMA descriptor pointers */
+		iowrite32(((u64)bdma_chan->bd_phys >> 32),
+			bdma_chan->regs + TSI721_DMAC_DPTRH);
+		iowrite32(((u64)bdma_chan->bd_phys & TSI721_DMAC_DPTRL_MASK),
+			bdma_chan->regs + TSI721_DMAC_DPTRL);
+
+		/* Setup descriptor status FIFO */
+		iowrite32(((u64)bdma_chan->sts_phys >> 32),
+			bdma_chan->regs + TSI721_DMAC_DSBH);
+		iowrite32(((u64)bdma_chan->sts_phys & TSI721_DMAC_DSBL_MASK),
+			bdma_chan->regs + TSI721_DMAC_DSBL);
+		iowrite32(TSI721_DMAC_DSSZ_SIZE(bdma_chan->sts_size),
+			bdma_chan->regs + TSI721_DMAC_DSSZ);
+
+		/* Clear interrupt bits */
+		iowrite32(TSI721_DMAC_INT_ALL,
+			bdma_chan->regs + TSI721_DMAC_INT);
+
+		ioread32(bdma_chan->regs + TSI721_DMAC_INT);
+
+		bdma_chan->wr_count = bdma_chan->wr_count_next = 0;
+		bdma_chan->sts_rdptr = 0;
+		udelay(10);
+
+		desc = bdma_chan->active_tx;
+		desc->status = DMA_ERROR;
+		dma_cookie_complete(&desc->txd);
+		list_add(&desc->desc_node, &bdma_chan->free_list);
+		bdma_chan->active_tx = NULL;
+		if (bdma_chan->active)
+			tsi721_advance_work(bdma_chan, NULL);
+		spin_unlock(&bdma_chan->lock);
 	}
 
 	if (dmac_int & TSI721_DMAC_INT_STFULL) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: DMAC%d descriptor status FIFO is full\n",
-			__func__, bdma_chan->id);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d descriptor status FIFO is full",
+			bdma_chan->id);
 	}
 
 	if (dmac_int & (TSI721_DMAC_INT_DONE | TSI721_DMAC_INT_IOFDONE)) {
@@ -594,7 +663,7 @@ static void tsi721_dma_tasklet(unsigned long data)
 
 		tsi721_clr_stat(bdma_chan);
 		spin_lock(&bdma_chan->lock);
-		desc = tsi721_dma_first_active(bdma_chan);
+		desc = bdma_chan->active_tx;
 
 		if (desc->sg_len == 0) {
 			dma_async_tx_callback callback = NULL;
@@ -606,17 +675,21 @@ static void tsi721_dma_tasklet(unsigned long data)
 				callback = desc->txd.callback;
 				param = desc->txd.callback_param;
 			}
-			list_move(&desc->desc_node, &bdma_chan->free_list);
+			list_add(&desc->desc_node, &bdma_chan->free_list);
+			bdma_chan->active_tx = NULL;
+			if (bdma_chan->active)
+				tsi721_advance_work(bdma_chan, NULL);
 			spin_unlock(&bdma_chan->lock);
 			if (callback)
 				callback(param);
-			spin_lock(&bdma_chan->lock);
+		} else {
+			if (bdma_chan->active)
+				tsi721_advance_work(bdma_chan,
+						    bdma_chan->active_tx);
+			spin_unlock(&bdma_chan->lock);
 		}
-
-		tsi721_advance_work(bdma_chan);
-		spin_unlock(&bdma_chan->lock);
 	}
-
+err_out:
 	/* Re-Enable BDMA channel interrupts */
 	iowrite32(TSI721_DMAC_INT_ALL, bdma_chan->regs + TSI721_DMAC_INTE);
 }
@@ -629,8 +702,9 @@ static dma_cookie_t tsi721_tx_submit(struct dma_async_tx_descriptor *txd)
 
 	/* Check if the descriptor is detached from any lists */
 	if (!list_empty(&desc->desc_node)) {
-		dev_err(bdma_chan->dchan.device->dev,
-			"%s: wrong state of descriptor %p\n", __func__, txd);
+		tsi_err(&bdma_chan->dchan.dev->device,
+			"DMAC%d wrong state of descriptor %p",
+			bdma_chan->id, txd);
 		return -EIO;
 	}
 
@@ -655,25 +729,25 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan)
 	struct tsi721_tx_desc *desc = NULL;
 	int i;
 
-	dev_dbg(dchan->device->dev, "%s: for channel %d\n",
-		__func__, bdma_chan->id);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	if (bdma_chan->bd_base)
 		return TSI721_DMA_TX_QUEUE_SZ;
 
 	/* Initialize BDMA channel */
 	if (tsi721_bdma_ch_init(bdma_chan, dma_desc_per_channel)) {
-		dev_err(dchan->device->dev, "Unable to initialize data DMA"
-			" channel %d, aborting\n", bdma_chan->id);
+		tsi_err(&dchan->dev->device, "Unable to initialize DMAC%d",
+			bdma_chan->id);
 		return -ENODEV;
 	}
 
 	/* Allocate queue of transaction descriptors */
 	desc = kcalloc(TSI721_DMA_TX_QUEUE_SZ, sizeof(struct tsi721_tx_desc),
-			GFP_KERNEL);
+			GFP_ATOMIC);
 	if (!desc) {
-		dev_err(dchan->device->dev,
-			"Failed to allocate logical descriptors\n");
+		tsi_err(&dchan->dev->device,
+			"DMAC%d Failed to allocate logical descriptors",
+			bdma_chan->id);
 		tsi721_bdma_ch_free(bdma_chan);
 		return -ENOMEM;
 	}
@@ -714,15 +788,11 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-	dev_dbg(dchan->device->dev, "%s: for channel %d\n",
-		__func__, bdma_chan->id);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	if (bdma_chan->bd_base == NULL)
 		return;
 
-	BUG_ON(!list_empty(&bdma_chan->active_list));
-	BUG_ON(!list_empty(&bdma_chan->queue));
-
 	tsi721_bdma_interrupt_enable(bdma_chan, 0);
 	bdma_chan->active = false;
 	tsi721_sync_dma_irq(bdma_chan);
@@ -736,20 +806,26 @@ static
 enum dma_status tsi721_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
 				 struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dchan, cookie, txstate);
+	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
+	enum dma_status	status;
+
+	spin_lock_bh(&bdma_chan->lock);
+	status = dma_cookie_status(dchan, cookie, txstate);
+	spin_unlock_bh(&bdma_chan->lock);
+	return status;
 }
 
 static void tsi721_issue_pending(struct dma_chan *dchan)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 
-	dev_dbg(dchan->device->dev, "%s: Enter\n", __func__);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
+	spin_lock_bh(&bdma_chan->lock);
 	if (tsi721_dma_is_idle(bdma_chan) && bdma_chan->active) {
-		spin_lock_bh(&bdma_chan->lock);
-		tsi721_advance_work(bdma_chan);
-		spin_unlock_bh(&bdma_chan->lock);
+		tsi721_advance_work(bdma_chan, NULL);
 	}
+	spin_unlock_bh(&bdma_chan->lock);
 }
 
 static
@@ -759,18 +835,19 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
 			void *tinfo)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-	struct tsi721_tx_desc *desc, *_d;
+	struct tsi721_tx_desc *desc;
 	struct rio_dma_ext *rext = tinfo;
 	enum dma_rtype rtype;
 	struct dma_async_tx_descriptor *txd = NULL;
 
 	if (!sgl || !sg_len) {
-		dev_err(dchan->device->dev, "%s: No SG list\n", __func__);
-		return NULL;
+		tsi_err(&dchan->dev->device, "DMAC%d No SG list",
+			bdma_chan->id);
+		return ERR_PTR(-EINVAL);
 	}
 
-	dev_dbg(dchan->device->dev, "%s: %s\n", __func__,
-		(dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d %s", bdma_chan->id,
+		  (dir == DMA_DEV_TO_MEM)?"READ":"WRITE");
 
 	if (dir == DMA_DEV_TO_MEM)
 		rtype = NREAD;
@@ -788,30 +865,36 @@ struct dma_async_tx_descriptor *tsi721_prep_rio_sg(struct dma_chan *dchan,
 			break;
 		}
 	} else {
-		dev_err(dchan->device->dev,
-			"%s: Unsupported DMA direction option\n", __func__);
-		return NULL;
+		tsi_err(&dchan->dev->device,
+			"DMAC%d Unsupported DMA direction option",
+			bdma_chan->id);
+		return ERR_PTR(-EINVAL);
 	}
 
 	spin_lock_bh(&bdma_chan->lock);
 
-	list_for_each_entry_safe(desc, _d, &bdma_chan->free_list, desc_node) {
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del_init(&desc->desc_node);
-			desc->destid = rext->destid;
-			desc->rio_addr = rext->rio_addr;
-			desc->rio_addr_u = 0;
-			desc->rtype = rtype;
-			desc->sg_len	= sg_len;
-			desc->sg	= sgl;
-			txd		= &desc->txd;
-			txd->flags	= flags;
-			break;
-		}
+	if (!list_empty(&bdma_chan->free_list)) {
+		desc = list_first_entry(&bdma_chan->free_list,
+				struct tsi721_tx_desc, desc_node);
+		list_del_init(&desc->desc_node);
+		desc->destid = rext->destid;
+		desc->rio_addr = rext->rio_addr;
+		desc->rio_addr_u = 0;
+		desc->rtype = rtype;
+		desc->sg_len	= sg_len;
+		desc->sg	= sgl;
+		txd		= &desc->txd;
+		txd->flags	= flags;
 	}
 
 	spin_unlock_bh(&bdma_chan->lock);
 
+	if (!txd) {
+		tsi_debug(DMA, &dchan->dev->device,
+			  "DMAC%d free TXD is not available", bdma_chan->id);
+		return ERR_PTR(-EBUSY);
+	}
+
 	return txd;
 }
 
@@ -819,16 +902,18 @@ static int tsi721_terminate_all(struct dma_chan *dchan)
 {
 	struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
 	struct tsi721_tx_desc *desc, *_d;
-	u32 dmac_int;
 	LIST_HEAD(list);
 
-	dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
+	tsi_debug(DMA, &dchan->dev->device, "DMAC%d", bdma_chan->id);
 
 	spin_lock_bh(&bdma_chan->lock);
 
 	bdma_chan->active = false;
 
-	if (!tsi721_dma_is_idle(bdma_chan)) {
+	while (!tsi721_dma_is_idle(bdma_chan)) {
+
+		udelay(5);
+#if (0)
 		/* make sure to stop the transfer */
 		iowrite32(TSI721_DMAC_CTL_SUSP,
 			  bdma_chan->regs + TSI721_DMAC_CTL);
@@ -837,9 +922,11 @@ static int tsi721_terminate_all(struct dma_chan *dchan)
 		do {
 			dmac_int = ioread32(bdma_chan->regs + TSI721_DMAC_INT);
 		} while ((dmac_int & TSI721_DMAC_INT_SUSP) == 0);
+#endif
 	}
 
-	list_splice_init(&bdma_chan->active_list, &list);
+	if (bdma_chan->active_tx)
+		list_add(&bdma_chan->active_tx->desc_node, &list);
 	list_splice_init(&bdma_chan->queue, &list);
 
 	list_for_each_entry_safe(desc, _d, &list, desc_node)
@@ -850,12 +937,42 @@ static int tsi721_terminate_all(struct dma_chan *dchan)
 	return 0;
 }
 
+static void tsi721_dma_stop(struct tsi721_bdma_chan *bdma_chan)
+{
+	if (!bdma_chan->active)
+		return;
+	spin_lock_bh(&bdma_chan->lock);
+	if (!tsi721_dma_is_idle(bdma_chan)) {
+		int timeout = 100000;
+
+		/* stop the transfer in progress */
+		iowrite32(TSI721_DMAC_CTL_SUSP,
+			  bdma_chan->regs + TSI721_DMAC_CTL);
+
+		/* Wait until DMA channel stops */
+		while (!tsi721_dma_is_idle(bdma_chan) && --timeout)
+			udelay(1);
+	}
+
+	spin_unlock_bh(&bdma_chan->lock);
+}
+
+void tsi721_dma_stop_all(struct tsi721_device *priv)
+{
+	int i;
+
+	for (i = 0; i < TSI721_DMA_MAXCH; i++) {
+		if (i != TSI721_DMACH_MAINT)
+			tsi721_dma_stop(&priv->bdma[i]);
+	}
+}
+
 int tsi721_register_dma(struct tsi721_device *priv)
 {
 	int i;
 	int nr_channels = 0;
 	int err;
-	struct rio_mport *mport = priv->mport;
+	struct rio_mport *mport = &priv->mport;
 
 	INIT_LIST_HEAD(&mport->dma.channels);
 
@@ -875,7 +992,7 @@ int tsi721_register_dma(struct tsi721_device *priv)
 
 		spin_lock_init(&bdma_chan->lock);
 
-		INIT_LIST_HEAD(&bdma_chan->active_list);
+		bdma_chan->active_tx = NULL;
 		INIT_LIST_HEAD(&bdma_chan->queue);
 		INIT_LIST_HEAD(&bdma_chan->free_list);
 
@@ -901,7 +1018,33 @@ int tsi721_register_dma(struct tsi721_device *priv)
 
 	err = dma_async_device_register(&mport->dma);
 	if (err)
-		dev_err(&priv->pdev->dev, "Failed to register DMA device\n");
+		tsi_err(&priv->pdev->dev, "Failed to register DMA device");
 
 	return err;
 }
+
+void tsi721_unregister_dma(struct tsi721_device *priv)
+{
+	struct rio_mport *mport = &priv->mport;
+	struct dma_chan *chan, *_c;
+	struct tsi721_bdma_chan *bdma_chan;
+
+	tsi721_dma_stop_all(priv);
+	dma_async_device_unregister(&mport->dma);
+
+	list_for_each_entry_safe(chan, _c, &mport->dma.channels,
+					device_node) {
+		bdma_chan = to_tsi721_chan(chan);
+		if (bdma_chan->active) {
+			tsi721_bdma_interrupt_enable(bdma_chan, 0);
+			bdma_chan->active = false;
+			tsi721_sync_dma_irq(bdma_chan);
+			tasklet_kill(&bdma_chan->tasklet);
+			INIT_LIST_HEAD(&bdma_chan->free_list);
+			kfree(bdma_chan->tx_desc);
+			tsi721_bdma_ch_free(bdma_chan);
+		}
+
+		list_del(&chan->device_node);
+	}
+}
diff --git a/drivers/rapidio/rio-driver.c b/drivers/rapidio/rio-driver.c
index f301f059bb85..128350f4d17a 100644
--- a/drivers/rapidio/rio-driver.c
+++ b/drivers/rapidio/rio-driver.c
@@ -131,6 +131,17 @@ static int rio_device_remove(struct device *dev)
 	return 0;
 }
 
+static void rio_device_shutdown(struct device *dev)
+{
+	struct rio_dev *rdev = to_rio_dev(dev);
+	struct rio_driver *rdrv = rdev->driver;
+
+	dev_dbg(dev, "RIO: %s\n", __func__);
+
+	if (rdrv && rdrv->shutdown)
+		rdrv->shutdown(rdev);
+}
+
 /**
  *  rio_register_driver - register a new RIO driver
  *  @rdrv: the RIO driver structure to register
@@ -229,6 +240,7 @@ struct bus_type rio_bus_type = {
 	.bus_groups = rio_bus_groups,
 	.probe = rio_device_probe,
 	.remove = rio_device_remove,
+	.shutdown = rio_device_shutdown,
 	.uevent	= rio_uevent,
 };
 
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index d6a126c17c03..a63a380809d1 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -39,6 +39,13 @@
 
 static void rio_init_em(struct rio_dev *rdev);
 
+struct rio_id_table {
+	u16 start;	/* logical minimal id */
+	u32 max;	/* max number of IDs in table */
+	spinlock_t lock;
+	unsigned long table[0];
+};
+
 static int next_destid = 0;
 static int next_comptag = 1;
 
@@ -62,7 +69,7 @@ static int rio_mport_phys_table[] = {
 static u16 rio_destid_alloc(struct rio_net *net)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_first_zero_bit(idtab->table, idtab->max);
@@ -88,7 +95,7 @@ static u16 rio_destid_alloc(struct rio_net *net)
 static int rio_destid_reserve(struct rio_net *net, u16 destid)
 {
 	int oldbit;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	destid -= idtab->start;
 	spin_lock(&idtab->lock);
@@ -106,7 +113,7 @@ static int rio_destid_reserve(struct rio_net *net, u16 destid)
  */
 static void rio_destid_free(struct rio_net *net, u16 destid)
 {
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	destid -= idtab->start;
 	spin_lock(&idtab->lock);
@@ -121,7 +128,7 @@ static void rio_destid_free(struct rio_net *net, u16 destid)
 static u16 rio_destid_first(struct rio_net *net)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_first_bit(idtab->table, idtab->max);
@@ -141,7 +148,7 @@ static u16 rio_destid_first(struct rio_net *net)
 static u16 rio_destid_next(struct rio_net *net, u16 from)
 {
 	int destid;
-	struct rio_id_table *idtab = &net->destid_table;
+	struct rio_id_table *idtab = (struct rio_id_table *)net->enum_data;
 
 	spin_lock(&idtab->lock);
 	destid = find_next_bit(idtab->table, idtab->max, from);
@@ -187,19 +194,6 @@ static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u
 }
 
 /**
- * rio_local_set_device_id - Set the base/extended device id for a port
- * @port: RIO master port
- * @did: Device ID value to be written
- *
- * Writes the base/extended device id from a device.
- */
-static void rio_local_set_device_id(struct rio_mport *port, u16 did)
-{
-	rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(port->sys_size,
-				did));
-}
-
-/**
  * rio_clear_locks- Release all host locks and signal enumeration complete
  * @net: RIO network to run on
  *
@@ -449,9 +443,6 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 
 		if (do_enum)
 			rio_route_clr_table(rdev, RIO_GLOBAL_TABLE, 0);
-
-		list_add_tail(&rswitch->node, &net->switches);
-
 	} else {
 		if (do_enum)
 			/*Enable Input Output Port (transmitter reviever)*/
@@ -461,13 +452,9 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 			     rdev->comp_tag & RIO_CTAG_UDEVID);
 	}
 
-	rdev->dev.parent = &port->dev;
+	rdev->dev.parent = &net->dev;
 	rio_attach_device(rdev);
-
-	device_initialize(&rdev->dev);
 	rdev->dev.release = rio_release_dev;
-	rio_dev_get(rdev);
-
 	rdev->dma_mask = DMA_BIT_MASK(32);
 	rdev->dev.dma_mask = &rdev->dma_mask;
 	rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
@@ -480,6 +467,8 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 	if (ret)
 		goto cleanup;
 
+	rio_dev_get(rdev);
+
 	return rdev;
 
 cleanup:
@@ -621,8 +610,6 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port,
 	rdev = rio_setup_device(net, port, RIO_ANY_DESTID(port->sys_size),
 					hopcount, 1);
 	if (rdev) {
-		/* Add device to the global and bus/net specific list. */
-		list_add_tail(&rdev->net_list, &net->devices);
 		rdev->prev = prev;
 		if (prev && rio_is_switch(prev))
 			prev->rswitch->nextdev[prev_port] = rdev;
@@ -778,8 +765,6 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid,
 
 	/* Setup new RIO device */
 	if ((rdev = rio_setup_device(net, port, destid, hopcount, 0))) {
-		/* Add device to the global and bus/net specific list. */
-		list_add_tail(&rdev->net_list, &net->devices);
 		rdev->prev = prev;
 		if (prev && rio_is_switch(prev))
 			prev->rswitch->nextdev[prev_port] = rdev;
@@ -864,50 +849,71 @@ static int rio_mport_is_active(struct rio_mport *port)
 	return result & RIO_PORT_N_ERR_STS_PORT_OK;
 }
 
-/**
- * rio_alloc_net- Allocate and configure a new RIO network
- * @port: Master port associated with the RIO network
+static void rio_scan_release_net(struct rio_net *net)
+{
+	pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id);
+	kfree(net->enum_data);
+}
+
+static void rio_scan_release_dev(struct device *dev)
+{
+	struct rio_net *net;
+
+	net = to_rio_net(dev);
+	pr_debug("RIO-SCAN: %s: net_%d\n", __func__, net->id);
+	kfree(net);
+}
+
+/*
+ * rio_scan_alloc_net - Allocate and configure a new RIO network
+ * @mport: Master port associated with the RIO network
  * @do_enum: Enumeration/Discovery mode flag
  * @start: logical minimal start id for new net
  *
- * Allocates a RIO network structure, initializes per-network
- * list heads, and adds the associated master port to the
- * network list of associated master ports. Returns a
- * RIO network pointer on success or %NULL on failure.
+ * Allocates a new RIO network structure and initializes enumerator-specific
+ * part of it (if required).
+ * Returns a RIO network pointer on success or %NULL on failure.
  */
-static struct rio_net *rio_alloc_net(struct rio_mport *port,
-					       int do_enum, u16 start)
+static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
+					  int do_enum, u16 start)
 {
 	struct rio_net *net;
 
-	net = kzalloc(sizeof(struct rio_net), GFP_KERNEL);
+	net = rio_alloc_net(mport);
+
 	if (net && do_enum) {
-		net->destid_table.table = kcalloc(
-			BITS_TO_LONGS(RIO_MAX_ROUTE_ENTRIES(port->sys_size)),
-			sizeof(long),
-			GFP_KERNEL);
+		struct rio_id_table *idtab;
+		size_t size;
+
+		size = sizeof(struct rio_id_table) +
+				BITS_TO_LONGS(
+					RIO_MAX_ROUTE_ENTRIES(mport->sys_size)
+					) * sizeof(long);
+
+		idtab = kzalloc(size, GFP_KERNEL);
 
-		if (net->destid_table.table == NULL) {
+		if (idtab == NULL) {
 			pr_err("RIO: failed to allocate destID table\n");
-			kfree(net);
+			rio_free_net(net);
 			net = NULL;
 		} else {
-			net->destid_table.start = start;
-			net->destid_table.max =
-					RIO_MAX_ROUTE_ENTRIES(port->sys_size);
-			spin_lock_init(&net->destid_table.lock);
+			net->enum_data = idtab;
+			net->release = rio_scan_release_net;
+			idtab->start = start;
+			idtab->max = RIO_MAX_ROUTE_ENTRIES(mport->sys_size);
+			spin_lock_init(&idtab->lock);
 		}
 	}
 
 	if (net) {
-		INIT_LIST_HEAD(&net->node);
-		INIT_LIST_HEAD(&net->devices);
-		INIT_LIST_HEAD(&net->switches);
-		INIT_LIST_HEAD(&net->mports);
-		list_add_tail(&port->nnode, &net->mports);
-		net->hport = port;
-		net->id = port->id;
+		net->id = mport->id;
+		net->hport = mport;
+		dev_set_name(&net->dev, "rnet_%d", net->id);
+		net->dev.parent = &mport->dev;
+		net->dev.release = rio_scan_release_dev;
+		rio_add_net(net);
 	}
+
 	return net;
 }
 
@@ -968,17 +974,6 @@ static void rio_init_em(struct rio_dev *rdev)
 }
 
 /**
- * rio_pw_enable - Enables/disables port-write handling by a master port
- * @port: Master port associated with port-write handling
- * @enable:  1=enable,  0=disable
- */
-static void rio_pw_enable(struct rio_mport *port, int enable)
-{
-	if (port->ops->pwenable)
-		port->ops->pwenable(port, enable);
-}
-
-/**
  * rio_enum_mport- Start enumeration through a master port
  * @mport: Master port to send transactions
  * @flags: Enumeration control flags
@@ -1016,7 +1011,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags)
 
 	/* If master port has an active link, allocate net and enum peers */
 	if (rio_mport_is_active(mport)) {
-		net = rio_alloc_net(mport, 1, 0);
+		net = rio_scan_alloc_net(mport, 1, 0);
 		if (!net) {
 			printk(KERN_ERR "RIO: failed to allocate new net\n");
 			rc = -ENOMEM;
@@ -1133,7 +1128,7 @@ static int rio_disc_mport(struct rio_mport *mport, u32 flags)
 enum_done:
 		pr_debug("RIO: ... enumeration done\n");
 
-		net = rio_alloc_net(mport, 0, 0);
+		net = rio_scan_alloc_net(mport, 0, 0);
 		if (!net) {
 			printk(KERN_ERR "RIO: Failed to allocate new net\n");
 			goto bail;
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index e220edc85c68..0dcaa660cba1 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -30,6 +30,20 @@
 
 #include "rio.h"
 
+/*
+ * struct rio_pwrite - RIO portwrite event
+ * @node:    Node in list of doorbell events
+ * @pwcback: Doorbell event callback
+ * @context: Handler specific context to pass on event
+ */
+struct rio_pwrite {
+	struct list_head node;
+
+	int (*pwcback)(struct rio_mport *mport, void *context,
+		       union rio_pw_msg *msg, int step);
+	void *context;
+};
+
 MODULE_DESCRIPTION("RapidIO Subsystem Core");
 MODULE_AUTHOR("Matt Porter <mporter@kernel.crashing.org>");
 MODULE_AUTHOR("Alexandre Bounine <alexandre.bounine@idt.com>");
@@ -42,6 +56,7 @@ MODULE_PARM_DESC(hdid,
 	"Destination ID assignment to local RapidIO controllers");
 
 static LIST_HEAD(rio_devices);
+static LIST_HEAD(rio_nets);
 static DEFINE_SPINLOCK(rio_global_list_lock);
 
 static LIST_HEAD(rio_mports);
@@ -68,6 +83,89 @@ u16 rio_local_get_device_id(struct rio_mport *port)
 }
 
 /**
+ * rio_query_mport - Query mport device attributes
+ * @port: mport device to query
+ * @mport_attr: mport attributes data structure
+ *
+ * Returns attributes of specified mport through the
+ * pointer to attributes data structure.
+ */
+int rio_query_mport(struct rio_mport *port,
+		    struct rio_mport_attr *mport_attr)
+{
+	if (!port->ops->query_mport)
+		return -ENODATA;
+	return port->ops->query_mport(port, mport_attr);
+}
+EXPORT_SYMBOL(rio_query_mport);
+
+/**
+ * rio_alloc_net- Allocate and initialize a new RIO network data structure
+ * @mport: Master port associated with the RIO network
+ *
+ * Allocates a RIO network structure, initializes per-network
+ * list heads, and adds the associated master port to the
+ * network list of associated master ports. Returns a
+ * RIO network pointer on success or %NULL on failure.
+ */
+struct rio_net *rio_alloc_net(struct rio_mport *mport)
+{
+	struct rio_net *net;
+
+	net = kzalloc(sizeof(struct rio_net), GFP_KERNEL);
+	if (net) {
+		INIT_LIST_HEAD(&net->node);
+		INIT_LIST_HEAD(&net->devices);
+		INIT_LIST_HEAD(&net->switches);
+		INIT_LIST_HEAD(&net->mports);
+		mport->net = net;
+	}
+	return net;
+}
+EXPORT_SYMBOL_GPL(rio_alloc_net);
+
+int rio_add_net(struct rio_net *net)
+{
+	int err;
+
+	err = device_register(&net->dev);
+	if (err)
+		return err;
+	spin_lock(&rio_global_list_lock);
+	list_add_tail(&net->node, &rio_nets);
+	spin_unlock(&rio_global_list_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_add_net);
+
+void rio_free_net(struct rio_net *net)
+{
+	spin_lock(&rio_global_list_lock);
+	if (!list_empty(&net->node))
+		list_del(&net->node);
+	spin_unlock(&rio_global_list_lock);
+	if (net->release)
+		net->release(net);
+	device_unregister(&net->dev);
+}
+EXPORT_SYMBOL_GPL(rio_free_net);
+
+/**
+ * rio_local_set_device_id - Set the base/extended device id for a port
+ * @port: RIO master port
+ * @did: Device ID value to be written
+ *
+ * Writes the base/extended device id from a device.
+ */
+void rio_local_set_device_id(struct rio_mport *port, u16 did)
+{
+	rio_local_write_config_32(port, RIO_DID_CSR,
+				  RIO_SET_DID(port->sys_size, did));
+}
+EXPORT_SYMBOL_GPL(rio_local_set_device_id);
+
+/**
  * rio_add_device- Adds a RIO device to the device model
  * @rdev: RIO device
  *
@@ -79,12 +177,19 @@ int rio_add_device(struct rio_dev *rdev)
 {
 	int err;
 
-	err = device_add(&rdev->dev);
+	atomic_set(&rdev->state, RIO_DEVICE_RUNNING);
+	err = device_register(&rdev->dev);
 	if (err)
 		return err;
 
 	spin_lock(&rio_global_list_lock);
 	list_add_tail(&rdev->global_list, &rio_devices);
+	if (rdev->net) {
+		list_add_tail(&rdev->net_list, &rdev->net->devices);
+		if (rdev->pef & RIO_PEF_SWITCH)
+			list_add_tail(&rdev->rswitch->node,
+				      &rdev->net->switches);
+	}
 	spin_unlock(&rio_global_list_lock);
 
 	rio_create_sysfs_dev_files(rdev);
@@ -93,6 +198,33 @@ int rio_add_device(struct rio_dev *rdev)
 }
 EXPORT_SYMBOL_GPL(rio_add_device);
 
+/*
+ * rio_del_device - removes a RIO device from the device model
+ * @rdev: RIO device
+ * @state: device state to set during removal process
+ *
+ * Removes the RIO device to the kernel device list and subsystem's device list.
+ * Clears sysfs entries for the removed device.
+ */
+void rio_del_device(struct rio_dev *rdev, enum rio_device_state state)
+{
+	pr_debug("RIO: %s: removing %s\n", __func__, rio_name(rdev));
+	atomic_set(&rdev->state, state);
+	spin_lock(&rio_global_list_lock);
+	list_del(&rdev->global_list);
+	if (rdev->net) {
+		list_del(&rdev->net_list);
+		if (rdev->pef & RIO_PEF_SWITCH) {
+			list_del(&rdev->rswitch->node);
+			kfree(rdev->rswitch->route_table);
+		}
+	}
+	spin_unlock(&rio_global_list_lock);
+	rio_remove_sysfs_dev_files(rdev);
+	device_unregister(&rdev->dev);
+}
+EXPORT_SYMBOL_GPL(rio_del_device);
+
 /**
  * rio_request_inb_mbox - request inbound mailbox service
  * @mport: RIO master port from which to allocate the mailbox resource
@@ -258,7 +390,9 @@ rio_setup_inb_dbell(struct rio_mport *mport, void *dev_id, struct resource *res,
 	dbell->dinb = dinb;
 	dbell->dev_id = dev_id;
 
+	mutex_lock(&mport->lock);
 	list_add_tail(&dbell->node, &mport->dbells);
+	mutex_unlock(&mport->lock);
 
       out:
 	return rc;
@@ -322,12 +456,15 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end)
 	int rc = 0, found = 0;
 	struct rio_dbell *dbell;
 
+	mutex_lock(&mport->lock);
 	list_for_each_entry(dbell, &mport->dbells, node) {
 		if ((dbell->res->start == start) && (dbell->res->end == end)) {
+			list_del(&dbell->node);
 			found = 1;
 			break;
 		}
 	}
+	mutex_unlock(&mport->lock);
 
 	/* If we can't find an exact match, fail */
 	if (!found) {
@@ -335,9 +472,6 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end)
 		goto out;
 	}
 
-	/* Delete from list */
-	list_del(&dbell->node);
-
 	/* Release the doorbell resource */
 	rc = release_resource(dbell->res);
 
@@ -394,7 +528,71 @@ int rio_release_outb_dbell(struct rio_dev *rdev, struct resource *res)
 }
 
 /**
- * rio_request_inb_pwrite - request inbound port-write message service
+ * rio_add_mport_pw_handler - add port-write message handler into the list
+ *                            of mport specific pw handlers
+ * @mport:   RIO master port to bind the portwrite callback
+ * @context: Handler specific context to pass on event
+ * @pwcback: Callback to execute when portwrite is received
+ *
+ * Returns 0 if the request has been satisfied.
+ */
+int rio_add_mport_pw_handler(struct rio_mport *mport, void *context,
+			     int (*pwcback)(struct rio_mport *mport,
+			     void *context, union rio_pw_msg *msg, int step))
+{
+	int rc = 0;
+	struct rio_pwrite *pwrite;
+
+	pwrite = kzalloc(sizeof(struct rio_pwrite), GFP_KERNEL);
+	if (!pwrite) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	pwrite->pwcback = pwcback;
+	pwrite->context = context;
+	mutex_lock(&mport->lock);
+	list_add_tail(&pwrite->node, &mport->pwrites);
+	mutex_unlock(&mport->lock);
+out:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_add_mport_pw_handler);
+
+/**
+ * rio_del_mport_pw_handler - remove port-write message handler from the list
+ *                            of mport specific pw handlers
+ * @mport:   RIO master port to bind the portwrite callback
+ * @context: Registered handler specific context to pass on event
+ * @pwcback: Registered callback function
+ *
+ * Returns 0 if the request has been satisfied.
+ */
+int rio_del_mport_pw_handler(struct rio_mport *mport, void *context,
+			     int (*pwcback)(struct rio_mport *mport,
+			     void *context, union rio_pw_msg *msg, int step))
+{
+	int rc = -EINVAL;
+	struct rio_pwrite *pwrite;
+
+	mutex_lock(&mport->lock);
+	list_for_each_entry(pwrite, &mport->pwrites, node) {
+		if (pwrite->pwcback == pwcback && pwrite->context == context) {
+			list_del(&pwrite->node);
+			kfree(pwrite);
+			rc = 0;
+			break;
+		}
+	}
+	mutex_unlock(&mport->lock);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_del_mport_pw_handler);
+
+/**
+ * rio_request_inb_pwrite - request inbound port-write message service for
+ *                          specific RapidIO device
  * @rdev: RIO device to which register inbound port-write callback routine
  * @pwcback: Callback routine to execute when port-write is received
  *
@@ -419,6 +617,7 @@ EXPORT_SYMBOL_GPL(rio_request_inb_pwrite);
 
 /**
  * rio_release_inb_pwrite - release inbound port-write message service
+ *                          associated with specific RapidIO device
  * @rdev: RIO device which registered for inbound port-write callback
  *
  * Removes callback from the rio_dev structure. Returns 0 if the request
@@ -440,6 +639,24 @@ int rio_release_inb_pwrite(struct rio_dev *rdev)
 EXPORT_SYMBOL_GPL(rio_release_inb_pwrite);
 
 /**
+ * rio_pw_enable - Enables/disables port-write handling by a master port
+ * @mport: Master port associated with port-write handling
+ * @enable:  1=enable,  0=disable
+ */
+void rio_pw_enable(struct rio_mport *mport, int enable)
+{
+	if (mport->ops->pwenable) {
+		mutex_lock(&mport->lock);
+
+		if ((enable && ++mport->pwe_refcnt == 1) ||
+		    (!enable && mport->pwe_refcnt && --mport->pwe_refcnt == 0))
+			mport->ops->pwenable(mport, enable);
+		mutex_unlock(&mport->lock);
+	}
+}
+EXPORT_SYMBOL_GPL(rio_pw_enable);
+
+/**
  * rio_map_inb_region -- Map inbound memory region.
  * @mport: Master port.
  * @local: physical address of memory region to be mapped
@@ -483,6 +700,56 @@ void rio_unmap_inb_region(struct rio_mport *mport, dma_addr_t lstart)
 EXPORT_SYMBOL_GPL(rio_unmap_inb_region);
 
 /**
+ * rio_map_outb_region -- Map outbound memory region.
+ * @mport: Master port.
+ * @destid: destination id window points to
+ * @rbase: RIO base address window translates to
+ * @size: Size of the memory region
+ * @rflags: Flags for mapping.
+ * @local: physical address of memory region mapped
+ *
+ * Return: 0 -- Success.
+ *
+ * This function will create the mapping from RIO space to local memory.
+ */
+int rio_map_outb_region(struct rio_mport *mport, u16 destid, u64 rbase,
+			u32 size, u32 rflags, dma_addr_t *local)
+{
+	int rc = 0;
+	unsigned long flags;
+
+	if (!mport->ops->map_outb)
+		return -ENODEV;
+
+	spin_lock_irqsave(&rio_mmap_lock, flags);
+	rc = mport->ops->map_outb(mport, destid, rbase, size,
+		rflags, local);
+	spin_unlock_irqrestore(&rio_mmap_lock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rio_map_outb_region);
+
+/**
+ * rio_unmap_inb_region -- Unmap the inbound memory region
+ * @mport: Master port
+ * @destid: destination id mapping points to
+ * @rstart: RIO base address window translates to
+ */
+void rio_unmap_outb_region(struct rio_mport *mport, u16 destid, u64 rstart)
+{
+	unsigned long flags;
+
+	if (!mport->ops->unmap_outb)
+		return;
+
+	spin_lock_irqsave(&rio_mmap_lock, flags);
+	mport->ops->unmap_outb(mport, destid, rstart);
+	spin_unlock_irqrestore(&rio_mmap_lock, flags);
+}
+EXPORT_SYMBOL_GPL(rio_unmap_outb_region);
+
+/**
  * rio_mport_get_physefb - Helper function that returns register offset
  *                      for Physical Layer Extended Features Block.
  * @port: Master port to issue transaction
@@ -864,52 +1131,66 @@ rd_err:
 }
 
 /**
- * rio_inb_pwrite_handler - process inbound port-write message
+ * rio_inb_pwrite_handler - inbound port-write message handler
+ * @mport:  mport device associated with port-write
  * @pw_msg: pointer to inbound port-write message
  *
  * Processes an inbound port-write message. Returns 0 if the request
  * has been satisfied.
  */
-int rio_inb_pwrite_handler(union rio_pw_msg *pw_msg)
+int rio_inb_pwrite_handler(struct rio_mport *mport, union rio_pw_msg *pw_msg)
 {
 	struct rio_dev *rdev;
 	u32 err_status, em_perrdet, em_ltlerrdet;
 	int rc, portnum;
-
-	rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL);
-	if (rdev == NULL) {
-		/* Device removed or enumeration error */
-		pr_debug("RIO: %s No matching device for CTag 0x%08x\n",
-			__func__, pw_msg->em.comptag);
-		return -EIO;
-	}
-
-	pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev));
+	struct rio_pwrite *pwrite;
 
 #ifdef DEBUG_PW
 	{
-	u32 i;
-	for (i = 0; i < RIO_PW_MSG_SIZE/sizeof(u32);) {
+		u32 i;
+
+		pr_debug("%s: PW to mport_%d:\n", __func__, mport->id);
+		for (i = 0; i < RIO_PW_MSG_SIZE / sizeof(u32); i = i + 4) {
 			pr_debug("0x%02x: %08x %08x %08x %08x\n",
-				 i*4, pw_msg->raw[i], pw_msg->raw[i + 1],
-				 pw_msg->raw[i + 2], pw_msg->raw[i + 3]);
-			i += 4;
-	}
+				i * 4, pw_msg->raw[i], pw_msg->raw[i + 1],
+				pw_msg->raw[i + 2], pw_msg->raw[i + 3]);
+		}
 	}
 #endif
 
-	/* Call an external service function (if such is registered
-	 * for this device). This may be the service for endpoints that send
-	 * device-specific port-write messages. End-point messages expected
-	 * to be handled completely by EP specific device driver.
+	rdev = rio_get_comptag((pw_msg->em.comptag & RIO_CTAG_UDEVID), NULL);
+	if (rdev) {
+		pr_debug("RIO: Port-Write message from %s\n", rio_name(rdev));
+	} else {
+		pr_debug("RIO: %s No matching device for CTag 0x%08x\n",
+			__func__, pw_msg->em.comptag);
+	}
+
+	/* Call a device-specific handler (if it is registered for the device).
+	 * This may be the service for endpoints that send device-specific
+	 * port-write messages. End-point messages expected to be handled
+	 * completely by EP specific device driver.
 	 * For switches rc==0 signals that no standard processing required.
 	 */
-	if (rdev->pwcback != NULL) {
+	if (rdev && rdev->pwcback) {
 		rc = rdev->pwcback(rdev, pw_msg, 0);
 		if (rc == 0)
 			return 0;
 	}
 
+	mutex_lock(&mport->lock);
+	list_for_each_entry(pwrite, &mport->pwrites, node)
+		pwrite->pwcback(mport, pwrite->context, pw_msg, 0);
+	mutex_unlock(&mport->lock);
+
+	if (!rdev)
+		return 0;
+
+	/*
+	 * FIXME: The code below stays as it was before for now until we decide
+	 * how to do default PW handling in combination with per-mport callbacks
+	 */
+
 	portnum = pw_msg->em.is_port & 0xFF;
 
 	/* Check if device and route to it are functional:
@@ -1909,32 +2190,31 @@ static int rio_get_hdid(int index)
 	return hdid[index];
 }
 
-int rio_register_mport(struct rio_mport *port)
+int rio_mport_initialize(struct rio_mport *mport)
 {
-	struct rio_scan_node *scan = NULL;
-	int res = 0;
-
 	if (next_portid >= RIO_MAX_MPORTS) {
 		pr_err("RIO: reached specified max number of mports\n");
-		return 1;
+		return -ENODEV;
 	}
 
-	port->id = next_portid++;
-	port->host_deviceid = rio_get_hdid(port->id);
-	port->nscan = NULL;
+	atomic_set(&mport->state, RIO_DEVICE_INITIALIZING);
+	mport->id = next_portid++;
+	mport->host_deviceid = rio_get_hdid(mport->id);
+	mport->nscan = NULL;
+	mutex_init(&mport->lock);
+	mport->pwe_refcnt = 0;
+	INIT_LIST_HEAD(&mport->pwrites);
 
-	dev_set_name(&port->dev, "rapidio%d", port->id);
-	port->dev.class = &rio_mport_class;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rio_mport_initialize);
 
-	res = device_register(&port->dev);
-	if (res)
-		dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
-			port->id, res);
-	else
-		dev_dbg(&port->dev, "RIO: mport%d registered\n", port->id);
+int rio_register_mport(struct rio_mport *port)
+{
+	struct rio_scan_node *scan = NULL;
+	int res = 0;
 
 	mutex_lock(&rio_mport_list_lock);
-	list_add_tail(&port->node, &rio_mports);
 
 	/*
 	 * Check if there are any registered enumeration/discovery operations
@@ -1948,12 +2228,73 @@ int rio_register_mport(struct rio_mport *port)
 				break;
 		}
 	}
+
+	list_add_tail(&port->node, &rio_mports);
 	mutex_unlock(&rio_mport_list_lock);
 
+	dev_set_name(&port->dev, "rapidio%d", port->id);
+	port->dev.class = &rio_mport_class;
+	atomic_set(&port->state, RIO_DEVICE_RUNNING);
+
+	res = device_register(&port->dev);
+	if (res)
+		dev_err(&port->dev, "RIO: mport%d registration failed ERR=%d\n",
+			port->id, res);
+	else
+		dev_dbg(&port->dev, "RIO: registered mport%d\n", port->id);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(rio_register_mport);
+
+static int rio_mport_cleanup_callback(struct device *dev, void *data)
+{
+	struct rio_dev *rdev = to_rio_dev(dev);
+
+	if (dev->bus == &rio_bus_type)
+		rio_del_device(rdev, RIO_DEVICE_SHUTDOWN);
+	return 0;
+}
+
+static int rio_net_remove_children(struct rio_net *net)
+{
+	/*
+	 * Unregister all RapidIO devices residing on this net (this will
+	 * invoke notification of registered subsystem interfaces as well).
+	 */
+	device_for_each_child(&net->dev, NULL, rio_mport_cleanup_callback);
+	return 0;
+}
+
+int rio_unregister_mport(struct rio_mport *port)
+{
 	pr_debug("RIO: %s %s id=%d\n", __func__, port->name, port->id);
+
+	/* Transition mport to the SHUTDOWN state */
+	if (atomic_cmpxchg(&port->state,
+			   RIO_DEVICE_RUNNING,
+			   RIO_DEVICE_SHUTDOWN) != RIO_DEVICE_RUNNING) {
+		pr_err("RIO: %s unexpected state transition for mport %s\n",
+			__func__, port->name);
+	}
+
+	if (port->net && port->net->hport == port) {
+		rio_net_remove_children(port->net);
+		rio_free_net(port->net);
+	}
+
+	/*
+	 * Unregister all RapidIO devices attached to this mport (this will
+	 * invoke notification of registered subsystem interfaces as well).
+	 */
+	mutex_lock(&rio_mport_list_lock);
+	list_del(&port->node);
+	mutex_unlock(&rio_mport_list_lock);
+	device_unregister(&port->dev);
+
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rio_register_mport);
+EXPORT_SYMBOL_GPL(rio_unregister_mport);
 
 EXPORT_SYMBOL_GPL(rio_local_get_device_id);
 EXPORT_SYMBOL_GPL(rio_get_device);
diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h
index 2d0550e08ea2..625d09add001 100644
--- a/drivers/rapidio/rio.h
+++ b/drivers/rapidio/rio.h
@@ -28,6 +28,7 @@ extern u32 rio_mport_get_efb(struct rio_mport *port, int local, u16 destid,
 extern int rio_mport_chk_dev_access(struct rio_mport *mport, u16 destid,
 				    u8 hopcount);
 extern int rio_create_sysfs_dev_files(struct rio_dev *rdev);
+extern void rio_remove_sysfs_dev_files(struct rio_dev *rdev);
 extern int rio_lock_device(struct rio_mport *port, u16 destid,
 			u8 hopcount, int wait_ms);
 extern int rio_unlock_device(struct rio_mport *port, u16 destid, u8 hopcount);
@@ -38,7 +39,11 @@ extern int rio_route_get_entry(struct rio_dev *rdev, u16 table,
 extern int rio_route_clr_table(struct rio_dev *rdev, u16 table, int lock);
 extern int rio_set_port_lockout(struct rio_dev *rdev, u32 pnum, int lock);
 extern struct rio_dev *rio_get_comptag(u32 comp_tag, struct rio_dev *from);
+extern struct rio_net *rio_alloc_net(struct rio_mport *mport);
+extern int rio_add_net(struct rio_net *net);
+extern void rio_free_net(struct rio_net *net);
 extern int rio_add_device(struct rio_dev *rdev);
+extern void rio_del_device(struct rio_dev *rdev, enum rio_device_state state);
 extern int rio_enable_rx_tx_port(struct rio_mport *port, int local, u16 destid,
 				 u8 hopcount, u8 port_num);
 extern int rio_register_scan(int mport_id, struct rio_scan *scan_ops);
diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile
index 4d7178e46afa..a1fc8eda79f3 100644
--- a/drivers/reset/Makefile
+++ b/drivers/reset/Makefile
@@ -2,6 +2,7 @@ obj-y += core.o
 obj-$(CONFIG_ARCH_LPC18XX) += reset-lpc18xx.o
 obj-$(CONFIG_ARCH_SOCFPGA) += reset-socfpga.o
 obj-$(CONFIG_ARCH_BERLIN) += reset-berlin.o
+obj-$(CONFIG_MACH_PISTACHIO) += reset-pistachio.o
 obj-$(CONFIG_ARCH_SUNXI) += reset-sunxi.o
 obj-$(CONFIG_ARCH_STI) += sti/
 obj-$(CONFIG_ARCH_HISI) += hisilicon/
diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 87376638948d..f15f150b79da 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -45,9 +45,6 @@ struct reset_control {
 static int of_reset_simple_xlate(struct reset_controller_dev *rcdev,
 			  const struct of_phandle_args *reset_spec)
 {
-	if (WARN_ON(reset_spec->args_count != rcdev->of_reset_n_cells))
-		return -EINVAL;
-
 	if (reset_spec->args[0] >= rcdev->nr_resets)
 		return -EINVAL;
 
@@ -152,7 +149,7 @@ EXPORT_SYMBOL_GPL(reset_control_status);
 struct reset_control *of_reset_control_get_by_index(struct device_node *node,
 					   int index)
 {
-	struct reset_control *rstc = ERR_PTR(-EPROBE_DEFER);
+	struct reset_control *rstc;
 	struct reset_controller_dev *r, *rcdev;
 	struct of_phandle_args args;
 	int rstc_id;
@@ -178,6 +175,11 @@ struct reset_control *of_reset_control_get_by_index(struct device_node *node,
 		return ERR_PTR(-EPROBE_DEFER);
 	}
 
+	if (WARN_ON(args.args_count != rcdev->of_reset_n_cells)) {
+		mutex_unlock(&reset_controller_list_mutex);
+		return ERR_PTR(-EINVAL);
+	}
+
 	rstc_id = rcdev->of_xlate(rcdev, &args);
 	if (rstc_id < 0) {
 		mutex_unlock(&reset_controller_list_mutex);
diff --git a/drivers/reset/hisilicon/hi6220_reset.c b/drivers/reset/hisilicon/hi6220_reset.c
index 7787a9b1cc67..8f55fd4a2630 100644
--- a/drivers/reset/hisilicon/hi6220_reset.c
+++ b/drivers/reset/hisilicon/hi6220_reset.c
@@ -57,7 +57,7 @@ static int hi6220_reset_deassert(struct reset_controller_dev *rc_dev,
 	return 0;
 }
 
-static struct reset_control_ops hi6220_reset_ops = {
+static const struct reset_control_ops hi6220_reset_ops = {
 	.assert = hi6220_reset_assert,
 	.deassert = hi6220_reset_deassert,
 };
@@ -83,9 +83,7 @@ static int hi6220_reset_probe(struct platform_device *pdev)
 	data->rc_dev.ops = &hi6220_reset_ops;
 	data->rc_dev.of_node = pdev->dev.of_node;
 
-	reset_controller_register(&data->rc_dev);
-
-	return 0;
+	return reset_controller_register(&data->rc_dev);
 }
 
 static const struct of_device_id hi6220_reset_match[] = {
diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c
index 692fc890e94b..ccb940a8d9fb 100644
--- a/drivers/reset/reset-ath79.c
+++ b/drivers/reset/reset-ath79.c
@@ -70,7 +70,7 @@ static int ath79_reset_status(struct reset_controller_dev *rcdev,
 	return !!(val & BIT(id));
 }
 
-static struct reset_control_ops ath79_reset_ops = {
+static const struct reset_control_ops ath79_reset_ops = {
 	.assert = ath79_reset_assert,
 	.deassert = ath79_reset_deassert,
 	.status = ath79_reset_status,
diff --git a/drivers/reset/reset-berlin.c b/drivers/reset/reset-berlin.c
index 970b1ad60293..369f3917fd8e 100644
--- a/drivers/reset/reset-berlin.c
+++ b/drivers/reset/reset-berlin.c
@@ -46,7 +46,7 @@ static int berlin_reset_reset(struct reset_controller_dev *rcdev,
 	return 0;
 }
 
-static struct reset_control_ops berlin_reset_ops = {
+static const struct reset_control_ops berlin_reset_ops = {
 	.reset	= berlin_reset_reset,
 };
 
@@ -55,9 +55,6 @@ static int berlin_reset_xlate(struct reset_controller_dev *rcdev,
 {
 	unsigned offset, bit;
 
-	if (WARN_ON(reset_spec->args_count != rcdev->of_reset_n_cells))
-		return -EINVAL;
-
 	offset = reset_spec->args[0];
 	bit = reset_spec->args[1];
 
diff --git a/drivers/reset/reset-lpc18xx.c b/drivers/reset/reset-lpc18xx.c
index 70922e9ac27f..3b8a4f5a1ff6 100644
--- a/drivers/reset/reset-lpc18xx.c
+++ b/drivers/reset/reset-lpc18xx.c
@@ -136,7 +136,7 @@ static int lpc18xx_rgu_status(struct reset_controller_dev *rcdev,
 	return !(readl(rc->base + offset) & bit);
 }
 
-static struct reset_control_ops lpc18xx_rgu_ops = {
+static const struct reset_control_ops lpc18xx_rgu_ops = {
 	.reset		= lpc18xx_rgu_reset,
 	.assert		= lpc18xx_rgu_assert,
 	.deassert	= lpc18xx_rgu_deassert,
diff --git a/drivers/reset/reset-pistachio.c b/drivers/reset/reset-pistachio.c
new file mode 100644
index 000000000000..72a97a15a4c8
--- /dev/null
+++ b/drivers/reset/reset-pistachio.c
@@ -0,0 +1,154 @@
+/*
+ * Pistachio SoC Reset Controller driver
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ *
+ * Author: Damien Horsley <Damien.Horsley@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+#include <linux/slab.h>
+#include <linux/mfd/syscon.h>
+
+#include <dt-bindings/reset/pistachio-resets.h>
+
+#define	PISTACHIO_SOFT_RESET		0
+
+struct pistachio_reset_data {
+	struct reset_controller_dev	rcdev;
+	struct regmap			*periph_regs;
+};
+
+static inline int pistachio_reset_shift(unsigned long id)
+{
+	switch (id) {
+	case PISTACHIO_RESET_I2C0:
+	case PISTACHIO_RESET_I2C1:
+	case PISTACHIO_RESET_I2C2:
+	case PISTACHIO_RESET_I2C3:
+	case PISTACHIO_RESET_I2S_IN:
+	case PISTACHIO_RESET_PRL_OUT:
+	case PISTACHIO_RESET_SPDIF_OUT:
+	case PISTACHIO_RESET_SPI:
+	case PISTACHIO_RESET_PWM_PDM:
+	case PISTACHIO_RESET_UART0:
+	case PISTACHIO_RESET_UART1:
+	case PISTACHIO_RESET_QSPI:
+	case PISTACHIO_RESET_MDC:
+	case PISTACHIO_RESET_SDHOST:
+	case PISTACHIO_RESET_ETHERNET:
+	case PISTACHIO_RESET_IR:
+	case PISTACHIO_RESET_HASH:
+	case PISTACHIO_RESET_TIMER:
+		return id;
+	case PISTACHIO_RESET_I2S_OUT:
+	case PISTACHIO_RESET_SPDIF_IN:
+	case PISTACHIO_RESET_EVT:
+		return id + 6;
+	case PISTACHIO_RESET_USB_H:
+	case PISTACHIO_RESET_USB_PR:
+	case PISTACHIO_RESET_USB_PHY_PR:
+	case PISTACHIO_RESET_USB_PHY_PON:
+		return id + 7;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int pistachio_reset_assert(struct reset_controller_dev *rcdev,
+				  unsigned long id)
+{
+	struct pistachio_reset_data *rd;
+	u32 mask;
+	int shift;
+
+	rd = container_of(rcdev, struct pistachio_reset_data, rcdev);
+	shift = pistachio_reset_shift(id);
+	if (shift < 0)
+		return shift;
+	mask = BIT(shift);
+
+	return regmap_update_bits(rd->periph_regs, PISTACHIO_SOFT_RESET,
+				  mask, mask);
+}
+
+static int pistachio_reset_deassert(struct reset_controller_dev *rcdev,
+				    unsigned long id)
+{
+	struct pistachio_reset_data *rd;
+	u32 mask;
+	int shift;
+
+	rd = container_of(rcdev, struct pistachio_reset_data, rcdev);
+	shift = pistachio_reset_shift(id);
+	if (shift < 0)
+		return shift;
+	mask = BIT(shift);
+
+	return regmap_update_bits(rd->periph_regs, PISTACHIO_SOFT_RESET,
+				  mask, 0);
+}
+
+static const struct reset_control_ops pistachio_reset_ops = {
+	.assert		= pistachio_reset_assert,
+	.deassert	= pistachio_reset_deassert,
+};
+
+static int pistachio_reset_probe(struct platform_device *pdev)
+{
+	struct pistachio_reset_data *rd;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = pdev->dev.of_node;
+
+	rd = devm_kzalloc(dev, sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd->periph_regs = syscon_node_to_regmap(np->parent);
+	if (IS_ERR(rd->periph_regs))
+		return PTR_ERR(rd->periph_regs);
+
+	rd->rcdev.owner = THIS_MODULE;
+	rd->rcdev.nr_resets = PISTACHIO_RESET_MAX + 1;
+	rd->rcdev.ops = &pistachio_reset_ops;
+	rd->rcdev.of_node = np;
+
+	return reset_controller_register(&rd->rcdev);
+}
+
+static int pistachio_reset_remove(struct platform_device *pdev)
+{
+	struct pistachio_reset_data *data = platform_get_drvdata(pdev);
+
+	reset_controller_unregister(&data->rcdev);
+
+	return 0;
+}
+
+static const struct of_device_id pistachio_reset_dt_ids[] = {
+	 { .compatible = "img,pistachio-reset", },
+	 { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, pistachio_reset_dt_ids);
+
+static struct platform_driver pistachio_reset_driver = {
+	.probe	= pistachio_reset_probe,
+	.remove	= pistachio_reset_remove,
+	.driver = {
+		.name		= "pistachio-reset",
+		.of_match_table	= pistachio_reset_dt_ids,
+	},
+};
+module_platform_driver(pistachio_reset_driver);
+
+MODULE_AUTHOR("Damien Horsley <Damien.Horsley@imgtec.com>");
+MODULE_DESCRIPTION("Pistacho Reset Controller Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c
index b7d773d9248c..cd05a7032b17 100644
--- a/drivers/reset/reset-socfpga.c
+++ b/drivers/reset/reset-socfpga.c
@@ -90,7 +90,7 @@ static int socfpga_reset_status(struct reset_controller_dev *rcdev,
 	return !(reg & BIT(offset));
 }
 
-static struct reset_control_ops socfpga_reset_ops = {
+static const struct reset_control_ops socfpga_reset_ops = {
 	.assert		= socfpga_reset_assert,
 	.deassert	= socfpga_reset_deassert,
 	.status		= socfpga_reset_status,
diff --git a/drivers/reset/reset-sunxi.c b/drivers/reset/reset-sunxi.c
index 8d41a18da17f..677f86555212 100644
--- a/drivers/reset/reset-sunxi.c
+++ b/drivers/reset/reset-sunxi.c
@@ -70,7 +70,7 @@ static int sunxi_reset_deassert(struct reset_controller_dev *rcdev,
 	return 0;
 }
 
-static struct reset_control_ops sunxi_reset_ops = {
+static const struct reset_control_ops sunxi_reset_ops = {
 	.assert		= sunxi_reset_assert,
 	.deassert	= sunxi_reset_deassert,
 };
diff --git a/drivers/reset/reset-zynq.c b/drivers/reset/reset-zynq.c
index c6b3cd8b40ad..a7e87bc45885 100644
--- a/drivers/reset/reset-zynq.c
+++ b/drivers/reset/reset-zynq.c
@@ -86,7 +86,7 @@ static int zynq_reset_status(struct reset_controller_dev *rcdev,
 	return !!(reg & BIT(offset));
 }
 
-static struct reset_control_ops zynq_reset_ops = {
+static const struct reset_control_ops zynq_reset_ops = {
 	.assert		= zynq_reset_assert,
 	.deassert	= zynq_reset_deassert,
 	.status		= zynq_reset_status,
diff --git a/drivers/reset/sti/reset-syscfg.c b/drivers/reset/sti/reset-syscfg.c
index 1600cc7557f5..9bd57a5eee72 100644
--- a/drivers/reset/sti/reset-syscfg.c
+++ b/drivers/reset/sti/reset-syscfg.c
@@ -134,7 +134,7 @@ static int syscfg_reset_status(struct reset_controller_dev *rcdev,
 	return rst->active_low ? !ret_val : !!ret_val;
 }
 
-static struct reset_control_ops syscfg_reset_ops = {
+static const struct reset_control_ops syscfg_reset_ops = {
 	.reset    = syscfg_reset_dev,
 	.assert   = syscfg_reset_assert,
 	.deassert = syscfg_reset_deassert,
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 544bd3493852..3e84315c6f12 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -589,7 +589,7 @@ config RTC_DRV_RV3029_HWMON
 	default y
 	help
 	  Say Y here if you want to expose temperature sensor data on
-	  rtc-rv3029c2.
+	  rtc-rv3029.
 
 config RTC_DRV_RV8803
 	tristate "Micro Crystal RV8803"
diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index d41bbcd653f6..ba0d61934d35 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c
@@ -49,7 +49,20 @@
 
 #define ABX8XX_REG_CD_TIMER_CTL	0x18
 
+#define ABX8XX_REG_OSC		0x1c
+#define ABX8XX_OSC_FOS		BIT(3)
+#define ABX8XX_OSC_BOS		BIT(4)
+#define ABX8XX_OSC_ACAL_512	BIT(5)
+#define ABX8XX_OSC_ACAL_1024	BIT(6)
+
+#define ABX8XX_OSC_OSEL		BIT(7)
+
+#define ABX8XX_REG_OSS		0x1d
+#define ABX8XX_OSS_OF		BIT(1)
+#define ABX8XX_OSS_OMODE	BIT(4)
+
 #define ABX8XX_REG_CFG_KEY	0x1f
+#define ABX8XX_CFG_KEY_OSC	0xa1
 #define ABX8XX_CFG_KEY_MISC	0x9d
 
 #define ABX8XX_REG_ID0		0x28
@@ -81,6 +94,20 @@ static struct abx80x_cap abx80x_caps[] = {
 	[ABX80X] = {.pn = 0}
 };
 
+static int abx80x_is_rc_mode(struct i2c_client *client)
+{
+	int flags = 0;
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+	if (flags < 0) {
+		dev_err(&client->dev,
+			"Failed to read autocalibration attribute\n");
+		return flags;
+	}
+
+	return (flags & ABX8XX_OSS_OMODE) ? 1 : 0;
+}
+
 static int abx80x_enable_trickle_charger(struct i2c_client *client,
 					 u8 trickle_cfg)
 {
@@ -112,7 +139,23 @@ static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
-	int err;
+	int err, flags, rc_mode = 0;
+
+	/* Read the Oscillator Failure only in XT mode */
+	rc_mode = abx80x_is_rc_mode(client);
+	if (rc_mode < 0)
+		return rc_mode;
+
+	if (!rc_mode) {
+		flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+		if (flags < 0)
+			return flags;
+
+		if (flags & ABX8XX_OSS_OF) {
+			dev_err(dev, "Oscillator failure, data is invalid.\n");
+			return -EINVAL;
+		}
+	}
 
 	err = i2c_smbus_read_i2c_block_data(client, ABX8XX_REG_HTH,
 					    sizeof(buf), buf);
@@ -140,7 +183,7 @@ static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
-	int err;
+	int err, flags;
 
 	if (tm->tm_year < 100)
 		return -EINVAL;
@@ -161,6 +204,18 @@ static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		return -EIO;
 	}
 
+	/* Clear the OF bit of Oscillator Status Register */
+	flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSS);
+	if (flags < 0)
+		return flags;
+
+	err = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSS,
+					flags & ~ABX8XX_OSS_OF);
+	if (err < 0) {
+		dev_err(&client->dev, "Unable to write oscillator status register\n");
+		return err;
+	}
+
 	return 0;
 }
 
@@ -248,6 +303,174 @@ static int abx80x_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	return 0;
 }
 
+static int abx80x_rtc_set_autocalibration(struct device *dev,
+					  int autocalibration)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int retval, flags = 0;
+
+	if ((autocalibration != 0) && (autocalibration != 1024) &&
+	    (autocalibration != 512)) {
+		dev_err(dev, "autocalibration value outside permitted range\n");
+		return -EINVAL;
+	}
+
+	flags = i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (autocalibration == 0) {
+		flags &= ~(ABX8XX_OSC_ACAL_512 | ABX8XX_OSC_ACAL_1024);
+	} else if (autocalibration == 1024) {
+		/* 1024 autocalibration is 0x10 */
+		flags |= ABX8XX_OSC_ACAL_1024;
+		flags &= ~(ABX8XX_OSC_ACAL_512);
+	} else {
+		/* 512 autocalibration is 0x11 */
+		flags |= (ABX8XX_OSC_ACAL_1024 | ABX8XX_OSC_ACAL_512);
+	}
+
+	/* Unlock write access to Oscillator Control Register */
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY,
+					   ABX8XX_CFG_KEY_OSC);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write CONFIG_KEY register\n");
+		return retval;
+	}
+
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSC, flags);
+
+	return retval;
+}
+
+static int abx80x_rtc_get_autocalibration(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int flags = 0, autocalibration;
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (flags & ABX8XX_OSC_ACAL_512)
+		autocalibration = 512;
+	else if (flags & ABX8XX_OSC_ACAL_1024)
+		autocalibration = 1024;
+	else
+		autocalibration = 0;
+
+	return autocalibration;
+}
+
+static ssize_t autocalibration_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int retval;
+	unsigned long autocalibration = 0;
+
+	retval = kstrtoul(buf, 10, &autocalibration);
+	if (retval < 0) {
+		dev_err(dev, "Failed to store RTC autocalibration attribute\n");
+		return -EINVAL;
+	}
+
+	retval = abx80x_rtc_set_autocalibration(dev, autocalibration);
+
+	return retval ? retval : count;
+}
+
+static ssize_t autocalibration_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	int autocalibration = 0;
+
+	autocalibration = abx80x_rtc_get_autocalibration(dev);
+	if (autocalibration < 0) {
+		dev_err(dev, "Failed to read RTC autocalibration\n");
+		sprintf(buf, "0\n");
+		return autocalibration;
+	}
+
+	return sprintf(buf, "%d\n", autocalibration);
+}
+
+static DEVICE_ATTR_RW(autocalibration);
+
+static ssize_t oscillator_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int retval, flags, rc_mode = 0;
+
+	if (strncmp(buf, "rc", 2) == 0) {
+		rc_mode = 1;
+	} else if (strncmp(buf, "xtal", 4) == 0) {
+		rc_mode = 0;
+	} else {
+		dev_err(dev, "Oscillator selection value outside permitted ones\n");
+		return -EINVAL;
+	}
+
+	flags =  i2c_smbus_read_byte_data(client, ABX8XX_REG_OSC);
+	if (flags < 0)
+		return flags;
+
+	if (rc_mode == 0)
+		flags &= ~(ABX8XX_OSC_OSEL);
+	else
+		flags |= (ABX8XX_OSC_OSEL);
+
+	/* Unlock write access on Oscillator Control register */
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_CFG_KEY,
+					   ABX8XX_CFG_KEY_OSC);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write CONFIG_KEY register\n");
+		return retval;
+	}
+
+	retval = i2c_smbus_write_byte_data(client, ABX8XX_REG_OSC, flags);
+	if (retval < 0) {
+		dev_err(dev, "Failed to write Oscillator Control register\n");
+		return retval;
+	}
+
+	return retval ? retval : count;
+}
+
+static ssize_t oscillator_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	int rc_mode = 0;
+	struct i2c_client *client = to_i2c_client(dev);
+
+	rc_mode = abx80x_is_rc_mode(client);
+
+	if (rc_mode < 0) {
+		dev_err(dev, "Failed to read RTC oscillator selection\n");
+		sprintf(buf, "\n");
+		return rc_mode;
+	}
+
+	if (rc_mode)
+		return sprintf(buf, "rc\n");
+	else
+		return sprintf(buf, "xtal\n");
+}
+
+static DEVICE_ATTR_RW(oscillator);
+
+static struct attribute *rtc_calib_attrs[] = {
+	&dev_attr_autocalibration.attr,
+	&dev_attr_oscillator.attr,
+	NULL,
+};
+
+static const struct attribute_group rtc_calib_attr_group = {
+	.attrs		= rtc_calib_attrs,
+};
+
 static int abx80x_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -303,6 +526,13 @@ static int abx80x_dt_trickle_cfg(struct device_node *np)
 	return (trickle_cfg | i);
 }
 
+static void rtc_calib_remove_sysfs_group(void *_dev)
+{
+	struct device *dev = _dev;
+
+	sysfs_remove_group(&dev->kobj, &rtc_calib_attr_group);
+}
+
 static int abx80x_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -405,6 +635,24 @@ static int abx80x_probe(struct i2c_client *client,
 		}
 	}
 
+	/* Export sysfs entries */
+	err = sysfs_create_group(&(&client->dev)->kobj, &rtc_calib_attr_group);
+	if (err) {
+		dev_err(&client->dev, "Failed to create sysfs group: %d\n",
+			err);
+		return err;
+	}
+
+	err = devm_add_action(&client->dev, rtc_calib_remove_sysfs_group,
+			      &client->dev);
+	if (err) {
+		rtc_calib_remove_sysfs_group(&client->dev);
+		dev_err(&client->dev,
+			"Failed to add sysfs cleanup action: %d\n",
+			err);
+		return err;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-asm9260.c b/drivers/rtc/rtc-asm9260.c
index 14e08c4c1a01..355fdb97a006 100644
--- a/drivers/rtc/rtc-asm9260.c
+++ b/drivers/rtc/rtc-asm9260.c
@@ -255,7 +255,7 @@ static const struct rtc_class_ops asm9260_rtc_ops = {
 	.alarm_irq_enable	= asm9260_alarm_irq_enable,
 };
 
-static int __init asm9260_rtc_probe(struct platform_device *pdev)
+static int asm9260_rtc_probe(struct platform_device *pdev)
 {
 	struct asm9260_rtc_priv *priv;
 	struct device *dev = &pdev->dev;
@@ -323,7 +323,7 @@ err_return:
 	return ret;
 }
 
-static int __exit asm9260_rtc_remove(struct platform_device *pdev)
+static int asm9260_rtc_remove(struct platform_device *pdev)
 {
 	struct asm9260_rtc_priv *priv = platform_get_drvdata(pdev);
 
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index a82937e2f824..d107a8e72a7d 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -176,7 +176,13 @@ static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 		bin2bcd(tm->tm_mday) | (buf[M41T80_REG_DAY] & ~0x3f);
 	buf[M41T80_REG_MON] =
 		bin2bcd(tm->tm_mon + 1) | (buf[M41T80_REG_MON] & ~0x1f);
+
 	/* assume 20YY not 19YY */
+	if (tm->tm_year < 100 || tm->tm_year > 199) {
+		dev_err(&client->dev, "Year must be between 2000 and 2099. It's %d.\n",
+			tm->tm_year + 1900);
+		return -EINVAL;
+	}
 	buf[M41T80_REG_YEAR] = bin2bcd(tm->tm_year % 100);
 
 	if (i2c_transfer(client->adapter, msgs, 1) != 1) {
diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
index 1c91ce8a6d75..025bb33b9cd2 100644
--- a/drivers/rtc/rtc-mcp795.c
+++ b/drivers/rtc/rtc-mcp795.c
@@ -20,6 +20,7 @@
 #include <linux/printk.h>
 #include <linux/spi/spi.h>
 #include <linux/rtc.h>
+#include <linux/of.h>
 
 /* MCP795 Instructions, see datasheet table 3-1 */
 #define MCP795_EEREAD	0x03
@@ -183,9 +184,18 @@ static int mcp795_probe(struct spi_device *spi)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id mcp795_of_match[] = {
+	{ .compatible = "maxim,mcp795" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mcp795_of_match);
+#endif
+
 static struct spi_driver mcp795_driver = {
 		.driver = {
 				.name = "rtc-mcp795",
+				.of_match_table = of_match_ptr(mcp795_of_match),
 		},
 		.probe = mcp795_probe,
 };
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 8d9f35ceb808..f623038e586e 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -61,11 +61,14 @@ static irqreturn_t rv8803_handle_irq(int irq, void *dev_id)
 	struct i2c_client *client = dev_id;
 	struct rv8803_data *rv8803 = i2c_get_clientdata(client);
 	unsigned long events = 0;
-	int flags;
+	int flags, try = 0;
 
 	mutex_lock(&rv8803->flags_lock);
 
-	flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+	do {
+		flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+		try++;
+	} while ((flags == -ENXIO) && (try < 3));
 	if (flags <= 0) {
 		mutex_unlock(&rv8803->flags_lock);
 		return IRQ_NONE;
@@ -424,7 +427,7 @@ static int rv8803_probe(struct i2c_client *client,
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct rv8803_data *rv8803;
-	int err, flags;
+	int err, flags, try = 0;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
 				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
@@ -441,7 +444,16 @@ static int rv8803_probe(struct i2c_client *client,
 	rv8803->client = client;
 	i2c_set_clientdata(client, rv8803);
 
-	flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+	/*
+	 * There is a 60µs window where the RTC may not reply on the i2c bus in
+	 * that case, the transfer is not ACKed. In that case, ensure there are
+	 * multiple attempts.
+	 */
+	do {
+		flags = i2c_smbus_read_byte_data(client, RV8803_FLAG);
+		try++;
+	} while ((flags == -ENXIO) && (try < 3));
+
 	if (flags < 0)
 		return flags;
 
@@ -476,8 +488,12 @@ static int rv8803_probe(struct i2c_client *client,
 		return PTR_ERR(rv8803->rtc);
 	}
 
-	err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT,
-					RV8803_EXT_WADA);
+	try = 0;
+	do {
+		err = i2c_smbus_write_byte_data(rv8803->client, RV8803_EXT,
+						RV8803_EXT_WADA);
+		try++;
+	} while ((err == -ENXIO) && (try < 3));
 	if (err)
 		return err;
 
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index ffb860d18701..d01ad7e8078e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -501,18 +501,27 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 
 	info->rtc_clk = devm_clk_get(&pdev->dev, "rtc");
 	if (IS_ERR(info->rtc_clk)) {
-		dev_err(&pdev->dev, "failed to find rtc clock\n");
-		return PTR_ERR(info->rtc_clk);
+		ret = PTR_ERR(info->rtc_clk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to find rtc clock\n");
+		else
+			dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n");
+		return ret;
 	}
 	clk_prepare_enable(info->rtc_clk);
 
 	if (info->data->needs_src_clk) {
 		info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
 		if (IS_ERR(info->rtc_src_clk)) {
-			dev_err(&pdev->dev,
-				"failed to find rtc source clock\n");
+			ret = PTR_ERR(info->rtc_src_clk);
+			if (ret != -EPROBE_DEFER)
+				dev_err(&pdev->dev,
+					"failed to find rtc source clock\n");
+			else
+				dev_dbg(&pdev->dev,
+					"probe deferred due to missing rtc src clk\n");
 			clk_disable_unprepare(info->rtc_clk);
-			return PTR_ERR(info->rtc_src_clk);
+			return ret;
 		}
 		clk_prepare_enable(info->rtc_src_clk);
 	}
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h
index 22dd8d670e4a..2fd9c76fc21c 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.h
@@ -25,21 +25,4 @@
 
 #define T5_ISS_VALID		(1 << 18)
 
-struct ulptx_idata {
-	__be32 cmd_more;
-	__be32 len;
-};
-
-struct cpl_rx_data_ddp {
-	union opcode_tid ot;
-	__be16 urg;
-	__be16 len;
-	__be32 seq;
-	union {
-		__be32 nxt_seq;
-		__be32 ddp_report;
-	};
-	__be32 ulp_crc;
-	__be32 ddpvld;
-};
 #endif	/* __CXGB4I_H__ */
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5bcdf8dd6fb0..a404a41e871c 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -332,7 +332,7 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 {
 	int rel_port = -1, group_id;
 	struct alua_port_group *pg, *old_pg = NULL;
-	bool pg_updated;
+	bool pg_updated = false;
 	unsigned long flags;
 
 	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 266b909fe854..f3032ca5051b 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -958,23 +958,22 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
 	case FCPIO_INVALID_PARAM:    /* some parameter in request invalid */
 	case FCPIO_REQ_NOT_SUPPORTED:/* request type is not supported */
 	default:
-		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
-			     fnic_fcpio_status_to_str(hdr_status));
 		sc->result = (DID_ERROR << 16) | icmnd_cmpl->scsi_status;
 		break;
 	}
 
-	if (hdr_status != FCPIO_SUCCESS) {
-		atomic64_inc(&fnic_stats->io_stats.io_failures);
-		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
-			     fnic_fcpio_status_to_str(hdr_status));
-	}
 	/* Break link with the SCSI command */
 	CMD_SP(sc) = NULL;
 	CMD_FLAGS(sc) |= FNIC_IO_DONE;
 
 	spin_unlock_irqrestore(io_lock, flags);
 
+	if (hdr_status != FCPIO_SUCCESS) {
+		atomic64_inc(&fnic_stats->io_stats.io_failures);
+		shost_printk(KERN_ERR, fnic->lport->host, "hdr status = %s\n",
+			     fnic_fcpio_status_to_str(hdr_status));
+	}
+
 	fnic_release_ioreq_buf(fnic, io_req, sc);
 
 	mempool_free(io_req, fnic->io_req_pool);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a544366a367e..f57d02c3b6cf 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2860,7 +2860,7 @@ lpfc_online(struct lpfc_hba *phba)
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL)
+	if (vports != NULL) {
 		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
@@ -2877,7 +2877,8 @@ lpfc_online(struct lpfc_hba *phba)
 			}
 			spin_unlock_irq(shost->host_lock);
 		}
-		lpfc_destroy_vport_work_array(phba, vports);
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
 
 	lpfc_unblock_mgmt_io(phba);
 	return 0;
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 4484e63033a5..fce414a2cd76 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -2097,7 +2097,7 @@ struct megasas_instance {
 	u8 UnevenSpanSupport;
 
 	u8 supportmax256vd;
-	u8 allow_fw_scan;
+	u8 pd_list_not_supported;
 	u16 fw_supported_vd_count;
 	u16 fw_supported_pd_count;
 
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 5c08568ccfbf..e6ebc7ae2df1 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1838,7 +1838,7 @@ static int megasas_slave_configure(struct scsi_device *sdev)
 	struct megasas_instance *instance;
 
 	instance = megasas_lookup_instance(sdev->host->host_no);
-	if (instance->allow_fw_scan) {
+	if (instance->pd_list_not_supported) {
 		if (sdev->channel < MEGASAS_MAX_PD_CHANNELS &&
 			sdev->type == TYPE_DISK) {
 			pd_index = (sdev->channel * MEGASAS_MAX_DEV_PER_CHANNEL) +
@@ -1874,7 +1874,8 @@ static int megasas_slave_alloc(struct scsi_device *sdev)
 		pd_index =
 			(sdev->channel * MEGASAS_MAX_DEV_PER_CHANNEL) +
 			sdev->id;
-		if ((instance->allow_fw_scan || instance->pd_list[pd_index].driveState ==
+		if ((instance->pd_list_not_supported ||
+			instance->pd_list[pd_index].driveState ==
 			MR_PD_STATE_SYSTEM)) {
 			goto scan_target;
 		}
@@ -4087,7 +4088,13 @@ megasas_get_pd_list(struct megasas_instance *instance)
 
 	switch (ret) {
 	case DCMD_FAILED:
-		megaraid_sas_kill_hba(instance);
+		dev_info(&instance->pdev->dev, "MR_DCMD_PD_LIST_QUERY "
+			"failed/not supported by firmware\n");
+
+		if (instance->ctrl_context)
+			megaraid_sas_kill_hba(instance);
+		else
+			instance->pd_list_not_supported = 1;
 		break;
 	case DCMD_TIMEOUT:
 
@@ -5034,7 +5041,6 @@ static int megasas_init_fw(struct megasas_instance *instance)
 	case PCI_DEVICE_ID_DELL_PERC5:
 	default:
 		instance->instancet = &megasas_instance_template_xscale;
-		instance->allow_fw_scan = 1;
 		break;
 	}
 
@@ -6650,12 +6656,13 @@ out:
 	}
 
 	for (i = 0; i < ioc->sge_count; i++) {
-		if (kbuff_arr[i])
+		if (kbuff_arr[i]) {
 			dma_free_coherent(&instance->pdev->dev,
 					  le32_to_cpu(kern_sge32[i].length),
 					  kbuff_arr[i],
 					  le32_to_cpu(kern_sge32[i].phys_addr));
 			kbuff_arr[i] = NULL;
+		}
 	}
 
 	megasas_return_cmd(instance, cmd);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index ceb452dd143c..47f8b9b49bac 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2963,6 +2963,7 @@ struct qlt_hw_data {
 
 	uint8_t tgt_node_name[WWN_SIZE];
 
+	struct dentry *dfs_tgt_sess;
 	struct list_head q_full_list;
 	uint32_t num_pend_cmds;
 	uint32_t num_qfull_cmds_alloc;
diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c
index cd8b96a4b0dd..34272fde8a5b 100644
--- a/drivers/scsi/qla2xxx/qla_dfs.c
+++ b/drivers/scsi/qla2xxx/qla_dfs.c
@@ -13,6 +13,47 @@ static struct dentry *qla2x00_dfs_root;
 static atomic_t qla2x00_dfs_root_count;
 
 static int
+qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused)
+{
+	scsi_qla_host_t *vha = s->private;
+	struct qla_hw_data *ha = vha->hw;
+	unsigned long flags;
+	struct qla_tgt_sess *sess = NULL;
+	struct qla_tgt *tgt= vha->vha_tgt.qla_tgt;
+
+	seq_printf(s, "%s\n",vha->host_str);
+	if (tgt) {
+		seq_printf(s, "Port ID   Port Name                Handle\n");
+
+		spin_lock_irqsave(&ha->tgt.sess_lock, flags);
+		list_for_each_entry(sess, &tgt->sess_list, sess_list_entry) {
+			seq_printf(s, "%02x:%02x:%02x  %8phC  %d\n",
+					   sess->s_id.b.domain,sess->s_id.b.area,
+					   sess->s_id.b.al_pa,	sess->port_name,
+					   sess->loop_id);
+		}
+		spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+	}
+
+	return 0;
+}
+
+static int
+qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file)
+{
+	scsi_qla_host_t *vha = inode->i_private;
+	return single_open(file, qla2x00_dfs_tgt_sess_show, vha);
+}
+
+
+static const struct file_operations dfs_tgt_sess_ops = {
+	.open		= qla2x00_dfs_tgt_sess_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int
 qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused)
 {
 	struct scsi_qla_host *vha = s->private;
@@ -248,6 +289,15 @@ create_nodes:
 		    "Unable to create debugfs fce node.\n");
 		goto out;
 	}
+
+	ha->tgt.dfs_tgt_sess = debugfs_create_file("tgt_sess",
+		S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_sess_ops);
+	if (!ha->tgt.dfs_tgt_sess) {
+		ql_log(ql_log_warn, vha, 0xffff,
+			"Unable to create debugFS tgt_sess node.\n");
+		goto out;
+	}
+
 out:
 	return 0;
 }
@@ -257,6 +307,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha)
 {
 	struct qla_hw_data *ha = vha->hw;
 
+	if (ha->tgt.dfs_tgt_sess) {
+		debugfs_remove(ha->tgt.dfs_tgt_sess);
+		ha->tgt.dfs_tgt_sess = NULL;
+	}
+
 	if (ha->dfs_fw_resource_cnt) {
 		debugfs_remove(ha->dfs_fw_resource_cnt);
 		ha->dfs_fw_resource_cnt = NULL;
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index ee967becd257..8a44d1541eb4 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -641,7 +641,8 @@ void qlt_unreg_sess(struct qla_tgt_sess *sess)
 {
 	struct scsi_qla_host *vha = sess->vha;
 
-	vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess);
+	if (sess->se_sess)
+		vha->hw->tgt.tgt_ops->clear_nacl_from_fcport_map(sess);
 
 	if (!list_empty(&sess->del_list_entry))
 		list_del_init(&sess->del_list_entry);
@@ -856,8 +857,12 @@ static void qlt_del_sess_work_fn(struct delayed_work *work)
 			ql_dbg(ql_dbg_tgt_mgt, vha, 0xf004,
 			    "Timeout: sess %p about to be deleted\n",
 			    sess);
-			ha->tgt.tgt_ops->shutdown_sess(sess);
-			ha->tgt.tgt_ops->put_sess(sess);
+			if (sess->se_sess) {
+				ha->tgt.tgt_ops->shutdown_sess(sess);
+				ha->tgt.tgt_ops->put_sess(sess);
+			} else {
+				qlt_unreg_sess(sess);
+			}
 		} else {
 			schedule_delayed_work(&tgt->sess_del_work,
 			    sess->expires - elapsed);
@@ -879,7 +884,6 @@ static struct qla_tgt_sess *qlt_create_sess(
 	struct qla_hw_data *ha = vha->hw;
 	struct qla_tgt_sess *sess;
 	unsigned long flags;
-	unsigned char be_sid[3];
 
 	/* Check to avoid double sessions */
 	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
@@ -905,6 +909,14 @@ static struct qla_tgt_sess *qlt_create_sess(
 			if (sess->deleted)
 				qlt_undelete_sess(sess);
 
+			if (!sess->se_sess) {
+				if (ha->tgt.tgt_ops->check_initiator_node_acl(vha,
+				    &sess->port_name[0], sess) < 0) {
+					spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+					return NULL;
+				}
+			}
+
 			kref_get(&sess->se_sess->sess_kref);
 			ha->tgt.tgt_ops->update_sess(sess, fcport->d_id, fcport->loop_id,
 						(fcport->flags & FCF_CONF_COMP_SUPPORTED));
@@ -948,26 +960,6 @@ static struct qla_tgt_sess *qlt_create_sess(
 	    "Adding sess %p to tgt %p via ->check_initiator_node_acl()\n",
 	    sess, vha->vha_tgt.qla_tgt);
 
-	be_sid[0] = sess->s_id.b.domain;
-	be_sid[1] = sess->s_id.b.area;
-	be_sid[2] = sess->s_id.b.al_pa;
-	/*
-	 * Determine if this fc_port->port_name is allowed to access
-	 * target mode using explict NodeACLs+MappedLUNs, or using
-	 * TPG demo mode.  If this is successful a target mode FC nexus
-	 * is created.
-	 */
-	if (ha->tgt.tgt_ops->check_initiator_node_acl(vha,
-	    &fcport->port_name[0], sess, &be_sid[0], fcport->loop_id) < 0) {
-		kfree(sess);
-		return NULL;
-	}
-	/*
-	 * Take an extra reference to ->sess_kref here to handle qla_tgt_sess
-	 * access across ->tgt.sess_lock reaquire.
-	 */
-	kref_get(&sess->se_sess->sess_kref);
-
 	sess->conf_compl_supported = (fcport->flags & FCF_CONF_COMP_SUPPORTED);
 	BUILD_BUG_ON(sizeof(sess->port_name) != sizeof(fcport->port_name));
 	memcpy(sess->port_name, fcport->port_name, sizeof(sess->port_name));
@@ -985,6 +977,23 @@ static struct qla_tgt_sess *qlt_create_sess(
 	    fcport->loop_id, sess->s_id.b.domain, sess->s_id.b.area,
 	    sess->s_id.b.al_pa, sess->conf_compl_supported ?  "" : "not ");
 
+	/*
+	 * Determine if this fc_port->port_name is allowed to access
+	 * target mode using explict NodeACLs+MappedLUNs, or using
+	 * TPG demo mode.  If this is successful a target mode FC nexus
+	 * is created.
+	 */
+	if (ha->tgt.tgt_ops->check_initiator_node_acl(vha,
+	    &fcport->port_name[0], sess) < 0) {
+		return NULL;
+	} else {
+		/*
+		 * Take an extra reference to ->sess_kref here to handle qla_tgt_sess
+		 * access across ->tgt.sess_lock reaquire.
+		 */
+		kref_get(&sess->se_sess->sess_kref);
+	}
+
 	return sess;
 }
 
@@ -1872,15 +1881,17 @@ static int qlt_check_reserve_free_req(struct scsi_qla_host *vha,
 		else
 			vha->req->cnt = vha->req->length -
 			    (vha->req->ring_index - cnt);
-	}
 
-	if (unlikely(vha->req->cnt < (req_cnt + 2))) {
-		ql_dbg(ql_dbg_io, vha, 0x305a,
-		    "qla_target(%d): There is no room in the request ring: vha->req->ring_index=%d, vha->req->cnt=%d, req_cnt=%d Req-out=%d Req-in=%d Req-Length=%d\n",
-		    vha->vp_idx, vha->req->ring_index,
-		    vha->req->cnt, req_cnt, cnt, cnt_in, vha->req->length);
-		return -EAGAIN;
+		if (unlikely(vha->req->cnt < (req_cnt + 2))) {
+			ql_dbg(ql_dbg_io, vha, 0x305a,
+			    "qla_target(%d): There is no room in the request ring: vha->req->ring_index=%d, vha->req->cnt=%d, req_cnt=%d Req-out=%d Req-in=%d Req-Length=%d\n",
+			    vha->vp_idx, vha->req->ring_index,
+			    vha->req->cnt, req_cnt, cnt, cnt_in,
+			    vha->req->length);
+			return -EAGAIN;
+		}
 	}
+
 	vha->req->cnt -= req_cnt;
 
 	return 0;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 22a6a767fe07..d857feeb6514 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -731,7 +731,7 @@ struct qla_tgt_func_tmpl {
 	void (*free_session)(struct qla_tgt_sess *);
 
 	int (*check_initiator_node_acl)(struct scsi_qla_host *, unsigned char *,
-					void *, uint8_t *, uint16_t);
+					struct qla_tgt_sess *);
 	void (*update_sess)(struct qla_tgt_sess *, port_id_t, uint16_t, bool);
 	struct qla_tgt_sess *(*find_sess_by_loop_id)(struct scsi_qla_host *,
 						const uint16_t);
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 1808a01cfb7e..c1461d225f08 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1406,6 +1406,39 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess)
 	transport_deregister_session(sess->se_sess);
 }
 
+static int tcm_qla2xxx_session_cb(struct se_portal_group *se_tpg,
+				  struct se_session *se_sess, void *p)
+{
+	struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+				struct tcm_qla2xxx_tpg, se_tpg);
+	struct tcm_qla2xxx_lport *lport = tpg->lport;
+	struct qla_hw_data *ha = lport->qla_vha->hw;
+	struct se_node_acl *se_nacl = se_sess->se_node_acl;
+	struct tcm_qla2xxx_nacl *nacl = container_of(se_nacl,
+				struct tcm_qla2xxx_nacl, se_node_acl);
+	struct qla_tgt_sess *qlat_sess = p;
+	uint16_t loop_id = qlat_sess->loop_id;
+	unsigned long flags;
+	unsigned char be_sid[3];
+
+	be_sid[0] = qlat_sess->s_id.b.domain;
+	be_sid[1] = qlat_sess->s_id.b.area;
+	be_sid[2] = qlat_sess->s_id.b.al_pa;
+
+	/*
+	 * And now setup se_nacl and session pointers into HW lport internal
+	 * mappings for fabric S_ID and LOOP_ID.
+	 */
+	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
+	tcm_qla2xxx_set_sess_by_s_id(lport, se_nacl, nacl,
+				     se_sess, qlat_sess, be_sid);
+	tcm_qla2xxx_set_sess_by_loop_id(lport, se_nacl, nacl,
+					se_sess, qlat_sess, loop_id);
+	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
+
+	return 0;
+}
+
 /*
  * Called via qlt_create_sess():ha->qla2x_tmpl->check_initiator_node_acl()
  * to locate struct se_node_acl
@@ -1413,20 +1446,13 @@ static void tcm_qla2xxx_free_session(struct qla_tgt_sess *sess)
 static int tcm_qla2xxx_check_initiator_node_acl(
 	scsi_qla_host_t *vha,
 	unsigned char *fc_wwpn,
-	void *qla_tgt_sess,
-	uint8_t *s_id,
-	uint16_t loop_id)
+	struct qla_tgt_sess *qlat_sess)
 {
 	struct qla_hw_data *ha = vha->hw;
 	struct tcm_qla2xxx_lport *lport;
 	struct tcm_qla2xxx_tpg *tpg;
-	struct tcm_qla2xxx_nacl *nacl;
-	struct se_portal_group *se_tpg;
-	struct se_node_acl *se_nacl;
 	struct se_session *se_sess;
-	struct qla_tgt_sess *sess = qla_tgt_sess;
 	unsigned char port_name[36];
-	unsigned long flags;
 	int num_tags = (ha->cur_fw_xcb_count) ? ha->cur_fw_xcb_count :
 		       TCM_QLA2XXX_DEFAULT_TAGS;
 
@@ -1444,15 +1470,6 @@ static int tcm_qla2xxx_check_initiator_node_acl(
 		pr_err("Unable to lcoate struct tcm_qla2xxx_lport->tpg_1\n");
 		return -EINVAL;
 	}
-	se_tpg = &tpg->se_tpg;
-
-	se_sess = transport_init_session_tags(num_tags,
-					      sizeof(struct qla_tgt_cmd),
-					      TARGET_PROT_ALL);
-	if (IS_ERR(se_sess)) {
-		pr_err("Unable to initialize struct se_session\n");
-		return PTR_ERR(se_sess);
-	}
 	/*
 	 * Format the FCP Initiator port_name into colon seperated values to
 	 * match the format by tcm_qla2xxx explict ConfigFS NodeACLs.
@@ -1463,28 +1480,12 @@ static int tcm_qla2xxx_check_initiator_node_acl(
 	 * Locate our struct se_node_acl either from an explict NodeACL created
 	 * via ConfigFS, or via running in TPG demo mode.
 	 */
-	se_sess->se_node_acl = core_tpg_check_initiator_node_acl(se_tpg,
-					port_name);
-	if (!se_sess->se_node_acl) {
-		transport_free_session(se_sess);
-		return -EINVAL;
-	}
-	se_nacl = se_sess->se_node_acl;
-	nacl = container_of(se_nacl, struct tcm_qla2xxx_nacl, se_node_acl);
-	/*
-	 * And now setup the new se_nacl and session pointers into our HW lport
-	 * mappings for fabric S_ID and LOOP_ID.
-	 */
-	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
-	tcm_qla2xxx_set_sess_by_s_id(lport, se_nacl, nacl, se_sess,
-			qla_tgt_sess, s_id);
-	tcm_qla2xxx_set_sess_by_loop_id(lport, se_nacl, nacl, se_sess,
-			qla_tgt_sess, loop_id);
-	spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
-	/*
-	 * Finally register the new FC Nexus with TCM
-	 */
-	transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess);
+	se_sess = target_alloc_session(&tpg->se_tpg, num_tags,
+				       sizeof(struct qla_tgt_cmd),
+				       TARGET_PROT_ALL, port_name,
+				       qlat_sess, tcm_qla2xxx_session_cb);
+	if (IS_ERR(se_sess))
+		return PTR_ERR(se_sess);
 
 	return 0;
 }
diff --git a/drivers/scsi/scsi_common.c b/drivers/scsi/scsi_common.c
index c126966130ab..ce79de822e46 100644
--- a/drivers/scsi/scsi_common.c
+++ b/drivers/scsi/scsi_common.c
@@ -278,8 +278,16 @@ int scsi_set_sense_information(u8 *buf, int buf_len, u64 info)
 		ucp[3] = 0;
 		put_unaligned_be64(info, &ucp[4]);
 	} else if ((buf[0] & 0x7f) == 0x70) {
-		buf[0] |= 0x80;
-		put_unaligned_be64(info, &buf[3]);
+		/*
+		 * Only set the 'VALID' bit if we can represent the value
+		 * correctly; otherwise just fill out the lower bytes and
+		 * clear the 'VALID' flag.
+		 */
+		if (info <= 0xffffffffUL)
+			buf[0] |= 0x80;
+		else
+			buf[0] &= 0x7f;
+		put_unaligned_be32((u32)info, &buf[3]);
 	}
 
 	return 0;
diff --git a/drivers/scsi/scsi_sas_internal.h b/drivers/scsi/scsi_sas_internal.h
index 6266a5d73d0f..e659912498bd 100644
--- a/drivers/scsi/scsi_sas_internal.h
+++ b/drivers/scsi/scsi_sas_internal.h
@@ -4,7 +4,7 @@
 #define SAS_HOST_ATTRS		0
 #define SAS_PHY_ATTRS		17
 #define SAS_PORT_ATTRS		1
-#define SAS_RPORT_ATTRS		7
+#define SAS_RPORT_ATTRS		8
 #define SAS_END_DEV_ATTRS	5
 #define SAS_EXPANDER_ATTRS	7
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index d16441961f3a..92ffd2406f97 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1105,7 +1105,7 @@ static umode_t scsi_sdev_bin_attr_is_visible(struct kobject *kobj,
 	if (attr == &dev_attr_vpd_pg80 && !sdev->vpd_pg80)
 		return 0;
 
-	if (attr == &dev_attr_vpd_pg83 && sdev->vpd_pg83)
+	if (attr == &dev_attr_vpd_pg83 && !sdev->vpd_pg83)
 		return 0;
 
 	return S_IRUGO;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 80520e2f0fa2..b6f958193dad 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -1286,6 +1286,7 @@ sas_rphy_protocol_attr(identify.target_port_protocols, target_port_protocols);
 sas_rphy_simple_attr(identify.sas_address, sas_address, "0x%016llx\n",
 		unsigned long long);
 sas_rphy_simple_attr(identify.phy_identifier, phy_identifier, "%d\n", u8);
+sas_rphy_simple_attr(scsi_target_id, scsi_target_id, "%d\n", u32);
 
 /* only need 8 bytes of data plus header (4 or 8) */
 #define BUF_SIZE 64
@@ -1886,6 +1887,7 @@ sas_attach_transport(struct sas_function_template *ft)
 	SETUP_RPORT_ATTRIBUTE(rphy_device_type);
 	SETUP_RPORT_ATTRIBUTE(rphy_sas_address);
 	SETUP_RPORT_ATTRIBUTE(rphy_phy_identifier);
+	SETUP_RPORT_ATTRIBUTE(rphy_scsi_target_id);
 	SETUP_OPTIONAL_RPORT_ATTRIBUTE(rphy_enclosure_identifier,
 				       get_enclosure_identifier);
 	SETUP_OPTIONAL_RPORT_ATTRIBUTE(rphy_bay_identifier,
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 607b0a505844..71c5138ddf94 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4917,8 +4917,6 @@ static int sgl_map_user_pages(struct st_buffer *STbp,
         /* Try to fault in all of the necessary pages */
         /* rw==READ means read from drive, write into memory area */
 	res = get_user_pages_unlocked(
-		current,
-		current->mm,
 		uaddr,
 		nr_pages,
 		rw == READ,
diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
index 5f4530744e0a..097894a1fab5 100644
--- a/drivers/scsi/ufs/Kconfig
+++ b/drivers/scsi/ufs/Kconfig
@@ -37,6 +37,7 @@ config SCSI_UFSHCD
 	depends on SCSI && SCSI_DMA
 	select PM_DEVFREQ
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select NLS
 	---help---
 	This selects the support for UFS devices in Linux, say Y and make
 	  sure that you know the name of your UFS host adapter (the card
diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 4f38d008bfb4..3aedf73f1131 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, Linux Foundation. All rights reserved.
+ * Copyright (c) 2013-2016, Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -16,8 +16,8 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
-
 #include <linux/phy/phy-qcom-ufs.h>
+
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
 #include "unipro.h"
@@ -58,6 +58,12 @@ static void ufs_qcom_dump_regs(struct ufs_hba *hba, int offset, int len,
 			len * 4, false);
 }
 
+static void ufs_qcom_dump_regs_wrapper(struct ufs_hba *hba, int offset, int len,
+		char *prefix, void *priv)
+{
+	ufs_qcom_dump_regs(hba, offset, len, prefix);
+}
+
 static int ufs_qcom_get_connected_tx_lanes(struct ufs_hba *hba, u32 *tx_lanes)
 {
 	int err = 0;
@@ -106,9 +112,11 @@ static void ufs_qcom_disable_lane_clks(struct ufs_qcom_host *host)
 	if (!host->is_lane_clks_enabled)
 		return;
 
-	clk_disable_unprepare(host->tx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->tx_l1_sync_clk);
 	clk_disable_unprepare(host->tx_l0_sync_clk);
-	clk_disable_unprepare(host->rx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->rx_l1_sync_clk);
 	clk_disable_unprepare(host->rx_l0_sync_clk);
 
 	host->is_lane_clks_enabled = false;
@@ -132,21 +140,24 @@ static int ufs_qcom_enable_lane_clks(struct ufs_qcom_host *host)
 	if (err)
 		goto disable_rx_l0;
 
-	err = ufs_qcom_host_clk_enable(dev, "rx_lane1_sync_clk",
-		host->rx_l1_sync_clk);
-	if (err)
-		goto disable_tx_l0;
+	if (host->hba->lanes_per_direction > 1) {
+		err = ufs_qcom_host_clk_enable(dev, "rx_lane1_sync_clk",
+			host->rx_l1_sync_clk);
+		if (err)
+			goto disable_tx_l0;
 
-	err = ufs_qcom_host_clk_enable(dev, "tx_lane1_sync_clk",
-		host->tx_l1_sync_clk);
-	if (err)
-		goto disable_rx_l1;
+		err = ufs_qcom_host_clk_enable(dev, "tx_lane1_sync_clk",
+			host->tx_l1_sync_clk);
+		if (err)
+			goto disable_rx_l1;
+	}
 
 	host->is_lane_clks_enabled = true;
 	goto out;
 
 disable_rx_l1:
-	clk_disable_unprepare(host->rx_l1_sync_clk);
+	if (host->hba->lanes_per_direction > 1)
+		clk_disable_unprepare(host->rx_l1_sync_clk);
 disable_tx_l0:
 	clk_disable_unprepare(host->tx_l0_sync_clk);
 disable_rx_l0:
@@ -170,14 +181,16 @@ static int ufs_qcom_init_lane_clks(struct ufs_qcom_host *host)
 	if (err)
 		goto out;
 
-	err = ufs_qcom_host_clk_get(dev, "rx_lane1_sync_clk",
-		&host->rx_l1_sync_clk);
-	if (err)
-		goto out;
-
-	err = ufs_qcom_host_clk_get(dev, "tx_lane1_sync_clk",
-		&host->tx_l1_sync_clk);
+	/* In case of single lane per direction, don't read lane1 clocks */
+	if (host->hba->lanes_per_direction > 1) {
+		err = ufs_qcom_host_clk_get(dev, "rx_lane1_sync_clk",
+			&host->rx_l1_sync_clk);
+		if (err)
+			goto out;
 
+		err = ufs_qcom_host_clk_get(dev, "tx_lane1_sync_clk",
+			&host->tx_l1_sync_clk);
+	}
 out:
 	return err;
 }
@@ -267,9 +280,8 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba)
 	ret = ufs_qcom_phy_calibrate_phy(phy, is_rate_B);
 
 	if (ret) {
-		dev_err(hba->dev,
-		"%s: ufs_qcom_phy_calibrate_phy()failed, ret = %d\n",
-		__func__, ret);
+		dev_err(hba->dev, "%s: ufs_qcom_phy_calibrate_phy() failed, ret = %d\n",
+			__func__, ret);
 		goto out;
 	}
 
@@ -519,6 +531,18 @@ static int ufs_qcom_link_startup_notify(struct ufs_hba *hba,
 			err = ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(hba,
 									  150);
 
+		/*
+		 * Some UFS devices (and may be host) have issues if LCC is
+		 * enabled. So we are setting PA_Local_TX_LCC_Enable to 0
+		 * before link startup which will make sure that both host
+		 * and device TX LCC are disabled once link startup is
+		 * completed.
+		 */
+		if (ufshcd_get_local_unipro_ver(hba) != UFS_UNIPRO_VER_1_41)
+			err = ufshcd_dme_set(hba,
+					UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE),
+					0);
+
 		break;
 	case POST_CHANGE:
 		ufs_qcom_link_startup_post_change(hba);
@@ -962,6 +986,10 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 			goto out;
 		}
 
+		/* enable the device ref clock before changing to HS mode */
+		if (!ufshcd_is_hs_mode(&hba->pwr_info) &&
+			ufshcd_is_hs_mode(dev_req_params))
+			ufs_qcom_dev_ref_clk_ctrl(host, true);
 		break;
 	case POST_CHANGE:
 		if (ufs_qcom_cfg_timers(hba, dev_req_params->gear_rx,
@@ -989,6 +1017,11 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 		memcpy(&host->dev_req_params,
 				dev_req_params, sizeof(*dev_req_params));
 		ufs_qcom_update_bus_bw_vote(host);
+
+		/* disable the device ref clock if entered PWM mode */
+		if (ufshcd_is_hs_mode(&hba->pwr_info) &&
+			!ufshcd_is_hs_mode(dev_req_params))
+			ufs_qcom_dev_ref_clk_ctrl(host, false);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1090,6 +1123,9 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on)
 			ufs_qcom_phy_disable_iface_clk(host->generic_phy);
 			goto out;
 		}
+		/* enable the device ref clock for HS mode*/
+		if (ufshcd_is_hs_mode(&hba->pwr_info))
+			ufs_qcom_dev_ref_clk_ctrl(host, true);
 		vote = host->bus_vote.saved_vote;
 		if (vote == host->bus_vote.min_bw_vote)
 			ufs_qcom_update_bus_bw_vote(host);
@@ -1367,6 +1403,74 @@ out:
 	return err;
 }
 
+static void ufs_qcom_print_hw_debug_reg_all(struct ufs_hba *hba,
+		void *priv, void (*print_fn)(struct ufs_hba *hba,
+		int offset, int num_regs, char *str, void *priv))
+{
+	u32 reg;
+	struct ufs_qcom_host *host;
+
+	if (unlikely(!hba)) {
+		pr_err("%s: hba is NULL\n", __func__);
+		return;
+	}
+	if (unlikely(!print_fn)) {
+		dev_err(hba->dev, "%s: print_fn is NULL\n", __func__);
+		return;
+	}
+
+	host = ufshcd_get_variant(hba);
+	if (!(host->dbg_print_en & UFS_QCOM_DBG_PRINT_REGS_EN))
+		return;
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_REG_OCSC);
+	print_fn(hba, reg, 44, "UFS_UFS_DBG_RD_REG_OCSC ", priv);
+
+	reg = ufshcd_readl(hba, REG_UFS_CFG1);
+	reg |= UFS_BIT(17);
+	ufshcd_writel(hba, reg, REG_UFS_CFG1);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_EDTL_RAM);
+	print_fn(hba, reg, 32, "UFS_UFS_DBG_RD_EDTL_RAM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_DESC_RAM);
+	print_fn(hba, reg, 128, "UFS_UFS_DBG_RD_DESC_RAM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_UFS_DBG_RD_PRDT_RAM);
+	print_fn(hba, reg, 64, "UFS_UFS_DBG_RD_PRDT_RAM ", priv);
+
+	ufshcd_writel(hba, (reg & ~UFS_BIT(17)), REG_UFS_CFG1);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_UAWM);
+	print_fn(hba, reg, 4, "UFS_DBG_RD_REG_UAWM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_UARM);
+	print_fn(hba, reg, 4, "UFS_DBG_RD_REG_UARM ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TXUC);
+	print_fn(hba, reg, 48, "UFS_DBG_RD_REG_TXUC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_RXUC);
+	print_fn(hba, reg, 27, "UFS_DBG_RD_REG_RXUC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_DFC);
+	print_fn(hba, reg, 19, "UFS_DBG_RD_REG_DFC ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TRLUT);
+	print_fn(hba, reg, 34, "UFS_DBG_RD_REG_TRLUT ", priv);
+
+	reg = ufs_qcom_get_debug_reg_offset(host, UFS_DBG_RD_REG_TMRLUT);
+	print_fn(hba, reg, 9, "UFS_DBG_RD_REG_TMRLUT ", priv);
+}
+
+static void ufs_qcom_enable_test_bus(struct ufs_qcom_host *host)
+{
+	if (host->dbg_print_en & UFS_QCOM_DBG_PRINT_TEST_BUS_EN)
+		ufshcd_rmwl(host->hba, TEST_BUS_EN, TEST_BUS_EN, REG_UFS_CFG1);
+	else
+		ufshcd_rmwl(host->hba, TEST_BUS_EN, 0, REG_UFS_CFG1);
+}
+
 static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host)
 {
 	/* provide a legal default configuration */
@@ -1475,6 +1579,7 @@ int ufs_qcom_testbus_config(struct ufs_qcom_host *host)
 	ufshcd_rmwl(host->hba, mask,
 		    (u32)host->testbus.select_minor << offset,
 		    reg);
+	ufs_qcom_enable_test_bus(host);
 	ufshcd_release(host->hba);
 	pm_runtime_put_sync(host->hba->dev);
 
@@ -1491,8 +1596,10 @@ static void ufs_qcom_dump_dbg_regs(struct ufs_hba *hba)
 	ufs_qcom_dump_regs(hba, REG_UFS_SYS1CLK_1US, 16,
 			"HCI Vendor Specific Registers ");
 
+	ufs_qcom_print_hw_debug_reg_all(hba, NULL, ufs_qcom_dump_regs_wrapper);
 	ufs_qcom_testbus_read(hba);
 }
+
 /**
  * struct ufs_hba_qcom_vops - UFS QCOM specific variant operations
  *
@@ -1537,7 +1644,7 @@ static int ufs_qcom_probe(struct platform_device *pdev)
  * ufs_qcom_remove - set driver_data of the device to NULL
  * @pdev: pointer to platform device handle
  *
- * Always return 0
+ * Always returns 0
  */
 static int ufs_qcom_remove(struct platform_device *pdev)
 {
diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h
index 36249b35f858..a19307a57ce2 100644
--- a/drivers/scsi/ufs/ufs-qcom.h
+++ b/drivers/scsi/ufs/ufs-qcom.h
@@ -241,6 +241,15 @@ struct ufs_qcom_host {
 	struct ufs_qcom_testbus testbus;
 };
 
+static inline u32
+ufs_qcom_get_debug_reg_offset(struct ufs_qcom_host *host, u32 reg)
+{
+	if (host->hw_ver.major <= 0x02)
+		return UFS_CNTLR_2_x_x_VEN_REGS_OFFSET(reg);
+
+	return UFS_CNTLR_3_x_x_VEN_REGS_OFFSET(reg);
+};
+
 #define ufs_qcom_is_link_off(hba) ufshcd_is_link_off(hba)
 #define ufs_qcom_is_link_active(hba) ufshcd_is_link_active(hba)
 #define ufs_qcom_is_link_hibern8(hba) ufshcd_is_link_hibern8(hba)
diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h
index 54a16cef0367..b291fa6ed2ad 100644
--- a/drivers/scsi/ufs/ufs.h
+++ b/drivers/scsi/ufs/ufs.h
@@ -43,6 +43,7 @@
 #define GENERAL_UPIU_REQUEST_SIZE 32
 #define QUERY_DESC_MAX_SIZE       255
 #define QUERY_DESC_MIN_SIZE       2
+#define QUERY_DESC_HDR_SIZE       2
 #define QUERY_OSF_SIZE            (GENERAL_UPIU_REQUEST_SIZE - \
 					(sizeof(struct utp_upiu_header)))
 
@@ -195,6 +196,37 @@ enum unit_desc_param {
 	UNIT_DESC_PARAM_LARGE_UNIT_SIZE_M1	= 0x22,
 };
 
+/* Device descriptor parameters offsets in bytes*/
+enum device_desc_param {
+	DEVICE_DESC_PARAM_LEN			= 0x0,
+	DEVICE_DESC_PARAM_TYPE			= 0x1,
+	DEVICE_DESC_PARAM_DEVICE_TYPE		= 0x2,
+	DEVICE_DESC_PARAM_DEVICE_CLASS		= 0x3,
+	DEVICE_DESC_PARAM_DEVICE_SUB_CLASS	= 0x4,
+	DEVICE_DESC_PARAM_PRTCL			= 0x5,
+	DEVICE_DESC_PARAM_NUM_LU		= 0x6,
+	DEVICE_DESC_PARAM_NUM_WLU		= 0x7,
+	DEVICE_DESC_PARAM_BOOT_ENBL		= 0x8,
+	DEVICE_DESC_PARAM_DESC_ACCSS_ENBL	= 0x9,
+	DEVICE_DESC_PARAM_INIT_PWR_MODE		= 0xA,
+	DEVICE_DESC_PARAM_HIGH_PR_LUN		= 0xB,
+	DEVICE_DESC_PARAM_SEC_RMV_TYPE		= 0xC,
+	DEVICE_DESC_PARAM_SEC_LU		= 0xD,
+	DEVICE_DESC_PARAM_BKOP_TERM_LT		= 0xE,
+	DEVICE_DESC_PARAM_ACTVE_ICC_LVL		= 0xF,
+	DEVICE_DESC_PARAM_SPEC_VER		= 0x10,
+	DEVICE_DESC_PARAM_MANF_DATE		= 0x12,
+	DEVICE_DESC_PARAM_MANF_NAME		= 0x14,
+	DEVICE_DESC_PARAM_PRDCT_NAME		= 0x15,
+	DEVICE_DESC_PARAM_SN			= 0x16,
+	DEVICE_DESC_PARAM_OEM_ID		= 0x17,
+	DEVICE_DESC_PARAM_MANF_ID		= 0x18,
+	DEVICE_DESC_PARAM_UD_OFFSET		= 0x1A,
+	DEVICE_DESC_PARAM_UD_LEN		= 0x1B,
+	DEVICE_DESC_PARAM_RTT_CAP		= 0x1C,
+	DEVICE_DESC_PARAM_FRQ_RTC		= 0x1D,
+};
+
 /*
  * Logical Unit Write Protect
  * 00h: LU not write protected
@@ -469,6 +501,7 @@ struct ufs_vreg {
 	struct regulator *reg;
 	const char *name;
 	bool enabled;
+	bool unused;
 	int min_uV;
 	int max_uV;
 	int min_uA;
diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h
new file mode 100644
index 000000000000..ee4ab85e2801
--- /dev/null
+++ b/drivers/scsi/ufs/ufs_quirks.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _UFS_QUIRKS_H_
+#define _UFS_QUIRKS_H_
+
+/* return true if s1 is a prefix of s2 */
+#define STR_PRFX_EQUAL(s1, s2) !strncmp(s1, s2, strlen(s1))
+
+#define UFS_ANY_VENDOR 0xFFFF
+#define UFS_ANY_MODEL  "ANY_MODEL"
+
+#define MAX_MODEL_LEN 16
+
+#define UFS_VENDOR_TOSHIBA     0x198
+#define UFS_VENDOR_SAMSUNG     0x1CE
+
+/**
+ * ufs_device_info - ufs device details
+ * @wmanufacturerid: card details
+ * @model: card model
+ */
+struct ufs_device_info {
+	u16 wmanufacturerid;
+	char model[MAX_MODEL_LEN + 1];
+};
+
+/**
+ * ufs_dev_fix - ufs device quirk info
+ * @card: ufs card details
+ * @quirk: device quirk
+ */
+struct ufs_dev_fix {
+	struct ufs_device_info card;
+	unsigned int quirk;
+};
+
+#define END_FIX { { 0 }, 0 }
+
+/* add specific device quirk */
+#define UFS_FIX(_vendor, _model, _quirk) \
+		{					  \
+			.card.wmanufacturerid = (_vendor),\
+			.card.model = (_model),		  \
+			.quirk = (_quirk),		  \
+		}
+
+/*
+ * If UFS device is having issue in processing LCC (Line Control
+ * Command) coming from UFS host controller then enable this quirk.
+ * When this quirk is enabled, host controller driver should disable
+ * the LCC transmission on UFS host controller (by clearing
+ * TX_LCC_ENABLE attribute of host to 0).
+ */
+#define UFS_DEVICE_QUIRK_BROKEN_LCC (1 << 0)
+
+/*
+ * Some UFS devices don't need VCCQ rail for device operations. Enabling this
+ * quirk for such devices will make sure that VCCQ rail is not voted.
+ */
+#define UFS_DEVICE_NO_VCCQ (1 << 1)
+
+/*
+ * Some vendor's UFS device sends back to back NACs for the DL data frames
+ * causing the host controller to raise the DFES error status. Sometimes
+ * such UFS devices send back to back NAC without waiting for new
+ * retransmitted DL frame from the host and in such cases it might be possible
+ * the Host UniPro goes into bad state without raising the DFES error
+ * interrupt. If this happens then all the pending commands would timeout
+ * only after respective SW command (which is generally too large).
+ *
+ * We can workaround such device behaviour like this:
+ * - As soon as SW sees the DL NAC error, it should schedule the error handler
+ * - Error handler would sleep for 50ms to see if there are any fatal errors
+ *   raised by UFS controller.
+ *    - If there are fatal errors then SW does normal error recovery.
+ *    - If there are no fatal errors then SW sends the NOP command to device
+ *      to check if link is alive.
+ *        - If NOP command times out, SW does normal error recovery
+ *        - If NOP command succeed, skip the error handling.
+ *
+ * If DL NAC error is seen multiple times with some vendor's UFS devices then
+ * enable this quirk to initiate quick error recovery and also silence related
+ * error logs to reduce spamming of kernel logs.
+ */
+#define UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS (1 << 2)
+
+/*
+ * Some UFS devices may not work properly after resume if the link was kept
+ * in off state during suspend. Enabling this quirk will not allow the
+ * link to be kept in off state during suspend.
+ */
+#define UFS_DEVICE_QUIRK_NO_LINK_OFF	(1 << 3)
+
+/*
+ * Few Toshiba UFS device models advertise RX_MIN_ACTIVATETIME_CAPABILITY as
+ * 600us which may not be enough for reliable hibern8 exit hardware sequence
+ * from UFS device.
+ * To workaround this issue, host should set its PA_TACTIVATE time to 1ms even
+ * if device advertises RX_MIN_ACTIVATETIME_CAPABILITY less than 1ms.
+ */
+#define UFS_DEVICE_QUIRK_PA_TACTIVATE	(1 << 4)
+
+/*
+ * Some UFS memory devices may have really low read/write throughput in
+ * FAST AUTO mode, enable this quirk to make sure that FAST AUTO mode is
+ * never enabled for such devices.
+ */
+#define UFS_DEVICE_NO_FASTAUTO		(1 << 5)
+
+/*
+ * It seems some UFS devices may keep drawing more than sleep current
+ * (atleast for 500us) from UFS rails (especially from VCCQ rail).
+ * To avoid this situation, add 2ms delay before putting these UFS
+ * rails in LPM mode.
+ */
+#define UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM	(1 << 6)
+
+struct ufs_hba;
+void ufs_advertise_fixup_device(struct ufs_hba *hba);
+
+static struct ufs_dev_fix ufs_fixups[] = {
+	/* UFS cards deviations table */
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS),
+	UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL,
+		UFS_DEVICE_NO_FASTAUTO),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, UFS_ANY_MODEL,
+		UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, "THGLF2G9C8KBADG",
+		UFS_DEVICE_QUIRK_PA_TACTIVATE),
+	UFS_FIX(UFS_VENDOR_TOSHIBA, "THGLF2G9D8KBADG",
+		UFS_DEVICE_QUIRK_PA_TACTIVATE),
+
+	END_FIX
+};
+#endif /* UFS_QUIRKS_H_ */
diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index d2a7b127b05c..718f12e09885 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
@@ -40,6 +40,8 @@
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
 
+#define UFSHCD_DEFAULT_LANES_PER_DIRECTION		2
+
 static int ufshcd_parse_clock_info(struct ufs_hba *hba)
 {
 	int ret = 0;
@@ -277,6 +279,21 @@ void ufshcd_pltfrm_shutdown(struct platform_device *pdev)
 }
 EXPORT_SYMBOL_GPL(ufshcd_pltfrm_shutdown);
 
+static void ufshcd_init_lanes_per_dir(struct ufs_hba *hba)
+{
+	struct device *dev = hba->dev;
+	int ret;
+
+	ret = of_property_read_u32(dev->of_node, "lanes-per-direction",
+		&hba->lanes_per_direction);
+	if (ret) {
+		dev_dbg(hba->dev,
+			"%s: failed to read lanes-per-direction, ret=%d\n",
+			__func__, ret);
+		hba->lanes_per_direction = UFSHCD_DEFAULT_LANES_PER_DIRECTION;
+	}
+}
+
 /**
  * ufshcd_pltfrm_init - probe routine of the driver
  * @pdev: pointer to Platform device handle
@@ -331,6 +348,8 @@ int ufshcd_pltfrm_init(struct platform_device *pdev,
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
+	ufshcd_init_lanes_per_dir(hba);
+
 	err = ufshcd_init(hba, mmio_base, irq);
 	if (err) {
 		dev_err(dev, "Initialization failed\n");
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 9c1b94bef8f3..f8fa72c31a9d 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -39,8 +39,10 @@
 
 #include <linux/async.h>
 #include <linux/devfreq.h>
-
+#include <linux/nls.h>
+#include <linux/of.h>
 #include "ufshcd.h"
+#include "ufs_quirks.h"
 #include "unipro.h"
 
 #define UFSHCD_ENABLE_INTRS	(UTP_TRANSFER_REQ_COMPL |\
@@ -131,9 +133,11 @@ enum {
 /* UFSHCD UIC layer error flags */
 enum {
 	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
-	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
-	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
-	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+	UFSHCD_UIC_DL_NAC_RECEIVED_ERROR = (1 << 1), /* Data link layer error */
+	UFSHCD_UIC_DL_TCx_REPLAY_ERROR = (1 << 2), /* Data link layer error */
+	UFSHCD_UIC_NL_ERROR = (1 << 3), /* Network layer error */
+	UFSHCD_UIC_TL_ERROR = (1 << 4), /* Transport Layer error */
+	UFSHCD_UIC_DME_ERROR = (1 << 5), /* DME error */
 };
 
 /* Interrupt configuration options */
@@ -193,6 +197,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba);
 static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on,
 				 bool skip_ref_clk);
 static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on);
+static int ufshcd_set_vccq_rail_unused(struct ufs_hba *hba, bool unused);
 static int ufshcd_uic_hibern8_exit(struct ufs_hba *hba);
 static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba);
 static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba);
@@ -231,6 +236,16 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba)
 	}
 }
 
+/* replace non-printable or non-ASCII characters with spaces */
+static inline void ufshcd_remove_non_printable(char *val)
+{
+	if (!val)
+		return;
+
+	if (*val < 0x20 || *val > 0x7e)
+		*val = ' ';
+}
+
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -239,11 +254,13 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba)
  * @val - wait condition
  * @interval_us - polling interval in microsecs
  * @timeout_ms - timeout in millisecs
+ * @can_sleep - perform sleep or just spin
  *
  * Returns -ETIMEDOUT on error, zero on success
  */
-static int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
-		u32 val, unsigned long interval_us, unsigned long timeout_ms)
+int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
+				u32 val, unsigned long interval_us,
+				unsigned long timeout_ms, bool can_sleep)
 {
 	int err = 0;
 	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
@@ -252,9 +269,10 @@ static int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
 	val = val & mask;
 
 	while ((ufshcd_readl(hba, reg) & mask) != val) {
-		/* wakeup within 50us of expiry */
-		usleep_range(interval_us, interval_us + 50);
-
+		if (can_sleep)
+			usleep_range(interval_us, interval_us + 50);
+		else
+			udelay(interval_us);
 		if (time_after(jiffies, timeout)) {
 			if ((ufshcd_readl(hba, reg) & mask) != val)
 				err = -ETIMEDOUT;
@@ -552,6 +570,34 @@ static inline int ufshcd_is_hba_active(struct ufs_hba *hba)
 	return (ufshcd_readl(hba, REG_CONTROLLER_ENABLE) & 0x1) ? 0 : 1;
 }
 
+u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba)
+{
+	/* HCI version 1.0 and 1.1 supports UniPro 1.41 */
+	if ((hba->ufs_version == UFSHCI_VERSION_10) ||
+	    (hba->ufs_version == UFSHCI_VERSION_11))
+		return UFS_UNIPRO_VER_1_41;
+	else
+		return UFS_UNIPRO_VER_1_6;
+}
+EXPORT_SYMBOL(ufshcd_get_local_unipro_ver);
+
+static bool ufshcd_is_unipro_pa_params_tuning_req(struct ufs_hba *hba)
+{
+	/*
+	 * If both host and device support UniPro ver1.6 or later, PA layer
+	 * parameters tuning happens during link startup itself.
+	 *
+	 * We can manually tune PA layer parameters if either host or device
+	 * doesn't support UniPro ver 1.6 or later. But to keep manual tuning
+	 * logic simple, we will only do manual tuning if local unipro version
+	 * doesn't support ver1.6 or later.
+	 */
+	if (ufshcd_get_local_unipro_ver(hba) < UFS_UNIPRO_VER_1_6)
+		return true;
+	else
+		return false;
+}
+
 static void ufshcd_ungate_work(struct work_struct *work)
 {
 	int ret;
@@ -1458,7 +1504,7 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
 	 */
 	err = ufshcd_wait_for_register(hba,
 			REG_UTP_TRANSFER_REQ_DOOR_BELL,
-			mask, ~mask, 1000, 1000);
+			mask, ~mask, 1000, 1000, true);
 
 	return err;
 }
@@ -1857,21 +1903,7 @@ static int ufshcd_query_attr_retry(struct ufs_hba *hba,
 	return ret;
 }
 
-/**
- * ufshcd_query_descriptor - API function for sending descriptor requests
- * hba: per-adapter instance
- * opcode: attribute opcode
- * idn: attribute idn to access
- * index: index field
- * selector: selector field
- * desc_buf: the buffer that contains the descriptor
- * buf_len: length parameter passed to the device
- *
- * Returns 0 for success, non-zero in case of failure.
- * The buf_len parameter will contain, on return, the length parameter
- * received on the response.
- */
-static int ufshcd_query_descriptor(struct ufs_hba *hba,
+static int __ufshcd_query_descriptor(struct ufs_hba *hba,
 			enum query_opcode opcode, enum desc_idn idn, u8 index,
 			u8 selector, u8 *desc_buf, int *buf_len)
 {
@@ -1936,6 +1968,39 @@ out:
 }
 
 /**
+ * ufshcd_query_descriptor_retry - API function for sending descriptor
+ * requests
+ * hba: per-adapter instance
+ * opcode: attribute opcode
+ * idn: attribute idn to access
+ * index: index field
+ * selector: selector field
+ * desc_buf: the buffer that contains the descriptor
+ * buf_len: length parameter passed to the device
+ *
+ * Returns 0 for success, non-zero in case of failure.
+ * The buf_len parameter will contain, on return, the length parameter
+ * received on the response.
+ */
+int ufshcd_query_descriptor_retry(struct ufs_hba *hba,
+			enum query_opcode opcode, enum desc_idn idn, u8 index,
+			u8 selector, u8 *desc_buf, int *buf_len)
+{
+	int err;
+	int retries;
+
+	for (retries = QUERY_REQ_RETRIES; retries > 0; retries--) {
+		err = __ufshcd_query_descriptor(hba, opcode, idn, index,
+						selector, desc_buf, buf_len);
+		if (!err || err == -EINVAL)
+			break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(ufshcd_query_descriptor_retry);
+
+/**
  * ufshcd_read_desc_param - read the specified descriptor parameter
  * @hba: Pointer to adapter instance
  * @desc_id: descriptor idn value
@@ -1977,9 +2042,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba,
 			return -ENOMEM;
 	}
 
-	ret = ufshcd_query_descriptor(hba, UPIU_QUERY_OPCODE_READ_DESC,
-				      desc_id, desc_index, 0, desc_buf,
-				      &buff_len);
+	ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC,
+					desc_id, desc_index, 0, desc_buf,
+					&buff_len);
 
 	if (ret || (buff_len < ufs_query_desc_max_size[desc_id]) ||
 	    (desc_buf[QUERY_DESC_LENGTH_OFFSET] !=
@@ -2017,6 +2082,82 @@ static inline int ufshcd_read_power_desc(struct ufs_hba *hba,
 	return ufshcd_read_desc(hba, QUERY_DESC_IDN_POWER, 0, buf, size);
 }
 
+int ufshcd_read_device_desc(struct ufs_hba *hba, u8 *buf, u32 size)
+{
+	return ufshcd_read_desc(hba, QUERY_DESC_IDN_DEVICE, 0, buf, size);
+}
+EXPORT_SYMBOL(ufshcd_read_device_desc);
+
+/**
+ * ufshcd_read_string_desc - read string descriptor
+ * @hba: pointer to adapter instance
+ * @desc_index: descriptor index
+ * @buf: pointer to buffer where descriptor would be read
+ * @size: size of buf
+ * @ascii: if true convert from unicode to ascii characters
+ *
+ * Return 0 in case of success, non-zero otherwise
+ */
+int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index, u8 *buf,
+				u32 size, bool ascii)
+{
+	int err = 0;
+
+	err = ufshcd_read_desc(hba,
+				QUERY_DESC_IDN_STRING, desc_index, buf, size);
+
+	if (err) {
+		dev_err(hba->dev, "%s: reading String Desc failed after %d retries. err = %d\n",
+			__func__, QUERY_REQ_RETRIES, err);
+		goto out;
+	}
+
+	if (ascii) {
+		int desc_len;
+		int ascii_len;
+		int i;
+		char *buff_ascii;
+
+		desc_len = buf[0];
+		/* remove header and divide by 2 to move from UTF16 to UTF8 */
+		ascii_len = (desc_len - QUERY_DESC_HDR_SIZE) / 2 + 1;
+		if (size < ascii_len + QUERY_DESC_HDR_SIZE) {
+			dev_err(hba->dev, "%s: buffer allocated size is too small\n",
+					__func__);
+			err = -ENOMEM;
+			goto out;
+		}
+
+		buff_ascii = kmalloc(ascii_len, GFP_KERNEL);
+		if (!buff_ascii) {
+			err = -ENOMEM;
+			goto out_free_buff;
+		}
+
+		/*
+		 * the descriptor contains string in UTF16 format
+		 * we need to convert to utf-8 so it can be displayed
+		 */
+		utf16s_to_utf8s((wchar_t *)&buf[QUERY_DESC_HDR_SIZE],
+				desc_len - QUERY_DESC_HDR_SIZE,
+				UTF16_BIG_ENDIAN, buff_ascii, ascii_len);
+
+		/* replace non-printable or non-ASCII characters with spaces */
+		for (i = 0; i < ascii_len; i++)
+			ufshcd_remove_non_printable(&buff_ascii[i]);
+
+		memset(buf + QUERY_DESC_HDR_SIZE, 0,
+				size - QUERY_DESC_HDR_SIZE);
+		memcpy(buf + QUERY_DESC_HDR_SIZE, buff_ascii, ascii_len);
+		buf[QUERY_DESC_LENGTH_OFFSET] = ascii_len + QUERY_DESC_HDR_SIZE;
+out_free_buff:
+		kfree(buff_ascii);
+	}
+out:
+	return err;
+}
+EXPORT_SYMBOL(ufshcd_read_string_desc);
+
 /**
  * ufshcd_read_unit_desc_param - read the specified unit descriptor parameter
  * @hba: Pointer to adapter instance
@@ -2814,6 +2955,23 @@ out:
 }
 
 /**
+ * ufshcd_hba_stop - Send controller to reset state
+ * @hba: per adapter instance
+ * @can_sleep: perform sleep or just spin
+ */
+static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep)
+{
+	int err;
+
+	ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
+	err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE,
+					CONTROLLER_ENABLE, CONTROLLER_DISABLE,
+					10, 1, can_sleep);
+	if (err)
+		dev_err(hba->dev, "%s: Controller disable failed\n", __func__);
+}
+
+/**
  * ufshcd_hba_enable - initialize the controller
  * @hba: per adapter instance
  *
@@ -2833,18 +2991,9 @@ static int ufshcd_hba_enable(struct ufs_hba *hba)
 	 * development and testing of this driver. msleep can be changed to
 	 * mdelay and retry count can be reduced based on the controller.
 	 */
-	if (!ufshcd_is_hba_active(hba)) {
-
+	if (!ufshcd_is_hba_active(hba))
 		/* change controller state to "reset state" */
-		ufshcd_hba_stop(hba);
-
-		/*
-		 * This delay is based on the testing done with UFS host
-		 * controller FPGA. The delay can be changed based on the
-		 * host controller used.
-		 */
-		msleep(5);
-	}
+		ufshcd_hba_stop(hba, true);
 
 	/* UniPro link is disabled at this point */
 	ufshcd_set_link_off(hba);
@@ -3365,31 +3514,18 @@ static void ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
 }
 
 /**
- * ufshcd_transfer_req_compl - handle SCSI and query command completion
+ * __ufshcd_transfer_req_compl - handle SCSI and query command completion
  * @hba: per adapter instance
+ * @completed_reqs: requests to complete
  */
-static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
+static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
+					unsigned long completed_reqs)
 {
 	struct ufshcd_lrb *lrbp;
 	struct scsi_cmnd *cmd;
-	unsigned long completed_reqs;
-	u32 tr_doorbell;
 	int result;
 	int index;
 
-	/* Resetting interrupt aggregation counters first and reading the
-	 * DOOR_BELL afterward allows us to handle all the completed requests.
-	 * In order to prevent other interrupts starvation the DB is read once
-	 * after reset. The down side of this solution is the possibility of
-	 * false interrupt if device completes another request after resetting
-	 * aggregation and before reading the DB.
-	 */
-	if (ufshcd_is_intr_aggr_allowed(hba))
-		ufshcd_reset_intr_aggr(hba);
-
-	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
-	completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
-
 	for_each_set_bit(index, &completed_reqs, hba->nutrs) {
 		lrbp = &hba->lrb[index];
 		cmd = lrbp->cmd;
@@ -3419,6 +3555,31 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
 }
 
 /**
+ * ufshcd_transfer_req_compl - handle SCSI and query command completion
+ * @hba: per adapter instance
+ */
+static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
+{
+	unsigned long completed_reqs;
+	u32 tr_doorbell;
+
+	/* Resetting interrupt aggregation counters first and reading the
+	 * DOOR_BELL afterward allows us to handle all the completed requests.
+	 * In order to prevent other interrupts starvation the DB is read once
+	 * after reset. The down side of this solution is the possibility of
+	 * false interrupt if device completes another request after resetting
+	 * aggregation and before reading the DB.
+	 */
+	if (ufshcd_is_intr_aggr_allowed(hba))
+		ufshcd_reset_intr_aggr(hba);
+
+	tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+	completed_reqs = tr_doorbell ^ hba->outstanding_reqs;
+
+	__ufshcd_transfer_req_compl(hba, completed_reqs);
+}
+
+/**
  * ufshcd_disable_ee - disable exception event
  * @hba: per-adapter instance
  * @mask: exception event to disable
@@ -3630,7 +3791,7 @@ out:
  */
 static int ufshcd_urgent_bkops(struct ufs_hba *hba)
 {
-	return ufshcd_bkops_ctrl(hba, BKOPS_STATUS_PERF_IMPACT);
+	return ufshcd_bkops_ctrl(hba, hba->urgent_bkops_lvl);
 }
 
 static inline int ufshcd_get_ee_status(struct ufs_hba *hba, u32 *status)
@@ -3639,6 +3800,43 @@ static inline int ufshcd_get_ee_status(struct ufs_hba *hba, u32 *status)
 			QUERY_ATTR_IDN_EE_STATUS, 0, 0, status);
 }
 
+static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba)
+{
+	int err;
+	u32 curr_status = 0;
+
+	if (hba->is_urgent_bkops_lvl_checked)
+		goto enable_auto_bkops;
+
+	err = ufshcd_get_bkops_status(hba, &curr_status);
+	if (err) {
+		dev_err(hba->dev, "%s: failed to get BKOPS status %d\n",
+				__func__, err);
+		goto out;
+	}
+
+	/*
+	 * We are seeing that some devices are raising the urgent bkops
+	 * exception events even when BKOPS status doesn't indicate performace
+	 * impacted or critical. Handle these device by determining their urgent
+	 * bkops status at runtime.
+	 */
+	if (curr_status < BKOPS_STATUS_PERF_IMPACT) {
+		dev_err(hba->dev, "%s: device raised urgent BKOPS exception for bkops status %d\n",
+				__func__, curr_status);
+		/* update the current status as the urgent bkops level */
+		hba->urgent_bkops_lvl = curr_status;
+		hba->is_urgent_bkops_lvl_checked = true;
+	}
+
+enable_auto_bkops:
+	err = ufshcd_enable_auto_bkops(hba);
+out:
+	if (err < 0)
+		dev_err(hba->dev, "%s: failed to handle urgent bkops %d\n",
+				__func__, err);
+}
+
 /**
  * ufshcd_exception_event_handler - handle exceptions raised by device
  * @work: pointer to work data
@@ -3662,17 +3860,95 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
 	}
 
 	status &= hba->ee_ctrl_mask;
-	if (status & MASK_EE_URGENT_BKOPS) {
-		err = ufshcd_urgent_bkops(hba);
-		if (err < 0)
-			dev_err(hba->dev, "%s: failed to handle urgent bkops %d\n",
-					__func__, err);
-	}
+
+	if (status & MASK_EE_URGENT_BKOPS)
+		ufshcd_bkops_exception_event_handler(hba);
+
 out:
 	pm_runtime_put_sync(hba->dev);
 	return;
 }
 
+/* Complete requests that have door-bell cleared */
+static void ufshcd_complete_requests(struct ufs_hba *hba)
+{
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+}
+
+/**
+ * ufshcd_quirk_dl_nac_errors - This function checks if error handling is
+ *				to recover from the DL NAC errors or not.
+ * @hba: per-adapter instance
+ *
+ * Returns true if error handling is required, false otherwise
+ */
+static bool ufshcd_quirk_dl_nac_errors(struct ufs_hba *hba)
+{
+	unsigned long flags;
+	bool err_handling = true;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	/*
+	 * UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS only workaround the
+	 * device fatal error and/or DL NAC & REPLAY timeout errors.
+	 */
+	if (hba->saved_err & (CONTROLLER_FATAL_ERROR | SYSTEM_BUS_FATAL_ERROR))
+		goto out;
+
+	if ((hba->saved_err & DEVICE_FATAL_ERROR) ||
+	    ((hba->saved_err & UIC_ERROR) &&
+	     (hba->saved_uic_err & UFSHCD_UIC_DL_TCx_REPLAY_ERROR)))
+		goto out;
+
+	if ((hba->saved_err & UIC_ERROR) &&
+	    (hba->saved_uic_err & UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)) {
+		int err;
+		/*
+		 * wait for 50ms to see if we can get any other errors or not.
+		 */
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		msleep(50);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+
+		/*
+		 * now check if we have got any other severe errors other than
+		 * DL NAC error?
+		 */
+		if ((hba->saved_err & INT_FATAL_ERRORS) ||
+		    ((hba->saved_err & UIC_ERROR) &&
+		    (hba->saved_uic_err & ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)))
+			goto out;
+
+		/*
+		 * As DL NAC is the only error received so far, send out NOP
+		 * command to confirm if link is still active or not.
+		 *   - If we don't get any response then do error recovery.
+		 *   - If we get response then clear the DL NAC error bit.
+		 */
+
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		err = ufshcd_verify_dev_init(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+
+		if (err)
+			goto out;
+
+		/* Link seems to be alive hence ignore the DL NAC errors */
+		if (hba->saved_uic_err == UFSHCD_UIC_DL_NAC_RECEIVED_ERROR)
+			hba->saved_err &= ~UIC_ERROR;
+		/* clear NAC error */
+		hba->saved_uic_err &= ~UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
+		if (!hba->saved_uic_err) {
+			err_handling = false;
+			goto out;
+		}
+	}
+out:
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	return err_handling;
+}
+
 /**
  * ufshcd_err_handler - handle UFS errors that require s/w attention
  * @work: pointer to work structure
@@ -3685,6 +3961,7 @@ static void ufshcd_err_handler(struct work_struct *work)
 	u32 err_tm = 0;
 	int err = 0;
 	int tag;
+	bool needs_reset = false;
 
 	hba = container_of(work, struct ufs_hba, eh_work);
 
@@ -3692,40 +3969,86 @@ static void ufshcd_err_handler(struct work_struct *work)
 	ufshcd_hold(hba, false);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
-		spin_unlock_irqrestore(hba->host->host_lock, flags);
+	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
 		goto out;
-	}
 
 	hba->ufshcd_state = UFSHCD_STATE_RESET;
 	ufshcd_set_eh_in_progress(hba);
 
 	/* Complete requests that have door-bell cleared by h/w */
-	ufshcd_transfer_req_compl(hba);
-	ufshcd_tmc_handler(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
+	ufshcd_complete_requests(hba);
+
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
+		bool ret;
 
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		/* release the lock as ufshcd_quirk_dl_nac_errors() may sleep */
+		ret = ufshcd_quirk_dl_nac_errors(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (!ret)
+			goto skip_err_handling;
+	}
+	if ((hba->saved_err & INT_FATAL_ERRORS) ||
+	    ((hba->saved_err & UIC_ERROR) &&
+	    (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
+				   UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
+				   UFSHCD_UIC_DL_TCx_REPLAY_ERROR))))
+		needs_reset = true;
+
+	/*
+	 * if host reset is required then skip clearing the pending
+	 * transfers forcefully because they will automatically get
+	 * cleared after link startup.
+	 */
+	if (needs_reset)
+		goto skip_pending_xfer_clear;
+
+	/* release lock as clear command might sleep */
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	/* Clear pending transfer requests */
-	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
-		if (ufshcd_clear_cmd(hba, tag))
-			err_xfer |= 1 << tag;
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+		if (ufshcd_clear_cmd(hba, tag)) {
+			err_xfer = true;
+			goto lock_skip_pending_xfer_clear;
+		}
+	}
 
 	/* Clear pending task management requests */
-	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
-		if (ufshcd_clear_tm_cmd(hba, tag))
-			err_tm |= 1 << tag;
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
+		if (ufshcd_clear_tm_cmd(hba, tag)) {
+			err_tm = true;
+			goto lock_skip_pending_xfer_clear;
+		}
+	}
 
-	/* Complete the requests that are cleared by s/w */
+lock_skip_pending_xfer_clear:
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	ufshcd_transfer_req_compl(hba);
-	ufshcd_tmc_handler(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
+	/* Complete the requests that are cleared by s/w */
+	ufshcd_complete_requests(hba);
+
+	if (err_xfer || err_tm)
+		needs_reset = true;
+
+skip_pending_xfer_clear:
 	/* Fatal errors need reset */
-	if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
-			((hba->saved_err & UIC_ERROR) &&
-			 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
+	if (needs_reset) {
+		unsigned long max_doorbells = (1UL << hba->nutrs) - 1;
+
+		/*
+		 * ufshcd_reset_and_restore() does the link reinitialization
+		 * which will need atleast one empty doorbell slot to send the
+		 * device management commands (NOP and query commands).
+		 * If there is no slot empty at this moment then free up last
+		 * slot forcefully.
+		 */
+		if (hba->outstanding_reqs == max_doorbells)
+			__ufshcd_transfer_req_compl(hba,
+						    (1UL << (hba->nutrs - 1)));
+
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		err = ufshcd_reset_and_restore(hba);
+		spin_lock_irqsave(hba->host->host_lock, flags);
 		if (err) {
 			dev_err(hba->dev, "%s: reset and restore failed\n",
 					__func__);
@@ -3739,9 +4062,19 @@ static void ufshcd_err_handler(struct work_struct *work)
 		hba->saved_err = 0;
 		hba->saved_uic_err = 0;
 	}
+
+skip_err_handling:
+	if (!needs_reset) {
+		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+		if (hba->saved_err || hba->saved_uic_err)
+			dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x",
+			    __func__, hba->saved_err, hba->saved_uic_err);
+	}
+
 	ufshcd_clear_eh_in_progress(hba);
 
 out:
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 	scsi_unblock_requests(hba->host);
 	ufshcd_release(hba);
 	pm_runtime_put_sync(hba->dev);
@@ -3759,6 +4092,14 @@ static void ufshcd_update_uic_error(struct ufs_hba *hba)
 	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
 	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
 		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
+	else if (hba->dev_quirks &
+		   UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) {
+		if (reg & UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED)
+			hba->uic_error |=
+				UFSHCD_UIC_DL_NAC_RECEIVED_ERROR;
+		else if (reg & UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT)
+			hba->uic_error |= UFSHCD_UIC_DL_TCx_REPLAY_ERROR;
+	}
 
 	/* UIC NL/TL/DME errors needs software retry */
 	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
@@ -3796,15 +4137,18 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
 	}
 
 	if (queue_eh_work) {
+		/*
+		 * update the transfer error masks to sticky bits, let's do this
+		 * irrespective of current ufshcd_state.
+		 */
+		hba->saved_err |= hba->errors;
+		hba->saved_uic_err |= hba->uic_error;
+
 		/* handle fatal errors only when link is functional */
 		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
 			/* block commands from scsi mid-layer */
 			scsi_block_requests(hba->host);
 
-			/* transfer error masks to sticky bits */
-			hba->saved_err |= hba->errors;
-			hba->saved_uic_err |= hba->uic_error;
-
 			hba->ufshcd_state = UFSHCD_STATE_ERROR;
 			schedule_work(&hba->eh_work);
 		}
@@ -3897,7 +4241,7 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
 	/* poll for max. 1 sec to clear door bell register by h/w */
 	err = ufshcd_wait_for_register(hba,
 			REG_UTP_TASK_REQ_DOOR_BELL,
-			mask, 0, 1000, 1000);
+			mask, 0, 1000, 1000, true);
 out:
 	return err;
 }
@@ -4179,7 +4523,7 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 
 	/* Reset the host controller */
 	spin_lock_irqsave(hba->host->host_lock, flags);
-	ufshcd_hba_stop(hba);
+	ufshcd_hba_stop(hba, false);
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	err = ufshcd_hba_enable(hba);
@@ -4466,6 +4810,164 @@ out:
 	return ret;
 }
 
+static int ufs_get_device_info(struct ufs_hba *hba,
+				struct ufs_device_info *card_data)
+{
+	int err;
+	u8 model_index;
+	u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0};
+	u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE];
+
+	err = ufshcd_read_device_desc(hba, desc_buf,
+					QUERY_DESC_DEVICE_MAX_SIZE);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n",
+			__func__, err);
+		goto out;
+	}
+
+	/*
+	 * getting vendor (manufacturerID) and Bank Index in big endian
+	 * format
+	 */
+	card_data->wmanufacturerid = desc_buf[DEVICE_DESC_PARAM_MANF_ID] << 8 |
+				     desc_buf[DEVICE_DESC_PARAM_MANF_ID + 1];
+
+	model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
+
+	err = ufshcd_read_string_desc(hba, model_index, str_desc_buf,
+					QUERY_DESC_STRING_MAX_SIZE, ASCII_STD);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n",
+			__func__, err);
+		goto out;
+	}
+
+	str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0';
+	strlcpy(card_data->model, (str_desc_buf + QUERY_DESC_HDR_SIZE),
+		min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET],
+		      MAX_MODEL_LEN));
+
+	/* Null terminate the model string */
+	card_data->model[MAX_MODEL_LEN] = '\0';
+
+out:
+	return err;
+}
+
+void ufs_advertise_fixup_device(struct ufs_hba *hba)
+{
+	int err;
+	struct ufs_dev_fix *f;
+	struct ufs_device_info card_data;
+
+	card_data.wmanufacturerid = 0;
+
+	err = ufs_get_device_info(hba, &card_data);
+	if (err) {
+		dev_err(hba->dev, "%s: Failed getting device info. err = %d\n",
+			__func__, err);
+		return;
+	}
+
+	for (f = ufs_fixups; f->quirk; f++) {
+		if (((f->card.wmanufacturerid == card_data.wmanufacturerid) ||
+		    (f->card.wmanufacturerid == UFS_ANY_VENDOR)) &&
+		    (STR_PRFX_EQUAL(f->card.model, card_data.model) ||
+		     !strcmp(f->card.model, UFS_ANY_MODEL)))
+			hba->dev_quirks |= f->quirk;
+	}
+}
+
+/**
+ * ufshcd_tune_pa_tactivate - Tunes PA_TActivate of local UniPro
+ * @hba: per-adapter instance
+ *
+ * PA_TActivate parameter can be tuned manually if UniPro version is less than
+ * 1.61. PA_TActivate needs to be greater than or equal to peerM-PHY's
+ * RX_MIN_ACTIVATETIME_CAPABILITY attribute. This optimal value can help reduce
+ * the hibern8 exit latency.
+ *
+ * Returns zero on success, non-zero error value on failure.
+ */
+static int ufshcd_tune_pa_tactivate(struct ufs_hba *hba)
+{
+	int ret = 0;
+	u32 peer_rx_min_activatetime = 0, tuned_pa_tactivate;
+
+	ret = ufshcd_dme_peer_get(hba,
+				  UIC_ARG_MIB_SEL(
+					RX_MIN_ACTIVATETIME_CAPABILITY,
+					UIC_ARG_MPHY_RX_GEN_SEL_INDEX(0)),
+				  &peer_rx_min_activatetime);
+	if (ret)
+		goto out;
+
+	/* make sure proper unit conversion is applied */
+	tuned_pa_tactivate =
+		((peer_rx_min_activatetime * RX_MIN_ACTIVATETIME_UNIT_US)
+		 / PA_TACTIVATE_TIME_UNIT_US);
+	ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE),
+			     tuned_pa_tactivate);
+
+out:
+	return ret;
+}
+
+/**
+ * ufshcd_tune_pa_hibern8time - Tunes PA_Hibern8Time of local UniPro
+ * @hba: per-adapter instance
+ *
+ * PA_Hibern8Time parameter can be tuned manually if UniPro version is less than
+ * 1.61. PA_Hibern8Time needs to be maximum of local M-PHY's
+ * TX_HIBERN8TIME_CAPABILITY & peer M-PHY's RX_HIBERN8TIME_CAPABILITY.
+ * This optimal value can help reduce the hibern8 exit latency.
+ *
+ * Returns zero on success, non-zero error value on failure.
+ */
+static int ufshcd_tune_pa_hibern8time(struct ufs_hba *hba)
+{
+	int ret = 0;
+	u32 local_tx_hibern8_time_cap = 0, peer_rx_hibern8_time_cap = 0;
+	u32 max_hibern8_time, tuned_pa_hibern8time;
+
+	ret = ufshcd_dme_get(hba,
+			     UIC_ARG_MIB_SEL(TX_HIBERN8TIME_CAPABILITY,
+					UIC_ARG_MPHY_TX_GEN_SEL_INDEX(0)),
+				  &local_tx_hibern8_time_cap);
+	if (ret)
+		goto out;
+
+	ret = ufshcd_dme_peer_get(hba,
+				  UIC_ARG_MIB_SEL(RX_HIBERN8TIME_CAPABILITY,
+					UIC_ARG_MPHY_RX_GEN_SEL_INDEX(0)),
+				  &peer_rx_hibern8_time_cap);
+	if (ret)
+		goto out;
+
+	max_hibern8_time = max(local_tx_hibern8_time_cap,
+			       peer_rx_hibern8_time_cap);
+	/* make sure proper unit conversion is applied */
+	tuned_pa_hibern8time = ((max_hibern8_time * HIBERN8TIME_UNIT_US)
+				/ PA_HIBERN8_TIME_UNIT_US);
+	ret = ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HIBERN8TIME),
+			     tuned_pa_hibern8time);
+out:
+	return ret;
+}
+
+static void ufshcd_tune_unipro_params(struct ufs_hba *hba)
+{
+	if (ufshcd_is_unipro_pa_params_tuning_req(hba)) {
+		ufshcd_tune_pa_tactivate(hba);
+		ufshcd_tune_pa_hibern8time(hba);
+	}
+
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_PA_TACTIVATE)
+		/* set 1ms timeout for PA_TACTIVATE */
+		ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 10);
+}
+
 /**
  * ufshcd_probe_hba - probe hba to detect device and initialize
  * @hba: per-adapter instance
@@ -4482,6 +4984,10 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 
 	ufshcd_init_pwr_info(hba);
 
+	/* set the default level for urgent bkops */
+	hba->urgent_bkops_lvl = BKOPS_STATUS_PERF_IMPACT;
+	hba->is_urgent_bkops_lvl_checked = false;
+
 	/* UniPro link is active now */
 	ufshcd_set_link_active(hba);
 
@@ -4493,6 +4999,14 @@ static int ufshcd_probe_hba(struct ufs_hba *hba)
 	if (ret)
 		goto out;
 
+	ufs_advertise_fixup_device(hba);
+	ufshcd_tune_unipro_params(hba);
+
+	ret = ufshcd_set_vccq_rail_unused(hba,
+		(hba->dev_quirks & UFS_DEVICE_NO_VCCQ) ? true : false);
+	if (ret)
+		goto out;
+
 	/* UFS device is also active now */
 	ufshcd_set_ufs_dev_active(hba);
 	ufshcd_force_reset_auto_bkops(hba);
@@ -4567,6 +5081,41 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
 	ufshcd_probe_hba(hba);
 }
 
+static enum blk_eh_timer_return ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
+{
+	unsigned long flags;
+	struct Scsi_Host *host;
+	struct ufs_hba *hba;
+	int index;
+	bool found = false;
+
+	if (!scmd || !scmd->device || !scmd->device->host)
+		return BLK_EH_NOT_HANDLED;
+
+	host = scmd->device->host;
+	hba = shost_priv(host);
+	if (!hba)
+		return BLK_EH_NOT_HANDLED;
+
+	spin_lock_irqsave(host->host_lock, flags);
+
+	for_each_set_bit(index, &hba->outstanding_reqs, hba->nutrs) {
+		if (hba->lrb[index].cmd == scmd) {
+			found = true;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(host->host_lock, flags);
+
+	/*
+	 * Bypass SCSI error handling and reset the block layer timer if this
+	 * SCSI command was not actually dispatched to UFS driver, otherwise
+	 * let SCSI layer handle the error as usual.
+	 */
+	return found ? BLK_EH_NOT_HANDLED : BLK_EH_RESET_TIMER;
+}
+
 static struct scsi_host_template ufshcd_driver_template = {
 	.module			= THIS_MODULE,
 	.name			= UFSHCD,
@@ -4579,6 +5128,7 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.eh_abort_handler	= ufshcd_abort,
 	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
 	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
+	.eh_timed_out		= ufshcd_eh_timed_out,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
@@ -4607,13 +5157,24 @@ static int ufshcd_config_vreg_load(struct device *dev, struct ufs_vreg *vreg,
 static inline int ufshcd_config_vreg_lpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	return ufshcd_config_vreg_load(hba->dev, vreg, UFS_VREG_LPM_LOAD_UA);
+	if (!vreg)
+		return 0;
+	else if (vreg->unused)
+		return 0;
+	else
+		return ufshcd_config_vreg_load(hba->dev, vreg,
+					       UFS_VREG_LPM_LOAD_UA);
 }
 
 static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba,
 					 struct ufs_vreg *vreg)
 {
-	return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA);
+	if (!vreg)
+		return 0;
+	else if (vreg->unused)
+		return 0;
+	else
+		return ufshcd_config_vreg_load(hba->dev, vreg, vreg->max_uA);
 }
 
 static int ufshcd_config_vreg(struct device *dev,
@@ -4648,7 +5209,9 @@ static int ufshcd_enable_vreg(struct device *dev, struct ufs_vreg *vreg)
 {
 	int ret = 0;
 
-	if (!vreg || vreg->enabled)
+	if (!vreg)
+		goto out;
+	else if (vreg->enabled || vreg->unused)
 		goto out;
 
 	ret = ufshcd_config_vreg(dev, vreg, true);
@@ -4668,7 +5231,9 @@ static int ufshcd_disable_vreg(struct device *dev, struct ufs_vreg *vreg)
 {
 	int ret = 0;
 
-	if (!vreg || !vreg->enabled)
+	if (!vreg)
+		goto out;
+	else if (!vreg->enabled || vreg->unused)
 		goto out;
 
 	ret = regulator_disable(vreg->reg);
@@ -4774,6 +5339,36 @@ static int ufshcd_init_hba_vreg(struct ufs_hba *hba)
 	return 0;
 }
 
+static int ufshcd_set_vccq_rail_unused(struct ufs_hba *hba, bool unused)
+{
+	int ret = 0;
+	struct ufs_vreg_info *info = &hba->vreg_info;
+
+	if (!info)
+		goto out;
+	else if (!info->vccq)
+		goto out;
+
+	if (unused) {
+		/* shut off the rail here */
+		ret = ufshcd_toggle_vreg(hba->dev, info->vccq, false);
+		/*
+		 * Mark this rail as no longer used, so it doesn't get enabled
+		 * later by mistake
+		 */
+		if (!ret)
+			info->vccq->unused = true;
+	} else {
+		/*
+		 * rail should have been already enabled hence just make sure
+		 * that unused flag is cleared.
+		 */
+		info->vccq->unused = false;
+	}
+out:
+	return ret;
+}
+
 static int __ufshcd_setup_clocks(struct ufs_hba *hba, bool on,
 					bool skip_ref_clk)
 {
@@ -5093,10 +5688,20 @@ static int ufshcd_link_state_transition(struct ufs_hba *hba,
 		   (!check_for_bkops || (check_for_bkops &&
 		    !hba->auto_bkops_enabled))) {
 		/*
+		 * Let's make sure that link is in low power mode, we are doing
+		 * this currently by putting the link in Hibern8. Otherway to
+		 * put the link in low power mode is to send the DME end point
+		 * to device and then send the DME reset command to local
+		 * unipro. But putting the link in hibern8 is much faster.
+		 */
+		ret = ufshcd_uic_hibern8_enter(hba);
+		if (ret)
+			goto out;
+		/*
 		 * Change controller state to "reset state" which
 		 * should also put the link in off/reset state
 		 */
-		ufshcd_hba_stop(hba);
+		ufshcd_hba_stop(hba, true);
 		/*
 		 * TODO: Check if we need any delay to make sure that
 		 * controller is reset
@@ -5111,6 +5716,16 @@ out:
 static void ufshcd_vreg_set_lpm(struct ufs_hba *hba)
 {
 	/*
+	 * It seems some UFS devices may keep drawing more than sleep current
+	 * (atleast for 500us) from UFS rails (especially from VCCQ rail).
+	 * To avoid this situation, add 2ms delay before putting these UFS
+	 * rails in LPM mode.
+	 */
+	if (!ufshcd_is_link_active(hba) &&
+	    hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM)
+		usleep_range(2000, 2100);
+
+	/*
 	 * If UFS device is either in UFS_Sleep turn off VCC rail to save some
 	 * power.
 	 *
@@ -5572,7 +6187,7 @@ void ufshcd_remove(struct ufs_hba *hba)
 	scsi_remove_host(hba->host);
 	/* disable interrupts */
 	ufshcd_disable_intr(hba, hba->intr_mask);
-	ufshcd_hba_stop(hba);
+	ufshcd_hba_stop(hba, true);
 
 	scsi_host_put(hba->host);
 
@@ -5836,6 +6451,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
 	init_waitqueue_head(&hba->dev_cmd.tag_wq);
 
 	ufshcd_init_clk_gating(hba);
+
+	/*
+	 * In order to avoid any spurious interrupt immediately after
+	 * registering UFS controller interrupt handler, clear any pending UFS
+	 * interrupt status and disable all the UFS interrupts.
+	 */
+	ufshcd_writel(hba, ufshcd_readl(hba, REG_INTERRUPT_STATUS),
+		      REG_INTERRUPT_STATUS);
+	ufshcd_writel(hba, 0, REG_INTERRUPT_ENABLE);
+	/*
+	 * Make sure that UFS interrupts are disabled and any pending interrupt
+	 * status is cleared before registering UFS interrupt handler.
+	 */
+	mb();
+
 	/* IRQ registration */
 	err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba);
 	if (err) {
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index e3931d0c94eb..4bb65669f052 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -54,6 +54,7 @@
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/regulator/consumer.h>
+#include "unipro.h"
 
 #include <asm/irq.h>
 #include <asm/byteorder.h>
@@ -383,6 +384,9 @@ struct ufs_init_prefetch {
  * @clk_list_head: UFS host controller clocks list node head
  * @pwr_info: holds current power mode
  * @max_pwr_info: keeps the device max valid pwm
+ * @urgent_bkops_lvl: keeps track of urgent bkops level for device
+ * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
+ *  device is known or not.
  */
 struct ufs_hba {
 	void __iomem *mmio_base;
@@ -470,6 +474,9 @@ struct ufs_hba {
 
 	unsigned int quirks;	/* Deviations from standard UFSHCI spec. */
 
+	/* Device deviations from standard UFS device spec. */
+	unsigned int dev_quirks;
+
 	wait_queue_head_t tm_wq;
 	wait_queue_head_t tm_tag_wq;
 	unsigned long tm_condition;
@@ -509,6 +516,8 @@ struct ufs_hba {
 
 	bool wlun_dev_clr_ua;
 
+	/* Number of lanes available (1 or 2) for Rx/Tx */
+	u32 lanes_per_direction;
 	struct ufs_pa_layer_attr pwr_info;
 	struct ufs_pwr_mode_info max_pwr_info;
 
@@ -533,6 +542,9 @@ struct ufs_hba {
 	struct devfreq *devfreq;
 	struct ufs_clk_scaling clk_scaling;
 	bool is_sys_suspended;
+
+	enum bkops_status urgent_bkops_lvl;
+	bool is_urgent_bkops_lvl_checked;
 };
 
 /* Returns true if clocks can be gated. Otherwise false */
@@ -588,15 +600,9 @@ int ufshcd_alloc_host(struct device *, struct ufs_hba **);
 void ufshcd_dealloc_host(struct ufs_hba *);
 int ufshcd_init(struct ufs_hba * , void __iomem * , unsigned int);
 void ufshcd_remove(struct ufs_hba *);
-
-/**
- * ufshcd_hba_stop - Send controller to reset state
- * @hba: per adapter instance
- */
-static inline void ufshcd_hba_stop(struct ufs_hba *hba)
-{
-	ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
-}
+int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
+				u32 val, unsigned long interval_us,
+				unsigned long timeout_ms, bool can_sleep);
 
 static inline void check_upiu_size(void)
 {
@@ -682,11 +688,27 @@ static inline int ufshcd_dme_peer_get(struct ufs_hba *hba,
 	return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_PEER);
 }
 
+int ufshcd_read_device_desc(struct ufs_hba *hba, u8 *buf, u32 size);
+
+static inline bool ufshcd_is_hs_mode(struct ufs_pa_layer_attr *pwr_info)
+{
+	return (pwr_info->pwr_rx == FAST_MODE ||
+		pwr_info->pwr_rx == FASTAUTO_MODE) &&
+		(pwr_info->pwr_tx == FAST_MODE ||
+		pwr_info->pwr_tx == FASTAUTO_MODE);
+}
+
+#define ASCII_STD true
+
+int ufshcd_read_string_desc(struct ufs_hba *hba, int desc_index, u8 *buf,
+				u32 size, bool ascii);
+
 /* Expose Query-Request API */
 int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode,
 	enum flag_idn idn, bool *flag_res);
 int ufshcd_hold(struct ufs_hba *hba, bool async);
 void ufshcd_release(struct ufs_hba *hba);
+u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
 static inline const char *ufshcd_get_var_name(struct ufs_hba *hba)
diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h
index 0ae0967aaed8..4cb1cc63f1a1 100644
--- a/drivers/scsi/ufs/ufshci.h
+++ b/drivers/scsi/ufs/ufshci.h
@@ -92,6 +92,7 @@ enum {
 	UFSHCI_VERSION_10 = 0x00010000, /* 1.0 */
 	UFSHCI_VERSION_11 = 0x00010100, /* 1.1 */
 	UFSHCI_VERSION_20 = 0x00000200, /* 2.0 */
+	UFSHCI_VERSION_21 = 0x00000210, /* 2.1 */
 };
 
 /*
@@ -170,6 +171,8 @@ enum {
 #define UIC_DATA_LINK_LAYER_ERROR		UFS_BIT(31)
 #define UIC_DATA_LINK_LAYER_ERROR_CODE_MASK	0x7FFF
 #define UIC_DATA_LINK_LAYER_ERROR_PA_INIT	0x2000
+#define UIC_DATA_LINK_LAYER_ERROR_NAC_RECEIVED	0x0001
+#define UIC_DATA_LINK_LAYER_ERROR_TCx_REPLAY_TIMEOUT 0x0002
 
 /* UECN - Host UIC Error Code Network Layer 40h */
 #define UIC_NETWORK_LAYER_ERROR			UFS_BIT(31)
@@ -209,6 +212,7 @@ enum {
 
 /* GenSelectorIndex calculation macros for M-PHY attributes */
 #define UIC_ARG_MPHY_TX_GEN_SEL_INDEX(lane) (lane)
+#define UIC_ARG_MPHY_RX_GEN_SEL_INDEX(lane) (PA_MAXDATALANES + (lane))
 
 #define UIC_ARG_MIB_SEL(attr, sel)	((((attr) & 0xFFFF) << 16) |\
 					 ((sel) & 0xFFFF))
diff --git a/drivers/scsi/ufs/unipro.h b/drivers/scsi/ufs/unipro.h
index 816a8a46efb8..e2854e45f8d3 100644
--- a/drivers/scsi/ufs/unipro.h
+++ b/drivers/scsi/ufs/unipro.h
@@ -15,6 +15,7 @@
 /*
  * M-TX Configuration Attributes
  */
+#define TX_HIBERN8TIME_CAPABILITY		0x000F
 #define TX_MODE					0x0021
 #define TX_HSRATE_SERIES			0x0022
 #define TX_HSGEAR				0x0023
@@ -48,8 +49,12 @@
 #define RX_ENTER_HIBERN8			0x00A7
 #define RX_BYPASS_8B10B_ENABLE			0x00A8
 #define RX_TERMINATION_FORCE_ENABLE		0x0089
+#define RX_MIN_ACTIVATETIME_CAPABILITY		0x008F
+#define RX_HIBERN8TIME_CAPABILITY		0x0092
 
 #define is_mphy_tx_attr(attr)			(attr < RX_MODE)
+#define RX_MIN_ACTIVATETIME_UNIT_US		100
+#define HIBERN8TIME_UNIT_US			100
 /*
  * PHY Adpater attributes
  */
@@ -70,6 +75,7 @@
 #define PA_MAXRXSPEEDFAST	0x1541
 #define PA_MAXRXSPEEDSLOW	0x1542
 #define PA_TXLINKSTARTUPHS	0x1544
+#define PA_LOCAL_TX_LCC_ENABLE	0x155E
 #define PA_TXSPEEDFAST		0x1565
 #define PA_TXSPEEDSLOW		0x1566
 #define PA_REMOTEVERINFO	0x15A0
@@ -110,6 +116,12 @@
 #define PA_STALLNOCONFIGTIME	0x15A3
 #define PA_SAVECONFIGTIME	0x15A4
 
+#define PA_TACTIVATE_TIME_UNIT_US	10
+#define PA_HIBERN8_TIME_UNIT_US		100
+
+/* PHY Adapter Protocol Constants */
+#define PA_MAXDATALANES	4
+
 /* PA power modes */
 enum {
 	FAST_MODE	= 1,
@@ -143,6 +155,16 @@ enum ufs_hs_gear_tag {
 	UFS_HS_G3,		/* HS Gear 3 */
 };
 
+enum ufs_unipro_ver {
+	UFS_UNIPRO_VER_RESERVED = 0,
+	UFS_UNIPRO_VER_1_40 = 1, /* UniPro version 1.40 */
+	UFS_UNIPRO_VER_1_41 = 2, /* UniPro version 1.41 */
+	UFS_UNIPRO_VER_1_6 = 3,  /* UniPro version 1.6 */
+	UFS_UNIPRO_VER_MAX = 4,  /* UniPro unsupported version */
+	/* UniPro version field mask in PA_LOCALVERINFO */
+	UFS_UNIPRO_VER_MASK = 0xF,
+};
+
 /*
  * Data Link Layer Attributes
  */
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index 88260205a261..cb58ef0d9b2c 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -6,6 +6,7 @@ source "drivers/soc/fsl/qe/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
+source "drivers/soc/samsung/Kconfig"
 source "drivers/soc/sunxi/Kconfig"
 source "drivers/soc/tegra/Kconfig"
 source "drivers/soc/ti/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 2afdc74f7491..5ade71306ee1 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -10,6 +10,7 @@ obj-y				+= fsl/
 obj-$(CONFIG_ARCH_MEDIATEK)	+= mediatek/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-$(CONFIG_ARCH_ROCKCHIP)		+= rockchip/
+obj-$(CONFIG_SOC_SAMSUNG)	+= samsung/
 obj-$(CONFIG_ARCH_SUNXI)	+= sunxi/
 obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
 obj-$(CONFIG_SOC_TI)		+= ti/
diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c b/drivers/soc/mediatek/mtk-pmic-wrap.c
index 105597a885cb..0d9b19a78d27 100644
--- a/drivers/soc/mediatek/mtk-pmic-wrap.c
+++ b/drivers/soc/mediatek/mtk-pmic-wrap.c
@@ -60,6 +60,15 @@
 #define PWRAP_MAN_CMD_OP_OUTD		(0x9 << 8)
 #define PWRAP_MAN_CMD_OP_OUTQ		(0xa << 8)
 
+/* macro for Watch Dog Timer Source */
+#define PWRAP_WDT_SRC_EN_STAUPD_TRIG		(1 << 25)
+#define PWRAP_WDT_SRC_EN_HARB_STAUPD_DLE	(1 << 20)
+#define PWRAP_WDT_SRC_EN_HARB_STAUPD_ALE	(1 << 6)
+#define PWRAP_WDT_SRC_MASK_ALL			0xffffffff
+#define PWRAP_WDT_SRC_MASK_NO_STAUPD	~(PWRAP_WDT_SRC_EN_STAUPD_TRIG | \
+					  PWRAP_WDT_SRC_EN_HARB_STAUPD_DLE | \
+					  PWRAP_WDT_SRC_EN_HARB_STAUPD_ALE)
+
 /* macro for slave device wrapper registers */
 #define PWRAP_DEW_BASE			0xbc00
 #define PWRAP_DEW_EVENT_OUT_EN		(PWRAP_DEW_BASE + 0x0)
@@ -412,6 +421,20 @@ static bool pwrap_is_fsm_vldclr(struct pmic_wrapper *wrp)
 	return PWRAP_GET_WACS_FSM(val) == PWRAP_WACS_FSM_WFVLDCLR;
 }
 
+/*
+ * Timeout issue sometimes caused by the last read command
+ * failed because pmic wrap could not got the FSM_VLDCLR
+ * in time after finishing WACS2_CMD. It made state machine
+ * still on FSM_VLDCLR and timeout next time.
+ * Check the status of FSM and clear the vldclr to recovery the
+ * error.
+ */
+static inline void pwrap_leave_fsm_vldclr(struct pmic_wrapper *wrp)
+{
+	if (pwrap_is_fsm_vldclr(wrp))
+		pwrap_writel(wrp, 1, PWRAP_WACS2_VLDCLR);
+}
+
 static bool pwrap_is_sync_idle(struct pmic_wrapper *wrp)
 {
 	return pwrap_readl(wrp, PWRAP_WACS2_RDATA) & PWRAP_STATE_SYNC_IDLE0;
@@ -445,8 +468,10 @@ static int pwrap_write(struct pmic_wrapper *wrp, u32 adr, u32 wdata)
 	int ret;
 
 	ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
-	if (ret)
+	if (ret) {
+		pwrap_leave_fsm_vldclr(wrp);
 		return ret;
+	}
 
 	pwrap_writel(wrp, (1 << 31) | ((adr >> 1) << 16) | wdata,
 			PWRAP_WACS2_CMD);
@@ -459,8 +484,10 @@ static int pwrap_read(struct pmic_wrapper *wrp, u32 adr, u32 *rdata)
 	int ret;
 
 	ret = pwrap_wait_for_state(wrp, pwrap_is_fsm_idle);
-	if (ret)
+	if (ret) {
+		pwrap_leave_fsm_vldclr(wrp);
 		return ret;
+	}
 
 	pwrap_writel(wrp, (adr >> 1) << 16, PWRAP_WACS2_CMD);
 
@@ -804,7 +831,7 @@ MODULE_DEVICE_TABLE(of, of_pwrap_match_tbl);
 
 static int pwrap_probe(struct platform_device *pdev)
 {
-	int ret, irq;
+	int ret, irq, wdt_src;
 	struct pmic_wrapper *wrp;
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *of_id =
@@ -894,7 +921,13 @@ static int pwrap_probe(struct platform_device *pdev)
 
 	/* Initialize watchdog, may not be done by the bootloader */
 	pwrap_writel(wrp, 0xf, PWRAP_WDT_UNIT);
-	pwrap_writel(wrp, 0xffffffff, PWRAP_WDT_SRC_EN);
+	/*
+	 * Since STAUPD was not used on mt8173 platform,
+	 * so STAUPD of WDT_SRC which should be turned off
+	 */
+	wdt_src = pwrap_is_mt8173(wrp) ?
+			PWRAP_WDT_SRC_MASK_NO_STAUPD : PWRAP_WDT_SRC_MASK_ALL;
+	pwrap_writel(wrp, wdt_src, PWRAP_WDT_SRC_EN);
 	pwrap_writel(wrp, 0x1, PWRAP_TIMER_EN);
 	pwrap_writel(wrp, ~((1 << 31) | (1 << 1)), PWRAP_INT_EN);
 
diff --git a/drivers/soc/mediatek/mtk-scpsys.c b/drivers/soc/mediatek/mtk-scpsys.c
index 0221387e5e27..57e781c71e67 100644
--- a/drivers/soc/mediatek/mtk-scpsys.c
+++ b/drivers/soc/mediatek/mtk-scpsys.c
@@ -76,7 +76,7 @@ struct scp_domain_data {
 	bool active_wakeup;
 };
 
-static const struct scp_domain_data scp_domain_data[] __initconst = {
+static const struct scp_domain_data scp_domain_data[] = {
 	[MT8173_POWER_DOMAIN_VDEC] = {
 		.name = "vdec",
 		.sta_mask = PWR_STATUS_VDEC,
@@ -174,12 +174,7 @@ struct scp_domain {
 	struct generic_pm_domain genpd;
 	struct scp *scp;
 	struct clk *clk[MAX_CLKS];
-	u32 sta_mask;
-	void __iomem *ctl_addr;
-	u32 sram_pdn_bits;
-	u32 sram_pdn_ack_bits;
-	u32 bus_prot_mask;
-	bool active_wakeup;
+	const struct scp_domain_data *data;
 	struct regulator *supply;
 };
 
@@ -195,8 +190,9 @@ static int scpsys_domain_is_on(struct scp_domain *scpd)
 {
 	struct scp *scp = scpd->scp;
 
-	u32 status = readl(scp->base + SPM_PWR_STATUS) & scpd->sta_mask;
-	u32 status2 = readl(scp->base + SPM_PWR_STATUS_2ND) & scpd->sta_mask;
+	u32 status = readl(scp->base + SPM_PWR_STATUS) & scpd->data->sta_mask;
+	u32 status2 = readl(scp->base + SPM_PWR_STATUS_2ND) &
+				scpd->data->sta_mask;
 
 	/*
 	 * A domain is on when both status bits are set. If only one is set
@@ -217,8 +213,8 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
 	struct scp *scp = scpd->scp;
 	unsigned long timeout;
 	bool expired;
-	void __iomem *ctl_addr = scpd->ctl_addr;
-	u32 sram_pdn_ack = scpd->sram_pdn_ack_bits;
+	void __iomem *ctl_addr = scp->base + scpd->data->ctl_offs;
+	u32 sram_pdn_ack = scpd->data->sram_pdn_ack_bits;
 	u32 val;
 	int ret;
 	int i;
@@ -273,7 +269,7 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
 	val |= PWR_RST_B_BIT;
 	writel(val, ctl_addr);
 
-	val &= ~scpd->sram_pdn_bits;
+	val &= ~scpd->data->sram_pdn_bits;
 	writel(val, ctl_addr);
 
 	/* wait until SRAM_PDN_ACK all 0 */
@@ -292,9 +288,9 @@ static int scpsys_power_on(struct generic_pm_domain *genpd)
 			expired = true;
 	}
 
-	if (scpd->bus_prot_mask) {
+	if (scpd->data->bus_prot_mask) {
 		ret = mtk_infracfg_clear_bus_protection(scp->infracfg,
-				scpd->bus_prot_mask);
+				scpd->data->bus_prot_mask);
 		if (ret)
 			goto err_pwr_ack;
 	}
@@ -321,21 +317,21 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
 	struct scp *scp = scpd->scp;
 	unsigned long timeout;
 	bool expired;
-	void __iomem *ctl_addr = scpd->ctl_addr;
-	u32 pdn_ack = scpd->sram_pdn_ack_bits;
+	void __iomem *ctl_addr = scp->base + scpd->data->ctl_offs;
+	u32 pdn_ack = scpd->data->sram_pdn_ack_bits;
 	u32 val;
 	int ret;
 	int i;
 
-	if (scpd->bus_prot_mask) {
+	if (scpd->data->bus_prot_mask) {
 		ret = mtk_infracfg_set_bus_protection(scp->infracfg,
-				scpd->bus_prot_mask);
+				scpd->data->bus_prot_mask);
 		if (ret)
 			goto out;
 	}
 
 	val = readl(ctl_addr);
-	val |= scpd->sram_pdn_bits;
+	val |= scpd->data->sram_pdn_bits;
 	writel(val, ctl_addr);
 
 	/* wait until SRAM_PDN_ACK all 1 */
@@ -409,10 +405,10 @@ static bool scpsys_active_wakeup(struct device *dev)
 	genpd = pd_to_genpd(dev->pm_domain);
 	scpd = container_of(genpd, struct scp_domain, genpd);
 
-	return scpd->active_wakeup;
+	return scpd->data->active_wakeup;
 }
 
-static int __init scpsys_probe(struct platform_device *pdev)
+static int scpsys_probe(struct platform_device *pdev)
 {
 	struct genpd_onecell_data *pd_data;
 	struct resource *res;
@@ -485,12 +481,7 @@ static int __init scpsys_probe(struct platform_device *pdev)
 		pd_data->domains[i] = genpd;
 		scpd->scp = scp;
 
-		scpd->sta_mask = data->sta_mask;
-		scpd->ctl_addr = scp->base + data->ctl_offs;
-		scpd->sram_pdn_bits = data->sram_pdn_bits;
-		scpd->sram_pdn_ack_bits = data->sram_pdn_ack_bits;
-		scpd->bus_prot_mask = data->bus_prot_mask;
-		scpd->active_wakeup = data->active_wakeup;
+		scpd->data = data;
 		for (j = 0; j < MAX_CLKS && data->clk_id[j]; j++)
 			scpd->clk[j] = clk[data->clk_id[j]];
 
@@ -500,14 +491,13 @@ static int __init scpsys_probe(struct platform_device *pdev)
 		genpd->dev_ops.active_wakeup = scpsys_active_wakeup;
 
 		/*
-		 * Initially turn on all domains to make the domains usable
-		 * with !CONFIG_PM and to get the hardware in sync with the
-		 * software.  The unused domains will be switched off during
-		 * late_init time.
+		 * With CONFIG_PM disabled turn on all domains to make the
+		 * hardware usable.
 		 */
-		genpd->power_on(genpd);
+		if (!IS_ENABLED(CONFIG_PM))
+			genpd->power_on(genpd);
 
-		pm_genpd_init(genpd, NULL, false);
+		pm_genpd_init(genpd, NULL, true);
 	}
 
 	/*
@@ -542,10 +532,12 @@ static const struct of_device_id of_scpsys_match_tbl[] = {
 };
 
 static struct platform_driver scpsys_drv = {
+	.probe = scpsys_probe,
 	.driver = {
 		.name = "mtk-scpsys",
+		.suppress_bind_attrs = true,
 		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(of_scpsys_match_tbl),
 	},
 };
-builtin_platform_driver_probe(scpsys_drv, scpsys_probe);
+builtin_platform_driver(scpsys_drv);
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 534c58937a56..43155e1f97b9 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -18,6 +18,7 @@
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 #include <dt-bindings/power/rk3288-power.h>
+#include <dt-bindings/power/rk3368-power.h>
 
 struct rockchip_domain_info {
 	int pwr_mask;
@@ -75,6 +76,9 @@ struct rockchip_pmu {
 #define DOMAIN_RK3288(pwr, status, req)		\
 	DOMAIN(pwr, status, req, req, (req) + 16)
 
+#define DOMAIN_RK3368(pwr, status, req)		\
+	DOMAIN(pwr, status, req, (req) + 16, req)
+
 static bool rockchip_pmu_domain_is_idle(struct rockchip_pm_domain *pd)
 {
 	struct rockchip_pmu *pmu = pd->pmu;
@@ -419,6 +423,7 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
 		if (error) {
 			dev_err(dev, "failed to handle node %s: %d\n",
 				node->name, error);
+			of_node_put(node);
 			goto err_out;
 		}
 	}
@@ -444,6 +449,14 @@ static const struct rockchip_domain_info rk3288_pm_domains[] = {
 	[RK3288_PD_GPU]		= DOMAIN_RK3288(9, 9, 2),
 };
 
+static const struct rockchip_domain_info rk3368_pm_domains[] = {
+	[RK3368_PD_PERI]	= DOMAIN_RK3368(13, 12, 6),
+	[RK3368_PD_VIO]		= DOMAIN_RK3368(15, 14, 8),
+	[RK3368_PD_VIDEO]	= DOMAIN_RK3368(14, 13, 7),
+	[RK3368_PD_GPU_0]	= DOMAIN_RK3368(16, 15, 2),
+	[RK3368_PD_GPU_1]	= DOMAIN_RK3368(17, 16, 2),
+};
+
 static const struct rockchip_pmu_info rk3288_pmu = {
 	.pwr_offset = 0x08,
 	.status_offset = 0x0c,
@@ -461,11 +474,32 @@ static const struct rockchip_pmu_info rk3288_pmu = {
 	.domain_info = rk3288_pm_domains,
 };
 
+static const struct rockchip_pmu_info rk3368_pmu = {
+	.pwr_offset = 0x0c,
+	.status_offset = 0x10,
+	.req_offset = 0x3c,
+	.idle_offset = 0x40,
+	.ack_offset = 0x40,
+
+	.core_pwrcnt_offset = 0x48,
+	.gpu_pwrcnt_offset = 0x50,
+
+	.core_power_transition_time = 24,
+	.gpu_power_transition_time = 24,
+
+	.num_domains = ARRAY_SIZE(rk3368_pm_domains),
+	.domain_info = rk3368_pm_domains,
+};
+
 static const struct of_device_id rockchip_pm_domain_dt_match[] = {
 	{
 		.compatible = "rockchip,rk3288-power-controller",
 		.data = (void *)&rk3288_pmu,
 	},
+	{
+		.compatible = "rockchip,rk3368-power-controller",
+		.data = (void *)&rk3368_pmu,
+	},
 	{ /* sentinel */ },
 };
 
diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig
new file mode 100644
index 000000000000..d7fc123006a3
--- /dev/null
+++ b/drivers/soc/samsung/Kconfig
@@ -0,0 +1,13 @@
+#
+# SAMSUNG SoC drivers
+#
+menuconfig SOC_SAMSUNG
+	bool "Samsung SoC driver support" if COMPILE_TEST
+
+if SOC_SAMSUNG
+
+config EXYNOS_PMU
+	bool "Exynos PMU controller driver" if COMPILE_TEST
+	depends on (ARM && ARCH_EXYNOS) || ((ARM || ARM64) && COMPILE_TEST)
+
+endif
diff --git a/drivers/soc/samsung/Makefile b/drivers/soc/samsung/Makefile
new file mode 100644
index 000000000000..f64ac4d80564
--- /dev/null
+++ b/drivers/soc/samsung/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_EXYNOS_PMU)	+= exynos-pmu.o exynos3250-pmu.o exynos4-pmu.o \
+					exynos5250-pmu.o exynos5420-pmu.o
diff --git a/drivers/soc/samsung/exynos-pmu.c b/drivers/soc/samsung/exynos-pmu.c
new file mode 100644
index 000000000000..0acdfd82e751
--- /dev/null
+++ b/drivers/soc/samsung/exynos-pmu.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS - CPU PMU(Power Management Unit) support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+
+#include <linux/soc/samsung/exynos-regs-pmu.h>
+#include <linux/soc/samsung/exynos-pmu.h>
+
+#include "exynos-pmu.h"
+
+struct exynos_pmu_context {
+	struct device *dev;
+	const struct exynos_pmu_data *pmu_data;
+};
+
+void __iomem *pmu_base_addr;
+static struct exynos_pmu_context *pmu_context;
+
+void pmu_raw_writel(u32 val, u32 offset)
+{
+	writel_relaxed(val, pmu_base_addr + offset);
+}
+
+u32 pmu_raw_readl(u32 offset)
+{
+	return readl_relaxed(pmu_base_addr + offset);
+}
+
+void exynos_sys_powerdown_conf(enum sys_powerdown mode)
+{
+	unsigned int i;
+	const struct exynos_pmu_data *pmu_data;
+
+	if (!pmu_context)
+		return;
+
+	pmu_data = pmu_context->pmu_data;
+
+	if (pmu_data->powerdown_conf)
+		pmu_data->powerdown_conf(mode);
+
+	if (pmu_data->pmu_config) {
+		for (i = 0; (pmu_data->pmu_config[i].offset != PMU_TABLE_END); i++)
+			pmu_raw_writel(pmu_data->pmu_config[i].val[mode],
+					pmu_data->pmu_config[i].offset);
+	}
+
+	if (pmu_data->powerdown_conf_extra)
+		pmu_data->powerdown_conf_extra(mode);
+
+	if (pmu_data->pmu_config_extra) {
+		for (i = 0; pmu_data->pmu_config_extra[i].offset != PMU_TABLE_END; i++)
+			pmu_raw_writel(pmu_data->pmu_config_extra[i].val[mode],
+					pmu_data->pmu_config_extra[i].offset);
+	}
+}
+
+/*
+ * PMU platform driver and devicetree bindings.
+ */
+static const struct of_device_id exynos_pmu_of_device_ids[] = {
+	{
+		.compatible = "samsung,exynos3250-pmu",
+		.data = &exynos3250_pmu_data,
+	}, {
+		.compatible = "samsung,exynos4210-pmu",
+		.data = &exynos4210_pmu_data,
+	}, {
+		.compatible = "samsung,exynos4212-pmu",
+		.data = &exynos4212_pmu_data,
+	}, {
+		.compatible = "samsung,exynos4412-pmu",
+		.data = &exynos4412_pmu_data,
+	}, {
+		.compatible = "samsung,exynos5250-pmu",
+		.data = &exynos5250_pmu_data,
+	}, {
+		.compatible = "samsung,exynos5420-pmu",
+		.data = &exynos5420_pmu_data,
+	},
+	{ /*sentinel*/ },
+};
+
+static int exynos_pmu_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pmu_base_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pmu_base_addr))
+		return PTR_ERR(pmu_base_addr);
+
+	pmu_context = devm_kzalloc(&pdev->dev,
+			sizeof(struct exynos_pmu_context),
+			GFP_KERNEL);
+	if (!pmu_context) {
+		dev_err(dev, "Cannot allocate memory.\n");
+		return -ENOMEM;
+	}
+	pmu_context->dev = dev;
+
+	match = of_match_node(exynos_pmu_of_device_ids, dev->of_node);
+
+	pmu_context->pmu_data = match->data;
+
+	if (pmu_context->pmu_data->pmu_init)
+		pmu_context->pmu_data->pmu_init();
+
+	platform_set_drvdata(pdev, pmu_context);
+
+	dev_dbg(dev, "Exynos PMU Driver probe done\n");
+	return 0;
+}
+
+static struct platform_driver exynos_pmu_driver = {
+	.driver  = {
+		.name   = "exynos-pmu",
+		.of_match_table = exynos_pmu_of_device_ids,
+	},
+	.probe = exynos_pmu_probe,
+};
+
+static int __init exynos_pmu_init(void)
+{
+	return platform_driver_register(&exynos_pmu_driver);
+
+}
+postcore_initcall(exynos_pmu_init);
diff --git a/drivers/soc/samsung/exynos-pmu.h b/drivers/soc/samsung/exynos-pmu.h
new file mode 100644
index 000000000000..a469e366fead
--- /dev/null
+++ b/drivers/soc/samsung/exynos-pmu.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Header for EXYNOS PMU Driver support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __EXYNOS_PMU_H
+#define __EXYNOS_PMU_H
+
+#include <linux/io.h>
+
+#define PMU_TABLE_END	(-1U)
+
+struct exynos_pmu_conf {
+	unsigned int offset;
+	u8 val[NUM_SYS_POWERDOWN];
+};
+
+struct exynos_pmu_data {
+	const struct exynos_pmu_conf *pmu_config;
+	const struct exynos_pmu_conf *pmu_config_extra;
+
+	void (*pmu_init)(void);
+	void (*powerdown_conf)(enum sys_powerdown);
+	void (*powerdown_conf_extra)(enum sys_powerdown);
+};
+
+extern void __iomem *pmu_base_addr;
+/* list of all exported SoC specific data */
+extern const struct exynos_pmu_data exynos3250_pmu_data;
+extern const struct exynos_pmu_data exynos4210_pmu_data;
+extern const struct exynos_pmu_data exynos4212_pmu_data;
+extern const struct exynos_pmu_data exynos4412_pmu_data;
+extern const struct exynos_pmu_data exynos5250_pmu_data;
+extern const struct exynos_pmu_data exynos5420_pmu_data;
+
+extern void pmu_raw_writel(u32 val, u32 offset);
+extern u32 pmu_raw_readl(u32 offset);
+#endif /* __EXYNOS_PMU_H */
diff --git a/drivers/soc/samsung/exynos3250-pmu.c b/drivers/soc/samsung/exynos3250-pmu.c
new file mode 100644
index 000000000000..20b3ab8aa790
--- /dev/null
+++ b/drivers/soc/samsung/exynos3250-pmu.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2011-2015 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS3250 - CPU PMU (Power Management Unit) support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/soc/samsung/exynos-regs-pmu.h>
+#include <linux/soc/samsung/exynos-pmu.h>
+
+#include "exynos-pmu.h"
+
+static struct exynos_pmu_conf exynos3250_pmu_config[] = {
+	/* { .offset = offset, .val = { AFTR, W-AFTR, SLEEP } */
+	{ EXYNOS3_ARM_CORE0_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS3_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS3_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS3_ARM_CORE1_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS3_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS3_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS3_ISP_ARM_SYS_PWR_REG,			{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS3_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS3_ARM_COMMON_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS3_ARM_L2_SYS_PWR_REG,			{ 0x0, 0x0, 0x3} },
+	{ EXYNOS3_CMU_ACLKSTOP_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_CMU_SCLKSTOP_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_CMU_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_DRAM_FREQ_DOWN_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_DDRPHY_DLLOFF_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_LPDDR_PHY_DLL_LOCK_SYS_PWR_REG,	{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_CMU_ACLKSTOP_COREBLK_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_SCLKSTOP_COREBLK_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_COREBLK_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_APLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_MPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_BPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_VPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_EPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_UPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_EPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_MPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_BPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_CAM_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_MFC_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_G3D_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_LCD0_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_CLKSTOP_MAUDIO_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_CAM_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_MFC_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_G3D_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_LCD0_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_CMU_RESET_MAUDIO_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS3_TOP_BUS_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS3_TOP_RETENTION_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_TOP_PWR_SYS_PWR_REG,			{ 0x3, 0x3, 0x3} },
+	{ EXYNOS3_TOP_BUS_COREBLK_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS3_TOP_RETENTION_COREBLK_SYS_PWR_REG,	{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_TOP_PWR_COREBLK_SYS_PWR_REG,		{ 0x3, 0x3, 0x3} },
+	{ EXYNOS3_LOGIC_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_OSCCLK_GATE_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS3_LOGIC_RESET_COREBLK_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_OSCCLK_GATE_COREBLK_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS3_PAD_RETENTION_DRAM_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_MAUDIO_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_GPIO_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_UART_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_MMC0_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_MMC1_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_MMC2_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_SPI_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_EBIA_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_EBIB_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_RETENTION_JTAG_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_ISOLATION_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_PAD_ALV_SEL_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_XUSBXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_XXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_EXT_REGULATOR_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_EXT_REGULATOR_COREBLK_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_GPIO_MODE_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_GPIO_MODE_MAUDIO_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_TOP_ASB_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_TOP_ASB_ISOLATION_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_TOP_ASB_RESET_COREBLK_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS3_TOP_ASB_ISOLATION_COREBLK_SYS_PWR_REG, { 0x1, 0x1, 0x0} },
+	{ EXYNOS3_CAM_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_MFC_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_G3D_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_LCD0_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_ISP_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_MAUDIO_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS3_CMU_SYSCLK_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ PMU_TABLE_END,},
+};
+
+static unsigned int const exynos3250_list_feed[] = {
+	EXYNOS3_ARM_CORE_OPTION(0),
+	EXYNOS3_ARM_CORE_OPTION(1),
+	EXYNOS3_ARM_CORE_OPTION(2),
+	EXYNOS3_ARM_CORE_OPTION(3),
+	EXYNOS3_ARM_COMMON_OPTION,
+	EXYNOS3_TOP_PWR_OPTION,
+	EXYNOS3_CORE_TOP_PWR_OPTION,
+	S5P_CAM_OPTION,
+	S5P_MFC_OPTION,
+	S5P_G3D_OPTION,
+	S5P_LCD0_OPTION,
+	S5P_ISP_OPTION,
+};
+
+static void exynos3250_powerdown_conf_extra(enum sys_powerdown mode)
+{
+	unsigned int i;
+	unsigned int tmp;
+
+	/* Enable only SC_FEEDBACK */
+	for (i = 0; i < ARRAY_SIZE(exynos3250_list_feed); i++) {
+		tmp = pmu_raw_readl(exynos3250_list_feed[i]);
+		tmp &= ~(EXYNOS3_OPTION_USE_SC_COUNTER);
+		tmp |= EXYNOS3_OPTION_USE_SC_FEEDBACK;
+		pmu_raw_writel(tmp, exynos3250_list_feed[i]);
+	}
+
+	if (mode != SYS_SLEEP)
+		return;
+
+	pmu_raw_writel(XUSBXTI_DURATION, EXYNOS3_XUSBXTI_DURATION);
+	pmu_raw_writel(XXTI_DURATION, EXYNOS3_XXTI_DURATION);
+	pmu_raw_writel(EXT_REGULATOR_DURATION, EXYNOS3_EXT_REGULATOR_DURATION);
+	pmu_raw_writel(EXT_REGULATOR_COREBLK_DURATION,
+		       EXYNOS3_EXT_REGULATOR_COREBLK_DURATION);
+}
+
+static void exynos3250_pmu_init(void)
+{
+	unsigned int value;
+
+	/*
+	 * To prevent from issuing new bus request form L2 memory system
+	 * If core status is power down, should be set '1' to L2 power down
+	 */
+	value = pmu_raw_readl(EXYNOS3_ARM_COMMON_OPTION);
+	value |= EXYNOS3_OPTION_SKIP_DEACTIVATE_ACEACP_IN_PWDN;
+	pmu_raw_writel(value, EXYNOS3_ARM_COMMON_OPTION);
+
+	/* Enable USE_STANDBY_WFI for all CORE */
+	pmu_raw_writel(S5P_USE_STANDBY_WFI_ALL, S5P_CENTRAL_SEQ_OPTION);
+
+	/*
+	 * Set PSHOLD port for output high
+	 */
+	value = pmu_raw_readl(S5P_PS_HOLD_CONTROL);
+	value |= S5P_PS_HOLD_OUTPUT_HIGH;
+	pmu_raw_writel(value, S5P_PS_HOLD_CONTROL);
+
+	/*
+	 * Enable signal for PSHOLD port
+	 */
+	value = pmu_raw_readl(S5P_PS_HOLD_CONTROL);
+	value |= S5P_PS_HOLD_EN;
+	pmu_raw_writel(value, S5P_PS_HOLD_CONTROL);
+}
+
+const struct exynos_pmu_data exynos3250_pmu_data = {
+	.pmu_config	= exynos3250_pmu_config,
+	.pmu_init	= exynos3250_pmu_init,
+	.powerdown_conf_extra	= exynos3250_powerdown_conf_extra,
+};
diff --git a/drivers/soc/samsung/exynos4-pmu.c b/drivers/soc/samsung/exynos4-pmu.c
new file mode 100644
index 000000000000..bc4fa73bed11
--- /dev/null
+++ b/drivers/soc/samsung/exynos4-pmu.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2011-2015 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS4 - CPU PMU(Power Management Unit) support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/soc/samsung/exynos-regs-pmu.h>
+#include <linux/soc/samsung/exynos-pmu.h>
+
+#include "exynos-pmu.h"
+
+static const struct exynos_pmu_conf exynos4210_pmu_config[] = {
+	/* { .offset = offset, .val = { AFTR, LPA, SLEEP } */
+	{ S5P_ARM_CORE0_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE0,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL0,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_ARM_CORE1_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE1,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL1,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_ARM_COMMON_LOWPWR,		{ 0x0, 0x0, 0x2 } },
+	{ S5P_L2_0_LOWPWR,			{ 0x2, 0x2, 0x3 } },
+	{ S5P_L2_1_LOWPWR,			{ 0x2, 0x2, 0x3 } },
+	{ S5P_CMU_ACLKSTOP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_SCLKSTOP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_APLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_MPLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_VPLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_EPLL_SYSCLK_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_GPS_ALIVE_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_GPSALIVE_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_CAM_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_TV_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_MFC_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_G3D_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_LCD0_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_LCD1_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_MAUDIO_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_CLKSTOP_GPS_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_CAM_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_TV_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_MFC_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_G3D_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_LCD0_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_LCD1_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_MAUDIO_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_GPS_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_TOP_BUS_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_TOP_RETENTION_LOWPWR,		{ 0x1, 0x0, 0x1 } },
+	{ S5P_TOP_PWR_LOWPWR,			{ 0x3, 0x0, 0x3 } },
+	{ S5P_LOGIC_RESET_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_ONENAND_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_MODIMIF_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_G2D_ACP_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_USBOTG_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_HSMMC_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_CSSYS_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_SECSS_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_PCIE_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_SATA_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_DRAM_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MAUDIO_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_PAD_RETENTION_GPIO_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_UART_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MMCA_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MMCB_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_EBIA_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_EBIB_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_ISOLATION_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_ALV_SEL_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_XUSBXTI_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_XXTI_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_EXT_REGULATOR_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_GPIO_MODE_LOWPWR,			{ 0x1, 0x0, 0x0 } },
+	{ S5P_GPIO_MODE_MAUDIO_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CAM_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_TV_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_MFC_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_G3D_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_LCD0_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_LCD1_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_MAUDIO_LOWPWR,			{ 0x7, 0x7, 0x0 } },
+	{ S5P_GPS_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_GPS_ALIVE_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ PMU_TABLE_END,},
+};
+
+static const struct exynos_pmu_conf exynos4x12_pmu_config[] = {
+	{ S5P_ARM_CORE0_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE0,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL0,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_ARM_CORE1_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE1,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL1,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_ISP_ARM_LOWPWR,			{ 0x1, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_ISP_ARM_LOCAL_LOWPWR,	{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_ISP_ARM_CENTRAL_LOWPWR,	{ 0x0, 0x0, 0x0 } },
+	{ S5P_ARM_COMMON_LOWPWR,		{ 0x0, 0x0, 0x2 } },
+	{ S5P_L2_0_LOWPWR,			{ 0x0, 0x0, 0x3 } },
+	/* XXX_OPTION register should be set other field */
+	{ S5P_ARM_L2_0_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_L2_1_LOWPWR,			{ 0x0, 0x0, 0x3 } },
+	{ S5P_ARM_L2_1_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_CMU_ACLKSTOP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_SCLKSTOP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_DRAM_FREQ_DOWN_LOWPWR,		{ 0x1, 0x1, 0x1 } },
+	{ S5P_DDRPHY_DLLOFF_LOWPWR,		{ 0x1, 0x1, 0x1 } },
+	{ S5P_LPDDR_PHY_DLL_LOCK_LOWPWR,	{ 0x1, 0x1, 0x1 } },
+	{ S5P_CMU_ACLKSTOP_COREBLK_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_SCLKSTOP_COREBLK_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_COREBLK_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_APLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_MPLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_VPLL_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_EPLL_SYSCLK_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_MPLLUSER_SYSCLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_GPS_ALIVE_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_GPSALIVE_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_CAM_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_TV_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_MFC_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_G3D_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_LCD0_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_ISP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_MAUDIO_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_CLKSTOP_GPS_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_CAM_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_TV_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_MFC_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_G3D_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_LCD0_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_ISP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_RESET_MAUDIO_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_CMU_RESET_GPS_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_TOP_BUS_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_TOP_RETENTION_LOWPWR,		{ 0x1, 0x0, 0x1 } },
+	{ S5P_TOP_PWR_LOWPWR,			{ 0x3, 0x0, 0x3 } },
+	{ S5P_TOP_BUS_COREBLK_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_TOP_RETENTION_COREBLK_LOWPWR,	{ 0x1, 0x0, 0x1 } },
+	{ S5P_TOP_PWR_COREBLK_LOWPWR,		{ 0x3, 0x0, 0x3 } },
+	{ S5P_LOGIC_RESET_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_OSCCLK_GATE_LOWPWR,		{ 0x1, 0x0, 0x1 } },
+	{ S5P_LOGIC_RESET_COREBLK_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_OSCCLK_GATE_COREBLK_LOWPWR,	{ 0x1, 0x0, 0x1 } },
+	{ S5P_ONENAND_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_ONENAND_MEM_OPTION,		{ 0x10, 0x10, 0x0 } },
+	{ S5P_HSI_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_HSI_MEM_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_G2D_ACP_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_G2D_ACP_MEM_OPTION,		{ 0x10, 0x10, 0x0 } },
+	{ S5P_USBOTG_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_USBOTG_MEM_OPTION,		{ 0x10, 0x10, 0x0 } },
+	{ S5P_HSMMC_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_HSMMC_MEM_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_CSSYS_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_CSSYS_MEM_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_SECSS_MEM_LOWPWR,			{ 0x3, 0x0, 0x0 } },
+	{ S5P_SECSS_MEM_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ S5P_ROTATOR_MEM_LOWPWR,		{ 0x3, 0x0, 0x0 } },
+	{ S5P_ROTATOR_MEM_OPTION,		{ 0x10, 0x10, 0x0 } },
+	{ S5P_PAD_RETENTION_DRAM_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MAUDIO_LOWPWR,	{ 0x1, 0x1, 0x0 } },
+	{ S5P_PAD_RETENTION_GPIO_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_UART_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MMCA_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_MMCB_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_EBIA_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_EBIB_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_GPIO_COREBLK_LOWPWR, { 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_ISOLATION_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_ISOLATION_COREBLK_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_PAD_RETENTION_ALV_SEL_LOWPWR,	{ 0x1, 0x0, 0x0 } },
+	{ S5P_XUSBXTI_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_XXTI_LOWPWR,			{ 0x1, 0x1, 0x0 } },
+	{ S5P_EXT_REGULATOR_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_GPIO_MODE_LOWPWR,			{ 0x1, 0x0, 0x0 } },
+	{ S5P_GPIO_MODE_COREBLK_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_GPIO_MODE_MAUDIO_LOWPWR,		{ 0x1, 0x1, 0x0 } },
+	{ S5P_TOP_ASB_RESET_LOWPWR,		{ 0x1, 0x1, 0x1 } },
+	{ S5P_TOP_ASB_ISOLATION_LOWPWR,		{ 0x1, 0x0, 0x1 } },
+	{ S5P_CAM_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_TV_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_MFC_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_G3D_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_LCD0_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_ISP_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_MAUDIO_LOWPWR,			{ 0x7, 0x7, 0x0 } },
+	{ S5P_GPS_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_GPS_ALIVE_LOWPWR,			{ 0x7, 0x0, 0x0 } },
+	{ S5P_CMU_SYSCLK_ISP_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ S5P_CMU_SYSCLK_GPS_LOWPWR,		{ 0x1, 0x0, 0x0 } },
+	{ PMU_TABLE_END,},
+};
+
+static const struct exynos_pmu_conf exynos4412_pmu_config[] = {
+	{ S5P_ARM_CORE2_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE2,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL2,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_ARM_CORE3_LOWPWR,			{ 0x0, 0x0, 0x2 } },
+	{ S5P_DIS_IRQ_CORE3,			{ 0x0, 0x0, 0x0 } },
+	{ S5P_DIS_IRQ_CENTRAL3,			{ 0x0, 0x0, 0x0 } },
+	{ PMU_TABLE_END,},
+};
+
+const struct exynos_pmu_data exynos4210_pmu_data = {
+	.pmu_config	= exynos4210_pmu_config,
+};
+
+const struct exynos_pmu_data exynos4212_pmu_data = {
+	.pmu_config	= exynos4x12_pmu_config,
+};
+
+const struct exynos_pmu_data exynos4412_pmu_data = {
+	.pmu_config		= exynos4x12_pmu_config,
+	.pmu_config_extra	= exynos4412_pmu_config,
+};
diff --git a/drivers/soc/samsung/exynos5250-pmu.c b/drivers/soc/samsung/exynos5250-pmu.c
new file mode 100644
index 000000000000..3fac42561964
--- /dev/null
+++ b/drivers/soc/samsung/exynos5250-pmu.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2011-2015 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS5250 - CPU PMU (Power Management Unit) support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/soc/samsung/exynos-regs-pmu.h>
+#include <linux/soc/samsung/exynos-pmu.h>
+
+#include "exynos-pmu.h"
+
+static const struct exynos_pmu_conf exynos5250_pmu_config[] = {
+	/* { .offset = offset, .val = { AFTR, LPA, SLEEP } */
+	{ EXYNOS5_ARM_CORE0_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5_ARM_CORE1_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5_FSYS_ARM_SYS_PWR_REG,			{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_FSYS_ARM_CENTRAL_SYS_PWR_REG,	{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_ISP_ARM_SYS_PWR_REG,			{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_ARM_COMMON_SYS_PWR_REG,		{ 0x0, 0x0, 0x2} },
+	{ EXYNOS5_ARM_L2_SYS_PWR_REG,			{ 0x3, 0x3, 0x3} },
+	{ EXYNOS5_ARM_L2_OPTION,			{ 0x10, 0x10, 0x0 } },
+	{ EXYNOS5_CMU_ACLKSTOP_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_SCLKSTOP_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_CMU_ACLKSTOP_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_SCLKSTOP_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_RESET_SYSMEM_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_DRAM_FREQ_DOWN_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_DDRPHY_DLLOFF_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_DDRPHY_DLLLOCK_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_APLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_MPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_VPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_EPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_BPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_MPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_BPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_TOP_BUS_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_TOP_RETENTION_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_TOP_PWR_SYS_PWR_REG,			{ 0x3, 0x0, 0x3} },
+	{ EXYNOS5_TOP_BUS_SYSMEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_TOP_RETENTION_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_TOP_PWR_SYSMEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x3} },
+	{ EXYNOS5_LOGIC_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_OSCCLK_GATE_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_LOGIC_RESET_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_OSCCLK_GATE_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_USBOTG_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_G2D_MEM_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_USBDRD_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_SDMMC_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_CSSYS_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_SECSS_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_ROTATOR_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_INTRAM_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_INTROM_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_JPEG_MEM_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_JPEG_MEM_OPTION,			{ 0x10, 0x10, 0x0} },
+	{ EXYNOS5_HSI_MEM_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_MCUIOP_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_SATA_MEM_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_GPIO_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_UART_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_MMCA_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_MMCB_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_EBIA_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_EBIB_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_SPI_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_GPIO_SYSMEM_SYS_PWR_REG, { 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_ISOLATION_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_ISOLATION_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_ALV_SEL_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_XUSBXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_XXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_EXT_REGULATOR_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_GPIO_MODE_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_GPIO_MODE_SYSMEM_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_GPIO_MODE_MAU_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_TOP_ASB_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_TOP_ASB_ISOLATION_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_GSCL_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_ISP_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_MFC_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_G3D_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_DISP1_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_MAU_SYS_PWR_REG,			{ 0x7, 0x7, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_GSCL_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_MFC_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_G3D_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_DISP1_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_MAU_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_GSCL_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_MFC_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_G3D_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_DISP1_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_MAU_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_CMU_RESET_GSCL_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_ISP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_MFC_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_G3D_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_DISP1_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_MAU_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ PMU_TABLE_END,},
+};
+
+static unsigned int const exynos5_list_both_cnt_feed[] = {
+	EXYNOS5_ARM_CORE0_OPTION,
+	EXYNOS5_ARM_CORE1_OPTION,
+	EXYNOS5_ARM_COMMON_OPTION,
+	EXYNOS5_GSCL_OPTION,
+	EXYNOS5_ISP_OPTION,
+	EXYNOS5_MFC_OPTION,
+	EXYNOS5_G3D_OPTION,
+	EXYNOS5_DISP1_OPTION,
+	EXYNOS5_MAU_OPTION,
+	EXYNOS5_TOP_PWR_OPTION,
+	EXYNOS5_TOP_PWR_SYSMEM_OPTION,
+};
+
+static unsigned int const exynos5_list_disable_wfi_wfe[] = {
+	EXYNOS5_ARM_CORE1_OPTION,
+	EXYNOS5_FSYS_ARM_OPTION,
+	EXYNOS5_ISP_ARM_OPTION,
+};
+
+static void exynos5250_pmu_init(void)
+{
+	unsigned int value;
+	/*
+	 * When SYS_WDTRESET is set, watchdog timer reset request
+	 * is ignored by power management unit.
+	 */
+	value = pmu_raw_readl(EXYNOS5_AUTO_WDTRESET_DISABLE);
+	value &= ~EXYNOS5_SYS_WDTRESET;
+	pmu_raw_writel(value, EXYNOS5_AUTO_WDTRESET_DISABLE);
+
+	value = pmu_raw_readl(EXYNOS5_MASK_WDTRESET_REQUEST);
+	value &= ~EXYNOS5_SYS_WDTRESET;
+	pmu_raw_writel(value, EXYNOS5_MASK_WDTRESET_REQUEST);
+}
+
+static void exynos5_powerdown_conf(enum sys_powerdown mode)
+{
+	unsigned int i;
+	unsigned int tmp;
+
+	/*
+	 * Enable both SC_FEEDBACK and SC_COUNTER
+	 */
+	for (i = 0; i < ARRAY_SIZE(exynos5_list_both_cnt_feed); i++) {
+		tmp = pmu_raw_readl(exynos5_list_both_cnt_feed[i]);
+		tmp |= (EXYNOS5_USE_SC_FEEDBACK |
+			EXYNOS5_USE_SC_COUNTER);
+		pmu_raw_writel(tmp, exynos5_list_both_cnt_feed[i]);
+	}
+
+	/*
+	 * SKIP_DEACTIVATE_ACEACP_IN_PWDN_BITFIELD Enable
+	 */
+	tmp = pmu_raw_readl(EXYNOS5_ARM_COMMON_OPTION);
+	tmp |= EXYNOS5_SKIP_DEACTIVATE_ACEACP_IN_PWDN;
+	pmu_raw_writel(tmp, EXYNOS5_ARM_COMMON_OPTION);
+
+	/*
+	 * Disable WFI/WFE on XXX_OPTION
+	 */
+	for (i = 0; i < ARRAY_SIZE(exynos5_list_disable_wfi_wfe); i++) {
+		tmp = pmu_raw_readl(exynos5_list_disable_wfi_wfe[i]);
+		tmp &= ~(EXYNOS5_OPTION_USE_STANDBYWFE |
+			 EXYNOS5_OPTION_USE_STANDBYWFI);
+		pmu_raw_writel(tmp, exynos5_list_disable_wfi_wfe[i]);
+	}
+}
+
+const struct exynos_pmu_data exynos5250_pmu_data = {
+	.pmu_config	= exynos5250_pmu_config,
+	.pmu_init	= exynos5250_pmu_init,
+	.powerdown_conf	= exynos5_powerdown_conf,
+};
diff --git a/drivers/soc/samsung/exynos5420-pmu.c b/drivers/soc/samsung/exynos5420-pmu.c
new file mode 100644
index 000000000000..b962fb6a5d22
--- /dev/null
+++ b/drivers/soc/samsung/exynos5420-pmu.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 2011-2015 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ *
+ * EXYNOS5420 - CPU PMU (Power Management Unit) support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/pm.h>
+#include <linux/soc/samsung/exynos-regs-pmu.h>
+#include <linux/soc/samsung/exynos-pmu.h>
+
+#include <asm/cputype.h>
+
+#include "exynos-pmu.h"
+
+static struct exynos_pmu_conf exynos5420_pmu_config[] = {
+	/* { .offset = offset, .val = { AFTR, LPA, SLEEP } */
+	{ EXYNOS5_ARM_CORE0_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE0_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE0_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5_ARM_CORE1_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE1_LOCAL_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ARM_CORE1_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_ARM_CORE2_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_ARM_CORE3_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_ARM_CORE3_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_ARM_CORE3_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_CORE0_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE0_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE0_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_CORE1_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE1_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE1_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_CORE2_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE2_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE2_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_CORE3_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE3_LOCAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_DIS_IRQ_KFC_CORE3_CENTRAL_SYS_PWR_REG, { 0x0, 0x0, 0x0} },
+	{ EXYNOS5_ISP_ARM_SYS_PWR_REG,			{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ISP_ARM_LOCAL_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_DIS_IRQ_ISP_ARM_CENTRAL_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_ARM_COMMON_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_COMMON_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_ARM_L2_SYS_PWR_REG,			{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_KFC_L2_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_ACLKSTOP_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SCLKSTOP_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_CMU_ACLKSTOP_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SCLKSTOP_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_CMU_RESET_SYSMEM_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_DRAM_FREQ_DOWN_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_DDRPHY_DLLOFF_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_DDRPHY_DLLLOCK_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_APLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_MPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_VPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_EPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_BPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_CPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_DPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_IPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_KPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_MPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_BPLLUSER_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_RPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_SPLL_SYSCLK_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_TOP_BUS_SYS_PWR_REG,			{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_TOP_RETENTION_SYS_PWR_REG,		{ 0x1, 0x1, 0x1} },
+	{ EXYNOS5_TOP_PWR_SYS_PWR_REG,			{ 0x3, 0x3, 0x0} },
+	{ EXYNOS5_TOP_BUS_SYSMEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_TOP_RETENTION_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_TOP_PWR_SYSMEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x0} },
+	{ EXYNOS5_LOGIC_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_OSCCLK_GATE_SYS_PWR_REG,		{ 0x1, 0x0, 0x1} },
+	{ EXYNOS5_LOGIC_RESET_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_OSCCLK_GATE_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_INTRAM_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x3} },
+	{ EXYNOS5420_INTROM_MEM_SYS_PWR_REG,		{ 0x3, 0x0, 0x3} },
+	{ EXYNOS5_PAD_RETENTION_DRAM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_RETENTION_MAU_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_JTAG_SYS_PWR_REG,	{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_DRAM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_UART_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_MMC0_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_MMC1_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_MMC2_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_HSI_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_EBIA_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_EBIB_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_SPI_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5420_PAD_RETENTION_DRAM_COREBLK_SYS_PWR_REG, { 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_ISOLATION_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_PAD_ISOLATION_SYSMEM_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_PAD_ALV_SEL_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_XUSBXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_XXTI_SYS_PWR_REG,			{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_EXT_REGULATOR_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_GPIO_MODE_SYS_PWR_REG,		{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_GPIO_MODE_SYSMEM_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_GPIO_MODE_MAU_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_TOP_ASB_RESET_SYS_PWR_REG,		{ 0x1, 0x1, 0x0} },
+	{ EXYNOS5_TOP_ASB_ISOLATION_SYS_PWR_REG,	{ 0x1, 0x0, 0x0} },
+	{ EXYNOS5_GSCL_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_ISP_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_MFC_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_G3D_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_DISP1_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_MAU_SYS_PWR_REG,			{ 0x7, 0x7, 0x0} },
+	{ EXYNOS5420_G2D_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_MSC_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_FSYS_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_FSYS2_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_PSGEN_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_PERIC_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5420_WCORE_SYS_PWR_REG,			{ 0x7, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_GSCL_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_ISP_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_MFC_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_CLKSTOP_G3D_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_DISP1_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_MAU_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_G2D_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_MSC_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_FSYS_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_PSGEN_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_PERIC_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_CLKSTOP_WCORE_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_GSCL_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_ISP_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_MFC_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_SYSCLK_G3D_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_DISP1_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_MAU_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_G2D_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_MSC_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_FSYS_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_FSYS2_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_PSGEN_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_PERIC_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_SYSCLK_WCORE_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_FSYS2_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_PSGEN_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_PERIC_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_WCORE_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_GSCL_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_ISP_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_MFC_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5_CMU_RESET_G3D_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_DISP1_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_MAU_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_G2D_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG,		{ 0x0, 0x0, 0x0} },
+	{ EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG,	{ 0x0, 0x0, 0x0} },
+	{ PMU_TABLE_END,},
+};
+
+static unsigned int const exynos5420_list_disable_pmu_reg[] = {
+	EXYNOS5_CMU_CLKSTOP_GSCL_SYS_PWR_REG,
+	EXYNOS5_CMU_CLKSTOP_ISP_SYS_PWR_REG,
+	EXYNOS5_CMU_CLKSTOP_G3D_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_DISP1_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_MAU_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_G2D_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_MSC_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_FSYS_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_PSGEN_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_PERIC_SYS_PWR_REG,
+	EXYNOS5420_CMU_CLKSTOP_WCORE_SYS_PWR_REG,
+	EXYNOS5_CMU_SYSCLK_GSCL_SYS_PWR_REG,
+	EXYNOS5_CMU_SYSCLK_ISP_SYS_PWR_REG,
+	EXYNOS5_CMU_SYSCLK_G3D_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_DISP1_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_MAU_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_G2D_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_MSC_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_FSYS_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_FSYS2_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_PSGEN_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_PERIC_SYS_PWR_REG,
+	EXYNOS5420_CMU_SYSCLK_WCORE_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_FSYS2_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_PSGEN_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_PERIC_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_WCORE_SYS_PWR_REG,
+	EXYNOS5_CMU_RESET_GSCL_SYS_PWR_REG,
+	EXYNOS5_CMU_RESET_ISP_SYS_PWR_REG,
+	EXYNOS5_CMU_RESET_G3D_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_DISP1_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_MAU_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_G2D_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_MSC_SYS_PWR_REG,
+	EXYNOS5420_CMU_RESET_FSYS_SYS_PWR_REG,
+};
+
+static void exynos5420_powerdown_conf(enum sys_powerdown mode)
+{
+	u32 this_cluster;
+
+	this_cluster = MPIDR_AFFINITY_LEVEL(read_cpuid_mpidr(), 1);
+
+	/*
+	 * set the cluster id to IROM register to ensure that we wake
+	 * up with the current cluster.
+	 */
+	pmu_raw_writel(this_cluster, EXYNOS_IROM_DATA2);
+}
+
+static void exynos5420_pmu_init(void)
+{
+	unsigned int value;
+	int i;
+
+	/*
+	 * Set the CMU_RESET, CMU_SYSCLK and CMU_CLKSTOP registers
+	 * for local power blocks to Low initially as per Table 8-4:
+	 * "System-Level Power-Down Configuration Registers".
+	 */
+	for (i = 0; i < ARRAY_SIZE(exynos5420_list_disable_pmu_reg); i++)
+		pmu_raw_writel(0, exynos5420_list_disable_pmu_reg[i]);
+
+	/* Enable USE_STANDBY_WFI for all CORE */
+	pmu_raw_writel(EXYNOS5420_USE_STANDBY_WFI_ALL, S5P_CENTRAL_SEQ_OPTION);
+
+	value  = pmu_raw_readl(EXYNOS_L2_OPTION(0));
+	value &= ~EXYNOS5_USE_RETENTION;
+	pmu_raw_writel(value, EXYNOS_L2_OPTION(0));
+
+	value = pmu_raw_readl(EXYNOS_L2_OPTION(1));
+	value &= ~EXYNOS5_USE_RETENTION;
+	pmu_raw_writel(value, EXYNOS_L2_OPTION(1));
+
+	/*
+	 * If L2_COMMON is turned off, clocks related to ATB async
+	 * bridge are gated. Thus, when ISP power is gated, LPI
+	 * may get stuck.
+	 */
+	value = pmu_raw_readl(EXYNOS5420_LPI_MASK);
+	value |= EXYNOS5420_ATB_ISP_ARM;
+	pmu_raw_writel(value, EXYNOS5420_LPI_MASK);
+
+	value  = pmu_raw_readl(EXYNOS5420_LPI_MASK1);
+	value |= EXYNOS5420_ATB_KFC;
+	pmu_raw_writel(value, EXYNOS5420_LPI_MASK1);
+
+	/* Prevent issue of new bus request from L2 memory */
+	value = pmu_raw_readl(EXYNOS5420_ARM_COMMON_OPTION);
+	value |= EXYNOS5_SKIP_DEACTIVATE_ACEACP_IN_PWDN;
+	pmu_raw_writel(value, EXYNOS5420_ARM_COMMON_OPTION);
+
+	value = pmu_raw_readl(EXYNOS5420_KFC_COMMON_OPTION);
+	value |= EXYNOS5_SKIP_DEACTIVATE_ACEACP_IN_PWDN;
+	pmu_raw_writel(value, EXYNOS5420_KFC_COMMON_OPTION);
+
+	/* This setting is to reduce suspend/resume time */
+	pmu_raw_writel(DUR_WAIT_RESET, EXYNOS5420_LOGIC_RESET_DURATION3);
+
+	/* Serialized CPU wakeup of Eagle */
+	pmu_raw_writel(SPREAD_ENABLE, EXYNOS5420_ARM_INTR_SPREAD_ENABLE);
+
+	pmu_raw_writel(SPREAD_USE_STANDWFI,
+			EXYNOS5420_ARM_INTR_SPREAD_USE_STANDBYWFI);
+
+	pmu_raw_writel(0x1, EXYNOS5420_UP_SCHEDULER);
+
+	pr_info("EXYNOS5420 PMU initialized\n");
+}
+
+const struct exynos_pmu_data exynos5420_pmu_data = {
+	.pmu_config	= exynos5420_pmu_config,
+	.pmu_init	= exynos5420_pmu_init,
+	.powerdown_conf	= exynos5420_powerdown_conf,
+};
diff --git a/drivers/soc/sunxi/sunxi_sram.c b/drivers/soc/sunxi/sunxi_sram.c
index bc52670c8f4b..99e354c8f53f 100644
--- a/drivers/soc/sunxi/sunxi_sram.c
+++ b/drivers/soc/sunxi/sunxi_sram.c
@@ -117,7 +117,7 @@ static int sunxi_sram_show(struct seq_file *s, void *data)
 
 			val = readl(base + sram_data->reg);
 			val >>= sram_data->offset;
-			val &= sram_data->width;
+			val &= GENMASK(sram_data->width - 1, 0);
 
 			for (func = sram_data->func; func->func; func++) {
 				seq_printf(s, "\t\t%s%c\n", func->func,
@@ -208,7 +208,8 @@ int sunxi_sram_claim(struct device *dev)
 		return -EBUSY;
 	}
 
-	mask = GENMASK(sram_data->offset + sram_data->width, sram_data->offset);
+	mask = GENMASK(sram_data->offset + sram_data->width - 1,
+		       sram_data->offset);
 	val = readl(base + sram_data->reg);
 	val &= ~mask;
 	writel(val | ((device << sram_data->offset) & mask),
diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index 6ff936cacb70..905b974d1bdc 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -93,13 +93,13 @@ struct knav_reg_pdsp_regs {
 struct knav_reg_acc_command {
 	u32		command;
 	u32		queue_mask;
-	u32		list_phys;
+	u32		list_dma;
 	u32		queue_num;
 	u32		timer_config;
 };
 
 struct knav_link_ram_block {
-	dma_addr_t	 phys;
+	dma_addr_t	 dma;
 	void		*virt;
 	size_t		 size;
 };
diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c
index d2d48f2802bc..0612ebae0a09 100644
--- a/drivers/soc/ti/knav_qmss_acc.c
+++ b/drivers/soc/ti/knav_qmss_acc.c
@@ -122,8 +122,8 @@ static irqreturn_t knav_acc_int_handler(int irq, void *_instdata)
 	channel = acc->channel;
 	list_dma = acc->list_dma[acc->list_index];
 	list_cpu = acc->list_cpu[acc->list_index];
-	dev_dbg(kdev->dev, "acc-irq: channel %d, list %d, virt %p, phys %x\n",
-		channel, acc->list_index, list_cpu, list_dma);
+	dev_dbg(kdev->dev, "acc-irq: channel %d, list %d, virt %p, dma %pad\n",
+		channel, acc->list_index, list_cpu, &list_dma);
 	if (atomic_read(&acc->retrigger_count)) {
 		atomic_dec(&acc->retrigger_count);
 		__knav_acc_notify(range, acc);
@@ -297,12 +297,12 @@ knav_acc_write(struct knav_device *kdev, struct knav_pdsp_info *pdsp,
 	u32 result;
 
 	dev_dbg(kdev->dev, "acc command %08x %08x %08x %08x %08x\n",
-		cmd->command, cmd->queue_mask, cmd->list_phys,
+		cmd->command, cmd->queue_mask, cmd->list_dma,
 		cmd->queue_num, cmd->timer_config);
 
 	writel_relaxed(cmd->timer_config, &pdsp->acc_command->timer_config);
 	writel_relaxed(cmd->queue_num, &pdsp->acc_command->queue_num);
-	writel_relaxed(cmd->list_phys, &pdsp->acc_command->list_phys);
+	writel_relaxed(cmd->list_dma, &pdsp->acc_command->list_dma);
 	writel_relaxed(cmd->queue_mask, &pdsp->acc_command->queue_mask);
 	writel_relaxed(cmd->command, &pdsp->acc_command->command);
 
@@ -337,7 +337,7 @@ static void knav_acc_setup_cmd(struct knav_device *kdev,
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->command    = acc->channel;
 	cmd->queue_mask = queue_mask;
-	cmd->list_phys  = acc->list_dma[0];
+	cmd->list_dma   = (u32)acc->list_dma[0];
 	cmd->queue_num  = info->list_entries << 16;
 	cmd->queue_num |= queue_base;
 
@@ -591,8 +591,8 @@ int knav_init_acc_range(struct knav_device *kdev,
 		acc->list_cpu[1] = list_mem + list_size;
 		acc->list_dma[0] = list_dma;
 		acc->list_dma[1] = list_dma + list_size;
-		dev_dbg(kdev->dev, "%s: channel %d, phys %08x, virt %8p\n",
-			acc->name, acc->channel, list_dma, list_mem);
+		dev_dbg(kdev->dev, "%s: channel %d, dma %pad, virt %8p\n",
+			acc->name, acc->channel, &list_dma, list_mem);
 	}
 
 	range->ops = &knav_acc_range_ops;
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index 8c03a80b482d..b73e3534f67b 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1023,9 +1023,9 @@ static void knav_queue_setup_region(struct knav_device *kdev,
 	list_add(&pool->region_inst, &region->pools);
 
 	dev_dbg(kdev->dev,
-		"region %s (%d): size:%d, link:%d@%d, phys:%08x-%08x, virt:%p-%p\n",
+		"region %s (%d): size:%d, link:%d@%d, dma:%pad-%pad, virt:%p-%p\n",
 		region->name, id, region->desc_size, region->num_desc,
-		region->link_index, region->dma_start, region->dma_end,
+		region->link_index, &region->dma_start, &region->dma_end,
 		region->virt_start, region->virt_end);
 
 	hw_desc_size = (region->desc_size / 16) - 1;
@@ -1033,7 +1033,7 @@ static void knav_queue_setup_region(struct knav_device *kdev,
 
 	for_each_qmgr(kdev, qmgr) {
 		regs = qmgr->reg_region + id;
-		writel_relaxed(region->dma_start, &regs->base);
+		writel_relaxed((u32)region->dma_start, &regs->base);
 		writel_relaxed(region->link_index, &regs->start_index);
 		writel_relaxed(hw_desc_size << 16 | hw_num_desc,
 			       &regs->size_count);
@@ -1145,14 +1145,14 @@ static int knav_get_link_ram(struct knav_device *kdev,
 			 * queue_base specified => using internal or onchip
 			 * link ram WARNING - we do not "reserve" this block
 			 */
-			block->phys = (dma_addr_t)temp[0];
+			block->dma = (dma_addr_t)temp[0];
 			block->virt = NULL;
 			block->size = temp[1];
 		} else {
 			block->size = temp[1];
 			/* queue_base not specific => allocate requested size */
 			block->virt = dmam_alloc_coherent(kdev->dev,
-						  8 * block->size, &block->phys,
+						  8 * block->size, &block->dma,
 						  GFP_KERNEL);
 			if (!block->virt) {
 				dev_err(kdev->dev, "failed to alloc linkram\n");
@@ -1172,18 +1172,18 @@ static int knav_queue_setup_link_ram(struct knav_device *kdev)
 
 	for_each_qmgr(kdev, qmgr) {
 		block = &kdev->link_rams[0];
-		dev_dbg(kdev->dev, "linkram0: phys:%x, virt:%p, size:%x\n",
-			block->phys, block->virt, block->size);
-		writel_relaxed(block->phys, &qmgr->reg_config->link_ram_base0);
+		dev_dbg(kdev->dev, "linkram0: dma:%pad, virt:%p, size:%x\n",
+			&block->dma, block->virt, block->size);
+		writel_relaxed((u32)block->dma, &qmgr->reg_config->link_ram_base0);
 		writel_relaxed(block->size, &qmgr->reg_config->link_ram_size0);
 
 		block++;
 		if (!block->size)
 			continue;
 
-		dev_dbg(kdev->dev, "linkram1: phys:%x, virt:%p, size:%x\n",
-			block->phys, block->virt, block->size);
-		writel_relaxed(block->phys, &qmgr->reg_config->link_ram_base1);
+		dev_dbg(kdev->dev, "linkram1: dma:%pad, virt:%p, size:%x\n",
+			&block->dma, block->virt, block->size);
+		writel_relaxed(block->dma, &qmgr->reg_config->link_ram_base1);
 	}
 
 	return 0;
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index f0ca4a18b799..cf84581287b9 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -60,8 +60,6 @@ source "drivers/staging/emxx_udc/Kconfig"
 
 source "drivers/staging/speakup/Kconfig"
 
-source "drivers/staging/ste_rmi4/Kconfig"
-
 source "drivers/staging/nvec/Kconfig"
 
 source "drivers/staging/media/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 22464a09cb27..7d6448d20464 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -21,7 +21,6 @@ obj-$(CONFIG_FB_SM750)		+= sm750fb/
 obj-$(CONFIG_FB_XGI)		+= xgifb/
 obj-$(CONFIG_USB_EMXX)		+= emxx_udc/
 obj-$(CONFIG_SPEAKUP)		+= speakup/
-obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4)	+= ste_rmi4/
 obj-$(CONFIG_MFD_NVEC)		+= nvec/
 obj-$(CONFIG_STAGING_RDMA)	+= rdma/
 obj-$(CONFIG_ANDROID)		+= android/
diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 8a8078f954d5..ca9a53c03f0f 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -385,8 +385,8 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	/* requested protection bits must match our allowed protection mask */
-	if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask)) &
-		     calc_vm_prot_bits(PROT_MASK))) {
+	if (unlikely((vma->vm_flags & ~calc_vm_prot_bits(asma->prot_mask, 0)) &
+		     calc_vm_prot_bits(PROT_MASK, 0))) {
 		ret = -EPERM;
 		goto out;
 	}
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index 1c872bdfddf6..85365672c931 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -1123,8 +1123,7 @@ static void ion_dma_buf_kunmap(struct dma_buf *dmabuf, unsigned long offset,
 {
 }
 
-static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start,
-					size_t len,
+static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
 					enum dma_data_direction direction)
 {
 	struct ion_buffer *buffer = dmabuf->priv;
@@ -1142,15 +1141,16 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start,
 	return PTR_ERR_OR_ZERO(vaddr);
 }
 
-static void ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start,
-				       size_t len,
-				       enum dma_data_direction direction)
+static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+				      enum dma_data_direction direction)
 {
 	struct ion_buffer *buffer = dmabuf->priv;
 
 	mutex_lock(&buffer->lock);
 	ion_buffer_kmap_put(buffer);
 	mutex_unlock(&buffer->lock);
+
+	return 0;
 }
 
 static struct dma_buf_ops dma_buf_ops = {
diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c
index b8dcf5a26cc4..83a3af06d01c 100644
--- a/drivers/staging/android/ion/ion_test.c
+++ b/drivers/staging/android/ion/ion_test.c
@@ -109,7 +109,7 @@ static int ion_handle_test_kernel(struct dma_buf *dma_buf, void __user *ptr,
 	if (offset > dma_buf->size || size > dma_buf->size - offset)
 		return -EINVAL;
 
-	ret = dma_buf_begin_cpu_access(dma_buf, offset, size, dir);
+	ret = dma_buf_begin_cpu_access(dma_buf, dir);
 	if (ret)
 		return ret;
 
@@ -139,7 +139,7 @@ static int ion_handle_test_kernel(struct dma_buf *dma_buf, void __user *ptr,
 		copy_offset = 0;
 	}
 err:
-	dma_buf_end_cpu_access(dma_buf, offset, size, dir);
+	dma_buf_end_cpu_access(dma_buf, dir);
 	return ret;
 }
 
@@ -285,8 +285,8 @@ static int __init ion_test_init(void)
 {
 	ion_test_pdev = platform_device_register_simple("ion-test",
 							-1, NULL, 0);
-	if (!ion_test_pdev)
-		return -ENODEV;
+	if (IS_ERR(ion_test_pdev))
+		return PTR_ERR(ion_test_pdev);
 
 	return platform_driver_probe(&ion_test_platform_driver, ion_test_probe);
 }
diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index d1226c97664b..dcaf7e89f299 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -246,24 +246,24 @@ static void ni_writel(struct comedi_device *dev, uint32_t data, int reg)
 {
 	if (dev->mmio)
 		writel(data, dev->mmio + reg);
-
-	outl(data, dev->iobase + reg);
+	else
+		outl(data, dev->iobase + reg);
 }
 
 static void ni_writew(struct comedi_device *dev, uint16_t data, int reg)
 {
 	if (dev->mmio)
 		writew(data, dev->mmio + reg);
-
-	outw(data, dev->iobase + reg);
+	else
+		outw(data, dev->iobase + reg);
 }
 
 static void ni_writeb(struct comedi_device *dev, uint8_t data, int reg)
 {
 	if (dev->mmio)
 		writeb(data, dev->mmio + reg);
-
-	outb(data, dev->iobase + reg);
+	else
+		outb(data, dev->iobase + reg);
 }
 
 static uint32_t ni_readl(struct comedi_device *dev, int reg)
diff --git a/drivers/staging/fsl-mc/bus/mc-bus.c b/drivers/staging/fsl-mc/bus/mc-bus.c
index 9f77c37bd612..b59455661f4d 100644
--- a/drivers/staging/fsl-mc/bus/mc-bus.c
+++ b/drivers/staging/fsl-mc/bus/mc-bus.c
@@ -260,14 +260,14 @@ static int get_dprc_icid(struct fsl_mc_io *mc_io,
 
 	error = dprc_open(mc_io, 0, container_id, &dprc_handle);
 	if (error < 0) {
-		dev_err(&mc_io->dev, "dprc_open() failed: %d\n", error);
+		dev_err(mc_io->dev, "dprc_open() failed: %d\n", error);
 		return error;
 	}
 
 	memset(&attr, 0, sizeof(attr));
 	error = dprc_get_attributes(mc_io, 0, dprc_handle, &attr);
 	if (error < 0) {
-		dev_err(&mc_io->dev, "dprc_get_attributes() failed: %d\n",
+		dev_err(mc_io->dev, "dprc_get_attributes() failed: %d\n",
 			error);
 		goto common_cleanup;
 	}
diff --git a/drivers/staging/fsl-mc/bus/mc-sys.c b/drivers/staging/fsl-mc/bus/mc-sys.c
index 8101c469abb0..810a611c1cb0 100644
--- a/drivers/staging/fsl-mc/bus/mc-sys.c
+++ b/drivers/staging/fsl-mc/bus/mc-sys.c
@@ -328,7 +328,7 @@ static int mc_polling_wait_preemptible(struct fsl_mc_io *mc_io,
 			     MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
 
 		if (time_after_eq(jiffies, jiffies_until_timeout)) {
-			dev_dbg(&mc_io->dev,
+			dev_dbg(mc_io->dev,
 				"MC command timed out (portal: %#llx, obj handle: %#x, command: %#x)\n",
 				 mc_io->portal_phys_addr,
 				 (unsigned int)
@@ -370,7 +370,7 @@ static int mc_polling_wait_atomic(struct fsl_mc_io *mc_io,
 		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
 		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
 		if (timeout_usecs == 0) {
-			dev_dbg(&mc_io->dev,
+			dev_dbg(mc_io->dev,
 				"MC command timed out (portal: %#llx, obj handle: %#x, command: %#x)\n",
 				 mc_io->portal_phys_addr,
 				 (unsigned int)
@@ -426,7 +426,7 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct mc_command *cmd)
 		goto common_exit;
 
 	if (status != MC_CMD_STATUS_OK) {
-		dev_dbg(&mc_io->dev,
+		dev_dbg(mc_io->dev,
 			"MC command failed: portal: %#llx, obj handle: %#x, command: %#x, status: %s (%#x)\n",
 			 mc_io->portal_phys_addr,
 			 (unsigned int)MC_CMD_HDR_READ_TOKEN(cmd->header),
diff --git a/drivers/staging/lustre/lustre/Kconfig b/drivers/staging/lustre/lustre/Kconfig
index a09b51ce8265..8ac7cd4d6fdb 100644
--- a/drivers/staging/lustre/lustre/Kconfig
+++ b/drivers/staging/lustre/lustre/Kconfig
@@ -1,7 +1,7 @@
 config LUSTRE_FS
 	tristate "Lustre file system client support"
 	depends on m && !MIPS && !XTENSA && !SUPERH
-	select LNET
+	depends on LNET
 	select CRYPTO
 	select CRYPTO_CRC32
 	select CRYPTO_CRC32_PCLMUL if X86
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 973f5cdec192..3e1572cb457b 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -657,7 +657,7 @@ static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
 #if BITS_PER_LONG == 32
 	return 1;
 #elif defined(CONFIG_COMPAT)
-	return unlikely(is_compat_task() || (sbi->ll_flags & LL_SBI_32BIT_API));
+	return unlikely(in_compat_syscall() || (sbi->ll_flags & LL_SBI_32BIT_API));
 #else
 	return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
 #endif
diff --git a/drivers/staging/lustre/lustre/mgc/mgc_request.c b/drivers/staging/lustre/lustre/mgc/mgc_request.c
index 65caffe8c42e..b7dc87248032 100644
--- a/drivers/staging/lustre/lustre/mgc/mgc_request.c
+++ b/drivers/staging/lustre/lustre/mgc/mgc_request.c
@@ -1282,7 +1282,7 @@ static int mgc_apply_recover_logs(struct obd_device *mgc,
 
 		rc = -ENOMEM;
 		lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
-		if (!lcfg) {
+		if (IS_ERR(lcfg)) {
 			CERROR("mgc: cannot allocate memory\n");
 			break;
 		}
diff --git a/drivers/staging/most/hdm-dim2/dim2_hdm.c b/drivers/staging/most/hdm-dim2/dim2_hdm.c
index 0dc86add7161..a36449551513 100644
--- a/drivers/staging/most/hdm-dim2/dim2_hdm.c
+++ b/drivers/staging/most/hdm-dim2/dim2_hdm.c
@@ -771,7 +771,7 @@ static int dim2_probe(struct platform_device *pdev)
 	dev->netinfo_task = kthread_run(&deliver_netinfo_thread, (void *)dev,
 					"dim2_netinfo");
 	if (IS_ERR(dev->netinfo_task))
-		ret = PTR_ERR(dev->netinfo_task);
+		return PTR_ERR(dev->netinfo_task);
 
 	for (i = 0; i < DMA_CHANNELS; i++) {
 		struct most_channel_capability *cap = dev->capabilities + i;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 9d47c5db24a6..163f21a1298d 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -49,7 +49,6 @@ static struct nand_ecclayout spinand_oob_64 = {
 		17, 18, 19, 20, 21, 22,
 		33, 34, 35, 36, 37, 38,
 		49, 50, 51, 52, 53, 54, },
-	.oobavail = 32,
 	.oobfree = {
 		{.offset = 8,
 			.length = 8},
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.h b/drivers/staging/mt29f_spinand/mt29f_spinand.h
index ae62975cf44a..457dc7ffdaf1 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.h
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.h
@@ -78,7 +78,6 @@
 #define BL_ALL_UNLOCKED    0
 
 struct spinand_info {
-	struct nand_ecclayout *ecclayout;
 	struct spi_device *spi;
 	void *priv;
 };
diff --git a/drivers/staging/rdma/hfi1/Kconfig b/drivers/staging/rdma/hfi1/Kconfig
index fd25078ee923..3e668d852f03 100644
--- a/drivers/staging/rdma/hfi1/Kconfig
+++ b/drivers/staging/rdma/hfi1/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_HFI1
 	tristate "Intel OPA Gen1 support"
-	depends on X86_64
+	depends on X86_64 && INFINIBAND_RDMAVT
+	select MMU_NOTIFIER
 	default m
 	---help---
 	This is a low-level driver for Intel OPA Gen1 adapter.
@@ -25,13 +26,3 @@ config SDMA_VERBOSITY
 	---help---
 	This is a configuration flag to enable verbose
 	SDMA debug
-config PRESCAN_RXQ
-	bool "Enable prescanning of the RX queue for ECNs"
-	depends on INFINIBAND_HFI1
-	default n
-	---help---
-	This option toggles the prescanning of the receive queue for
-	Explicit Congestion Notifications. If an ECN is detected, it
-	is processed as quickly as possible, the ECN is toggled off.
-	After the prescanning step, the receive queue is processed as
-	usual.
diff --git a/drivers/staging/rdma/hfi1/Makefile b/drivers/staging/rdma/hfi1/Makefile
index 68c5a315e557..8dc59382ee96 100644
--- a/drivers/staging/rdma/hfi1/Makefile
+++ b/drivers/staging/rdma/hfi1/Makefile
@@ -7,10 +7,12 @@
 #
 obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
 
-hfi1-y := chip.o cq.o device.o diag.o dma.o driver.o efivar.o eprom.o file_ops.o firmware.o \
-	init.o intr.o keys.o mad.o mmap.o mr.o pcie.o pio.o pio_copy.o \
-	qp.o qsfp.o rc.o ruc.o sdma.o srq.o sysfs.o trace.o twsi.o \
-	uc.o ud.o user_pages.o user_sdma.o verbs_mcast.o verbs.o
+hfi1-y := affinity.o chip.o device.o diag.o driver.o efivar.o \
+	eprom.o file_ops.o firmware.o \
+	init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
+	qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o twsi.o \
+	uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
+	verbs_txreq.o
 hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/staging/rdma/hfi1/affinity.c b/drivers/staging/rdma/hfi1/affinity.c
new file mode 100644
index 000000000000..2cb8ca77f876
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/topology.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+#include "trace.h"
+
+struct cpu_mask_set {
+	struct cpumask mask;
+	struct cpumask used;
+	uint gen;
+};
+
+struct hfi1_affinity {
+	struct cpu_mask_set def_intr;
+	struct cpu_mask_set rcv_intr;
+	struct cpu_mask_set proc;
+	/* spin lock to protect affinity struct */
+	spinlock_t lock;
+};
+
+/* Name of IRQ types, indexed by enum irq_type */
+static const char * const irq_type_names[] = {
+	"SDMA",
+	"RCVCTXT",
+	"GENERAL",
+	"OTHER",
+};
+
+static inline void init_cpu_mask_set(struct cpu_mask_set *set)
+{
+	cpumask_clear(&set->mask);
+	cpumask_clear(&set->used);
+	set->gen = 0;
+}
+
+/*
+ * Interrupt affinity.
+ *
+ * non-rcv avail gets a default mask that
+ * starts as possible cpus with threads reset
+ * and each rcv avail reset.
+ *
+ * rcv avail gets node relative 1 wrapping back
+ * to the node relative 1 as necessary.
+ *
+ */
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+{
+	int node = pcibus_to_node(dd->pcidev->bus);
+	struct hfi1_affinity *info;
+	const struct cpumask *local_mask;
+	int curr_cpu, possible, i, ht;
+
+	if (node < 0)
+		node = numa_node_id();
+	dd->node = node;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	spin_lock_init(&info->lock);
+
+	init_cpu_mask_set(&info->def_intr);
+	init_cpu_mask_set(&info->rcv_intr);
+	init_cpu_mask_set(&info->proc);
+
+	local_mask = cpumask_of_node(dd->node);
+	if (cpumask_first(local_mask) >= nr_cpu_ids)
+		local_mask = topology_core_cpumask(0);
+	/* use local mask as default */
+	cpumask_copy(&info->def_intr.mask, local_mask);
+	/*
+	 * Remove HT cores from the default mask.  Do this in two steps below.
+	 */
+	possible = cpumask_weight(&info->def_intr.mask);
+	ht = cpumask_weight(topology_sibling_cpumask(
+					cpumask_first(&info->def_intr.mask)));
+	/*
+	 * Step 1.  Skip over the first N HT siblings and use them as the
+	 * "real" cores.  Assumes that HT cores are not enumerated in
+	 * succession (except in the single core case).
+	 */
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	for (i = 0; i < possible / ht; i++)
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	/*
+	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
+	 * skip any gaps.
+	 */
+	for (; i < possible; i++) {
+		cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+	}
+
+	/*  fill in the receive list */
+	possible = cpumask_weight(&info->def_intr.mask);
+	curr_cpu = cpumask_first(&info->def_intr.mask);
+	if (possible == 1) {
+		/*  only one CPU, everyone will use it */
+		cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+	} else {
+		/*
+		 * Retain the first CPU in the default list for the control
+		 * context.
+		 */
+		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+		/*
+		 * Remove the remaining kernel receive queues from
+		 * the default list and add them to the receive list.
+		 */
+		for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+			cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
+			cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
+			curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
+			if (curr_cpu >= nr_cpu_ids)
+				break;
+		}
+	}
+
+	cpumask_copy(&info->proc.mask, cpu_online_mask);
+	dd->affinity = info;
+	return 0;
+}
+
+void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
+{
+	kfree(dd->affinity);
+}
+
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int ret;
+	cpumask_var_t diff;
+	struct cpu_mask_set *set;
+	struct sdma_engine *sde = NULL;
+	struct hfi1_ctxtdata *rcd = NULL;
+	char extra[64];
+	int cpu = -1;
+
+	extra[0] = '\0';
+	cpumask_clear(&msix->mask);
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde = (struct sdma_engine *)msix->arg;
+		scnprintf(extra, 64, "engine %u", sde->this_idx);
+		/* fall through */
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		if (rcd->ctxt == HFI1_CTRL_CTXT) {
+			set = &dd->affinity->def_intr;
+			cpu = cpumask_first(&set->mask);
+		} else {
+			set = &dd->affinity->rcv_intr;
+		}
+		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+		break;
+	default:
+		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
+		return -EINVAL;
+	}
+
+	/*
+	 * The control receive context is placed on a particular CPU, which
+	 * is set above.  Skip accounting for it.  Everything else finds its
+	 * CPU here.
+	 */
+	if (cpu == -1) {
+		spin_lock(&dd->affinity->lock);
+		if (cpumask_equal(&set->mask, &set->used)) {
+			/*
+			 * We've used up all the CPUs, bump up the generation
+			 * and reset the 'used' map
+			 */
+			set->gen++;
+			cpumask_clear(&set->used);
+		}
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpu = cpumask_first(diff);
+		cpumask_set_cpu(cpu, &set->used);
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+		sde->cpu = cpu;
+		break;
+	case IRQ_GENERAL:
+	case IRQ_RCVCTXT:
+	case IRQ_OTHER:
+		break;
+	}
+
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
+		    msix->msix.vector, irq_type_names[msix->type],
+		    extra, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	free_cpumask_var(diff);
+	return 0;
+}
+
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+			   struct hfi1_msix_entry *msix)
+{
+	struct cpu_mask_set *set = NULL;
+	struct hfi1_ctxtdata *rcd;
+
+	switch (msix->type) {
+	case IRQ_SDMA:
+	case IRQ_GENERAL:
+		set = &dd->affinity->def_intr;
+		break;
+	case IRQ_RCVCTXT:
+		rcd = (struct hfi1_ctxtdata *)msix->arg;
+		/* only do accounting for non control contexts */
+		if (rcd->ctxt != HFI1_CTRL_CTXT)
+			set = &dd->affinity->rcv_intr;
+		break;
+	default:
+		return;
+	}
+
+	if (set) {
+		spin_lock(&dd->affinity->lock);
+		cpumask_andnot(&set->used, &set->used, &msix->mask);
+		if (cpumask_empty(&set->used) && set->gen) {
+			set->gen--;
+			cpumask_copy(&set->used, &set->mask);
+		}
+		spin_unlock(&dd->affinity->lock);
+	}
+
+	irq_set_affinity_hint(msix->msix.vector, NULL);
+	cpumask_clear(&msix->mask);
+}
+
+int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
+{
+	int cpu = -1, ret;
+	cpumask_var_t diff, mask, intrs;
+	const struct cpumask *node_mask,
+		*proc_mask = tsk_cpus_allowed(current);
+	struct cpu_mask_set *set = &dd->affinity->proc;
+	char buf[1024];
+
+	/*
+	 * check whether process/context affinity has already
+	 * been set
+	 */
+	if (cpumask_weight(proc_mask) == 1) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
+			  current->pid, current->comm, buf);
+		/*
+		 * Mark the pre-set CPU as used. This is atomic so we don't
+		 * need the lock
+		 */
+		cpu = cpumask_first(proc_mask);
+		cpumask_set_cpu(cpu, &set->used);
+		goto done;
+	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
+			  current->pid, current->comm, buf);
+		goto done;
+	}
+
+	/*
+	 * The process does not have a preset CPU affinity so find one to
+	 * recommend. We prefer CPUs on the same NUMA as the device.
+	 */
+
+	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
+	if (!ret)
+		goto done;
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		goto free_diff;
+	ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
+	if (!ret)
+		goto free_mask;
+
+	spin_lock(&dd->affinity->lock);
+	/*
+	 * If we've used all available CPUs, clear the mask and start
+	 * overloading.
+	 */
+	if (cpumask_equal(&set->mask, &set->used)) {
+		set->gen++;
+		cpumask_clear(&set->used);
+	}
+
+	/* CPUs used by interrupt handlers */
+	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
+			     &dd->affinity->def_intr.mask :
+			     &dd->affinity->def_intr.used));
+	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
+				  &dd->affinity->rcv_intr.mask :
+				  &dd->affinity->rcv_intr.used));
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
+	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+
+	/*
+	 * If we don't have a NUMA node requested, preference is towards
+	 * device NUMA node
+	 */
+	if (node == -1)
+		node = dd->node;
+	node_mask = cpumask_of_node(node);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
+	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+
+	/* diff will hold all unused cpus */
+	cpumask_andnot(diff, &set->mask, &set->used);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
+	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+
+	/* get cpumask of available CPUs on preferred NUMA */
+	cpumask_and(mask, diff, node_mask);
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+
+	/*
+	 * At first, we don't want to place processes on the same
+	 * CPUs as interrupt handlers.
+	 */
+	cpumask_andnot(diff, mask, intrs);
+	if (!cpumask_empty(diff))
+		cpumask_copy(mask, diff);
+
+	/*
+	 * if we don't have a cpu on the preferred NUMA, get
+	 * the list of the remaining available CPUs
+	 */
+	if (cpumask_empty(mask)) {
+		cpumask_andnot(diff, &set->mask, &set->used);
+		cpumask_andnot(mask, diff, node_mask);
+	}
+	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
+	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids) /* empty */
+		cpu = -1;
+	else
+		cpumask_set_cpu(cpu, &set->used);
+	spin_unlock(&dd->affinity->lock);
+
+	free_cpumask_var(intrs);
+free_mask:
+	free_cpumask_var(mask);
+free_diff:
+	free_cpumask_var(diff);
+done:
+	return cpu;
+}
+
+void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
+{
+	struct cpu_mask_set *set = &dd->affinity->proc;
+
+	if (cpu < 0)
+		return;
+	spin_lock(&dd->affinity->lock);
+	cpumask_clear_cpu(cpu, &set->used);
+	if (cpumask_empty(&set->used) && set->gen) {
+		set->gen--;
+		cpumask_copy(&set->used, &set->mask);
+	}
+	spin_unlock(&dd->affinity->lock);
+}
+
diff --git a/drivers/staging/rdma/hfi1/affinity.h b/drivers/staging/rdma/hfi1/affinity.h
new file mode 100644
index 000000000000..b287e4963024
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/affinity.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_AFFINITY_H
+#define _HFI1_AFFINITY_H
+
+#include "hfi.h"
+
+enum irq_type {
+	IRQ_SDMA,
+	IRQ_RCVCTXT,
+	IRQ_GENERAL,
+	IRQ_OTHER
+};
+
+/* Can be used for both memory and cpu */
+enum affinity_flags {
+	AFF_AUTO,
+	AFF_NUMA_LOCAL,
+	AFF_DEV_LOCAL,
+	AFF_IRQ_LOCAL
+};
+
+struct hfi1_msix_entry;
+
+/* Initialize driver affinity data */
+int hfi1_dev_affinity_init(struct hfi1_devdata *);
+/* Free driver affinity data */
+void hfi1_dev_affinity_free(struct hfi1_devdata *);
+/*
+ * Set IRQ affinity to a CPU. The function will determine the
+ * CPU and set the affinity to it.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Remove the IRQ's CPU affinity. This function also updates
+ * any internal CPU tracking data
+ */
+void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+/*
+ * Determine a CPU affinity for a user process, if the process does not
+ * have an affinity set yet.
+ */
+int hfi1_get_proc_affinity(struct hfi1_devdata *, int);
+/* Release a CPU used by a user process. */
+void hfi1_put_proc_affinity(struct hfi1_devdata *, int);
+
+#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/staging/rdma/hfi1/aspm.h b/drivers/staging/rdma/hfi1/aspm.h
new file mode 100644
index 000000000000..0d58fe3b49b5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/aspm.h
@@ -0,0 +1,309 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _ASPM_H
+#define _ASPM_H
+
+#include "hfi.h"
+
+extern uint aspm_mode;
+
+enum aspm_mode {
+	ASPM_MODE_DISABLED = 0,	/* ASPM always disabled, performance mode */
+	ASPM_MODE_ENABLED = 1,	/* ASPM always enabled, power saving mode */
+	ASPM_MODE_DYNAMIC = 2,	/* ASPM enabled/disabled dynamically */
+};
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+	(((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+	u32 up, dn;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return false;
+
+	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+	dn = ASPM_L1_SUPPORTED(dn);
+
+	pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+	up = ASPM_L1_SUPPORTED(up);
+
+	/* ASPM works on A-step but is reported as not supported */
+	return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+	u32 l1_ent_lat = 0x4u;
+	u32 reg32;
+
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+	reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+	reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/*
+	 * If the driver does not have access to the upstream component,
+	 * it cannot support ASPM L1 at all.
+	 */
+	if (!parent)
+		return;
+
+	/* Enable ASPM L1 first in upstream component and then downstream */
+	pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC,
+					   PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+	struct pci_dev *parent = dd->pcidev->bus->self;
+
+	/* Disable ASPM L1 first in downstream component and then upstream */
+	pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+					   PCI_EXP_LNKCTL_ASPMC, 0x0);
+	if (parent)
+		pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+						   PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static inline void aspm_enable(struct hfi1_devdata *dd)
+{
+	if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+	    !dd->aspm_supported)
+		return;
+
+	aspm_hw_enable_l1(dd);
+	dd->aspm_enabled = true;
+}
+
+static inline void aspm_disable(struct hfi1_devdata *dd)
+{
+	if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+		return;
+
+	aspm_hw_disable_l1(dd);
+	dd->aspm_enabled = false;
+}
+
+static inline void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	aspm_disable(dd);
+	atomic_inc(&dd->aspm_disabled_cnt);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static inline void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->aspm_lock, flags);
+	if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+		aspm_enable(dd);
+	spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+	bool restart_timer;
+	bool close_interrupts;
+	unsigned long flags;
+	ktime_t now, prev;
+
+	/* Quickest exit for minimum impact */
+	if (!rcd->aspm_intr_supported)
+		return;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	/* PSM contexts are open */
+	if (!rcd->aspm_intr_enable)
+		goto unlock;
+
+	prev = rcd->aspm_ts_last_intr;
+	now = ktime_get();
+	rcd->aspm_ts_last_intr = now;
+
+	/* An interrupt pair close together in time */
+	close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+	/* Don't push out our timer till this much time has elapsed */
+	restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+				    ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+	restart_timer = restart_timer && close_interrupts;
+
+	/* Disable ASPM and schedule timer */
+	if (rcd->aspm_enabled && close_interrupts) {
+		aspm_disable_inc(rcd->dd);
+		rcd->aspm_enabled = false;
+		restart_timer = true;
+	}
+
+	if (restart_timer) {
+		mod_timer(&rcd->aspm_timer,
+			  jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+		rcd->aspm_ts_timer_sched = now;
+	}
+unlock:
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static inline void aspm_ctx_timer_function(unsigned long data)
+{
+	struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rcd->aspm_lock, flags);
+	aspm_enable_dec(rcd->dd);
+	rcd->aspm_enabled = true;
+	spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Disable interrupt processing for verbs contexts when PSM contexts are open */
+static inline void aspm_disable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		del_timer_sync(&rcd->aspm_timer);
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = false;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+
+	aspm_disable(dd);
+	atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+static inline void aspm_enable_all(struct hfi1_devdata *dd)
+{
+	struct hfi1_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned i;
+
+	aspm_enable(dd);
+
+	if (aspm_mode != ASPM_MODE_DYNAMIC)
+		return;
+
+	for (i = 0; i < dd->first_user_ctxt; i++) {
+		rcd = dd->rcd[i];
+		spin_lock_irqsave(&rcd->aspm_lock, flags);
+		rcd->aspm_intr_enable = true;
+		rcd->aspm_enabled = true;
+		spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+	}
+}
+
+static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+	spin_lock_init(&rcd->aspm_lock);
+	setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function,
+		    (unsigned long)rcd);
+	rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+		aspm_mode == ASPM_MODE_DYNAMIC &&
+		rcd->ctxt < rcd->dd->first_user_ctxt;
+}
+
+static inline void aspm_init(struct hfi1_devdata *dd)
+{
+	unsigned i;
+
+	spin_lock_init(&dd->aspm_lock);
+	dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+	for (i = 0; i < dd->first_user_ctxt; i++)
+		aspm_ctx_init(dd->rcd[i]);
+
+	/* Start with ASPM disabled */
+	aspm_hw_set_l1_ent_latency(dd);
+	dd->aspm_enabled = false;
+	aspm_hw_disable_l1(dd);
+
+	/* Now turn on ASPM if configured */
+	aspm_enable_all(dd);
+}
+
+static inline void aspm_exit(struct hfi1_devdata *dd)
+{
+	aspm_disable_all(dd);
+
+	/* Turn on ASPM on exit to conserve power */
+	aspm_enable(dd);
+}
+
+#endif /* _ASPM_H */
diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c
index 46a1830b509b..16eb653903e0 100644
--- a/drivers/staging/rdma/hfi1/chip.c
+++ b/drivers/staging/rdma/hfi1/chip.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -64,6 +61,8 @@
 #include "sdma.h"
 #include "eprom.h"
 #include "efivar.h"
+#include "platform.h"
+#include "aspm.h"
 
 #define NUM_IB_PORTS 1
 
@@ -420,10 +419,10 @@ static struct flag_table pio_err_status_flags[] = {
 	SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
 /*23*/	FLAG_ENTRY("PioWriteQwValidParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
 /*24*/	FLAG_ENTRY("PioBlockQwCountParity",
-	SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
+	SEC_WRITE_DROPPED | SEC_SPC_FREEZE,
 	SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
 /*25*/	FLAG_ENTRY("PioVlfVlLenParity",
 	SEC_SPC_FREEZE,
@@ -509,6 +508,12 @@ static struct flag_table sdma_err_status_flags[] = {
 		| SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
 		| SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
 
+/* SendEgressErrInfo bits that correspond to a PortXmitDiscard counter */
+#define PORT_DISCARD_EGRESS_ERRS \
+	(SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_MAPPING_ERR_SMASK \
+	| SEND_EGRESS_ERR_INFO_VL_ERR_SMASK)
+
 /*
  * TXE Egress Error flags
  */
@@ -936,7 +941,7 @@ static struct flag_table dc8051_err_flags[] = {
 	FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
 	FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
 	FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
-		D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
+		    D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
 	FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
 };
 
@@ -950,7 +955,7 @@ static struct flag_table dc8051_info_err_flags[] = {
 	FLAG_ENTRY0("Unknown frame received",  UNKNOWN_FRAME),
 	FLAG_ENTRY0("Target BER not met",      TARGET_BER_NOT_MET),
 	FLAG_ENTRY0("Serdes internal loopback failure",
-					FAILED_SERDES_INTERNAL_LOOPBACK),
+		    FAILED_SERDES_INTERNAL_LOOPBACK),
 	FLAG_ENTRY0("Failed SerDes init",      FAILED_SERDES_INIT),
 	FLAG_ENTRY0("Failed LNI(Polling)",     FAILED_LNI_POLLING),
 	FLAG_ENTRY0("Failed LNI(Debounce)",    FAILED_LNI_DEBOUNCE),
@@ -958,7 +963,8 @@ static struct flag_table dc8051_info_err_flags[] = {
 	FLAG_ENTRY0("Failed LNI(OptEq)",       FAILED_LNI_OPTEQ),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
-	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT)
+	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT),
+	FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT)
 };
 
 /*
@@ -978,7 +984,6 @@ static struct flag_table dc8051_info_host_msg_flags[] = {
 	FLAG_ENTRY0("Link going down", 0x0100),
 };
 
-
 static u32 encoded_size(u32 size);
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
 static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
@@ -1140,11 +1145,8 @@ struct cntr_entry {
 	/*
 	 * accessor for stat element, context either dd or ppd
 	 */
-	u64 (*rw_cntr)(const struct cntr_entry *,
-			       void *context,
-			       int vl,
-			       int mode,
-			       u64 data);
+	u64 (*rw_cntr)(const struct cntr_entry *, void *context, int vl,
+		       int mode, u64 data);
 };
 
 #define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
@@ -1188,7 +1190,7 @@ CNTR_ELEM(#name, \
 #define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
 #define OVR_ELM(ctx) \
 CNTR_ELEM("RcvHdrOvr" #ctx, \
-	  (RCV_HDR_OVFL_CNT + ctx*0x100), \
+	  (RCV_HDR_OVFL_CNT + ctx * 0x100), \
 	  0, CNTR_NORMAL, port_access_u64_csr)
 
 /* 32bit TXE */
@@ -1274,7 +1276,6 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
 {
 	u64 ret;
 
-
 	if (mode == CNTR_MODE_R) {
 		ret = read_csr(dd, csr);
 	} else if (mode == CNTR_MODE_W) {
@@ -1291,17 +1292,65 @@ static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
 
 /* Dev Access */
 static u64 dev_access_u32_csr(const struct cntr_entry *entry,
-			    void *context, int vl, int mode, u64 data)
+			      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
+	u64 csr = entry->csr;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
-	return read_write_csr(dd, entry->csr, mode, data);
+	if (entry->flags & CNTR_SDMA) {
+		if (vl == CNTR_INVALID_VL)
+			return 0;
+		csr += 0x100 * vl;
+	} else {
+		if (vl != CNTR_INVALID_VL)
+			return 0;
+	}
+	return read_write_csr(dd, csr, mode, data);
+}
+
+static u64 access_sde_err_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].err_cnt;
+	return 0;
+}
+
+static u64 access_sde_int_cnt(const struct cntr_entry *entry,
+			      void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].sdma_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_idle_int_cnt(const struct cntr_entry *entry,
+				   void *context, int idx, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].idle_int_cnt;
+	return 0;
+}
+
+static u64 access_sde_progress_int_cnt(const struct cntr_entry *entry,
+				       void *context, int idx, int mode,
+				       u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	if (dd->per_sdma && idx < dd->num_sdma)
+		return dd->per_sdma[idx].progress_int_cnt;
+	return 0;
 }
 
 static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1322,7 +1371,7 @@ static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
 }
 
 static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
-			    int vl, int mode, u64 data)
+			      int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 	u32 csr = entry->csr;
@@ -1346,7 +1395,7 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
 
 /* Port Access */
 static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
-			     int vl, int mode, u64 data)
+			       int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1356,7 +1405,7 @@ static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
 }
 
 static u64 port_access_u64_csr(const struct cntr_entry *entry,
-			     void *context, int vl, int mode, u64 data)
+			       void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 	u64 val;
@@ -1396,7 +1445,7 @@ static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
 }
 
 static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1406,7 +1455,7 @@ static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
 }
 
 static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
-			       int vl, int mode, u64 data)
+				 int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1427,18 +1476,25 @@ static u64 access_sw_unknown_frame_cnt(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
-				    void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
-	struct hfi1_pportdata *ppd = context;
+	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
+	u64 zero = 0;
+	u64 *counter;
 
-	if (vl != CNTR_INVALID_VL)
-		return 0;
+	if (vl == CNTR_INVALID_VL)
+		counter = &ppd->port_xmit_discards;
+	else if (vl >= 0 && vl < C_VL_COUNT)
+		counter = &ppd->port_xmit_discards_vl[vl];
+	else
+		counter = &zero;
 
-	return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
+	return read_write_sw(ppd->dd, counter, mode, data);
 }
 
 static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				       void *context, int vl, int mode,
+				       u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1450,7 +1506,7 @@ static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
 }
 
 static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
-				     void *context, int vl, int mode, u64 data)
+				      void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_pportdata *ppd = context;
 
@@ -1475,7 +1531,6 @@ static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
 			  u64 __percpu *cntr,
 			  int vl, int mode, u64 data)
 {
-
 	u64 ret = 0;
 
 	if (vl != CNTR_INVALID_VL)
@@ -1507,7 +1562,7 @@ static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
-			      void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = context;
 
@@ -1523,6 +1578,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry,
 	return dd->verbs_dev.n_piowait;
 }
 
+static u64 access_sw_pio_drain(const struct cntr_entry *entry,
+			       void *context, int vl, int mode, u64 data)
+{
+	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
+
+	return dd->verbs_dev.n_piodrain;
+}
+
 static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
 			      void *context, int vl, int mode, u64 data)
 {
@@ -1540,11 +1603,12 @@ static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
 }
 
 static u64 access_sw_send_schedule(const struct cntr_entry *entry,
-			       void *context, int vl, int mode, u64 data)
+				   void *context, int vl, int mode, u64 data)
 {
 	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
 
-	return dd->verbs_dev.n_send_schedule;
+	return read_write_cpu(dd, &dd->z_send_schedule, dd->send_schedule, vl,
+			      mode, data);
 }
 
 /* Software counters for the error status bits within MISC_ERR_STATUS */
@@ -3882,8 +3946,8 @@ static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry,		      \
 			      void *context, int vl, int mode, u64 data)      \
 {									      \
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;	      \
-	return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr,	      \
-			      ppd->ibport_data.cntr, vl,		      \
+	return read_write_cpu(ppd->dd, &ppd->ibport_data.rvp.z_ ##cntr,	      \
+			      ppd->ibport_data.rvp.cntr, vl,		      \
 			      mode, data);				      \
 }
 
@@ -3900,7 +3964,7 @@ static u64 access_ibp_##cntr(const struct cntr_entry *entry,		      \
 	if (vl != CNTR_INVALID_VL)					      \
 		return 0;						      \
 									      \
-	return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr,	      \
+	return read_write_sw(ppd->dd, &ppd->ibport_data.rvp.n_ ##cntr,	      \
 			     mode, data);				      \
 }
 
@@ -4063,10 +4127,28 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 			    access_sw_vtx_wait),
 [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
 			    access_sw_pio_wait),
+[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
+			    access_sw_pio_drain),
 [C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
 			    access_sw_kmem_wait),
 [C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
 			    access_sw_send_schedule),
+[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
+				      SEND_DMA_DESC_FETCHED_CNT, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      dev_access_u32_csr),
+[C_SDMA_INT_CNT] = CNTR_ELEM("SDMAInt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_int_cnt),
+[C_SDMA_ERR_CNT] = CNTR_ELEM("SDMAErrCt", 0, 0,
+			     CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+			     access_sde_err_cnt),
+[C_SDMA_IDLE_INT_CNT] = CNTR_ELEM("SDMAIdInt", 0, 0,
+				  CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				  access_sde_idle_int_cnt),
+[C_SDMA_PROGRESS_INT_CNT] = CNTR_ELEM("SDMAPrIntCn", 0, 0,
+				      CNTR_NORMAL | CNTR_32BIT | CNTR_SDMA,
+				      access_sde_progress_int_cnt),
 /* MISC_ERR_STATUS */
 [C_MISC_PLL_LOCK_FAIL_ERR] = CNTR_ELEM("MISC_PLL_LOCK_FAIL_ERR", 0, 0,
 				CNTR_NORMAL,
@@ -4876,28 +4958,28 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
 [C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
 [C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				     CNTR_SYNTH | CNTR_VL),
 [C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
-			CNTR_SYNTH | CNTR_VL),
+				      CNTR_SYNTH | CNTR_VL),
 [C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
 [C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
 [C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_dn_cnt),
+			     access_sw_link_dn_cnt),
 [C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_link_up_cnt),
+			   access_sw_link_up_cnt),
 [C_SW_UNKNOWN_FRAME] = CNTR_ELEM("UnknownFrame", 0, 0, CNTR_NORMAL,
 				 access_sw_unknown_frame_cnt),
 [C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
-			access_sw_xmit_discards),
+			     access_sw_xmit_discards),
 [C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
-			CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
-			access_sw_xmit_discards),
+				CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
+				access_sw_xmit_discards),
 [C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
-			access_xmit_constraint_errs),
+				 access_xmit_constraint_errs),
 [C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
-			access_rcv_constraint_errs),
+				access_rcv_constraint_errs),
 [C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
 [C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
 [C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
@@ -4913,9 +4995,9 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
 			       access_sw_cpu_rc_acks),
 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_qacks),
+				access_sw_cpu_rc_qacks),
 [C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
-			       access_sw_cpu_rc_delayed_comp),
+				       access_sw_cpu_rc_delayed_comp),
 [OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
 [OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
 [OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
@@ -5064,7 +5146,7 @@ done:
  * the buffer.  End in '*' if the buffer is too short.
  */
 static char *flag_string(char *buf, int buf_len, u64 flags,
-				struct flag_table *table, int table_size)
+			 struct flag_table *table, int table_size)
 {
 	char extra[32];
 	char *p = buf;
@@ -5125,10 +5207,8 @@ static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
 	if (source < ARRAY_SIZE(cce_misc_names))
 		strncpy(buf, cce_misc_names[source], bsize);
 	else
-		snprintf(buf,
-			bsize,
-			"Reserved%u",
-			source + IS_GENERAL_ERR_START);
+		snprintf(buf, bsize, "Reserved%u",
+			 source + IS_GENERAL_ERR_START);
 
 	return buf;
 }
@@ -5167,7 +5247,7 @@ static char *is_various_name(char *buf, size_t bsize, unsigned int source)
 	if (source < ARRAY_SIZE(various_names))
 		strncpy(buf, various_names[source], bsize);
 	else
-		snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
+		snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
 	return buf;
 }
 
@@ -5252,51 +5332,56 @@ static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
 static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
+			   cce_err_status_flags,
+			   ARRAY_SIZE(cce_err_status_flags));
 }
 
 static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
+			   rxe_err_status_flags,
+			   ARRAY_SIZE(rxe_err_status_flags));
 }
 
 static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags, misc_err_status_flags,
-			ARRAY_SIZE(misc_err_status_flags));
+			   ARRAY_SIZE(misc_err_status_flags));
 }
 
 static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
+			   pio_err_status_flags,
+			   ARRAY_SIZE(pio_err_status_flags));
 }
 
 static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sdma_err_status_flags,
-			ARRAY_SIZE(sdma_err_status_flags));
+			   sdma_err_status_flags,
+			   ARRAY_SIZE(sdma_err_status_flags));
 }
 
 static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
+			   egress_err_status_flags,
+			   ARRAY_SIZE(egress_err_status_flags));
 }
 
 static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-		egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
+			   egress_err_info_flags,
+			   ARRAY_SIZE(egress_err_info_flags));
 }
 
 static char *send_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			send_err_status_flags,
-			ARRAY_SIZE(send_err_status_flags));
+			   send_err_status_flags,
+			   ARRAY_SIZE(send_err_status_flags));
 }
 
 static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -5309,7 +5394,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	 * report or record it.
 	 */
 	dd_dev_info(dd, "CCE Error: %s\n",
-		cce_err_status_string(buf, sizeof(buf), reg));
+		    cce_err_status_string(buf, sizeof(buf), reg));
 
 	if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK) &&
 	    is_ax(dd) && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
@@ -5339,14 +5424,14 @@ static void update_rcverr_timer(unsigned long opaque)
 	u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 
 	if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
-		ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
+	    ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
 		dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
-		set_link_down_reason(ppd,
-		  OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
-			OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
+		set_link_down_reason(
+		ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
+		OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
 		queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
 	}
-	dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
+	dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
 
 	mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
 }
@@ -5372,7 +5457,7 @@ static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Receive Error: %s\n",
-		rxe_err_status_string(buf, sizeof(buf), reg));
+		    rxe_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_RXE_FREEZE_ERR) {
 		int flags = 0;
@@ -5399,7 +5484,7 @@ static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Misc Error: %s",
-		misc_err_status_string(buf, sizeof(buf), reg));
+		    misc_err_status_string(buf, sizeof(buf), reg));
 	for (i = 0; i < NUM_MISC_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
 			incr_cntr64(&dd->misc_err_status_cnt[i]);
@@ -5412,7 +5497,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "PIO Error: %s\n",
-		pio_err_status_string(buf, sizeof(buf), reg));
+		    pio_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_PIO_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5429,7 +5514,7 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "SDMA Error: %s\n",
-		sdma_err_status_string(buf, sizeof(buf), reg));
+		    sdma_err_status_string(buf, sizeof(buf), reg));
 
 	if (reg & ALL_SDMA_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
@@ -5440,12 +5525,14 @@ static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	}
 }
 
-static void count_port_inactive(struct hfi1_devdata *dd)
+static inline void __count_port_discards(struct hfi1_pportdata *ppd)
 {
-	struct hfi1_pportdata *ppd = dd->pport;
+	incr_cntr64(&ppd->port_xmit_discards);
+}
 
-	if (ppd->port_xmit_discards < ~(u64)0)
-		ppd->port_xmit_discards++;
+static void count_port_inactive(struct hfi1_devdata *dd)
+{
+	__count_port_discards(dd->pport);
 }
 
 /*
@@ -5457,7 +5544,8 @@ static void count_port_inactive(struct hfi1_devdata *dd)
  * egress error if more than one packet fails the same integrity check
  * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
  */
-static void handle_send_egress_err_info(struct hfi1_devdata *dd)
+static void handle_send_egress_err_info(struct hfi1_devdata *dd,
+					int vl)
 {
 	struct hfi1_pportdata *ppd = dd->pport;
 	u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
@@ -5468,14 +5556,44 @@ static void handle_send_egress_err_info(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_EGRESS_ERR_INFO, info);
 
 	dd_dev_info(dd,
-		"Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
-		info, egress_err_info_string(buf, sizeof(buf), info), src);
+		    "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
+		    info, egress_err_info_string(buf, sizeof(buf), info), src);
 
 	/* Eventually add other counters for each bit */
+	if (info & PORT_DISCARD_EGRESS_ERRS) {
+		int weight, i;
 
-	if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
-		if (ppd->port_xmit_discards < ~(u64)0)
-			ppd->port_xmit_discards++;
+		/*
+		 * Count all applicable bits as individual errors and
+		 * attribute them to the packet that triggered this handler.
+		 * This may not be completely accurate due to limitations
+		 * on the available hardware error information.  There is
+		 * a single information register and any number of error
+		 * packets may have occurred and contributed to it before
+		 * this routine is called.  This means that:
+		 * a) If multiple packets with the same error occur before
+		 *    this routine is called, earlier packets are missed.
+		 *    There is only a single bit for each error type.
+		 * b) Errors may not be attributed to the correct VL.
+		 *    The driver is attributing all bits in the info register
+		 *    to the packet that triggered this call, but bits
+		 *    could be an accumulation of different packets with
+		 *    different VLs.
+		 * c) A single error packet may have multiple counts attached
+		 *    to it.  There is no way for the driver to know if
+		 *    multiple bits set in the info register are due to a
+		 *    single packet or multiple packets.  The driver assumes
+		 *    multiple packets.
+		 */
+		weight = hweight64(info & PORT_DISCARD_EGRESS_ERRS);
+		for (i = 0; i < weight; i++) {
+			__count_port_discards(ppd);
+			if (vl >= 0 && vl < TXE_NUM_DATA_VL)
+				incr_cntr64(&ppd->port_xmit_discards_vl[vl]);
+			else if (vl == 15)
+				incr_cntr64(&ppd->port_xmit_discards_vl
+					    [C_VL_15]);
+		}
 	}
 }
 
@@ -5493,12 +5611,71 @@ static inline int port_inactive_err(u64 posn)
  * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
  * register. Does it represent a 'disallowed packet' error?
  */
-static inline int disallowed_pkt_err(u64 posn)
+static inline int disallowed_pkt_err(int posn)
 {
 	return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
 		posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
 }
 
+/*
+ * Input value is a bit position of one of the SDMA engine disallowed
+ * packet errors.  Return which engine.  Use of this must be guarded by
+ * disallowed_pkt_err().
+ */
+static inline int disallowed_pkt_engine(int posn)
+{
+	return posn - SEES(TX_SDMA0_DISALLOWED_PACKET);
+}
+
+/*
+ * Translate an SDMA engine to a VL.  Return -1 if the tranlation cannot
+ * be done.
+ */
+static int engine_to_vl(struct hfi1_devdata *dd, int engine)
+{
+	struct sdma_vl_map *m;
+	int vl;
+
+	/* range check */
+	if (engine < 0 || engine >= TXE_NUM_SDMA_ENGINES)
+		return -1;
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->sdma_map);
+	vl = m->engine_to_vl[engine];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/*
+ * Translate the send context (sofware index) into a VL.  Return -1 if the
+ * translation cannot be done.
+ */
+static int sc_to_vl(struct hfi1_devdata *dd, int sw_index)
+{
+	struct send_context_info *sci;
+	struct send_context *sc;
+	int i;
+
+	sci = &dd->send_contexts[sw_index];
+
+	/* there is no information for user (PSM) and ack contexts */
+	if (sci->type != SC_KERNEL)
+		return -1;
+
+	sc = sci->sc;
+	if (!sc)
+		return -1;
+	if (dd->vld[15].sc == sc)
+		return 15;
+	for (i = 0; i < num_vls; i++)
+		if (dd->vld[i].sc == sc)
+			return i;
+
+	return -1;
+}
+
 static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 {
 	u64 reg_copy = reg, handled = 0;
@@ -5507,34 +5684,34 @@ static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 
 	if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
 		start_freeze_handling(dd->pport, 0);
-	if (is_ax(dd) && (reg &
-		    SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
-		    && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
+	else if (is_ax(dd) &&
+		 (reg & SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK) &&
+		 (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
 		start_freeze_handling(dd->pport, 0);
 
 	while (reg_copy) {
 		int posn = fls64(reg_copy);
-		/*
-		 * fls64() returns a 1-based offset, but we generally
-		 * want 0-based offsets.
-		 */
+		/* fls64() returns a 1-based offset, we want it zero based */
 		int shift = posn - 1;
+		u64 mask = 1ULL << shift;
 
 		if (port_inactive_err(shift)) {
 			count_port_inactive(dd);
-			handled |= (1ULL << shift);
+			handled |= mask;
 		} else if (disallowed_pkt_err(shift)) {
-			handle_send_egress_err_info(dd);
-			handled |= (1ULL << shift);
+			int vl = engine_to_vl(dd, disallowed_pkt_engine(shift));
+
+			handle_send_egress_err_info(dd, vl);
+			handled |= mask;
 		}
-		clear_bit(shift, (unsigned long *)&reg_copy);
+		reg_copy &= ~mask;
 	}
 
 	reg &= ~handled;
 
 	if (reg)
 		dd_dev_info(dd, "Egress Error: %s\n",
-			egress_err_status_string(buf, sizeof(buf), reg));
+			    egress_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_EGRESS_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5548,7 +5725,7 @@ static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	int i = 0;
 
 	dd_dev_info(dd, "Send Error: %s\n",
-		send_err_status_string(buf, sizeof(buf), reg));
+		    send_err_status_string(buf, sizeof(buf), reg));
 
 	for (i = 0; i < NUM_SEND_ERR_STATUS_COUNTERS; i++) {
 		if (reg & (1ull << i))
@@ -5594,7 +5771,7 @@ static void interrupt_clear_down(struct hfi1_devdata *dd,
 			u64 mask;
 
 			dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
-				eri->desc, reg);
+				   eri->desc, reg);
 			/*
 			 * Read-modify-write so any other masked bits
 			 * remain masked.
@@ -5618,14 +5795,15 @@ static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
 		interrupt_clear_down(dd, 0, eri);
 	} else {
 		dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
-			source);
+			   source);
 	}
 }
 
 static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
 {
 	return flag_string(buf, buf_len, flags,
-			sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
+			   sc_err_status_flags,
+			   ARRAY_SIZE(sc_err_status_flags));
 }
 
 /*
@@ -5650,15 +5828,15 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	sw_index = dd->hw_to_sw[hw_context];
 	if (sw_index >= dd->num_send_contexts) {
 		dd_dev_err(dd,
-			"out of range sw index %u for send context %u\n",
-			sw_index, hw_context);
+			   "out of range sw index %u for send context %u\n",
+			   sw_index, hw_context);
 		return;
 	}
 	sci = &dd->send_contexts[sw_index];
 	sc = sci->sc;
 	if (!sc) {
 		dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
-			sw_index, hw_context);
+			   sw_index, hw_context);
 		return;
 	}
 
@@ -5668,10 +5846,11 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
 	status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
 
 	dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
-		send_context_err_status_string(flags, sizeof(flags), status));
+		    send_context_err_status_string(flags, sizeof(flags),
+						   status));
 
 	if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
-		handle_send_egress_err_info(dd);
+		handle_send_egress_err_info(dd, sc_to_vl(dd, sw_index));
 
 	/*
 	 * Automatically restart halted kernel contexts out of interrupt
@@ -5704,6 +5883,7 @@ static void handle_sdma_eng_err(struct hfi1_devdata *dd,
 	dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
 		   sde->this_idx, source, (unsigned long long)status);
 #endif
+	sde->err_cnt++;
 	sdma_engine_error(sde, status);
 
 	/*
@@ -5752,23 +5932,22 @@ static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
 		interrupt_clear_down(dd, 0, eri);
 	else
 		dd_dev_info(dd,
-			"%s: Unimplemented/reserved interrupt %d\n",
-			__func__, source);
+			    "%s: Unimplemented/reserved interrupt %d\n",
+			    __func__, source);
 }
 
 static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 {
-	/* source is always zero */
+	/* src_ctx is always zero */
 	struct hfi1_pportdata *ppd = dd->pport;
 	unsigned long flags;
 	u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 
 	if (reg & QSFP_HFI0_MODPRST_N) {
-
-		dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
-				__func__);
-
 		if (!qsfp_mod_present(ppd)) {
+			dd_dev_info(dd, "%s: QSFP module removed\n",
+				    __func__);
+
 			ppd->driver_link_ready = 0;
 			/*
 			 * Cable removed, reset all our information about the
@@ -5781,14 +5960,23 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 			 * an interrupt when a cable is inserted
 			 */
 			ppd->qsfp_info.cache_valid = 0;
-			ppd->qsfp_info.qsfp_interrupt_functional = 0;
+			ppd->qsfp_info.reset_needed = 0;
+			ppd->qsfp_info.limiting_active = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+					       flags);
+			/* Invert the ModPresent pin now to detect plug-in */
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			if ((ppd->offline_disabled_reason >
+			  HFI1_ODR_MASK(
+			  OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED)) ||
+			  (ppd->offline_disabled_reason ==
+			  HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
+				ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+
 			if (ppd->host_link_state == HLS_DN_POLL) {
 				/*
 				 * The link is still in POLL. This means
@@ -5799,28 +5987,33 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
 				queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 			}
 		} else {
+			dd_dev_info(dd, "%s: QSFP module inserted\n",
+				    __func__);
+
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.cache_valid = 0;
 			ppd->qsfp_info.cache_refresh_required = 1;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-						flags);
+					       flags);
 
+			/*
+			 * Stop inversion of ModPresent pin to detect
+			 * removal of the cable
+			 */
 			qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
-			write_csr(dd,
-					dd->hfi1_id ?
-						ASIC_QSFP2_INVERT :
-						ASIC_QSFP1_INVERT,
-				qsfp_int_mgmt);
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_INVERT :
+				  ASIC_QSFP1_INVERT, qsfp_int_mgmt);
+
+			ppd->offline_disabled_reason =
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 		}
 	}
 
 	if (reg & QSFP_HFI0_INT_N) {
-
-		dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
-				__func__);
+		dd_dev_info(dd, "%s: Interrupt received from QSFP module\n",
+			    __func__);
 		spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 		ppd->qsfp_info.check_interrupt_flags = 1;
-		ppd->qsfp_info.qsfp_interrupt_functional = 1;
 		spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
 	}
 
@@ -5834,11 +6027,11 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5848,11 +6041,11 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
 	int ret;
 
 	ret = do_8051_command(dd, HCMD_MISC,
-		(u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
-		NULL);
+			      (u64)HCMD_MISC_GRANT_LCB_ACCESS <<
+			      LOAD_DATA_FIELD_ID_SHIFT, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "%s: command failed with error %d\n",
-			__func__, ret);
+			   __func__, ret);
 	}
 	return ret == HCMD_SUCCESS ? 0 : -EBUSY;
 }
@@ -5864,8 +6057,8 @@ static int request_8051_lcb_access(struct hfi1_devdata *dd)
 static inline void set_host_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
-				| DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK |
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
 }
 
 /*
@@ -5875,7 +6068,7 @@ static inline void set_host_lcb_access(struct hfi1_devdata *dd)
 static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
 {
 	write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
-				DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
+		  DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
 }
 
 /*
@@ -5909,7 +6102,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 	/* this access is valid only when the link is up */
 	if ((ppd->host_link_state & HLS_UP) == 0) {
 		dd_dev_info(dd, "%s: link state %s not up\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 		ret = -EBUSY;
 		goto done;
 	}
@@ -5918,8 +6111,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 		ret = request_host_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to acquire LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to acquire LCB access, err %d\n",
+				   __func__, ret);
 			goto done;
 		}
 		set_host_lcb_access(dd);
@@ -5956,7 +6149,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 
 	if (dd->lcb_access_count == 0) {
 		dd_dev_err(dd, "%s: LCB access count is zero.  Skipping.\n",
-			__func__);
+			   __func__);
 		goto done;
 	}
 
@@ -5965,8 +6158,8 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
 		ret = request_8051_lcb_access(dd);
 		if (ret) {
 			dd_dev_err(dd,
-				"%s: unable to release LCB access, err %d\n",
-				__func__, ret);
+				   "%s: unable to release LCB access, err %d\n",
+				   __func__, ret);
 			/* restore host access if the grant didn't work */
 			set_host_lcb_access(dd);
 			goto done;
@@ -5998,19 +6191,26 @@ static void init_lcb_access(struct hfi1_devdata *dd)
 static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
 {
 	write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
-		DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
-		| (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
-		| (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
+		  DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK |
+		  (u64)return_code <<
+		  DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT |
+		  (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
 }
 
 /*
- * Handle requests from the 8051.
+ * Handle host requests from the 8051.
+ *
+ * This is a work-queue function outside of the interrupt.
  */
-static void handle_8051_request(struct hfi1_devdata *dd)
+void handle_8051_request(struct work_struct *work)
 {
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+							dc_host_req_work);
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 reg;
-	u16 data;
-	u8 type;
+	u16 data = 0;
+	u8 type, i, lanes, *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
 
 	reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
 	if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
@@ -6031,12 +6231,46 @@ static void handle_8051_request(struct hfi1_devdata *dd)
 	case HREQ_READ_CONFIG:
 	case HREQ_SET_TX_EQ_ABS:
 	case HREQ_SET_TX_EQ_REL:
-	case HREQ_ENABLE:
 		dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
-			type);
+			    type);
 		hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
 		break;
 
+	case HREQ_ENABLE:
+		lanes = data & 0xF;
+		for (i = 0; lanes; lanes >>= 1, i++) {
+			if (!(lanes & 1))
+				continue;
+			if (data & 0x200) {
+				/* enable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte |= (1 << (i + 4));
+			} else {
+				/* disable TX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x8 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x80)
+					cdr_ctrl_byte &= ~(1 << (i + 4));
+			}
+
+			if (data & 0x800) {
+				/* enable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte |= (1 << i);
+			} else {
+				/* disable RX CDR */
+				if (cache[QSFP_MOD_PWR_OFFS] & 0x4 &&
+				    cache[QSFP_CDR_INFO_OFFS] & 0x40)
+					cdr_ctrl_byte &= ~(1 << i);
+			}
+		}
+		one_qsfp_write(ppd, dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+			       &cdr_ctrl_byte, 1);
+		hreq_response(dd, HREQ_SUCCESS, data);
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+		break;
+
 	case HREQ_CONFIG_DONE:
 		hreq_response(dd, HREQ_SUCCESS, 0);
 		break;
@@ -6056,11 +6290,11 @@ static void write_global_credit(struct hfi1_devdata *dd,
 				u8 vau, u16 total, u16 shared)
 {
 	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-		((u64)total
-			<< SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
-		| ((u64)shared
-			<< SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
-		| ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+		  ((u64)total <<
+		   SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
+		  ((u64)shared <<
+		   SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
+		  ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
 }
 
 /*
@@ -6097,7 +6331,7 @@ void reset_link_credits(struct hfi1_devdata *dd)
 
 	/* remove all previous VL credit limits */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	write_global_credit(dd, 0, 0, 0);
 	/* reset the CM block */
@@ -6139,15 +6373,14 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
 	write_csr(dd, DC_LCB_CFG_RUN, 0);
 	/* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
-		1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
+		  1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
 	/* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
 	dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
 	reg = read_csr(dd, DCC_CFG_RESET);
-	write_csr(dd, DCC_CFG_RESET,
-		reg
-		| (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
-		| (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
-	(void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
+	write_csr(dd, DCC_CFG_RESET, reg |
+		  (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
+		  (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+	(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
 	if (!abort) {
 		udelay(1);    /* must hold for the longer of 16cclks or 20ns */
 		write_csr(dd, DCC_CFG_RESET, reg);
@@ -6176,14 +6409,18 @@ static void dc_shutdown(struct hfi1_devdata *dd)
 	spin_unlock_irqrestore(&dd->dc8051_lock, flags);
 	/* Shutdown the LCB */
 	lcb_shutdown(dd, 1);
-	/* Going to OFFLINE would have causes the 8051 to put the
+	/*
+	 * Going to OFFLINE would have causes the 8051 to put the
 	 * SerDes into reset already. Just need to shut down the 8051,
-	 * itself. */
+	 * itself.
+	 */
 	write_csr(dd, DC_DC8051_CFG_RST, 0x1);
 }
 
-/* Calling this after the DC has been brought out of reset should not
- * do any damage. */
+/*
+ * Calling this after the DC has been brought out of reset should not
+ * do any damage.
+ */
 static void dc_start(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -6199,7 +6436,7 @@ static void dc_start(struct hfi1_devdata *dd)
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) {
 		dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
-			__func__);
+			   __func__);
 	}
 	/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
 	write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -6292,7 +6529,7 @@ static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
 	write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
 	/* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
 	write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
-		DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+		  DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
 }
 
@@ -6309,8 +6546,10 @@ void handle_sma_message(struct work_struct *work)
 	u64 msg;
 	int ret;
 
-	/* msg is bytes 1-4 of the 40-bit idle message - the command code
-	   is stripped off */
+	/*
+	 * msg is bytes 1-4 of the 40-bit idle message - the command code
+	 * is stripped off
+	 */
 	ret = read_idle_sma(dd, &msg);
 	if (ret)
 		return;
@@ -6336,8 +6575,8 @@ void handle_sma_message(struct work_struct *work)
 		 *
 		 * Can activate the node.  Discard otherwise.
 		 */
-		if (ppd->host_link_state == HLS_UP_ARMED
-					&& ppd->is_active_optimize_enabled) {
+		if (ppd->host_link_state == HLS_UP_ARMED &&
+		    ppd->is_active_optimize_enabled) {
 			ppd->neighbor_normal = 1;
 			ret = set_link_state(ppd, HLS_UP_ACTIVE);
 			if (ret)
@@ -6349,8 +6588,8 @@ void handle_sma_message(struct work_struct *work)
 		break;
 	default:
 		dd_dev_err(dd,
-			"%s: received unexpected SMA idle message 0x%llx\n",
-			__func__, msg);
+			   "%s: received unexpected SMA idle message 0x%llx\n",
+			   __func__, msg);
 		break;
 	}
 }
@@ -6442,10 +6681,9 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
 
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
-				freeze ? "" : "un",
-				reg & ALL_FROZE,
-				freeze ? ALL_FROZE : 0ull);
+				   "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing",
+				   freeze ? "" : "un", reg & ALL_FROZE,
+				   freeze ? ALL_FROZE : 0ull);
 			return;
 		}
 		usleep_range(80, 120);
@@ -6475,11 +6713,17 @@ static void rxe_freeze(struct hfi1_devdata *dd)
  */
 static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 {
+	u32 rcvmask;
 	int i;
 
 	/* enable all kernel contexts */
-	for (i = 0; i < dd->n_krcv_queues; i++)
-		hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
+	for (i = 0; i < dd->n_krcv_queues; i++) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
+		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
+		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
+		hfi1_rcvctrl(dd, rcvmask, i);
+	}
 
 	/* enable port */
 	add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -6564,7 +6808,7 @@ void handle_freeze(struct work_struct *work)
 void handle_link_up(struct work_struct *work)
 {
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
-								link_up_work);
+						  link_up_work);
 	set_link_state(ppd, HLS_UP_INIT);
 
 	/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
@@ -6583,17 +6827,20 @@ void handle_link_up(struct work_struct *work)
 	if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
 		/* oops - current speed is not enabled, bounce */
 		dd_dev_err(ppd->dd,
-			"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
-			ppd->link_speed_active, ppd->link_speed_enabled);
+			   "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
+			   ppd->link_speed_active, ppd->link_speed_enabled);
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
-			OPA_LINKDOWN_REASON_SPEED_POLICY);
+				     OPA_LINKDOWN_REASON_SPEED_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
 
-/* Several pieces of LNI information were cached for SMA in ppd.
- * Reset these on link down */
+/*
+ * Several pieces of LNI information were cached for SMA in ppd.
+ * Reset these on link down
+ */
 static void reset_neighbor_info(struct hfi1_pportdata *ppd)
 {
 	ppd->neighbor_guid = 0;
@@ -6613,7 +6860,13 @@ void handle_link_down(struct work_struct *work)
 	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
 								link_down_work);
 
-	/* go offline first, then deal with reasons */
+	if ((ppd->host_link_state &
+	     (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) &&
+	     ppd->port_type == PORT_TYPE_FIXED)
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NOT_INSTALLED);
+
+	/* Go offline first, then deal with reading/writing through 8051 */
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	lcl_reason = 0;
@@ -6633,12 +6886,16 @@ void handle_link_down(struct work_struct *work)
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
 
-	/* If there is no cable attached, turn the DC off. Otherwise,
-	 * start the link bring up. */
-	if (!qsfp_mod_present(ppd))
+	/*
+	 * If there is no cable attached, turn the DC off. Otherwise,
+	 * start the link bring up.
+	 */
+	if (!qsfp_mod_present(ppd)) {
 		dc_shutdown(ppd->dd);
-	else
+	} else {
+		tune_serdes(ppd);
 		start_link(ppd);
+	}
 }
 
 void handle_link_bounce(struct work_struct *work)
@@ -6651,10 +6908,11 @@ void handle_link_bounce(struct work_struct *work)
 	 */
 	if (ppd->host_link_state & HLS_UP) {
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	} else {
 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
-			__func__, link_state_name(ppd->host_link_state));
+			    __func__, link_state_name(ppd->host_link_state));
 	}
 }
 
@@ -6751,7 +7009,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
 	case 3: return OPA_LINK_WIDTH_3X;
 	default:
 		dd_dev_info(dd, "%s: invalid width %d, using 4\n",
-			__func__, width);
+			    __func__, width);
 		/* fall through */
 	case 4: return OPA_LINK_WIDTH_4X;
 	}
@@ -6763,6 +7021,7 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
 static const u8 bit_counts[16] = {
 	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
 };
+
 static inline u8 nibble_to_count(u8 nibble)
 {
 	return bit_counts[nibble & 0xf];
@@ -6788,7 +7047,7 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 
 	/* read the active lanes */
 	read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-				&rx_polarity_inversion, &max_rate);
+			 &rx_polarity_inversion, &max_rate);
 	read_local_lni(dd, &enable_lane_rx);
 
 	/* convert to counts */
@@ -6800,8 +7059,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 	 * handle_verify_cap().  The ASIC 8051 firmware does not correctly
 	 * set the max_rate field in handle_verify_cap until v0.19.
 	 */
-	if ((dd->icode == ICODE_RTL_SILICON)
-				&& (dd->dc8051_ver < dc8051_ver(0, 19))) {
+	if ((dd->icode == ICODE_RTL_SILICON) &&
+	    (dd->dc8051_ver < dc8051_ver(0, 19))) {
 		/* max_rate: 0 = 12.5G, 1 = 25G */
 		switch (max_rate) {
 		case 0:
@@ -6809,8 +7068,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 			break;
 		default:
 			dd_dev_err(dd,
-				"%s: unexpected max rate %d, using 25Gb\n",
-				__func__, (int)max_rate);
+				   "%s: unexpected max rate %d, using 25Gb\n",
+				   __func__, (int)max_rate);
 			/* fall through */
 		case 1:
 			dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
@@ -6819,8 +7078,8 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
 	}
 
 	dd_dev_info(dd,
-		"Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
-		enable_lane_tx, tx, enable_lane_rx, rx);
+		    "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
+		    enable_lane_tx, tx, enable_lane_rx, rx);
 	*tx_width = link_width_to_bits(dd, tx);
 	*rx_width = link_width_to_bits(dd, rx);
 }
@@ -6923,13 +7182,8 @@ void handle_verify_cap(struct work_struct *work)
 	 */
 
 	read_vc_remote_phy(dd, &power_management, &continious);
-	read_vc_remote_fabric(
-		dd,
-		&vau,
-		&z,
-		&vcu,
-		&vl15buf,
-		&partner_supported_crc);
+	read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
+			      &partner_supported_crc);
 	read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
 	read_remote_device_id(dd, &device_id, &device_rev);
 	/*
@@ -6940,19 +7194,16 @@ void handle_verify_cap(struct work_struct *work)
 	/* print the active widths */
 	get_link_widths(dd, &active_tx, &active_rx);
 	dd_dev_info(dd,
-		"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
-		(int)power_management, (int)continious);
+		    "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
+		    (int)power_management, (int)continious);
 	dd_dev_info(dd,
-		"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
-		(int)vau,
-		(int)z,
-		(int)vcu,
-		(int)vl15buf,
-		(int)partner_supported_crc);
+		    "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
+		    (int)vau, (int)z, (int)vcu, (int)vl15buf,
+		    (int)partner_supported_crc);
 	dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
-		(u32)remote_tx_rate, (u32)link_widths);
+		    (u32)remote_tx_rate, (u32)link_widths);
 	dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
-		(u32)device_id, (u32)device_rev);
+		    (u32)device_id, (u32)device_rev);
 	/*
 	 * The peer vAU value just read is the peer receiver value.  HFI does
 	 * not support a transmit vAU of 0 (AU == 8).  We advertised that
@@ -6987,10 +7238,10 @@ void handle_verify_cap(struct work_struct *work)
 	reg = read_csr(dd, SEND_CM_CTRL);
 	if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
 		write_csr(dd, SEND_CM_CTRL,
-			reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	} else {
 		write_csr(dd, SEND_CM_CTRL,
-			reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
+			  reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
 	}
 
 	ppd->link_speed_active = 0;	/* invalid value */
@@ -7015,7 +7266,7 @@ void handle_verify_cap(struct work_struct *work)
 	}
 	if (ppd->link_speed_active == 0) {
 		dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
-			__func__, (int)remote_tx_rate);
+			   __func__, (int)remote_tx_rate);
 		ppd->link_speed_active = OPA_LINK_SPEED_25G;
 	}
 
@@ -7071,9 +7322,9 @@ void handle_verify_cap(struct work_struct *work)
 		read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
 		DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
 	dd_dev_info(dd,
-		"Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
-		ppd->neighbor_guid, ppd->neighbor_type,
-		ppd->mgmt_allowed, ppd->neighbor_fm_security);
+		    "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
+		    ppd->neighbor_guid, ppd->neighbor_type,
+		    ppd->mgmt_allowed, ppd->neighbor_fm_security);
 	if (ppd->mgmt_allowed)
 		add_full_mgmt_pkey(ppd);
 
@@ -7127,28 +7378,27 @@ retry:
 
 		/* bounce if not at starting active width */
 		if ((ppd->link_width_active !=
-					ppd->link_width_downgrade_tx_active)
-				|| (ppd->link_width_active !=
-					ppd->link_width_downgrade_rx_active)) {
+		     ppd->link_width_downgrade_tx_active) ||
+		    (ppd->link_width_active !=
+		     ppd->link_width_downgrade_rx_active)) {
 			dd_dev_err(ppd->dd,
-				"Link downgrade is disabled and link has downgraded, downing link\n");
+				   "Link downgrade is disabled and link has downgraded, downing link\n");
 			dd_dev_err(ppd->dd,
-				"  original 0x%x, tx active 0x%x, rx active 0x%x\n",
-				ppd->link_width_active,
-				ppd->link_width_downgrade_tx_active,
-				ppd->link_width_downgrade_rx_active);
+				   "  original 0x%x, tx active 0x%x, rx active 0x%x\n",
+				   ppd->link_width_active,
+				   ppd->link_width_downgrade_tx_active,
+				   ppd->link_width_downgrade_rx_active);
 			do_bounce = 1;
 		}
-	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
-		|| (lwde & ppd->link_width_downgrade_rx_active) == 0) {
+	} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
+		   (lwde & ppd->link_width_downgrade_rx_active) == 0) {
 		/* Tx or Rx is outside the enabled policy */
 		dd_dev_err(ppd->dd,
-			"Link is outside of downgrade allowed, downing link\n");
+			   "Link is outside of downgrade allowed, downing link\n");
 		dd_dev_err(ppd->dd,
-			"  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
-			lwde,
-			ppd->link_width_downgrade_tx_active,
-			ppd->link_width_downgrade_rx_active);
+			   "  enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
+			   lwde, ppd->link_width_downgrade_tx_active,
+			   ppd->link_width_downgrade_rx_active);
 		do_bounce = 1;
 	}
 
@@ -7157,8 +7407,9 @@ done:
 
 	if (do_bounce) {
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
-		  OPA_LINKDOWN_REASON_WIDTH_POLICY);
+				     OPA_LINKDOWN_REASON_WIDTH_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
+		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
@@ -7239,9 +7490,10 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			    & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
 				queue_link_down = 1;
 				dd_dev_info(dd, "Link error: %s\n",
-					dc8051_info_err_string(buf,
-						sizeof(buf),
-						err & FAILED_LNI));
+					    dc8051_info_err_string(buf,
+								   sizeof(buf),
+								   err &
+								   FAILED_LNI));
 			}
 			err &= ~(u64)FAILED_LNI;
 		}
@@ -7253,7 +7505,8 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		if (err) {
 			/* report remaining errors, but do not do anything */
 			dd_dev_err(dd, "8051 info error: %s\n",
-				dc8051_info_err_string(buf, sizeof(buf), err));
+				   dc8051_info_err_string(buf, sizeof(buf),
+							  err));
 		}
 
 		/*
@@ -7281,7 +7534,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			host_msg &= ~(u64)LINKUP_ACHIEVED;
 		}
 		if (host_msg & EXT_DEVICE_CFG_REQ) {
-			handle_8051_request(dd);
+			queue_work(ppd->hfi1_wq, &ppd->dc_host_req_work);
 			host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
 		}
 		if (host_msg & VERIFY_CAP_FRAME) {
@@ -7306,8 +7559,9 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		if (host_msg) {
 			/* report remaining messages, but do not do anything */
 			dd_dev_info(dd, "8051 info host message: %s\n",
-				dc8051_info_host_msg_string(buf, sizeof(buf),
-					host_msg));
+				    dc8051_info_host_msg_string(buf,
+								sizeof(buf),
+								host_msg));
 		}
 
 		reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
@@ -7320,25 +7574,27 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		 */
 		dd_dev_err(dd, "Lost 8051 heartbeat\n");
 		write_csr(dd, DC_DC8051_ERR_EN,
-			read_csr(dd, DC_DC8051_ERR_EN)
-			  & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
+			  read_csr(dd, DC_DC8051_ERR_EN) &
+			  ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
 
 		reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
 	}
 	if (reg) {
 		/* report the error, but do not do anything */
 		dd_dev_err(dd, "8051 error: %s\n",
-			dc8051_err_string(buf, sizeof(buf), reg));
+			   dc8051_err_string(buf, sizeof(buf), reg));
 	}
 
 	if (queue_link_down) {
-		/* if the link is already going down or disabled, do not
-		 * queue another */
-		if ((ppd->host_link_state
-				    & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
-				|| ppd->link_enabled == 0) {
+		/*
+		 * if the link is already going down or disabled, do not
+		 * queue another
+		 */
+		if ((ppd->host_link_state &
+		    (HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
+		    ppd->link_enabled == 0) {
 			dd_dev_info(dd, "%s: not queuing link down\n",
-				__func__);
+				    __func__);
 		} else {
 			queue_work(ppd->hfi1_wq, &ppd->link_down_work);
 		}
@@ -7480,8 +7736,10 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			/* set status bit */
 			dd->err_info_rcvport.status_and_code |=
 				OPA_EI_STATUS_SMASK;
-			/* save first 2 flits in the packet that caused
-			 * the error */
+			/*
+			 * save first 2 flits in the packet that caused
+			 * the error
+			 */
 			 dd->err_info_rcvport.packet_flit1 = hdr0;
 			 dd->err_info_rcvport.packet_flit2 = hdr1;
 		}
@@ -7514,7 +7772,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 		/* just report this */
 		dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
 		dd_dev_info(dd, "           hdr0 0x%llx, hdr1 0x%llx\n",
-			hdr0, hdr1);
+			    hdr0, hdr1);
 
 		reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
 	}
@@ -7533,7 +7791,7 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	/* report any remaining errors */
 	if (reg)
 		dd_dev_info(dd, "DCC Error: %s\n",
-			dcc_err_string(buf, sizeof(buf), reg));
+			    dcc_err_string(buf, sizeof(buf), reg));
 
 	if (lcl_reason == 0)
 		lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
@@ -7550,7 +7808,7 @@ static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 	char buf[96];
 
 	dd_dev_info(dd, "LCB Error: %s\n",
-		lcb_err_string(buf, sizeof(buf), reg));
+		    lcb_err_string(buf, sizeof(buf), reg));
 }
 
 /*
@@ -7640,7 +7898,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7666,7 +7924,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
 		err_detail = "out of range";
 	}
 	dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
-		err_detail, source);
+		   err_detail, source);
 }
 
 /*
@@ -7677,12 +7935,14 @@ static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
 	char name[64];
 
 	dd_dev_err(dd, "unexpected %s interrupt\n",
-				is_reserved_name(name, sizeof(name), source));
+		   is_reserved_name(name, sizeof(name), source));
 }
 
 static const struct is_table is_table[] = {
-/* start		     end
-				name func		interrupt func */
+/*
+ * start		 end
+ *				name func		interrupt func
+ */
 { IS_GENERAL_ERR_START,  IS_GENERAL_ERR_END,
 				is_misc_err_name,	is_misc_err_int },
 { IS_SDMAENG_ERR_START,  IS_SDMAENG_ERR_END,
@@ -7753,7 +8013,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
 
 	/* phase 2: call the appropriate handler */
 	for_each_set_bit(bit, (unsigned long *)&regs[0],
-						CCE_NUM_INT_CSRS*64) {
+			 CCE_NUM_INT_CSRS * 64) {
 		is_interrupt(dd, bit);
 	}
 
@@ -7776,27 +8036,27 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
 
 	/* This read_csr is really bad in the hot path */
 	status = read_csr(dd,
-			CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
-			& sde->imask;
+			  CCE_INT_STATUS + (8 * (IS_SDMA_START / 64)))
+			  & sde->imask;
 	if (likely(status)) {
 		/* clear the interrupt(s) */
 		write_csr(dd,
-			CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
-			status);
+			  CCE_INT_CLEAR + (8 * (IS_SDMA_START / 64)),
+			  status);
 
 		/* handle the interrupt(s) */
 		sdma_engine_interrupt(sde, status);
 	} else
 		dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
-			sde->this_idx);
+			   sde->this_idx);
 
 	return IRQ_HANDLED;
 }
 
 /*
- * Clear the receive interrupt, forcing the write and making sure
- * we have data from the chip, pushing everything in front of it
- * back to the host.
+ * Clear the receive interrupt.  Use a read of the interrupt clear CSR
+ * to insure that the write completed.  This does NOT guarantee that
+ * queued DMA writes to memory from the chip are pushed.
  */
 static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
 {
@@ -7810,27 +8070,45 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
 }
 
 /* force the receive interrupt */
-static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
+void force_recv_intr(struct hfi1_ctxtdata *rcd)
 {
 	write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
 }
 
-/* return non-zero if a packet is present */
+/*
+ * Return non-zero if a packet is present.
+ *
+ * This routine is called when rechecking for packets after the RcvAvail
+ * interrupt has been cleared down.  First, do a quick check of memory for
+ * a packet present.  If not found, use an expensive CSR read of the context
+ * tail to determine the actual tail.  The CSR read is necessary because there
+ * is no method to push pending DMAs to memory other than an interrupt and we
+ * are trying to determine if we need to force an interrupt.
+ */
 static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 {
+	u32 tail;
+	int present;
+
 	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
-		return (rcd->seq_cnt ==
+		present = (rcd->seq_cnt ==
 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
+	else /* is RDMA rtail */
+		present = (rcd->head != get_rcvhdrtail(rcd));
+
+	if (present)
+		return 1;
 
-	/* else is RDMA rtail */
-	return (rcd->head != get_rcvhdrtail(rcd));
+	/* fall back to a CSR read, correct indpendent of DMA_RTAIL */
+	tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
+	return rcd->head != tail;
 }
 
 /*
  * Receive packet IRQ handler.  This routine expects to be on its own IRQ.
  * This routine will try to handle packets immediately (latency), but if
  * it finds too many, it will invoke the thread handler (bandwitdh).  The
- * chip receive interupt is *not* cleared down until this or the thread (if
+ * chip receive interrupt is *not* cleared down until this or the thread (if
  * invoked) is finished.  The intent is to avoid extra interrupts while we
  * are processing packets anyway.
  */
@@ -7843,6 +8121,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
 
 	trace_hfi1_receive_interrupt(dd, rcd->ctxt);
 	this_cpu_inc(*dd->int_counter);
+	aspm_ctx_disable(rcd);
 
 	/* receive interrupt remains blocked while processing packets */
 	disposition = rcd->do_interrupt(rcd, 0);
@@ -7909,7 +8188,7 @@ u32 read_physical_state(struct hfi1_devdata *dd)
 				& DC_DC8051_STS_CUR_STATE_PORT_MASK;
 }
 
-static u32 read_logical_state(struct hfi1_devdata *dd)
+u32 read_logical_state(struct hfi1_devdata *dd)
 {
 	u64 reg;
 
@@ -8157,8 +8436,8 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
 	return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
 }
 
-static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
-			    u8 lane_id, u32 config_data)
+int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
+		     u8 lane_id, u32 config_data)
 {
 	u64 data;
 	int ret;
@@ -8169,8 +8448,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
 	ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"load 8051 config: field id %d, lane %d, err %d\n",
-			(int)field_id, (int)lane_id, ret);
+			   "load 8051 config: field id %d, lane %d, err %d\n",
+			   (int)field_id, (int)lane_id, ret);
 	}
 	return ret;
 }
@@ -8180,8 +8459,8 @@ static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
  * set the result, even on error.
  * Return 0 on success, -errno on failure
  */
-static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
-			    u32 *result)
+int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
+		     u32 *result)
 {
 	u64 big_data;
 	u32 addr;
@@ -8207,7 +8486,7 @@ static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
 	} else {
 		*result = 0;
 		dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
-			__func__, lane_id, field_id);
+			   __func__, lane_id, field_id);
 	}
 
 	return ret;
@@ -8244,7 +8523,7 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8326,7 +8605,7 @@ static void read_vc_remote_link_width(struct hfi1_devdata *dd,
 	u32 frame;
 
 	read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
-				&frame);
+			 &frame);
 	*remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
 				& REMOTE_TX_RATE_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
@@ -8366,7 +8645,7 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
 	*link_quality = 0;
 	if (dd->pport->host_link_state & HLS_UP) {
 		ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
-					&frame);
+				       &frame);
 		if (ret == 0)
 			*link_quality = (frame >> LINK_QUALITY_SHIFT)
 						& LINK_QUALITY_MASK;
@@ -8426,10 +8705,9 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
 	for (lane = 0; lane < 4; lane++) {
 		ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
 		if (ret) {
-			dd_dev_err(
-				dd,
-				"Unable to read lane %d firmware details\n",
-				lane);
+			dd_dev_err(dd,
+				   "Unable to read lane %d firmware details\n",
+				   lane);
 			continue;
 		}
 		version = (frame >> SPICO_ROM_VERSION_SHIFT)
@@ -8437,8 +8715,8 @@ static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
 		prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
 					& SPICO_ROM_PROD_ID_MASK;
 		dd_dev_info(dd,
-			"Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
-			lane, version, prod_id);
+			    "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
+			    lane, version, prod_id);
 	}
 }
 
@@ -8451,11 +8729,10 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
 {
 	int ret;
 
-	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
-		type, data_out);
+	ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG, type, data_out);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "read idle message: type %d, err %d\n",
-			(u32)type, ret);
+			   (u32)type, ret);
 		return -EINVAL;
 	}
 	dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
@@ -8472,8 +8749,8 @@ static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
  */
 static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
 {
-	return read_idle_message(dd,
-			(u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
+	return read_idle_message(dd, (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT,
+				 data);
 }
 
 /*
@@ -8489,7 +8766,7 @@ static int send_idle_message(struct hfi1_devdata *dd, u64 data)
 	ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
-			data, ret);
+			   data, ret);
 		return -EINVAL;
 	}
 	return 0;
@@ -8504,8 +8781,8 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
 {
 	u64 data;
 
-	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
-		| ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
+	data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT) |
+		((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
 	return send_idle_message(dd, data);
 }
 
@@ -8527,7 +8804,7 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 		/* LCB_CFG_LOOPBACK.VAL = 2 */
 		/* LCB_CFG_LANE_WIDTH.VAL = 0 */
 		write_csr(dd, DC_LCB_CFG_LOOPBACK,
-			IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
+			  IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
 		write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
 	}
 
@@ -8539,25 +8816,24 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 	if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 		/* LCB_CFG_RUN.EN = 1 */
 		write_csr(dd, DC_LCB_CFG_RUN,
-			1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+			  1ull << DC_LCB_CFG_RUN_EN_SHIFT);
 
 		/* watch LCB_STS_LINK_TRANSFER_ACTIVE */
 		timeout = jiffies + msecs_to_jiffies(10);
 		while (1) {
-			reg = read_csr(dd,
-				DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+			reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
 			if (reg)
 				break;
 			if (time_after(jiffies, timeout)) {
 				dd_dev_err(dd,
-					"timeout waiting for LINK_TRANSFER_ACTIVE\n");
+					   "timeout waiting for LINK_TRANSFER_ACTIVE\n");
 				return -ETIMEDOUT;
 			}
 			udelay(2);
 		}
 
 		write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
-			1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
+			  1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
 	}
 
 	if (!loopback) {
@@ -8569,10 +8845,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 		 * done with LCB set up before resuming.
 		 */
 		dd_dev_err(dd,
-			"Pausing for peer to be finished with LCB set up\n");
+			   "Pausing for peer to be finished with LCB set up\n");
 		msleep(5000);
-		dd_dev_err(dd,
-			"Continuing with quick linkup\n");
+		dd_dev_err(dd, "Continuing with quick linkup\n");
 	}
 
 	write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
@@ -8586,8 +8861,8 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
 	ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
 	if (ret != HCMD_SUCCESS) {
 		dd_dev_err(dd,
-			"%s: set physical link state to quick LinkUp failed with return %d\n",
-			__func__, ret);
+			   "%s: set physical link state to quick LinkUp failed with return %d\n",
+			   __func__, ret);
 
 		set_host_lcb_access(dd);
 		write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
@@ -8612,8 +8887,8 @@ static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
 	if (ret == HCMD_SUCCESS)
 		return 0;
 	dd_dev_err(dd,
-		"Set physical link state to SerDes Loopback failed with return %d\n",
-		ret);
+		   "Set physical link state to SerDes Loopback failed with return %d\n",
+		   ret);
 	if (ret >= 0)
 		ret = -EINVAL;
 	return ret;
@@ -8628,7 +8903,7 @@ static int init_loopback(struct hfi1_devdata *dd)
 
 	/* all loopbacks should disable self GUID check */
 	write_csr(dd, DC_DC8051_CFG_MODE,
-		(read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
+		  (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
 
 	/*
 	 * The simulator has only one loopback option - LCB.  Switch
@@ -8636,10 +8911,9 @@ static int init_loopback(struct hfi1_devdata *dd)
 	 *
 	 * Accept all valid loopback values.
 	 */
-	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
-		&& (loopback == LOOPBACK_SERDES
-			|| loopback == LOOPBACK_LCB
-			|| loopback == LOOPBACK_CABLE)) {
+	if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR) &&
+	    (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
+	     loopback == LOOPBACK_CABLE)) {
 		loopback = LOOPBACK_LCB;
 		quick_linkup = 1;
 		return 0;
@@ -8660,7 +8934,7 @@ static int init_loopback(struct hfi1_devdata *dd)
 		/* not supported in emulation due to emulation RTL changes */
 		if (dd->icode == ICODE_FPGA_EMULATION) {
 			dd_dev_err(dd,
-				"LCB loopback not supported in emulation\n");
+				   "LCB loopback not supported in emulation\n");
 			return -EINVAL;
 		}
 		return 0;
@@ -8687,10 +8961,10 @@ static u16 opa_to_vc_link_widths(u16 opa_widths)
 		u16 from;
 		u16 to;
 	} opa_link_xlate[] = {
-		{ OPA_LINK_WIDTH_1X, 1 << (1-1)  },
-		{ OPA_LINK_WIDTH_2X, 1 << (2-1)  },
-		{ OPA_LINK_WIDTH_3X, 1 << (3-1)  },
-		{ OPA_LINK_WIDTH_4X, 1 << (4-1)  },
+		{ OPA_LINK_WIDTH_1X, 1 << (1 - 1)  },
+		{ OPA_LINK_WIDTH_2X, 1 << (2 - 1)  },
+		{ OPA_LINK_WIDTH_3X, 1 << (3 - 1)  },
+		{ OPA_LINK_WIDTH_4X, 1 << (4 - 1)  },
 	};
 
 	for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
@@ -8716,7 +8990,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 	/* set the local tx rate - need to read-modify-write */
 	ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
-		&rx_polarity_inversion, &ppd->local_tx_rate);
+			       &rx_polarity_inversion, &ppd->local_tx_rate);
 	if (ret)
 		goto set_local_link_attributes_fail;
 
@@ -8737,15 +9011,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 	enable_lane_tx = 0xF; /* enable all four lanes */
 	ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
-		     rx_polarity_inversion, ppd->local_tx_rate);
+				rx_polarity_inversion, ppd->local_tx_rate);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
 	/*
 	 * DC supports continuous updates.
 	 */
-	ret = write_vc_local_phy(dd, 0 /* no power management */,
-				     1 /* continuous updates */);
+	ret = write_vc_local_phy(dd,
+				 0 /* no power management */,
+				 1 /* continuous updates */);
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8756,7 +9031,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 		goto set_local_link_attributes_fail;
 
 	ret = write_vc_local_link_width(dd, 0, 0,
-		     opa_to_vc_link_widths(ppd->link_width_enabled));
+					opa_to_vc_link_widths(
+						ppd->link_width_enabled));
 	if (ret != HCMD_SUCCESS)
 		goto set_local_link_attributes_fail;
 
@@ -8767,8 +9043,8 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 
 set_local_link_attributes_fail:
 	dd_dev_err(dd,
-		"Failed to set local link attributes, return 0x%x\n",
-		ret);
+		   "Failed to set local link attributes, return 0x%x\n",
+		   ret);
 	return ret;
 }
 
@@ -8781,54 +9057,101 @@ int start_link(struct hfi1_pportdata *ppd)
 {
 	if (!ppd->link_enabled) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because link is disabled\n",
-			__func__);
+			    "%s: stopping link start because link is disabled\n",
+			    __func__);
 		return 0;
 	}
 	if (!ppd->driver_link_ready) {
 		dd_dev_info(ppd->dd,
-			"%s: stopping link start because driver is not ready\n",
-			__func__);
+			    "%s: stopping link start because driver is not ready\n",
+			    __func__);
 		return 0;
 	}
 
 	if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
-			loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+	    loopback == LOOPBACK_LCB ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
 		return set_link_state(ppd, HLS_DN_POLL);
 
 	dd_dev_info(ppd->dd,
-		"%s: stopping link start because no cable is present\n",
-		__func__);
+		    "%s: stopping link start because no cable is present\n",
+		    __func__);
 	return -EAGAIN;
 }
 
-static void reset_qsfp(struct hfi1_pportdata *ppd)
+static void wait_for_qsfp_init(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+	unsigned long timeout;
+
+	/*
+	 * Check for QSFP interrupt for t_init (SFF 8679)
+	 */
+	timeout = jiffies + msecs_to_jiffies(2000);
+	while (1) {
+		mask = read_csr(dd, dd->hfi1_id ?
+				ASIC_QSFP2_IN : ASIC_QSFP1_IN);
+		if (!(mask & QSFP_HFI0_INT_N)) {
+			write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR :
+				  ASIC_QSFP1_CLEAR, QSFP_HFI0_INT_N);
+			break;
+		}
+		if (time_after(jiffies, timeout)) {
+			dd_dev_info(dd, "%s: No IntN detected, reset complete\n",
+				    __func__);
+			break;
+		}
+		udelay(2);
+	}
+}
+
+static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u64 mask;
+
+	mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK);
+	if (enable)
+		mask |= (u64)QSFP_HFI0_INT_N;
+	else
+		mask &= ~(u64)QSFP_HFI0_INT_N;
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
+}
+
+void reset_qsfp(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	u64 mask, qsfp_mask;
 
+	/* Disable INT_N from triggering QSFP interrupts */
+	set_qsfp_int_n(ppd, 0);
+
+	/* Reset the QSFP */
 	mask = (u64)QSFP_HFI0_RESET_N;
-	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
+	qsfp_mask = read_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
 	qsfp_mask |= mask;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE, qsfp_mask);
 
 	qsfp_mask = read_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
+			     dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
 	qsfp_mask &= ~mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
 
 	udelay(10);
 
 	qsfp_mask |= mask;
 	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
-		qsfp_mask);
+		  dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT, qsfp_mask);
+
+	wait_for_qsfp_init(ppd);
+
+	/*
+	 * Allow INT_N to trigger the QSFP interrupt to watch
+	 * for alarms and warnings
+	 */
+	set_qsfp_int_n(ppd, 1);
 }
 
 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -8837,102 +9160,86 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 	struct hfi1_devdata *dd = ppd->dd;
 
 	if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable on fire\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable on fire\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
-		(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP cable temperature too low\n",
-			__func__);
+	    (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
+		dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too high\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
-		(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
-		dd_dev_info(dd,
-			"%s: QSFP supply voltage too low\n",
-			__func__);
+	    (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
+		dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
+			    __func__);
 
 	/* Byte 2 is vendor specific */
 
 	if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable RX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too high\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
-		(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 bias too low\n",
-			__func__);
+	    (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 1/2 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too high\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
+			    __func__);
 
 	if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
-		(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
-		dd_dev_info(dd,
-			"%s: Cable TX channel 3/4 power too low\n",
-			__func__);
+	    (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
+		dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
+			    __func__);
 
 	/* Bytes 9-10 and 11-12 are reserved */
 	/* Bytes 13-15 are vendor specific */
@@ -8940,35 +9247,8 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
 	return 0;
 }
 
-static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
-{
-	refresh_qsfp_cache(ppd, &ppd->qsfp_info);
-
-	return 0;
-}
-
-static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
-{
-	struct hfi1_devdata *dd = ppd->dd;
-	u8 qsfp_interrupt_status = 0;
-
-	if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
-		!= 1) {
-		dd_dev_info(dd,
-			"%s: Failed to read status of QSFP module\n",
-			__func__);
-		return -EIO;
-	}
-
-	/* We don't care about alarms & warnings with a non-functional INT_N */
-	if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
-		do_pre_lni_host_behaviors(ppd);
-
-	return 0;
-}
-
 /* This routine will only be scheduled if the QSFP module is present */
-static void qsfp_event(struct work_struct *work)
+void qsfp_event(struct work_struct *work)
 {
 	struct qsfp_data *qd;
 	struct hfi1_pportdata *ppd;
@@ -8990,76 +9270,75 @@ static void qsfp_event(struct work_struct *work)
 	dc_start(dd);
 
 	if (qd->cache_refresh_required) {
-		msleep(3000);
-		reset_qsfp(ppd);
+		set_qsfp_int_n(ppd, 0);
 
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
+		wait_for_qsfp_init(ppd);
+
+		/*
+		 * Allow INT_N to trigger the QSFP interrupt to watch
+		 * for alarms and warnings
 		 */
-		msleep(3000);
-		if (!qd->qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd, "%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-			start_link(ppd);
-		}
+		set_qsfp_int_n(ppd, 1);
+
+		tune_serdes(ppd);
+
+		start_link(ppd);
 	}
 
 	if (qd->check_interrupt_flags) {
 		u8 qsfp_interrupt_status[16] = {0,};
 
-		if (qsfp_read(ppd, dd->hfi1_id, 6,
-			      &qsfp_interrupt_status[0], 16) != 16) {
+		if (one_qsfp_read(ppd, dd->hfi1_id, 6,
+				  &qsfp_interrupt_status[0], 16) != 16) {
 			dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-				__func__);
+				    "%s: Failed to read status of QSFP module\n",
+				    __func__);
 		} else {
 			unsigned long flags;
-			u8 data_status;
 
+			handle_qsfp_error_conditions(
+					ppd, qsfp_interrupt_status);
 			spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 			ppd->qsfp_info.check_interrupt_flags = 0;
 			spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
-								flags);
-
-			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
-				 != 1) {
-				dd_dev_info(dd,
-				"%s: Failed to read status of QSFP module\n",
-					__func__);
-			}
-			if (!(data_status & QSFP_DATA_NOT_READY)) {
-				do_pre_lni_host_behaviors(ppd);
-				start_link(ppd);
-			} else
-				handle_qsfp_error_conditions(ppd,
-						qsfp_interrupt_status);
+					       flags);
 		}
 	}
 }
 
-void init_qsfp(struct hfi1_pportdata *ppd)
+static void init_qsfp_int(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	u64 qsfp_mask;
+	struct hfi1_pportdata *ppd = dd->pport;
+	u64 qsfp_mask, cce_int_mask;
+	const int qsfp1_int_smask = QSFP1_INT % 64;
+	const int qsfp2_int_smask = QSFP2_INT % 64;
 
-	if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
-			ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
-		ppd->driver_link_ready = 1;
-		return;
+	/*
+	 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
+	 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
+	 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
+	 * the index of the appropriate CSR in the CCEIntMask CSR array
+	 */
+	cce_int_mask = read_csr(dd, CCE_INT_MASK +
+				(8 * (QSFP1_INT / 64)));
+	if (dd->hfi1_id) {
+		cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
+			  cce_int_mask);
+	} else {
+		cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
+		write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
+			  cce_int_mask);
 	}
 
-	ppd->qsfp_info.ppd = ppd;
-	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
-
 	qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
 	/* Clear current status to avoid spurious interrupts */
-	write_csr(dd,
-			dd->hfi1_id ?
-				ASIC_QSFP2_CLEAR :
-				ASIC_QSFP1_CLEAR,
-		qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_CLEAR : ASIC_QSFP1_CLEAR,
+		  qsfp_mask);
+	write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
+		  qsfp_mask);
+
+	set_qsfp_int_n(ppd, 0);
 
 	/* Handle active low nature of INT_N and MODPRST_N pins */
 	if (qsfp_mod_present(ppd))
@@ -9067,29 +9346,6 @@ void init_qsfp(struct hfi1_pportdata *ppd)
 	write_csr(dd,
 		  dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
 		  qsfp_mask);
-
-	/* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
-	qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
-	write_csr(dd,
-		dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
-		qsfp_mask);
-
-	if (qsfp_mod_present(ppd)) {
-		msleep(3000);
-		reset_qsfp(ppd);
-
-		/* Check for QSFP interrupt after t_init (SFF 8679)
-		 * + extra
-		 */
-		msleep(3000);
-		if (!ppd->qsfp_info.qsfp_interrupt_functional) {
-			if (do_qsfp_intr_fallback(ppd) < 0)
-				dd_dev_info(dd,
-					"%s: QSFP fallback failed\n",
-					__func__);
-			ppd->driver_link_ready = 1;
-		}
-	}
 }
 
 /*
@@ -9097,6 +9353,10 @@ void init_qsfp(struct hfi1_pportdata *ppd)
  */
 static void init_lcb(struct hfi1_devdata *dd)
 {
+	/* simulator does not correctly handle LCB cclk loopback, skip */
+	if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
+		return;
+
 	/* the DC has been reset earlier in the driver load */
 
 	/* set LCB for cclk loopback on the port */
@@ -9125,8 +9385,6 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
 		ppd->guid = guid;
 	}
 
-	/* the link defaults to enabled */
-	ppd->link_enabled = 1;
 	/* Set linkinit_reason on power up per OPA spec */
 	ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
 
@@ -9139,6 +9397,12 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
 			return ret;
 	}
 
+	/* tune the SERDES to a ballpark setting for
+	 * optimal signal and bit error rate
+	 * Needs to be done before starting the link
+	 */
+	tune_serdes(ppd);
+
 	return start_link(ppd);
 }
 
@@ -9156,8 +9420,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
 
+	ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
-	  OPA_LINKDOWN_REASON_SMA_DISABLED);
+			     OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_state(ppd, HLS_DN_OFFLINE);
 
 	/* disable the port */
@@ -9171,14 +9437,14 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_acks = alloc_percpu(u64);
-		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
-		ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
-		if ((ppd->ibport_data.rc_acks == NULL) ||
-		    (ppd->ibport_data.rc_delayed_comp == NULL) ||
-		    (ppd->ibport_data.rc_qacks == NULL))
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+		ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+		if (!ppd->ibport_data.rvp.rc_acks ||
+		    !ppd->ibport_data.rvp.rc_delayed_comp ||
+		    !ppd->ibport_data.rvp.rc_qacks)
 			return -ENOMEM;
 	}
 
@@ -9213,8 +9479,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
 		pa = 0;
 	} else if (type > PT_INVALID) {
 		dd_dev_err(dd,
-			"unexpected receive array type %u for index %u, not handled\n",
-			type, index);
+			   "unexpected receive array type %u for index %u, not handled\n",
+			   type, index);
 		goto done;
 	}
 
@@ -9429,12 +9695,15 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	/* all kernel receive contexts have the same hdrqentsize */
 	for (i = 0; i < ppd->vls_supported; i++) {
 		sc_set_cr_threshold(dd->vld[i].sc,
-			sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
-				dd->rcd[0]->rcvhdrqentsize));
+				    sc_mtu_to_threshold(dd->vld[i].sc,
+							dd->vld[i].mtu,
+							dd->rcd[0]->
+							rcvhdrqentsize));
 	}
 	sc_set_cr_threshold(dd->vld[15].sc,
-		sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
-			dd->rcd[0]->rcvhdrqentsize));
+			    sc_mtu_to_threshold(dd->vld[15].sc,
+						dd->vld[15].mtu,
+						dd->rcd[0]->rcvhdrqentsize));
 
 	/* Adjust maximum MTU for the port in DC */
 	dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
@@ -9460,7 +9729,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
 		| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
 	c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
-			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT)|
+			<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
 	      ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
 			<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
 	write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
@@ -9495,8 +9764,8 @@ static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"timeout waiting for phy link state 0x%x, current state is 0x%x\n",
-				state, curr_state);
+				   "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+				   state, curr_state);
 			return -ETIMEDOUT;
 		}
 		usleep_range(1950, 2050); /* sleep 2ms-ish */
@@ -9539,17 +9808,18 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 
 	if (do_transition) {
 		ret = set_physical_link_state(dd,
-			PLS_OFFLINE | (rem_reason << 8));
+					      (rem_reason << 8) | PLS_OFFLINE);
 
 		if (ret != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Offline link state, return %d\n",
-				ret);
+				   "Failed to transition to Offline link state, return %d\n",
+				   ret);
 			return -EINVAL;
 		}
-		if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
+		if (ppd->offline_disabled_reason ==
+				HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_TRANSIENT;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 	}
 
 	if (do_wait) {
@@ -9570,6 +9840,22 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
 	ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
 
+	if (ppd->port_type == PORT_TYPE_QSFP &&
+	    ppd->qsfp_info.limiting_active &&
+	    qsfp_mod_present(ppd)) {
+		int ret;
+
+		ret = acquire_chip_resource(dd, qsfp_resource(dd), QSFP_WAIT);
+		if (ret == 0) {
+			set_qsfp_tx(ppd, 0);
+			release_chip_resource(dd, qsfp_resource(dd));
+		} else {
+			/* not fatal, but should warn */
+			dd_dev_err(dd,
+				   "Unable to acquire lock to turn off QSFP TX\n");
+		}
+	}
+
 	/*
 	 * The LNI has a mandatory wait time after the physical state
 	 * moves to Offline.Quiet.  The wait time may be different
@@ -9582,7 +9868,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 	ret = wait_fm_ready(dd, 7000);
 	if (ret) {
 		dd_dev_err(dd,
-			"After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
+			   "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
 		/* state is really offline, so make it so */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		return ret;
@@ -9605,8 +9891,8 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 		read_last_local_state(dd, &last_local_state);
 		read_last_remote_state(dd, &last_remote_state);
 		dd_dev_err(dd,
-			"LNI failure last states: local 0x%08x, remote 0x%08x\n",
-			last_local_state, last_remote_state);
+			   "LNI failure last states: local 0x%08x, remote 0x%08x\n",
+			   last_local_state, last_remote_state);
 	}
 
 	/* the active link width (downgrade) is 0 on link down */
@@ -9754,14 +10040,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		state = dd->link_default;
 
 	/* interpret poll -> poll as a link bounce */
-	poll_bounce = ppd->host_link_state == HLS_DN_POLL
-				&& state == HLS_DN_POLL;
+	poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
+		      state == HLS_DN_POLL;
 
 	dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
-		link_state_name(ppd->host_link_state),
-		link_state_name(orig_new_state),
-		poll_bounce ? "(bounce) " : "",
-		link_state_reason_name(ppd, state));
+		    link_state_name(ppd->host_link_state),
+		    link_state_name(orig_new_state),
+		    poll_bounce ? "(bounce) " : "",
+		    link_state_reason_name(ppd, state));
 
 	was_up = !!(ppd->host_link_state & HLS_UP);
 
@@ -9782,8 +10068,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 
 	switch (state) {
 	case HLS_UP_INIT:
-		if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
+		if (ppd->host_link_state == HLS_DN_POLL &&
+		    (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
 			/*
 			 * Quick link up jumps from polling to here.
 			 *
@@ -9791,7 +10077,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			 * simulator jumps from polling to link up.
 			 * Accept that here.
 			 */
-			/* OK */;
+			/* OK */
 		} else if (ppd->host_link_state != HLS_GOING_UP) {
 			goto unexpected;
 		}
@@ -9802,8 +10088,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at going_up */
 			ppd->host_link_state = HLS_GOING_UP;
 			dd_dev_err(dd,
-				"%s: logical state did not change to INIT\n",
-				__func__);
+				   "%s: logical state did not change to INIT\n",
+				   __func__);
 		} else {
 			/* clear old transient LINKINIT_REASON code */
 			if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
@@ -9827,8 +10113,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at init */
 			ppd->host_link_state = HLS_UP_INIT;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ARMED\n",
-				__func__);
+				   "%s: logical state did not change to ARMED\n",
+				   __func__);
 		}
 		/*
 		 * The simulator does not currently implement SMA messages,
@@ -9849,15 +10135,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			/* logical state didn't change, stay at armed */
 			ppd->host_link_state = HLS_UP_ARMED;
 			dd_dev_err(dd,
-				"%s: logical state did not change to ACTIVE\n",
-				__func__);
+				   "%s: logical state did not change to ACTIVE\n",
+				   __func__);
 		} else {
-
 			/* tell all engines to go running */
 			sdma_all_running(dd);
 
 			/* Signal the IB layer that the port has went active */
-			event.device = &dd->verbs_dev.ibdev;
+			event.device = &dd->verbs_dev.rdi.ibdev;
 			event.element.port_num = ppd->port;
 			event.event = IB_EVENT_PORT_ACTIVE;
 		}
@@ -9884,6 +10169,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 				ppd->link_enabled = 1;
 		}
 
+		set_all_slowpath(ppd->dd);
 		ret = set_local_link_attributes(ppd);
 		if (ret)
 			break;
@@ -9898,12 +10184,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 			ret1 = set_physical_link_state(dd, PLS_POLLING);
 			if (ret1 != HCMD_SUCCESS) {
 				dd_dev_err(dd,
-					"Failed to transition to Polling link state, return 0x%x\n",
-					ret1);
+					   "Failed to transition to Polling link state, return 0x%x\n",
+					   ret1);
 				ret = -EINVAL;
 			}
 		}
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 		/*
 		 * If an error occurred above, go back to offline.  The
 		 * caller may reschedule another attempt.
@@ -9928,8 +10215,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		ret1 = set_physical_link_state(dd, PLS_DISABLED);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to Disabled link state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to Disabled link state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9957,8 +10244,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 		ret1 = set_physical_link_state(dd, PLS_LINKUP);
 		if (ret1 != HCMD_SUCCESS) {
 			dd_dev_err(dd,
-				"Failed to transition to link up state, return 0x%x\n",
-				ret1);
+				   "Failed to transition to link up state, return 0x%x\n",
+				   ret1);
 			ret = -EINVAL;
 			break;
 		}
@@ -9969,7 +10256,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 	case HLS_LINK_COOLDOWN:		/* transient within goto_offline() */
 	default:
 		dd_dev_info(dd, "%s: state 0x%x: not supported\n",
-			__func__, state);
+			    __func__, state);
 		ret = -EINVAL;
 		break;
 	}
@@ -9989,8 +10276,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
 
 unexpected:
 	dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
-		__func__, link_state_name(ppd->host_link_state),
-		link_state_name(state));
+		   __func__, link_state_name(ppd->host_link_state),
+		   link_state_name(state));
 	ret = -EINVAL;
 
 done:
@@ -10016,7 +10303,7 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 		 * The VL Arbitrator high limit is sent in units of 4k
 		 * bytes, while HFI stores it in units of 64 bytes.
 		 */
-		val *= 4096/64;
+		val *= 4096 / 64;
 		reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
 			<< SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
 		write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
@@ -10031,12 +10318,6 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 			ppd->vls_operational = val;
 			if (!ppd->port)
 				ret = -EINVAL;
-			else
-				ret = sdma_map_init(
-					ppd->dd,
-					ppd->port - 1,
-					val,
-					NULL);
 		}
 		break;
 	/*
@@ -10084,8 +10365,8 @@ int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
 	default:
 		if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
 			dd_dev_info(ppd->dd,
-			  "%s: which %s, val 0x%x: not implemented\n",
-			  __func__, ib_cfg_name(which), val);
+				    "%s: which %s, val 0x%x: not implemented\n",
+				    __func__, ib_cfg_name(which), val);
 		break;
 	}
 	return ret;
@@ -10152,6 +10433,7 @@ static int vl_arb_match_cache(struct vl_arb_cache *cache,
 {
 	return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
 }
+
 /* end functions related to vl arbitration table caching */
 
 static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
@@ -10239,7 +10521,7 @@ static int get_buffer_control(struct hfi1_devdata *dd,
 
 	/* OPA and HFI have a 1-1 mapping */
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
+		read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8 * i), &bc->vl[i]);
 
 	/* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
 	read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
@@ -10293,41 +10575,41 @@ static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
 static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
 {
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
-		DC_SC_VL_VAL(15_0,
-		0, dp->vlnt[0] & 0xf,
-		1, dp->vlnt[1] & 0xf,
-		2, dp->vlnt[2] & 0xf,
-		3, dp->vlnt[3] & 0xf,
-		4, dp->vlnt[4] & 0xf,
-		5, dp->vlnt[5] & 0xf,
-		6, dp->vlnt[6] & 0xf,
-		7, dp->vlnt[7] & 0xf,
-		8, dp->vlnt[8] & 0xf,
-		9, dp->vlnt[9] & 0xf,
-		10, dp->vlnt[10] & 0xf,
-		11, dp->vlnt[11] & 0xf,
-		12, dp->vlnt[12] & 0xf,
-		13, dp->vlnt[13] & 0xf,
-		14, dp->vlnt[14] & 0xf,
-		15, dp->vlnt[15] & 0xf));
+		  DC_SC_VL_VAL(15_0,
+			       0, dp->vlnt[0] & 0xf,
+			       1, dp->vlnt[1] & 0xf,
+			       2, dp->vlnt[2] & 0xf,
+			       3, dp->vlnt[3] & 0xf,
+			       4, dp->vlnt[4] & 0xf,
+			       5, dp->vlnt[5] & 0xf,
+			       6, dp->vlnt[6] & 0xf,
+			       7, dp->vlnt[7] & 0xf,
+			       8, dp->vlnt[8] & 0xf,
+			       9, dp->vlnt[9] & 0xf,
+			       10, dp->vlnt[10] & 0xf,
+			       11, dp->vlnt[11] & 0xf,
+			       12, dp->vlnt[12] & 0xf,
+			       13, dp->vlnt[13] & 0xf,
+			       14, dp->vlnt[14] & 0xf,
+			       15, dp->vlnt[15] & 0xf));
 	write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
-		DC_SC_VL_VAL(31_16,
-		16, dp->vlnt[16] & 0xf,
-		17, dp->vlnt[17] & 0xf,
-		18, dp->vlnt[18] & 0xf,
-		19, dp->vlnt[19] & 0xf,
-		20, dp->vlnt[20] & 0xf,
-		21, dp->vlnt[21] & 0xf,
-		22, dp->vlnt[22] & 0xf,
-		23, dp->vlnt[23] & 0xf,
-		24, dp->vlnt[24] & 0xf,
-		25, dp->vlnt[25] & 0xf,
-		26, dp->vlnt[26] & 0xf,
-		27, dp->vlnt[27] & 0xf,
-		28, dp->vlnt[28] & 0xf,
-		29, dp->vlnt[29] & 0xf,
-		30, dp->vlnt[30] & 0xf,
-		31, dp->vlnt[31] & 0xf));
+		  DC_SC_VL_VAL(31_16,
+			       16, dp->vlnt[16] & 0xf,
+			       17, dp->vlnt[17] & 0xf,
+			       18, dp->vlnt[18] & 0xf,
+			       19, dp->vlnt[19] & 0xf,
+			       20, dp->vlnt[20] & 0xf,
+			       21, dp->vlnt[21] & 0xf,
+			       22, dp->vlnt[22] & 0xf,
+			       23, dp->vlnt[23] & 0xf,
+			       24, dp->vlnt[24] & 0xf,
+			       25, dp->vlnt[25] & 0xf,
+			       26, dp->vlnt[26] & 0xf,
+			       27, dp->vlnt[27] & 0xf,
+			       28, dp->vlnt[28] & 0xf,
+			       29, dp->vlnt[29] & 0xf,
+			       30, dp->vlnt[30] & 0xf,
+			       31, dp->vlnt[31] & 0xf));
 }
 
 static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
@@ -10335,7 +10617,7 @@ static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
 {
 	if (limit != 0)
 		dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
-			what, (int)limit, idx);
+			    what, (int)limit, idx);
 }
 
 /* change only the shared limit portion of SendCmGLobalCredit */
@@ -10413,14 +10695,14 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
 	}
 
 	dd_dev_err(dd,
-		"%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
-		which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
+		   "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
+		   which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
 	/*
 	 * If this occurs, it is likely there was a credit loss on the link.
 	 * The only recovery from that is a link bounce.
 	 */
 	dd_dev_err(dd,
-		"Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
+		   "Continuing anyway.  A credit loss may occur.  Suggest a link bounce\n");
 }
 
 /*
@@ -10447,13 +10729,15 @@ static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
  * raise = if the new limit is higher than the current value (may be changed
  *	earlier in the algorithm), set the new limit to the new value
  */
-static int set_buffer_control(struct hfi1_devdata *dd,
-			      struct buffer_control *new_bc)
+int set_buffer_control(struct hfi1_pportdata *ppd,
+		       struct buffer_control *new_bc)
 {
+	struct hfi1_devdata *dd = ppd->dd;
 	u64 changing_mask, ld_mask, stat_mask;
 	int change_count;
 	int i, use_all_mask;
 	int this_shared_changing;
+	int vl_count = 0, ret;
 	/*
 	 * A0: add the variable any_shared_limit_changing below and in the
 	 * algorithm above.  If removing A0 support, it can be removed.
@@ -10478,7 +10762,6 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 #define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
 #define NUM_USABLE_VLS 16	/* look at VL15 and less */
 
-
 	/* find the new total credits, do sanity check on unused VLs */
 	for (i = 0; i < OPA_MAX_VLS; i++) {
 		if (valid_vl(i)) {
@@ -10486,9 +10769,9 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 			continue;
 		}
 		nonzero_msg(dd, i, "dedicated",
-			be16_to_cpu(new_bc->vl[i].dedicated));
+			    be16_to_cpu(new_bc->vl[i].dedicated));
 		nonzero_msg(dd, i, "shared",
-			be16_to_cpu(new_bc->vl[i].shared));
+			    be16_to_cpu(new_bc->vl[i].shared));
 		new_bc->vl[i].dedicated = 0;
 		new_bc->vl[i].shared = 0;
 	}
@@ -10502,8 +10785,10 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 	 */
 	memset(changing, 0, sizeof(changing));
 	memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
-	/* NOTE: Assumes that the individual VL bits are adjacent and in
-	   increasing order */
+	/*
+	 * NOTE: Assumes that the individual VL bits are adjacent and in
+	 * increasing order
+	 */
 	stat_mask =
 		SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
 	changing_mask = 0;
@@ -10517,8 +10802,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 						!= cur_bc.vl[i].shared;
 		if (this_shared_changing)
 			any_shared_limit_changing = 1;
-		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
-				|| this_shared_changing) {
+		if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated ||
+		    this_shared_changing) {
 			changing[i] = 1;
 			changing_mask |= stat_mask;
 			change_count++;
@@ -10557,7 +10842,7 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 	}
 
 	wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
-		"shared");
+				 "shared");
 
 	if (change_count > 0) {
 		for (i = 0; i < NUM_USABLE_VLS; i++) {
@@ -10566,7 +10851,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 
 			if (lowering_dedicated[i]) {
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 				cur_bc.vl[i].dedicated =
 						new_bc->vl[i].dedicated;
 			}
@@ -10582,7 +10868,8 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 			if (be16_to_cpu(new_bc->vl[i].dedicated) >
 					be16_to_cpu(cur_bc.vl[i].dedicated))
 				set_vl_dedicated(dd, i,
-					be16_to_cpu(new_bc->vl[i].dedicated));
+						 be16_to_cpu(new_bc->
+							     vl[i].dedicated));
 		}
 	}
 
@@ -10598,13 +10885,35 @@ static int set_buffer_control(struct hfi1_devdata *dd,
 
 	/* finally raise the global shared */
 	if (be16_to_cpu(new_bc->overall_shared_limit) >
-			be16_to_cpu(cur_bc.overall_shared_limit))
+	    be16_to_cpu(cur_bc.overall_shared_limit))
 		set_global_shared(dd,
-			be16_to_cpu(new_bc->overall_shared_limit));
+				  be16_to_cpu(new_bc->overall_shared_limit));
 
 	/* bracket the credit change with a total adjustment */
 	if (new_total < cur_total)
 		set_global_limit(dd, new_total);
+
+	/*
+	 * Determine the actual number of operational VLS using the number of
+	 * dedicated and shared credits for each VL.
+	 */
+	if (change_count > 0) {
+		for (i = 0; i < TXE_NUM_DATA_VL; i++)
+			if (be16_to_cpu(new_bc->vl[i].dedicated) > 0 ||
+			    be16_to_cpu(new_bc->vl[i].shared) > 0)
+				vl_count++;
+		ppd->actual_vls_operational = vl_count;
+		ret = sdma_map_init(dd, ppd->port - 1, vl_count ?
+				    ppd->actual_vls_operational :
+				    ppd->vls_operational,
+				    NULL);
+		if (ret == 0)
+			ret = pio_map_init(dd, ppd->port - 1, vl_count ?
+					   ppd->actual_vls_operational :
+					   ppd->vls_operational, NULL);
+		if (ret)
+			return ret;
+	}
 	return 0;
 }
 
@@ -10696,7 +11005,7 @@ int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
 				     VL_ARB_LOW_PRIO_TABLE_SIZE, t);
 		break;
 	case FM_TBL_BUFFER_CONTROL:
-		ret = set_buffer_control(ppd->dd, t);
+		ret = set_buffer_control(ppd, t);
 		break;
 	case FM_TBL_SC2VLNT:
 		set_sc2vlnt(ppd->dd, t);
@@ -10846,10 +11155,13 @@ static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
 	}
 
 	rcd->rcvavail_timeout = timeout;
-	/* timeout cannot be larger than rcv_intr_timeout_csr which has already
-	   been verified to be in range */
+	/*
+	 * timeout cannot be larger than rcv_intr_timeout_csr which has already
+	 * been verified to be in range
+	 */
 	write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
-		(u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
+			(u64)timeout <<
+			RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 }
 
 void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
@@ -10915,16 +11227,16 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
 static u32 encoded_size(u32 size)
 {
 	switch (size) {
-	case   4*1024: return 0x1;
-	case   8*1024: return 0x2;
-	case  16*1024: return 0x3;
-	case  32*1024: return 0x4;
-	case  64*1024: return 0x5;
-	case 128*1024: return 0x6;
-	case 256*1024: return 0x7;
-	case 512*1024: return 0x8;
-	case   1*1024*1024: return 0x9;
-	case   2*1024*1024: return 0xa;
+	case   4 * 1024: return 0x1;
+	case   8 * 1024: return 0x2;
+	case  16 * 1024: return 0x3;
+	case  32 * 1024: return 0x4;
+	case  64 * 1024: return 0x5;
+	case 128 * 1024: return 0x6;
+	case 256 * 1024: return 0x7;
+	case 512 * 1024: return 0x8;
+	case   1 * 1024 * 1024: return 0x9;
+	case   2 * 1024 * 1024: return 0xa;
 	}
 	return 0x1;	/* if invalid, go with the minimum size */
 }
@@ -10943,8 +11255,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 
 	rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
 	/* if the context already enabled, don't do the extra steps */
-	if ((op & HFI1_RCVCTRL_CTXT_ENB)
-			&& !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
+	if ((op & HFI1_RCVCTRL_CTXT_ENB) &&
+	    !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
 				rcd->rcvhdrq_phys);
@@ -11018,6 +11330,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		if (dd->rcvhdrtail_dummy_physaddr) {
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
 					dd->rcvhdrtail_dummy_physaddr);
+			/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
 			rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 		}
 
@@ -11029,15 +11342,20 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
-	if (op & HFI1_RCVCTRL_TAILUPD_DIS)
-		rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
+		/* See comment on RcvCtxtCtrl.TailUpd above */
+		if (!(op & HFI1_RCVCTRL_CTXT_DIS))
+			rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
+	}
 	if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
 		rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
 	if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
-		/* In one-packet-per-eager mode, the size comes from
-		   the RcvArray entry. */
+		/*
+		 * In one-packet-per-eager mode, the size comes from
+		 * the RcvArray entry.
+		 */
 		rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
 		rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
 	}
@@ -11056,19 +11374,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
 
 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
-	if (did_enable
-	    && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
+	if (did_enable &&
+	    (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
 		reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 		if (reg != 0) {
 			dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
-				ctxt, reg);
+				    ctxt, reg);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
 			write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
 			read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
 			reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
 			dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
-				ctxt, reg, reg == 0 ? "not" : "still");
+				    ctxt, reg, reg == 0 ? "not" : "still");
 		}
 	}
 
@@ -11079,7 +11397,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 		 */
 		/* set interrupt timeout */
 		write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
-			(u64)rcd->rcvavail_timeout <<
+				(u64)rcd->rcvavail_timeout <<
 				RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
 
 		/* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
@@ -11097,28 +11415,19 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
 				dd->rcvhdrtail_dummy_physaddr);
 }
 
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp)
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
 		ret = dd->cntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 		*namep = dd->cntrnames;
 	} else {
 		const struct cntr_entry *entry;
 		int i, j;
 
 		ret = (dd->ndevcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "read_cntrs does not support indexing");
-			return 0;
-		}
 
 		/* Get the start of the block of counters */
 		*cntrp = dd->cntrs;
@@ -11147,6 +11456,20 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
 						dd->cntrs[entry->offset + j] =
 									    val;
 					}
+				} else if (entry->flags & CNTR_SDMA) {
+					hfi1_cdbg(CNTR,
+						  "\t Per SDMA Engine\n");
+					for (j = 0; j < dd->chip_sdma_engines;
+					     j++) {
+						val =
+						entry->rw_cntr(entry, dd, j,
+							       CNTR_MODE_R, 0);
+						hfi1_cdbg(CNTR,
+							  "\t\tRead 0x%llx for %d\n",
+							  val, j);
+						dd->cntrs[entry->offset + j] =
+									val;
+					}
 				} else {
 					val = entry->rw_cntr(entry, dd,
 							CNTR_INVALID_VL,
@@ -11163,30 +11486,19 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
 /*
  * Used by sysfs to create files for hfi stats to read
  */
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp)
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
 {
 	int ret;
 	u64 val = 0;
 
 	if (namep) {
-		ret = dd->portcntrnameslen;
-		if (pos != 0) {
-			dd_dev_err(dd, "index not supported");
-			return 0;
-		}
-		*namep = dd->portcntrnames;
+		ret = ppd->dd->portcntrnameslen;
+		*namep = ppd->dd->portcntrnames;
 	} else {
 		const struct cntr_entry *entry;
-		struct hfi1_pportdata *ppd;
 		int i, j;
 
-		ret = (dd->nportcntrs) * sizeof(u64);
-		if (pos != 0) {
-			dd_dev_err(dd, "indexing not supported");
-			return 0;
-		}
-		ppd = (struct hfi1_pportdata *)(dd + 1 + port);
+		ret = ppd->dd->nportcntrs * sizeof(u64);
 		*cntrp = ppd->cntrs;
 
 		for (i = 0; i < PORT_CNTR_LAST; i++) {
@@ -11235,14 +11547,14 @@ static void free_cntrs(struct hfi1_devdata *dd)
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
 		kfree(ppd->cntrs);
 		kfree(ppd->scntrs);
-		free_percpu(ppd->ibport_data.rc_acks);
-		free_percpu(ppd->ibport_data.rc_qacks);
-		free_percpu(ppd->ibport_data.rc_delayed_comp);
+		free_percpu(ppd->ibport_data.rvp.rc_acks);
+		free_percpu(ppd->ibport_data.rvp.rc_qacks);
+		free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 		ppd->cntrs = NULL;
 		ppd->scntrs = NULL;
-		ppd->ibport_data.rc_acks = NULL;
-		ppd->ibport_data.rc_qacks = NULL;
-		ppd->ibport_data.rc_delayed_comp = NULL;
+		ppd->ibport_data.rvp.rc_acks = NULL;
+		ppd->ibport_data.rvp.rc_qacks = NULL;
+		ppd->ibport_data.rvp.rc_delayed_comp = NULL;
 	}
 	kfree(dd->portcntrnames);
 	dd->portcntrnames = NULL;
@@ -11510,11 +11822,13 @@ mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
-	int i, rcv_ctxts, index, j;
+	int i, rcv_ctxts, j;
 	size_t sz;
 	char *p;
 	char name[C_MAX_NAME];
 	struct hfi1_pportdata *ppd;
+	const char *bit_type_32 = ",32";
+	const int bit_type_32_sz = strlen(bit_type_32);
 
 	/* set up the stats timer; the add_timer is done at the end */
 	setup_timer(&dd->synth_stats_timer, update_synth_timer,
@@ -11527,49 +11841,57 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	/* size names and determine how many we have*/
 	dd->ndevcntrs = 0;
 	sz = 0;
-	index = 0;
 
 	for (i = 0; i < DEV_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
 			continue;
 		}
 
 		if (dev_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
-			dev_cntrs[i].offset = index;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					dev_cntrs[i].name,
-					vl_from_idx(j));
+					 dev_cntrs[i].name, vl_from_idx(j));
+				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
+				sz++;
+				dd->ndevcntrs++;
+			}
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			dev_cntrs[i].offset = dd->ndevcntrs;
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (dev_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->ndevcntrs++;
-				index++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline. */
 			sz += strlen(dev_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (dev_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
+			dev_cntrs[i].offset = dd->ndevcntrs;
 			dd->ndevcntrs++;
-			dev_cntrs[i].offset = index;
-			index++;
-			hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
 		}
 	}
 
 	/* allocate space for the counter values */
-	dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->cntrs)
 		goto bail;
 
-	dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
+	dd->scntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
 	if (!dd->scntrs)
 		goto bail;
 
-
 	/* allocate space for the counter names */
 	dd->cntrnameslen = sz;
 	dd->cntrnames = kmalloc(sz, GFP_KERNEL);
@@ -11577,27 +11899,51 @@ static int init_cntrs(struct hfi1_devdata *dd)
 		goto bail;
 
 	/* fill in the names */
-	for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
+	for (p = dd->cntrnames, i = 0; i < DEV_CNTR_LAST; i++) {
 		if (dev_cntrs[i].flags & CNTR_DISABLED) {
 			/* Nothing */
-		} else {
-			if (dev_cntrs[i].flags & CNTR_VL) {
-				for (j = 0; j < C_VL_COUNT; j++) {
-					memset(name, '\0', C_MAX_NAME);
-					snprintf(name, C_MAX_NAME, "%s%d",
-						dev_cntrs[i].name,
-						vl_from_idx(j));
-					memcpy(p, name, strlen(name));
-					p += strlen(name);
-					*p++ = '\n';
+		} else if (dev_cntrs[i].flags & CNTR_VL) {
+			for (j = 0; j < C_VL_COUNT; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name,
+					 vl_from_idx(j));
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
 				}
-			} else {
-				memcpy(p, dev_cntrs[i].name,
-				       strlen(dev_cntrs[i].name));
-				p += strlen(dev_cntrs[i].name);
+
+				*p++ = '\n';
+			}
+		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
+			for (j = 0; j < dd->chip_sdma_engines; j++) {
+				snprintf(name, C_MAX_NAME, "%s%d",
+					 dev_cntrs[i].name, j);
+				memcpy(p, name, strlen(name));
+				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (dev_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
 				*p++ = '\n';
 			}
-			index++;
+		} else {
+			memcpy(p, dev_cntrs[i].name, strlen(dev_cntrs[i].name));
+			p += strlen(dev_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (dev_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
+			*p++ = '\n';
 		}
 	}
 
@@ -11620,31 +11966,31 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	sz = 0;
 	dd->nportcntrs = 0;
 	for (i = 0; i < PORT_CNTR_LAST; i++) {
-		hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
 		if (port_cntrs[i].flags & CNTR_DISABLED) {
 			hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
 			continue;
 		}
 
 		if (port_cntrs[i].flags & CNTR_VL) {
-			hfi1_dbg_early("\tProcessing VL cntr\n");
 			port_cntrs[i].offset = dd->nportcntrs;
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				sz += strlen(name);
+				/* Add ",32" for 32-bit counters */
+				if (port_cntrs[i].flags & CNTR_32BIT)
+					sz += bit_type_32_sz;
 				sz++;
-				hfi1_dbg_early("\t\t%s\n", name);
 				dd->nportcntrs++;
 			}
 		} else {
-			/* +1 for newline  */
+			/* +1 for newline */
 			sz += strlen(port_cntrs[i].name) + 1;
+			/* Add ",32" for 32-bit counters */
+			if (port_cntrs[i].flags & CNTR_32BIT)
+				sz += bit_type_32_sz;
 			port_cntrs[i].offset = dd->nportcntrs;
 			dd->nportcntrs++;
-			hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
 		}
 	}
 
@@ -11661,18 +12007,30 @@ static int init_cntrs(struct hfi1_devdata *dd)
 
 		if (port_cntrs[i].flags & CNTR_VL) {
 			for (j = 0; j < C_VL_COUNT; j++) {
-				memset(name, '\0', C_MAX_NAME);
 				snprintf(name, C_MAX_NAME, "%s%d",
-					port_cntrs[i].name,
-					vl_from_idx(j));
+					 port_cntrs[i].name, vl_from_idx(j));
 				memcpy(p, name, strlen(name));
 				p += strlen(name);
+
+				/* Counter is 32 bits */
+				if (port_cntrs[i].flags & CNTR_32BIT) {
+					memcpy(p, bit_type_32, bit_type_32_sz);
+					p += bit_type_32_sz;
+				}
+
 				*p++ = '\n';
 			}
 		} else {
 			memcpy(p, port_cntrs[i].name,
 			       strlen(port_cntrs[i].name));
 			p += strlen(port_cntrs[i].name);
+
+			/* Counter is 32 bits */
+			if (port_cntrs[i].flags & CNTR_32BIT) {
+				memcpy(p, bit_type_32, bit_type_32_sz);
+				p += bit_type_32_sz;
+			}
+
 			*p++ = '\n';
 		}
 	}
@@ -11700,14 +12058,13 @@ bail:
 	return -ENOMEM;
 }
 
-
 static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
 {
 	switch (chip_lstate) {
 	default:
 		dd_dev_err(dd,
-			 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
-			 chip_lstate);
+			   "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+			   chip_lstate);
 		/* fall through */
 	case LSTATE_DOWN:
 		return IB_PORT_DOWN;
@@ -11726,7 +12083,7 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
 	switch (chip_pstate & 0xf0) {
 	default:
 		dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
-			chip_pstate);
+			   chip_pstate);
 		/* fall through */
 	case PLS_DISABLED:
 		return IB_PORTPHYSSTATE_DISABLED;
@@ -11792,7 +12149,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
 	new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
 	if (new_state != ppd->lstate) {
 		dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
-			opa_lstate_name(new_state), new_state);
+			    opa_lstate_name(new_state), new_state);
 		ppd->lstate = new_state;
 	}
 	/*
@@ -11851,18 +12208,17 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
 
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
 {
-	static u32 remembered_state = 0xff;
 	u32 pstate;
 	u32 ib_pstate;
 
 	pstate = read_physical_state(ppd->dd);
 	ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
-	if (remembered_state != ib_pstate) {
+	if (ppd->last_pstate != ib_pstate) {
 		dd_dev_info(ppd->dd,
-			"%s: physical state changed to %s (0x%x), phy 0x%x\n",
-			__func__, opa_pstate_name(ib_pstate), ib_pstate,
-			pstate);
-		remembered_state = ib_pstate;
+			    "%s: physical state changed to %s (0x%x), phy 0x%x\n",
+			    __func__, opa_pstate_name(ib_pstate), ib_pstate,
+			    pstate);
+		ppd->last_pstate = ib_pstate;
 	}
 	return ib_pstate;
 }
@@ -11906,7 +12262,7 @@ u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
 
 int hfi1_init_ctxt(struct send_context *sc)
 {
-	if (sc != NULL) {
+	if (sc) {
 		struct hfi1_devdata *dd = sc->dd;
 		u64 reg;
 		u8 set = (sc->type == SC_USER ?
@@ -11963,34 +12319,14 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable)
 	 * In HFI, the mask needs to be 1 to allow interrupts.
 	 */
 	if (enable) {
-		u64 cce_int_mask;
-		const int qsfp1_int_smask = QSFP1_INT % 64;
-		const int qsfp2_int_smask = QSFP2_INT % 64;
-
 		/* enable all interrupts */
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
+			write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
 
-		/*
-		 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
-		 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
-		 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
-		 * the index of the appropriate CSR in the CCEIntMask CSR array
-		 */
-		cce_int_mask = read_csr(dd, CCE_INT_MASK +
-						(8*(QSFP1_INT/64)));
-		if (dd->hfi1_id) {
-			cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
-					cce_int_mask);
-		} else {
-			cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
-			write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
-					cce_int_mask);
-		}
+		init_qsfp_int(dd);
 	} else {
 		for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-			write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+			write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 	}
 }
 
@@ -12002,7 +12338,7 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
 	int i;
 
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
+		write_csr(dd, CCE_INT_CLEAR + (8 * i), ~(u64)0);
 
 	write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
 	write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
@@ -12037,10 +12373,9 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
 		struct hfi1_msix_entry *me = dd->msix_entries;
 
 		for (i = 0; i < dd->num_msix_entries; i++, me++) {
-			if (me->arg == NULL) /* => no irq, no affinity */
-				break;
-			irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
-					NULL);
+			if (!me->arg) /* => no irq, no affinity */
+				continue;
+			hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
 			free_irq(me->msix.vector, me->arg);
 		}
 	} else {
@@ -12061,8 +12396,6 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
 	}
 
 	/* clean structures */
-	for (i = 0; i < dd->num_msix_entries; i++)
-		free_cpumask_var(dd->msix_entries[i].mask);
 	kfree(dd->msix_entries);
 	dd->msix_entries = NULL;
 	dd->num_msix_entries = 0;
@@ -12085,10 +12418,10 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
 	/* direct the chip source to the given MSI-X interrupt */
 	m = isrc / 8;
 	n = isrc % 8;
-	reg = read_csr(dd, CCE_INT_MAP + (8*m));
-	reg &= ~((u64)0xff << (8*n));
-	reg |= ((u64)msix_intr & 0xff) << (8*n);
-	write_csr(dd, CCE_INT_MAP + (8*m), reg);
+	reg = read_csr(dd, CCE_INT_MAP + (8 * m));
+	reg &= ~((u64)0xff << (8 * n));
+	reg |= ((u64)msix_intr & 0xff) << (8 * n);
+	write_csr(dd, CCE_INT_MAP + (8 * m), reg);
 }
 
 static void remap_sdma_interrupts(struct hfi1_devdata *dd,
@@ -12101,12 +12434,12 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
 	 *	SDMAProgress
 	 *	SDMAIdle
 	 */
-	remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
-	remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
-		msix_intr);
+	remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
+	remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
+		   msix_intr);
 }
 
 static int request_intx_irq(struct hfi1_devdata *dd)
@@ -12116,10 +12449,10 @@ static int request_intx_irq(struct hfi1_devdata *dd)
 	snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
 		 dd->unit);
 	ret = request_irq(dd->pcidev->irq, general_interrupt,
-				  IRQF_SHARED, dd->intx_name, dd);
+			  IRQF_SHARED, dd->intx_name, dd);
 	if (ret)
 		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
-				ret);
+			   ret);
 	else
 		dd->requested_intx_irq = 1;
 	return ret;
@@ -12127,70 +12460,20 @@ static int request_intx_irq(struct hfi1_devdata *dd)
 
 static int request_msix_irqs(struct hfi1_devdata *dd)
 {
-	const struct cpumask *local_mask;
-	cpumask_var_t def, rcv;
-	bool def_ret, rcv_ret;
 	int first_general, last_general;
 	int first_sdma, last_sdma;
 	int first_rx, last_rx;
-	int first_cpu, curr_cpu;
-	int rcv_cpu, sdma_cpu;
-	int i, ret = 0, possible;
-	int ht;
+	int i, ret = 0;
 
 	/* calculate the ranges we are going to use */
 	first_general = 0;
-	first_sdma = last_general = first_general + 1;
-	first_rx = last_sdma = first_sdma + dd->num_sdma;
+	last_general = first_general + 1;
+	first_sdma = last_general;
+	last_sdma = first_sdma + dd->num_sdma;
+	first_rx = last_sdma;
 	last_rx = first_rx + dd->n_krcv_queues;
 
 	/*
-	 * Interrupt affinity.
-	 *
-	 * non-rcv avail gets a default mask that
-	 * starts as possible cpus with threads reset
-	 * and each rcv avail reset.
-	 *
-	 * rcv avail gets node relative 1 wrapping back
-	 * to the node relative 1 as necessary.
-	 *
-	 */
-	local_mask = cpumask_of_pcibus(dd->pcidev->bus);
-	/* if first cpu is invalid, use NUMA 0 */
-	if (cpumask_first(local_mask) >= nr_cpu_ids)
-		local_mask = topology_core_cpumask(0);
-
-	def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
-	rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
-	if (!def_ret || !rcv_ret)
-		goto bail;
-	/* use local mask as default */
-	cpumask_copy(def, local_mask);
-	possible = cpumask_weight(def);
-	/* disarm threads from default */
-	ht = cpumask_weight(
-			topology_sibling_cpumask(cpumask_first(local_mask)));
-	for (i = possible/ht; i < possible; i++)
-		cpumask_clear_cpu(i, def);
-	/* def now has full cores on chosen node*/
-	first_cpu = cpumask_first(def);
-	if (nr_cpu_ids >= first_cpu)
-		first_cpu++;
-	curr_cpu = first_cpu;
-
-	/*  One context is reserved as control context */
-	for (i = first_cpu; i < dd->n_krcv_queues + first_cpu - 1; i++) {
-		cpumask_clear_cpu(curr_cpu, def);
-		cpumask_set_cpu(curr_cpu, rcv);
-		curr_cpu = cpumask_next(curr_cpu, def);
-		if (curr_cpu >= nr_cpu_ids)
-			break;
-	}
-	/* def mask has non-rcv, rcv has recv mask */
-	rcv_cpu = cpumask_first(rcv);
-	sdma_cpu = cpumask_first(def);
-
-	/*
 	 * Sanity check - the code expects all SDMA chip source
 	 * interrupts to be in the same CSR, starting at bit 0.  Verify
 	 * that this is true by checking the bit location of the start.
@@ -12215,6 +12498,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 			snprintf(me->name, sizeof(me->name),
 				 DRIVER_NAME "_%d", dd->unit);
 			err_info = "general";
+			me->type = IRQ_GENERAL;
 		} else if (first_sdma <= i && i < last_sdma) {
 			idx = i - first_sdma;
 			sde = &dd->per_sdma[idx];
@@ -12224,6 +12508,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 				 DRIVER_NAME "_%d sdma%d", dd->unit, idx);
 			err_info = "sdma";
 			remap_sdma_interrupts(dd, idx, i);
+			me->type = IRQ_SDMA;
 		} else if (first_rx <= i && i < last_rx) {
 			idx = i - first_rx;
 			rcd = dd->rcd[idx];
@@ -12234,9 +12519,9 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 			 * Set the interrupt register and mask for this
 			 * context's interrupt.
 			 */
-			rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
+			rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
 			rcd->imask = ((u64)1) <<
-					((IS_RCVAVAIL_START+idx) % 64);
+					((IS_RCVAVAIL_START + idx) % 64);
 			handler = receive_context_interrupt;
 			thread = receive_context_thread;
 			arg = rcd;
@@ -12244,25 +12529,27 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 				 DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
 			err_info = "receive context";
 			remap_intr(dd, IS_RCVAVAIL_START + idx, i);
+			me->type = IRQ_RCVCTXT;
 		} else {
 			/* not in our expected range - complain, then
-			   ignore it */
+			 * ignore it
+			 */
 			dd_dev_err(dd,
-				"Unexpected extra MSI-X interrupt %d\n", i);
+				   "Unexpected extra MSI-X interrupt %d\n", i);
 			continue;
 		}
 		/* no argument, no interrupt */
-		if (arg == NULL)
+		if (!arg)
 			continue;
 		/* make sure the name is terminated */
-		me->name[sizeof(me->name)-1] = 0;
+		me->name[sizeof(me->name) - 1] = 0;
 
 		ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
-						me->name, arg);
+					   me->name, arg);
 		if (ret) {
 			dd_dev_err(dd,
-				"unable to allocate %s interrupt, vector %d, index %d, err %d\n",
-				 err_info, me->msix.vector, idx, ret);
+				   "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
+				   err_info, me->msix.vector, idx, ret);
 			return ret;
 		}
 		/*
@@ -12271,52 +12558,13 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
 		 */
 		me->arg = arg;
 
-		if (!zalloc_cpumask_var(
-			&dd->msix_entries[i].mask,
-			GFP_KERNEL))
-			goto bail;
-		if (handler == sdma_interrupt) {
-			dd_dev_info(dd, "sdma engine %d cpu %d\n",
-				sde->this_idx, sdma_cpu);
-			sde->cpu = sdma_cpu;
-			cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
-			sdma_cpu = cpumask_next(sdma_cpu, def);
-			if (sdma_cpu >= nr_cpu_ids)
-				sdma_cpu = cpumask_first(def);
-		} else if (handler == receive_context_interrupt) {
-			dd_dev_info(dd, "rcv ctxt %d cpu %d\n", rcd->ctxt,
-				    (rcd->ctxt == HFI1_CTRL_CTXT) ?
-					    cpumask_first(def) : rcv_cpu);
-			if (rcd->ctxt == HFI1_CTRL_CTXT) {
-				/* map to first default */
-				cpumask_set_cpu(cpumask_first(def),
-						dd->msix_entries[i].mask);
-			} else {
-				cpumask_set_cpu(rcv_cpu,
-						dd->msix_entries[i].mask);
-				rcv_cpu = cpumask_next(rcv_cpu, rcv);
-				if (rcv_cpu >= nr_cpu_ids)
-					rcv_cpu = cpumask_first(rcv);
-			}
-		} else {
-			/* otherwise first def */
-			dd_dev_info(dd, "%s cpu %d\n",
-				err_info, cpumask_first(def));
-			cpumask_set_cpu(
-				cpumask_first(def), dd->msix_entries[i].mask);
-		}
-		irq_set_affinity_hint(
-			dd->msix_entries[i].msix.vector,
-			dd->msix_entries[i].mask);
+		ret = hfi1_get_irq_affinity(dd, me);
+		if (ret)
+			dd_dev_err(dd,
+				   "unable to pin IRQ %d\n", ret);
 	}
 
-out:
-	free_cpumask_var(def);
-	free_cpumask_var(rcv);
 	return ret;
-bail:
-	ret = -ENOMEM;
-	goto  out;
 }
 
 /*
@@ -12333,7 +12581,7 @@ static void reset_interrupts(struct hfi1_devdata *dd)
 
 	/* all chip interrupts map to MSI-X 0 */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP + (8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 }
 
 static int set_up_interrupts(struct hfi1_devdata *dd)
@@ -12442,7 +12690,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		 */
 		num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS - 1;
 	else
-		num_kernel_contexts = num_online_nodes();
+		num_kernel_contexts = num_online_nodes() + 1;
 	num_kernel_contexts =
 		max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
 	/*
@@ -12483,13 +12731,14 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	dd->num_rcv_contexts = total_contexts;
 	dd->n_krcv_queues = num_kernel_contexts;
 	dd->first_user_ctxt = num_kernel_contexts;
+	dd->num_user_contexts = num_user_contexts;
 	dd->freectxts = num_user_contexts;
 	dd_dev_info(dd,
-		"rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
-		(int)dd->chip_rcv_contexts,
-		(int)dd->num_rcv_contexts,
-		(int)dd->n_krcv_queues,
-		(int)dd->num_rcv_contexts - dd->n_krcv_queues);
+		    "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
+		    (int)dd->chip_rcv_contexts,
+		    (int)dd->num_rcv_contexts,
+		    (int)dd->n_krcv_queues,
+		    (int)dd->num_rcv_contexts - dd->n_krcv_queues);
 
 	/*
 	 * Receive array allocation:
@@ -12515,8 +12764,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
 			dd->rcv_entries.group_size;
 		dd_dev_info(dd,
-		   "RcvArray group count too high, change to %u\n",
-		   dd->rcv_entries.ngroups);
+			    "RcvArray group count too high, change to %u\n",
+			    dd->rcv_entries.ngroups);
 		dd->rcv_entries.nctxt_extra = 0;
 	}
 	/*
@@ -12582,7 +12831,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 
 	/* CceIntMap */
 	for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
-		write_csr(dd, CCE_INT_MAP+(8*i), 0);
+		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 
 	/* SendCtxtCreditReturnAddr */
 	for (i = 0; i < dd->chip_send_contexts; i++)
@@ -12590,8 +12839,10 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 
 	/* PIO Send buffers */
 	/* SDMA Send buffers */
-	/* These are not normally read, and (presently) have no method
-	   to be read, so are not pre-initialized */
+	/*
+	 * These are not normally read, and (presently) have no method
+	 * to be read, so are not pre-initialized
+	 */
 
 	/* RcvHdrAddr */
 	/* RcvHdrTailAddr */
@@ -12600,13 +12851,13 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
+			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j), 0);
 	}
 
 	/* RcvArray */
 	for (i = 0; i < dd->chip_rcv_array_count; i++)
-		write_csr(dd, RCV_ARRAY + (8*i),
-					RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+		write_csr(dd, RCV_ARRAY + (8 * i),
+			  RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
 
 	/* RcvQPMapTable */
 	for (i = 0; i < 32; i++)
@@ -12638,8 +12889,8 @@ static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
 			return;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(dd,
-				"Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
-				status_bits, reg & status_bits);
+				   "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
+				   status_bits, reg & status_bits);
 			return;
 		}
 		udelay(1);
@@ -12671,7 +12922,7 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
 	for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
 		write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
 		write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
-					CCE_MSIX_TABLE_UPPER_RESETCSR);
+			  CCE_MSIX_TABLE_UPPER_RESETCSR);
 	}
 	for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
 		/* CCE_MSIX_PBA read-only */
@@ -12691,91 +12942,6 @@ static void reset_cce_csrs(struct hfi1_devdata *dd)
 		write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
 }
 
-/* set ASIC CSRs to chip reset defaults */
-static void reset_asic_csrs(struct hfi1_devdata *dd)
-{
-	int i;
-
-	/*
-	 * If the HFIs are shared between separate nodes or VMs,
-	 * then more will need to be done here.  One idea is a module
-	 * parameter that returns early, letting the first power-on or
-	 * a known first load do the reset and blocking all others.
-	 */
-
-	if (!(dd->flags & HFI1_DO_INIT_ASIC))
-		return;
-
-	if (dd->icode != ICODE_FPGA_EMULATION) {
-		/* emulation does not have an SBus - leave these alone */
-		/*
-		 * All writes to ASIC_CFG_SBUS_REQUEST do something.
-		 * Notes:
-		 * o The reset is not zero if aimed at the core.  See the
-		 *   SBus documentation for details.
-		 * o If the SBus firmware has been updated (e.g. by the BIOS),
-		 *   will the reset revert that?
-		 */
-		/* ASIC_CFG_SBUS_REQUEST leave alone */
-		write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
-	}
-	/* ASIC_SBUS_RESULT read-only */
-	write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
-	for (i = 0; i < ASIC_NUM_SCRATCH; i++)
-		write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
-	write_csr(dd, ASIC_CFG_MUTEX, 0);	/* this will clear it */
-
-	/* We might want to retain this state across FLR if we ever use it */
-	write_csr(dd, ASIC_CFG_DRV_STR, 0);
-
-	/* ASIC_CFG_THERM_POLL_EN leave alone */
-	/* ASIC_STS_THERM read-only */
-	/* ASIC_CFG_RESET leave alone */
-
-	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
-	/* ASIC_PCIE_SD_HOST_STATUS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
-	/* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
-	write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
-	/* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	/* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
-	for (i = 0; i < 16; i++)
-		write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
-
-	/* ASIC_GPIO_IN read-only */
-	write_csr(dd, ASIC_GPIO_OE, 0);
-	write_csr(dd, ASIC_GPIO_INVERT, 0);
-	write_csr(dd, ASIC_GPIO_OUT, 0);
-	write_csr(dd, ASIC_GPIO_MASK, 0);
-	/* ASIC_GPIO_STATUS read-only */
-	write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
-	/* ASIC_GPIO_FORCE leave alone */
-
-	/* ASIC_QSFP1_IN read-only */
-	write_csr(dd, ASIC_QSFP1_OE, 0);
-	write_csr(dd, ASIC_QSFP1_INVERT, 0);
-	write_csr(dd, ASIC_QSFP1_OUT, 0);
-	write_csr(dd, ASIC_QSFP1_MASK, 0);
-	/* ASIC_QSFP1_STATUS read-only */
-	write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
-	/* ASIC_QSFP1_FORCE leave alone */
-
-	/* ASIC_QSFP2_IN read-only */
-	write_csr(dd, ASIC_QSFP2_OE, 0);
-	write_csr(dd, ASIC_QSFP2_INVERT, 0);
-	write_csr(dd, ASIC_QSFP2_OUT, 0);
-	write_csr(dd, ASIC_QSFP2_MASK, 0);
-	/* ASIC_QSFP2_STATUS read-only */
-	write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
-	/* ASIC_QSFP2_FORCE leave alone */
-
-	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
-	/* this also writes a NOP command, clearing paging mode */
-	write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
-	write_csr(dd, ASIC_EEP_DATA, 0);
-}
-
 /* set MISC CSRs to chip reset defaults */
 static void reset_misc_csrs(struct hfi1_devdata *dd)
 {
@@ -12786,8 +12952,10 @@ static void reset_misc_csrs(struct hfi1_devdata *dd)
 		write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
 		write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
 	}
-	/* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
-	   only be written 128-byte chunks */
+	/*
+	 * MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
+	 * only be written 128-byte chunks
+	 */
 	/* init RSA engine to clear lingering errors */
 	write_csr(dd, MISC_CFG_RSA_CMD, 1);
 	write_csr(dd, MISC_CFG_RSA_MU, 0);
@@ -12843,18 +13011,17 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_ERR_CLEAR, ~0ull);
 	/* SEND_ERR_FORCE read-only */
 	for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
+		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
-		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
-	for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
-		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
+		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
+	for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
-		write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
+		write_csr(dd, SEND_COUNTER_ARRAY64 + (8 * i), 0);
 	write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
-	write_csr(dd, SEND_CM_GLOBAL_CREDIT,
-					SEND_CM_GLOBAL_CREDIT_RESETCSR);
+	write_csr(dd, SEND_CM_GLOBAL_CREDIT, SEND_CM_GLOBAL_CREDIT_RESETCSR);
 	/* SEND_CM_CREDIT_USED_STATUS read-only */
 	write_csr(dd, SEND_CM_TIMER_CTRL, 0);
 	write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
@@ -12862,7 +13029,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
 	write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
 	for (i = 0; i < TXE_NUM_DATA_VL; i++)
-		write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
+		write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
 	write_csr(dd, SEND_CM_CREDIT_VL15, 0);
 	/* SEND_CM_CREDIT_USED_VL read-only */
 	/* SEND_CM_CREDIT_USED_VL15 read-only */
@@ -12948,8 +13115,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
 		 */
 		if (count++ > 500) {
 			dd_dev_err(dd,
-				"%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
-				__func__, reg);
+				   "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
+				   __func__, reg);
 			break;
 		}
 		udelay(2); /* do not busy-wait the CSR */
@@ -12978,8 +13145,8 @@ static void init_rbufs(struct hfi1_devdata *dd)
 		/* give up after 100us - slowest possible at 33MHz is 73us */
 		if (count++ > 50) {
 			dd_dev_err(dd,
-				"%s: RcvStatus.RxRbufInit not set, continuing\n",
-				__func__);
+				   "%s: RcvStatus.RxRbufInit not set, continuing\n",
+				   __func__);
 			break;
 		}
 	}
@@ -13005,7 +13172,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
 	write_csr(dd, RCV_VL15, 0);
 	/* this is a clear-down */
 	write_csr(dd, RCV_ERR_INFO,
-			RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
+		  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 	/* RCV_ERR_STATUS read-only */
 	write_csr(dd, RCV_ERR_MASK, 0);
 	write_csr(dd, RCV_ERR_CLEAR, ~0ull);
@@ -13051,8 +13218,8 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
 		write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
 		/* RCV_EGR_OFFSET_TAIL read-only */
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
-			write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
-				0);
+			write_uctxt_csr(dd, i,
+					RCV_TID_FLOW_TABLE + (8 * j), 0);
 		}
 	}
 }
@@ -13154,7 +13321,7 @@ static void init_chip(struct hfi1_devdata *dd)
 		write_csr(dd, RCV_CTXT_CTRL, 0);
 	/* mask all interrupt sources */
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
-		write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
+		write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
 
 	/*
 	 * DC Reset: do a full DC reset before the register clear.
@@ -13163,7 +13330,7 @@ static void init_chip(struct hfi1_devdata *dd)
 	 * across the clear.
 	 */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	if (use_flr) {
 		/*
@@ -13184,22 +13351,19 @@ static void init_chip(struct hfi1_devdata *dd)
 			hfi1_pcie_flr(dd);
 			restore_pci_variables(dd);
 		}
-
-		reset_asic_csrs(dd);
 	} else {
 		dd_dev_info(dd, "Resetting CSRs with writes\n");
 		reset_cce_csrs(dd);
 		reset_txe_csrs(dd);
 		reset_rxe_csrs(dd);
-		reset_asic_csrs(dd);
 		reset_misc_csrs(dd);
 	}
 	/* clear the DC reset */
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
+
 	/*
 	 * Clear the QSFP reset.
 	 * An FLR enforces a 0 on all out pins. The driver does not touch
@@ -13212,6 +13376,7 @@ static void init_chip(struct hfi1_devdata *dd)
 	 */
 	write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
 	write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
+	init_chip_resources(dd);
 }
 
 static void init_early_variables(struct hfi1_devdata *dd)
@@ -13252,12 +13417,12 @@ static void init_kdeth_qp(struct hfi1_devdata *dd)
 		kdeth_qp = DEFAULT_KDETH_QP;
 
 	write_csr(dd, SEND_BTH_QP,
-			(kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
-				<< SEND_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+		  SEND_BTH_QP_KDETH_QP_SHIFT);
 
 	write_csr(dd, RCV_BTH_QP,
-			(kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
-				<< RCV_BTH_QP_KDETH_QP_SHIFT);
+		  (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+		  RCV_BTH_QP_KDETH_QP_SHIFT);
 }
 
 /**
@@ -13382,22 +13547,21 @@ static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
 		write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
 	/* add rule0 */
 	write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
-		RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
-			<< RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
-		2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK <<
+		  RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
+		  2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
 	write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
-		LRH_BTH_MATCH_OFFSET
-			<< RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
-		LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
-		LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
-		((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
-		QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
-		((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
+		  LRH_BTH_MATCH_OFFSET << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
+		  LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
+		  LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
+		  ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
+		  QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
+		  ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
 	write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
-		LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
-		LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
-		LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
-		LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
+		  LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
+		  LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
+		  LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
+		  LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
 	/* Enable RSM */
 	add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
 	kfree(rsmmap);
@@ -13415,9 +13579,8 @@ static void init_rxe(struct hfi1_devdata *dd)
 	/* enable all receive errors */
 	write_csr(dd, RCV_ERR_MASK, ~0ull);
 	/* setup QPN map table - start where VL15 context leaves off */
-	init_qos(
-		dd,
-		dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
+	init_qos(dd, dd->n_krcv_queues > MIN_KERNEL_KCTXTS ?
+		 MIN_KERNEL_KCTXTS : 0);
 	/*
 	 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
 	 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
@@ -13454,36 +13617,33 @@ static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
 			       u32 csr0to3, u32 csr4to7)
 {
 	write_csr(dd, csr0to3,
-		   0ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
-		|  1ull <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
-		|  2ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
-		|  4ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
+		  0ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT |
+		  1ull << SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT |
+		  2ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT |
+		  4ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
 	write_csr(dd, csr4to7,
-		   8ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
-		| 16ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
-		| 32ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
-		| 64ull * cu <<
-			SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
-
+		  8ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT |
+		  16ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT |
+		  32ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT |
+		  64ull * cu <<
+		  SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
 }
 
 static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
-					SEND_CM_LOCAL_AU_TABLE4_TO7);
+			   SEND_CM_LOCAL_AU_TABLE4_TO7);
 }
 
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
 {
 	assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
-					SEND_CM_REMOTE_AU_TABLE4_TO7);
+			   SEND_CM_REMOTE_AU_TABLE4_TO7);
 }
 
 static void init_txe(struct hfi1_devdata *dd)
@@ -13586,9 +13746,9 @@ int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13614,9 +13774,9 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
 	int ret = 0;
 	u64 reg;
 
-	if (ctxt < dd->num_rcv_contexts)
+	if (ctxt < dd->num_rcv_contexts) {
 		rcd = dd->rcd[ctxt];
-	else {
+	} else {
 		ret = -EINVAL;
 		goto done;
 	}
@@ -13639,24 +13799,26 @@ done:
  */
 void hfi1_start_cleanup(struct hfi1_devdata *dd)
 {
+	aspm_exit(dd);
 	free_cntrs(dd);
 	free_rcverr(dd);
 	clean_up_interrupts(dd);
+	finish_chip_resources(dd);
 }
 
 #define HFI_BASE_GUID(dev) \
 	((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
 
 /*
- * Certain chip functions need to be initialized only once per asic
- * instead of per-device. This function finds the peer device and
- * checks whether that chip initialization needs to be done by this
- * device.
+ * Information can be shared between the two HFIs on the same ASIC
+ * in the same OS.  This function finds the peer device and sets
+ * up a shared structure.
  */
-static void asic_should_init(struct hfi1_devdata *dd)
+static int init_asic_data(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
 	struct hfi1_devdata *tmp, *peer = NULL;
+	int ret = 0;
 
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	/* Find our peer device */
@@ -13668,13 +13830,21 @@ static void asic_should_init(struct hfi1_devdata *dd)
 		}
 	}
 
-	/*
-	 * "Claim" the ASIC for initialization if it hasn't been
-	 " "claimed" yet.
-	 */
-	if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
-		dd->flags |= HFI1_DO_INIT_ASIC;
+	if (peer) {
+		dd->asic_data = peer->asic_data;
+	} else {
+		dd->asic_data = kzalloc(sizeof(*dd->asic_data), GFP_KERNEL);
+		if (!dd->asic_data) {
+			ret = -ENOMEM;
+			goto done;
+		}
+		mutex_init(&dd->asic_data->asic_resource_mutex);
+	}
+	dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
+
+done:
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+	return ret;
 }
 
 /*
@@ -13694,7 +13864,7 @@ static int obtain_boardname(struct hfi1_devdata *dd)
 	ret = read_hfi1_efi_var(dd, "description", &size,
 				(void **)&dd->boardname);
 	if (ret) {
-		dd_dev_err(dd, "Board description not found\n");
+		dd_dev_info(dd, "Board description not found\n");
 		/* use generic description */
 		dd->boardname = kstrdup(generic, GFP_KERNEL);
 		if (!dd->boardname)
@@ -13703,6 +13873,50 @@ static int obtain_boardname(struct hfi1_devdata *dd)
 	return 0;
 }
 
+/*
+ * Check the interrupt registers to make sure that they are mapped correctly.
+ * It is intended to help user identify any mismapping by VMM when the driver
+ * is running in a VM. This function should only be called before interrupt
+ * is set up properly.
+ *
+ * Return 0 on success, -EINVAL on failure.
+ */
+static int check_int_registers(struct hfi1_devdata *dd)
+{
+	u64 reg;
+	u64 all_bits = ~(u64)0;
+	u64 mask;
+
+	/* Clear CceIntMask[0] to avoid raising any interrupts */
+	mask = read_csr(dd, CCE_INT_MASK);
+	write_csr(dd, CCE_INT_MASK, 0ull);
+	reg = read_csr(dd, CCE_INT_MASK);
+	if (reg)
+		goto err_exit;
+
+	/* Clear all interrupt status bits */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg)
+		goto err_exit;
+
+	/* Set all interrupt status bits */
+	write_csr(dd, CCE_INT_FORCE, all_bits);
+	reg = read_csr(dd, CCE_INT_STATUS);
+	if (reg != all_bits)
+		goto err_exit;
+
+	/* Restore the interrupt mask */
+	write_csr(dd, CCE_INT_CLEAR, all_bits);
+	write_csr(dd, CCE_INT_MASK, mask);
+
+	return 0;
+err_exit:
+	write_csr(dd, CCE_INT_MASK, mask);
+	dd_dev_err(dd, "Interrupt registers not properly mapped by VMM\n");
+	return -EINVAL;
+}
+
 /**
  * Allocate and initialize the device structure for the hfi.
  * @dev: the pci_dev for hfi1_ib device
@@ -13727,9 +13941,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		"RTL FPGA emulation",
 		"Functional simulator"
 	};
+	struct pci_dev *parent = pdev->bus->self;
 
-	dd = hfi1_alloc_devdata(pdev,
-		NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
+	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+				sizeof(struct hfi1_pportdata));
 	if (IS_ERR(dd))
 		goto bail;
 	ppd = dd->pport;
@@ -13750,8 +13965,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		/* link width active is 0 when link is down */
 		/* link width downgrade active is 0 when link is down */
 
-		if (num_vls < HFI1_MIN_VLS_SUPPORTED
-			|| num_vls > HFI1_MAX_VLS_SUPPORTED) {
+		if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
+		    num_vls > HFI1_MAX_VLS_SUPPORTED) {
 			hfi1_early_err(&pdev->dev,
 				       "Invalid num_vls %u, using %u VLs\n",
 				    num_vls, HFI1_MAX_VLS_SUPPORTED);
@@ -13759,6 +13974,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		}
 		ppd->vls_supported = num_vls;
 		ppd->vls_operational = ppd->vls_supported;
+		ppd->actual_vls_operational = ppd->vls_supported;
 		/* Set the default MTU. */
 		for (vl = 0; vl < num_vls; vl++)
 			dd->vld[vl].mtu = hfi1_max_mtu;
@@ -13778,6 +13994,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		/* start in offline */
 		ppd->host_link_state = HLS_DN_OFFLINE;
 		init_vl_arb_caches(ppd);
+		ppd->last_pstate = 0xff; /* invalid value */
 	}
 
 	dd->link_default = HLS_DN_POLL;
@@ -13803,8 +14020,21 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
 			& CCE_REVISION_CHIP_REV_MINOR_MASK;
 
-	/* obtain the hardware ID - NOT related to unit, which is a
-	   software enumeration */
+	/*
+	 * Check interrupt registers mapping if the driver has no access to
+	 * the upstream component. In this case, it is likely that the driver
+	 * is running in a VM.
+	 */
+	if (!parent) {
+		ret = check_int_registers(dd);
+		if (ret)
+			goto bail_cleanup;
+	}
+
+	/*
+	 * obtain the hardware ID - NOT related to unit, which is a
+	 * software enumeration
+	 */
 	reg = read_csr(dd, CCE_REVISION2);
 	dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
 					& CCE_REVISION2_HFI_ID_MASK;
@@ -13812,8 +14042,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
 	dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
 	dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
-		dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
-		(int)dd->irev);
+		    dd->icode < ARRAY_SIZE(inames) ?
+		    inames[dd->icode] : "unknown", (int)dd->irev);
 
 	/* speeds the hardware can support */
 	dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
@@ -13842,6 +14072,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 			   num_vls, dd->chip_sdma_engines);
 		num_vls = dd->chip_sdma_engines;
 		ppd->vls_supported = dd->chip_sdma_engines;
+		ppd->vls_operational = ppd->vls_supported;
 	}
 
 	/*
@@ -13863,8 +14094,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	/* needs to be done before we look for the peer device */
 	read_guid(dd);
 
-	/* should this device init the ASIC block? */
-	asic_should_init(dd);
+	/* set up shared ASIC data with peer device */
+	ret = init_asic_data(dd);
+	if (ret)
+		goto bail_cleanup;
 
 	/* obtain chip sizes, reset chip CSRs */
 	init_chip(dd);
@@ -13874,6 +14107,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	if (ret)
 		goto bail_cleanup;
 
+	/* Needs to be called before hfi1_firmware_init */
+	get_platform_config(dd);
+
 	/* read in firmware */
 	ret = hfi1_firmware_init(dd);
 	if (ret)
@@ -13925,6 +14161,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	/* set up KDETH QP prefix in both RX and TX CSRs */
 	init_kdeth_qp(dd);
 
+	ret = hfi1_dev_affinity_init(dd);
+	if (ret)
+		goto bail_cleanup;
+
 	/* send contexts must be set up before receive contexts */
 	ret = init_send_contexts(dd);
 	if (ret)
@@ -14022,7 +14262,6 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
 	return (u16)delta_cycles;
 }
 
-
 /**
  * create_pbc - build a pbc for transmission
  * @flags: special case flags or-ed in built pbc
@@ -14078,10 +14317,15 @@ static int thermal_init(struct hfi1_devdata *dd)
 	int ret = 0;
 
 	if (dd->icode != ICODE_RTL_SILICON ||
-	    !(dd->flags & HFI1_DO_INIT_ASIC))
+	    check_chip_resource(dd, CR_THERM_INIT, NULL))
 		return ret;
 
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		THERM_FAILURE(dd, ret, "Acquire SBus");
+		return ret;
+	}
+
 	dd_dev_info(dd, "Initializing thermal sensor\n");
 	/* Disable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x0);
@@ -14128,8 +14372,14 @@ static int thermal_init(struct hfi1_devdata *dd)
 
 	/* Enable polling of thermal readings */
 	write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
+
+	/* Set initialized flag */
+	ret = acquire_chip_resource(dd, CR_THERM_INIT, 0);
+	if (ret)
+		THERM_FAILURE(dd, ret, "Unable to set thermal init flag");
+
 done:
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 	return ret;
 }
 
@@ -14144,7 +14394,7 @@ static void handle_temp_err(struct hfi1_devdata *dd)
 	dd_dev_emerg(dd,
 		     "Critical temperature reached! Forcing device into freeze mode!\n");
 	dd->flags |= HFI1_FORCED_FREEZE;
-	start_freeze_handling(ppd, FREEZE_SELF|FREEZE_ABORT);
+	start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
 	/*
 	 * Shut DC down as much and as quickly as possible.
 	 *
@@ -14158,8 +14408,8 @@ static void handle_temp_err(struct hfi1_devdata *dd)
 	 */
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
-	set_physical_link_state(dd, PLS_OFFLINE |
-				(OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
+	set_physical_link_state(dd, (OPA_LINKDOWN_REASON_SMA_DISABLED << 8) |
+				PLS_OFFLINE);
 	/*
 	 * Step 2: Shutdown LCB and 8051
 	 *         After shutdown, do not restore DC_CFG_RESET value.
diff --git a/drivers/staging/rdma/hfi1/chip.h b/drivers/staging/rdma/hfi1/chip.h
index 5b375ddc345d..4f3b878e43eb 100644
--- a/drivers/staging/rdma/hfi1/chip.h
+++ b/drivers/staging/rdma/hfi1/chip.h
@@ -1,14 +1,13 @@
 #ifndef _CHIP_H
 #define _CHIP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -79,8 +76,10 @@
 #define PIO_CMASK 0x7ff	/* counter mask for free and fill counters */
 #define MAX_EAGER_ENTRIES    2048	/* max receive eager entries */
 #define MAX_TID_PAIR_ENTRIES 1024	/* max receive expected pairs */
-/* Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
-   at 64 bytes for all generation one devices */
+/*
+ * Virtual? Allocation Unit, defined as AU = 8*2^vAU, 64 bytes, AU is fixed
+ * at 64 bytes for all generation one devices
+ */
 #define CM_VAU 3
 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
 #define CM_GLOBAL_CREDITS 0x940
@@ -93,15 +92,15 @@
 #define TXE_PIO_SEND (TXE + TXE_PIO_SEND_OFFSET)
 
 /* PBC flags */
-#define PBC_INTR		(1ull << 31)
+#define PBC_INTR		BIT_ULL(31)
 #define PBC_DC_INFO_SHIFT	(30)
-#define PBC_DC_INFO		(1ull << PBC_DC_INFO_SHIFT)
-#define PBC_TEST_EBP	(1ull << 29)
-#define PBC_PACKET_BYPASS	(1ull << 28)
-#define PBC_CREDIT_RETURN	(1ull << 25)
-#define PBC_INSERT_BYPASS_ICRC (1ull << 24)
-#define PBC_TEST_BAD_ICRC	(1ull << 23)
-#define PBC_FECN		(1ull << 22)
+#define PBC_DC_INFO		BIT_ULL(PBC_DC_INFO_SHIFT)
+#define PBC_TEST_EBP		BIT_ULL(29)
+#define PBC_PACKET_BYPASS	BIT_ULL(28)
+#define PBC_CREDIT_RETURN	BIT_ULL(25)
+#define PBC_INSERT_BYPASS_ICRC	BIT_ULL(24)
+#define PBC_TEST_BAD_ICRC	BIT_ULL(23)
+#define PBC_FECN		BIT_ULL(22)
 
 /* PbcInsertHcrc field settings */
 #define PBC_IHCRC_LKDETH 0x0	/* insert @ local KDETH offset */
@@ -212,7 +211,7 @@
 #define PLS_CONFIGPHY_DEBOUCE		   0x40
 #define PLS_CONFIGPHY_ESTCOMM		   0x41
 #define PLS_CONFIGPHY_ESTCOMM_TXRX_HUNT	   0x42
-#define PLS_CONFIGPHY_ESTcOMM_LOCAL_COMPLETE   0x43
+#define PLS_CONFIGPHY_ESTCOMM_LOCAL_COMPLETE   0x43
 #define PLS_CONFIGPHY_OPTEQ			   0x44
 #define PLS_CONFIGPHY_OPTEQ_OPTIMIZING	   0x44
 #define PLS_CONFIGPHY_OPTEQ_LOCAL_COMPLETE	   0x45
@@ -242,36 +241,37 @@
 #define HCMD_SUCCESS 2
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR - error flags */
-#define SPICO_ROM_FAILED		    (1 <<  0)
-#define UNKNOWN_FRAME		    (1 <<  1)
-#define TARGET_BER_NOT_MET		    (1 <<  2)
-#define FAILED_SERDES_INTERNAL_LOOPBACK (1 <<  3)
-#define FAILED_SERDES_INIT		    (1 <<  4)
-#define FAILED_LNI_POLLING		    (1 <<  5)
-#define FAILED_LNI_DEBOUNCE		    (1 <<  6)
-#define FAILED_LNI_ESTBCOMM		    (1 <<  7)
-#define FAILED_LNI_OPTEQ		    (1 <<  8)
-#define FAILED_LNI_VERIFY_CAP1	    (1 <<  9)
-#define FAILED_LNI_VERIFY_CAP2	    (1 << 10)
-#define FAILED_LNI_CONFIGLT		    (1 << 11)
+#define SPICO_ROM_FAILED		BIT(0)
+#define UNKNOWN_FRAME			BIT(1)
+#define TARGET_BER_NOT_MET		BIT(2)
+#define FAILED_SERDES_INTERNAL_LOOPBACK	BIT(3)
+#define FAILED_SERDES_INIT		BIT(4)
+#define FAILED_LNI_POLLING		BIT(5)
+#define FAILED_LNI_DEBOUNCE		BIT(6)
+#define FAILED_LNI_ESTBCOMM		BIT(7)
+#define FAILED_LNI_OPTEQ		BIT(8)
+#define FAILED_LNI_VERIFY_CAP1		BIT(9)
+#define FAILED_LNI_VERIFY_CAP2		BIT(10)
+#define FAILED_LNI_CONFIGLT		BIT(11)
+#define HOST_HANDSHAKE_TIMEOUT		BIT(12)
 
 #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
 			| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
 			| FAILED_LNI_VERIFY_CAP1 \
 			| FAILED_LNI_VERIFY_CAP2 \
-			| FAILED_LNI_CONFIGLT)
+			| FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
-#define HOST_REQ_DONE	   (1 << 0)
-#define BC_PWR_MGM_MSG	   (1 << 1)
-#define BC_SMA_MSG		   (1 << 2)
-#define BC_BCC_UNKOWN_MSG	   (1 << 3)
-#define BC_IDLE_UNKNOWN_MSG	   (1 << 4)
-#define EXT_DEVICE_CFG_REQ	   (1 << 5)
-#define VERIFY_CAP_FRAME	   (1 << 6)
-#define LINKUP_ACHIEVED	   (1 << 7)
-#define LINK_GOING_DOWN	   (1 << 8)
-#define LINK_WIDTH_DOWNGRADED  (1 << 9)
+#define HOST_REQ_DONE		BIT(0)
+#define BC_PWR_MGM_MSG		BIT(1)
+#define BC_SMA_MSG		BIT(2)
+#define BC_BCC_UNKNOWN_MSG	BIT(3)
+#define BC_IDLE_UNKNOWN_MSG	BIT(4)
+#define EXT_DEVICE_CFG_REQ	BIT(5)
+#define VERIFY_CAP_FRAME	BIT(6)
+#define LINKUP_ACHIEVED		BIT(7)
+#define LINK_GOING_DOWN		BIT(8)
+#define LINK_WIDTH_DOWNGRADED	BIT(9)
 
 /* DC_DC8051_CFG_EXT_DEV_1.REQ_TYPE - 8051 host requests */
 #define HREQ_LOAD_CONFIG	0x01
@@ -335,14 +335,14 @@
  * the CSR fields hold multiples of this value.
  */
 #define RCV_SHIFT 3
-#define RCV_INCREMENT (1 << RCV_SHIFT)
+#define RCV_INCREMENT BIT(RCV_SHIFT)
 
 /*
  * Receive header queue entry increment - the CSR holds multiples of
  * this value.
  */
 #define HDRQ_SIZE_SHIFT 5
-#define HDRQ_INCREMENT (1 << HDRQ_SIZE_SHIFT)
+#define HDRQ_INCREMENT BIT(HDRQ_SIZE_SHIFT)
 
 /*
  * Freeze handling flags
@@ -371,6 +371,9 @@
 #define NUM_LANE_FIELDS    0x8
 
 /* 8051 general register Field IDs */
+#define LINK_OPTIMIZATION_SETTINGS   0x00
+#define LINK_TUNING_PARAMETERS	     0x02
+#define DC_HOST_COMM_SETTINGS	     0x03
 #define TX_SETTINGS		     0x06
 #define VERIFY_CAP_LOCAL_PHY	     0x07
 #define VERIFY_CAP_LOCAL_FABRIC	     0x08
@@ -387,6 +390,10 @@
 #define LINK_QUALITY_INFO            0x14
 #define REMOTE_DEVICE_ID	     0x15
 
+/* 8051 lane specific register field IDs */
+#define TX_EQ_SETTINGS		0x00
+#define CHANNEL_LOSS_SETTINGS	0x05
+
 /* Lane ID for general configuration registers */
 #define GENERAL_CONFIG 4
 
@@ -511,8 +518,10 @@ enum {
 #define LCB_CRC_48B			0x2	/* 48b CRC */
 #define LCB_CRC_12B_16B_PER_LANE	0x3	/* 12b-16b per lane CRC */
 
-/* the following enum is (almost) a copy/paste of the definition
- * in the OPA spec, section 20.2.2.6.8 (PortInfo) */
+/*
+ * the following enum is (almost) a copy/paste of the definition
+ * in the OPA spec, section 20.2.2.6.8 (PortInfo)
+ */
 enum {
 	PORT_LTP_CRC_MODE_NONE = 0,
 	PORT_LTP_CRC_MODE_14 = 1, /* 14-bit LTP CRC mode (optional) */
@@ -614,6 +623,8 @@ u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
 #define NUM_PCIE_SERDES 16	/* number of PCIe serdes on the SBus */
 extern const u8 pcie_serdes_broadcast[];
 extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES];
+extern uint platform_config_load;
+
 /* SBus commands */
 #define RESET_SBUS_RECEIVER 0x20
 #define WRITE_SBUS_RECEIVER 0x21
@@ -629,6 +640,42 @@ int load_firmware(struct hfi1_devdata *dd);
 void dispose_firmware(void);
 int acquire_hw_mutex(struct hfi1_devdata *dd);
 void release_hw_mutex(struct hfi1_devdata *dd);
+
+/*
+ * Bitmask of dynamic access for ASIC block chip resources.  Each HFI has its
+ * own range of bits for the resource so it can clear its own bits on
+ * starting and exiting.  If either HFI has the resource bit set, the
+ * resource is in use.  The separate bit ranges are:
+ *	HFI0 bits  7:0
+ *	HFI1 bits 15:8
+ */
+#define CR_SBUS  0x01	/* SBUS, THERM, and PCIE registers */
+#define CR_EPROM 0x02	/* EEP, GPIO registers */
+#define CR_I2C1  0x04	/* QSFP1_OE register */
+#define CR_I2C2  0x08	/* QSFP2_OE register */
+#define CR_DYN_SHIFT 8	/* dynamic flag shift */
+#define CR_DYN_MASK  ((1ull << CR_DYN_SHIFT) - 1)
+
+/*
+ * Bitmask of static ASIC states these are outside of the dynamic ASIC
+ * block chip resources above.  These are to be set once and never cleared.
+ * Must be holding the SBus dynamic flag when setting.
+ */
+#define CR_THERM_INIT	0x010000
+
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait);
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource);
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func);
+void init_chip_resources(struct hfi1_devdata *dd);
+void finish_chip_resources(struct hfi1_devdata *dd);
+
+/* ms wait time for access to an SBus resoure */
+#define SBUS_TIMEOUT 4000 /* long enough for a FW download and SBR */
+
+/* ms wait time for a qsfp (i2c) chain to become available */
+#define QSFP_WAIT 20000 /* long enough for FW update to the F4 uc */
+
 void fabric_serdes_reset(struct hfi1_devdata *dd);
 int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
 
@@ -644,13 +691,17 @@ void handle_verify_cap(struct work_struct *work);
 void handle_freeze(struct work_struct *work);
 void handle_link_up(struct work_struct *work);
 void handle_link_down(struct work_struct *work);
+void handle_8051_request(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
+void reset_qsfp(struct hfi1_pportdata *ppd);
+void qsfp_event(struct work_struct *work);
 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
 int send_idle_sma(struct hfi1_devdata *dd, u64 message);
+int load_8051_config(struct hfi1_devdata *, u8, u8, u32);
+int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *);
 int start_link(struct hfi1_pportdata *ppd);
-void init_qsfp(struct hfi1_pportdata *ppd);
 int bringup_serdes(struct hfi1_pportdata *ppd);
 void set_intr_state(struct hfi1_devdata *dd, u32 enable);
 void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
@@ -690,6 +741,8 @@ u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl);
 u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data);
 u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl);
 u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data);
+u32 read_logical_state(struct hfi1_devdata *dd);
+void force_recv_intr(struct hfi1_ctxtdata *rcd);
 
 /* Per VL indexes */
 enum {
@@ -785,8 +838,14 @@ enum {
 	C_SW_CPU_RCV_LIM,
 	C_SW_VTX_WAIT,
 	C_SW_PIO_WAIT,
+	C_SW_PIO_DRAIN,
 	C_SW_KMEM_WAIT,
 	C_SW_SEND_SCHED,
+	C_SDMA_DESC_FETCHED_CNT,
+	C_SDMA_INT_CNT,
+	C_SDMA_ERR_CNT,
+	C_SDMA_IDLE_INT_CNT,
+	C_SDMA_PROGRESS_INT_CNT,
 /* MISC_ERR_STATUS */
 	C_MISC_PLL_LOCK_FAIL_ERR,
 	C_MISC_MBIST_FAIL_ERR,
@@ -1275,10 +1334,8 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
 		  u32 type, unsigned long pa, u16 order);
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
 void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
-u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
-		    u64 **cntrp);
-u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
-			char **namep, u64 **cntrp);
+u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
+u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
 u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
 int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
 int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
diff --git a/drivers/staging/rdma/hfi1/chip_registers.h b/drivers/staging/rdma/hfi1/chip_registers.h
index 014d7a609ea0..770f05c9b8de 100644
--- a/drivers/staging/rdma/hfi1/chip_registers.h
+++ b/drivers/staging/rdma/hfi1/chip_registers.h
@@ -2,14 +2,13 @@
 #define DEF_CHIP_REG
 
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -21,8 +20,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -1281,6 +1278,9 @@
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SHIFT 0
 #define SEND_STATIC_RATE_CONTROL_CSR_SRC_RELOAD_SMASK 0xFFFFull
 #define PCIE_CFG_REG_PL2 (PCIE + 0x000000000708)
+#define PCIE_CFG_REG_PL3 (PCIE + 0x00000000070C)
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT 27
+#define PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK 0x38000000
 #define PCIE_CFG_REG_PL102 (PCIE + 0x000000000898)
 #define PCIE_CFG_REG_PL102_GEN3_EQ_POST_CURSOR_PSET_SHIFT 12
 #define PCIE_CFG_REG_PL102_GEN3_EQ_CURSOR_PSET_SHIFT 6
@@ -1301,5 +1301,6 @@
 #define CCE_INT_BLOCKED (CCE + 0x000000110C00)
 #define SEND_DMA_IDLE_CNT (TXE + 0x000000200040)
 #define SEND_DMA_DESC_FETCHED_CNT (TXE + 0x000000200058)
+#define CCE_MSIX_PBA_OFFSET 0X0110000
 
 #endif          /* DEF_CHIP_REG */
diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h
index 5dd92720faae..e9b6bb322025 100644
--- a/drivers/staging/rdma/hfi1/common.h
+++ b/drivers/staging/rdma/hfi1/common.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -341,19 +338,16 @@ struct hfi1_message_header {
 #define FULL_MGMT_P_KEY      0xFFFF
 
 #define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_PERMISSIVE_LID 0xFFFF
 #define HFI1_AETH_CREDIT_SHIFT 24
 #define HFI1_AETH_CREDIT_MASK 0x1F
 #define HFI1_AETH_CREDIT_INVAL 0x1F
 #define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_QPN_MASK 0xFFFFFF
 #define HFI1_FECN_SHIFT 31
 #define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK (1 << HFI1_FECN_SHIFT)
+#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
 #define HFI1_BECN_SHIFT 30
 #define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK (1 << HFI1_BECN_SHIFT)
-#define HFI1_MULTICAST_LID_BASE 0xC000
+#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
 
 static inline __u64 rhf_to_cpu(const __le32 *rbuf)
 {
diff --git a/drivers/staging/rdma/hfi1/debugfs.c b/drivers/staging/rdma/hfi1/debugfs.c
index acd2269e9f14..dbab9d9cc288 100644
--- a/drivers/staging/rdma/hfi1/debugfs.c
+++ b/drivers/staging/rdma/hfi1/debugfs.c
@@ -1,13 +1,12 @@
 #ifdef CONFIG_DEBUG_FS
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -19,8 +18,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,6 +49,7 @@
 #include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 #include "debugfs.h"
@@ -71,6 +69,7 @@ static const struct seq_operations _##name##_seq_ops = { \
 	.stop  = _##name##_seq_stop, \
 	.show  = _##name##_seq_show \
 }
+
 #define DEBUGFS_SEQ_FILE_OPEN(name) \
 static int _##name##_open(struct inode *inode, struct file *s) \
 { \
@@ -102,7 +101,6 @@ do { \
 		pr_warn("create of %s failed\n", name); \
 } while (0)
 
-
 #define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
 	DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
 
@@ -127,7 +125,6 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	return pos;
 }
 
-
 static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -151,8 +148,8 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
 	if (!n_packets && !n_bytes)
 		return SEQ_SKIP;
 	seq_printf(s, "%02llx %llu/%llu\n", i,
-		(unsigned long long) n_packets,
-		(unsigned long long) n_bytes);
+		   (unsigned long long)n_packets,
+		   (unsigned long long)n_bytes);
 
 	return 0;
 }
@@ -247,7 +244,7 @@ __acquires(RCU)
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
-				   loff_t *pos)
+				loff_t *pos)
 {
 	struct qp_iter *iter = iter_ptr;
 
@@ -308,7 +305,6 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos)
 	return pos;
 }
 
-
 static void _sdes_seq_stop(struct seq_file *s, void *v)
 __releases(RCU)
 {
@@ -341,7 +337,7 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, NULL, &counters);
+	avail = hfi1_read_cntrs(dd, NULL, &counters);
 	rval =  simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
@@ -358,7 +354,7 @@ static ssize_t dev_names_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	avail = hfi1_read_cntrs(dd, *ppos, &names, NULL);
+	avail = hfi1_read_cntrs(dd, &names, NULL);
 	rval =  simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -385,8 +381,7 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
 
 	rcu_read_lock();
 	dd = private2dd(file);
-	/* port number n/a here since names are constant */
-	avail = hfi1_read_portcntrs(dd, *ppos, 0, &names, NULL);
+	avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
 	rval = simple_read_from_buffer(buf, count, ppos, names, avail);
 	rcu_read_unlock();
 	return rval;
@@ -394,28 +389,150 @@ static ssize_t portnames_read(struct file *file, char __user *buf,
 
 /* read the per-port counters */
 static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf,
-				size_t count, loff_t *ppos)
+				      size_t count, loff_t *ppos)
 {
 	u64 *counters;
 	size_t avail;
-	struct hfi1_devdata *dd;
 	struct hfi1_pportdata *ppd;
 	ssize_t rval;
 
 	rcu_read_lock();
 	ppd = private2ppd(file);
-	dd = ppd->dd;
-	avail = hfi1_read_portcntrs(dd, *ppos, ppd->port - 1, NULL, &counters);
+	avail = hfi1_read_portcntrs(ppd, NULL, &counters);
 	rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
 	rcu_read_unlock();
 	return rval;
 }
 
+static void check_dyn_flag(u64 scratch0, char *p, int size, int *used,
+			   int this_hfi, int hfi, u32 flag, const char *what)
+{
+	u32 mask;
+
+	mask = flag << (hfi ? CR_DYN_SHIFT : 0);
+	if (scratch0 & mask) {
+		*used += scnprintf(p + *used, size - *used,
+				   "  0x%08x - HFI%d %s in use, %s device\n",
+				   mask, hfi, what,
+				   this_hfi == hfi ? "this" : "other");
+	}
+}
+
+static ssize_t asic_flags_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u64 scratch0;
+	char *tmp;
+	int ret = 0;
+	int size;
+	int used;
+	int i;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+	size = PAGE_SIZE;
+	used = 0;
+	tmp = kmalloc(size, GFP_KERNEL);
+	if (!tmp) {
+		rcu_read_unlock();
+		return -ENOMEM;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	used += scnprintf(tmp + used, size - used,
+			  "Resource flags: 0x%016llx\n", scratch0);
+
+	/* check permanent flag */
+	if (scratch0 & CR_THERM_INIT) {
+		used += scnprintf(tmp + used, size - used,
+				  "  0x%08x - thermal monitoring initialized\n",
+				  (u32)CR_THERM_INIT);
+	}
+
+	/* check each dynamic flag on each HFI */
+	for (i = 0; i < 2; i++) {
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_SBUS, "SBus");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_EPROM, "EPROM");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C1, "i2c chain 1");
+		check_dyn_flag(scratch0, tmp, size, &used, dd->hfi1_id, i,
+			       CR_I2C2, "i2c chain 2");
+	}
+	used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
+
+	ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
+	rcu_read_unlock();
+	kfree(tmp);
+	return ret;
+}
+
+static ssize_t asic_flags_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	char *buff;
+	int ret;
+	unsigned long long value;
+	u64 scratch0;
+	u64 clear;
+
+	rcu_read_lock();
+	ppd = private2ppd(file);
+	dd = ppd->dd;
+
+	buff = kmalloc(count + 1, GFP_KERNEL);
+	if (!buff) {
+		ret = -ENOMEM;
+		goto do_return;
+	}
+
+	ret = copy_from_user(buff, buf, count);
+	if (ret > 0) {
+		ret = -EFAULT;
+		goto do_free;
+	}
+
+	/* zero terminate and read the expected integer */
+	buff[count] = 0;
+	ret = kstrtoull(buff, 0, &value);
+	if (ret)
+		goto do_free;
+	clear = value;
+
+	/* obtain exclusive access */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+	acquire_hw_mutex(dd);
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~clear;
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+
+	/* return the number of bytes written */
+	ret = count;
+
+ do_free:
+	kfree(buff);
+ do_return:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * read the per-port QSFP data for ppd
  */
 static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
-			   size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	struct hfi1_pportdata *ppd;
 	char *tmp;
@@ -439,7 +556,7 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf,
 
 /* Do an i2c write operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -451,6 +568,16 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
@@ -463,9 +590,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 		goto _free;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
 	if (total_written < 0) {
 		ret = total_written;
@@ -485,21 +609,21 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
 
 /* Do an i2c write operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c write operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do an i2c read operation on the chain for the given HFI. */
 static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				  size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -511,15 +635,22 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
 	rcu_read_lock();
 	ppd = private2ppd(file);
 
+	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
+	i2c_addr = (*ppos >> 16) & 0xffff;
+	offset = *ppos & 0xffff;
+
+	/* explicitly reject invalid address 0 to catch cp and cat */
+	if (i2c_addr == 0) {
+		ret = -EINVAL;
+		goto _return;
+	}
+
 	buff = kmalloc(count, GFP_KERNEL);
 	if (!buff) {
 		ret = -ENOMEM;
 		goto _return;
 	}
 
-	i2c_addr = (*ppos >> 16) & 0xff;
-	offset = *ppos & 0xffff;
-
 	total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
 	if (total_read < 0) {
 		ret = total_read;
@@ -545,21 +676,21 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf,
 
 /* Do an i2c read operation on chain for HFI 0. */
 static ssize_t i2c1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do an i2c read operation on chain for HFI 1. */
 static ssize_t i2c2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				 size_t count, loff_t *ppos)
 {
 	return __i2c_debugfs_read(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP write operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos, u32 target)
+				    size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -605,21 +736,21 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
 
 /* Do a QSFP write operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP write operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_write(struct file *file, const char __user *buf,
-			   size_t count, loff_t *ppos)
+				   size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_write(file, buf, count, ppos, 1);
 }
 
 /* Do a QSFP read operation on the i2c chain for the given HFI. */
 static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos, u32 target)
+				   size_t count, loff_t *ppos, u32 target)
 {
 	struct hfi1_pportdata *ppd;
 	char *buff;
@@ -665,18 +796,116 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf,
 
 /* Do a QSFP read operation on i2c chain for HFI 0. */
 static ssize_t qsfp1_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 0);
 }
 
 /* Do a QSFP read operation on i2c chain for HFI 1. */
 static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
-			size_t count, loff_t *ppos)
+				  size_t count, loff_t *ppos)
 {
 	return __qsfp_debugfs_read(file, buf, count, ppos, 1);
 }
 
+static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int i2c1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 0);
+}
+
+static int i2c2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_open(in, fp, 1);
+}
+
+static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int i2c1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 0);
+}
+
+static int i2c2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __i2c_debugfs_release(in, fp, 1);
+}
+
+static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	ppd = private2ppd(fp);
+
+	ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
+	if (ret) /* failed - release the module */
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 0);
+}
+
+static int qsfp2_debugfs_open(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_open(in, fp, 1);
+}
+
+static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
+{
+	struct hfi1_pportdata *ppd;
+
+	ppd = private2ppd(fp);
+
+	release_chip_resource(ppd->dd, i2c_target(target));
+	module_put(THIS_MODULE);
+
+	return 0;
+}
+
+static int qsfp1_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 0);
+}
+
+static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
+{
+	return __qsfp_debugfs_release(in, fp, 1);
+}
+
 #define DEBUGFS_OPS(nm, readroutine, writeroutine)	\
 { \
 	.name = nm, \
@@ -687,6 +916,18 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
 	}, \
 }
 
+#define DEBUGFS_XOPS(nm, readf, writef, openf, releasef) \
+{ \
+	.name = nm, \
+	.ops = { \
+		.read = readf, \
+		.write = writef, \
+		.llseek = generic_file_llseek, \
+		.open = openf, \
+		.release = releasef \
+	}, \
+}
+
 static const struct counter_info cntr_ops[] = {
 	DEBUGFS_OPS("counter_names", dev_names_read, NULL),
 	DEBUGFS_OPS("counters", dev_counters_read, NULL),
@@ -695,11 +936,16 @@ static const struct counter_info cntr_ops[] = {
 
 static const struct counter_info port_cntr_ops[] = {
 	DEBUGFS_OPS("port%dcounters", portcntrs_debugfs_read, NULL),
-	DEBUGFS_OPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write),
-	DEBUGFS_OPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write),
+	DEBUGFS_XOPS("i2c1", i2c1_debugfs_read, i2c1_debugfs_write,
+		     i2c1_debugfs_open, i2c1_debugfs_release),
+	DEBUGFS_XOPS("i2c2", i2c2_debugfs_read, i2c2_debugfs_write,
+		     i2c2_debugfs_open, i2c2_debugfs_release),
 	DEBUGFS_OPS("qsfp_dump%d", qsfp_debugfs_dump, NULL),
-	DEBUGFS_OPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write),
-	DEBUGFS_OPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write),
+	DEBUGFS_XOPS("qsfp1", qsfp1_debugfs_read, qsfp1_debugfs_write,
+		     qsfp1_debugfs_open, qsfp1_debugfs_release),
+	DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
+		     qsfp2_debugfs_open, qsfp2_debugfs_release),
+	DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
 };
 
 void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -747,8 +993,8 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 					    ibd->hfi1_ibdev_dbg,
 					    ppd,
 					    &port_cntr_ops[i].ops,
-					    port_cntr_ops[i].ops.write == NULL ?
-					    S_IRUGO : S_IRUGO|S_IWUSR);
+					    !port_cntr_ops[i].ops.write ?
+					    S_IRUGO : S_IRUGO | S_IWUSR);
 		}
 }
 
diff --git a/drivers/staging/rdma/hfi1/debugfs.h b/drivers/staging/rdma/hfi1/debugfs.h
index 92d6fe146714..b6fb6814f1b8 100644
--- a/drivers/staging/rdma/hfi1/debugfs.h
+++ b/drivers/staging/rdma/hfi1/debugfs.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEBUGFS_H
 #define _HFI1_DEBUGFS_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c
index 58472e5ac4e5..c05c39da83b1 100644
--- a/drivers/staging/rdma/hfi1/device.c
+++ b/drivers/staging/rdma/hfi1/device.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h
index 2850ff739d81..5bb3e83cf2da 100644
--- a/drivers/staging/rdma/hfi1/device.h
+++ b/drivers/staging/rdma/hfi1/device.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_DEVICE_H
 #define _HFI1_DEVICE_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c
index e41159fe6889..c5b520bf610e 100644
--- a/drivers/staging/rdma/hfi1/diag.c
+++ b/drivers/staging/rdma/hfi1/diag.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -70,6 +67,7 @@
 #include "hfi.h"
 #include "device.h"
 #include "common.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 #undef pr_fmt
@@ -80,15 +78,15 @@
 /* Snoop option mask */
 #define SNOOP_DROP_SEND		BIT(0)
 #define SNOOP_USE_METADATA	BIT(1)
+#define SNOOP_SET_VL0TOVL15     BIT(2)
 
 static u8 snoop_flags;
 
 /*
  * Extract packet length from LRH header.
- * Why & 0x7FF? Because len is only 11 bits in case it wasn't 0'd we throw the
- * bogus bits away. This is in Dwords so multiply by 4 to get size in bytes
+ * This is in Dwords so multiply by 4 to get size in bytes
  */
-#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0x7FF)) << 2)
+#define HFI1_GET_PKT_LEN(x)      (((be16_to_cpu((x)->lrh[2]) & 0xFFF)) << 2)
 
 enum hfi1_filter_status {
 	HFI1_FILTER_HIT,
@@ -860,7 +858,7 @@ static ssize_t hfi1_snoop_write(struct file *fp, const char __user *data,
 			vl = sc4;
 		} else {
 			sl = (byte_two >> 4) & 0xf;
-			ibp = to_iport(&dd->verbs_dev.ibdev, 1);
+			ibp = to_iport(&dd->verbs_dev.rdi.ibdev, 1);
 			sc5 = ibp->sl_to_sc[sl];
 			vl = sc_to_vlt(dd, sc5);
 			if (vl != sc4) {
@@ -966,6 +964,65 @@ static ssize_t hfi1_snoop_read(struct file *fp, char __user *data,
 	return ret;
 }
 
+/**
+ * hfi1_assign_snoop_link_credits -- Set up credits for VL15 and others
+ * @ppd : ptr to hfi1 port data
+ * @value : options from user space
+ *
+ * Assumes the rest of the CM credit registers are zero from a
+ * previous global or credit reset.
+ * Leave shared count at zero for both global and all vls.
+ * In snoop mode ideally we don't use shared credits
+ * Reserve 8.5k for VL15
+ * If total credits less than 8.5kbytes return error.
+ * Divide the rest of the credits across VL0 to VL7 and if
+ * each of these levels has less than 34 credits (at least 2048 + 128 bytes)
+ * return with an error.
+ * The credit registers will be reset to zero on link negotiation or link up
+ * so this function should be activated from user space only if the port has
+ * gone past link negotiation and link up.
+ *
+ * Return -- 0 if successful else error condition
+ *
+ */
+static long hfi1_assign_snoop_link_credits(struct hfi1_pportdata *ppd,
+					   int value)
+{
+#define  OPA_MIN_PER_VL_CREDITS  34  /* 2048 + 128 bytes */
+	struct buffer_control t;
+	int i;
+	struct hfi1_devdata *dd = ppd->dd;
+	u16  total_credits = (value >> 16) & 0xffff;
+	u16  vl15_credits = dd->vl15_init / 2;
+	u16  per_vl_credits;
+	__be16 be_per_vl_credits;
+
+	if (!(ppd->host_link_state & HLS_UP))
+		goto err_exit;
+	if (total_credits  <  vl15_credits)
+		goto err_exit;
+
+	per_vl_credits = (total_credits - vl15_credits) / TXE_NUM_DATA_VL;
+
+	if (per_vl_credits < OPA_MIN_PER_VL_CREDITS)
+		goto err_exit;
+
+	memset(&t, 0, sizeof(t));
+	be_per_vl_credits = cpu_to_be16(per_vl_credits);
+
+	for (i = 0; i < TXE_NUM_DATA_VL; i++)
+		t.vl[i].dedicated = be_per_vl_credits;
+
+	t.vl[15].dedicated  = cpu_to_be16(vl15_credits);
+	return set_buffer_control(ppd, &t);
+
+err_exit:
+	snoop_dbg("port_state = 0x%x, total_credits = %d, vl15_credits = %d",
+		  ppd->host_link_state, total_credits, vl15_credits);
+
+	return -EINVAL;
+}
+
 static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 {
 	struct hfi1_devdata *dd;
@@ -1192,6 +1249,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 			snoop_flags |= SNOOP_DROP_SEND;
 		if (value & SNOOP_USE_METADATA)
 			snoop_flags |= SNOOP_USE_METADATA;
+		if (value & (SNOOP_SET_VL0TOVL15)) {
+			ppd = &dd->pport[0];  /* first port will do */
+			ret = hfi1_assign_snoop_link_credits(ppd, value);
+		}
 		break;
 	default:
 		return -ENOTTY;
@@ -1603,7 +1664,7 @@ int snoop_recv_handler(struct hfi1_packet *packet)
 /*
  * Handle snooping and capturing packets when sdma is being used.
  */
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
 	pr_alert("Snooping/Capture of Send DMA Packets Is Not Supported!\n");
@@ -1616,20 +1677,19 @@ int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
  * bypass packets. The only way to send a bypass packet currently is to use the
  * diagpkt interface. When that interface is enable snoop/capture is not.
  */
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct snoop_packet *s_packet = NULL;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u32 length = 0;
-	struct hfi1_sge_state temp_ss;
+	struct rvt_sge_state temp_ss;
 	void *data = NULL;
 	void *data_start = NULL;
 	int ret;
@@ -1638,7 +1698,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 	struct capture_md md;
 	u32 vl;
 	u32 hdr_len = hdrwords << 2;
-	u32 tlen = HFI1_GET_PKT_LEN(&ahdr->ibh);
+	u32 tlen = HFI1_GET_PKT_LEN(&ps->s_txreq->phdr.hdr);
 
 	md.u.pbc = 0;
 
@@ -1665,7 +1725,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		md.port = 1;
 		md.dir = PKT_DIR_EGRESS;
 		if (likely(pbc == 0)) {
-			vl = be16_to_cpu(ahdr->ibh.lrh[0]) >> 12;
+			vl = be16_to_cpu(ps->s_txreq->phdr.hdr.lrh[0]) >> 12;
 			md.u.pbc = create_pbc(ppd, 0, qp->s_srate, vl, plen);
 		} else {
 			md.u.pbc = 0;
@@ -1727,7 +1787,7 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		ret = HFI1_FILTER_HIT;
 	} else {
 		ret = ppd->dd->hfi1_snoop.filter_callback(
-					&ahdr->ibh,
+					&ps->s_txreq->phdr.hdr,
 					NULL,
 					ppd->dd->hfi1_snoop.filter_value);
 	}
@@ -1759,9 +1819,16 @@ int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 				spin_lock_irqsave(&qp->s_lock, flags);
-				hfi1_rc_send_complete(qp, &ahdr->ibh);
+				hfi1_rc_send_complete(qp,
+						      &ps->s_txreq->phdr.hdr);
 				spin_unlock_irqrestore(&qp->s_lock, flags);
 			}
+
+			/*
+			 * If snoop is dropping the packet we need to put the
+			 * txreq back because no one else will.
+			 */
+			hfi1_put_txreq(ps->s_txreq);
 			return 0;
 		}
 		break;
diff --git a/drivers/staging/rdma/hfi1/dma.c b/drivers/staging/rdma/hfi1/dma.c
index e03bd735173c..7e8dab892848 100644
--- a/drivers/staging/rdma/hfi1/dma.c
+++ b/drivers/staging/rdma/hfi1/dma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,7 +49,7 @@
 
 #include "verbs.h"
 
-#define BAD_DMA_ADDRESS ((u64) 0)
+#define BAD_DMA_ADDRESS ((u64)0)
 
 /*
  * The following functions implement driver specific replacements
@@ -74,7 +71,7 @@ static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
 	if (WARN_ON(!valid_dma_direction(direction)))
 		return BAD_DMA_ADDRESS;
 
-	return (u64) cpu_addr;
+	return (u64)cpu_addr;
 }
 
 static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
@@ -95,7 +92,7 @@ static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
 	if (offset + size > PAGE_SIZE)
 		return BAD_DMA_ADDRESS;
 
-	addr = (u64) page_address(page);
+	addr = (u64)page_address(page);
 	if (addr)
 		addr += offset;
 
@@ -120,7 +117,7 @@ static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
 		return BAD_DMA_ADDRESS;
 
 	for_each_sg(sgl, sg, nents, i) {
-		addr = (u64) page_address(sg_page(sg));
+		addr = (u64)page_address(sg_page(sg));
 		if (!addr) {
 			ret = 0;
 			break;
@@ -161,14 +158,14 @@ static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
 	if (p)
 		addr = page_address(p);
 	if (dma_handle)
-		*dma_handle = (u64) addr;
+		*dma_handle = (u64)addr;
 	return addr;
 }
 
 static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
 				   void *cpu_addr, u64 dma_handle)
 {
-	free_pages((unsigned long) cpu_addr, get_order(size));
+	free_pages((unsigned long)cpu_addr, get_order(size));
 }
 
 struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c
index ee50bbf64d39..34511e5df1d5 100644
--- a/drivers/staging/rdma/hfi1/driver.c
+++ b/drivers/staging/rdma/hfi1/driver.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/prefetch.h>
+#include <rdma/ib_verbs.h>
 
 #include "hfi.h"
 #include "trace.h"
@@ -162,6 +160,22 @@ const char *get_unit_name(int unit)
 	return iname;
 }
 
+const char *get_card_name(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return get_unit_name(dd->unit);
+}
+
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
+	struct hfi1_devdata *dd = container_of(ibdev,
+					       struct hfi1_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -265,6 +279,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 	int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct hfi1_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
 	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
 		return;
@@ -283,9 +299,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			goto drop;
 
 		/* Check for GRH */
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			u32 vtf;
 
 			ohdr = &hdr->u.l.oth;
@@ -295,17 +311,17 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 				goto drop;
 			rcv_flags |= HFI1_HAS_GRH;
-		} else
+		} else {
 			goto drop;
-
+		}
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		if (lid < HFI1_MULTICAST_LID_BASE) {
-			struct hfi1_qp *qp;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+			struct rvt_qp *qp;
 			unsigned long flags;
 
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, qp_num);
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
@@ -318,9 +334,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			spin_lock_irqsave(&qp->r_lock, flags);
 
 			/* Check for valid receive state. */
-			if (!(ib_hfi1_state_ops[qp->state] &
-			      HFI1_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 			}
 
 			switch (qp->ibqp.qp_type) {
@@ -352,7 +368,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 		if (rhf_use_egr_bfr(packet->rhf))
 			ebuf = packet->ebuf;
 
-		if (ebuf == NULL)
+		if (!ebuf)
 			goto drop; /* this should never happen */
 
 		if (lnh == HFI1_LRH_BTH)
@@ -370,7 +386,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 			 * Only in pre-B0 h/w is the CNP_OPCODE handled
 			 * via this code path.
 			 */
-			struct hfi1_qp *qp = NULL;
+			struct rvt_qp *qp = NULL;
 			u32 lqpn, rqpn;
 			u16 rlid;
 			u8 svc_type, sl, sc5;
@@ -380,10 +396,10 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 				sc5 |= 0x10;
 			sl = ibp->sc_to_sl[sc5];
 
-			lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK;
+			lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
 			rcu_read_lock();
-			qp = hfi1_lookup_qpn(ibp, lqpn);
-			if (qp == NULL) {
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
+			if (!qp) {
 				rcu_read_unlock();
 				goto drop;
 			}
@@ -419,9 +435,8 @@ drop:
 }
 
 static inline void init_packet(struct hfi1_ctxtdata *rcd,
-			      struct hfi1_packet *packet)
+			       struct hfi1_packet *packet)
 {
-
 	packet->rsize = rcd->rcvhdrqentsize; /* words */
 	packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
 	packet->rcd = rcd;
@@ -434,12 +449,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
 	packet->rcv_flags = 0;
 }
 
-#ifndef CONFIG_PRESCAN_RXQ
-static void prescan_rxq(struct hfi1_packet *packet) {}
-#else /* !CONFIG_PRESCAN_RXQ */
-static int prescan_receive_queue;
-
-static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
+static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr,
 			struct hfi1_other_headers *ohdr,
 			u64 rhf, u32 bth1, struct ib_grh *grh)
 {
@@ -453,7 +463,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		rlid = be16_to_cpu(hdr->lrh[3]);
-		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+		rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	case IB_QPT_UC:
@@ -483,7 +493,7 @@ static void process_ecn(struct hfi1_qp *qp, struct hfi1_ib_header *hdr,
 
 	if (bth1 & HFI1_BECN_SMASK) {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn = bth1 & HFI1_QPN_MASK;
+		u32 lqpn = bth1 & RVT_QPN_MASK;
 		u8 sl = ibp->sc_to_sl[sc5];
 
 		process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
@@ -562,26 +572,31 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
  * containing Excplicit Congestion Notifications (FECNs, or BECNs).
  * When an ECN is found, process the Congestion Notification, and toggle
  * it off.
+ * This is declared as a macro to allow quick checking of the port to avoid
+ * the overhead of a function call if not enabled.
  */
-static void prescan_rxq(struct hfi1_packet *packet)
+#define prescan_rxq(rcd, packet) \
+	do { \
+		if (rcd->ppd->cc_prescan) \
+			__prescan_rxq(packet); \
+	} while (0)
+static void __prescan_rxq(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
 	struct ps_mdata mdata;
 
-	if (!prescan_receive_queue)
-		return;
-
 	init_ps_mdata(&mdata, packet);
 
 	while (1) {
 		struct hfi1_devdata *dd = rcd->dd;
 		struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
-		__le32 *rhf_addr = (__le32 *) rcd->rcvhdrq + mdata.ps_head +
+		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 					 dd->rhf_offset;
-		struct hfi1_qp *qp;
+		struct rvt_qp *qp;
 		struct hfi1_ib_header *hdr;
 		struct hfi1_other_headers *ohdr;
 		struct ib_grh *grh = NULL;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
 		int is_ecn = 0;
@@ -600,25 +615,25 @@ static void prescan_rxq(struct hfi1_packet *packet)
 			hfi1_get_msgheader(dd, rhf_addr);
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-		if (lnh == HFI1_LRH_BTH)
+		if (lnh == HFI1_LRH_BTH) {
 			ohdr = &hdr->u.oth;
-		else if (lnh == HFI1_LRH_GRH) {
+		} else if (lnh == HFI1_LRH_GRH) {
 			ohdr = &hdr->u.l.oth;
 			grh = &hdr->u.l.grh;
-		} else
+		} else {
 			goto next; /* just in case */
-
+		}
 		bth1 = be32_to_cpu(ohdr->bth[1]);
 		is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
 		if (!is_ecn)
 			goto next;
 
-		qpn = bth1 & HFI1_QPN_MASK;
+		qpn = bth1 & RVT_QPN_MASK;
 		rcu_read_lock();
-		qp = hfi1_lookup_qpn(ibp, qpn);
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
 
-		if (qp == NULL) {
+		if (!qp) {
 			rcu_read_unlock();
 			goto next;
 		}
@@ -633,7 +648,6 @@ next:
 		update_ps_mdata(&mdata, rcd);
 	}
 }
-#endif /* CONFIG_PRESCAN_RXQ */
 
 static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 {
@@ -683,8 +697,9 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 		 * The +2 is the size of the RHF.
 		 */
 		prefetch_range(packet->ebuf,
-			packet->tlen - ((packet->rcd->rcvhdrqentsize -
-				  (rhf_hdrq_offset(packet->rhf)+2)) * 4));
+			       packet->tlen - ((packet->rcd->rcvhdrqentsize -
+					       (rhf_hdrq_offset(packet->rhf)
+						+ 2)) * 4));
 	}
 
 	/*
@@ -712,7 +727,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 		}
 	}
 
-	packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
+	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
 				      packet->rcd->dd->rhf_offset;
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
 
@@ -737,7 +752,6 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
 
 static inline void finish_packet(struct hfi1_packet *packet)
 {
-
 	/*
 	 * Nothing we need to free for the packet.
 	 *
@@ -746,14 +760,12 @@ static inline void finish_packet(struct hfi1_packet *packet)
 	 */
 	update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
 		       packet->etail, rcv_intr_dynamic, packet->numpkt);
-
 }
 
 static inline void process_rcv_qp_work(struct hfi1_packet *packet)
 {
-
 	struct hfi1_ctxtdata *rcd;
-	struct hfi1_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	rcd = packet->rcd;
 	rcd->head = packet->rhqoff;
@@ -764,17 +776,17 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & HFI1_R_RSP_DEFERED_ACK) {
-			qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			hfi1_send_rc_ack(rcd, qp, 0);
 		}
-		if (qp->r_flags & HFI1_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~HFI1_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_hfi1_state_ops[qp->state] &
-					HFI1_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				hfi1_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
@@ -799,7 +811,7 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 		goto bail;
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -830,7 +842,7 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
 	}
 	smp_rmb();  /* prevent speculative reads of dma'ed hdrq */
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
 		last = process_rcv_packet(&packet, thread);
@@ -862,6 +874,37 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
 			&handle_receive_interrupt_dma_rtail;
 }
 
+void set_all_slowpath(struct hfi1_devdata *dd)
+{
+	int i;
+
+	/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
+	for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
+		dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+}
+
+static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
+				      struct hfi1_packet packet,
+				      struct hfi1_devdata *dd)
+{
+	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
+	struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
+							     packet.rhf_addr);
+
+	if (hdr2sc(hdr, packet.rhf) != 0xf) {
+		int hwstate = read_logical_state(dd);
+
+		if (hwstate != LSTATE_ACTIVE) {
+			dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+			return 0;
+		}
+
+		queue_work(rcd->ppd->hfi1_wq, lsaw);
+		return 1;
+	}
+	return 0;
+}
+
 /*
  * handle_receive_interrupt - receive a packet
  * @rcd: the context
@@ -910,17 +953,17 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 		}
 	}
 
-	prescan_rxq(&packet);
+	prescan_rxq(rcd, &packet);
 
 	while (last == RCV_PKT_OK) {
-
-		if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
-			DROP_PACKET_OFF) == DROP_PACKET_ON)) {
+		if (unlikely(dd->do_drop &&
+			     atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+			     DROP_PACKET_ON)) {
 			dd->do_drop = 0;
 
 			/* On to the next packet */
 			packet.rhqoff += packet.rsize;
-			packet.rhf_addr = (__le32 *) rcd->rcvhdrq +
+			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
 					  packet.rhqoff +
 					  dd->rhf_offset;
 			packet.rhf = rhf_to_cpu(packet.rhf_addr);
@@ -929,6 +972,11 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			last = skip_rcv_packet(&packet, thread);
 			skip_pkt = 0;
 		} else {
+			/* Auto activate link on non-SC15 packet receive */
+			if (unlikely(rcd->ppd->host_link_state ==
+				     HLS_UP_ARMED) &&
+			    set_armed_to_active(rcd, packet, dd))
+				goto bail;
 			last = process_rcv_packet(&packet, thread);
 		}
 
@@ -940,8 +988,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			if (seq != rcd->seq_cnt)
 				last = RCV_PKT_DONE;
 			if (needset) {
-				dd_dev_info(dd,
-					"Switching to NO_DMA_RTAIL\n");
+				dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
 				set_all_nodma_rtail(dd);
 				needset = 0;
 			}
@@ -984,6 +1031,42 @@ bail:
 }
 
 /*
+ * We may discover in the interrupt that the hardware link state has
+ * changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
+ * and we need to update the driver's notion of the link state.  We cannot
+ * run set_link_state from interrupt context, so we queue this function on
+ * a workqueue.
+ *
+ * We delay the regular interrupt processing until after the state changes
+ * so that the link will be in the correct state by the time any application
+ * we wake up attempts to send a reply to any message it received.
+ * (Subsequent receive interrupts may possibly force the wakeup before we
+ * update the link state.)
+ *
+ * The rcd is freed in hfi1_free_ctxtdata after hfi1_postinit_cleanup invokes
+ * dd->f_cleanup(dd) to disable the interrupt handler and flush workqueues,
+ * so we're safe from use-after-free of the rcd.
+ */
+void receive_interrupt_work(struct work_struct *work)
+{
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+						  linkstate_active_work);
+	struct hfi1_devdata *dd = ppd->dd;
+	int i;
+
+	/* Received non-SC15 packet implies neighbor_normal */
+	ppd->neighbor_normal = 1;
+	set_link_state(ppd, HLS_UP_ACTIVE);
+
+	/*
+	 * Interrupt all kernel contexts that could have had an
+	 * interrupt during auto activation.
+	 */
+	for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
+		force_recv_intr(dd->rcd[i]);
+}
+
+/*
  * Convert a given MTU size to the on-wire MAD packet enumeration.
  * Return -1 if the size is invalid.
  */
@@ -1037,9 +1120,9 @@ int set_mtu(struct hfi1_pportdata *ppd)
 	ppd->ibmaxlen = ppd->ibmtu + lrh_max_header_bytes(ppd->dd);
 
 	mutex_lock(&ppd->hls_lock);
-	if (ppd->host_link_state == HLS_UP_INIT
-			|| ppd->host_link_state == HLS_UP_ARMED
-			|| ppd->host_link_state == HLS_UP_ACTIVE)
+	if (ppd->host_link_state == HLS_UP_INIT ||
+	    ppd->host_link_state == HLS_UP_ARMED ||
+	    ppd->host_link_state == HLS_UP_ACTIVE)
 		is_up = 1;
 
 	drain = !is_ax(dd) && is_up;
@@ -1082,79 +1165,80 @@ int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc)
 	return 0;
 }
 
-/*
- * Following deal with the "obviously simple" task of overriding the state
- * of the LEDs, which normally indicate link physical and logical status.
- * The complications arise in dealing with different hardware mappings
- * and the board-dependent routine being called from interrupts.
- * and then there's the requirement to _flash_ them.
- */
-#define LED_OVER_FREQ_SHIFT 8
-#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
-/* Below is "non-zero" to force override, but both actual LEDs are off */
-#define LED_OVER_BOTH_OFF (8)
+void shutdown_led_override(struct hfi1_pportdata *ppd)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	if (atomic_read(&ppd->led_override_timer_active)) {
+		del_timer_sync(&ppd->led_override_timer);
+		atomic_set(&ppd->led_override_timer_active, 0);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
+	}
+
+	/* Hand control of the LED to the DC for normal operation */
+	write_csr(dd, DCC_CFG_LED_CNTRL, 0);
+}
 
 static void run_led_override(unsigned long opaque)
 {
 	struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
 	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff;
-	int ph_idx;
+	unsigned long timeout;
+	int phase_idx;
 
 	if (!(dd->flags & HFI1_INITTED))
 		return;
 
-	ph_idx = ppd->led_override_phase++ & 1;
-	ppd->led_override = ppd->led_override_vals[ph_idx];
-	timeoff = ppd->led_override_timeoff;
+	phase_idx = ppd->led_override_phase & 1;
 
-	/*
-	 * don't re-fire the timer if user asked for it to be off; we let
-	 * it fire one more time after they turn it off to simplify
-	 */
-	if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-		mod_timer(&ppd->led_override_timer, jiffies + timeoff);
+	setextled(dd, phase_idx);
+
+	timeout = ppd->led_override_vals[phase_idx];
+
+	/* Set up for next phase */
+	ppd->led_override_phase = !ppd->led_override_phase;
+
+	mod_timer(&ppd->led_override_timer, jiffies + timeout);
 }
 
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val)
+/*
+ * To have the LED blink in a particular pattern, provide timeon and timeoff
+ * in milliseconds.
+ * To turn off custom blinking and return to normal operation, use
+ * shutdown_led_override()
+ */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff)
 {
-	struct hfi1_devdata *dd = ppd->dd;
-	int timeoff, freq;
-
-	if (!(dd->flags & HFI1_INITTED))
+	if (!(ppd->dd->flags & HFI1_INITTED))
 		return;
 
-	/* First check if we are blinking. If not, use 1HZ polling */
-	timeoff = HZ;
-	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+	/* Convert to jiffies for direct use in timer */
+	ppd->led_override_vals[0] = msecs_to_jiffies(timeoff);
+	ppd->led_override_vals[1] = msecs_to_jiffies(timeon);
 
-	if (freq) {
-		/* For blink, set each phase from one nybble of val */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = (val >> 4) & 0xF;
-		timeoff = (HZ << 4)/freq;
-	} else {
-		/* Non-blink set both phases the same. */
-		ppd->led_override_vals[0] = val & 0xF;
-		ppd->led_override_vals[1] = val & 0xF;
-	}
-	ppd->led_override_timeoff = timeoff;
+	/* Arbitrarily start from LED on phase */
+	ppd->led_override_phase = 1;
 
 	/*
 	 * If the timer has not already been started, do so. Use a "quick"
-	 * timeout so the function will be called soon, to look at our request.
+	 * timeout so the handler will be called soon to look at our request.
 	 */
-	if (atomic_inc_return(&ppd->led_override_timer_active) == 1) {
-		/* Need to start timer */
+	if (!timer_pending(&ppd->led_override_timer)) {
 		setup_timer(&ppd->led_override_timer, run_led_override,
-				(unsigned long)ppd);
-
+			    (unsigned long)ppd);
 		ppd->led_override_timer.expires = jiffies + 1;
 		add_timer(&ppd->led_override_timer);
-	} else {
-		if (ppd->led_override_vals[0] || ppd->led_override_vals[1])
-			mod_timer(&ppd->led_override_timer, jiffies + 1);
-		atomic_dec(&ppd->led_override_timer_active);
+		atomic_set(&ppd->led_override_timer_active, 1);
+		/* Ensure the atomic_set is visible to all CPUs */
+		smp_wmb();
 	}
 }
 
@@ -1184,8 +1268,8 @@ int hfi1_reset_device(int unit)
 
 	if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
 		dd_dev_info(dd,
-			"Invalid unit number %u or not initialized or not present\n",
-			unit);
+			    "Invalid unit number %u or not initialized or not present\n",
+			    unit);
 		ret = -ENXIO;
 		goto bail;
 	}
@@ -1203,14 +1287,8 @@ int hfi1_reset_device(int unit)
 
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
-		if (atomic_read(&ppd->led_override_timer_active)) {
-			/* Need to stop LED timer, _then_ shut off LEDs */
-			del_timer_sync(&ppd->led_override_timer);
-			atomic_set(&ppd->led_override_timer_active, 0);
-		}
 
-		/* Shut off LEDs after we are sure timer is not running */
-		ppd->led_override = LED_OVER_BOTH_OFF;
+		shutdown_led_override(ppd);
 	}
 	if (dd->flags & HFI1_HAS_SEND_DMA)
 		sdma_exit(dd);
@@ -1221,11 +1299,11 @@ int hfi1_reset_device(int unit)
 
 	if (ret)
 		dd_dev_err(dd,
-			"Reinitialize unit %u after reset failed with %d\n",
-			unit, ret);
+			   "Reinitialize unit %u after reset failed with %d\n",
+			   unit, ret);
 	else
 		dd_dev_info(dd, "Reinitialized unit %u after resetting\n",
-			unit);
+			    unit);
 
 bail:
 	return ret;
@@ -1282,7 +1360,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
 		handle_eflags(packet);
 
 	dd_dev_err(packet->rcd->dd,
-	   "Bypass packets are not supported in normal operation. Dropping\n");
+		   "Bypass packets are not supported in normal operation. Dropping\n");
 	return RHF_RCV_CONTINUE;
 }
 
@@ -1320,6 +1398,6 @@ int kdeth_process_eager(struct hfi1_packet *packet)
 int process_receive_invalid(struct hfi1_packet *packet)
 {
 	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
-		rhf_rcv_type(packet->rhf));
+		   rhf_rcv_type(packet->rhf));
 	return RHF_RCV_CONTINUE;
 }
diff --git a/drivers/staging/rdma/hfi1/efivar.c b/drivers/staging/rdma/hfi1/efivar.c
index 47dfe2584760..106349fc1fb9 100644
--- a/drivers/staging/rdma/hfi1/efivar.c
+++ b/drivers/staging/rdma/hfi1/efivar.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/efivar.h b/drivers/staging/rdma/hfi1/efivar.h
index 070706225c51..94e9e70de568 100644
--- a/drivers/staging/rdma/hfi1/efivar.h
+++ b/drivers/staging/rdma/hfi1/efivar.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
diff --git a/drivers/staging/rdma/hfi1/eprom.c b/drivers/staging/rdma/hfi1/eprom.c
index fb620c97f592..bd8771570f81 100644
--- a/drivers/staging/rdma/hfi1/eprom.c
+++ b/drivers/staging/rdma/hfi1/eprom.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -99,17 +96,17 @@
 
 /* sleep length while waiting for controller */
 #define WAIT_SLEEP_US 100	/* must be larger than 5 (see usage) */
-#define COUNT_DELAY_SEC(n) ((n) * (1000000/WAIT_SLEEP_US))
+#define COUNT_DELAY_SEC(n) ((n) * (1000000 / WAIT_SLEEP_US))
 
 /* GPIO pins */
-#define EPROM_WP_N (1ull << 14)	/* EPROM write line */
+#define EPROM_WP_N BIT_ULL(14)	/* EPROM write line */
 
 /*
- * Use the EP mutex to guard against other callers from within the driver.
- * Also covers usage of eprom_available.
+ * How long to wait for the EPROM to become available, in ms.
+ * The spec 32 Mb EPROM takes around 40s to erase then write.
+ * Double it for safety.
  */
-static DEFINE_MUTEX(eprom_mutex);
-static int eprom_available;	/* default: not available */
+#define EPROM_TIMEOUT 80000 /* ms */
 
 /*
  * Turn on external enable line that allows writing on the flash.
@@ -117,11 +114,9 @@ static int eprom_available;	/* default: not available */
 static void write_enable(struct hfi1_devdata *dd)
 {
 	/* raise signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) | EPROM_WP_N);
 	/* raise enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) | EPROM_WP_N);
 }
 
 /*
@@ -130,11 +125,9 @@ static void write_enable(struct hfi1_devdata *dd)
 static void write_disable(struct hfi1_devdata *dd)
 {
 	/* lower signal */
-	write_csr(dd, ASIC_GPIO_OUT,
-		read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OUT, read_csr(dd, ASIC_GPIO_OUT) & ~EPROM_WP_N);
 	/* lower enable */
-	write_csr(dd, ASIC_GPIO_OE,
-		read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
+	write_csr(dd, ASIC_GPIO_OE, read_csr(dd, ASIC_GPIO_OE) & ~EPROM_WP_N);
 }
 
 /*
@@ -212,8 +205,8 @@ static int erase_range(struct hfi1_devdata *dd, u32 start, u32 len)
 	/* check the end points for the minimum erase */
 	if ((start & MASK_4KB) || (end & MASK_4KB)) {
 		dd_dev_err(dd,
-			"%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
-			__func__, start, end);
+			   "%s: non-aligned range (0x%x,0x%x) for a 4KB erase\n",
+			   __func__, start, end);
 		return -EINVAL;
 	}
 
@@ -256,7 +249,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
 	int i;
 
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
-	for (i = 0; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 0; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
 }
@@ -267,7 +260,7 @@ static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
 static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -277,7 +270,7 @@ static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		read_page(dd, start + offset, buffer);
 		if (copy_to_user((void __user *)(addr + offset),
-						buffer, EP_PAGE_SIZE)) {
+				 buffer, EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -298,7 +291,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_WRITE_ENABLE);
 	write_csr(dd, ASIC_EEP_DATA, data[0]);
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_PAGE_PROGRAM(offset));
-	for (i = 1; i < EP_PAGE_SIZE/sizeof(u32); i++)
+	for (i = 1; i < EP_PAGE_SIZE / sizeof(u32); i++)
 		write_csr(dd, ASIC_EEP_DATA, data[i]);
 	/* will close the open page */
 	return wait_for_not_busy(dd);
@@ -310,7 +303,7 @@ static int write_page(struct hfi1_devdata *dd, u32 offset, u32 *data)
 static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 {
 	u32 offset;
-	u32 buffer[EP_PAGE_SIZE/sizeof(u32)];
+	u32 buffer[EP_PAGE_SIZE / sizeof(u32)];
 	int ret = 0;
 
 	/* reject anything not on an EPROM page boundary */
@@ -321,7 +314,7 @@ static int write_length(struct hfi1_devdata *dd, u32 start, u32 len, u64 addr)
 
 	for (offset = 0; offset < len; offset += EP_PAGE_SIZE) {
 		if (copy_from_user(buffer, (void __user *)(addr + offset),
-						EP_PAGE_SIZE)) {
+				   EP_PAGE_SIZE)) {
 			ret = -EFAULT;
 			goto done;
 		}
@@ -353,44 +346,42 @@ static inline u32 extract_rstart(u32 composite)
  *
  * Return 0 on success, -ERRNO on error
  */
-int handle_eprom_command(const struct hfi1_cmd *cmd)
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd)
 {
 	struct hfi1_devdata *dd;
 	u32 dev_id;
 	u32 rlen;	/* range length */
 	u32 rstart;	/* range start */
+	int i_minor;
 	int ret = 0;
 
 	/*
-	 * The EPROM is per-device, so use unit 0 as that will always
-	 * exist.
+	 * Map the device file to device data using the relative minor.
+	 * The device file minor number is the unit number + 1.  0 is
+	 * the generic device file - reject it.
 	 */
-	dd = hfi1_lookup(0);
+	i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
+	if (i_minor <= 0)
+		return -EINVAL;
+	dd = hfi1_lookup(i_minor - 1);
 	if (!dd) {
-		pr_err("%s: cannot find unit 0!\n", __func__);
+		pr_err("%s: cannot find unit %d!\n", __func__, i_minor);
 		return -EINVAL;
 	}
 
-	/* lock against other callers touching the ASIC block */
-	mutex_lock(&eprom_mutex);
-
-	/* some platforms do not have an EPROM */
-	if (!eprom_available) {
-		ret = -ENOSYS;
-		goto done_asic;
-	}
+	/* some devices do not have an EPROM */
+	if (!dd->eprom_available)
+		return -EOPNOTSUPP;
 
-	/* lock against the other HFI on another OS */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
-		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+		dd_dev_err(dd, "%s: unable to acquire EPROM resource\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	dd_dev_info(dd, "%s: cmd: type %d, len 0x%x, addr 0x%016llx\n",
-		__func__, cmd->type, cmd->len, cmd->addr);
+		    __func__, cmd->type, cmd->len, cmd->addr);
 
 	switch (cmd->type) {
 	case HFI1_CMD_EP_INFO:
@@ -401,7 +392,7 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
 		dev_id = read_device_id(dd);
 		/* addr points to a u32 user buffer */
 		if (copy_to_user((void __user *)cmd->addr, &dev_id,
-								sizeof(u32)))
+				 sizeof(u32)))
 			ret = -EFAULT;
 		break;
 
@@ -429,14 +420,13 @@ int handle_eprom_command(const struct hfi1_cmd *cmd)
 
 	default:
 		dd_dev_err(dd, "%s: unexpected command %d\n",
-			__func__, cmd->type);
+			   __func__, cmd->type);
 		ret = -EINVAL;
 		break;
 	}
 
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }
 
@@ -447,44 +437,35 @@ int eprom_init(struct hfi1_devdata *dd)
 {
 	int ret = 0;
 
-	/* only the discrete chip has an EPROM, nothing to do */
+	/* only the discrete chip has an EPROM */
 	if (dd->pcidev->device != PCI_DEVICE_ID_INTEL0)
 		return 0;
 
-	/* lock against other callers */
-	mutex_lock(&eprom_mutex);
-	if (eprom_available)	/* already initialized */
-		goto done_asic;
-
 	/*
-	 * Lock against the other HFI on another OS - the mutex above
-	 * would have caught anything in this driver.  It is OK if
-	 * both OSes reset the EPROM - as long as they don't do it at
-	 * the same time.
+	 * It is OK if both HFIs reset the EPROM as long as they don't
+	 * do it at the same time.
 	 */
-	ret = acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
 	if (ret) {
 		dd_dev_err(dd,
-			"%s: unable to acquire hw mutex, no EPROM support\n",
-			__func__);
+			   "%s: unable to acquire EPROM resource, no EPROM support\n",
+			   __func__);
 		goto done_asic;
 	}
 
 	/* reset EPROM to be sure it is in a good state */
 
 	/* set reset */
-	write_csr(dd, ASIC_EEP_CTL_STAT,
-					ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
+	write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_EP_RESET_SMASK);
 	/* clear reset, set speed */
 	write_csr(dd, ASIC_EEP_CTL_STAT,
-			EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
+		  EP_SPEED_FULL << ASIC_EEP_CTL_STAT_RATE_SPI_SHIFT);
 
 	/* wake the device with command "release powerdown NoID" */
 	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_RELEASE_POWERDOWN_NOID);
 
-	eprom_available = 1;
-	release_hw_mutex(dd);
+	dd->eprom_available = true;
+	release_chip_resource(dd, CR_EPROM);
 done_asic:
-	mutex_unlock(&eprom_mutex);
 	return ret;
 }
diff --git a/drivers/staging/rdma/hfi1/eprom.h b/drivers/staging/rdma/hfi1/eprom.h
index 64a64276be81..d41f0b1afb15 100644
--- a/drivers/staging/rdma/hfi1/eprom.h
+++ b/drivers/staging/rdma/hfi1/eprom.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,4 +49,4 @@ struct hfi1_cmd;
 struct hfi1_devdata;
 
 int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(const struct hfi1_cmd *cmd);
+int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index 8b911e8bf0df..8396dc5fb6c1 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -60,6 +57,8 @@
 #include "user_sdma.h"
 #include "user_exp_rcv.h"
 #include "eprom.h"
+#include "aspm.h"
+#include "mmu_rb.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -96,9 +95,6 @@ static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
 static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
 static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
 static int vma_fault(struct vm_area_struct *, struct vm_fault *);
-static int exp_tid_setup(struct file *, struct hfi1_tid_info *);
-static int exp_tid_free(struct file *, struct hfi1_tid_info *);
-static void unlock_exp_tids(struct hfi1_ctxtdata *);
 
 static const struct file_operations hfi1_file_ops = {
 	.owner = THIS_MODULE,
@@ -164,7 +160,6 @@ enum mmap_types {
 #define dbg(fmt, ...)				\
 	pr_info(fmt, ##__VA_ARGS__)
 
-
 static inline int is_valid_mmap(u64 token)
 {
 	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
@@ -188,6 +183,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	struct hfi1_cmd cmd;
 	struct hfi1_user_info uinfo;
 	struct hfi1_tid_info tinfo;
+	unsigned long addr;
 	ssize_t consumed = 0, copy = 0, ret = 0;
 	void *dest = NULL;
 	__u64 user_val = 0;
@@ -219,6 +215,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 		break;
 	case HFI1_CMD_TID_UPDATE:
 	case HFI1_CMD_TID_FREE:
+	case HFI1_CMD_TID_INVAL_READ:
 		copy = sizeof(tinfo);
 		dest = &tinfo;
 		break;
@@ -294,9 +291,8 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			sc_return_credits(uctxt->sc);
 		break;
 	case HFI1_CMD_TID_UPDATE:
-		ret = exp_tid_setup(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
 		if (!ret) {
-			unsigned long addr;
 			/*
 			 * Copy the number of tidlist entries we used
 			 * and the length of the buffer we registered.
@@ -311,8 +307,25 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 				ret = -EFAULT;
 		}
 		break;
+	case HFI1_CMD_TID_INVAL_READ:
+		ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
+		break;
 	case HFI1_CMD_TID_FREE:
-		ret = exp_tid_free(fp, &tinfo);
+		ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
+		if (ret)
+			break;
+		addr = (unsigned long)cmd.addr +
+			offsetof(struct hfi1_tid_info, tidcnt);
+		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+				 sizeof(tinfo.tidcnt)))
+			ret = -EFAULT;
 		break;
 	case HFI1_CMD_RECV_CTRL:
 		ret = manage_rcvq(uctxt, fd->subctxt, (int)user_val);
@@ -373,8 +386,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 				break;
 			}
 			if (dd->flags & HFI1_FORCED_FREEZE) {
-				/* Don't allow context reset if we are into
-				 * forced freeze */
+				/*
+				 * Don't allow context reset if we are into
+				 * forced freeze
+				 */
 				ret = -ENODEV;
 				break;
 			}
@@ -382,8 +397,9 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 			ret = sc_enable(sc);
 			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
 				     uctxt->ctxt);
-		} else
+		} else {
 			ret = sc_restart(sc);
+		}
 		if (!ret)
 			sc_return_credits(sc);
 		break;
@@ -393,7 +409,7 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	case HFI1_CMD_EP_ERASE_RANGE:
 	case HFI1_CMD_EP_READ_RANGE:
 	case HFI1_CMD_EP_WRITE_RANGE:
-		ret = handle_eprom_command(&cmd);
+		ret = handle_eprom_command(fp, &cmd);
 		break;
 	}
 
@@ -732,6 +748,9 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	/* drain user sdma queue */
 	hfi1_user_sdma_free_queues(fdata);
 
+	/* release the cpu */
+	hfi1_put_proc_affinity(dd, fdata->rec_cpu_num);
+
 	/*
 	 * Clear any left over, unhandled events so the next process that
 	 * gets this context doesn't get confused.
@@ -755,6 +774,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
 		     HFI1_RCVCTRL_TIDFLOW_DIS |
 		     HFI1_RCVCTRL_INTRAVAIL_DIS |
+		     HFI1_RCVCTRL_TAILUPD_DIS |
 		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
 		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
 		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
@@ -777,14 +797,12 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 	uctxt->pionowait = 0;
 	uctxt->event_flags = 0;
 
-	hfi1_clear_tids(uctxt);
+	hfi1_user_exp_rcv_free(fdata);
 	hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
 
-	if (uctxt->tid_pg_list)
-		unlock_exp_tids(uctxt);
-
 	hfi1_stats.sps_ctxts--;
-	dd->freectxts++;
+	if (++dd->freectxts == dd->num_user_contexts)
+		aspm_enable_all(dd);
 	mutex_unlock(&hfi1_mutex);
 	hfi1_free_ctxtdata(dd, uctxt);
 done:
@@ -826,8 +844,16 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 
 	mutex_lock(&hfi1_mutex);
 	/* First, lets check if we need to setup a shared context? */
-	if (uinfo->subctxt_cnt)
+	if (uinfo->subctxt_cnt) {
+		struct hfi1_filedata *fd = fp->private_data;
+
 		ret = find_shared_ctxt(fp, uinfo);
+		if (ret < 0)
+			goto done_unlock;
+		if (ret)
+			fd->rec_cpu_num = hfi1_get_proc_affinity(
+				fd->uctxt->dd, fd->uctxt->numa_id);
+	}
 
 	/*
 	 * We execute the following block if we couldn't find a
@@ -837,6 +863,7 @@ static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
 		i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
 		ret = get_user_context(fp, uinfo, i_minor - 1, alg);
 	}
+done_unlock:
 	mutex_unlock(&hfi1_mutex);
 done:
 	return ret;
@@ -962,7 +989,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt;
 	unsigned ctxt;
-	int ret;
+	int ret, numa;
 
 	if (dd->flags & HFI1_FROZEN) {
 		/*
@@ -982,17 +1009,26 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	if (ctxt == dd->num_rcv_contexts)
 		return -EBUSY;
 
-	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt);
+	fd->rec_cpu_num = hfi1_get_proc_affinity(dd, -1);
+	if (fd->rec_cpu_num != -1)
+		numa = cpu_to_node(fd->rec_cpu_num);
+	else
+		numa = numa_node_id();
+	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
 	if (!uctxt) {
 		dd_dev_err(dd,
 			   "Unable to allocate ctxtdata memory, failing open\n");
 		return -ENOMEM;
 	}
+	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
+		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
+		  uctxt->numa_id);
+
 	/*
 	 * Allocate and enable a PIO send context.
 	 */
 	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
-			     uctxt->numa_id);
+			     uctxt->dd->node);
 	if (!uctxt->sc)
 		return -ENOMEM;
 
@@ -1026,7 +1062,12 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 	INIT_LIST_HEAD(&uctxt->sdma_queues);
 	spin_lock_init(&uctxt->sdma_qlock);
 	hfi1_stats.sps_ctxts++;
-	dd->freectxts--;
+	/*
+	 * Disable ASPM when there are open user/PSM contexts to avoid
+	 * issues with ASPM L1 exit latency
+	 */
+	if (dd->freectxts-- == dd->num_user_contexts)
+		aspm_disable_all(dd);
 	fd->uctxt = uctxt;
 
 	return 0;
@@ -1035,22 +1076,19 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
 			 const struct hfi1_user_info *uinfo)
 {
-	int ret = 0;
 	unsigned num_subctxts;
 
 	num_subctxts = uinfo->subctxt_cnt;
-	if (num_subctxts > HFI1_MAX_SHARED_CTXTS) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
+		return -EINVAL;
 
 	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
 	uctxt->subctxt_id = uinfo->subctxt_id;
 	uctxt->active_slaves = 1;
 	uctxt->redirect_seq_cnt = 1;
 	set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-bail:
-	return ret;
+
+	return 0;
 }
 
 static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
@@ -1105,10 +1143,10 @@ static int user_init(struct file *fp)
 	 * has done it.
 	 */
 	if (fd->subctxt) {
-		ret = wait_event_interruptible(uctxt->wait,
-			!test_bit(HFI1_CTXT_MASTER_UNINIT,
-			&uctxt->event_flags));
-		goto done;
+		ret = wait_event_interruptible(uctxt->wait, !test_bit(
+					       HFI1_CTXT_MASTER_UNINIT,
+					       &uctxt->event_flags));
+		goto expected;
 	}
 
 	/* initialize poll variables... */
@@ -1146,8 +1184,16 @@ static int user_init(struct file *fp)
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
 		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+	/*
+	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
+	 * We can't rely on the correct value to be set from prior
+	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
+	 * for both cases.
+	 */
 	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
 		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+	else
+		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
 	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
 
 	/* Notify any waiting slaves */
@@ -1155,8 +1201,18 @@ static int user_init(struct file *fp)
 		clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
 		wake_up(&uctxt->wait);
 	}
-	ret = 0;
 
+expected:
+	/*
+	 * Expected receive has to be setup for all processes (including
+	 * shared contexts). However, it has to be done after the master
+	 * context has been fully configured as it depends on the
+	 * eager/expected split of the RcvArray entries.
+	 * Setting it up here ensures that the subcontexts will be waiting
+	 * (due to the above wait_event_interruptible() until the master
+	 * is setup.
+	 */
+	ret = hfi1_user_exp_rcv_init(fp);
 done:
 	return ret;
 }
@@ -1226,46 +1282,6 @@ static int setup_ctxt(struct file *fp)
 			if (ret)
 				goto done;
 		}
-		/* Setup Expected Rcv memories */
-		uctxt->tid_pg_list = vzalloc(uctxt->expected_count *
-					     sizeof(struct page **));
-		if (!uctxt->tid_pg_list) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		uctxt->physshadow = vzalloc(uctxt->expected_count *
-					    sizeof(*uctxt->physshadow));
-		if (!uctxt->physshadow) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/* allocate expected TID map and initialize the cursor */
-		atomic_set(&uctxt->tidcursor, 0);
-		uctxt->numtidgroups = uctxt->expected_count /
-			dd->rcv_entries.group_size;
-		uctxt->tidmapcnt = uctxt->numtidgroups / BITS_PER_LONG +
-			!!(uctxt->numtidgroups % BITS_PER_LONG);
-		uctxt->tidusemap = kzalloc_node(uctxt->tidmapcnt *
-						sizeof(*uctxt->tidusemap),
-						GFP_KERNEL, uctxt->numa_id);
-		if (!uctxt->tidusemap) {
-			ret = -ENOMEM;
-			goto done;
-		}
-		/*
-		 * In case that the number of groups is not a multiple of
-		 * 64 (the number of groups in a tidusemap element), mark
-		 * the extra ones as used. This will effectively make them
-		 * permanently used and should never be assigned. Otherwise,
-		 * the code which checks how many free groups we have will
-		 * get completely confused about the state of the bits.
-		 */
-		if (uctxt->numtidgroups % BITS_PER_LONG)
-			uctxt->tidusemap[uctxt->tidmapcnt - 1] =
-				~((1ULL << (uctxt->numtidgroups %
-					    BITS_PER_LONG)) - 1);
-		trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0,
-				       uctxt->tidusemap, uctxt->tidmapcnt);
 	}
 	ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
 	if (ret)
@@ -1391,8 +1407,9 @@ static unsigned int poll_next(struct file *fp,
 		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
 		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
 		pollflag = 0;
-	} else
+	} else {
 		pollflag = POLLIN | POLLRDNORM;
+	}
 	spin_unlock_irq(&dd->uctxt_lock);
 
 	return pollflag;
@@ -1470,8 +1487,9 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
 		if (uctxt->rcvhdrtail_kvaddr)
 			clear_rcvhdrtail(uctxt);
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
-	} else
+	} else {
 		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
+	}
 	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
 	/* always; new head should be equal to new tail; see above */
 bail:
@@ -1504,367 +1522,6 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
 	return 0;
 }
 
-#define num_user_pages(vaddr, len)					\
-	(1 + (((((unsigned long)(vaddr) +				\
-		 (unsigned long)(len) - 1) & PAGE_MASK) -		\
-	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-/**
- * tzcnt - count the number of trailing zeros in a 64bit value
- * @value: the value to be examined
- *
- * Returns the number of trailing least significant zeros in the
- * the input value. If the value is zero, return the number of
- * bits of the value.
- */
-static inline u8 tzcnt(u64 value)
-{
-	return value ? __builtin_ctzl(value) : sizeof(value) * 8;
-}
-
-static inline unsigned num_free_groups(unsigned long map, u16 *start)
-{
-	unsigned free;
-	u16 bitidx = *start;
-
-	if (bitidx >= BITS_PER_LONG)
-		return 0;
-	/* "Turn off" any bits set before our bit index */
-	map &= ~((1ULL << bitidx) - 1);
-	free = tzcnt(map) - bitidx;
-	while (!free && bitidx < BITS_PER_LONG) {
-		/* Zero out the last set bit so we look at the rest */
-		map &= ~(1ULL << bitidx);
-		/*
-		 * Account for the previously checked bits and advance
-		 * the bit index. We don't have to check for bitidx
-		 * getting bigger than BITS_PER_LONG here as it would
-		 * mean extra instructions that we don't need. If it
-		 * did happen, it would push free to a negative value
-		 * which will break the loop.
-		 */
-		free = tzcnt(map) - ++bitidx;
-	}
-	*start = bitidx;
-	return free;
-}
-
-static int exp_tid_setup(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	int ret = 0;
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid, mapped = 0, npages, ngroups, exp_groups,
-		tidpairs = uctxt->expected_count / 2;
-	struct page **pages;
-	unsigned long vaddr, tidmap[uctxt->tidmapcnt];
-	dma_addr_t *phys;
-	u32 tidlist[tidpairs], pairidx = 0, tidcursor;
-	u16 useidx, idx, bitidx, tidcnt = 0;
-
-	vaddr = tinfo->vaddr;
-
-	if (offset_in_page(vaddr)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	npages = num_user_pages(vaddr, tinfo->length);
-	if (!npages) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
-		       npages * PAGE_SIZE)) {
-		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
-			   (void *)vaddr, npages);
-		ret = -EFAULT;
-		goto bail;
-	}
-
-	memset(tidmap, 0, sizeof(tidmap[0]) * uctxt->tidmapcnt);
-	memset(tidlist, 0, sizeof(tidlist[0]) * tidpairs);
-
-	exp_groups = uctxt->expected_count / dd->rcv_entries.group_size;
-	/* which group set do we look at first? */
-	tidcursor = atomic_read(&uctxt->tidcursor);
-	useidx = (tidcursor >> 16) & 0xffff;
-	bitidx = tidcursor & 0xffff;
-
-	/*
-	 * Keep going until we've mapped all pages or we've exhausted all
-	 * RcvArray entries.
-	 * This iterates over the number of tidmaps + 1
-	 * (idx <= uctxt->tidmapcnt) so we check the bitmap which we
-	 * started from one more time for any free bits before the
-	 * starting point bit.
-	 */
-	for (mapped = 0, idx = 0;
-	     mapped < npages && idx <= uctxt->tidmapcnt;) {
-		u64 i, offset = 0;
-		unsigned free, pinned, pmapped = 0, bits_used;
-		u16 grp;
-
-		/*
-		 * "Reserve" the needed group bits under lock so other
-		 * processes can't step in the middle of it. Once
-		 * reserved, we don't need the lock anymore since we
-		 * are guaranteed the groups.
-		 */
-		spin_lock(&uctxt->exp_lock);
-		if (uctxt->tidusemap[useidx] == -1ULL ||
-		    bitidx >= BITS_PER_LONG) {
-			/* no free groups in the set, use the next */
-			useidx = (useidx + 1) % uctxt->tidmapcnt;
-			idx++;
-			bitidx = 0;
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		ngroups = ((npages - mapped) / dd->rcv_entries.group_size) +
-			!!((npages - mapped) % dd->rcv_entries.group_size);
-
-		/*
-		 * If we've gotten here, the current set of groups does have
-		 * one or more free groups.
-		 */
-		free = num_free_groups(uctxt->tidusemap[useidx], &bitidx);
-		if (!free) {
-			/*
-			 * Despite the check above, free could still come back
-			 * as 0 because we don't check the entire bitmap but
-			 * we start from bitidx.
-			 */
-			spin_unlock(&uctxt->exp_lock);
-			continue;
-		}
-		bits_used = min(free, ngroups);
-		tidmap[useidx] |= ((1ULL << bits_used) - 1) << bitidx;
-		uctxt->tidusemap[useidx] |= tidmap[useidx];
-		spin_unlock(&uctxt->exp_lock);
-
-		/*
-		 * At this point, we know where in the map we have free bits.
-		 * properly offset into the various "shadow" arrays and compute
-		 * the RcvArray entry index.
-		 */
-		offset = ((useidx * BITS_PER_LONG) + bitidx) *
-			dd->rcv_entries.group_size;
-		pages = uctxt->tid_pg_list + offset;
-		phys = uctxt->physshadow + offset;
-		tid = uctxt->expected_base + offset;
-
-		/* Calculate how many pages we can pin based on free bits */
-		pinned = min((bits_used * dd->rcv_entries.group_size),
-			     (npages - mapped));
-		/*
-		 * Now that we know how many free RcvArray entries we have,
-		 * we can pin that many user pages.
-		 */
-		ret = hfi1_acquire_user_pages(vaddr + (mapped * PAGE_SIZE),
-					      pinned, true, pages);
-		if (ret) {
-			/*
-			 * We can't continue because the pages array won't be
-			 * initialized. This should never happen,
-			 * unless perhaps the user has mpin'ed the pages
-			 * themselves.
-			 */
-			dd_dev_info(dd,
-				    "Failed to lock addr %p, %u pages: errno %d\n",
-				    (void *) vaddr, pinned, -ret);
-			/*
-			 * Let go of the bits that we reserved since we are not
-			 * going to use them.
-			 */
-			spin_lock(&uctxt->exp_lock);
-			uctxt->tidusemap[useidx] &=
-				~(((1ULL << bits_used) - 1) << bitidx);
-			spin_unlock(&uctxt->exp_lock);
-			goto done;
-		}
-		/*
-		 * How many groups do we need based on how many pages we have
-		 * pinned?
-		 */
-		ngroups = (pinned / dd->rcv_entries.group_size) +
-			!!(pinned % dd->rcv_entries.group_size);
-		/*
-		 * Keep programming RcvArray entries for all the <ngroups> free
-		 * groups.
-		 */
-		for (i = 0, grp = 0; grp < ngroups; i++, grp++) {
-			unsigned j;
-			u32 pair_size = 0, tidsize;
-			/*
-			 * This inner loop will program an entire group or the
-			 * array of pinned pages (which ever limit is hit
-			 * first).
-			 */
-			for (j = 0; j < dd->rcv_entries.group_size &&
-				     pmapped < pinned; j++, pmapped++, tid++) {
-				tidsize = PAGE_SIZE;
-				phys[pmapped] = hfi1_map_page(dd->pcidev,
-						   pages[pmapped], 0,
-						   tidsize, PCI_DMA_FROMDEVICE);
-				trace_hfi1_exp_rcv_set(uctxt->ctxt,
-						       fd->subctxt,
-						       tid, vaddr,
-						       phys[pmapped],
-						       pages[pmapped]);
-				/*
-				 * Each RcvArray entry is programmed with one
-				 * page * worth of memory. This will handle
-				 * the 8K MTU as well as anything smaller
-				 * due to the fact that both entries in the
-				 * RcvTidPair are programmed with a page.
-				 * PSM currently does not handle anything
-				 * bigger than 8K MTU, so should we even worry
-				 * about 10K here?
-				 */
-				hfi1_put_tid(dd, tid, PT_EXPECTED,
-					     phys[pmapped],
-					     ilog2(tidsize >> PAGE_SHIFT) + 1);
-				pair_size += tidsize >> PAGE_SHIFT;
-				EXP_TID_RESET(tidlist[pairidx], LEN, pair_size);
-				if (!(tid % 2)) {
-					tidlist[pairidx] |=
-					   EXP_TID_SET(IDX,
-						(tid - uctxt->expected_base)
-						       / 2);
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 1);
-					tidcnt++;
-				} else {
-					tidlist[pairidx] |=
-						EXP_TID_SET(CTRL, 2);
-					pair_size = 0;
-					pairidx++;
-				}
-			}
-			/*
-			 * We've programmed the entire group (or as much of the
-			 * group as we'll use. Now, it's time to push it out...
-			 */
-			flush_wc();
-		}
-		mapped += pinned;
-		atomic_set(&uctxt->tidcursor,
-			   (((useidx & 0xffffff) << 16) |
-			    ((bitidx + bits_used) & 0xffffff)));
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 0, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-
-done:
-	/* If we've mapped anything, copy relevant info to user */
-	if (mapped) {
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
-				 tidlist, sizeof(tidlist[0]) * tidcnt)) {
-			ret = -EFAULT;
-			goto done;
-		}
-		/* copy TID info to user */
-		if (copy_to_user((void __user *)(unsigned long)tinfo->tidmap,
-				 tidmap, sizeof(tidmap[0]) * uctxt->tidmapcnt))
-			ret = -EFAULT;
-	}
-bail:
-	/*
-	 * Calculate mapped length. New Exp TID protocol does not "unwind" and
-	 * report an error if it can't map the entire buffer. It just reports
-	 * the length that was mapped.
-	 */
-	tinfo->length = mapped * PAGE_SIZE;
-	tinfo->tidcnt = tidcnt;
-	return ret;
-}
-
-static int exp_tid_free(struct file *fp, struct hfi1_tid_info *tinfo)
-{
-	struct hfi1_filedata *fd = fp->private_data;
-	struct hfi1_ctxtdata *uctxt = fd->uctxt;
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned long tidmap[uctxt->tidmapcnt];
-	struct page **pages;
-	dma_addr_t *phys;
-	u16 idx, bitidx, tid;
-	int ret = 0;
-
-	if (copy_from_user(&tidmap, (void __user *)(unsigned long)
-			   tinfo->tidmap,
-			   sizeof(tidmap[0]) * uctxt->tidmapcnt)) {
-		ret = -EFAULT;
-		goto done;
-	}
-	for (idx = 0; idx < uctxt->tidmapcnt; idx++) {
-		unsigned long map;
-
-		bitidx = 0;
-		if (!tidmap[idx])
-			continue;
-		map = tidmap[idx];
-		while ((bitidx = tzcnt(map)) < BITS_PER_LONG) {
-			int i, pcount = 0;
-			struct page *pshadow[dd->rcv_entries.group_size];
-			unsigned offset = ((idx * BITS_PER_LONG) + bitidx) *
-				dd->rcv_entries.group_size;
-
-			pages = uctxt->tid_pg_list + offset;
-			phys = uctxt->physshadow + offset;
-			tid = uctxt->expected_base + offset;
-			for (i = 0; i < dd->rcv_entries.group_size;
-			     i++, tid++) {
-				if (pages[i]) {
-					hfi1_put_tid(dd, tid, PT_INVALID,
-						      0, 0);
-					trace_hfi1_exp_rcv_free(uctxt->ctxt,
-								fd->subctxt,
-								tid, phys[i],
-								pages[i]);
-					pci_unmap_page(dd->pcidev, phys[i],
-					      PAGE_SIZE, PCI_DMA_FROMDEVICE);
-					pshadow[pcount] = pages[i];
-					pages[i] = NULL;
-					pcount++;
-					phys[i] = 0;
-				}
-			}
-			flush_wc();
-			hfi1_release_user_pages(pshadow, pcount, true);
-			clear_bit(bitidx, &uctxt->tidusemap[idx]);
-			map &= ~(1ULL<<bitidx);
-		}
-	}
-	trace_hfi1_exp_tid_map(uctxt->ctxt, fd->subctxt, 1, uctxt->tidusemap,
-			       uctxt->tidmapcnt);
-done:
-	return ret;
-}
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt)
-{
-	struct hfi1_devdata *dd = uctxt->dd;
-	unsigned tid;
-
-	dd_dev_info(dd, "ctxt %u unlocking any locked expTID pages\n",
-		    uctxt->ctxt);
-	for (tid = 0; tid < uctxt->expected_count; tid++) {
-		struct page *p = uctxt->tid_pg_list[tid];
-		dma_addr_t phys;
-
-		if (!p)
-			continue;
-
-		phys = uctxt->physshadow[tid];
-		uctxt->physshadow[tid] = 0;
-		uctxt->tid_pg_list[tid] = NULL;
-		pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
-		hfi1_release_user_pages(&p, 1, true);
-	}
-}
-
 static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
 			 u16 pkey)
 {
@@ -1933,10 +1590,9 @@ static loff_t ui_lseek(struct file *filp, loff_t offset, int whence)
 	return filp->f_pos;
 }
 
-
 /* NOTE: assumes unsigned long is 8 bytes */
 static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
-			loff_t *f_pos)
+		       loff_t *f_pos)
 {
 	struct hfi1_devdata *dd = filp->private_data;
 	void __iomem *base = dd->kregbase;
@@ -1972,12 +1628,12 @@ static ssize_t ui_read(struct file *filp, char __user *buf, size_t count,
 		 * them.  These registers are defined as having a read value
 		 * of 0.
 		 */
-		else if (csr_off == ASIC_GPIO_CLEAR
-				|| csr_off == ASIC_GPIO_FORCE
-				|| csr_off == ASIC_QSFP1_CLEAR
-				|| csr_off == ASIC_QSFP1_FORCE
-				|| csr_off == ASIC_QSFP2_CLEAR
-				|| csr_off == ASIC_QSFP2_FORCE)
+		else if (csr_off == ASIC_GPIO_CLEAR ||
+			 csr_off == ASIC_GPIO_FORCE ||
+			 csr_off == ASIC_QSFP1_CLEAR ||
+			 csr_off == ASIC_QSFP1_FORCE ||
+			 csr_off == ASIC_QSFP2_CLEAR ||
+			 csr_off == ASIC_QSFP2_FORCE)
 			data = 0;
 		else if (csr_off >= barlen) {
 			/*
diff --git a/drivers/staging/rdma/hfi1/firmware.c b/drivers/staging/rdma/hfi1/firmware.c
index 28ae42faa018..3040162cb326 100644
--- a/drivers/staging/rdma/hfi1/firmware.c
+++ b/drivers/staging/rdma/hfi1/firmware.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -77,7 +74,13 @@ static uint fw_8051_load = 1;
 static uint fw_fabric_serdes_load = 1;
 static uint fw_pcie_serdes_load = 1;
 static uint fw_sbus_load = 1;
-static uint platform_config_load = 1;
+
+/*
+ * Access required in platform.c
+ * Maintains state of whether the platform config was fetched via the
+ * fallback option
+ */
+uint platform_config_load;
 
 /* Firmware file names get set in hfi1_firmware_init() based on the above */
 static char *fw_8051_name;
@@ -107,6 +110,7 @@ struct css_header {
 	u32 exponent_size;	/* in DWORDs */
 	u32 reserved[22];
 };
+
 /* expected field values */
 #define CSS_MODULE_TYPE	   0x00000006
 #define CSS_HEADER_LEN	   0x000000a1
@@ -166,6 +170,7 @@ enum fw_state {
 	FW_FINAL,
 	FW_ERR
 };
+
 static enum fw_state fw_state = FW_EMPTY;
 static int fw_err;
 static struct firmware_details fw_8051;
@@ -193,7 +198,7 @@ static const struct firmware *platform_config;
 #define RSA_ENGINE_TIMEOUT 100 /* ms */
 
 /* hardware mutex timeout, in ms */
-#define HM_TIMEOUT 4000 /* 4 s */
+#define HM_TIMEOUT 10 /* ms */
 
 /* 8051 memory access timeout, in us */
 #define DC8051_ACCESS_TIMEOUT 100 /* us */
@@ -233,6 +238,8 @@ static const u8 all_pcie_serdes_broadcast = 0xe0;
 
 /* forwards */
 static void dispose_one_firmware(struct firmware_details *fdet);
+static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
+				       struct firmware_details *fdet);
 
 /*
  * Read a single 64-bit value from 8051 data memory.
@@ -372,8 +379,8 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
 		return 0;
 
 	dd_dev_err(dd,
-		"invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
-		what, expected, actual);
+		   "invalid firmware header field %s: expected 0x%x, actual 0x%x\n",
+		   what, expected, actual);
 	return 1;
 }
 
@@ -383,19 +390,19 @@ static int invalid_header(struct hfi1_devdata *dd, const char *what,
 static int verify_css_header(struct hfi1_devdata *dd, struct css_header *css)
 {
 	/* verify CSS header fields (most sizes are in DW, so add /4) */
-	if (invalid_header(dd, "module_type", css->module_type, CSS_MODULE_TYPE)
-			|| invalid_header(dd, "header_len", css->header_len,
-					(sizeof(struct firmware_file)/4))
-			|| invalid_header(dd, "header_version",
-					css->header_version, CSS_HEADER_VERSION)
-			|| invalid_header(dd, "module_vendor",
-					css->module_vendor, CSS_MODULE_VENDOR)
-			|| invalid_header(dd, "key_size",
-					css->key_size, KEY_SIZE/4)
-			|| invalid_header(dd, "modulus_size",
-					css->modulus_size, KEY_SIZE/4)
-			|| invalid_header(dd, "exponent_size",
-					css->exponent_size, EXPONENT_SIZE/4)) {
+	if (invalid_header(dd, "module_type", css->module_type,
+			   CSS_MODULE_TYPE) ||
+	    invalid_header(dd, "header_len", css->header_len,
+			   (sizeof(struct firmware_file) / 4)) ||
+	    invalid_header(dd, "header_version", css->header_version,
+			   CSS_HEADER_VERSION) ||
+	    invalid_header(dd, "module_vendor", css->module_vendor,
+			   CSS_MODULE_VENDOR) ||
+	    invalid_header(dd, "key_size", css->key_size, KEY_SIZE / 4) ||
+	    invalid_header(dd, "modulus_size", css->modulus_size,
+			   KEY_SIZE / 4) ||
+	    invalid_header(dd, "exponent_size", css->exponent_size,
+			   EXPONENT_SIZE / 4)) {
 		return -EINVAL;
 	}
 	return 0;
@@ -410,8 +417,8 @@ static int payload_check(struct hfi1_devdata *dd, const char *name,
 	/* make sure we have some payload */
 	if (prefix_size >= file_size) {
 		dd_dev_err(dd,
-			"firmware \"%s\", size %ld, must be larger than %ld bytes\n",
-			name, file_size, prefix_size);
+			   "firmware \"%s\", size %ld, must be larger than %ld bytes\n",
+			   name, file_size, prefix_size);
 		return -EINVAL;
 	}
 
@@ -433,8 +440,8 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 
 	ret = request_firmware(&fdet->fw, name, &dd->pcidev->dev);
 	if (ret) {
-		dd_dev_err(dd, "cannot find firmware \"%s\", err %d\n",
-			   name, ret);
+		dd_dev_warn(dd, "cannot find firmware \"%s\", err %d\n",
+			    name, ret);
 		return ret;
 	}
 
@@ -480,14 +487,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 	ret = verify_css_header(dd, css);
 	if (ret) {
 		dd_dev_info(dd, "Invalid CSS header for \"%s\"\n", name);
-	} else if ((css->size*4) == fdet->fw->size) {
+	} else if ((css->size * 4) == fdet->fw->size) {
 		/* non-augmented firmware file */
 		struct firmware_file *ff = (struct firmware_file *)
 							fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-						sizeof(struct firmware_file));
+				    sizeof(struct firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = ff->modulus;
@@ -505,14 +512,14 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 			dd_dev_err(dd, "driver is unable to validate firmware without r2 and mu (not in firmware file)\n");
 			ret = -EINVAL;
 		}
-	} else if ((css->size*4) + AUGMENT_SIZE == fdet->fw->size) {
+	} else if ((css->size * 4) + AUGMENT_SIZE == fdet->fw->size) {
 		/* augmented firmware file */
 		struct augmented_firmware_file *aff =
 			(struct augmented_firmware_file *)fdet->fw->data;
 
 		/* make sure there are bytes in the payload */
 		ret = payload_check(dd, name, fdet->fw->size,
-					sizeof(struct augmented_firmware_file));
+				    sizeof(struct augmented_firmware_file));
 		if (ret == 0) {
 			fdet->css_header = css;
 			fdet->modulus = aff->modulus;
@@ -527,9 +534,10 @@ static int obtain_one_firmware(struct hfi1_devdata *dd, const char *name,
 	} else {
 		/* css->size check failed */
 		dd_dev_err(dd,
-			"invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
-			fdet->fw->size/4, (fdet->fw->size - AUGMENT_SIZE)/4,
-			css->size);
+			   "invalid firmware header field size: expected 0x%lx or 0x%lx, actual 0x%x\n",
+			   fdet->fw->size / 4,
+			   (fdet->fw->size - AUGMENT_SIZE) / 4,
+			   css->size);
 
 		ret = -EINVAL;
 	}
@@ -572,7 +580,7 @@ retry:
 		 * We tried the original and it failed.  Move to the
 		 * alternate.
 		 */
-		dd_dev_info(dd, "using alternate firmware names\n");
+		dd_dev_warn(dd, "using alternate firmware names\n");
 		/*
 		 * Let others run.  Some systems, when missing firmware, does
 		 * something that holds for 30 seconds.  If we do that twice
@@ -593,27 +601,27 @@ retry:
 		fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
 	}
 
-	if (fw_8051_load) {
-		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
+	if (fw_sbus_load) {
+		err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
 		if (err)
 			goto done;
 	}
 
-	if (fw_fabric_serdes_load) {
-		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
-			&fw_fabric);
+	if (fw_pcie_serdes_load) {
+		err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
 		if (err)
 			goto done;
 	}
 
-	if (fw_sbus_load) {
-		err = obtain_one_firmware(dd, fw_sbus_name, &fw_sbus);
+	if (fw_fabric_serdes_load) {
+		err = obtain_one_firmware(dd, fw_fabric_serdes_name,
+					  &fw_fabric);
 		if (err)
 			goto done;
 	}
 
-	if (fw_pcie_serdes_load) {
-		err = obtain_one_firmware(dd, fw_pcie_serdes_name, &fw_pcie);
+	if (fw_8051_load) {
+		err = obtain_one_firmware(dd, fw_8051_name, &fw_8051);
 		if (err)
 			goto done;
 	}
@@ -621,16 +629,18 @@ retry:
 done:
 	if (err) {
 		/* oops, had problems obtaining a firmware */
-		if (fw_state == FW_EMPTY) {
-			/* retry with alternate */
+		if (fw_state == FW_EMPTY && dd->icode == ICODE_RTL_SILICON) {
+			/* retry with alternate (RTL only) */
 			fw_state = FW_TRY;
 			goto retry;
 		}
+		dd_dev_err(dd, "unable to obtain working firmware\n");
 		fw_state = FW_ERR;
 		fw_err = -ENOENT;
 	} else {
 		/* success */
-		if (fw_state == FW_EMPTY)
+		if (fw_state == FW_EMPTY &&
+		    dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
 			fw_state = FW_TRY;	/* may retry later */
 		else
 			fw_state = FW_FINAL;	/* cannot try again */
@@ -673,10 +683,15 @@ static int obtain_firmware(struct hfi1_devdata *dd)
 	}
 	/* not in FW_TRY state */
 
-	if (fw_state == FW_FINAL)
+	if (fw_state == FW_FINAL) {
+		if (platform_config) {
+			dd->platform_config.data = platform_config->data;
+			dd->platform_config.size = platform_config->size;
+		}
 		goto done;	/* already acquired */
-	else if (fw_state == FW_ERR)
+	} else if (fw_state == FW_ERR) {
 		goto done;	/* already tried and failed */
+	}
 	/* fw_state is FW_EMPTY */
 
 	/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
@@ -685,9 +700,13 @@ static int obtain_firmware(struct hfi1_devdata *dd)
 	if (platform_config_load) {
 		platform_config = NULL;
 		err = request_firmware(&platform_config, platform_config_name,
-						&dd->pcidev->dev);
-		if (err)
+				       &dd->pcidev->dev);
+		if (err) {
 			platform_config = NULL;
+			goto done;
+		}
+		dd->platform_config.data = platform_config->data;
+		dd->platform_config.size = platform_config->size;
 	}
 
 done:
@@ -761,7 +780,7 @@ static int retry_firmware(struct hfi1_devdata *dd, int load_result)
 static void write_rsa_data(struct hfi1_devdata *dd, int what,
 			   const u8 *data, int nbytes)
 {
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 	int i;
 
 	if (((unsigned long)data & 0x7) == 0) {
@@ -769,14 +788,14 @@ static void write_rsa_data(struct hfi1_devdata *dd, int what,
 		u64 *ptr = (u64 *)data;
 
 		for (i = 0; i < qw_size; i++, ptr++)
-			write_csr(dd, what + (8*i), *ptr);
+			write_csr(dd, what + (8 * i), *ptr);
 	} else {
 		/* not aligned */
 		for (i = 0; i < qw_size; i++, data += 8) {
 			u64 value;
 
 			memcpy(&value, data, 8);
-			write_csr(dd, what + (8*i), value);
+			write_csr(dd, what + (8 * i), value);
 		}
 	}
 }
@@ -789,7 +808,7 @@ static void write_streamed_rsa_data(struct hfi1_devdata *dd, int what,
 				    const u8 *data, int nbytes)
 {
 	u64 *ptr = (u64 *)data;
-	int qw_size = nbytes/8;
+	int qw_size = nbytes / 8;
 
 	for (; qw_size > 0; qw_size--, ptr++)
 		write_csr(dd, what, *ptr);
@@ -822,7 +841,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 			     >> MISC_CFG_FW_CTRL_RSA_STATUS_SHIFT;
 	if (status != RSA_STATUS_IDLE) {
 		dd_dev_err(dd, "%s security engine not idle - giving up\n",
-			who);
+			   who);
 		return -EBUSY;
 	}
 
@@ -859,7 +878,7 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 		if (status == RSA_STATUS_IDLE) {
 			/* should not happen */
 			dd_dev_err(dd, "%s firmware security bad idle state\n",
-				who);
+				   who);
 			ret = -EINVAL;
 			break;
 		} else if (status == RSA_STATUS_DONE) {
@@ -893,19 +912,20 @@ static int run_rsa(struct hfi1_devdata *dd, const char *who,
 	 * is not keeping the error high.
 	 */
 	write_csr(dd, MISC_ERR_CLEAR,
-			MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK
-			| MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
+		  MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK |
+		  MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK);
 	/*
-	 * All that is left are the current errors.  Print failure details,
-	 * if any.
+	 * All that is left are the current errors.  Print warnings on
+	 * authorization failure details, if any.  Firmware authorization
+	 * can be retried, so these are only warnings.
 	 */
 	reg = read_csr(dd, MISC_ERR_STATUS);
 	if (ret) {
 		if (reg & MISC_ERR_STATUS_MISC_FW_AUTH_FAILED_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware authorization failed\n",
-				who);
+			dd_dev_warn(dd, "%s firmware authorization failed\n",
+				    who);
 		if (reg & MISC_ERR_STATUS_MISC_KEY_MISMATCH_ERR_SMASK)
-			dd_dev_err(dd, "%s firmware key mismatch\n", who);
+			dd_dev_warn(dd, "%s firmware key mismatch\n", who);
 	}
 
 	return ret;
@@ -922,7 +942,8 @@ static void load_security_variables(struct hfi1_devdata *dd,
 	write_rsa_data(dd, MISC_CFG_RSA_MU, fdet->mu, MU_SIZE);
 	/* Security variables d.  Write the header */
 	write_streamed_rsa_data(dd, MISC_CFG_SHA_PRELOAD,
-			(u8 *)fdet->css_header, sizeof(struct css_header));
+				(u8 *)fdet->css_header,
+				sizeof(struct css_header));
 }
 
 /* return the 8051 firmware state */
@@ -1002,7 +1023,7 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
 
 	/* Firmware load steps 3-5 */
 	ret = write_8051(dd, 1/*code*/, 0, fdet->firmware_ptr,
-							fdet->firmware_len);
+			 fdet->firmware_len);
 	if (ret)
 		return ret;
 
@@ -1029,13 +1050,13 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
 	ret = wait_fm_ready(dd, TIMEOUT_8051_START);
 	if (ret) { /* timed out */
 		dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
-			get_firmware_state(dd));
+			   get_firmware_state(dd));
 		return -ETIMEDOUT;
 	}
 
 	read_misc_status(dd, &ver_a, &ver_b);
 	dd_dev_info(dd, "8051 firmware version %d.%d\n",
-		(int)ver_b, (int)ver_a);
+		    (int)ver_b, (int)ver_a);
 	dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
 
 	return 0;
@@ -1050,11 +1071,11 @@ void sbus_request(struct hfi1_devdata *dd,
 		  u8 receiver_addr, u8 data_addr, u8 command, u32 data_in)
 {
 	write_csr(dd, ASIC_CFG_SBUS_REQUEST,
-		((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT)
-		| ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT)
-		| ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT)
-		| ((u64)receiver_addr
-			<< ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
+		  ((u64)data_in << ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT) |
+		  ((u64)command << ASIC_CFG_SBUS_REQUEST_COMMAND_SHIFT) |
+		  ((u64)data_addr << ASIC_CFG_SBUS_REQUEST_DATA_ADDR_SHIFT) |
+		  ((u64)receiver_addr <<
+		   ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT));
 }
 
 /*
@@ -1072,14 +1093,14 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
 		return;
 
 	dd_dev_info(dd, "Turning off spicos:%s%s\n",
-		flags & SPICO_SBUS ? " SBus" : "",
-		flags & SPICO_FABRIC ? " fabric" : "");
+		    flags & SPICO_SBUS ? " SBus" : "",
+		    flags & SPICO_FABRIC ? " fabric" : "");
 
 	write_csr(dd, MISC_CFG_FW_CTRL, ENABLE_SPICO_SMASK);
 	/* disable SBus spico */
 	if (flags & SPICO_SBUS)
 		sbus_request(dd, SBUS_MASTER_BROADCAST, 0x01,
-			WRITE_SBUS_RECEIVER, 0x00000040);
+			     WRITE_SBUS_RECEIVER, 0x00000040);
 
 	/* disable the fabric serdes spicos */
 	if (flags & SPICO_FABRIC)
@@ -1089,29 +1110,60 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
 }
 
 /*
- *  Reset all of the fabric serdes for our HFI.
+ * Reset all of the fabric serdes for this HFI in preparation to take the
+ * link to Polling.
+ *
+ * To do a reset, we need to write to to the serdes registers.  Unfortunately,
+ * the fabric serdes download to the other HFI on the ASIC will have turned
+ * off the firmware validation on this HFI.  This means we can't write to the
+ * registers to reset the serdes.  Work around this by performing a complete
+ * re-download and validation of the fabric serdes firmware.  This, as a
+ * by-product, will reset the serdes.  NOTE: the re-download requires that
+ * the 8051 be in the Offline state.  I.e. not actively trying to use the
+ * serdes.  This routine is called at the point where the link is Offline and
+ * is getting ready to go to Polling.
  */
 void fabric_serdes_reset(struct hfi1_devdata *dd)
 {
-	u8 ra;
+	int ret;
 
-	if (dd->icode != ICODE_RTL_SILICON) /* only for RTL */
+	if (!fw_fabric_serdes_load)
 		return;
 
-	ra = fabric_serdes_broadcast[dd->hfi1_id];
-
-	acquire_hw_mutex(dd);
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd,
+			   "Cannot acquire SBus resource to reset fabric SerDes - perhaps you should reboot\n");
+		return;
+	}
 	set_sbus_fast_mode(dd);
-	/* place SerDes in reset and disable SPICO */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
-	/* wait 100 refclk cycles @ 156.25MHz => 640ns */
-	udelay(1);
-	/* remove SerDes reset */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
-	/* turn SPICO enable on */
-	sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+
+	if (is_ax(dd)) {
+		/* A0 serdes do not work with a re-download */
+		u8 ra = fabric_serdes_broadcast[dd->hfi1_id];
+
+		/* place SerDes in reset and disable SPICO */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000011);
+		/* wait 100 refclk cycles @ 156.25MHz => 640ns */
+		udelay(1);
+		/* remove SerDes reset */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000010);
+		/* turn SPICO enable on */
+		sbus_request(dd, ra, 0x07, WRITE_SBUS_RECEIVER, 0x00000002);
+	} else {
+		turn_off_spicos(dd, SPICO_FABRIC);
+		/*
+		 * No need for firmware retry - what to download has already
+		 * been decided.
+		 * No need to pay attention to the load return - the only
+		 * failure is a validation failure, which has already been
+		 * checked by the initial download.
+		 */
+		(void)load_fabric_serdes_firmware(dd, &fw_fabric);
+	}
+
 	clear_sbus_fast_mode(dd);
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 }
 
 /* Access to the SBus in this routine should probably be serialized */
@@ -1120,6 +1172,9 @@ int sbus_request_slow(struct hfi1_devdata *dd,
 {
 	u64 reg, count = 0;
 
+	/* make sure fast mode is clear */
+	clear_sbus_fast_mode(dd);
+
 	sbus_request(dd, receiver_addr, data_addr, command, data_in);
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
 		  ASIC_CFG_SBUS_EXECUTE_EXECUTE_SMASK);
@@ -1177,7 +1232,7 @@ static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
 	/* step 5: download SerDes machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x0a, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: IMEM override off */
 	sbus_request(dd, ra, 0x00, WRITE_SBUS_RECEIVER, 0x00000000);
@@ -1216,7 +1271,7 @@ static int load_sbus_firmware(struct hfi1_devdata *dd,
 	/* step 5: download the SBus Master machine code */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x14, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 6: set IMEM_CNTL_EN off */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000040);
@@ -1249,19 +1304,23 @@ static int load_pcie_serdes_firmware(struct hfi1_devdata *dd,
 	/* step 3: enable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000d40);
 	/* step 4: load firmware into SBus Master XDMEM */
-	/* NOTE: the dmem address, write_en, and wdata are all pre-packed,
-	   we only need to pick up the bytes and write them */
+	/*
+	 * NOTE: the dmem address, write_en, and wdata are all pre-packed,
+	 * we only need to pick up the bytes and write them
+	 */
 	for (i = 0; i < fdet->firmware_len; i += 4) {
 		sbus_request(dd, ra, 0x04, WRITE_SBUS_RECEIVER,
-					*(u32 *)&fdet->firmware_ptr[i]);
+			     *(u32 *)&fdet->firmware_ptr[i]);
 	}
 	/* step 5: disable XDMEM access */
 	sbus_request(dd, ra, 0x01, WRITE_SBUS_RECEIVER, 0x00000140);
 	/* step 6: allow SBus Spico to run */
 	sbus_request(dd, ra, 0x05, WRITE_SBUS_RECEIVER, 0x00000000);
 
-	/* steps 7-11: run RSA, if it succeeds, firmware is available to
-	   be swapped */
+	/*
+	 * steps 7-11: run RSA, if it succeeds, firmware is available to
+	 * be swapped
+	 */
 	return run_rsa(dd, "PCIe serdes", fdet->signature);
 }
 
@@ -1285,7 +1344,7 @@ static void set_serdes_broadcast(struct hfi1_devdata *dd, u8 bg1, u8 bg2,
 		 *	23:16	BROADCAST_GROUP_2 (default 0xff)
 		 */
 		sbus_request(dd, addrs[count], 0xfd, WRITE_SBUS_RECEIVER,
-				(u32)bg1 << 4 | (u32)bg2 << 16);
+			     (u32)bg1 << 4 | (u32)bg2 << 16);
 	}
 }
 
@@ -1310,8 +1369,8 @@ retry:
 
 	/* timed out */
 	dd_dev_err(dd,
-		"Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
-		(u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
+		   "Unable to acquire hardware mutex, mutex mask %u, my mask %u (%s)\n",
+		   (u32)user, (u32)mask, (try == 0) ? "retrying" : "giving up");
 
 	if (try == 0) {
 		/* break mutex and retry */
@@ -1328,10 +1387,197 @@ void release_hw_mutex(struct hfi1_devdata *dd)
 	write_csr(dd, ASIC_CFG_MUTEX, 0);
 }
 
+/* return the given resource bit(s) as a mask for the given HFI */
+static inline u64 resource_mask(u32 hfi1_id, u32 resource)
+{
+	return ((u64)resource) << (hfi1_id ? CR_DYN_SHIFT : 0);
+}
+
+static void fail_mutex_acquire_message(struct hfi1_devdata *dd,
+				       const char *func)
+{
+	dd_dev_err(dd,
+		   "%s: hardware mutex stuck - suggest rebooting the machine\n",
+		   func);
+}
+
+/*
+ * Acquire access to a chip resource.
+ *
+ * Return 0 on success, -EBUSY if resource busy, -EIO if mutex acquire failed.
+ */
+static int __acquire_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, all_bits, my_bit;
+	int ret;
+
+	if (resource & CR_DYN_MASK) {
+		/* a dynamic resource is in use if either HFI has set the bit */
+		all_bits = resource_mask(0, resource) |
+						resource_mask(1, resource);
+		my_bit = resource_mask(dd->hfi1_id, resource);
+	} else {
+		/* non-dynamic resources are not split between HFIs */
+		all_bits = resource;
+		my_bit = resource;
+	}
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	ret = acquire_hw_mutex(dd);
+	if (ret) {
+		fail_mutex_acquire_message(dd, __func__);
+		ret = -EIO;
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if (scratch0 & all_bits) {
+		ret = -EBUSY;
+	} else {
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0 | my_bit);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+	return ret;
+}
+
+/*
+ * Acquire access to a chip resource, wait up to mswait milliseconds for
+ * the resource to become available.
+ *
+ * Return 0 on success, -EBUSY if busy (even after wait), -EIO if mutex
+ * acquire failed.
+ */
+int acquire_chip_resource(struct hfi1_devdata *dd, u32 resource, u32 mswait)
+{
+	unsigned long timeout;
+	int ret;
+
+	timeout = jiffies + msecs_to_jiffies(mswait);
+	while (1) {
+		ret = __acquire_chip_resource(dd, resource);
+		if (ret != -EBUSY)
+			return ret;
+		/* resource is busy, check our timeout */
+		if (time_after_eq(jiffies, timeout))
+			return -EBUSY;
+		usleep_range(80, 120);	/* arbitrary delay */
+	}
+}
+
+/*
+ * Release access to a chip resource
+ */
+void release_chip_resource(struct hfi1_devdata *dd, u32 resource)
+{
+	u64 scratch0, bit;
+
+	/* only dynamic resources should ever be cleared */
+	if (!(resource & CR_DYN_MASK)) {
+		dd_dev_err(dd, "%s: invalid resource 0x%x\n", __func__,
+			   resource);
+		return;
+	}
+	bit = resource_mask(dd->hfi1_id, resource);
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, __func__);
+		goto done;
+	}
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) != 0) {
+		scratch0 &= ~bit;
+		write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+		/* force write to be visible to other HFI on another OS */
+		(void)read_csr(dd, ASIC_CFG_SCRATCH);
+	} else {
+		dd_dev_warn(dd, "%s: id %d, resource 0x%x: bit not set\n",
+			    __func__, dd->hfi1_id, resource);
+	}
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+/*
+ * Return true if resource is set, false otherwise.  Print a warning
+ * if not set and a function is supplied.
+ */
+bool check_chip_resource(struct hfi1_devdata *dd, u32 resource,
+			 const char *func)
+{
+	u64 scratch0, bit;
+
+	if (resource & CR_DYN_MASK)
+		bit = resource_mask(dd->hfi1_id, resource);
+	else
+		bit = resource;
+
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	if ((scratch0 & bit) == 0) {
+		if (func)
+			dd_dev_warn(dd,
+				    "%s: id %d, resource 0x%x, not acquired!\n",
+				    func, dd->hfi1_id, resource);
+		return false;
+	}
+	return true;
+}
+
+static void clear_chip_resources(struct hfi1_devdata *dd, const char *func)
+{
+	u64 scratch0;
+
+	/* lock against other callers within the driver wanting a resource */
+	mutex_lock(&dd->asic_data->asic_resource_mutex);
+
+	if (acquire_hw_mutex(dd)) {
+		fail_mutex_acquire_message(dd, func);
+		goto done;
+	}
+
+	/* clear all dynamic access bits for this HFI */
+	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
+	scratch0 &= ~resource_mask(dd->hfi1_id, CR_DYN_MASK);
+	write_csr(dd, ASIC_CFG_SCRATCH, scratch0);
+	/* force write to be visible to other HFI on another OS */
+	(void)read_csr(dd, ASIC_CFG_SCRATCH);
+
+	release_hw_mutex(dd);
+
+done:
+	mutex_unlock(&dd->asic_data->asic_resource_mutex);
+}
+
+void init_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
+void finish_chip_resources(struct hfi1_devdata *dd)
+{
+	/* clear any holds left by us */
+	clear_chip_resources(dd, __func__);
+}
+
 void set_sbus_fast_mode(struct hfi1_devdata *dd)
 {
 	write_csr(dd, ASIC_CFG_SBUS_EXECUTE,
-				ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
+		  ASIC_CFG_SBUS_EXECUTE_FAST_MODE_SMASK);
 }
 
 void clear_sbus_fast_mode(struct hfi1_devdata *dd)
@@ -1354,23 +1600,23 @@ int load_firmware(struct hfi1_devdata *dd)
 	int ret;
 
 	if (fw_fabric_serdes_load) {
-		ret = acquire_hw_mutex(dd);
+		ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
 		if (ret)
 			return ret;
 
 		set_sbus_fast_mode(dd);
 
 		set_serdes_broadcast(dd, all_fabric_serdes_broadcast,
-				fabric_serdes_broadcast[dd->hfi1_id],
-				fabric_serdes_addrs[dd->hfi1_id],
-				NUM_FABRIC_SERDES);
+				     fabric_serdes_broadcast[dd->hfi1_id],
+				     fabric_serdes_addrs[dd->hfi1_id],
+				     NUM_FABRIC_SERDES);
 		turn_off_spicos(dd, SPICO_FABRIC);
 		do {
 			ret = load_fabric_serdes_firmware(dd, &fw_fabric);
 		} while (retry_firmware(dd, ret));
 
 		clear_sbus_fast_mode(dd);
-		release_hw_mutex(dd);
+		release_chip_resource(dd, CR_SBUS);
 		if (ret)
 			return ret;
 	}
@@ -1419,18 +1665,57 @@ int hfi1_firmware_init(struct hfi1_devdata *dd)
 	return obtain_firmware(dd);
 }
 
+/*
+ * This function is a helper function for parse_platform_config(...) and
+ * does not check for validity of the platform configuration cache
+ * (because we know it is invalid as we are building up the cache).
+ * As such, this should not be called from anywhere other than
+ * parse_platform_config
+ */
+static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
+{
+	u32 meta_ver, meta_ver_meta, ver_start, ver_len, mask;
+	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+
+	if (!system_table)
+		return -EINVAL;
+
+	meta_ver_meta =
+	*(pcfgcache->config_tables[PLATFORM_CONFIG_SYSTEM_TABLE].table_metadata
+	+ SYSTEM_TABLE_META_VERSION);
+
+	mask = ((1 << METADATA_TABLE_FIELD_START_LEN_BITS) - 1);
+	ver_start = meta_ver_meta & mask;
+
+	meta_ver_meta >>= METADATA_TABLE_FIELD_LEN_SHIFT;
+
+	mask = ((1 << METADATA_TABLE_FIELD_LEN_LEN_BITS) - 1);
+	ver_len = meta_ver_meta & mask;
+
+	ver_start /= 8;
+	meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1);
+
+	if (meta_ver < 5) {
+		dd_dev_info(
+			dd, "%s:Please update platform config\n", __func__);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int parse_platform_config(struct hfi1_devdata *dd)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *ptr = NULL;
-	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0;
+	u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0;
 	u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
+	int ret = -EINVAL; /* assume failure */
 
-	if (platform_config == NULL) {
+	if (!dd->platform_config.data) {
 		dd_dev_info(dd, "%s: Missing config file\n", __func__);
 		goto bail;
 	}
-	ptr = (u32 *)platform_config->data;
+	ptr = (u32 *)dd->platform_config.data;
 
 	magic_num = *ptr;
 	ptr++;
@@ -1439,12 +1724,32 @@ int parse_platform_config(struct hfi1_devdata *dd)
 		goto bail;
 	}
 
-	while (ptr < (u32 *)(platform_config->data + platform_config->size)) {
+	/* Field is file size in DWORDs */
+	file_length = (*ptr) * 4;
+	ptr++;
+
+	if (file_length > dd->platform_config.size) {
+		dd_dev_info(dd, "%s:File claims to be larger than read size\n",
+			    __func__);
+		goto bail;
+	} else if (file_length < dd->platform_config.size) {
+		dd_dev_info(dd,
+			    "%s:File claims to be smaller than read size, continuing\n",
+			    __func__);
+	}
+	/* exactly equal, perfection */
+
+	/*
+	 * In both cases where we proceed, using the self-reported file length
+	 * is the safer option
+	 */
+	while (ptr < (u32 *)(dd->platform_config.data + file_length)) {
 		header1 = *ptr;
 		header2 = *(ptr + 1);
 		if (header1 != ~header2) {
 			dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr - (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 
@@ -1467,6 +1772,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
 			case PLATFORM_CONFIG_SYSTEM_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
 									1;
+				ret = check_meta_version(dd, ptr);
+				if (ret)
+					goto bail;
 				break;
 			case PLATFORM_CONFIG_PORT_TABLE:
 				pcfgcache->config_tables[table_type].num_table =
@@ -1484,9 +1792,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
 				break;
 			default:
 				dd_dev_info(dd,
-				      "%s: Unknown data table %d, offset %ld\n",
-					__func__, table_type,
-				       (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown data table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr - (u32 *)
+					     dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table = ptr;
@@ -1507,9 +1816,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
 				break;
 			default:
 				dd_dev_info(dd,
-				  "%s: Unknown metadata table %d, offset %ld\n",
-				  __func__, table_type,
-				  (ptr - (u32 *)platform_config->data));
+					    "%s: Unknown meta table %d, offset %ld\n",
+					    __func__, table_type,
+					    (ptr -
+					     (u32 *)dd->platform_config.data));
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table_metadata =
@@ -1518,14 +1828,16 @@ int parse_platform_config(struct hfi1_devdata *dd)
 
 		/* Calculate and check table crc */
 		crc = crc32_le(~(u32)0, (unsigned char const *)ptr,
-				(table_length_dwords * 4));
+			       (table_length_dwords * 4));
 		crc ^= ~(u32)0;
 
 		/* Jump the table */
 		ptr += table_length_dwords;
 		if (crc != *ptr) {
 			dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
-				__func__, (ptr - (u32 *)platform_config->data));
+				    __func__, (ptr -
+					       (u32 *)
+					       dd->platform_config.data));
 			goto bail;
 		}
 		/* Jump the CRC DWORD */
@@ -1536,11 +1848,12 @@ int parse_platform_config(struct hfi1_devdata *dd)
 	return 0;
 bail:
 	memset(pcfgcache, 0, sizeof(struct platform_config_cache));
-	return -EINVAL;
+	return ret;
 }
 
 static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
-		int field, u32 *field_len_bits, u32 *field_start_bits)
+					  int field, u32 *field_len_bits,
+					  u32 *field_start_bits)
 {
 	struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
 	u32 *src_ptr = NULL;
@@ -1600,8 +1913,9 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
  * @len: length of memory pointed by @data in bytes.
  */
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len)
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len)
 {
 	int ret = 0, wlen = 0, seek = 0;
 	u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL;
@@ -1613,7 +1927,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 		return -EINVAL;
 
 	ret = get_platform_fw_field_metadata(dd, table_type, field_index,
-					&field_len_bits, &field_start_bits);
+					     &field_len_bits,
+					     &field_start_bits);
 	if (ret)
 		return -EINVAL;
 
@@ -1629,19 +1944,21 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 			if (len < field_len_bits)
 				return -EINVAL;
 
-			seek = field_start_bits/8;
-			wlen = field_len_bits/8;
+			seek = field_start_bits / 8;
+			wlen = field_len_bits / 8;
 
 			src_ptr = (u32 *)((u8 *)src_ptr + seek);
 
-			/* We expect the field to be byte aligned and whole byte
-			 * lengths if we are here */
+			/*
+			 * We expect the field to be byte aligned and whole byte
+			 * lengths if we are here
+			 */
 			memcpy(data, src_ptr, wlen);
 			return 0;
 		}
 		break;
 	case PLATFORM_CONFIG_PORT_TABLE:
-		/* Port table is 4 DWORDS in META_VERSION 0 */
+		/* Port table is 4 DWORDS */
 		src_ptr = dd->hfi1_id ?
 			pcfgcache->config_tables[table_type].table + 4 :
 			pcfgcache->config_tables[table_type].table;
@@ -1669,7 +1986,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
 	if (!src_ptr || len < field_len_bits)
 		return -EINVAL;
 
-	src_ptr += (field_start_bits/32);
+	src_ptr += (field_start_bits / 32);
 	*data = (*src_ptr >> (field_start_bits % 32)) &
 			((1 << field_len_bits) - 1);
 
@@ -1680,7 +1997,7 @@ int get_platform_config_field(struct hfi1_devdata *dd,
  * Download the firmware needed for the Gen3 PCIe SerDes.  An update
  * to the SBus firmware is needed before updating the PCIe firmware.
  *
- * Note: caller must be holding the HW mutex.
+ * Note: caller must be holding the SBus resource.
  */
 int load_pcie_firmware(struct hfi1_devdata *dd)
 {
@@ -1701,9 +2018,9 @@ int load_pcie_firmware(struct hfi1_devdata *dd)
 	if (fw_pcie_serdes_load) {
 		dd_dev_info(dd, "Setting PCIe SerDes broadcast\n");
 		set_serdes_broadcast(dd, all_pcie_serdes_broadcast,
-					pcie_serdes_broadcast[dd->hfi1_id],
-					pcie_serdes_addrs[dd->hfi1_id],
-					NUM_PCIE_SERDES);
+				     pcie_serdes_broadcast[dd->hfi1_id],
+				     pcie_serdes_addrs[dd->hfi1_id],
+				     NUM_PCIE_SERDES);
 		do {
 			ret = load_pcie_serdes_firmware(dd, &fw_pcie);
 		} while (retry_firmware(dd, ret));
@@ -1724,9 +2041,9 @@ void read_guid(struct hfi1_devdata *dd)
 {
 	/* Take the DC out of reset to get a valid GUID value */
 	write_csr(dd, CCE_DC_CTRL, 0);
-	(void) read_csr(dd, CCE_DC_CTRL);
+	(void)read_csr(dd, CCE_DC_CTRL);
 
 	dd->base_guid = read_csr(dd, DC_DC8051_CFG_LOCAL_GUID);
 	dd_dev_info(dd, "GUID %llx",
-		(unsigned long long)dd->base_guid);
+		    (unsigned long long)dd->base_guid);
 }
diff --git a/drivers/staging/rdma/hfi1/hfi.h b/drivers/staging/rdma/hfi1/hfi.h
index d4826a9ab8d3..16cbdc4073e0 100644
--- a/drivers/staging/rdma/hfi1/hfi.h
+++ b/drivers/staging/rdma/hfi1/hfi.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_KERNEL_H
 #define _HFI1_KERNEL_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -65,6 +62,7 @@
 #include <linux/cdev.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "chip_registers.h"
 #include "common.h"
@@ -73,7 +71,8 @@
 #include "chip.h"
 #include "mad.h"
 #include "qsfp.h"
-#include "platform_config.h"
+#include "platform.h"
+#include "affinity.h"
 
 /* bumped 1 from s/w major version of TrueScale */
 #define HFI1_CHIP_VERS_MAJ 3U
@@ -98,6 +97,8 @@ extern unsigned long hfi1_cap_mask;
 #define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap))
 #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \
 			HFI1_CAP_MISC_MASK)
+/* Offline Disabled Reason is 4-bits */
+#define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON)
 
 /*
  * Control context is always 0 and handles the error packets.
@@ -177,6 +178,11 @@ struct ctxt_eager_bufs {
 	} *rcvtids;
 };
 
+struct exp_tid_set {
+	struct list_head list;
+	u32 count;
+};
+
 struct hfi1_ctxtdata {
 	/* shadow the ctxt's RcvCtrl register */
 	u64 rcvctrl;
@@ -233,20 +239,13 @@ struct hfi1_ctxtdata {
 	u32 expected_count;
 	/* index of first expected TID entry. */
 	u32 expected_base;
-	/* cursor into the exp group sets */
-	atomic_t tidcursor;
-	/* number of exp TID groups assigned to the ctxt */
-	u16 numtidgroups;
-	/* size of exp TID group fields in tidusemap */
-	u16 tidmapcnt;
-	/* exp TID group usage bitfield array */
-	unsigned long *tidusemap;
-	/* pinned pages for exp sends, allocated at open */
-	struct page **tid_pg_list;
-	/* dma handles for exp tid pages */
-	dma_addr_t *physshadow;
+
+	struct exp_tid_set tid_group_list;
+	struct exp_tid_set tid_used_list;
+	struct exp_tid_set tid_full_list;
+
 	/* lock protecting all Expected TID data */
-	spinlock_t exp_lock;
+	struct mutex exp_lock;
 	/* number of pio bufs for this ctxt (all procs, if shared) */
 	u32 piocnt;
 	/* first pio buffer for this ctxt */
@@ -311,8 +310,24 @@ struct hfi1_ctxtdata {
 	 */
 	struct task_struct *progress;
 	struct list_head sdma_queues;
+	/* protect sdma queues */
 	spinlock_t sdma_qlock;
 
+	/* Is ASPM interrupt supported for this context */
+	bool aspm_intr_supported;
+	/* ASPM state (enabled/disabled) for this context */
+	bool aspm_enabled;
+	/* Timer for re-enabling ASPM if interrupt activity quietens down */
+	struct timer_list aspm_timer;
+	/* Lock to serialize between intr, timer intr and user threads */
+	spinlock_t aspm_lock;
+	/* Is ASPM processing enabled for this context (in intr context) */
+	bool aspm_intr_enable;
+	/* Last interrupt timestamp */
+	ktime_t aspm_ts_last_intr;
+	/* Last timestamp at which we scheduled a timer for this context */
+	ktime_t aspm_ts_timer_sched;
+
 	/*
 	 * The interrupt handler for a particular receive context can vary
 	 * throughout it's lifetime. This is not a lock protected data member so
@@ -335,7 +350,7 @@ struct hfi1_packet {
 	void *hdr;
 	struct hfi1_ctxtdata *rcd;
 	__le32 *rhf_addr;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct hfi1_other_headers *ohdr;
 	u64 rhf;
 	u32 maxcnt;
@@ -363,6 +378,7 @@ struct hfi1_snoop_data {
 	int mode_flag;
 	struct cdev cdev;
 	struct device *class_dev;
+	/* protect snoop data */
 	spinlock_t snoop_lock;
 	struct list_head queue;
 	wait_queue_head_t waitq;
@@ -375,7 +391,7 @@ struct hfi1_snoop_data {
 #define HFI1_PORT_SNOOP_MODE     1U
 #define HFI1_PORT_CAPTURE_MODE   2U
 
-struct hfi1_sge_state;
+struct rvt_sge_state;
 
 /*
  * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
@@ -424,17 +440,17 @@ struct hfi1_sge_state;
 #define __HLS_GOING_OFFLINE_BP  9
 #define __HLS_LINK_COOLDOWN_BP 10
 
-#define HLS_UP_INIT	  (1 << __HLS_UP_INIT_BP)
-#define HLS_UP_ARMED	  (1 << __HLS_UP_ARMED_BP)
-#define HLS_UP_ACTIVE	  (1 << __HLS_UP_ACTIVE_BP)
-#define HLS_DN_DOWNDEF	  (1 << __HLS_DN_DOWNDEF_BP) /* link down default */
-#define HLS_DN_POLL	  (1 << __HLS_DN_POLL_BP)
-#define HLS_DN_DISABLE	  (1 << __HLS_DN_DISABLE_BP)
-#define HLS_DN_OFFLINE	  (1 << __HLS_DN_OFFLINE_BP)
-#define HLS_VERIFY_CAP	  (1 << __HLS_VERIFY_CAP_BP)
-#define HLS_GOING_UP	  (1 << __HLS_GOING_UP_BP)
-#define HLS_GOING_OFFLINE (1 << __HLS_GOING_OFFLINE_BP)
-#define HLS_LINK_COOLDOWN (1 << __HLS_LINK_COOLDOWN_BP)
+#define HLS_UP_INIT	  BIT(__HLS_UP_INIT_BP)
+#define HLS_UP_ARMED	  BIT(__HLS_UP_ARMED_BP)
+#define HLS_UP_ACTIVE	  BIT(__HLS_UP_ACTIVE_BP)
+#define HLS_DN_DOWNDEF	  BIT(__HLS_DN_DOWNDEF_BP) /* link down default */
+#define HLS_DN_POLL	  BIT(__HLS_DN_POLL_BP)
+#define HLS_DN_DISABLE	  BIT(__HLS_DN_DISABLE_BP)
+#define HLS_DN_OFFLINE	  BIT(__HLS_DN_OFFLINE_BP)
+#define HLS_VERIFY_CAP	  BIT(__HLS_VERIFY_CAP_BP)
+#define HLS_GOING_UP	  BIT(__HLS_GOING_UP_BP)
+#define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP)
+#define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP)
 
 #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
 
@@ -490,6 +506,7 @@ struct hfi1_sge_state;
 #define CNTR_DISABLED		0x2 /* Disable this counter */
 #define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
 #define CNTR_VL			0x8 /* Per VL counter */
+#define CNTR_SDMA              0x10
 #define CNTR_INVALID_VL		-1  /* Specifies invalid VL */
 #define CNTR_MODE_W		0x0
 #define CNTR_MODE_R		0x1
@@ -512,10 +529,11 @@ static inline void incr_cntr32(u32 *cntr)
 
 #define MAX_NAME_SIZE 64
 struct hfi1_msix_entry {
+	enum irq_type type;
 	struct msix_entry msix;
 	void *arg;
 	char name[MAX_NAME_SIZE];
-	cpumask_var_t mask;
+	cpumask_t mask;
 };
 
 /* per-SL CCA information */
@@ -542,6 +560,7 @@ enum {
 };
 
 struct vl_arb_cache {
+	/* protect vl arb cache */
 	spinlock_t lock;
 	struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE];
 };
@@ -561,7 +580,8 @@ struct hfi1_pportdata {
 	struct kobject sl2sc_kobj;
 	struct kobject vl2mtu_kobj;
 
-	/* QSFP support */
+	/* PHY support */
+	u32 port_type;
 	struct qsfp_data qsfp_info;
 
 	/* GUID for this interface, in host order */
@@ -586,6 +606,7 @@ struct hfi1_pportdata {
 	struct work_struct link_vc_work;
 	struct work_struct link_up_work;
 	struct work_struct link_down_work;
+	struct work_struct dc_host_req_work;
 	struct work_struct sma_message_work;
 	struct work_struct freeze_work;
 	struct work_struct link_downgrade_work;
@@ -623,6 +644,7 @@ struct hfi1_pportdata {
 	u16 link_speed_active;
 	u8 vls_supported;
 	u8 vls_operational;
+	u8 actual_vls_operational;
 	/* LID mask control */
 	u8 lmc;
 	/* Rx Polarity inversion (compensate for ~tx on partner) */
@@ -642,19 +664,23 @@ struct hfi1_pportdata {
 	u8 link_enabled;	/* link enabled? */
 	u8 linkinit_reason;
 	u8 local_tx_rate;	/* rate given to 8051 firmware */
+	u8 last_pstate;		/* info only */
 
 	/* placeholders for IB MAD packet settings */
 	u8 overrun_threshold;
 	u8 phy_error_threshold;
 
-	/* used to override LED behavior */
-	u8 led_override;  /* Substituted for normal value, if non-zero */
-	u16 led_override_timeoff; /* delta to next timer event */
-	u8 led_override_vals[2]; /* Alternates per blink-frame */
-	u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+	/* Used to override LED behavior for things like maintenance beaconing*/
+	/*
+	 * Alternates per phase of blink
+	 * [0] holds LED off duration, [1] holds LED on duration
+	 */
+	unsigned long led_override_vals[2];
+	u8 led_override_phase; /* LSB picks from vals[] */
 	atomic_t led_override_timer_active;
 	/* Used to flash LEDs in override mode */
 	struct timer_list led_override_timer;
+
 	u32 sm_trap_qp;
 	u32 sa_qp;
 
@@ -689,10 +715,12 @@ struct hfi1_pportdata {
 	/* CA's max number of 64 entry units in the congestion control table */
 	u8 cc_max_table_entries;
 
-	/* begin congestion log related entries
-	 * cc_log_lock protects all congestion log related data */
+	/*
+	 * begin congestion log related entries
+	 * cc_log_lock protects all congestion log related data
+	 */
 	spinlock_t cc_log_lock ____cacheline_aligned_in_smp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	u16 threshold_event_counter;
 	struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS];
 	int cc_log_idx; /* index for logging events */
@@ -705,8 +733,9 @@ struct hfi1_pportdata {
 	u64 *cntrs;
 	/* port relative synthetic counter buffer */
 	u64 *scntrs;
-	/* we synthesize port_xmit_discards from several egress errors */
+	/* port_xmit_discards are synthesized from different egress errors */
 	u64 port_xmit_discards;
+	u64 port_xmit_discards_vl[C_VL_COUNT];
 	u64 port_xmit_constraint_errors;
 	u64 port_rcv_constraint_errors;
 	/* count of 'link_err' interrupts from DC */
@@ -728,6 +757,9 @@ struct hfi1_pportdata {
 	u8 remote_link_down_reason;
 	/* Error events that will cause a port bounce. */
 	u32 port_error_action;
+	struct work_struct linkstate_active_work;
+	/* Does this port need to prescan for FECNs */
+	bool cc_prescan;
 };
 
 typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
@@ -773,6 +805,12 @@ struct hfi1_temp {
 	u8 triggers;      /* temperature triggers */
 };
 
+/* common data between shared ASIC HFIs */
+struct hfi1_asic_data {
+	struct hfi1_devdata *dds[2];	/* back pointers */
+	struct mutex asic_resource_mutex;
+};
+
 /* device data struct now contains only "general per-device" info.
  * fields related to a physical IB port are in a hfi1_pportdata struct.
  */
@@ -782,6 +820,7 @@ struct sdma_vl_map;
 #define BOARD_VERS_MAX 96 /* how long the version string can be */
 #define SERIAL_MAX 16 /* length of the serial number */
 
+typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
 struct hfi1_devdata {
 	struct hfi1_ibdev verbs_dev;     /* must be first */
 	struct list_head list;
@@ -811,6 +850,12 @@ struct hfi1_devdata {
 	spinlock_t sc_lock;
 	/* Per VL data. Enough for all VLs but not all elements are set/used. */
 	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
+	/* lock for pio_map */
+	spinlock_t pio_map_lock;
+	/* array of kernel send contexts */
+	struct send_context **kernel_send_context;
+	/* array of vl maps */
+	struct pio_vl_map __rcu *pio_map;
 	/* seqlock for sc2vl */
 	seqlock_t sc2vl_lock;
 	u64 sc2vl[4];
@@ -841,6 +886,8 @@ struct hfi1_devdata {
 	wait_queue_head_t		  sdma_unfreeze_wq;
 	atomic_t			  sdma_unfreeze_count;
 
+	/* common data between shared ASIC HFIs in this OS */
+	struct hfi1_asic_data *asic_data;
 
 	/* hfi1_pportdata, points to array of (physical) port-specific
 	 * data structs, indexed by pidx (0..n-1)
@@ -873,10 +920,11 @@ struct hfi1_devdata {
 	/* reset value */
 	u64 z_int_counter;
 	u64 z_rcv_limit;
+	u64 z_send_schedule;
 	/* percpu int_counter */
 	u64 __percpu *int_counter;
 	u64 __percpu *rcv_limit;
-
+	u64 __percpu *send_schedule;
 	/* number of receive contexts in use by the driver */
 	u32 num_rcv_contexts;
 	/* number of pio send contexts in use by the driver */
@@ -885,6 +933,8 @@ struct hfi1_devdata {
 	 * number of ctxts available for PSM open
 	 */
 	u32 freectxts;
+	/* total number of available user/PSM contexts */
+	u32 num_user_contexts;
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
 
@@ -996,9 +1046,8 @@ struct hfi1_devdata {
 	u16 irev;	/* implementation revision */
 	u16 dc8051_ver; /* 8051 firmware version */
 
+	struct platform_config platform_config;
 	struct platform_config_cache pcfg_cache;
-	/* control high-level access to qsfp */
-	struct mutex qsfp_i2c_mutex;
 
 	struct diag_client *diag_client;
 	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
@@ -1008,8 +1057,6 @@ struct hfi1_devdata {
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors deferred to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1090,10 +1137,8 @@ struct hfi1_devdata {
 	 * Handlers for outgoing data so that snoop/capture does not
 	 * have to have its hooks in the send path
 	 */
-	int (*process_pio_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
-	int (*process_dma_send)(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
-				u64 pbc);
+	send_routine process_pio_send;
+	send_routine process_dma_send;
 	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 				u64 pbc, const void *from, size_t count);
 
@@ -1105,7 +1150,6 @@ struct hfi1_devdata {
 	struct timer_list rcverr_timer;
 	u32 rcv_ovfl_cnt;
 
-	int assigned_node_id;
 	wait_queue_head_t event_queue;
 
 	/* Save the enabled LCB error bits */
@@ -1115,6 +1159,16 @@ struct hfi1_devdata {
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
 	dma_addr_t rcvhdrtail_dummy_physaddr;
+
+	bool eprom_available;	/* true if EPROM is available for this device */
+	bool aspm_supported;	/* Does HW support ASPM */
+	bool aspm_enabled;	/* ASPM state: enabled/disabled */
+	/* Serialize ASPM enable/disable between multiple verbs contexts */
+	spinlock_t aspm_lock;
+	/* Number of verbs contexts which have disabled ASPM */
+	atomic_t aspm_disabled_cnt;
+
+	struct hfi1_affinity *affinity;
 };
 
 /* 8051 firmware version helper */
@@ -1125,6 +1179,9 @@ struct hfi1_devdata {
 #define PT_EAGER    1
 #define PT_INVALID  2
 
+struct tid_rb_node;
+struct mmu_rb_node;
+
 /* Private data for file operations */
 struct hfi1_filedata {
 	struct hfi1_ctxtdata *uctxt;
@@ -1133,6 +1190,16 @@ struct hfi1_filedata {
 	struct hfi1_user_sdma_pkt_q *pq;
 	/* for cpu affinity; -1 if none */
 	int rec_cpu_num;
+	u32 tid_n_pinned;
+	struct rb_root tid_rb_root;
+	struct tid_rb_node **entry_to_rb;
+	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
+	u32 tid_limit;
+	u32 tid_used;
+	u32 *invalid_tids;
+	u32 invalid_tid_idx;
+	/* protect invalid_tids array and invalid_tid_idx */
+	spinlock_t invalid_lock;
 };
 
 extern struct list_head hfi1_dev_list;
@@ -1156,7 +1223,7 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
 int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
 int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
 int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32);
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
 void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
 			 struct hfi1_devdata *, u8, u8);
 void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
@@ -1164,6 +1231,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
 int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
 int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+void set_all_slowpath(struct hfi1_devdata *dd);
 
 /* receive packet handler dispositions */
 #define RCV_PKT_OK      0x0 /* keep going */
@@ -1184,6 +1252,15 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
 	return ppd->lstate; /* use the cached value */
 }
 
+void receive_interrupt_work(struct work_struct *work);
+
+/* extract service channel from header and rhf */
+static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+{
+	return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
+	       ((!!(rhf & RHF_DC_INFO_MASK)) << 4);
+}
+
 static inline u16 generate_jkey(kuid_t uid)
 {
 	return from_kuid(current_user_ns(), uid) & 0xffff;
@@ -1253,7 +1330,7 @@ static inline u32 egress_cycles(u32 len, u32 rate)
 void set_link_ipg(struct hfi1_pportdata *ppd);
 void process_becn(struct hfi1_pportdata *ppd, u8 sl,  u16 rlid, u32 lqpn,
 		  u32 rqpn, u8 svc_type);
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh);
 
@@ -1424,6 +1501,7 @@ static inline int valid_ib_mtu(unsigned int mtu)
 		mtu == 1024 || mtu == 2048 ||
 		mtu == 4096;
 }
+
 static inline int valid_opa_max_mtu(unsigned int mtu)
 {
 	return mtu >= 2048 &&
@@ -1445,12 +1523,13 @@ void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
 int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
-int snoop_send_pio_handler(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			   u64 pbc);
 void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
 			   u64 pbc, const void *from, size_t count);
+int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
 {
@@ -1472,6 +1551,11 @@ static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp)
 	return container_of(ibp, struct hfi1_pportdata, ibport_data);
 }
 
+static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
+{
+	return container_of(rdi, struct hfi1_ibdev, rdi);
+}
+
 static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1515,12 +1599,10 @@ static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
 #define HFI1_HAS_SDMA_TIMEOUT  0x8
 #define HFI1_HAS_SEND_DMA      0x10   /* Supports Send DMA */
 #define HFI1_FORCED_FREEZE     0x80   /* driver forced freeze mode */
-#define HFI1_DO_INIT_ASIC      0x100  /* This device will init the ASIC */
 
 /* IB dword length mask in PBC (lower 11 bits); same for all chips */
 #define HFI1_PBC_LENGTH_MASK                     ((1 << 11) - 1)
 
-
 /* ctxt_flag bit offsets */
 		/* context has been setup */
 #define HFI1_CTXT_SETUP_DONE 1
@@ -1538,14 +1620,10 @@ void hfi1_free_devdata(struct hfi1_devdata *);
 void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define HFI1_LED_LOG 2  /* Logical (link) YELLOW LED */
-void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
+/* LED beaconing functions */
+void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
+			     unsigned int timeoff);
+void shutdown_led_override(struct hfi1_pportdata *ppd);
 
 #define HFI1_CREDIT_RETURN_RATE (100)
 
@@ -1587,12 +1665,13 @@ void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val);
  */
 #define DEFAULT_RCVHDR_ENTSIZE 32
 
+bool hfi1_can_pin_pages(struct hfi1_devdata *, u32, u32);
 int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **);
-void hfi1_release_user_pages(struct page **, size_t, bool);
+void hfi1_release_user_pages(struct mm_struct *, struct page **, size_t, bool);
 
 static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 {
-	*((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
+	*((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
 }
 
 static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1601,7 +1680,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
 	 * volatile because it's a DMA target from the chip, routine is
 	 * inlined, and don't want register caching or reordering.
 	 */
-	return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+	return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
 }
 
 /*
@@ -1633,12 +1712,13 @@ void restore_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
 int get_platform_config_field(struct hfi1_devdata *dd,
-			enum platform_config_table_type_encoding table_type,
-			int table_index, int field_index, u32 *data, u32 len);
+			      enum platform_config_table_type_encoding
+			      table_type, int table_index, int field_index,
+			      u32 *data, u32 len);
 
-dma_addr_t hfi1_map_page(struct pci_dev *, struct page *, unsigned long,
-			 size_t, int);
 const char *get_unit_name(int unit);
+const char *get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1659,7 +1739,7 @@ int process_receive_invalid(struct hfi1_packet *packet);
 
 extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
 
-void update_sge(struct hfi1_sge_state *ss, u32 length);
+void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
 extern unsigned int hfi1_max_mtu;
@@ -1667,7 +1747,7 @@ extern unsigned int hfi1_cu;
 extern unsigned int user_credit_return_threshold;
 extern int num_user_contexts;
 extern unsigned n_krcvqs;
-extern u8 krcvqs[];
+extern uint krcvqs[];
 extern int krcvqsset;
 extern uint kdeth_qp;
 extern uint loopback;
@@ -1829,13 +1909,14 @@ static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd)
 
 	dd->z_int_counter = get_all_cpu_total(dd->int_counter);
 	dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit);
+	dd->z_send_schedule = get_all_cpu_total(dd->send_schedule);
 
 	ppd = (struct hfi1_pportdata *)(dd + 1);
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
-		ppd->ibport_data.z_rc_acks =
-			get_all_cpu_total(ppd->ibport_data.rc_acks);
-		ppd->ibport_data.z_rc_qacks =
-			get_all_cpu_total(ppd->ibport_data.rc_qacks);
+		ppd->ibport_data.rvp.z_rc_acks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_acks);
+		ppd->ibport_data.rvp.z_rc_qacks =
+			get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks);
 	}
 }
 
@@ -1848,6 +1929,18 @@ static inline void setextled(struct hfi1_devdata *dd, u32 on)
 		write_csr(dd, DCC_CFG_LED_CNTRL, 0x10);
 }
 
+/* return the i2c resource given the target */
+static inline u32 i2c_target(u32 target)
+{
+	return target ? CR_I2C2 : CR_I2C1;
+}
+
+/* return the i2c chain chip resource that this HFI uses for QSFP */
+static inline u32 qsfp_resource(struct hfi1_devdata *dd)
+{
+	return i2c_target(dd->hfi1_id);
+}
+
 int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
 
 #endif                          /* _HFI1_KERNEL_H */
diff --git a/drivers/staging/rdma/hfi1/init.c b/drivers/staging/rdma/hfi1/init.c
index 02df291eb172..cfcdc16b41c3 100644
--- a/drivers/staging/rdma/hfi1/init.c
+++ b/drivers/staging/rdma/hfi1/init.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -56,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
+#include <rdma/rdma_vt.h>
 
 #include "hfi.h"
 #include "device.h"
@@ -65,6 +63,7 @@
 #include "sdma.h"
 #include "debugfs.h"
 #include "verbs.h"
+#include "aspm.h"
 
 #undef pr_fmt
 #define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -75,6 +74,7 @@
 #define HFI1_MIN_USER_CTXT_BUFCNT 7
 
 #define HFI1_MIN_HDRQ_EGRBUF_CNT 2
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
 #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
 #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
 
@@ -87,9 +87,9 @@ module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
 MODULE_PARM_DESC(
 	num_user_contexts, "Set max number of user contexts to use");
 
-u8 krcvqs[RXE_NUM_DATA_VL];
+uint krcvqs[RXE_NUM_DATA_VL];
 int krcvqsset;
-module_param_array(krcvqs, byte, &krcvqsset, S_IRUGO);
+module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO);
 MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
@@ -128,16 +128,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 {
 	unsigned i;
 	int ret;
-	int local_node_id = pcibus_to_node(dd->pcidev->bus);
 
 	/* Control context has to be always 0 */
 	BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
 
-	if (local_node_id < 0)
-		local_node_id = numa_node_id();
-	dd->assigned_node_id = local_node_id;
-
-	dd->rcd = kcalloc(dd->num_rcv_contexts, sizeof(*dd->rcd), GFP_KERNEL);
+	dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
+			       GFP_KERNEL, dd->node);
 	if (!dd->rcd)
 		goto nomem;
 
@@ -147,10 +143,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd;
 
 		ppd = dd->pport + (i % dd->num_pports);
-		rcd = hfi1_create_ctxtdata(ppd, i);
+		rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
 		if (!rcd) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel receive context, failing\n");
+				   "Unable to allocate kernel receive context, failing\n");
 			goto nomem;
 		}
 		/*
@@ -171,7 +167,7 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
 		if (!rcd->sc) {
 			dd_dev_err(dd,
-				"Unable to allocate kernel send context, failing\n");
+				   "Unable to allocate kernel send context, failing\n");
 			dd->rcd[rcd->ctxt] = NULL;
 			hfi1_free_ctxtdata(dd, rcd);
 			goto nomem;
@@ -189,6 +185,12 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 		}
 	}
 
+	/*
+	 * Initialize aspm, to be done after gen3 transition and setting up
+	 * contexts and before enabling interrupts
+	 */
+	aspm_init(dd);
+
 	return 0;
 nomem:
 	ret = -ENOMEM;
@@ -201,7 +203,8 @@ bail:
 /*
  * Common code for user and kernel context setup.
  */
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
+struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
+					   int numa)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	struct hfi1_ctxtdata *rcd;
@@ -224,10 +227,10 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
 		rcd->cnt = 1;
 		rcd->ctxt = ctxt;
 		dd->rcd[ctxt] = rcd;
-		rcd->numa_id = numa_node_id();
+		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
 
-		spin_lock_init(&rcd->exp_lock);
+		mutex_init(&rcd->exp_lock);
 
 		/*
 		 * Calculate the context's RcvArray entry starting point.
@@ -260,7 +263,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt)
 		/* Validate and initialize Rcv Hdr Q variables */
 		if (rcvhdrcnt % HDRQ_INCREMENT) {
 			dd_dev_err(dd,
-				   "ctxt%u: header queue count %d must be divisible by %d\n",
+				   "ctxt%u: header queue count %d must be divisible by %lu\n",
 				   rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
 			goto bail;
 		}
@@ -379,7 +382,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		/*
 		 * This should _never_ happen - rcu_read_lock() is held,
 		 * and set_link_ipg() should not be called if cc_state
@@ -431,7 +434,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return HRTIMER_NORESTART;
 	}
@@ -493,14 +496,19 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 	INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
 	INIT_WORK(&ppd->link_up_work, handle_link_up);
 	INIT_WORK(&ppd->link_down_work, handle_link_down);
+	INIT_WORK(&ppd->dc_host_req_work, handle_8051_request);
 	INIT_WORK(&ppd->freeze_work, handle_freeze);
 	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
 	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
 	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
+	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
+
 	mutex_init(&ppd->hls_lock);
 	spin_lock_init(&ppd->sdma_alllock);
 	spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
+	ppd->qsfp_info.ppd = ppd;
 	ppd->sm_trap_qp = 0x0;
 	ppd->sa_qp = 0x1;
 
@@ -582,8 +590,8 @@ static void enable_chip(struct hfi1_devdata *dd)
 	 * Enable kernel ctxts' receive and receive interrupt.
 	 * Other ctxts done as user opens and initializes them.
 	 */
-	rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 	for (i = 0; i < dd->first_user_ctxt; ++i) {
+		rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
 		rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
@@ -729,7 +737,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 			lastfail = hfi1_setup_eagerbufs(rcd);
 		if (lastfail)
 			dd_dev_err(dd,
-				"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
+				   "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
 	}
 	if (lastfail)
 		ret = lastfail;
@@ -762,7 +770,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 	/* enable chip even if we have an error, so we can debug cause */
 	enable_chip(dd);
 
-	ret = hfi1_cq_init(dd);
 done:
 	/*
 	 * Set status even if port serdes is not initialized
@@ -779,20 +786,15 @@ done:
 		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 			ppd = dd->pport + pidx;
 
-			/* initialize the qsfp if it exists
-			 * Requires interrupts to be enabled so we are notified
-			 * when the QSFP completes reset, and has
-			 * to be done before bringing up the SERDES
+			/*
+			 * start the serdes - must be after interrupts are
+			 * enabled so we are notified when the link goes up
 			 */
-			init_qsfp(ppd);
-
-			/* start the serdes - must be after interrupts are
-			   enabled so we are notified when the link goes up */
 			lastfail = bringup_serdes(ppd);
 			if (lastfail)
 				dd_dev_info(dd,
-					"Failed to bring up port %u\n",
-					ppd->port);
+					    "Failed to bring up port %u\n",
+					    ppd->port);
 
 			/*
 			 * Set status even if port serdes is not initialized
@@ -904,6 +906,8 @@ static void shutdown_device(struct hfi1_devdata *dd)
 		/* disable the send device */
 		pio_send_control(dd, PSC_GLOBAL_DISABLE);
 
+		shutdown_led_override(ppd);
+
 		/*
 		 * Clear SerdesEnable.
 		 * We can't count on interrupts since we are stopping.
@@ -961,17 +965,33 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	kfree(rcd->egrbufs.buffers);
 
 	sc_free(rcd->sc);
-	vfree(rcd->physshadow);
-	vfree(rcd->tid_pg_list);
 	vfree(rcd->user_event_mask);
 	vfree(rcd->subctxt_uregbase);
 	vfree(rcd->subctxt_rcvegrbuf);
 	vfree(rcd->subctxt_rcvhdr_base);
-	kfree(rcd->tidusemap);
 	kfree(rcd->opstats);
 	kfree(rcd);
 }
 
+/*
+ * Release our hold on the shared asic data.  If we are the last one,
+ * free the structure.  Must be holding hfi1_devs_lock.
+ */
+static void release_asic_data(struct hfi1_devdata *dd)
+{
+	int other;
+
+	if (!dd->asic_data)
+		return;
+	dd->asic_data->dds[dd->hfi1_id] = NULL;
+	other = dd->hfi1_id ? 0 : 1;
+	if (!dd->asic_data->dds[other]) {
+		/* we are the last holder, free it */
+		kfree(dd->asic_data);
+	}
+	dd->asic_data = NULL;
+}
+
 void hfi1_free_devdata(struct hfi1_devdata *dd)
 {
 	unsigned long flags;
@@ -979,12 +999,15 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 	idr_remove(&hfi1_unit_table, dd->unit);
 	list_del(&dd->list);
+	release_asic_data(dd);
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+	free_platform_config(dd);
 	rcu_barrier(); /* wait for rcu callbacks to complete */
 	free_percpu(dd->int_counter);
 	free_percpu(dd->rcv_limit);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	hfi1_dev_affinity_free(dd);
+	free_percpu(dd->send_schedule);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 /*
@@ -999,19 +1022,19 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 {
 	unsigned long flags;
 	struct hfi1_devdata *dd;
-	int ret;
+	int ret, nports;
 
-	dd = (struct hfi1_devdata *)ib_alloc_device(sizeof(*dd) + extra);
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct hfi1_pportdata);
+
+	dd = (struct hfi1_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						     nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
-	/* extra is * number of ports */
-	dd->num_pports = extra / sizeof(struct hfi1_pportdata);
+	dd->num_pports = nports;
 	dd->pport = (struct hfi1_pportdata *)(dd + 1);
 
 	INIT_LIST_HEAD(&dd->list);
-	dd->node = dev_to_node(&pdev->dev);
-	if (dd->node < 0)
-		dd->node = 0;
 	idr_preload(GFP_KERNEL);
 	spin_lock_irqsave(&hfi1_devs_lock, flags);
 
@@ -1041,9 +1064,9 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 	spin_lock_init(&dd->sc_init_lock);
 	spin_lock_init(&dd->dc8051_lock);
 	spin_lock_init(&dd->dc8051_memlock);
-	mutex_init(&dd->qsfp_i2c_mutex);
 	seqlock_init(&dd->sc2vl_lock);
 	spin_lock_init(&dd->sde_map_lock);
+	spin_lock_init(&dd->pio_map_lock);
 	init_waitqueue_head(&dd->event_queue);
 
 	dd->int_counter = alloc_percpu(u64);
@@ -1062,6 +1085,14 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 		goto bail;
 	}
 
+	dd->send_schedule = alloc_percpu(u64);
+	if (!dd->send_schedule) {
+		ret = -ENOMEM;
+		hfi1_early_err(&pdev->dev,
+			       "Could not allocate per-cpu int_counter\n");
+		goto bail;
+	}
+
 	if (!hfi1_cpulist_count) {
 		u32 count = num_online_cpus();
 
@@ -1074,13 +1105,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
 			&pdev->dev,
 			"Could not alloc cpulist info, cpu affinity might be wrong\n");
 	}
-	hfi1_dbg_ibdev_init(&dd->verbs_dev);
 	return dd;
 
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1173,8 +1203,10 @@ static int __init hfi1_mod_init(void)
 		user_credit_return_threshold = 100;
 
 	compute_krcvqs();
-	/* sanitize receive interrupt count, time must wait until after
-	   the hardware type is known */
+	/*
+	 * sanitize receive interrupt count, time must wait until after
+	 * the hardware type is known
+	 */
 	if (rcv_intr_count > RCV_HDR_HEAD_COUNTER_MASK)
 		rcv_intr_count = RCV_HDR_HEAD_COUNTER_MASK;
 	/* reject invalid combinations */
@@ -1209,6 +1241,9 @@ static int __init hfi1_mod_init(void)
 	idr_init(&hfi1_unit_table);
 
 	hfi1_dbg_init();
+	ret = hfi1_wss_init();
+	if (ret < 0)
+		goto bail_wss;
 	ret = pci_register_driver(&hfi1_pci_driver);
 	if (ret < 0) {
 		pr_err("Unable to register driver: error %d\n", -ret);
@@ -1217,6 +1252,8 @@ static int __init hfi1_mod_init(void)
 	goto bail; /* all OK */
 
 bail_dev:
+	hfi1_wss_exit();
+bail_wss:
 	hfi1_dbg_exit();
 	idr_destroy(&hfi1_unit_table);
 	dev_cleanup();
@@ -1232,6 +1269,7 @@ module_init(hfi1_mod_init);
 static void __exit hfi1_mod_cleanup(void)
 {
 	pci_unregister_driver(&hfi1_pci_driver);
+	hfi1_wss_exit();
 	hfi1_dbg_exit();
 	hfi1_cpulist_count = 0;
 	kfree(hfi1_cpulist);
@@ -1303,16 +1341,18 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		}
 	}
 	kfree(tmp);
+	free_pio_map(dd);
 	/* must follow rcv context free - need to remove rcv's hooks */
 	for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
 		sc_free(dd->send_contexts[ctxt].sc);
 	dd->num_send_contexts = 0;
 	kfree(dd->send_contexts);
 	dd->send_contexts = NULL;
+	kfree(dd->hw_to_sw);
+	dd->hw_to_sw = NULL;
 	kfree(dd->boardname);
 	vfree(dd->events);
 	vfree(dd->status);
-	hfi1_cq_exit(dd);
 }
 
 /*
@@ -1346,6 +1386,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		ret = -EINVAL;
 		goto bail;
 	}
+	if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+		hfi1_early_err(&pdev->dev,
+			       "Receive header queue count cannot be greater than %u\n",
+			       HFI1_MAX_HDRQ_EGRBUF_CNT);
+		ret = -EINVAL;
+		goto bail;
+	}
 	/* use the encoding function as a sanitization check */
 	if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
 		hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1422,8 +1469,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * we still create devices, so diags, etc. can be used
 	 * to determine cause of problem.
 	 */
-	if (!initfail && !ret)
+	if (!initfail && !ret) {
 		dd->flags |= HFI1_INITTED;
+		/* create debufs files after init and ib register */
+		hfi1_dbg_ibdev_init(&dd->verbs_dev);
+	}
 
 	j = hfi1_device_create(dd);
 	if (j)
@@ -1464,6 +1514,8 @@ static void remove_one(struct pci_dev *pdev)
 {
 	struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
+	/* close debugfs files before ib unregister */
+	hfi1_dbg_ibdev_exit(&dd->verbs_dev);
 	/* unregister from IB core */
 	hfi1_unregister_ib_device(dd);
 
@@ -1516,18 +1568,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 
 		if (!rcd->rcvhdrq) {
 			dd_dev_err(dd,
-				"attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
-				amt, rcd->ctxt);
+				   "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n",
+				   amt, rcd->ctxt);
 			goto bail;
 		}
 
-		/* Event mask is per device now and is in hfi1_devdata */
-		/*if (rcd->ctxt >= dd->first_user_ctxt) {
-			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
-			if (!rcd->user_event_mask)
-				goto bail_free_hdrq;
-				}*/
-
 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 			rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
 				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
@@ -1568,8 +1613,8 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 
 bail_free:
 	dd_dev_err(dd,
-		"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
-		rcd->ctxt);
+		   "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
+		   rcd->ctxt);
 	vfree(rcd->user_event_mask);
 	rcd->user_event_mask = NULL;
 	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
@@ -1659,7 +1704,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			if (rcd->egrbufs.rcvtid_size == round_mtu ||
 			    !HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
 				dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
-					rcd->ctxt);
+					   rcd->ctxt);
 				goto bail_rcvegrbuf_phys;
 			}
 
@@ -1694,8 +1739,9 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 				     rcd->egrbufs.buffers[j].len)) {
 					j++;
 					offset = 0;
-				} else
+				} else {
 					offset += new_size;
+				}
 			}
 			rcd->egrbufs.rcvtid_size = new_size;
 		}
@@ -1708,7 +1754,6 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 		  rcd->ctxt, rcd->egrbufs.alloced, rcd->egrbufs.rcvtid_size,
 		  rcd->egrbufs.size);
 
-
 	/*
 	 * Set the contexts rcv array head update threshold to the closest
 	 * power of 2 (so we can use a mask instead of modulo) below half
@@ -1742,14 +1787,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 
 	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
 		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
-			      rcd->egrbufs.rcvtids[idx].phys, order);
+			     rcd->egrbufs.rcvtids[idx].phys, order);
 		cond_resched();
 	}
 	goto bail;
 
 bail_rcvegrbuf_phys:
 	for (idx = 0; idx < rcd->egrbufs.alloced &&
-		     rcd->egrbufs.buffers[idx].addr;
+	     rcd->egrbufs.buffers[idx].addr;
 	     idx++) {
 		dma_free_coherent(&dd->pcidev->dev,
 				  rcd->egrbufs.buffers[idx].len,
diff --git a/drivers/staging/rdma/hfi1/intr.c b/drivers/staging/rdma/hfi1/intr.c
index 426582b9ab65..65348d16ab2f 100644
--- a/drivers/staging/rdma/hfi1/intr.c
+++ b/drivers/staging/rdma/hfi1/intr.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -98,7 +95,7 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
 	 */
 	if (!(dd->flags & HFI1_INITTED))
 		return;
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);
@@ -131,28 +128,26 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		 * NOTE: This uses this device's vAU, vCU, and vl15_init for
 		 * the remote values.  Both sides must be using the values.
 		 */
-		if (quick_linkup
-			    || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
 			set_up_vl15(dd, dd->vau, dd->vl15_init);
 			assign_remote_cm_au_table(dd, dd->vcu);
 			ppd->neighbor_guid =
-				read_csr(dd,
-					DC_DC8051_STS_REMOTE_GUID);
+				read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
 			ppd->neighbor_type =
 				read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
 					DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
 			ppd->neighbor_port_number =
 				read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
-					DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
-			dd_dev_info(dd,
-				"Neighbor GUID: %llx Neighbor type %d\n",
-				ppd->neighbor_guid,
-				ppd->neighbor_type);
+					 DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+			dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
+				    ppd->neighbor_guid,
+				    ppd->neighbor_type);
 		}
 
 		/* physical link went up */
 		ppd->linkup = 1;
-		ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
 
 		/* link widths are not available until the link is fully up */
 		get_linkup_link_widths(ppd);
@@ -165,7 +160,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		reset_link_credits(dd);
 
 		/* freeze after a link down to guarantee a clean egress */
-		start_freeze_handling(ppd, FREEZE_SELF|FREEZE_LINK_DOWN);
+		start_freeze_handling(ppd, FREEZE_SELF | FREEZE_LINK_DOWN);
 
 		ev = IB_EVENT_PORT_ERR;
 
@@ -177,8 +172,6 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
 		/* notify IB of the link change */
 		signal_ib_event(ppd, ev);
 	}
-
-
 }
 
 /*
diff --git a/drivers/staging/rdma/hfi1/iowait.h b/drivers/staging/rdma/hfi1/iowait.h
index e8ba5606d08d..2ec6ef38d389 100644
--- a/drivers/staging/rdma/hfi1/iowait.h
+++ b/drivers/staging/rdma/hfi1/iowait.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,6 +51,8 @@
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 
+#include "sdma_txreq.h"
+
 /*
  * typedef (*restart_t)() - restart callback
  * @work: pointer to work structure
@@ -67,9 +66,11 @@ struct sdma_engine;
  * @list: used to add/insert into QP/PQ wait lists
  * @tx_head: overflow list of sdma_txreq's
  * @sleep: no space callback
- * @wakeup: space callback
+ * @wakeup: space callback wakeup
+ * @sdma_drained: sdma count drained
  * @iowork: workqueue overhead
  * @wait_dma: wait for sdma_busy == 0
+ * @wait_pio: wait for pio_busy == 0
  * @sdma_busy: # of packets in flight
  * @count: total number of descriptors in tx_head'ed list
  * @tx_limit: limit for overflow queuing
@@ -101,9 +102,12 @@ struct iowait {
 		struct sdma_txreq *tx,
 		unsigned seq);
 	void (*wakeup)(struct iowait *wait, int reason);
+	void (*sdma_drained)(struct iowait *wait);
 	struct work_struct iowork;
 	wait_queue_head_t wait_dma;
+	wait_queue_head_t wait_pio;
 	atomic_t sdma_busy;
+	atomic_t pio_busy;
 	u32 count;
 	u32 tx_limit;
 	u32 tx_count;
@@ -117,7 +121,7 @@ struct iowait {
  * @tx_limit: limit for overflow queuing
  * @func: restart function for workqueue
  * @sleep: sleep function for no space
- * @wakeup: wakeup function for no space
+ * @resume: wakeup function for no space
  *
  * This function initializes the iowait
  * structure embedded in the QP or PQ.
@@ -133,17 +137,21 @@ static inline void iowait_init(
 		struct iowait *wait,
 		struct sdma_txreq *tx,
 		unsigned seq),
-	void (*wakeup)(struct iowait *wait, int reason))
+	void (*wakeup)(struct iowait *wait, int reason),
+	void (*sdma_drained)(struct iowait *wait))
 {
 	wait->count = 0;
 	INIT_LIST_HEAD(&wait->list);
 	INIT_LIST_HEAD(&wait->tx_head);
 	INIT_WORK(&wait->iowork, func);
 	init_waitqueue_head(&wait->wait_dma);
+	init_waitqueue_head(&wait->wait_pio);
 	atomic_set(&wait->sdma_busy, 0);
+	atomic_set(&wait->pio_busy, 0);
 	wait->tx_limit = tx_limit;
 	wait->sleep = sleep;
 	wait->wakeup = wakeup;
+	wait->sdma_drained = sdma_drained;
 }
 
 /**
@@ -174,6 +182,88 @@ static inline void iowait_sdma_drain(struct iowait *wait)
 }
 
 /**
+ * iowait_sdma_pending() - return sdma pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_sdma_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_inc - note sdma io pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_add - add count to pending
+ * @wait: iowait structure
+ */
+static inline void iowait_sdma_add(struct iowait *wait, int count)
+{
+	atomic_add(count, &wait->sdma_busy);
+}
+
+/**
+ * iowait_sdma_dec - note sdma complete
+ * @wait: iowait structure
+ */
+static inline int iowait_sdma_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->sdma_busy);
+}
+
+/**
+ * iowait_pio_drain() - wait for pios to drain
+ *
+ * @wait: iowait structure
+ *
+ * This will delay until the iowait pios have
+ * completed.
+ */
+static inline void iowait_pio_drain(struct iowait *wait)
+{
+	wait_event_timeout(wait->wait_pio,
+			   !atomic_read(&wait->pio_busy),
+			   HZ);
+}
+
+/**
+ * iowait_pio_pending() - return pio pending count
+ *
+ * @wait: iowait structure
+ *
+ */
+static inline int iowait_pio_pending(struct iowait *wait)
+{
+	return atomic_read(&wait->pio_busy);
+}
+
+/**
+ * iowait_pio_inc - note pio pending
+ * @wait: iowait structure
+ */
+static inline void iowait_pio_inc(struct iowait *wait)
+{
+	atomic_inc(&wait->pio_busy);
+}
+
+/**
+ * iowait_sdma_dec - note pio complete
+ * @wait: iowait structure
+ */
+static inline int iowait_pio_dec(struct iowait *wait)
+{
+	return atomic_dec_and_test(&wait->pio_busy);
+}
+
+/**
  * iowait_drain_wakeup() - trigger iowait_drain() waiter
  *
  * @wait: iowait structure
@@ -183,6 +273,28 @@ static inline void iowait_sdma_drain(struct iowait *wait)
 static inline void iowait_drain_wakeup(struct iowait *wait)
 {
 	wake_up(&wait->wait_dma);
+	wake_up(&wait->wait_pio);
+	if (wait->sdma_drained)
+		wait->sdma_drained(wait);
+}
+
+/**
+ * iowait_get_txhead() - get packet off of iowait list
+ *
+ * @wait wait struture
+ */
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+{
+	struct sdma_txreq *tx = NULL;
+
+	if (!list_empty(&wait->tx_head)) {
+		tx = list_first_entry(
+			&wait->tx_head,
+			struct sdma_txreq,
+			list);
+		list_del_init(&tx->list);
+	}
+	return tx;
 }
 
 #endif
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
deleted file mode 100644
index e34f093a6b55..000000000000
--- a/drivers/staging/rdma/hfi1/keys.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include "hfi.h"
-
-/**
- * hfi1_alloc_lkey - allocate an lkey
- * @mr: memory region that this lkey protects
- * @dma_region: 0->normal key, 1->restricted DMA key
- *
- * Returns 0 if successful, otherwise returns -errno.
- *
- * Increments mr reference count as required.
- *
- * Sets the lkey field mr for non-dma regions.
- *
- */
-
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region)
-{
-	unsigned long flags;
-	u32 r;
-	u32 n;
-	int ret = 0;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-
-	hfi1_get_mr(mr);
-	spin_lock_irqsave(&rkt->lock, flags);
-
-	/* special case for dma_mr lkey == 0 */
-	if (dma_region) {
-		struct hfi1_mregion *tmr;
-
-		tmr = rcu_access_pointer(dev->dma_mr);
-		if (!tmr) {
-			rcu_assign_pointer(dev->dma_mr, mr);
-			mr->lkey_published = 1;
-		} else {
-			hfi1_put_mr(mr);
-		}
-		goto success;
-	}
-
-	/* Find the next available LKEY */
-	r = rkt->next;
-	n = r;
-	for (;;) {
-		if (!rcu_access_pointer(rkt->table[r]))
-			break;
-		r = (r + 1) & (rkt->max - 1);
-		if (r == n)
-			goto bail;
-	}
-	rkt->next = (r + 1) & (rkt->max - 1);
-	/*
-	 * Make sure lkey is never zero which is reserved to indicate an
-	 * unrestricted LKEY.
-	 */
-	rkt->gen++;
-	/*
-	 * bits are capped in verbs.c to ensure enough bits for
-	 * generation number
-	 */
-	mr->lkey = (r << (32 - hfi1_lkey_table_size)) |
-		((((1 << (24 - hfi1_lkey_table_size)) - 1) & rkt->gen)
-		 << 8);
-	if (mr->lkey == 0) {
-		mr->lkey = 1 << 8;
-		rkt->gen++;
-	}
-	rcu_assign_pointer(rkt->table[r], mr);
-	mr->lkey_published = 1;
-success:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-out:
-	return ret;
-bail:
-	hfi1_put_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = -ENOMEM;
-	goto out;
-}
-
-/**
- * hfi1_free_lkey - free an lkey
- * @mr: mr to free from tables
- */
-void hfi1_free_lkey(struct hfi1_mregion *mr)
-{
-	unsigned long flags;
-	u32 lkey = mr->lkey;
-	u32 r;
-	struct hfi1_ibdev *dev = to_idev(mr->pd->device);
-	struct hfi1_lkey_table *rkt = &dev->lk_table;
-	int freed = 0;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (!mr->lkey_published)
-		goto out;
-	if (lkey == 0)
-		RCU_INIT_POINTER(dev->dma_mr, NULL);
-	else {
-		r = lkey >> (32 - hfi1_lkey_table_size);
-		RCU_INIT_POINTER(rkt->table[r], NULL);
-	}
-	mr->lkey_published = 0;
-	freed++;
-out:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	if (freed) {
-		synchronize_rcu();
-		hfi1_put_mr(mr);
-	}
-}
-
-/**
- * hfi1_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
-/**
- * hfi1_rkey_ok - check the IB virtual address, length, and RKEY
- * @qp: qp for validation
- * @sge: SGE state
- * @len: length of data
- * @vaddr: virtual address to place data
- * @rkey: rkey to check
- * @acc: access flags
- *
- * Return 1 if successful, otherwise 0.
- *
- * increments the reference count upon success
- */
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc)
-{
-	struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct hfi1_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use RKEY == zero for kernel virtual addresses
-	 * (see hfi1_get_dma_mr and dma.c).
-	 */
-	rcu_read_lock();
-	if (rkey == 0) {
-		struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
-		struct hfi1_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		atomic_inc(&mr->refcount);
-		rcu_read_unlock();
-
-		sge->mr = mr;
-		sge->vaddr = (void *) vaddr;
-		sge->length = len;
-		sge->sge_length = len;
-		sge->m = 0;
-		sge->n = 0;
-		goto ok;
-	}
-
-	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - hfi1_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
-		goto bail;
-
-	off = vaddr - mr->iova;
-	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
-		     (mr->access_flags & acc) == 0))
-		goto bail;
-	atomic_inc(&mr->refcount);
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off / HFI1_SEGSZ;
-		n = entries_spanned_by_off % HFI1_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= HFI1_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	sge->mr = mr;
-	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	sge->length = mr->map[m]->segs[n].length - off;
-	sge->sge_length = len;
-	sge->m = m;
-	sge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c
index 77700b818e3d..d1e7f4d7cf6f 100644
--- a/drivers/staging/rdma/hfi1/mad.c
+++ b/drivers/staging/rdma/hfi1/mad.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -55,6 +52,7 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
+#include "qp.h"
 
 /* the reset value from the FM is supposed to be 0xffff, handle both */
 #define OPA_LINK_WIDTH_RESET_OLD 0x0fff
@@ -91,7 +89,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 	int pkey_idx;
 	u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -100,7 +98,8 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout && time_before(jiffies,
+						 ibp->rvp.trap_timeout))
 		return;
 
 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -121,42 +120,43 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = OPA_SMI_CLASS_VERSION;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->route.lid.data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = hfi1_create_qp0_ah(ibp, ibp->sm_lid);
-			if (IS_ERR(ah))
+			ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
+			if (IS_ERR(ah)) {
 				ret = PTR_ERR(ah);
-			else {
+			} else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
-		} else
+		} else {
 			ret = -EINVAL;
+		}
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
@@ -174,10 +174,10 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 	memset(&data, 0, sizeof(data));
 
 	if (trap_num == OPA_TRAP_BAD_P_KEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -233,9 +233,12 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
 	struct opa_mad_notice_attr data;
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	u32 lid = ppd_from_ibp(ibp)->lid;
 
 	memset(&data, 0, sizeof(data));
@@ -245,7 +248,7 @@ void hfi1_cap_mask_chg(struct hfi1_ibport *ibp)
 	data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
 	data.issuer_lid = cpu_to_be32(lid);
 	data.ntc_144.lid = data.issuer_lid;
-	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 
 	send_trap(ibp, &data, sizeof(data));
 }
@@ -407,37 +410,38 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (mad->method == IB_MGMT_METHOD_GET ||
 	     mad->method == IB_MGMT_METHOD_SET ||
 	     mad->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (mad->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			bad_mkey(ibp, mad, mkey, dr_slid, return_path,
 				 hop_cnt);
@@ -501,16 +505,6 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
 		write_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, reg);
 }
 
-static u8 __opa_porttype(struct hfi1_pportdata *ppd)
-{
-	if (qsfp_mod_present(ppd)) {
-		if (ppd->qsfp_info.cache_valid)
-			return OPA_PORT_TYPE_STANDARD;
-		return OPA_PORT_TYPE_DISCONNECTED;
-	}
-	return OPA_PORT_TYPE_UNKNOWN;
-}
-
 static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
@@ -522,6 +516,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	struct opa_port_info *pi = (struct opa_port_info *)data;
 	u8 mtu;
 	u8 credit_rate;
+	u8 is_beaconing_active;
 	u32 state;
 	u32 num_ports = OPA_AM_NPORT(am);
 	u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -538,8 +533,8 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	ppd = dd->pport + (port - 1);
 	ibp = &ppd->ibport_data;
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -548,14 +543,14 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pi->mkey = ibp->mkey;
-
-	pi->subnet_prefix = ibp->gid_prefix;
-	pi->sm_lid = cpu_to_be32(ibp->sm_lid);
-	pi->ib_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-	pi->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pi->mkey = ibp->rvp.mkey;
+
+	pi->subnet_prefix = ibp->rvp.gid_prefix;
+	pi->sm_lid = cpu_to_be32(ibp->rvp.sm_lid);
+	pi->ib_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+	pi->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pi->sm_trap_qp = cpu_to_be32(ppd->sm_trap_qp);
 	pi->sa_qp = cpu_to_be32(ppd->sa_qp);
 
@@ -581,38 +576,45 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	if (start_of_sm_config && (state == IB_PORT_INIT))
 		ppd->is_sm_config_started = 1;
 
-	pi->port_phys_conf = __opa_porttype(ppd) & 0xf;
+	pi->port_phys_conf = (ppd->port_type & 0xf);
 
 #if PI_LED_ENABLE_SUP
 	pi->port_states.ledenable_offlinereason = ppd->neighbor_normal << 4;
 	pi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	pi->port_states.ledenable_offlinereason |= is_beaconing_active << 6;
 	pi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	pi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	pi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	pi->port_states.offline_reason |= ppd->offline_disabled_reason &
-						OPA_PI_MASK_OFFLINE_REASON;
+	pi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	pi->port_states.portphysstate_portstate =
 		(hfi1_ibphys_portstate(ppd) << 4) | state;
 
-	pi->mkeyprotect_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 
 	memset(pi->neigh_mtu.pvlx_to_mtu, 0, sizeof(pi->neigh_mtu.pvlx_to_mtu));
 	for (i = 0; i < ppd->vls_supported; i++) {
 		mtu = mtu_to_enum(dd->vld[i].mtu, HFI1_DEFAULT_ACTIVE_MTU);
 		if ((i % 2) == 0)
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= (mtu << 4);
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= (mtu << 4);
 		else
-			pi->neigh_mtu.pvlx_to_mtu[i/2] |= mtu;
+			pi->neigh_mtu.pvlx_to_mtu[i / 2] |= mtu;
 	}
 	/* don't forget VL 15 */
 	mtu = mtu_to_enum(dd->vld[15].mtu, 2048);
-	pi->neigh_mtu.pvlx_to_mtu[15/2] |= mtu;
-	pi->smsl = ibp->sm_sl & OPA_PI_MASK_SMSL;
+	pi->neigh_mtu.pvlx_to_mtu[15 / 2] |= mtu;
+	pi->smsl = ibp->rvp.sm_sl & OPA_PI_MASK_SMSL;
 	pi->operational_vls = hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS);
 	pi->partenforce_filterraw |=
 		(ppd->linkinit_reason & OPA_PI_MASK_LINKINIT_REASON);
@@ -620,17 +622,17 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_IN;
 	if (ppd->part_enforce & HFI1_PART_ENFORCE_OUT)
 		pi->partenforce_filterraw |= OPA_PI_MASK_PARTITION_ENFORCE_OUT;
-	pi->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pi->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pi->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pi->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pi->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pi->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 
 	pi->vl.cap = ppd->vls_supported;
-	pi->vl.high_limit = cpu_to_be16(ibp->vl_high_limit);
+	pi->vl.high_limit = cpu_to_be16(ibp->rvp.vl_high_limit);
 	pi->vl.arb_high_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_CAP);
 	pi->vl.arb_low_cap = (u8)hfi1_get_ib_cfg(ppd, HFI1_IB_CFG_VL_LOW_CAP);
 
-	pi->clientrereg_subnettimeout = ibp->subnet_timeout;
+	pi->clientrereg_subnettimeout = ibp->rvp.subnet_timeout;
 
 	pi->port_link_mode  = cpu_to_be16(OPA_PORT_LINK_MODE_OPA << 10 |
 					  OPA_PORT_LINK_MODE_OPA << 5 |
@@ -701,8 +703,10 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	/* read the cached value of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
 	read_lcb_cache(DC_LCB_STS_ROUND_TRIP_LTP_CNT, &tmp);
 
-	/* this counter is 16 bits wide, but the replay_depth.wire
-	 * variable is only 8 bits */
+	/*
+	 * this counter is 16 bits wide, but the replay_depth.wire
+	 * variable is only 8 bits
+	 */
 	if (tmp > 0xff)
 		tmp = 0xff;
 	pi->replay_depth.wire = tmp;
@@ -749,7 +753,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16) (npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
 
@@ -763,7 +767,7 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	p = (__be16 *) data;
+	p = (__be16 *)data;
 	q = (u16 *)data;
 	/* get the real pkeys if we are requesting the first block */
 	if (start_block == 0) {
@@ -772,9 +776,9 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 			p[i] = cpu_to_be16(q[i]);
 		if (resp_len)
 			*resp_len += size;
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	return reply((struct ib_mad_hdr *)smp);
 }
 
@@ -901,8 +905,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 	u32 logical_old = driver_logical_state(ppd);
 	int ret, logical_allowed, physical_allowed;
 
-	logical_allowed = ret =
-		logical_transition_allowed(logical_old, logical_new);
+	ret = logical_transition_allowed(logical_old, logical_new);
+	logical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -912,8 +916,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 		return ret;
 	}
 
-	physical_allowed = ret =
-		physical_transition_allowed(physical_old, physical_new);
+	ret = physical_transition_allowed(physical_old, physical_new);
+	physical_allowed = ret;
 
 	if (ret == HFI_TRANSITION_DISALLOWED ||
 	    ret == HFI_TRANSITION_UNDEFINED) {
@@ -928,6 +932,14 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
 		return HFI_TRANSITION_IGNORED;
 
 	/*
+	 * A change request of Physical Port State from
+	 * 'Offline' to 'Polling' should be ignored.
+	 */
+	if ((physical_old == OPA_PORTPHYSSTATE_OFFLINE) &&
+	    (physical_new == IB_PORTPHYSSTATE_POLLING))
+		return HFI_TRANSITION_IGNORED;
+
+	/*
 	 * Either physical_allowed or logical_allowed is
 	 * HFI_TRANSITION_ALLOWED.
 	 */
@@ -972,16 +984,15 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
 			break;
 		/* FALLTHROUGH */
 	case IB_PORT_DOWN:
-		if (phys_state == IB_PORTPHYSSTATE_NOP)
+		if (phys_state == IB_PORTPHYSSTATE_NOP) {
 			link_state = HLS_DN_DOWNDEF;
-		else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
+		} else if (phys_state == IB_PORTPHYSSTATE_POLLING) {
 			link_state = HLS_DN_POLL;
-			set_link_down_reason(ppd,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE, 0,
-			     OPA_LINKDOWN_REASON_FM_BOUNCE);
-		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED)
+			set_link_down_reason(ppd, OPA_LINKDOWN_REASON_FM_BOUNCE,
+					     0, OPA_LINKDOWN_REASON_FM_BOUNCE);
+		} else if (phys_state == IB_PORTPHYSSTATE_DISABLED) {
 			link_state = HLS_DN_DISABLE;
-		else {
+		} else {
 			pr_warn("SubnSet(OPA_PortInfo) invalid physical state 0x%x\n",
 				phys_state);
 			smp->status |= IB_SMP_INVALID_FIELD;
@@ -991,11 +1002,11 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
 		set_link_state(ppd, link_state);
 		if (link_state == HLS_DN_DISABLE &&
 		    (ppd->offline_disabled_reason >
-		     OPA_LINKDOWN_REASON_SMA_DISABLED ||
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED) ||
 		     ppd->offline_disabled_reason ==
-		     OPA_LINKDOWN_REASON_NONE))
+		     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE)))
 			ppd->offline_disabled_reason =
-			OPA_LINKDOWN_REASON_SMA_DISABLED;
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 		/*
 		 * Don't send a reply if the response would be sent
 		 * through the disabled port.
@@ -1091,13 +1102,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	ls_old = driver_lstate(ppd);
 
-	ibp->mkey = pi->mkey;
-	ibp->gid_prefix = pi->subnet_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
+	ibp->rvp.mkey = pi->mkey;
+	ibp->rvp.gid_prefix = pi->subnet_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
 
 	/* Must be a valid unicast LID address. */
 	if ((lid == 0 && ls_old > IB_PORT_INIT) ||
-	     lid >= HFI1_MULTICAST_LID_BASE) {
+	    lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
 			lid);
@@ -1130,23 +1141,23 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 
 	/* Must be a valid unicast LID address. */
 	if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
-	     smlid >= HFI1_MULTICAST_LID_BASE) {
+	    smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
-	} else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
+	} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
 		pr_warn("SubnSet(OPA_PortInfo) smlid 0x%x\n", smlid);
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -1167,8 +1178,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	ppd->port_error_action = be32_to_cpu(pi->port_error_action);
 	lwe = be16_to_cpu(pi->link_width.enabled);
 	if (lwe) {
-		if (lwe == OPA_LINK_WIDTH_RESET
-				|| lwe == OPA_LINK_WIDTH_RESET_OLD)
+		if (lwe == OPA_LINK_WIDTH_RESET ||
+		    lwe == OPA_LINK_WIDTH_RESET_OLD)
 			set_link_width_enabled(ppd, ppd->link_width_supported);
 		else if ((lwe & ~ppd->link_width_supported) == 0)
 			set_link_width_enabled(ppd, lwe);
@@ -1177,19 +1188,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	}
 	lwe = be16_to_cpu(pi->link_width_downgrade.enabled);
 	/* LWD.E is always applied - 0 means "disabled" */
-	if (lwe == OPA_LINK_WIDTH_RESET
-			|| lwe == OPA_LINK_WIDTH_RESET_OLD) {
+	if (lwe == OPA_LINK_WIDTH_RESET ||
+	    lwe == OPA_LINK_WIDTH_RESET_OLD) {
 		set_link_width_downgrade_enabled(ppd,
-				ppd->link_width_downgrade_supported);
+						 ppd->
+						 link_width_downgrade_supported
+						 );
 	} else if ((lwe & ~ppd->link_width_downgrade_supported) == 0) {
 		/* only set and apply if something changed */
 		if (lwe != ppd->link_width_downgrade_enabled) {
 			set_link_width_downgrade_enabled(ppd, lwe);
 			call_link_downgrade_policy = 1;
 		}
-	} else
+	} else {
 		smp->status |= IB_SMP_INVALID_FIELD;
-
+	}
 	lse = be16_to_cpu(pi->link_speed.enabled);
 	if (lse) {
 		if (lse & be16_to_cpu(pi->link_speed.supported))
@@ -1198,22 +1211,24 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 			smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = (pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
-	ibp->vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
+	ibp->rvp.mkeyprot =
+		(pi->mkeyprotect_lmc & OPA_PI_MASK_MKEY_PROT_BIT) >> 6;
+	ibp->rvp.vl_high_limit = be16_to_cpu(pi->vl.high_limit) & 0xFF;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
-	if (ppd->vls_supported/2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
-		ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
+	if (ppd->vls_supported / 2 > ARRAY_SIZE(pi->neigh_mtu.pvlx_to_mtu) ||
+	    ppd->vls_supported > ARRAY_SIZE(dd->vld)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 	for (i = 0; i < ppd->vls_supported; i++) {
 		if ((i % 2) == 0)
-			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i/2] >> 4)
-					  & 0xF);
+			mtu = enum_to_mtu((pi->neigh_mtu.pvlx_to_mtu[i / 2] >>
+					   4) & 0xF);
 		else
-			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i/2] & 0xF);
+			mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[i / 2] &
+					  0xF);
 		if (mtu == 0xffff) {
 			pr_warn("SubnSet(OPA_PortInfo) mtu invalid %d (0x%x)\n",
 				mtu,
@@ -1223,8 +1238,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 		}
 		if (dd->vld[i].mtu != mtu) {
 			dd_dev_info(dd,
-				"MTU change on vl %d from %d to %d\n",
-				i, dd->vld[i].mtu, mtu);
+				    "MTU change on vl %d from %d to %d\n",
+				    i, dd->vld[i].mtu, mtu);
 			dd->vld[i].mtu = mtu;
 			call_set_mtu++;
 		}
@@ -1232,13 +1247,13 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 	/* As per OPAV1 spec: VL15 must support and be configured
 	 * for operation with a 2048 or larger MTU.
 	 */
-	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15/2] & 0xF);
+	mtu = enum_to_mtu(pi->neigh_mtu.pvlx_to_mtu[15 / 2] & 0xF);
 	if (mtu < 2048 || mtu == 0xffff)
 		mtu = 2048;
 	if (dd->vld[15].mtu != mtu) {
 		dd_dev_info(dd,
-			"MTU change on vl 15 from %d to %d\n",
-			dd->vld[15].mtu, mtu);
+			    "MTU change on vl 15 from %d to %d\n",
+			    dd->vld[15].mtu, mtu);
 		dd->vld[15].mtu = mtu;
 		call_set_mtu++;
 	}
@@ -1254,21 +1269,21 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
 			smp->status |= IB_SMP_INVALID_FIELD;
 		} else {
 			if (hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_OP_VLS,
-						vls) == -EINVAL)
+					    vls) == -EINVAL)
 				smp->status |= IB_SMP_INVALID_FIELD;
 		}
 	}
 
 	if (pi->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pi->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pi->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
-	ibp->subnet_timeout =
+	ibp->rvp.subnet_timeout =
 		pi->clientrereg_subnettimeout & OPA_PI_MASK_SUBNET_TIMEOUT;
 
 	crc_enabled = be16_to_cpu(pi->port_ltp_crc_mode);
@@ -1388,7 +1403,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1402,7 +1417,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 n_blocks_sent = OPA_AM_NBLK(am);
 	u32 start_block = am & 0x7ff;
-	u16 *p = (u16 *) data;
+	u16 *p = (u16 *)data;
 	__be16 *q = (__be16 *)data;
 	int i;
 	u16 n_blocks_avail;
@@ -1415,7 +1430,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	n_blocks_avail = (u16)(npkeys/OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+	n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
 
 	if (start_block + n_blocks_sent > n_blocks_avail ||
 	    n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
@@ -1514,14 +1529,22 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	u8 *p = data;
 	int i;
+	u8 sc;
 
 	if (am) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++)
-		ibp->sl_to_sc[i] = *p++;
+	for (i = 0; i <  ARRAY_SIZE(ibp->sl_to_sc); i++) {
+		sc = *p++;
+		if (ibp->sl_to_sc[i] != sc) {
+			ibp->sl_to_sc[i] = sc;
+
+			/* Put all stale qps into error state */
+			hfi1_error_port_qps(ibp, i);
+		}
+	}
 
 	return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
 }
@@ -1574,7 +1597,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 {
 	u32 n_blocks = OPA_AM_NBLK(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	size_t size = 4 * sizeof(u64);
 
 	if (n_blocks != 1) {
@@ -1597,7 +1620,7 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NBLK(am);
 	int async_update = OPA_AM_ASYNC(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	struct hfi1_pportdata *ppd;
 	int lstate;
 
@@ -1609,8 +1632,10 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
 	/* IB numbers ports from 1, hw from 0 */
 	ppd = dd->pport + (port - 1);
 	lstate = driver_lstate(ppd);
-	/* it's known that async_update is 0 by this point, but include
-	 * the explicit check for clarity */
+	/*
+	 * it's known that async_update is 0 by this point, but include
+	 * the explicit check for clarity
+	 */
 	if (!async_update &&
 	    (lstate == IB_PORT_ARMED || lstate == IB_PORT_ACTIVE)) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1629,7 +1654,7 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int size;
 
 	if (n_blocks != 1) {
@@ -1654,7 +1679,7 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
 	u32 n_blocks = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	void *vp = (void *) data;
+	void *vp = (void *)data;
 	int lstate;
 
 	if (n_blocks != 1) {
@@ -1687,7 +1712,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	u32 lstate;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 
 	if (nports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1707,12 +1732,11 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	psi->port_states.ledenable_offlinereason |=
 		ppd->is_sm_config_started << 5;
 	psi->port_states.ledenable_offlinereason |=
-		ppd->offline_disabled_reason & OPA_PI_MASK_OFFLINE_REASON;
+		ppd->offline_disabled_reason;
 #else
 	psi->port_states.offline_reason = ppd->neighbor_normal << 4;
 	psi->port_states.offline_reason |= ppd->is_sm_config_started << 5;
-	psi->port_states.offline_reason |= ppd->offline_disabled_reason &
-				OPA_PI_MASK_OFFLINE_REASON;
+	psi->port_states.offline_reason |= ppd->offline_disabled_reason;
 #endif /* PI_LED_ENABLE_SUP */
 
 	psi->port_states.portphysstate_portstate =
@@ -1737,7 +1761,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
 	u8 ls_new, ps_new;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
-	struct opa_port_state_info *psi = (struct opa_port_state_info *) data;
+	struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
 	int ret, invalid = 0;
 
 	if (nports != 1) {
@@ -1782,14 +1806,16 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
 	u32 len = OPA_AM_CI_LEN(am) + 1;
 	int ret;
 
-#define __CI_PAGE_SIZE (1 << 7) /* 128 bytes */
+#define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
 
-	/* check that addr is within spec, and
-	 * addr and (addr + len - 1) are on the same "page" */
+	/*
+	 * check that addr is within spec, and
+	 * addr and (addr + len - 1) are on the same "page"
+	 */
 	if (addr >= 4096 ||
-		(__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
+	    (__CI_PAGE_NUM(addr) != __CI_PAGE_NUM(addr + len - 1))) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -1823,7 +1849,7 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 	int size;
 
 	if (num_ports != 1) {
@@ -1846,7 +1872,7 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
 	u32 num_ports = OPA_AM_NPORT(am);
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	struct hfi1_pportdata *ppd;
-	struct buffer_control *p = (struct buffer_control *) data;
+	struct buffer_control *p = (struct buffer_control *)data;
 
 	if (num_ports != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
@@ -1919,13 +1945,15 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
 
 	switch (section) {
 	case OPA_VLARB_LOW_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_LOW_ARB, p);
 		break;
 	case OPA_VLARB_HIGH_ELEMENTS:
-		(void) fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+		(void)fm_set_table(ppd, FM_TBL_VL_HIGH_ARB, p);
 		break;
-	/* neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
-	 * can be changed from the default values */
+	/*
+	 * neither OPA_VLARB_PREEMPT_ELEMENTS, or OPA_VLARB_PREEMPT_MATRIX
+	 * can be changed from the default values
+	 */
 	case OPA_VLARB_PREEMPT_ELEMENTS:
 		/* FALLTHROUGH */
 	case OPA_VLARB_PREEMPT_MATRIX:
@@ -2137,8 +2165,10 @@ struct opa_port_data_counters_msg {
 };
 
 struct opa_port_error_counters64_msg {
-	/* Request contains first two fields, response contains the
-	 * whole magilla */
+	/*
+	 * Request contains first two fields, response contains the
+	 * whole magilla
+	 */
 	__be64 port_select_mask[4];
 	__be32 vl_select_mask;
 
@@ -2172,7 +2202,6 @@ struct opa_port_error_info_msg {
 	__be32 error_info_select_mask;
 	__be32 reserved1;
 	struct _port_ei {
-
 		u8 port_number;
 		u8 reserved2[7];
 
@@ -2251,7 +2280,7 @@ enum error_info_selects {
 };
 
 static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
-				struct ib_device *ibdev, u32 *resp_len)
+				     struct ib_device *ibdev, u32 *resp_len)
 {
 	struct opa_class_port_info *p =
 		(struct opa_class_port_info *)pmp->data;
@@ -2299,9 +2328,9 @@ static void a0_portstatus(struct hfi1_pportdata *ppd,
 	}
 }
 
-
 static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_port_status_req *req =
 		(struct opa_port_status_req *)pmp->data;
@@ -2326,8 +2355,8 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	if (nports != 1 || (port_num && port_num != port)
-	    || num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
+	if (nports != 1 || (port_num && port_num != port) ||
+	    num_vls > OPA_MAX_VLS || (vl_select_mask & ~VL_MASK_ALL)) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2357,7 +2386,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 					 CNTR_INVALID_VL));
 	rsp->port_multicast_xmit_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-					CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->port_multicast_rcv_pkts =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
 					  CNTR_INVALID_VL));
@@ -2386,7 +2415,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 	}
 	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
 	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
+				   CNTR_INVALID_VL);
 	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
 		/* overflow/wrapped */
 		rsp->link_error_recovery = cpu_to_be32(~0);
@@ -2401,13 +2430,13 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
 					  CNTR_INVALID_VL));
 	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-					  CNTR_INVALID_VL));
+						      CNTR_INVALID_VL));
 
 	/* rsp->uncorrectable_errors is 8 bits wide, and it pegs at 0xff */
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2423,27 +2452,27 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-					idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-					idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		vlinfo++;
 		vfi++;
@@ -2473,7 +2502,7 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
 	error_counter_summary += read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
 						CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* local link integrity must be right-shifted by the lli resolution */
 	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
 	tmp += read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
@@ -2483,10 +2512,10 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
 	tmp += read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT, CNTR_INVALID_VL);
 	error_counter_summary += (tmp >> res_ler);
 	error_counter_summary += read_dev_cntr(dd, C_DC_RCV_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
 	error_counter_summary += read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL);
+					       CNTR_INVALID_VL);
 	/* ppd->link_downed is a 32-bit value */
 	error_counter_summary += read_port_cntr(ppd, C_SW_LINK_DOWN,
 						CNTR_INVALID_VL);
@@ -2512,7 +2541,7 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
 						 idx_from_vl(vl));
 			if (tmp < sum_vl_xmit_wait) {
 				/* we wrapped */
-				sum_vl_xmit_wait = (u64) ~0;
+				sum_vl_xmit_wait = (u64)~0;
 				break;
 			}
 			sum_vl_xmit_wait = tmp;
@@ -2522,8 +2551,30 @@ static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
 	}
 }
 
+static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
+				   struct _port_dctrs *rsp)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
+						CNTR_INVALID_VL));
+	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
+						CNTR_INVALID_VL));
+	rsp->port_multicast_xmit_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
+					  CNTR_INVALID_VL));
+	rsp->port_multicast_rcv_pkts =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
+					  CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				    struct ib_device *ibdev,
+				    u8 port, u32 *resp_len)
 {
 	struct opa_port_data_counters_msg *req =
 		(struct opa_port_data_counters_msg *)pmp->data;
@@ -2590,39 +2641,19 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	 */
 	hfi1_read_link_quality(dd, &lq);
 	rsp->link_quality_indicator = cpu_to_be32((u32)lq);
+	pma_get_opa_port_dctrs(ibdev, rsp);
 
-	/* rsp->sw_port_congestion is 0 for HFIs */
-	/* rsp->port_xmit_time_cong is 0 for HFIs */
-	/* rsp->port_xmit_wasted_bw ??? */
-	/* rsp->port_xmit_wait_data ??? */
-	/* rsp->port_mark_fecn is 0 for HFIs */
-
-	rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
-						CNTR_INVALID_VL));
-	rsp->port_xmit_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_rcv_pkts = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_xmit_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_XMIT_PKTS,
-						CNTR_INVALID_VL));
-	rsp->port_multicast_rcv_pkts =
-		cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
-						CNTR_INVALID_VL));
 	rsp->port_xmit_wait =
 		cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
 	rsp->port_rcv_fecn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
 	rsp->port_rcv_becn =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN, CNTR_INVALID_VL));
-
 	rsp->port_error_counter_summary =
 		cpu_to_be64(get_error_counter_summary(ibdev, port,
 						      res_lli, res_ler));
 
-	vlinfo = &(rsp->vls[0]);
+	vlinfo = &rsp->vls[0];
 	vfi = 0;
 	/* The vl_select_mask has been checked above, and we know
 	 * that it contains only entries which represent valid VLs.
@@ -2630,44 +2661,45 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	 * any additional checks for vl.
 	 */
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
-		 8 * sizeof(req->vl_select_mask)) {
+			 8 * sizeof(req->vl_select_mask)) {
 		memset(vlinfo, 0, sizeof(*vlinfo));
 
 		rsp->vls[vfi].port_vl_xmit_data =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_FLIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_data =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_FLIT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_pkts =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_pkts =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_xmit_wait =
 			cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
-							idx_from_vl(vl)));
+						   idx_from_vl(vl)));
 
 		rsp->vls[vfi].port_vl_rcv_fecn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 		rsp->vls[vfi].port_vl_rcv_becn =
 			cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_BCN_VL,
-							idx_from_vl(vl)));
+						  idx_from_vl(vl)));
 
 		/* rsp->port_vl_xmit_time_cong is 0 for HFIs */
 		/* rsp->port_vl_xmit_wasted_bw ??? */
 		/* port_vl_xmit_wait_data - TXE (table 13-9 HFI spec) ???
-		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait */
+		 * does this differ from rsp->vls[vfi].port_vl_xmit_wait
+		 */
 		/*rsp->vls[vfi].port_vl_mark_fecn =
-			cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
-				+ offset));
-		*/
+		 *	cpu_to_be64(read_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT
+		 *		+ offset));
+		 */
 		vlinfo++;
 		vfi++;
 	}
@@ -2680,12 +2712,88 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
+				       struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
+						pmp->data;
+	struct _port_dctrs rsp;
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_dctrs(ibdev, &rsp);
+
+	p->port_xmit_data = rsp.port_xmit_data;
+	p->port_rcv_data = rsp.port_rcv_data;
+	p->port_xmit_packets = rsp.port_xmit_pkts;
+	p->port_rcv_packets = rsp.port_rcv_pkts;
+	p->port_unicast_xmit_packets = 0;
+	p->port_unicast_rcv_packets =  0;
+	p->port_multicast_xmit_packets = rsp.port_multicast_xmit_pkts;
+	p->port_multicast_rcv_packets = rsp.port_multicast_rcv_pkts;
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
+static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
+				   struct _port_ectrs *rsp, u8 port)
+{
+	u64 tmp, tmp2;
+	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+	struct hfi1_ibport *ibp = to_iport(ibdev, port);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
+					CNTR_INVALID_VL);
+	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->link_error_recovery = cpu_to_be32(~0);
+	} else {
+		rsp->link_error_recovery = cpu_to_be32(tmp2);
+	}
+
+	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
+						CNTR_INVALID_VL));
+	rsp->port_rcv_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_ERR, CNTR_INVALID_VL));
+	rsp->port_rcv_remote_physical_errors =
+		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
+					  CNTR_INVALID_VL));
+	rsp->port_rcv_switch_relay_errors = 0;
+	rsp->port_xmit_discards =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
+					   CNTR_INVALID_VL));
+	rsp->port_xmit_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	rsp->port_rcv_constraint_errors =
+		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
+					   CNTR_INVALID_VL));
+	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
+	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
+	if (tmp2 < tmp) {
+		/* overflow/wrapped */
+		rsp->local_link_integrity_errors = cpu_to_be64(~0);
+	} else {
+		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
+	}
+	rsp->excessive_buffer_overruns =
+		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+}
+
 static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ectrs *rsp;
-	unsigned long port_num;
+	u8 port_num;
 	struct opa_port_error_counters64_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u32 num_ports;
@@ -2695,7 +2803,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	struct hfi1_pportdata *ppd;
 	struct _vls_ectrs *vlinfo;
 	unsigned long vl;
-	u64 port_mask, tmp, tmp2;
+	u64 port_mask, tmp;
 	u32 vl_select_mask;
 	int vfi;
 
@@ -2724,9 +2832,9 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-					sizeof(port_mask));
+				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2737,46 +2845,18 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	ppd = ppd_from_ibp(ibp);
 
 	memset(rsp, 0, sizeof(*rsp));
-	rsp->port_number = (u8)port_num;
+	rsp->port_number = port_num;
+
+	pma_get_opa_port_ectrs(ibdev, rsp, port_num);
 
-	rsp->port_rcv_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_RCV_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	/* port_rcv_switch_relay_errors is 0 for HFIs */
-	rsp->port_xmit_discards =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD,
-						CNTR_INVALID_VL));
 	rsp->port_rcv_remote_physical_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_RMT_PHY_ERR,
-						CNTR_INVALID_VL));
-	tmp = read_dev_cntr(dd, C_DC_RX_REPLAY, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_TX_REPLAY, CNTR_INVALID_VL);
-	if (tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->local_link_integrity_errors = cpu_to_be64(~0);
-	} else {
-		rsp->local_link_integrity_errors = cpu_to_be64(tmp2);
-	}
-	tmp = read_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL);
-	tmp2 = tmp + read_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-					CNTR_INVALID_VL);
-	if (tmp2 > (u32)UINT_MAX || tmp2 < tmp) {
-		/* overflow/wrapped */
-		rsp->link_error_recovery = cpu_to_be32(~0);
-	} else {
-		rsp->link_error_recovery = cpu_to_be32(tmp2);
-	}
-	rsp->port_xmit_constraint_errors =
-		cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_CSTR_ERR,
-					   CNTR_INVALID_VL));
-	rsp->excessive_buffer_overruns =
-		cpu_to_be64(read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	rsp->fm_config_errors =
 		cpu_to_be64(read_dev_cntr(dd, C_DC_FM_CFG_ERR,
-						CNTR_INVALID_VL));
-	rsp->link_downed = cpu_to_be32(read_port_cntr(ppd, C_SW_LINK_DOWN,
-						CNTR_INVALID_VL));
+					  CNTR_INVALID_VL));
 	tmp = read_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL);
+
 	rsp->uncorrectable_errors = tmp < 0x100 ? (tmp & 0xff) : 0xff;
 
 	vlinfo = &rsp->vls[0];
@@ -2796,8 +2876,94 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 	return reply((struct ib_mad_hdr *)pmp);
 }
 
+static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
+				   struct ib_device *ibdev, u8 port)
+{
+	struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
+		pmp->data;
+	struct _port_ectrs rsp;
+	u64 temp_link_overrun_errors;
+	u64 temp_64;
+	u32 temp_32;
+
+	memset(&rsp, 0, sizeof(rsp));
+	pma_get_opa_port_ectrs(ibdev, &rsp, port);
+
+	if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) {
+		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
+		goto bail;
+	}
+
+	p->symbol_error_counter = 0; /* N/A for OPA */
+
+	temp_32 = be32_to_cpu(rsp.link_error_recovery);
+	if (temp_32 > 0xFFUL)
+		p->link_error_recovery_counter = 0xFF;
+	else
+		p->link_error_recovery_counter = (u8)temp_32;
+
+	temp_32 = be32_to_cpu(rsp.link_downed);
+	if (temp_32 > 0xFFUL)
+		p->link_downed_counter = 0xFF;
+	else
+		p->link_downed_counter = (u8)temp_32;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_remote_physical_errors);
+	if (temp_64 > 0xFFFFUL)
+		p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF);
+	else
+		p->port_rcv_remphys_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_switch_relay_errors);
+	p->port_rcv_switch_relay_errors = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_discards);
+	if (temp_64 > 0xFFFFUL)
+		p->port_xmit_discards = cpu_to_be16(0xFFFF);
+	else
+		p->port_xmit_discards = cpu_to_be16((u16)temp_64);
+
+	temp_64 = be64_to_cpu(rsp.port_xmit_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_xmit_constraint_errors = 0xFF;
+	else
+		p->port_xmit_constraint_errors = (u8)temp_64;
+
+	temp_64 = be64_to_cpu(rsp.port_rcv_constraint_errors);
+	if (temp_64 > 0xFFUL)
+		p->port_rcv_constraint_errors = 0xFFUL;
+	else
+		p->port_rcv_constraint_errors = (u8)temp_64;
+
+	/* LocalLink: 7:4, BufferOverrun: 3:0 */
+	temp_64 = be64_to_cpu(rsp.local_link_integrity_errors);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+
+	temp_link_overrun_errors = temp_64 << 4;
+
+	temp_64 = be64_to_cpu(rsp.excessive_buffer_overruns);
+	if (temp_64 > 0xFUL)
+		temp_64 = 0xFUL;
+	temp_link_overrun_errors |= temp_64;
+
+	p->link_overrun_errors = (u8)temp_link_overrun_errors;
+
+	p->vl15_dropped = 0; /* N/A for OPA */
+
+bail:
+	return reply((struct ib_mad_hdr *)pmp);
+}
+
 static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	size_t response_data_size;
 	struct _port_ei *rsp;
@@ -2805,7 +2971,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u64 reg;
 
@@ -2838,7 +3004,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2847,15 +3013,17 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	rsp->port_rcv_ei.status_and_code =
 		dd->err_info_rcvport.status_and_code;
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit1,
-		&dd->err_info_rcvport.packet_flit1, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit1, sizeof(u64));
 	memcpy(&rsp->port_rcv_ei.ei.ei1to12.packet_flit2,
-		&dd->err_info_rcvport.packet_flit2, sizeof(u64));
+	       &dd->err_info_rcvport.packet_flit2, sizeof(u64));
 
 	/* ExcessiverBufferOverrunInfo */
 	reg = read_csr(dd, RCV_ERR_INFO);
 	if (reg & RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK) {
-		/* if the RcvExcessBufferOverrun bit is set, save SC of
-		 * first pkt that encountered an excess buffer overrun */
+		/*
+		 * if the RcvExcessBufferOverrun bit is set, save SC of
+		 * first pkt that encountered an excess buffer overrun
+		 */
 		u8 tmp = (u8)reg;
 
 		tmp &=  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SC_SMASK;
@@ -2892,7 +3060,8 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 }
 
 static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				  struct ib_device *ibdev,
+				  u8 port, u32 *resp_len)
 {
 	struct opa_clear_port_status *req =
 		(struct opa_clear_port_status *)pmp->data;
@@ -2951,8 +3120,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 		write_dev_cntr(dd, C_DC_RCV_BBL, CNTR_INVALID_VL, 0);
 
 	/* Only applicable for switch */
-	/*if (counter_select & CS_PORT_MARK_FECN)
-		write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);*/
+	/* if (counter_select & CS_PORT_MARK_FECN)
+	 *	write_csr(dd, DCC_PRF_PORT_MARK_FECN_CNT, 0);
+	 */
 
 	if (counter_select & CS_PORT_RCV_CONSTRAINT_ERRORS)
 		write_port_cntr(ppd, C_SW_RCV_CSTR_ERR, CNTR_INVALID_VL, 0);
@@ -2975,7 +3145,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 	if (counter_select & CS_LINK_ERROR_RECOVERY) {
 		write_dev_cntr(dd, C_DC_SEQ_CRC_CNT, CNTR_INVALID_VL, 0);
 		write_dev_cntr(dd, C_DC_REINIT_FROM_PEER_CNT,
-						CNTR_INVALID_VL, 0);
+			       CNTR_INVALID_VL, 0);
 	}
 
 	if (counter_select & CS_PORT_RCV_ERRORS)
@@ -2997,7 +3167,6 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 
 	for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
 			 8 * sizeof(vl_select_mask)) {
-
 		if (counter_select & CS_PORT_XMIT_DATA)
 			write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
 
@@ -3026,9 +3195,9 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 		if (counter_select & CS_PORT_RCV_BUBBLE)
 			write_dev_cntr(dd, C_DC_RCV_BBL_VL, idx_from_vl(vl), 0);
 
-		/*if (counter_select & CS_PORT_MARK_FECN)
-		     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
-		*/
+		/* if (counter_select & CS_PORT_MARK_FECN)
+		 *     write_csr(dd, DCC_PRF_PORT_VL_MARK_FECN_CNT + offset, 0);
+		 */
 		/* port_vl_xmit_discards ??? */
 	}
 
@@ -3039,14 +3208,15 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
 }
 
 static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
-			struct ib_device *ibdev, u8 port, u32 *resp_len)
+				 struct ib_device *ibdev,
+				 u8 port, u32 *resp_len)
 {
 	struct _port_ei *rsp;
 	struct opa_port_error_info_msg *req;
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
 	u64 port_mask;
 	u32 num_ports;
-	unsigned long port_num;
+	u8 port_num;
 	u8 num_pslm;
 	u32 error_info_select;
 
@@ -3071,7 +3241,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
 	port_num = find_first_bit((unsigned long *)&port_mask,
 				  sizeof(port_mask));
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -3085,8 +3255,10 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
 
 	/* ExcessiverBufferOverrunInfo */
 	if (error_info_select & ES_EXCESSIVE_BUFFER_OVERRUN_INFO)
-		/* status bit is essentially kept in the h/w - bit 5 of
-		 * RCV_ERR_INFO */
+		/*
+		 * status bit is essentially kept in the h/w - bit 5 of
+		 * RCV_ERR_INFO
+		 */
 		write_csr(dd, RCV_ERR_INFO,
 			  RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
 
@@ -3138,13 +3310,12 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
 }
 
 static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
-					     u8 *data,
-					     struct ib_device *ibdev,
-					     u8 port, u32 *resp_len)
+				       u8 *data, struct ib_device *ibdev,
+				       u8 port, u32 *resp_len)
 {
 	int i;
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3154,7 +3325,7 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3183,7 +3354,7 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
 				       u32 *resp_len)
 {
 	struct opa_congestion_setting_attr *p =
-		(struct opa_congestion_setting_attr *) data;
+		(struct opa_congestion_setting_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct opa_congestion_setting_entry_shadow *entries;
@@ -3245,7 +3416,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
 			continue;
 		memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
 		memcpy(cong_log->events[i].remote_qp_number_cn_entry,
-			&cce->rqpn, 3);
+		       &cce->rqpn, 3);
 		cong_log->events[i].sl_svc_type_cn_entry =
 			((cce->sl & 0x1f) << 3) | (cce->svc_type & 0x7);
 		cong_log->events[i].remote_lid_cn_entry =
@@ -3275,7 +3446,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 				   u32 *resp_len)
 {
 	struct ib_cc_table_attr *cc_table_attr =
-		(struct ib_cc_table_attr *) data;
+		(struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3296,7 +3467,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return reply((struct ib_mad_hdr *)smp);
 	}
@@ -3316,7 +3487,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 	rcu_read_unlock();
 
 	if (resp_len)
-		*resp_len += sizeof(u16)*(IB_CCT_ENTRIES * n_blocks + 1);
+		*resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
 
 	return reply((struct ib_mad_hdr *)smp);
 }
@@ -3332,7 +3503,7 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
-	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *) data;
+	struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	u32 start_block = OPA_AM_START_BLK(am);
@@ -3362,14 +3533,14 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 	}
 
 	new_cc_state = kzalloc(sizeof(*new_cc_state), GFP_KERNEL);
-	if (new_cc_state == NULL)
+	if (!new_cc_state)
 		goto getit;
 
 	spin_lock(&ppd->cc_state_lock);
 
 	old_cc_state = get_cc_state(ppd);
 
-	if (old_cc_state == NULL) {
+	if (!old_cc_state) {
 		spin_unlock(&ppd->cc_state_lock);
 		kfree(new_cc_state);
 		return reply((struct ib_mad_hdr *)smp);
@@ -3409,26 +3580,31 @@ struct opa_led_info {
 };
 
 #define OPA_LED_SHIFT	31
-#define OPA_LED_MASK	(1 << OPA_LED_SHIFT)
+#define OPA_LED_MASK	BIT(OPA_LED_SHIFT)
 
 static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct hfi1_pportdata *ppd = dd->pport;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
-	u64 reg;
+	u32 is_beaconing_active;
 
 	if (nport != 1) {
 		smp->status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	reg = read_csr(dd, DCC_CFG_LED_CNTRL);
-	if ((reg & DCC_CFG_LED_CNTRL_LED_CNTRL_SMASK) &&
-		((reg & DCC_CFG_LED_CNTRL_LED_SW_BLINK_RATE_SMASK) == 0xf))
-			p->rsvd_led_mask = cpu_to_be32(OPA_LED_MASK);
+	/*
+	 * This pairs with the memory barrier in hfi1_start_led_override to
+	 * ensure that we read the correct state of LED beaconing represented
+	 * by led_override_timer_active
+	 */
+	smp_rmb();
+	is_beaconing_active = !!atomic_read(&ppd->led_override_timer_active);
+	p->rsvd_led_mask = cpu_to_be32(is_beaconing_active << OPA_LED_SHIFT);
 
 	if (resp_len)
 		*resp_len += sizeof(struct opa_led_info);
@@ -3441,7 +3617,7 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 				   u32 *resp_len)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct opa_led_info *p = (struct opa_led_info *) data;
+	struct opa_led_info *p = (struct opa_led_info *)data;
 	u32 nport = OPA_AM_NPORT(am);
 	int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
 
@@ -3450,7 +3626,10 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
 		return reply((struct ib_mad_hdr *)smp);
 	}
 
-	setextled(dd, on);
+	if (on)
+		hfi1_start_led_override(dd->pport, 2000, 1500);
+	else
+		shutdown_led_override(dd->pport);
 
 	return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
 }
@@ -3493,7 +3672,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
@@ -3532,9 +3711,9 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3575,7 +3754,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 		break;
 	case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
 		ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
-					       resp_len);
+						resp_len);
 		break;
 	case OPA_ATTRIB_ID_PORT_STATE_INFO:
 		ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
@@ -3602,9 +3781,9 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
 					      resp_len);
 		break;
 	case IB_SMP_ATTR_SM_INFO:
-		if (ibp->port_cap_flags & IB_PORT_SM_DISABLED)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
 			return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return IB_MAD_RESULT_SUCCESS;
 		/* FALLTHROUGH */
 	default:
@@ -3654,14 +3833,13 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
 		/* zero the payload for this segment */
 		memset(next_smp + sizeof(*agg), 0, agg_data_len);
 
-		(void) subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3698,14 +3876,13 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
 			return reply((struct ib_mad_hdr *)smp);
 		}
 
-		(void) subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
+		(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
 					ibdev, port, NULL);
 		if (smp->status & ~IB_SMP_DIRECTION) {
 			set_aggr_error(agg);
 			return reply((struct ib_mad_hdr *)smp);
 		}
 		next_smp += agg_size;
-
 	}
 
 	return reply((struct ib_mad_hdr *)smp);
@@ -3823,7 +4000,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 	if (smp->class_version != OPA_SMI_CLASS_VERSION) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags, smp->mkey,
 			 smp->route.dr.dr_slid, smp->route.dr.return_path,
@@ -3843,13 +4020,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
+			(void)check_mkey(to_iport(ibdev, port_num),
 					  (struct ib_mad_hdr *)smp, 0,
 					  smp->mkey, smp->route.dr.dr_slid,
 					  smp->route.dr.return_path,
 					  smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	*resp_len = opa_get_smp_header_size(smp);
@@ -3861,23 +4038,25 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 			clear_opa_smp_data(smp);
 			ret = subn_get_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_get_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_SET:
 		switch (attr_id) {
 		default:
 			ret = subn_set_opa_sma(attr_id, smp, am, data,
 					       ibdev, port, resp_len);
-			goto bail;
+			break;
 		case OPA_ATTRIB_ID_AGGREGATE:
 			ret = subn_set_opa_aggregate(smp, ibdev, port,
 						     resp_len);
-			goto bail;
+			break;
 		}
+		break;
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_REPORT:
 	case IB_MGMT_METHOD_REPORT_RESP:
@@ -3888,13 +4067,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 	default:
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)smp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -3910,7 +4089,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 	if (smp->class_version != 1) {
 		smp->status |= IB_SMP_UNSUP_VERSION;
 		ret = reply((struct ib_mad_hdr *)smp);
-		goto bail;
+		return ret;
 	}
 
 	ret = check_mkey(ibp, (struct ib_mad_hdr *)smp, mad_flags,
@@ -3931,13 +4110,13 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 		     smp->method == IB_MGMT_METHOD_SET) &&
 		    port_num && port_num <= ibdev->phys_port_cnt &&
 		    port != port_num)
-			(void) check_mkey(to_iport(ibdev, port_num),
-					  (struct ib_mad_hdr *)smp, 0,
-					  smp->mkey,
-					  (__force __be32)smp->dr_slid,
-					  smp->return_path, smp->hop_cnt);
+			(void)check_mkey(to_iport(ibdev, port_num),
+					 (struct ib_mad_hdr *)smp, 0,
+					 smp->mkey,
+					 (__force __be32)smp->dr_slid,
+					 smp->return_path, smp->hop_cnt);
 		ret = IB_MAD_RESULT_FAILURE;
-		goto bail;
+		return ret;
 	}
 
 	switch (smp->method) {
@@ -3945,15 +4124,77 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 		switch (smp->attr_id) {
 		case IB_SMP_ATTR_NODE_INFO:
 			ret = subn_get_nodeinfo(smp, ibdev, port);
-			goto bail;
+			break;
 		default:
 			smp->status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)smp);
-			goto bail;
+			break;
 		}
+		break;
+	}
+
+	return ret;
+}
+
+static int process_perf(struct ib_device *ibdev, u8 port,
+			const struct ib_mad *in_mad,
+			struct ib_mad *out_mad)
+{
+	struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad;
+	struct ib_class_port_info *cpi = (struct ib_class_port_info *)
+						&pmp->data;
+	int ret = IB_MAD_RESULT_FAILURE;
+
+	*out_mad = *in_mad;
+	if (pmp->mad_hdr.class_version != 1) {
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		return ret;
+	}
+
+	switch (pmp->mad_hdr.method) {
+	case IB_MGMT_METHOD_GET:
+		switch (pmp->mad_hdr.attr_id) {
+		case IB_PMA_PORT_COUNTERS:
+			ret = pma_get_ib_portcounters(pmp, ibdev, port);
+			break;
+		case IB_PMA_PORT_COUNTERS_EXT:
+			ret = pma_get_ib_portcounters_ext(pmp, ibdev, port);
+			break;
+		case IB_PMA_CLASS_PORT_INFO:
+			cpi->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		default:
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+			break;
+		}
+		break;
+
+	case IB_MGMT_METHOD_SET:
+		if (pmp->mad_hdr.attr_id) {
+			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
+			ret = reply((struct ib_mad_hdr *)pmp);
+		}
+		break;
+
+	case IB_MGMT_METHOD_TRAP:
+	case IB_MGMT_METHOD_GET_RESP:
+		/*
+		 * The ib_mad module will call us to process responses
+		 * before checking for other consumers.
+		 * Just tell the caller to process it normally.
+		 */
+		ret = IB_MAD_RESULT_SUCCESS;
+		break;
+
+	default:
+		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
+		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -3978,44 +4219,46 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
 		switch (pmp->mad_hdr.attr_id) {
 		case IB_PMA_CLASS_PORT_INFO:
 			ret = pma_get_opa_classportinfo(pmp, ibdev, resp_len);
-			goto bail;
+			break;
 		case OPA_PM_ATTRIB_ID_PORT_STATUS:
 			ret = pma_get_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_DATA_PORT_COUNTERS:
 			ret = pma_get_opa_datacounters(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						       resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_PORT_COUNTERS:
 			ret = pma_get_opa_porterrors(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_get_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_SET:
 		switch (pmp->mad_hdr.attr_id) {
 		case OPA_PM_ATTRIB_ID_CLEAR_PORT_STATUS:
 			ret = pma_set_opa_portstatus(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						     resp_len);
+			break;
 		case OPA_PM_ATTRIB_ID_ERROR_INFO:
 			ret = pma_set_opa_errorinfo(pmp, ibdev, port,
-								resp_len);
-			goto bail;
+						    resp_len);
+			break;
 		default:
 			pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR;
 			ret = reply((struct ib_mad_hdr *)pmp);
-			goto bail;
+			break;
 		}
+		break;
 
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
@@ -4025,14 +4268,14 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
 		 * Just tell the caller to process it normally.
 		 */
 		ret = IB_MAD_RESULT_SUCCESS;
-		goto bail;
+		break;
 
 	default:
 		pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD;
 		ret = reply((struct ib_mad_hdr *)pmp);
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4097,12 +4340,15 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
 	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
 		ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad);
-		goto bail;
+		break;
+	case IB_MGMT_CLASS_PERF_MGMT:
+		ret = process_perf(ibdev, port, in_mad, out_mad);
+		break;
 	default:
 		ret = IB_MAD_RESULT_SUCCESS;
+		break;
 	}
 
-bail:
 	return ret;
 }
 
@@ -4154,66 +4400,3 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 
 	return IB_MAD_RESULT_FAILURE;
 }
-
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
-int hfi1_create_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-	int ret;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
-}
-
-void hfi1_free_agents(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct hfi1_ibport *ibp;
-	int p;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-	}
-}
diff --git a/drivers/staging/rdma/hfi1/mad.h b/drivers/staging/rdma/hfi1/mad.h
index f0317750e2fc..55ee08675333 100644
--- a/drivers/staging/rdma/hfi1/mad.h
+++ b/drivers/staging/rdma/hfi1/mad.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,8 +48,10 @@
 #define _HFI1_MAD_H
 
 #include <rdma/ib_pma.h>
-#define USE_PI_LED_ENABLE	1 /* use led enabled bit in struct
-				   * opa_port_states, if available */
+#define USE_PI_LED_ENABLE	1 /*
+				   * use led enabled bit in struct
+				   * opa_port_states, if available
+				   */
 #include <rdma/opa_smi.h>
 #include <rdma/opa_port_info.h>
 #ifndef PI_LED_ENABLE_SUP
@@ -235,7 +234,6 @@ struct ib_pma_portcounters_cong {
 #define IB_CC_SVCTYPE_RD 0x2
 #define IB_CC_SVCTYPE_UD 0x3
 
-
 /*
  * There should be an equivalent IB #define for the following, but
  * I cannot find it.
@@ -267,7 +265,7 @@ struct opa_hfi1_cong_log {
 	u8 congestion_flags;
 	__be16 threshold_event_counter;
 	__be32 current_time_stamp;
-	u8 threshold_cong_event_map[OPA_MAX_SLS/8];
+	u8 threshold_cong_event_map[OPA_MAX_SLS / 8];
 	struct opa_hfi1_cong_log_event events[OPA_CONG_LOG_ELEMS];
 } __packed;
 
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.c b/drivers/staging/rdma/hfi1/mmu_rb.c
new file mode 100644
index 000000000000..c7ad0164ea9a
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/list.h>
+#include <linux/mmu_notifier.h>
+#include <linux/interval_tree_generic.h>
+
+#include "mmu_rb.h"
+#include "trace.h"
+
+struct mmu_rb_handler {
+	struct list_head list;
+	struct mmu_notifier mn;
+	struct rb_root *root;
+	spinlock_t lock;        /* protect the RB tree */
+	struct mmu_rb_ops *ops;
+};
+
+static LIST_HEAD(mmu_rb_handlers);
+static DEFINE_SPINLOCK(mmu_rb_lock); /* protect mmu_rb_handlers list */
+
+static unsigned long mmu_node_start(struct mmu_rb_node *);
+static unsigned long mmu_node_last(struct mmu_rb_node *);
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *);
+static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
+				     unsigned long);
+static inline void mmu_notifier_range_start(struct mmu_notifier *,
+					    struct mm_struct *,
+					    unsigned long, unsigned long);
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
+					unsigned long, unsigned long);
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
+					   unsigned long, unsigned long);
+
+static struct mmu_notifier_ops mn_opts = {
+	.invalidate_page = mmu_notifier_page,
+	.invalidate_range_start = mmu_notifier_range_start,
+};
+
+INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
+		     mmu_node_start, mmu_node_last, static, __mmu_int_rb);
+
+static unsigned long mmu_node_start(struct mmu_rb_node *node)
+{
+	return node->addr & PAGE_MASK;
+}
+
+static unsigned long mmu_node_last(struct mmu_rb_node *node)
+{
+	return PAGE_ALIGN((node->addr & PAGE_MASK) + node->len) - 1;
+}
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops)
+{
+	struct mmu_rb_handler *handlr;
+	unsigned long flags;
+
+	if (!ops->invalidate)
+		return -EINVAL;
+
+	handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
+	if (!handlr)
+		return -ENOMEM;
+
+	handlr->root = root;
+	handlr->ops = ops;
+	INIT_HLIST_NODE(&handlr->mn.hlist);
+	spin_lock_init(&handlr->lock);
+	handlr->mn.ops = &mn_opts;
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_add_tail(&handlr->list, &mmu_rb_handlers);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	return mmu_notifier_register(&handlr->mn, current->mm);
+}
+
+void hfi1_mmu_rb_unregister(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler)
+		return;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_del(&handler->list);
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+
+	if (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node;
+		struct mmu_rb_node *rbnode;
+
+		while ((node = rb_first(root))) {
+			rbnode = rb_entry(node, struct mmu_rb_node, node);
+			rb_erase(node, root);
+			if (handler->ops->remove)
+				handler->ops->remove(root, rbnode, false);
+		}
+	}
+
+	if (current->mm)
+		mmu_notifier_unregister(&handler->mn, current->mm);
+	kfree(handler);
+}
+
+int hfi1_mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!handler)
+		return -EINVAL;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
+		  mnode->len);
+	node = __mmu_rb_search(handler, mnode->addr, mnode->len);
+	if (node) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	__mmu_int_rb_insert(mnode, root);
+
+	if (handler->ops->insert) {
+		ret = handler->ops->insert(root, mnode);
+		if (ret)
+			__mmu_int_rb_remove(mnode, root);
+	}
+unlock:
+	spin_unlock_irqrestore(&handler->lock, flags);
+	return ret;
+}
+
+/* Caller must host handler lock */
+static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
+					   unsigned long addr,
+					   unsigned long len)
+{
+	struct mmu_rb_node *node = NULL;
+
+	hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+	if (!handler->ops->filter) {
+		node = __mmu_int_rb_iter_first(handler->root, addr,
+					       (addr + len) - 1);
+	} else {
+		for (node = __mmu_int_rb_iter_first(handler->root, addr,
+						    (addr + len) - 1);
+		     node;
+		     node = __mmu_int_rb_iter_next(node, addr,
+						   (addr + len) - 1)) {
+			if (handler->ops->filter(node, addr, len))
+				return node;
+		}
+	}
+	return node;
+}
+
+static void __mmu_rb_remove(struct mmu_rb_handler *handler,
+			    struct mmu_rb_node *node, bool arg)
+{
+	/* Validity of handler and node pointers has been checked by caller. */
+	hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
+		  node->len);
+	__mmu_int_rb_remove(node, handler->root);
+	if (handler->ops->remove)
+		handler->ops->remove(handler->root, node, arg);
+}
+
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *root, unsigned long addr,
+				       unsigned long len)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	if (!handler)
+		return ERR_PTR(-EINVAL);
+
+	spin_lock_irqsave(&handler->lock, flags);
+	node = __mmu_rb_search(handler, addr, len);
+	spin_unlock_irqrestore(&handler->lock, flags);
+
+	return node;
+}
+
+void hfi1_mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct mmu_rb_handler *handler = find_mmu_handler(root);
+	unsigned long flags;
+
+	if (!handler || !node)
+		return;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	__mmu_rb_remove(handler, node, false);
+	spin_unlock_irqrestore(&handler->lock, flags);
+}
+
+static struct mmu_rb_handler *find_mmu_handler(struct rb_root *root)
+{
+	struct mmu_rb_handler *handler;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mmu_rb_lock, flags);
+	list_for_each_entry(handler, &mmu_rb_handlers, list) {
+		if (handler->root == root)
+			goto unlock;
+	}
+	handler = NULL;
+unlock:
+	spin_unlock_irqrestore(&mmu_rb_lock, flags);
+	return handler;
+}
+
+static inline void mmu_notifier_page(struct mmu_notifier *mn,
+				     struct mm_struct *mm, unsigned long addr)
+{
+	mmu_notifier_mem_invalidate(mn, addr, addr + PAGE_SIZE);
+}
+
+static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
+					    struct mm_struct *mm,
+					    unsigned long start,
+					    unsigned long end)
+{
+	mmu_notifier_mem_invalidate(mn, start, end);
+}
+
+static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
+					unsigned long start, unsigned long end)
+{
+	struct mmu_rb_handler *handler =
+		container_of(mn, struct mmu_rb_handler, mn);
+	struct rb_root *root = handler->root;
+	struct mmu_rb_node *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&handler->lock, flags);
+	for (node = __mmu_int_rb_iter_first(root, start, end - 1); node;
+	     node = __mmu_int_rb_iter_next(node, start, end - 1)) {
+		hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
+			  node->addr, node->len);
+		if (handler->ops->invalidate(root, node))
+			__mmu_rb_remove(handler, node, true);
+	}
+	spin_unlock_irqrestore(&handler->lock, flags);
+}
diff --git a/drivers/staging/rdma/hfi1/mmu_rb.h b/drivers/staging/rdma/hfi1/mmu_rb.h
new file mode 100644
index 000000000000..f8523fdb8a18
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/mmu_rb.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MMU_RB_H
+#define _HFI1_MMU_RB_H
+
+#include "hfi.h"
+
+struct mmu_rb_node {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long __last;
+	struct rb_node node;
+};
+
+struct mmu_rb_ops {
+	bool (*filter)(struct mmu_rb_node *, unsigned long, unsigned long);
+	int (*insert)(struct rb_root *, struct mmu_rb_node *);
+	void (*remove)(struct rb_root *, struct mmu_rb_node *, bool);
+	int (*invalidate)(struct rb_root *, struct mmu_rb_node *);
+};
+
+int hfi1_mmu_rb_register(struct rb_root *root, struct mmu_rb_ops *ops);
+void hfi1_mmu_rb_unregister(struct rb_root *);
+int hfi1_mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+void hfi1_mmu_rb_remove(struct rb_root *, struct mmu_rb_node *);
+struct mmu_rb_node *hfi1_mmu_rb_search(struct rb_root *, unsigned long,
+				       unsigned long);
+
+#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
deleted file mode 100644
index 38253212af7a..000000000000
--- a/drivers/staging/rdma/hfi1/mr.c
+++ /dev/null
@@ -1,473 +0,0 @@
-/*
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2015 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  - Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  - Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  - Neither the name of Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "hfi.h"
-
-/* Fast memory region */
-struct hfi1_fmr {
-	struct ib_fmr ibfmr;
-	struct hfi1_mregion mr;        /* must be last */
-};
-
-static inline struct hfi1_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct hfi1_fmr, ibfmr);
-}
-
-static int init_mregion(struct hfi1_mregion *mr, struct ib_pd *pd,
-			int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = DIV_ROUND_UP(count, HFI1_SEGSZ);
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_mregion(struct hfi1_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * hfi1_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
- */
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct hfi1_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = hfi1_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct hfi1_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = DIV_ROUND_UP(count, HFI1_SEGSZ);
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = hfi1_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata)
-{
-	struct hfi1_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		void *vaddr;
-
-		vaddr = page_address(sg_page(sg));
-		if (!vaddr) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		mr->mr.map[m]->segs[n].vaddr = vaddr;
-		mr->mr.map[m]->segs[n].length = umem->page_size;
-		n++;
-		if (n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by hfi1_get_dma_mr()
- * or hfi1_reg_user_mr().
- */
-int hfi1_dereg_mr(struct ib_mr *ibmr)
-{
-	struct hfi1_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&mr->mr);
-
-	hfi1_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		dd_dev_err(
-			dd_from_ibdev(mr->mr.pd->device),
-			"hfi1_dereg_mr timeout mr %p pd %p refcount %u\n",
-			mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
-		hfi1_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- * FIXME: IB_WR_REG_MR is not supported
- */
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_num_sg)
-{
-	struct hfi1_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	return &mr->ibmr;
-}
-
-/**
- * hfi1_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr)
-{
-	struct hfi1_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = DIV_ROUND_UP(fmr_attr->max_pages, HFI1_SEGSZ);
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = hfi1_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * hfi1_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == HFI1_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * hfi1_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int hfi1_unmap_fmr(struct list_head *fmr_list)
-{
-	struct hfi1_fmr *fmr;
-	struct hfi1_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * hfi1_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct hfi1_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	hfi1_free_lkey(&fmr->mr);
-	hfi1_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		hfi1_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}
diff --git a/drivers/staging/rdma/hfi1/opa_compat.h b/drivers/staging/rdma/hfi1/opa_compat.h
index f64eec1c2951..6ef3c1cbdcd7 100644
--- a/drivers/staging/rdma/hfi1/opa_compat.h
+++ b/drivers/staging/rdma/hfi1/opa_compat.h
@@ -1,14 +1,13 @@
 #ifndef _LINUX_H
 #define _LINUX_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -111,19 +108,4 @@ enum opa_port_phys_state {
 	/* values 12-15 are reserved/ignored */
 };
 
-/* OPA_PORT_TYPE_* definitions - these belong in opa_port_info.h */
-#define OPA_PORT_TYPE_UNKNOWN          0
-#define OPA_PORT_TYPE_DISCONNECTED     1
-/* port is not currently usable, CableInfo not available */
-#define OPA_PORT_TYPE_FIXED            2
-/* A fixed backplane port in a director class switch. All OPA ASICS */
-#define OPA_PORT_TYPE_VARIABLE         3
-/* A backplane port in a blade system, possibly mixed configuration */
-#define OPA_PORT_TYPE_STANDARD         4
-/* implies a SFF-8636 defined format for CableInfo (QSFP) */
-#define OPA_PORT_TYPE_SI_PHOTONICS      5
-/* A silicon photonics module implies TBD defined format for CableInfo
- * as defined by Intel SFO group */
-/* 6 - 15 are reserved */
-
 #endif /* _LINUX_H */
diff --git a/drivers/staging/rdma/hfi1/pcie.c b/drivers/staging/rdma/hfi1/pcie.c
index 47ca6314e328..0bac21e6a658 100644
--- a/drivers/staging/rdma/hfi1/pcie.c
+++ b/drivers/staging/rdma/hfi1/pcie.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -57,6 +54,7 @@
 
 #include "hfi.h"
 #include "chip_registers.h"
+#include "aspm.h"
 
 /* link speed vector for Gen3 speed - not in Linux headers */
 #define GEN1_SPEED_VECTOR 0x1
@@ -122,8 +120,9 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 			goto bail;
 		}
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	} else
+	} else {
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	}
 	if (ret) {
 		hfi1_early_err(&pdev->dev,
 			       "Unable to set DMA consistent mask: %d\n", ret);
@@ -131,13 +130,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	pci_set_master(pdev);
-	ret = pci_enable_pcie_error_reporting(pdev);
-	if (ret) {
-		hfi1_early_err(&pdev->dev,
-			       "Unable to enable pcie error reporting: %d\n",
-			      ret);
-		ret = 0;
-	}
+	(void)pci_enable_pcie_error_reporting(pdev);
 	goto done;
 
 bail:
@@ -222,10 +215,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							&dd->pcie_devctl2);
+				  &dd->pcie_devctl2);
 	pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
-	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							&dd->pci_lnkctl3);
+	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
 
 	return 0;
@@ -238,7 +230,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
  */
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
 {
-	u64 __iomem *base = (void __iomem *) dd->kregbase;
+	u64 __iomem *base = (void __iomem *)dd->kregbase;
 
 	dd->flags &= ~HFI1_PRESENT;
 	dd->kregbase = NULL;
@@ -274,7 +266,7 @@ void hfi1_pcie_flr(struct hfi1_devdata *dd)
 
 clear:
 	pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
-						PCI_EXP_DEVCTL_BCR_FLR);
+				 PCI_EXP_DEVCTL_BCR_FLR);
 	/* PCIe spec requires the function to be back within 100ms */
 	msleep(100);
 }
@@ -287,9 +279,11 @@ static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
 	struct msix_entry *msix_entry;
 	int i;
 
-	/* We can't pass hfi1_msix_entry array to msix_setup
+	/*
+	 * We can't pass hfi1_msix_entry array to msix_setup
 	 * so use a dummy msix_entry array and copy the allocated
-	 * irq back to the hfi1_msix_entry array. */
+	 * irq back to the hfi1_msix_entry array.
+	 */
 	msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
 	if (!msix_entry) {
 		ret = -ENOMEM;
@@ -319,7 +313,6 @@ do_intx:
 		   nvec, ret);
 	*msixcnt = 0;
 	hfi1_enable_intx(dd->pcidev);
-
 }
 
 /* return the PCIe link speed from the given link status */
@@ -367,6 +360,7 @@ static void update_lbus_info(struct hfi1_devdata *dd)
 int pcie_speeds(struct hfi1_devdata *dd)
 {
 	u32 linkcap;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 
 	if (!pci_is_pcie(dd->pcidev)) {
 		dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -379,15 +373,15 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
 	if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
 		dd_dev_info(dd,
-			"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
-			linkcap & PCI_EXP_LNKCAP_SLS);
+			    "This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
+			    linkcap & PCI_EXP_LNKCAP_SLS);
 		dd->link_gen3_capable = 0;
 	}
 
 	/*
 	 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
 	 */
-	if (dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+	if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
 		dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
 		dd->link_gen3_capable = 0;
 	}
@@ -395,9 +389,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	/* obtain the link width and current speed */
 	update_lbus_info(dd);
 
-	/* check against expected pcie width and complain if "wrong" */
-	if (dd->lbus_width < 16)
-		dd_dev_err(dd, "PCIe width %u (x16 HFI)\n", dd->lbus_width);
+	dd_dev_info(dd, "%s\n", dd->lbus_info);
 
 	return 0;
 }
@@ -436,23 +428,18 @@ void hfi1_enable_intx(struct pci_dev *pdev)
 void restore_pci_variables(struct hfi1_devdata *dd)
 {
 	pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_0, dd->pcibar0);
-	pci_write_config_dword(dd->pcidev,
-				PCI_BASE_ADDRESS_1, dd->pcibar1);
-	pci_write_config_dword(dd->pcidev,
-				PCI_ROM_ADDRESS, dd->pci_rom);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
+	pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
+	pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
-							dd->pcie_devctl2);
+				   dd->pcie_devctl2);
 	pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
-	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-							dd->pci_lnkctl3);
+	pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
 }
 
-
 /*
  * BIOS may not set PCIe bus-utilization parameters for best performance.
  * Check and optionally adjust them to maximize our throughput.
@@ -461,6 +448,10 @@ static int hfi1_pcie_caps;
 module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
 MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
 
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
 static void tune_pcie_caps(struct hfi1_devdata *dd)
 {
 	struct pci_dev *parent;
@@ -479,6 +470,12 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
 	}
 	/* Find out supported and configured values for parent (root) */
 	parent = dd->pcidev->bus->self;
+	/*
+	 * The driver cannot perform the tuning if it does not have
+	 * access to the upstream component.
+	 */
+	if (!parent)
+		return;
 	if (!pci_is_root_bus(parent->bus)) {
 		dd_dev_info(dd, "Parent not root\n");
 		return;
@@ -532,6 +529,7 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
 		pcie_set_readrq(dd->pcidev, ep_mrrs);
 	}
 }
+
 /* End of PCIe capability tuning */
 
 /*
@@ -746,21 +744,22 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
 		c0 = fs - (eq[i][PREC] / div) - (eq[i][POST] / div);
 		c_plus1 = eq[i][POST] / div;
 		pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
-			eq_value(c_minus1, c0, c_plus1));
+				       eq_value(c_minus1, c0, c_plus1));
 		/* check if these coefficients violate EQ rules */
 		pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
-								&violation);
+				      &violation);
 		if (violation
 		    & PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
 			if (hit_error == 0) {
 				dd_dev_err(dd,
-					"Gen3 EQ Table Coefficient rule violations\n");
+					   "Gen3 EQ Table Coefficient rule violations\n");
 				dd_dev_err(dd, "         prec   attn   post\n");
 			}
 			dd_dev_err(dd, "   p%02d:   %02x     %02x     %02x\n",
-				i, (u32)eq[i][0], (u32)eq[i][1], (u32)eq[i][2]);
+				   i, (u32)eq[i][0], (u32)eq[i][1],
+				   (u32)eq[i][2]);
 			dd_dev_err(dd, "            %02x     %02x     %02x\n",
-				(u32)c_minus1, (u32)c0, (u32)c_plus1);
+				   (u32)c_minus1, (u32)c0, (u32)c_plus1);
 			hit_error = 1;
 		}
 	}
@@ -772,7 +771,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
 /*
  * Steps to be done after the PCIe firmware is downloaded and
  * before the SBR for the Pcie Gen3.
- * The hardware mutex is already being held.
+ * The SBus resource is already being held.
  */
 static void pcie_post_steps(struct hfi1_devdata *dd)
 {
@@ -815,8 +814,8 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
 		if (pdev != dev) {
 			dd_dev_err(dd,
-				"%s: another device is on the same bus\n",
-				__func__);
+				   "%s: another device is on the same bus\n",
+				   __func__);
 			return -ENOTTY;
 		}
 
@@ -840,8 +839,8 @@ static void write_gasket_interrupt(struct hfi1_devdata *dd, int index,
 				   u16 code, u16 data)
 {
 	write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (index * 8),
-	    (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT)
-	    |((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
+		  (((u64)code << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_CODE_SHIFT) |
+		   ((u64)data << ASIC_PCIE_SD_INTRPT_LIST_INTRPT_DATA_SHIFT)));
 }
 
 /*
@@ -851,14 +850,13 @@ static void arm_gasket_logic(struct hfi1_devdata *dd)
 {
 	u64 reg;
 
-	reg = (((u64)1 << dd->hfi1_id)
-			<< ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT)
-		| ((u64)pcie_serdes_broadcast[dd->hfi1_id]
-			<< ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT
-		| ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK
-		| ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK)
-			<< ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT
-		);
+	reg = (((u64)1 << dd->hfi1_id) <<
+	       ASIC_PCIE_SD_HOST_CMD_INTRPT_CMD_SHIFT) |
+	      ((u64)pcie_serdes_broadcast[dd->hfi1_id] <<
+	       ASIC_PCIE_SD_HOST_CMD_SBUS_RCVR_ADDR_SHIFT |
+	       ASIC_PCIE_SD_HOST_CMD_SBR_MODE_SMASK |
+	       ((u64)SBR_DELAY_US & ASIC_PCIE_SD_HOST_CMD_TIMER_MASK) <<
+	       ASIC_PCIE_SD_HOST_CMD_TIMER_SHIFT);
 	write_csr(dd, ASIC_PCIE_SD_HOST_CMD, reg);
 	/* read back to push the write */
 	read_csr(dd, ASIC_PCIE_SD_HOST_CMD);
@@ -946,7 +944,7 @@ static void write_xmt_margin(struct hfi1_devdata *dd, const char *fname)
  */
 int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 {
-	struct pci_dev *parent;
+	struct pci_dev *parent = dd->pcidev->bus->self;
 	u64 fw_ctrl;
 	u64 reg, therm;
 	u32 reg32, fs, lf;
@@ -955,8 +953,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 	int do_retry, retry_count = 0;
 	uint default_pset;
 	u16 target_vector, target_speed;
-	u16 lnkctl, lnkctl2, vendor;
-	u8 nsbr = 1;
+	u16 lnkctl2, vendor;
 	u8 div;
 	const u8 (*eq)[3];
 	int return_error = 0;
@@ -983,17 +980,21 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 	/* if already at target speed, done (unless forced) */
 	if (dd->lbus_speed == target_speed) {
 		dd_dev_info(dd, "%s: PCIe already at gen%d, %s\n", __func__,
-			pcie_target,
-			pcie_force ? "re-doing anyway" : "skipping");
+			    pcie_target,
+			    pcie_force ? "re-doing anyway" : "skipping");
 		if (!pcie_force)
 			return 0;
 	}
 
 	/*
-	 * A0 needs an additional SBR
+	 * The driver cannot do the transition if it has no access to the
+	 * upstream component
 	 */
-	if (is_ax(dd))
-		nsbr++;
+	if (!parent) {
+		dd_dev_info(dd, "%s: No upstream, Can't do gen3 transition\n",
+			    __func__);
+		return 0;
+	}
 
 	/*
 	 * Do the Gen3 transition.  Steps are those of the PCIe Gen3
@@ -1009,10 +1010,13 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
 		goto done_no_mutex;
 	}
 
-	/* hold the HW mutex across the firmware download and SBR */
-	ret = acquire_hw_mutex(dd);
-	if (ret)
+	/* hold the SBus resource across the firmware download and SBR */
+	ret = acquire_chip_resource(dd, CR_SBUS, SBUS_TIMEOUT);
+	if (ret) {
+		dd_dev_err(dd, "%s: unable to acquire SBus resource\n",
+			   __func__);
 		return ret;
+	}
 
 	/* make sure thermal polling is not causing interrupts */
 	therm = read_csr(dd, ASIC_CFG_THERM_POLL_EN);
@@ -1030,8 +1034,11 @@ retry:
 	/* step 4: download PCIe Gen3 SerDes firmware */
 	dd_dev_info(dd, "%s: downloading firmware\n", __func__);
 	ret = load_pcie_firmware(dd);
-	if (ret)
+	if (ret) {
+		/* do not proceed if the firmware cannot be downloaded */
+		return_error = 1;
 		goto done;
+	}
 
 	/* step 5: set up device parameter settings */
 	dd_dev_info(dd, "%s: setting PCIe registers\n", __func__);
@@ -1091,8 +1098,10 @@ retry:
 		default_pset = DEFAULT_MCP_PSET;
 	}
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL101,
-		(fs << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT)
-		| (lf << PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
+			       (fs <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_FS_SHIFT) |
+			       (lf <<
+				PCIE_CFG_REG_PL101_GEN3_EQ_LOCAL_LF_SHIFT));
 	ret = load_eq_table(dd, eq, fs, div);
 	if (ret)
 		goto done;
@@ -1106,15 +1115,15 @@ retry:
 		pcie_pset = default_pset;
 	if (pcie_pset > 10) {	/* valid range is 0-10, inclusive */
 		dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
-			__func__, pcie_pset, default_pset);
+			   __func__, pcie_pset, default_pset);
 		pcie_pset = default_pset;
 	}
 	dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
 	pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
-		((1 << pcie_pset)
-			<< PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT)
-		| PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK
-		| PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
+			       ((1 << pcie_pset) <<
+			PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
+			PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
+			PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
 
 	/*
 	 * step 5b: Do post firmware download steps via SBus
@@ -1142,11 +1151,12 @@ retry:
 	 */
 	write_xmt_margin(dd, __func__);
 
-	/* step 5e: disable active state power management (ASPM) */
+	/*
+	 * step 5e: disable active state power management (ASPM). It
+	 * will be enabled if required later
+	 */
 	dd_dev_info(dd, "%s: clearing ASPM\n", __func__);
-	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &lnkctl);
-	lnkctl &= ~PCI_EXP_LNKCTL_ASPMC;
-	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, lnkctl);
+	aspm_hw_disable_l1(dd);
 
 	/*
 	 * step 5f: clear DirectSpeedChange
@@ -1165,16 +1175,15 @@ retry:
 	 * that it is Gen3 capable earlier.
 	 */
 	dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
-	parent = dd->pcidev->bus->self;
 	pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	/* only write to parent if target is not as high as ours */
 	if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
 		lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 		lnkctl2 |= target_vector;
 		dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-			(u32)lnkctl2);
+			    (u32)lnkctl2);
 		pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
 	} else {
 		dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
@@ -1183,17 +1192,17 @@ retry:
 	dd_dev_info(dd, "%s: setting target link speed\n", __func__);
 	pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
 	dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
 	lnkctl2 |= target_vector;
 	dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
-		(u32)lnkctl2);
+		    (u32)lnkctl2);
 	pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
 
 	/* step 5h: arm gasket logic */
 	/* hold DC in reset across the SBR */
 	write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
-	(void) read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
+	(void)read_csr(dd, CCE_DC_CTRL); /* DC reset hold */
 	/* save firmware control across the SBR */
 	fw_ctrl = read_csr(dd, MISC_CFG_FW_CTRL);
 
@@ -1224,8 +1233,8 @@ retry:
 	ret = pci_read_config_word(dd->pcidev, PCI_VENDOR_ID, &vendor);
 	if (ret) {
 		dd_dev_info(dd,
-			"%s: read of VendorID failed after SBR, err %d\n",
-			__func__, ret);
+			    "%s: read of VendorID failed after SBR, err %d\n",
+			    __func__, ret);
 		return_error = 1;
 		goto done;
 	}
@@ -1265,8 +1274,7 @@ retry:
 	write_csr(dd, CCE_DC_CTRL, 0);
 
 	/* Set the LED off */
-	if (is_ax(dd))
-		setextled(dd, 0);
+	setextled(dd, 0);
 
 	/* check for any per-lane errors */
 	pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
@@ -1277,8 +1285,8 @@ retry:
 			& ASIC_PCIE_SD_HOST_STATUS_FW_DNLD_STS_MASK;
 	if ((status & (1 << dd->hfi1_id)) == 0) {
 		dd_dev_err(dd,
-			"%s: gasket status 0x%x, expecting 0x%x\n",
-			__func__, status, 1 << dd->hfi1_id);
+			   "%s: gasket status 0x%x, expecting 0x%x\n",
+			   __func__, status, 1 << dd->hfi1_id);
 		ret = -EIO;
 		goto done;
 	}
@@ -1295,13 +1303,13 @@ retry:
 	/* update our link information cache */
 	update_lbus_info(dd);
 	dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
-		dd->lbus_info);
+		    dd->lbus_info);
 
 	if (dd->lbus_speed != target_speed) { /* not target */
 		/* maybe retry */
 		do_retry = retry_count < pcie_retry;
 		dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
-			pcie_target, do_retry ? ", retrying" : "");
+			   pcie_target, do_retry ? ", retrying" : "");
 		retry_count++;
 		if (do_retry) {
 			msleep(100); /* allow time to settle */
@@ -1317,7 +1325,7 @@ done:
 		dd_dev_info(dd, "%s: Re-enable therm polling\n",
 			    __func__);
 	}
-	release_hw_mutex(dd);
+	release_chip_resource(dd, CR_SBUS);
 done_no_mutex:
 	/* return no error if it is OK to be at current speed */
 	if (ret && !return_error) {
diff --git a/drivers/staging/rdma/hfi1/pio.c b/drivers/staging/rdma/hfi1/pio.c
index b51a4416312b..c6849ce9e5eb 100644
--- a/drivers/staging/rdma/hfi1/pio.c
+++ b/drivers/staging/rdma/hfi1/pio.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -101,7 +98,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
 	/* Fall through */
 	case PSC_DATA_VL_ENABLE:
 		/* Disallow sending on VLs not enabled */
-		mask = (((~0ull)<<num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK)<<
+		mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
 				SEND_CTRL_UNSUPPORTED_VL_SHIFT;
 		reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
 		break;
@@ -130,7 +127,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
 	if (write) {
 		write_csr(dd, SEND_CTRL, reg);
 		if (flush)
-			(void) read_csr(dd, SEND_CTRL); /* flush write */
+			(void)read_csr(dd, SEND_CTRL); /* flush write */
 	}
 
 	spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
@@ -177,8 +174,10 @@ static struct mem_pool_config sc_mem_pool_config[NUM_SC_POOLS] = {
 
 /* memory pool information, used when calculating final sizes */
 struct mem_pool_info {
-	int centipercent;	/* 100th of 1% of memory to use, -1 if blocks
-				   already set */
+	int centipercent;	/*
+				 * 100th of 1% of memory to use, -1 if blocks
+				 * already set
+				 */
 	int count;		/* count of contexts in the pool */
 	int blocks;		/* block size of the pool */
 	int size;		/* context size, in blocks */
@@ -312,7 +311,7 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
 		if (i == SC_ACK) {
 			count = dd->n_krcv_queues;
 		} else if (i == SC_KERNEL) {
-			count = num_vls + 1 /* VL15 */;
+			count = (INIT_SC_PER_VL * num_vls) + 1 /* VL15 */;
 		} else if (count == SCC_PER_CPU) {
 			count = dd->num_rcv_contexts - dd->n_krcv_queues;
 		} else if (count < 0) {
@@ -509,7 +508,7 @@ static void sc_hw_free(struct hfi1_devdata *dd, u32 sw_index, u32 hw_context)
 	sci = &dd->send_contexts[sw_index];
 	if (!sci->allocated) {
 		dd_dev_err(dd, "%s: sw_index %u not allocated? hw_context %u\n",
-			__func__, sw_index, hw_context);
+			   __func__, sw_index, hw_context);
 	}
 	sci->allocated = 0;
 	dd->hw_to_sw[hw_context] = INVALID_SCI;
@@ -625,7 +624,7 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 				& SC(CREDIT_CTRL_THRESHOLD_MASK))
 			   << SC(CREDIT_CTRL_THRESHOLD_SHIFT));
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 
 		/* force a credit return on change to avoid a possible stall */
 		force_return = 1;
@@ -700,7 +699,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 	if (dd->flags & HFI1_FROZEN)
 		return NULL;
 
-	sc = kzalloc_node(sizeof(struct send_context), GFP_KERNEL, numa);
+	sc = kzalloc_node(sizeof(*sc), GFP_KERNEL, numa);
 	if (!sc)
 		return NULL;
 
@@ -763,9 +762,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 
 	/* set the default partition key */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_PARTITION_KEY),
-		(DEFAULT_PKEY &
-			SC(CHECK_PARTITION_KEY_VALUE_MASK))
-		    << SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
+			(SC(CHECK_PARTITION_KEY_VALUE_MASK) &
+			 DEFAULT_PKEY) <<
+			SC(CHECK_PARTITION_KEY_VALUE_SHIFT));
 
 	/* per context type checks */
 	if (type == SC_USER) {
@@ -778,8 +777,8 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 
 	/* set the send context check opcode mask and value */
 	write_kctxt_csr(dd, hw_context, SC(CHECK_OPCODE),
-		((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
-		((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
+			((u64)opmask << SC(CHECK_OPCODE_MASK_SHIFT)) |
+			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
 
 	/* set up credit return */
 	reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
@@ -797,7 +796,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 		thresh = sc_percent_to_threshold(sc, 50);
 	} else if (type == SC_USER) {
 		thresh = sc_percent_to_threshold(sc,
-				user_credit_return_threshold);
+						 user_credit_return_threshold);
 	} else { /* kernel */
 		thresh = sc_mtu_to_threshold(sc, hfi1_max_mtu, hdrqentsize);
 	}
@@ -852,7 +851,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
 		  sc->credit_ctrl,
 		  thresh);
 
-
 	return sc;
 }
 
@@ -971,11 +969,11 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		if (loop > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
-				"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				__func__, sc->sw_index,
-				sc->hw_context, (u32)reg);
+				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
+				   __func__, sc->sw_index,
+				   sc->hw_context, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		loop++;
@@ -1021,7 +1019,7 @@ int sc_restart(struct send_context *sc)
 		return -EINVAL;
 
 	dd_dev_info(dd, "restarting send context %u(%u)\n", sc->sw_index,
-		sc->hw_context);
+		    sc->hw_context);
 
 	/*
 	 * Step 1: Wait for the context to actually halt.
@@ -1036,7 +1034,7 @@ int sc_restart(struct send_context *sc)
 			break;
 		if (loop > 100) {
 			dd_dev_err(dd, "%s: context %u(%u) not halting, skipping\n",
-				__func__, sc->sw_index, sc->hw_context);
+				   __func__, sc->sw_index, sc->hw_context);
 			return -ETIME;
 		}
 		loop++;
@@ -1062,9 +1060,9 @@ int sc_restart(struct send_context *sc)
 				break;
 			if (loop > 100) {
 				dd_dev_err(dd,
-					"%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
-					__func__, sc->sw_index,
-					sc->hw_context, count);
+					   "%s: context %u(%u) timeout waiting for PIO buffers to zero, remaining %d\n",
+					   __func__, sc->sw_index,
+					   sc->hw_context, count);
 			}
 			loop++;
 			udelay(1);
@@ -1177,18 +1175,18 @@ void pio_reset_all(struct hfi1_devdata *dd)
 	if (ret == -EIO) {
 		/* clear the error */
 		write_csr(dd, SEND_PIO_ERR_CLEAR,
-			SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
+			  SEND_PIO_ERR_CLEAR_PIO_INIT_SM_IN_ERR_SMASK);
 	}
 
 	/* reset init all */
 	write_csr(dd, SEND_PIO_INIT_CTXT,
-			SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
+		  SEND_PIO_INIT_CTXT_PIO_ALL_CTXT_INIT_SMASK);
 	udelay(2);
 	ret = pio_init_wait_progress(dd);
 	if (ret < 0) {
 		dd_dev_err(dd,
-			"PIO send context init %s while initializing all PIO blocks\n",
-			ret == -ETIMEDOUT ? "is stuck" : "had an error");
+			   "PIO send context init %s while initializing all PIO blocks\n",
+			   ret == -ETIMEDOUT ? "is stuck" : "had an error");
 	}
 }
 
@@ -1236,8 +1234,7 @@ int sc_enable(struct send_context *sc)
 	 */
 	reg = read_kctxt_csr(dd, sc->hw_context, SC(ERR_STATUS));
 	if (reg)
-		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR),
-			reg);
+		write_kctxt_csr(dd, sc->hw_context, SC(ERR_CLEAR), reg);
 
 	/*
 	 * The HW PIO initialization engine can handle only one init
@@ -1295,7 +1292,7 @@ void sc_return_credits(struct send_context *sc)
 
 	/* a 0->1 transition schedules a credit return */
 	write_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_FORCE),
-		SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
+			SC(CREDIT_FORCE_FORCE_RETURN_SMASK));
 	/*
 	 * Ensure that the write is flushed and the credit return is
 	 * scheduled. We care more about the 0 -> 1 transition.
@@ -1321,7 +1318,7 @@ void sc_drop(struct send_context *sc)
 		return;
 
 	dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
-			__func__, sc->sw_index, sc->hw_context);
+		    __func__, sc->sw_index, sc->hw_context);
 }
 
 /*
@@ -1346,7 +1343,7 @@ void sc_stop(struct send_context *sc, int flag)
 	wake_up(&sc->halt_wait);
 }
 
-#define BLOCK_DWORDS (PIO_BLOCK_SIZE/sizeof(u32))
+#define BLOCK_DWORDS (PIO_BLOCK_SIZE / sizeof(u32))
 #define dwords_to_blocks(x) DIV_ROUND_UP(x, BLOCK_DWORDS)
 
 /*
@@ -1430,8 +1427,10 @@ retry:
 	next = head + 1;
 	if (next >= sc->sr_size)
 		next = 0;
-	/* update the head - must be last! - the releaser can look at fields
-	   in pbuf once we move the head */
+	/*
+	 * update the head - must be last! - the releaser can look at fields
+	 * in pbuf once we move the head
+	 */
 	smp_wmb();
 	sc->sr_head = next;
 	spin_unlock_irqrestore(&sc->alloc_lock, flags);
@@ -1469,7 +1468,7 @@ void sc_add_credit_return_intr(struct send_context *sc)
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl |= SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	sc->credit_intr_count++;
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
@@ -1491,7 +1490,7 @@ void sc_del_credit_return_intr(struct send_context *sc)
 	if (sc->credit_intr_count == 0) {
 		sc->credit_ctrl &= ~SC(CREDIT_CTRL_CREDIT_INTR_SMASK);
 		write_kctxt_csr(sc->dd, sc->hw_context,
-			SC(CREDIT_CTRL), sc->credit_ctrl);
+				SC(CREDIT_CTRL), sc->credit_ctrl);
 	}
 	spin_unlock_irqrestore(&sc->credit_ctrl_lock, flags);
 }
@@ -1526,8 +1525,9 @@ static void sc_piobufavail(struct send_context *sc)
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct hfi1_qp *qps[PIO_WAIT_BATCH_SIZE];
-	struct hfi1_qp *qp;
+	struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	unsigned i, n = 0;
 
@@ -1545,24 +1545,28 @@ static void sc_piobufavail(struct send_context *sc)
 		struct iowait *wait;
 
 		if (n == ARRAY_SIZE(qps))
-			goto full;
+			break;
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		qps[n++] = qp;
 	}
 	/*
-	 * Counting: only call wantpiobuf_intr() if there were waiters and they
-	 * are now all gone.
+	 * If there had been waiters and there are more
+	 * insure that we redo the force to avoid a potential hang.
 	 */
-	if (n)
+	if (n) {
 		hfi1_sc_wantpiobuf_intr(sc, 0);
-full:
+		if (!list_empty(list))
+			hfi1_sc_wantpiobuf_intr(sc, 1);
+	}
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	for (i = 0; i < n; i++)
-		hfi1_qp_wakeup(qps[i], HFI1_S_WAIT_PIO);
+		hfi1_qp_wakeup(qps[i],
+			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
 }
 
 /* translate a send credit update to a bit code of reasons */
@@ -1661,7 +1665,7 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
 	sw_index = dd->hw_to_sw[hw_context];
 	if (unlikely(sw_index >= dd->num_send_contexts)) {
 		dd_dev_err(dd, "%s: invalid hw (%u) to sw (%u) mapping\n",
-			__func__, hw_context, sw_index);
+			   __func__, hw_context, sw_index);
 		goto done;
 	}
 	sc = dd->send_contexts[sw_index].sc;
@@ -1674,8 +1678,8 @@ void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context)
 		sw_index = dd->hw_to_sw[gc];
 		if (unlikely(sw_index >= dd->num_send_contexts)) {
 			dd_dev_err(dd,
-				"%s: invalid hw (%u) to sw (%u) mapping\n",
-				__func__, hw_context, sw_index);
+				   "%s: invalid hw (%u) to sw (%u) mapping\n",
+				   __func__, hw_context, sw_index);
 			continue;
 		}
 		sc_release_update(dd->send_contexts[sw_index].sc);
@@ -1684,11 +1688,217 @@ done:
 	spin_unlock(&dd->sc_lock);
 }
 
+/*
+ * pio_select_send_context_vl() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns a send context based on the selector and a vl.
+ * The mapping fields are protected by RCU
+ */
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl)
+{
+	struct pio_vl_map *m;
+	struct pio_map_elem *e;
+	struct send_context *rval;
+
+	/*
+	 * NOTE This should only happen if SC->VL changed after the initial
+	 * checks on the QP/AH
+	 * Default will return VL0's send context below
+	 */
+	if (unlikely(vl >= num_vls)) {
+		rval = NULL;
+		goto done;
+	}
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->pio_map);
+	if (unlikely(!m)) {
+		rcu_read_unlock();
+		return dd->vld[0].sc;
+	}
+	e = m->map[vl & m->mask];
+	rval = e->ksc[selector & e->mask];
+	rcu_read_unlock();
+
+done:
+	rval = !rval ? dd->vld[0].sc : rval;
+	return rval;
+}
+
+/*
+ * pio_select_send_context_sc() - select send context
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @sc5: the 5 bit sc
+ *
+ * This function returns an send context based on the selector and an sc
+ */
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5)
+{
+	u8 vl = sc_to_vlt(dd, sc5);
+
+	return pio_select_send_context_vl(dd, selector, vl);
+}
+
+/*
+ * Free the indicated map struct
+ */
+static void pio_map_free(struct pio_vl_map *m)
+{
+	int i;
+
+	for (i = 0; m && i < m->actual_vls; i++)
+		kfree(m->map[i]);
+	kfree(m);
+}
+
+/*
+ * Handle RCU callback
+ */
+static void pio_map_rcu_callback(struct rcu_head *list)
+{
+	struct pio_vl_map *m = container_of(list, struct pio_vl_map, list);
+
+	pio_map_free(m);
+}
+
+/*
+ * pio_map_init - called when #vls change
+ * @dd: hfi1_devdata
+ * @port: port number
+ * @num_vls: number of vls
+ * @vl_scontexts: per vl send context mapping (optional)
+ *
+ * This routine changes the mapping based on the number of vls.
+ *
+ * vl_scontexts is used to specify a non-uniform vl/send context
+ * loading. NULL implies auto computing the loading and giving each
+ * VL an uniform distribution of send contexts per VL.
+ *
+ * The auto algorithm computers the sc_per_vl and the number of extra
+ * send contexts. Any extra send contexts are added from the last VL
+ * on down
+ *
+ * rcu locking is used here to control access to the mapping fields.
+ *
+ * If either the num_vls or num_send_contexts are non-power of 2, the
+ * array sizes in the struct pio_vl_map and the struct pio_map_elem are
+ * rounded up to the next highest power of 2 and the first entry is
+ * reused in a round robin fashion.
+ *
+ * If an error occurs the map change is not done and the mapping is not
+ * chaged.
+ *
+ */
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
+{
+	int i, j;
+	int extra, sc_per_vl;
+	int scontext = 1;
+	int num_kernel_send_contexts = 0;
+	u8 lvl_scontexts[OPA_MAX_VLS];
+	struct pio_vl_map *oldmap, *newmap;
+
+	if (!vl_scontexts) {
+		/* send context 0 reserved for VL15 */
+		for (i = 1; i < dd->num_send_contexts; i++)
+			if (dd->send_contexts[i].type == SC_KERNEL)
+				num_kernel_send_contexts++;
+		/* truncate divide */
+		sc_per_vl = num_kernel_send_contexts / num_vls;
+		/* extras */
+		extra = num_kernel_send_contexts % num_vls;
+		vl_scontexts = lvl_scontexts;
+		/* add extras from last vl down */
+		for (i = num_vls - 1; i >= 0; i--, extra--)
+			vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
+	}
+	/* build new map */
+	newmap = kzalloc(sizeof(*newmap) +
+			 roundup_pow_of_two(num_vls) *
+			 sizeof(struct pio_map_elem *),
+			 GFP_KERNEL);
+	if (!newmap)
+		goto bail;
+	newmap->actual_vls = num_vls;
+	newmap->vls = roundup_pow_of_two(num_vls);
+	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	for (i = 0; i < newmap->vls; i++) {
+		/* save for wrap around */
+		int first_scontext = scontext;
+
+		if (i < newmap->actual_vls) {
+			int sz = roundup_pow_of_two(vl_scontexts[i]);
+
+			/* only allocate once */
+			newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
+						 sz * sizeof(struct
+							     send_context *),
+						 GFP_KERNEL);
+			if (!newmap->map[i])
+				goto bail;
+			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
+			/* assign send contexts */
+			for (j = 0; j < sz; j++) {
+				if (dd->kernel_send_context[scontext])
+					newmap->map[i]->ksc[j] =
+					dd->kernel_send_context[scontext];
+				if (++scontext >= first_scontext +
+						  vl_scontexts[i])
+					/* wrap back to first send context */
+					scontext = first_scontext;
+			}
+		} else {
+			/* just re-use entry without allocating */
+			newmap->map[i] = newmap->map[i % num_vls];
+		}
+		scontext = first_scontext + vl_scontexts[i];
+	}
+	/* newmap in hand, save old map */
+	spin_lock_irq(&dd->pio_map_lock);
+	oldmap = rcu_dereference_protected(dd->pio_map,
+					   lockdep_is_held(&dd->pio_map_lock));
+
+	/* publish newmap */
+	rcu_assign_pointer(dd->pio_map, newmap);
+
+	spin_unlock_irq(&dd->pio_map_lock);
+	/* success, free any old map after grace period */
+	if (oldmap)
+		call_rcu(&oldmap->list, pio_map_rcu_callback);
+	return 0;
+bail:
+	/* free any partial allocation */
+	pio_map_free(newmap);
+	return -ENOMEM;
+}
+
+void free_pio_map(struct hfi1_devdata *dd)
+{
+	/* Free PIO map if allocated */
+	if (rcu_access_pointer(dd->pio_map)) {
+		spin_lock_irq(&dd->pio_map_lock);
+		pio_map_free(rcu_access_pointer(dd->pio_map));
+		RCU_INIT_POINTER(dd->pio_map, NULL);
+		spin_unlock_irq(&dd->pio_map_lock);
+		synchronize_rcu();
+	}
+	kfree(dd->kernel_send_context);
+	dd->kernel_send_context = NULL;
+}
+
 int init_pervl_scs(struct hfi1_devdata *dd)
 {
 	int i;
-	u64 mask, all_vl_mask = (u64) 0x80ff; /* VLs 0-7, 15 */
+	u64 mask, all_vl_mask = (u64)0x80ff; /* VLs 0-7, 15 */
+	u64 data_vls_mask = (u64)0x00ff; /* VLs 0-7 */
 	u32 ctxt;
+	struct hfi1_pportdata *ppd = dd->pport;
 
 	dd->vld[15].sc = sc_alloc(dd, SC_KERNEL,
 				  dd->rcd[0]->rcvhdrqentsize, dd->node);
@@ -1696,6 +1906,12 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 		goto nomem;
 	hfi1_init_ctxt(dd->vld[15].sc);
 	dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
+
+	dd->kernel_send_context = kmalloc_node(dd->num_send_contexts *
+					sizeof(struct send_context *),
+					GFP_KERNEL, dd->node);
+	dd->kernel_send_context[0] = dd->vld[15].sc;
+
 	for (i = 0; i < num_vls; i++) {
 		/*
 		 * Since this function does not deal with a specific
@@ -1708,12 +1924,19 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 					 dd->rcd[0]->rcvhdrqentsize, dd->node);
 		if (!dd->vld[i].sc)
 			goto nomem;
-
+		dd->kernel_send_context[i + 1] = dd->vld[i].sc;
 		hfi1_init_ctxt(dd->vld[i].sc);
-
 		/* non VL15 start with the max MTU */
 		dd->vld[i].mtu = hfi1_max_mtu;
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		dd->kernel_send_context[i + 1] =
+		sc_alloc(dd, SC_KERNEL, dd->rcd[0]->rcvhdrqentsize, dd->node);
+		if (!dd->kernel_send_context[i + 1])
+			goto nomem;
+		hfi1_init_ctxt(dd->kernel_send_context[i + 1]);
+	}
+
 	sc_enable(dd->vld[15].sc);
 	ctxt = dd->vld[15].sc->hw_context;
 	mask = all_vl_mask & ~(1LL << 15);
@@ -1721,17 +1944,29 @@ int init_pervl_scs(struct hfi1_devdata *dd)
 	dd_dev_info(dd,
 		    "Using send context %u(%u) for VL15\n",
 		    dd->vld[15].sc->sw_index, ctxt);
+
 	for (i = 0; i < num_vls; i++) {
 		sc_enable(dd->vld[i].sc);
 		ctxt = dd->vld[i].sc->hw_context;
-		mask = all_vl_mask & ~(1LL << i);
+		mask = all_vl_mask & ~(data_vls_mask);
 		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
 	}
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++) {
+		sc_enable(dd->kernel_send_context[i + 1]);
+		ctxt = dd->kernel_send_context[i + 1]->hw_context;
+		mask = all_vl_mask & ~(data_vls_mask);
+		write_kctxt_csr(dd, ctxt, SC(CHECK_VL), mask);
+	}
+
+	if (pio_map_init(dd, ppd->port - 1, num_vls, NULL))
+		goto nomem;
 	return 0;
 nomem:
 	sc_free(dd->vld[15].sc);
 	for (i = 0; i < num_vls; i++)
 		sc_free(dd->vld[i].sc);
+	for (i = num_vls; i < INIT_SC_PER_VL * num_vls; i++)
+		sc_free(dd->kernel_send_context[i + 1]);
 	return -ENOMEM;
 }
 
@@ -1769,11 +2004,11 @@ int init_credit_return(struct hfi1_devdata *dd)
 					bytes,
 					&dd->cr_base[i].pa,
 					GFP_KERNEL);
-		if (dd->cr_base[i].va == NULL) {
+		if (!dd->cr_base[i].va) {
 			set_dev_node(&dd->pcidev->dev, dd->node);
 			dd_dev_err(dd,
-				"Unable to allocate credit return DMA range for NUMA %d\n",
-				i);
+				   "Unable to allocate credit return DMA range for NUMA %d\n",
+				   i);
 			ret = -ENOMEM;
 			goto done;
 		}
@@ -1797,10 +2032,10 @@ void free_credit_return(struct hfi1_devdata *dd)
 	for (i = 0; i < num_numa; i++) {
 		if (dd->cr_base[i].va) {
 			dma_free_coherent(&dd->pcidev->dev,
-				TXE_NUM_CONTEXTS
-					* sizeof(struct credit_return),
-				dd->cr_base[i].va,
-				dd->cr_base[i].pa);
+					  TXE_NUM_CONTEXTS *
+					  sizeof(struct credit_return),
+					  dd->cr_base[i].va,
+					  dd->cr_base[i].pa);
 		}
 	}
 	kfree(dd->cr_base);
diff --git a/drivers/staging/rdma/hfi1/pio.h b/drivers/staging/rdma/hfi1/pio.h
index 53d3e0a79375..0026976ce4f6 100644
--- a/drivers/staging/rdma/hfi1/pio.h
+++ b/drivers/staging/rdma/hfi1/pio.h
@@ -1,14 +1,13 @@
 #ifndef _PIO_H
 #define _PIO_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,7 +47,6 @@
  *
  */
 
-
 /* send context types */
 #define SC_KERNEL 0
 #define SC_ACK    1
@@ -106,6 +102,7 @@ struct send_context {
 	struct hfi1_devdata *dd;		/* device */
 	void __iomem *base_addr;	/* start of PIO memory */
 	union pio_shadow_ring *sr;	/* shadow ring */
+
 	volatile __le64 *hw_free;	/* HW free counter */
 	struct work_struct halt_work;	/* halted context work queue entry */
 	unsigned long flags;		/* flags */
@@ -165,6 +162,112 @@ struct sc_config_sizes {
 	short int count;
 };
 
+/*
+ * The diagram below details the relationship of the mapping structures
+ *
+ * Since the mapping now allows for non-uniform send contexts per vl, the
+ * number of send contexts for a vl is either the vl_scontexts[vl] or
+ * a computation based on num_kernel_send_contexts/num_vls:
+ *
+ * For example:
+ * nactual = vl_scontexts ? vl_scontexts[vl] : num_kernel_send_contexts/num_vls
+ *
+ * n = roundup to next highest power of 2 using nactual
+ *
+ * In the case where there are num_kernel_send_contexts/num_vls doesn't divide
+ * evenly, the extras are added from the last vl downward.
+ *
+ * For the case where n > nactual, the send contexts are assigned
+ * in a round robin fashion wrapping back to the first send context
+ * for a particular vl.
+ *
+ *               dd->pio_map
+ *                    |                                   pio_map_elem[0]
+ *                    |                                +--------------------+
+ *                    v                                |       mask         |
+ *               pio_vl_map                            |--------------------|
+ *      +--------------------------+                   | ksc[0] -> sc 1     |
+ *      |    list (RCU)            |                   |--------------------|
+ *      |--------------------------|                 ->| ksc[1] -> sc 2     |
+ *      |    mask                  |              --/  |--------------------|
+ *      |--------------------------|            -/     |        *           |
+ *      |    actual_vls (max 8)    |          -/       |--------------------|
+ *      |--------------------------|       --/         | ksc[n] -> sc n     |
+ *      |    vls (max 8)           |     -/            +--------------------+
+ *      |--------------------------|  --/
+ *      |    map[0]                |-/
+ *      |--------------------------|                   +--------------------+
+ *      |    map[1]                |---                |       mask         |
+ *      |--------------------------|   \----           |--------------------|
+ *      |           *              |        \--        | ksc[0] -> sc 1+n   |
+ *      |           *              |           \----   |--------------------|
+ *      |           *              |                \->| ksc[1] -> sc 2+n   |
+ *      |--------------------------|                   |--------------------|
+ *      |   map[vls - 1]           |-                  |         *          |
+ *      +--------------------------+ \-                |--------------------|
+ *                                     \-              | ksc[m] -> sc m+n   |
+ *                                       \             +--------------------+
+ *                                        \-
+ *                                          \
+ *                                           \-        +--------------------+
+ *                                             \-      |       mask         |
+ *                                               \     |--------------------|
+ *                                                \-   | ksc[0] -> sc 1+m+n |
+ *                                                  \- |--------------------|
+ *                                                    >| ksc[1] -> sc 2+m+n |
+ *                                                     |--------------------|
+ *                                                     |         *          |
+ *                                                     |--------------------|
+ *                                                     | ksc[o] -> sc o+m+n |
+ *                                                     +--------------------+
+ *
+ */
+
+/* Initial number of send contexts per VL */
+#define INIT_SC_PER_VL 2
+
+/*
+ * struct pio_map_elem - mapping for a vl
+ * @mask - selector mask
+ * @ksc - array of kernel send contexts for this vl
+ *
+ * The mask is used to "mod" the selector to
+ * produce index into the trailing array of
+ * kscs
+ */
+struct pio_map_elem {
+	u32 mask;
+	struct send_context *ksc[0];
+};
+
+/*
+ * struct pio_vl_map - mapping for a vl
+ * @list - rcu head for free callback
+ * @mask - vl mask to "mod" the vl to produce an index to map array
+ * @actual_vls - number of vls
+ * @vls - numbers of vls rounded to next power of 2
+ * @map - array of pio_map_elem entries
+ *
+ * This is the parent mapping structure. The trailing members of the
+ * struct point to pio_map_elem entries, which in turn point to an
+ * array of kscs for that vl.
+ */
+struct pio_vl_map {
+	struct rcu_head list;
+	u32 mask;
+	u8 actual_vls;
+	u8 vls;
+	struct pio_map_elem *map[0];
+};
+
+int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
+		 u8 *vl_scontexts);
+void free_pio_map(struct hfi1_devdata *dd);
+struct send_context *pio_select_send_context_vl(struct hfi1_devdata *dd,
+						u32 selector, u8 vl);
+struct send_context *pio_select_send_context_sc(struct hfi1_devdata *dd,
+						u32 selector, u8 sc5);
+
 /* send context functions */
 int init_credit_return(struct hfi1_devdata *dd);
 void free_credit_return(struct hfi1_devdata *dd);
@@ -183,7 +286,7 @@ void sc_flush(struct send_context *sc);
 void sc_drop(struct send_context *sc);
 void sc_stop(struct send_context *sc, int bit);
 struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
-			pio_release_cb cb, void *arg);
+				pio_release_cb cb, void *arg);
 void sc_release_update(struct send_context *sc);
 void sc_return_credits(struct send_context *sc);
 void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
@@ -212,12 +315,11 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd);
 void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl);
 void pio_send_control(struct hfi1_devdata *dd, int op);
 
-
 /* PIO copy routines */
 void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	      const void *from, size_t count);
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-					const void *from, size_t nbytes);
+			const void *from, size_t nbytes);
 void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
 void seg_pio_copy_end(struct pio_buf *pbuf);
 
diff --git a/drivers/staging/rdma/hfi1/pio_copy.c b/drivers/staging/rdma/hfi1/pio_copy.c
index 64bef6c26653..8c25e1b58849 100644
--- a/drivers/staging/rdma/hfi1/pio_copy.c
+++ b/drivers/staging/rdma/hfi1/pio_copy.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -52,9 +49,9 @@
 
 /* additive distance between non-SOP and SOP space */
 #define SOP_DISTANCE (TXE_PIO_SIZE / 2)
-#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE-1)
+#define PIO_BLOCK_MASK (PIO_BLOCK_SIZE - 1)
 /* number of QUADWORDs in a block */
-#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE/sizeof(u64))
+#define PIO_BLOCK_QWS (PIO_BLOCK_SIZE / sizeof(u64))
 
 /**
  * pio_copy - copy data block to MMIO space
@@ -83,11 +80,13 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((count>>1) * sizeof(u64));
+	dend = dest + ((count >> 1) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -152,8 +151,10 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
 		writeq(val.val64, dest);
 		dest += sizeof(u64);
 	}
-	/* fill in rest of block, no need to check pbuf->end
-	   as we only wrap on a block boundary */
+	/*
+	 * fill in rest of block, no need to check pbuf->end
+	 * as we only wrap on a block boundary
+	 */
 	while (((unsigned long)dest & PIO_BLOCK_MASK) != 0) {
 		writeq(0, dest);
 		dest += sizeof(u64);
@@ -177,7 +178,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
  * "zero" shift - bit shift used to zero out upper bytes.  Input is
  * the count of LSB bytes to preserve.
  */
-#define zshift(x) (8 * (8-(x)))
+#define zshift(x) (8 * (8 - (x)))
 
 /*
  * "merge" shift - bit shift used to merge with carry bytes.  Input is
@@ -196,7 +197,7 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
  * o nbytes must not span a QW boundary
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	unsigned long off;
 
@@ -223,7 +224,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	unsigned long off = (unsigned long)from & 0x7;
 	unsigned int room, xbytes;
@@ -244,7 +245,7 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
 		pbuf->carry.val64 |= (((*(u64 *)from)
 					>> mshift(off))
 					<< zshift(xbytes))
-					>> zshift(xbytes+pbuf->carry_bytes);
+					>> zshift(xbytes + pbuf->carry_bytes);
 		off = 0;
 		pbuf->carry_bytes += xbytes;
 		nbytes -= xbytes;
@@ -362,7 +363,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
  * o from may _not_ be u64 aligned.
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-							unsigned int nbytes)
+				  unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[0], from, nbytes);
 	pbuf->carry_bytes = nbytes;
@@ -377,7 +378,7 @@ static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
  * o nbytes may span a QW boundary
  */
 static inline void read_extra_bytes(struct pio_buf *pbuf,
-					const void *from, unsigned int nbytes)
+				    const void *from, unsigned int nbytes)
 {
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
 	pbuf->carry_bytes += nbytes;
@@ -411,7 +412,7 @@ static inline void merge_write8(
 
 	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
 	writeq(pbuf->carry.val64, dest);
-	jcopy(&pbuf->carry.val8[0], src+remainder, pbuf->carry_bytes);
+	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
 }
 
 /*
@@ -433,7 +434,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
 		u64 zero = 0;
 
 		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-						8 - pbuf->carry_bytes);
+		      8 - pbuf->carry_bytes);
 		writeq(pbuf->carry.val64, dest);
 		return 1;
 	}
@@ -453,7 +454,7 @@ static inline int carry_write8(struct pio_buf *pbuf, void *dest)
  * @nbytes: bytes to copy
  */
 void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
-				const void *from, size_t nbytes)
+			const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + SOP_DISTANCE;
 	void __iomem *send = dest + PIO_BLOCK_SIZE;
@@ -463,11 +464,13 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
 	dest += sizeof(u64);
 
 	/* calculate where the QWORD data ends - in SOP=1 space */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (dend < send) {
-		/* all QWORD data is within the SOP block, does *not*
-		   reach the end of the SOP block */
+		/*
+		 * all QWORD data is within the SOP block, does *not*
+		 * reach the end of the SOP block
+		 */
 
 		while (dest < dend) {
 			writeq(*(u64 *)from, dest);
@@ -562,8 +565,10 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
 		xend = min(send, dend);
 
@@ -639,13 +644,13 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
  * Must handle nbytes < 8.
  */
 static void mid_copy_straight(struct pio_buf *pbuf,
-						const void *from, size_t nbytes)
+			      const void *from, size_t nbytes)
 {
 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
 	void __iomem *dend;			/* 8-byte data end */
 
 	/* calculate 8-byte data end */
-	dend = dest + ((nbytes>>3) * sizeof(u64));
+	dend = dest + ((nbytes >> 3) * sizeof(u64));
 
 	if (pbuf->qw_written < PIO_BLOCK_QWS) {
 		/*
@@ -656,8 +661,10 @@ static void mid_copy_straight(struct pio_buf *pbuf,
 		void __iomem *send;		/* SOP end */
 		void __iomem *xend;
 
-		/* calculate the end of data or end of block, whichever
-		   comes first */
+		/*
+		 * calculate the end of data or end of block, whichever
+		 * comes first
+		 */
 		send = pbuf->start + PIO_BLOCK_SIZE;
 		xend = min(send, dend);
 
@@ -713,7 +720,7 @@ static void mid_copy_straight(struct pio_buf *pbuf,
 	/* we know carry_bytes was zero on entry to this routine */
 	read_low_bytes(pbuf, from, nbytes & 0x7);
 
-	pbuf->qw_written += nbytes>>3;
+	pbuf->qw_written += nbytes >> 3;
 }
 
 /*
diff --git a/drivers/staging/rdma/hfi1/platform.c b/drivers/staging/rdma/hfi1/platform.c
new file mode 100644
index 000000000000..0a1d074583e4
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/platform.c
@@ -0,0 +1,893 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "efivar.h"
+
+void get_platform_config(struct hfi1_devdata *dd)
+{
+	int ret = 0;
+	unsigned long size = 0;
+	u8 *temp_platform_config = NULL;
+
+	ret = read_hfi1_efi_var(dd, "configuration", &size,
+				(void **)&temp_platform_config);
+	if (ret) {
+		dd_dev_info(dd,
+			    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+			    __func__);
+		/* fall back to request firmware */
+		platform_config_load = 1;
+		goto bail;
+	}
+
+	dd->platform_config.data = temp_platform_config;
+	dd->platform_config.size = size;
+
+bail:
+	/* exit */;
+}
+
+void free_platform_config(struct hfi1_devdata *dd)
+{
+	if (!platform_config_load) {
+		/*
+		 * was loaded from EFI, release memory
+		 * allocated by read_efi_var
+		 */
+		kfree(dd->platform_config.data);
+	}
+	/*
+	 * else do nothing, dispose_firmware will release
+	 * struct firmware platform_config on driver exit
+	 */
+}
+
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
+{
+	u8 tx_ctrl_byte = on ? 0x0 : 0xF;
+	int ret = 0;
+
+	ret = qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_TX_CTRL_BYTE_OFFS,
+			 &tx_ctrl_byte, 1);
+	/* we expected 1, so consider 0 an error */
+	if (ret == 0)
+		ret = -EIO;
+	else if (ret == 1)
+		ret = 0;
+	return ret;
+}
+
+static int qual_power(struct hfi1_pportdata *ppd)
+{
+	u32 cable_power_class = 0, power_class_max = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret = 0;
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+		SYSTEM_TABLE_QSFP_POWER_CLASS_MAX, &power_class_max, 4);
+	if (ret)
+		return ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class <= 3 && cable_power_class > (power_class_max - 1))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	else if (cable_power_class > 4 && cable_power_class > (power_class_max))
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY);
+	/*
+	 * cable_power_class will never have value 4 as this simply
+	 * means the high power settings are unused
+	 */
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Port disabled due to system power restrictions\n",
+			__func__);
+		ret = -EPERM;
+	}
+	return ret;
+}
+
+static int qual_bitrate(struct hfi1_pportdata *ppd)
+{
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G) &&
+	    cache[QSFP_NOM_BIT_RATE_250_OFFS] < 0x64)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G) &&
+	    cache[QSFP_NOM_BIT_RATE_100_OFFS] < 0x7D)
+		ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY);
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Cable failed bitrate check, disabling port\n",
+			__func__);
+		return -EPERM;
+	}
+	return 0;
+}
+
+static int set_qsfp_high_power(struct hfi1_pportdata *ppd)
+{
+	u8 cable_power_class = 0, power_ctrl_byte = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+	int ret;
+
+	if (QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]) != 4)
+		cable_power_class = QSFP_HIGH_PWR(cache[QSFP_MOD_PWR_OFFS]);
+	else
+		cable_power_class = QSFP_PWR(cache[QSFP_MOD_PWR_OFFS]);
+
+	if (cable_power_class) {
+		power_ctrl_byte = cache[QSFP_PWR_CTRL_BYTE_OFFS];
+
+		power_ctrl_byte |= 1;
+		power_ctrl_byte &= ~(0x2);
+
+		ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+				 QSFP_PWR_CTRL_BYTE_OFFS,
+				 &power_ctrl_byte, 1);
+		if (ret != 1)
+			return -EIO;
+
+		if (cable_power_class > 3) {
+			/* > power class 4*/
+			power_ctrl_byte |= (1 << 2);
+			ret = qsfp_write(ppd, ppd->dd->hfi1_id,
+					 QSFP_PWR_CTRL_BYTE_OFFS,
+					 &power_ctrl_byte, 1);
+			if (ret != 1)
+				return -EIO;
+		}
+
+		/* SFF 8679 rev 1.7 LPMode Deassert time */
+		msleep(300);
+	}
+	return 0;
+}
+
+static void apply_rx_cdr(struct hfi1_pportdata *ppd,
+			 u32 rx_preset_index,
+			 u8 *cdr_ctrl_byte)
+{
+	u32 rx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x4) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x40)))
+		return;
+
+	/* rx_preset preset to zero to catch error */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
+		&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_CDR,
+		&rx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	rx_preset = (rx_preset | (rx_preset << 1) |
+			(rx_preset << 2) | (rx_preset << 3));
+
+	if (rx_preset) {
+		*cdr_ctrl_byte |= rx_preset;
+	} else {
+		*cdr_ctrl_byte &= rx_preset;
+		/* Preserve current TX CDR status */
+		*cdr_ctrl_byte |= (cache[QSFP_CDR_CTRL_BYTE_OFFS] & 0xF0);
+	}
+}
+
+static void apply_tx_cdr(struct hfi1_pportdata *ppd,
+			 u32 tx_preset_index,
+			 u8 *ctr_ctrl_byte)
+{
+	u32 tx_preset;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	if (!((cache[QSFP_MOD_PWR_OFFS] & 0x8) &&
+	      (cache[QSFP_CDR_INFO_OFFS] & 0x80)))
+		return;
+
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR_APPLY, &tx_preset, 4);
+
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_CDR_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd,
+		PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index,
+		TX_PRESET_TABLE_QSFP_TX_CDR, &tx_preset, 4);
+
+	/* Expand cdr setting to all 4 lanes */
+	tx_preset = (tx_preset | (tx_preset << 1) |
+			(tx_preset << 2) | (tx_preset << 3));
+
+	if (tx_preset)
+		*ctr_ctrl_byte |= (tx_preset << 4);
+	else
+		/* Preserve current/determined RX CDR status */
+		*ctr_ctrl_byte &= ((tx_preset << 4) | 0xF);
+}
+
+static void apply_cdr_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 cdr_ctrl_byte = cache[QSFP_CDR_CTRL_BYTE_OFFS];
+
+	apply_rx_cdr(ppd, rx_preset_index, &cdr_ctrl_byte);
+
+	apply_tx_cdr(ppd, tx_preset_index, &cdr_ctrl_byte);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, QSFP_CDR_CTRL_BYTE_OFFS,
+		   &cdr_ctrl_byte, 1);
+}
+
+static void apply_tx_eq_auto(struct hfi1_pportdata *ppd)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x8))
+		return;
+	/* Disable adaptive TX EQ if present */
+	tx_eq = cache[(128 * 3) + 241];
+	tx_eq &= 0xF0;
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 241, &tx_eq, 1);
+}
+
+static void apply_tx_eq_prog(struct hfi1_pportdata *ppd, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+	u32 tx_preset;
+	u8 tx_eq;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x4))
+		return;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ_APPLY,
+		&tx_preset, 4);
+	if (!tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX_EQ_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+			tx_preset_index, TX_PRESET_TABLE_QSFP_TX_EQ,
+			&tx_preset, 4);
+
+	if (((cache[(128 * 3) + 224] & 0xF0) >> 4) < tx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: TX EQ %x unsupported\n",
+			__func__, tx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying EQ %x\n",
+			__func__, cache[608] & 0xF0);
+
+		tx_preset = (cache[608] & 0xF0) >> 4;
+	}
+
+	tx_eq = tx_preset | (tx_preset << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 234, &tx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 235, &tx_eq, 1);
+}
+
+static void apply_rx_eq_emp(struct hfi1_pportdata *ppd, u32 rx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_eq, *cache = ppd->qsfp_info.cache;
+
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x2))
+		return;
+	get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+			rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
+			&rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: RX_EMP_APPLY is set to disabled\n",
+			__func__);
+		return;
+	}
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_RX_PRESET_TABLE,
+		rx_preset_index, RX_PRESET_TABLE_QSFP_RX_EMP,
+		&rx_preset, 4);
+
+	if ((cache[(128 * 3) + 224] & 0xF) < rx_preset) {
+		dd_dev_info(
+			ppd->dd,
+			"%s: Requested RX EMP %x\n",
+			__func__, rx_preset);
+
+		dd_dev_info(
+			ppd->dd,
+			"%s: Applying supported EMP %x\n",
+			__func__, cache[608] & 0xF);
+
+		rx_preset = cache[608] & 0xF;
+	}
+
+	rx_eq = rx_preset | (rx_preset << 4);
+
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 236, &rx_eq, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 237, &rx_eq, 1);
+}
+
+static void apply_eq_settings(struct hfi1_pportdata *ppd,
+			      u32 rx_preset_index, u32 tx_preset_index)
+{
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+
+	apply_tx_eq_auto(ppd);
+
+	apply_tx_eq_prog(ppd, tx_preset_index);
+
+	apply_rx_eq_emp(ppd, rx_preset_index);
+}
+
+static void apply_rx_amplitude_settings(
+		struct hfi1_pportdata *ppd, u32 rx_preset_index,
+		u32 tx_preset_index)
+{
+	u32 rx_preset;
+	u8 rx_amp = 0, i = 0, preferred = 0, *cache = ppd->qsfp_info.cache;
+
+	/* no point going on w/o a page 3 */
+	if (cache[2] & 4) {
+		dd_dev_info(ppd->dd,
+			    "%s: Upper page 03 not present\n",
+			    __func__);
+		return;
+	}
+	if (!(cache[QSFP_EQ_INFO_OFFS] & 0x1)) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
+				  &rx_preset, 4);
+
+	if (!rx_preset) {
+		dd_dev_info(ppd->dd,
+			    "%s: RX_AMP_APPLY is set to disabled\n",
+			    __func__);
+		return;
+	}
+	get_platform_config_field(ppd->dd,
+				  PLATFORM_CONFIG_RX_PRESET_TABLE,
+				  rx_preset_index,
+				  RX_PRESET_TABLE_QSFP_RX_AMP,
+				  &rx_preset, 4);
+
+	dd_dev_info(ppd->dd,
+		    "%s: Requested RX AMP %x\n",
+		    __func__,
+		    rx_preset);
+
+	for (i = 0; i < 4; i++) {
+		if (cache[(128 * 3) + 225] & (1 << i)) {
+			preferred = i;
+			if (preferred == rx_preset)
+				break;
+		}
+	}
+
+	/*
+	 * Verify that preferred RX amplitude is not just a
+	 * fall through of the default
+	 */
+	if (!preferred && !(cache[(128 * 3) + 225] & 0x1)) {
+		dd_dev_info(ppd->dd, "No supported RX AMP, not applying\n");
+		return;
+	}
+
+	dd_dev_info(ppd->dd,
+		    "%s: Applying RX AMP %x\n", __func__, preferred);
+
+	rx_amp = preferred | (preferred << 4);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 238, &rx_amp, 1);
+	qsfp_write(ppd, ppd->dd->hfi1_id, (256 * 3) + 239, &rx_amp, 1);
+}
+
+#define OPA_INVALID_INDEX 0xFFF
+
+static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
+			   u32 config_data, const char *message)
+{
+	u8 i;
+	int ret = HCMD_SUCCESS;
+
+	for (i = 0; i < 4; i++) {
+		ret = load_8051_config(ppd->dd, field_id, i, config_data);
+		if (ret != HCMD_SUCCESS) {
+			dd_dev_err(
+				ppd->dd,
+				"%s: %s for lane %u failed\n",
+				message, __func__, i);
+		}
+	}
+}
+
+static void apply_tunings(
+		struct hfi1_pportdata *ppd, u32 tx_preset_index,
+		u8 tuning_method, u32 total_atten, u8 limiting_active)
+{
+	int ret = 0;
+	u32 config_data = 0, tx_preset = 0;
+	u8 precur = 0, attn = 0, postcur = 0, external_device_config = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	/* Enable external device config if channel is limiting active */
+	read_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			 GENERAL_CONFIG, &config_data);
+	config_data |= limiting_active;
+	ret = load_8051_config(ppd->dd, LINK_OPTIMIZATION_SETTINGS,
+			       GENERAL_CONFIG, config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(
+			ppd->dd,
+			"%s: Failed to set enable external device config\n",
+			__func__);
+
+	config_data = 0; /* re-init  */
+	/* Pass tuning method to 8051 */
+	read_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			 &config_data);
+	config_data |= tuning_method;
+	ret = load_8051_config(ppd->dd, LINK_TUNING_PARAMETERS, GENERAL_CONFIG,
+			       config_data);
+	if (ret != HCMD_SUCCESS)
+		dd_dev_err(ppd->dd, "%s: Failed to set tuning method\n",
+			   __func__);
+
+	/* Set same channel loss for both TX and RX */
+	config_data = 0 | (total_atten << 16) | (total_atten << 24);
+	apply_tx_lanes(ppd, CHANNEL_LOSS_SETTINGS, config_data,
+		       "Setting channel loss");
+
+	/* Inform 8051 of cable capabilities */
+	if (ppd->qsfp_info.cache_valid) {
+		external_device_config =
+			((cache[QSFP_MOD_PWR_OFFS] & 0x4) << 3) |
+			((cache[QSFP_MOD_PWR_OFFS] & 0x8) << 2) |
+			((cache[QSFP_EQ_INFO_OFFS] & 0x2) << 1) |
+			(cache[QSFP_EQ_INFO_OFFS] & 0x4);
+		ret = read_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, &config_data);
+		/* Clear, then set the external device config field */
+		config_data &= ~(0xFF << 24);
+		config_data |= (external_device_config << 24);
+		ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
+				       GENERAL_CONFIG, config_data);
+		if (ret != HCMD_SUCCESS)
+			dd_dev_info(ppd->dd,
+				    "%s: Failed set ext device config params\n",
+				    __func__);
+	}
+
+	if (tx_preset_index == OPA_INVALID_INDEX) {
+		if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
+			dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
+				    __func__);
+		return;
+	}
+
+	/* Following for limiting active channels only */
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE, tx_preset_index,
+		TX_PRESET_TABLE_PRECUR, &tx_preset, 4);
+	precur = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_ATTN, &tx_preset, 4);
+	attn = tx_preset;
+
+	get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_TX_PRESET_TABLE,
+		tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
+	postcur = tx_preset;
+
+	config_data = precur | (attn << 8) | (postcur << 16);
+
+	apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
+		       "Applying TX settings");
+}
+
+static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
+			    u32 *ptr_rx_preset, u32 *ptr_total_atten)
+{
+	int ret;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret) {
+		dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+			   __func__, (int)ppd->dd->hfi1_id);
+		return ret;
+	}
+
+	ppd->qsfp_info.limiting_active = 1;
+
+	ret = set_qsfp_tx(ppd, 0);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	ret = qual_bitrate(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (ppd->qsfp_info.reset_needed) {
+		reset_qsfp(ppd);
+		ppd->qsfp_info.reset_needed = 0;
+		refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+	} else {
+		ppd->qsfp_info.reset_needed = 1;
+	}
+
+	ret = set_qsfp_high_power(ppd);
+	if (ret)
+		goto bail_unlock;
+
+	if (cache[QSFP_EQ_INFO_OFFS] & 0x4) {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	} else {
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
+			ptr_tx_preset, 4);
+		if (ret) {
+			*ptr_tx_preset = OPA_INVALID_INDEX;
+			goto bail_unlock;
+		}
+	}
+
+	ret = get_platform_config_field(
+		ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+		PORT_TABLE_RX_PRESET_IDX, ptr_rx_preset, 4);
+	if (ret) {
+		*ptr_rx_preset = OPA_INVALID_INDEX;
+		goto bail_unlock;
+	}
+
+	if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, ptr_total_atten, 4);
+	else if ((lss & OPA_LINK_SPEED_12_5G) && (lse & OPA_LINK_SPEED_12_5G))
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_12G, ptr_total_atten, 4);
+
+	apply_cdr_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_eq_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	apply_rx_amplitude_settings(ppd, *ptr_rx_preset, *ptr_tx_preset);
+
+	ret = set_qsfp_tx(ppd, 1);
+
+bail_unlock:
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+	return ret;
+}
+
+static int tune_qsfp(struct hfi1_pportdata *ppd,
+		     u32 *ptr_tx_preset, u32 *ptr_rx_preset,
+		     u8 *ptr_tuning_method, u32 *ptr_total_atten)
+{
+	u32 cable_atten = 0, remote_atten = 0, platform_atten = 0;
+	u16 lss = ppd->link_speed_supported, lse = ppd->link_speed_enabled;
+	int ret = 0;
+	u8 *cache = ppd->qsfp_info.cache;
+
+	switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+	case 0xA ... 0xB:
+		ret = get_platform_config_field(
+			ppd->dd,
+			PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G,
+			&platform_atten, 4);
+		if (ret)
+			return ret;
+
+		if ((lss & OPA_LINK_SPEED_25G) && (lse & OPA_LINK_SPEED_25G))
+			cable_atten = cache[QSFP_CU_ATTEN_12G_OFFS];
+		else if ((lss & OPA_LINK_SPEED_12_5G) &&
+			 (lse & OPA_LINK_SPEED_12_5G))
+			cable_atten = cache[QSFP_CU_ATTEN_7G_OFFS];
+
+		/* Fallback to configured attenuation if cable memory is bad */
+		if (cable_atten == 0 || cable_atten > 36) {
+			ret = get_platform_config_field(
+				ppd->dd,
+				PLATFORM_CONFIG_SYSTEM_TABLE, 0,
+				SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G,
+				&cable_atten, 4);
+			if (ret)
+				return ret;
+		}
+
+		ret = get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+		if (ret)
+			return ret;
+
+		*ptr_total_atten = platform_atten + cable_atten + remote_atten;
+
+		*ptr_tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case 0x0 ... 0x9: /* fallthrough */
+	case 0xC: /* fallthrough */
+	case 0xE:
+		ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
+				       ptr_total_atten);
+		if (ret)
+			return ret;
+
+		*ptr_tuning_method = OPA_ACTIVE_TUNING;
+		break;
+	case 0xD: /* fallthrough */
+	case 0xF:
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+			    __func__);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * This function communicates its success or failure via ppd->driver_link_ready
+ * Thus, it depends on its association with start_link(...) which checks
+ * driver_link_ready before proceeding with the link negotiation and
+ * initialization process.
+ */
+void tune_serdes(struct hfi1_pportdata *ppd)
+{
+	int ret = 0;
+	u32 total_atten = 0;
+	u32 remote_atten = 0, platform_atten = 0;
+	u32 rx_preset_index, tx_preset_index;
+	u8 tuning_method = 0, limiting_active = 0;
+	struct hfi1_devdata *dd = ppd->dd;
+
+	rx_preset_index = OPA_INVALID_INDEX;
+	tx_preset_index = OPA_INVALID_INDEX;
+
+	/* the link defaults to enabled */
+	ppd->link_enabled = 1;
+	/* the driver link ready state defaults to not ready */
+	ppd->driver_link_ready = 0;
+	ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
+
+	/* Skip the tuning for testing (loopback != none) and simulations */
+	if (loopback != LOOPBACK_NONE ||
+	    ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+		ppd->driver_link_ready = 1;
+		return;
+	}
+
+	ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+					PORT_TABLE_PORT_TYPE, &ppd->port_type,
+					4);
+	if (ret)
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+
+	switch (ppd->port_type) {
+	case PORT_TYPE_DISCONNECTED:
+		ppd->offline_disabled_reason =
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
+		dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+			    __func__);
+		goto bail;
+	case PORT_TYPE_FIXED:
+		/* platform_atten, remote_atten pre-zeroed to catch error */
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_LOCAL_ATTEN_25G, &platform_atten, 4);
+
+		get_platform_config_field(
+			ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+			PORT_TABLE_REMOTE_ATTEN_25G, &remote_atten, 4);
+
+		total_atten = platform_atten + remote_atten;
+
+		tuning_method = OPA_PASSIVE_TUNING;
+		break;
+	case PORT_TYPE_VARIABLE:
+		if (qsfp_mod_present(ppd)) {
+			/*
+			 * platform_atten, remote_atten pre-zeroed to
+			 * catch error
+			 */
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_LOCAL_ATTEN_25G,
+				&platform_atten, 4);
+
+			get_platform_config_field(
+				ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
+				PORT_TABLE_REMOTE_ATTEN_25G,
+				&remote_atten, 4);
+
+			total_atten = platform_atten + remote_atten;
+
+			tuning_method = OPA_PASSIVE_TUNING;
+		} else
+			ppd->offline_disabled_reason =
+			     HFI1_ODR_MASK(OPA_LINKDOWN_REASON_CHASSIS_CONFIG);
+		break;
+	case PORT_TYPE_QSFP:
+		if (qsfp_mod_present(ppd)) {
+			refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+
+			if (ppd->qsfp_info.cache_valid) {
+				ret = tune_qsfp(ppd,
+						&tx_preset_index,
+						&rx_preset_index,
+						&tuning_method,
+						&total_atten);
+
+				/*
+				 * We may have modified the QSFP memory, so
+				 * update the cache to reflect the changes
+				 */
+				refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+				if (ret)
+					goto bail;
+
+				limiting_active =
+						ppd->qsfp_info.limiting_active;
+			} else {
+				dd_dev_err(dd,
+					   "%s: Reading QSFP memory failed\n",
+					   __func__);
+				goto bail;
+			}
+		} else
+			ppd->offline_disabled_reason =
+			   HFI1_ODR_MASK(
+				OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED);
+		break;
+	default:
+		dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+		ppd->port_type = PORT_TYPE_UNKNOWN;
+		tuning_method = OPA_UNKNOWN_TUNING;
+		total_atten = 0;
+		limiting_active = 0;
+		tx_preset_index = OPA_INVALID_INDEX;
+		break;
+	}
+
+	if (ppd->offline_disabled_reason ==
+			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+		apply_tunings(ppd, tx_preset_index, tuning_method,
+			      total_atten, limiting_active);
+
+	if (!ret)
+		ppd->driver_link_ready = 1;
+
+	return;
+bail:
+	ppd->driver_link_ready = 0;
+}
diff --git a/drivers/staging/rdma/hfi1/platform_config.h b/drivers/staging/rdma/hfi1/platform.h
index 8a94a8342052..19620cf546d5 100644
--- a/drivers/staging/rdma/hfi1/platform_config.h
+++ b/drivers/staging/rdma/hfi1/platform.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -47,8 +44,8 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
-#ifndef __PLATFORM_CONFIG_H
-#define __PLATFORM_CONFIG_H
+#ifndef __PLATFORM_H
+#define __PLATFORM_H
 
 #define METADATA_TABLE_FIELD_START_SHIFT		0
 #define METADATA_TABLE_FIELD_START_LEN_BITS		15
@@ -94,17 +91,18 @@ enum platform_config_system_table_fields {
 enum platform_config_port_table_fields {
 	PORT_TABLE_RESERVED,
 	PORT_TABLE_PORT_TYPE,
-	PORT_TABLE_ATTENUATION_12G,
-	PORT_TABLE_ATTENUATION_25G,
+	PORT_TABLE_LOCAL_ATTEN_12G,
+	PORT_TABLE_LOCAL_ATTEN_25G,
 	PORT_TABLE_LINK_SPEED_SUPPORTED,
 	PORT_TABLE_LINK_WIDTH_SUPPORTED,
+	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
+	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
 	PORT_TABLE_VL_CAP,
 	PORT_TABLE_MTU_CAP,
 	PORT_TABLE_TX_LANE_ENABLE_MASK,
 	PORT_TABLE_LOCAL_MAX_TIMEOUT,
-	PORT_TABLE_AUTO_LANE_SHEDDING_ENABLED,
-	PORT_TABLE_EXTERNAL_LOOPBACK_ALLOWED,
-	PORT_TABLE_TX_PRESET_IDX_PASSIVE_CU,
+	PORT_TABLE_REMOTE_ATTEN_12G,
+	PORT_TABLE_REMOTE_ATTEN_25G,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_NO_EQ,
 	PORT_TABLE_TX_PRESET_IDX_ACTIVE_EQ,
 	PORT_TABLE_RX_PRESET_IDX,
@@ -115,10 +113,10 @@ enum platform_config_port_table_fields {
 enum platform_config_rx_preset_table_fields {
 	RX_PRESET_TABLE_RESERVED,
 	RX_PRESET_TABLE_QSFP_RX_CDR_APPLY,
-	RX_PRESET_TABLE_QSFP_RX_EQ_APPLY,
+	RX_PRESET_TABLE_QSFP_RX_EMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_AMP_APPLY,
 	RX_PRESET_TABLE_QSFP_RX_CDR,
-	RX_PRESET_TABLE_QSFP_RX_EQ,
+	RX_PRESET_TABLE_QSFP_RX_EMP,
 	RX_PRESET_TABLE_QSFP_RX_AMP,
 	RX_PRESET_TABLE_MAX
 };
@@ -149,6 +147,11 @@ enum platform_config_variable_settings_table_fields {
 	VARIABLE_SETTINGS_TABLE_MAX
 };
 
+struct platform_config {
+	size_t size;
+	const u8 *data;
+};
+
 struct platform_config_data {
 	u32 *table;
 	u32 *table_metadata;
@@ -179,9 +182,11 @@ static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
  * fields defined for each table above
  */
 
-/*=====================================================
+/*
+ * =====================================================
  *  System table encodings
- *====================================================*/
+ * =====================================================
+ */
 #define PLATFORM_CONFIG_MAGIC_NUM		0x3d4f5041
 #define PLATFORM_CONFIG_MAGIC_NUMBER_LEN	4
 
@@ -199,12 +204,13 @@ enum platform_config_qsfp_power_class_encoding {
 	QSFP_POWER_CLASS_7
 };
 
-
-/*=====================================================
+/*
+ * ====================================================
  *  Port table encodings
- *==================================================== */
+ * ====================================================
+ */
 enum platform_config_port_type_encoding {
-	PORT_TYPE_RESERVED,
+	PORT_TYPE_UNKNOWN,
 	PORT_TYPE_DISCONNECTED,
 	PORT_TYPE_FIXED,
 	PORT_TYPE_VARIABLE,
@@ -283,4 +289,16 @@ enum platform_config_local_max_timeout_encoding {
 	LOCAL_MAX_TIMEOUT_1000_S
 };
 
-#endif			/*__PLATFORM_CONFIG_H*/
+enum link_tuning_encoding {
+	OPA_PASSIVE_TUNING,
+	OPA_ACTIVE_TUNING,
+	OPA_UNKNOWN_TUNING
+};
+
+/* platform.c */
+void get_platform_config(struct hfi1_devdata *dd);
+void free_platform_config(struct hfi1_devdata *dd);
+int set_qsfp_tx(struct hfi1_pportdata *ppd, int on);
+void tune_serdes(struct hfi1_pportdata *ppd);
+
+#endif			/*__PLATFORM_H*/
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index ce036810d576..29a5ad28019b 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,31 +51,32 @@
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/seq_file.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
 #include "trace.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
-
-static unsigned int hfi1_qp_table_size = 256;
+unsigned int hfi1_qp_table_size = 256;
 module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-static void flush_tx_list(struct hfi1_qp *qp);
+static void flush_tx_list(struct rvt_qp *qp);
 static int iowait_sleep(
 	struct sdma_engine *sde,
 	struct iowait *wait,
 	struct sdma_txreq *stx,
 	unsigned seq);
 static void iowait_wakeup(struct iowait *wait, int reason);
+static void iowait_sdma_drained(struct iowait *wait);
+static void qp_pio_drain(struct rvt_qp *qp);
 
-static inline unsigned mk_qpn(struct hfi1_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
 /*
@@ -118,437 +116,15 @@ static const u16 credit_table[31] = {
 	32768                   /* 1E */
 };
 
-static void get_map_page(struct hfi1_qpn_table *qpt, struct qpn_map *map)
-{
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
-
-	/*
-	 * Free the page if someone raced with us installing it.
-	 */
-
-	spin_lock(&qpt->lock);
-	if (map->page)
-		free_page(page);
-	else
-		map->page = (void *)page;
-	spin_unlock(&qpt->lock);
-}
-
-/*
- * Allocate the next available QPN or
- * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
- */
-static int alloc_qpn(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port)
-{
-	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
-	u32 ret;
-
-	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
-		unsigned n;
-
-		ret = type == IB_QPT_GSI;
-		n = 1 << (ret + 2 * (port - 1));
-		spin_lock(&qpt->lock);
-		if (qpt->flags & n)
-			ret = -EINVAL;
-		else
-			qpt->flags |= n;
-		spin_unlock(&qpt->lock);
-		goto bail;
-	}
-
-	qpn = qpt->last + qpt->incr;
-	if (qpn >= QPN_MAX)
-		qpn = qpt->incr | ((qpt->last & 1) ^ 1);
-	/* offset carries bit 0 */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
-	max_scan = qpt->nmaps - !offset;
-	for (i = 0;;) {
-		if (unlikely(!map->page)) {
-			get_map_page(qpt, map);
-			if (unlikely(!map->page))
-				break;
-		}
-		do {
-			if (!test_and_set_bit(offset, map->page)) {
-				qpt->last = qpn;
-				ret = qpn;
-				goto bail;
-			}
-			offset += qpt->incr;
-			/*
-			 * This qpn might be bogus if offset >= BITS_PER_PAGE.
-			 * That is OK.   It gets re-assigned below
-			 */
-			qpn = mk_qpn(qpt, map, offset);
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
-		/*
-		 * In order to keep the number of pages allocated to a
-		 * minimum, we scan the all existing pages before increasing
-		 * the size of the bitmap table.
-		 */
-		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
-				break;
-			map = &qpt->map[qpt->nmaps++];
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else if (map < &qpt->map[qpt->nmaps]) {
-			++map;
-			/* start at incr with current bit 0 */
-			offset = qpt->incr | (offset & 1);
-		} else {
-			map = &qpt->map[0];
-			/* wrap to first map page, invert bit 0 */
-			offset = qpt->incr | ((offset & 1) ^ 1);
-		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (dd->qos_shift - 1));
-		qpn = mk_qpn(qpt, map, offset);
-	}
-
-	ret = -ENOMEM;
-
-bail:
-	return ret;
-}
-
-static void free_qpn(struct hfi1_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num <= 1) {
-		rcu_assign_pointer(ibp->qp[qp->ibqp.qp_num], qp);
-	} else {
-		u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-
-		qp->next = dev->qp_dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_dev->qp_table[n], qp);
-		trace_hfi1_qpinsert(qp, n);
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	u32 n = qpn_hash(dev->qp_dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp[0],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[0], NULL);
-	} else if (rcu_dereference_protected(ibp->qp[1],
-			lockdep_is_held(&dev->qp_dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp[1], NULL);
-	} else {
-		struct hfi1_qp *q;
-		struct hfi1_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-					!= NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-				 rcu_dereference_protected(qp->next,
-				 lockdep_is_held(&dev->qp_dev->qpt_lock)));
-				removed = 1;
-				trace_hfi1_qpremove(qp, n);
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-}
-
-/**
- * free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
- */
-static unsigned free_all_qps(struct hfi1_devdata *dd)
-{
-	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct hfi1_qp *qp;
-	unsigned n, qp_inuse = 0;
-
-	for (n = 0; n < dd->num_pports; n++) {
-		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
-
-		if (!hfi1_mcast_tree_empty(ibp))
-			qp_inuse++;
-		rcu_read_lock();
-		if (rcu_dereference(ibp->qp[0]))
-			qp_inuse++;
-		if (rcu_dereference(ibp->qp[1]))
-			qp_inuse++;
-		rcu_read_unlock();
-	}
-
-	if (!dev->qp_dev)
-		goto bail;
-	spin_lock_irqsave(&dev->qp_dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_dev->qp_table[n],
-			lockdep_is_held(&dev->qp_dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-				lockdep_is_held(&dev->qp_dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qp_dev->qpt_lock, flags);
-	synchronize_rcu();
-bail:
-	return qp_inuse;
-}
-
-/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void reset_qp(struct hfi1_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	iowait_init(
-		&qp->s_iowait,
-		1,
-		hfi1_do_send,
-		iowait_sleep,
-		iowait_wakeup);
-	qp->s_flags &= HFI1_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_adefered = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	clear_ahg(qp);
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-		hfi1_put_ss(&qp->s_rdma_read_sge);
-
-	hfi1_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct hfi1_sge *sge = &wqe->sg_list[i];
-
-				hfi1_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
-
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct hfi1_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
-}
-
-/**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err)
+static void flush_tx_list(struct rvt_qp *qp)
 {
-	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
-
-	if (qp->s_flags & HFI1_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_SEND;
-
-	write_seqlock(&dev->iowait_lock);
-	if (!list_empty(&qp->s_iowait.list) && !(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
-		list_del_init(&qp->s_iowait.list);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
-	}
-	write_sequnlock(&dev->iowait_lock);
-
-	if (!(qp->s_flags & HFI1_S_BUSY)) {
-		qp->s_hdrwords = 0;
-		if (qp->s_rdma_mr) {
-			hfi1_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-		flush_tx_list(qp);
-	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		hfi1_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	if (qp->r_rq.wq) {
-		struct hfi1_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
-}
-
-static void flush_tx_list(struct hfi1_qp *qp)
-{
-	while (!list_empty(&qp->s_iowait.tx_head)) {
+	while (!list_empty(&priv->s_iowait.tx_head)) {
 		struct sdma_txreq *tx;
 
 		tx = list_first_entry(
-			&qp->s_iowait.tx_head,
+			&priv->s_iowait.tx_head,
 			struct sdma_txreq,
 			list);
 		list_del_init(&tx->list);
@@ -557,14 +133,15 @@ static void flush_tx_list(struct hfi1_qp *qp)
 	}
 }
 
-static void flush_iowait(struct hfi1_qp *qp)
+static void flush_iowait(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	unsigned long flags;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
-	if (!list_empty(&qp->s_iowait.list)) {
-		list_del_init(&qp->s_iowait.list);
+	if (!list_empty(&priv->s_iowait.list)) {
+		list_del_init(&priv->s_iowait.list);
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	}
@@ -597,362 +174,106 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
 	return ib_mtu_enum_to_int(mtu);
 }
 
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata)
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata)
 {
+	struct ib_qp *ibqp = &qp->ibqp;
 	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
 	struct hfi1_devdata *dd = dd_from_dev(dev);
-
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
+	u8 sc;
 
 	if (attr_mask & IB_QP_AV) {
-		u8 sc;
-
-		if (attr->ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
+			return -EINVAL;
+
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
-		u8 sc;
-
-		if (attr->alt_ah_attr.dlid >= HFI1_MULTICAST_LID_BASE)
-			goto inval;
-		if (hfi1_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
 		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
+		if (sc == 0xf)
+			return -EINVAL;
+
 		if (!qp_to_sdma_engine(qp, sc) &&
 		    dd->flags & HFI1_HAS_SEND_DMA)
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= hfi1_get_npkeys(dd))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
+			return -EINVAL;
 
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > HFI1_QPN_MASK)
-			goto inval;
+		if (!qp_to_send_context(qp, sc))
+			return -EINVAL;
+	}
 
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
+	return 0;
+}
 
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		int mtu, pidx = qp->port_num - 1;
-
-		dd = dd_from_dev(dev);
-		mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-
-		if (mtu > dd->pport[pidx].ibmtu)
-			pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
-		else
-			pmtu = attr->path_mtu;
+	if (attr_mask & IB_QP_AV) {
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 	}
 
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
+	if (attr_mask & IB_QP_PATH_MIG_STATE &&
+	    attr->path_mig_state == IB_MIG_MIGRATED &&
+	    qp->s_mig_state == IB_MIG_ARMED) {
+		qp->s_flags |= RVT_S_AHG_CLEAR;
+		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
+		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 	}
+}
 
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC)
-			goto inval;
-
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			flush_iowait(qp);
-			qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_iowait.iowork);
-			del_timer_sync(&qp->s_timer);
-			iowait_sdma_drain(&qp->s_iowait);
-			flush_tx_list(qp);
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			clear_ahg(qp);
-			reset_qp(qp, ibqp->qp_type);
-		}
-		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to re-trigger if QP set to RTR more than once */
-		qp->r_flags &= ~HFI1_R_COMM_EST;
-		qp->state = new_state;
-		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
-		break;
+/**
+ * hfi1_check_send_wqe - validate wqe
+ * @qp - The qp
+ * @wqe - The built wqe
+ *
+ * validate wqe.  This is called
+ * prior to inserting the wqe into
+ * the ring but after the wqe has been
+ * setup.
+ *
+ * Returns 0 on success, -EINVAL on failure
+ *
+ */
+int hfi1_check_send_wqe(struct rvt_qp *qp,
+			struct rvt_swqe *wqe)
+{
+	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct rvt_ah *ah;
 
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
 		break;
-
-	case IB_QPS_ERR:
-		lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	case IB_QPT_SMI:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
 		break;
-
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+			return -EINVAL;
 	default:
-		qp->state = new_state;
 		break;
 	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
-		qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-		qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-			qp->s_flags |= HFI1_S_AHG_CLEAR;
-			qp->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
-			qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct hfi1_ibport *ibp;
-		u8 sc, vl;
-		u32 mtu;
-
-		dd = dd_from_dev(dev);
-		ibp = &dd->pport[qp->port_num - 1].ibport_data;
-
-		sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
-		vl = sc_to_vlt(dd, sc);
-
-		mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu);
-		if (vl < PER_VL_SEND_CONTEXTS)
-			mtu = min_t(u32, mtu, dd->vld[vl].mtu);
-		pmtu = mtu_to_enum(mtu, OPA_MTU_8192);
-
-		qp->path_mtu = pmtu;
-		qp->pmtu = mtu;
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
-	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
-}
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = mask_psn(qp->r_psn);
-	attr->sq_psn = mask_psn(qp->s_next_psn);
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
-
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & HFI1_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+	return wqe->length <= piothreshold;
 }
 
 /**
@@ -961,7 +282,7 @@ int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp)
+__be32 hfi1_compute_aeth(struct rvt_qp *qp)
 {
 	u32 aeth = qp->r_msn & HFI1_MSN_MASK;
 
@@ -974,7 +295,7 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 	} else {
 		u32 min, max, x;
 		u32 credits;
-		struct hfi1_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 tail;
 
@@ -1004,12 +325,13 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 			x = (min + max) / 2;
 			if (credit_table[x] == credits)
 				break;
-			if (credit_table[x] > credits)
+			if (credit_table[x] > credits) {
 				max = x;
-			else if (min == x)
-				break;
-			else
+			} else {
+				if (min == x)
+					break;
 				min = x;
+			}
 		}
 		aeth |= x << HFI1_AETH_CREDIT_SHIFT;
 	}
@@ -1017,348 +339,58 @@ __be32 hfi1_compute_aeth(struct hfi1_qp *qp)
 }
 
 /**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata)
-{
-	struct hfi1_qp *qp;
-	int err;
-	struct hfi1_swqe *swq = NULL;
-	struct hfi1_ibdev *dev;
-	struct hfi1_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-
-	if (init_attr->cap.max_send_sge > hfi1_max_sges ||
-	    init_attr->cap.max_send_wr > hfi1_max_qp_wrs ||
-	    init_attr->create_flags) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > hfi1_max_sges ||
-		    init_attr->cap.max_recv_wr > hfi1_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	}
-
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct hfi1_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct hfi1_swqe);
-		swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct hfi1_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct hfi1_rwqe);
-			qp->r_rq.wq = vmalloc_user(sizeof(struct hfi1_rwq) +
-						   qp->r_rq.size * sz);
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = HFI1_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qp_dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		reset_qp(qp, init_attr->qp_type);
-
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See hfi1_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct hfi1_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = hfi1_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == hfi1_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-
-	/*
-	 * We have our QP and its good, now keep track of what types of opcodes
-	 * can be processed on this QP. We do this by keeping track of what the
-	 * 3 high order bits of the opcode are.
-	 */
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-	case IB_QPT_UD:
-		qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_RC:
-		qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	case IB_QPT_UC:
-		qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & OPCODE_QP_MASK;
-		break;
-	default:
-		ret = ERR_PTR(-EINVAL);
-		goto bail_ip;
-	}
-
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
-}
-
-/**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
+ * _hfi1_schedule_send - schedule progress
+ * @qp: the QP
  *
- * Returns 0 on success.
+ * This schedules qp progress w/o regard to the s_flags.
  *
- * Note that this can be called while the QP is actively sending or
- * receiving!
+ * It is only used in the post send, which doesn't hold
+ * the s_lock.
  */
-int hfi1_destroy_qp(struct ib_qp *ibqp)
+void _hfi1_schedule_send(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		flush_iowait(qp);
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_ANY_WAIT);
-		spin_unlock(&qp->s_lock);
-		spin_unlock_irq(&qp->r_lock);
-		cancel_work_sync(&qp->s_iowait.iowork);
-		del_timer_sync(&qp->s_timer);
-		iowait_sdma_drain(&qp->s_iowait);
-		flush_tx_list(qp);
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		spin_lock_irq(&qp->r_lock);
-		spin_lock(&qp->s_lock);
-		clear_mr_refs(qp, 1);
-		clear_ahg(qp);
-	}
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qp_dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 
-	if (qp->ip)
-		kref_put(&qp->ip->ref, hfi1_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
+	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+			priv->s_sde ?
+			priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(dd->node)));
 }
 
-/**
- * init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-static int init_qpn_table(struct hfi1_devdata *dd, struct hfi1_qpn_table *qpt)
+static void qp_pio_drain(struct rvt_qp *qp)
 {
-	u32 offset, qpn, i;
-	struct qpn_map *map;
-	int ret = 0;
+	struct hfi1_ibdev *dev;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	spin_lock_init(&qpt->lock);
-
-	qpt->last = 0;
-	qpt->incr = 1 << dd->qos_shift;
-
-	/* insure we don't assign QPs from KDETH 64K window */
-	qpn = kdeth_qp << 16;
-	qpt->nmaps = qpn / BITS_PER_PAGE;
-	/* This should always be zero */
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpt->nmaps];
-	dd_dev_info(dd, "Reserving QPNs for KDETH window from 0x%x to 0x%x\n",
-		qpn, qpn + 65535);
-	for (i = 0; i < 65536; i++) {
-		if (!map->page) {
-			get_map_page(qpt, map);
-			if (!map->page) {
-				ret = -ENOMEM;
-				break;
-			}
-		}
-		set_bit(offset, map->page);
-		offset++;
-		if (offset == BITS_PER_PAGE) {
-			/* next page */
-			qpt->nmaps++;
-			map++;
-			offset = 0;
-		}
+	if (!priv->s_sendcontext)
+		return;
+	dev = to_idev(qp->ibqp.device);
+	while (iowait_pio_pending(&priv->s_iowait)) {
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
+		write_sequnlock_irq(&dev->iowait_lock);
+		iowait_pio_drain(&priv->s_iowait);
+		write_seqlock_irq(&dev->iowait_lock);
+		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
+		write_sequnlock_irq(&dev->iowait_lock);
 	}
-	return ret;
 }
 
 /**
- * free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
+ * hfi1_schedule_send - schedule progress
+ * @qp: the QP
+ *
+ * This schedules qp progress and caller should hold
+ * the s_lock.
  */
-static void free_qpn_table(struct hfi1_qpn_table *qpt)
+void hfi1_schedule_send(struct rvt_qp *qp)
 {
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		free_page((unsigned long) qpt->map[i].page);
+	if (hfi1_send_ok(qp))
+		_hfi1_schedule_send(qp);
 }
 
 /**
@@ -1368,7 +400,7 @@ static void free_qpn_table(struct hfi1_qpn_table *qpt)
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 	u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
 
@@ -1378,27 +410,27 @@ void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth)
 	 * honor the credit field.
 	 */
 	if (credit == HFI1_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= HFI1_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
-	} else if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
 		if (cmp_msn(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
-			if (qp->s_flags & HFI1_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~HFI1_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				hfi1_schedule_send(qp);
 			}
 		}
 	}
 }
 
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag)
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
 {
 	unsigned long flags;
 
@@ -1421,16 +453,17 @@ static int iowait_sleep(
 	unsigned seq)
 {
 	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
+	struct hfi1_qp_priv *priv;
 	unsigned long flags;
 	int ret = 0;
 	struct hfi1_ibdev *dev;
 
 	qp = tx->qp;
+	priv = qp->priv;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
-
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		/*
 		 * If we couldn't queue the DMA request, save the info
 		 * and try again later rather than destroying the
@@ -1442,18 +475,18 @@ static int iowait_sleep(
 		write_seqlock(&dev->iowait_lock);
 		if (sdma_progress(sde, seq, stx))
 			goto eagain;
-		if (list_empty(&qp->s_iowait.list)) {
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibport *ibp =
 				to_iport(qp->ibqp.device, qp->port_num);
 
-			ibp->n_dmawait++;
-			qp->s_flags |= HFI1_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->s_iowait.list, &sde->dmawait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_DMA_DESC);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ret = -EBUSY;
 	} else {
@@ -1470,61 +503,25 @@ eagain:
 
 static void iowait_wakeup(struct iowait *wait, int reason)
 {
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
 	WARN_ON(reason != SDMA_AVAIL_REASON);
-	hfi1_qp_wakeup(qp, HFI1_S_WAIT_DMA_DESC);
+	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
 }
 
-int hfi1_qp_init(struct hfi1_ibdev *dev)
+static void iowait_sdma_drained(struct iowait *wait)
 {
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	int i;
-	int ret = -ENOMEM;
-
-	/* allocate parent object */
-	dev->qp_dev = kzalloc(sizeof(*dev->qp_dev), GFP_KERNEL);
-	if (!dev->qp_dev)
-		goto nomem;
-	/* allocate hash table */
-	dev->qp_dev->qp_table_size = hfi1_qp_table_size;
-	dev->qp_dev->qp_table_bits = ilog2(hfi1_qp_table_size);
-	dev->qp_dev->qp_table =
-		kmalloc(dev->qp_dev->qp_table_size *
-				sizeof(*dev->qp_dev->qp_table),
-			GFP_KERNEL);
-	if (!dev->qp_dev->qp_table)
-		goto nomem;
-	for (i = 0; i < dev->qp_dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_dev->qp_table[i], NULL);
-	spin_lock_init(&dev->qp_dev->qpt_lock);
-	/* initialize qpn map */
-	ret = init_qpn_table(dd, &dev->qp_dev->qpn_table);
-	if (ret)
-		goto nomem;
-	return ret;
-nomem:
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
-	}
-	return ret;
-}
+	struct rvt_qp *qp = iowait_to_qp(wait);
 
-void hfi1_qp_exit(struct hfi1_ibdev *dev)
-{
-	struct hfi1_devdata *dd = dd_from_dev(dev);
-	u32 qps_inuse;
-
-	qps_inuse = free_all_qps(dd);
-	if (qps_inuse)
-		dd_dev_err(dd, "QP memory leak! %u still in use\n",
-			   qps_inuse);
-	if (dev->qp_dev) {
-		kfree(dev->qp_dev->qp_table);
-		free_qpn_table(&dev->qp_dev->qpn_table);
-		kfree(dev->qp_dev);
+	/*
+	 * This happens when the send engine notes
+	 * a QP in the error state and cannot
+	 * do the flush work until that QP's
+	 * sdma work has finished.
+	 */
+	if (qp->s_flags & RVT_S_WAIT_DMA) {
+		qp->s_flags &= ~RVT_S_WAIT_DMA;
+		hfi1_schedule_send(qp);
 	}
 }
 
@@ -1537,7 +534,7 @@ void hfi1_qp_exit(struct hfi1_ibdev *dev)
  * Return:
  * A send engine for the qp or NULL for SMI type qp.
  */
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct sdma_engine *sde;
@@ -1554,9 +551,33 @@ struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5)
 	return sde;
 }
 
+/*
+ * qp_to_send_context - map a qp to a send context
+ * @qp: the QP
+ * @sc5: the 5 bit sc
+ *
+ * Return:
+ * A send context for the qp
+ */
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		/* SMA packets to VL15 */
+		return dd->vld[15].sc;
+	default:
+		break;
+	}
+
+	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
+					  sc5);
+}
+
 struct qp_iter {
 	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	int specials;
 	int n;
 };
@@ -1570,7 +591,7 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
 		return NULL;
 
 	iter->dev = dev;
-	iter->specials = dev->ibdev.phys_port_cnt * 2;
+	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
 	if (qp_iter_next(iter)) {
 		kfree(iter);
 		return NULL;
@@ -1584,8 +605,8 @@ int qp_iter_next(struct qp_iter *iter)
 	struct hfi1_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int ret = 1;
-	struct hfi1_qp *pqp = iter->qp;
-	struct hfi1_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
 	/*
 	 * The approach is to consider the special qps
@@ -1597,11 +618,11 @@ int qp_iter_next(struct qp_iter *iter)
 	 *
 	 * n = 0..iter->specials is the special qp indices
 	 *
-	 * n = iter->specials..dev->qp_dev->qp_table_size+iter->specials are
+	 * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
 	 * the potential hash bucket entries
 	 *
 	 */
-	for (; n <  dev->qp_dev->qp_table_size + iter->specials; n++) {
+	for (; n <  dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
 		if (pqp) {
 			qp = rcu_dereference(pqp->next);
 		} else {
@@ -1610,17 +631,17 @@ int qp_iter_next(struct qp_iter *iter)
 				struct hfi1_ibport *ibp;
 				int pidx;
 
-				pidx = n % dev->ibdev.phys_port_cnt;
+				pidx = n % dev->rdi.ibdev.phys_port_cnt;
 				ppd = &dd_from_dev(dev)->pport[pidx];
 				ibp = &ppd->ibport_data;
 
 				if (!(n & 1))
-					qp = rcu_dereference(ibp->qp[0]);
+					qp = rcu_dereference(ibp->rvp.qp[0]);
 				else
-					qp = rcu_dereference(ibp->qp[1]);
+					qp = rcu_dereference(ibp->rvp.qp[1]);
 			} else {
 				qp = rcu_dereference(
-					dev->qp_dev->qp_table[
+					dev->rdi.qp_dev->qp_table[
 						(n - iter->specials)]);
 			}
 		}
@@ -1638,7 +659,7 @@ static const char * const qp_type_str[] = {
 	"SMI", "GSI", "RC", "UC", "UD",
 };
 
-static int qp_idle(struct hfi1_qp *qp)
+static int qp_idle(struct rvt_qp *qp)
 {
 	return
 		qp->s_last == qp->s_acked &&
@@ -1649,14 +670,17 @@ static int qp_idle(struct hfi1_qp *qp)
 
 void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 {
-	struct hfi1_swqe *wqe;
-	struct hfi1_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct sdma_engine *sde;
+	struct send_context *send_context;
 
-	sde = qp_to_sdma_engine(qp, qp->s_sc);
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	sde = qp_to_sdma_engine(qp, priv->s_sc);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
+	send_context = qp_to_send_context(qp, priv->s_sc);
 	seq_printf(s,
-		   "N %d %s QP%u R %u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x SL %u MTU %d %u %u %u SDE %p,%u\n",
+		   "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
 		   iter->n,
 		   qp_idle(qp) ? "I" : "B",
 		   qp->ibqp.qp_num,
@@ -1666,8 +690,9 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   wqe ? wqe->wr.opcode : 0,
 		   qp->s_hdrwords,
 		   qp->s_flags,
-		   atomic_read(&qp->s_iowait.sdma_busy),
-		   !list_empty(&qp->s_iowait.list),
+		   iowait_sdma_pending(&priv->s_iowait),
+		   iowait_pio_pending(&priv->s_iowait),
+		   !list_empty(&priv->s_iowait.list),
 		   qp->timeout,
 		   wqe ? wqe->ssn : 0,
 		   qp->s_lsn,
@@ -1676,20 +701,26 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
 		   qp->s_sending_psn, qp->s_sending_hpsn,
 		   qp->s_last, qp->s_acked, qp->s_cur,
 		   qp->s_tail, qp->s_head, qp->s_size,
+		   qp->s_avail,
 		   qp->remote_qpn,
 		   qp->remote_ah_attr.dlid,
 		   qp->remote_ah_attr.sl,
 		   qp->pmtu,
+		   qp->s_retry,
 		   qp->s_retry_cnt,
-		   qp->timeout,
 		   qp->s_rnr_retry_cnt,
 		   sde,
-		   sde ? sde->this_idx : 0);
+		   sde ? sde->this_idx : 0,
+		   send_context,
+		   send_context ? send_context->sw_index : 0,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
+		   ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+		   qp->pid);
 }
 
-void qp_comm_est(struct hfi1_qp *qp)
+void qp_comm_est(struct rvt_qp *qp)
 {
-	qp->r_flags |= HFI1_R_COMM_EST;
+	qp->r_flags |= RVT_R_COMM_EST;
 	if (qp->ibqp.event_handler) {
 		struct ib_event ev;
 
@@ -1700,24 +731,241 @@ void qp_comm_est(struct hfi1_qp *qp)
 	}
 }
 
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp)
+{
+	struct hfi1_qp_priv *priv;
+
+	priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->owner = qp;
+
+	priv->s_hdr = kzalloc_node(sizeof(*priv->s_hdr), gfp, rdi->dparms.node);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
+	}
+	setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
+	qp->s_timer.function = hfi1_rc_timeout;
+	return priv;
+}
+
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	kfree(priv->s_hdr);
+	kfree(priv);
+}
+
+unsigned free_all_qps(struct rvt_dev_info *rdi)
+{
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	int n;
+	unsigned qp_inuse = 0;
+
+	for (n = 0; n < dd->num_pports; n++) {
+		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;
+
+		rcu_read_lock();
+		if (rcu_dereference(ibp->rvp.qp[0]))
+			qp_inuse++;
+		if (rcu_dereference(ibp->rvp.qp[1]))
+			qp_inuse++;
+		rcu_read_unlock();
+	}
+
+	return qp_inuse;
+}
+
+void flush_qp_waiters(struct rvt_qp *qp)
+{
+	flush_iowait(qp);
+	hfi1_stop_rc_timers(qp);
+}
+
+void stop_send_queue(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	cancel_work_sync(&priv->s_iowait.iowork);
+	hfi1_del_timers_sync(qp);
+}
+
+void quiesce_qp(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_sdma_drain(&priv->s_iowait);
+	qp_pio_drain(qp);
+	flush_tx_list(qp);
+}
+
+void notify_qp_reset(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	iowait_init(
+		&priv->s_iowait,
+		1,
+		_hfi1_do_send,
+		iowait_sleep,
+		iowait_wakeup,
+		iowait_sdma_drained);
+	priv->r_adefered = 0;
+	clear_ahg(qp);
+}
+
 /*
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void hfi1_migrate_qp(struct hfi1_qp *qp)
+void hfi1_migrate_qp(struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct ib_event ev;
 
 	qp->s_mig_state = IB_MIG_MIGRATED;
 	qp->remote_ah_attr = qp->alt_ah_attr;
 	qp->port_num = qp->alt_ah_attr.port_num;
 	qp->s_pkey_index = qp->s_alt_pkey_index;
-	qp->s_flags |= HFI1_S_AHG_CLEAR;
-	qp->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
+	qp->s_flags |= RVT_S_AHG_CLEAR;
+	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 
 	ev.device = qp->ibqp.device;
 	ev.element.qp = &qp->ibqp;
 	ev.event = IB_EVENT_PATH_MIG;
 	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
 }
+
+int mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu, OPA_MTU_8192);
+}
+
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	u32 mtu;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	struct hfi1_ibport *ibp;
+	u8 sc, vl;
+
+	ibp = &dd->pport[qp->port_num - 1].ibport_data;
+	sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+	vl = sc_to_vlt(dd, sc);
+
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
+	if (vl < PER_VL_SEND_CONTEXTS)
+		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
+	return mtu;
+}
+
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr)
+{
+	int mtu, pidx = qp->port_num - 1;
+	struct hfi1_ibdev *verbs_dev = container_of(rdi,
+						    struct hfi1_ibdev,
+						    rdi);
+	struct hfi1_devdata *dd = container_of(verbs_dev,
+					       struct hfi1_devdata,
+					       verbs_dev);
+	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
+	if (mtu == -1)
+		return -1; /* values less than 0 are error */
+
+	if (mtu > dd->pport[pidx].ibmtu)
+		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
+	else
+		return attr->path_mtu;
+}
+
+void notify_error_qp(struct rvt_qp *qp)
+{
+	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	write_seqlock(&dev->iowait_lock);
+	if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->s_iowait.list);
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+	write_sequnlock(&dev->iowait_lock);
+
+	if (!(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_hdrwords = 0;
+		if (qp->s_rdma_mr) {
+			rvt_put_mr(qp->s_rdma_mr);
+			qp->s_rdma_mr = NULL;
+		}
+		flush_tx_list(qp);
+	}
+}
+
+/**
+ * hfi1_error_port_qps - put a port's RC/UC qps into error state
+ * @ibp: the ibport.
+ * @sl: the service level.
+ *
+ * This function places all RC/UC qps with a given service level into error
+ * state. It is generally called to force upper lay apps to abandon stale qps
+ * after an sl->sc mapping change.
+ */
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
+{
+	struct rvt_qp *qp = NULL;
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
+	int n;
+	int lastwqe;
+	struct ib_event ev;
+
+	rcu_read_lock();
+
+	/* Deal only with RC/UC qps that use the given SL. */
+	for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
+		for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next)) {
+			if (qp->port_num == ppd->port &&
+			    (qp->ibqp.qp_type == IB_QPT_UC ||
+			     qp->ibqp.qp_type == IB_QPT_RC) &&
+			    qp->remote_ah_attr.sl == sl &&
+			    (ib_rvt_state_ops[qp->state] &
+			     RVT_POST_SEND_OK)) {
+				spin_lock_irq(&qp->r_lock);
+				spin_lock(&qp->s_hlock);
+				spin_lock(&qp->s_lock);
+				lastwqe = rvt_error_qp(qp,
+						       IB_WC_WR_FLUSH_ERR);
+				spin_unlock(&qp->s_lock);
+				spin_unlock(&qp->s_hlock);
+				spin_unlock_irq(&qp->r_lock);
+				if (lastwqe) {
+					ev.device = qp->ibqp.device;
+					ev.element.qp = &qp->ibqp;
+					ev.event =
+						IB_EVENT_QP_LAST_WQE_REACHED;
+					qp->ibqp.event_handler(&ev,
+						qp->ibqp.qp_context);
+				}
+			}
+		}
+	}
+
+	rcu_read_unlock();
+}
diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h
index 62a94c5d7dca..e7bc8d6cf681 100644
--- a/drivers/staging/rdma/hfi1/qp.h
+++ b/drivers/staging/rdma/hfi1/qp.h
@@ -1,14 +1,13 @@
 #ifndef _QP_H
 #define _QP_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,119 +48,33 @@
  */
 
 #include <linux/hash.h>
+#include <rdma/rdmavt_qp.h>
 #include "verbs.h"
 #include "sdma.h"
 
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+extern unsigned int hfi1_qp_table_size;
 
 /*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	void *page;
-};
-
-struct hfi1_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u8  incr;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-struct hfi1_qp_ibdev {
-	u32 qp_table_size;
-	u32 qp_table_bits;
-	struct hfi1_qp __rcu **qp_table;
-	spinlock_t qpt_lock;
-	struct hfi1_qpn_table qpn_table;
-};
-
-static inline u32 qpn_hash(struct hfi1_qp_ibdev *dev, u32 qpn)
-{
-	return hash_32(qpn, dev->qp_table_bits);
-}
-
-/**
- * hfi1_lookup_qpn - return the QP with the given QPN
- * @ibp: the ibport
- * @qpn: the QP number to look up
- *
- * The caller must hold the rcu_read_lock(), and keep the lock until
- * the returned qp is no longer in use.
+ * free_ahg - clear ahg from QP
  */
-static inline struct hfi1_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp,
-				u32 qpn) __must_hold(RCU)
+static inline void clear_ahg(struct rvt_qp *qp)
 {
-	struct hfi1_qp *qp = NULL;
-
-	if (unlikely(qpn <= 1)) {
-		qp = rcu_dereference(ibp->qp[qpn]);
-	} else {
-		struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		u32 n = qpn_hash(dev->qp_dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn)
-				break;
-	}
-	return qp;
-}
+	struct hfi1_qp_priv *priv = qp->priv;
 
-/**
- * clear_ahg - reset ahg status in qp
- * @qp - qp pointer
- */
-static inline void clear_ahg(struct hfi1_qp *qp)
-{
-	qp->s_hdr->ahgcount = 0;
-	qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
-	if (qp->s_sde && qp->s_ahgidx >= 0)
-		sdma_ahg_free(qp->s_sde, qp->s_ahgidx);
+	priv->s_hdr->ahgcount = 0;
+	qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+	if (priv->s_sde && qp->s_ahgidx >= 0)
+		sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
 	qp->s_ahgidx = -1;
 }
 
 /**
- * hfi1_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int hfi1_error_qp(struct hfi1_qp *qp, enum ib_wc_status err);
-
-/**
- * hfi1_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		   int attr_mask, struct ib_udata *udata);
-
-int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_qp_init_attr *init_attr);
-
-/**
  * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
  * @qp: the queue pair to compute the AETH for
  *
  * Returns the AETH.
  */
-__be32 hfi1_compute_aeth(struct hfi1_qp *qp);
+__be32 hfi1_compute_aeth(struct rvt_qp *qp);
 
 /**
  * hfi1_create_qp - create a queue pair for a device
@@ -179,45 +90,23 @@ struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
 			     struct ib_qp_init_attr *init_attr,
 			     struct ib_udata *udata);
 /**
- * hfi1_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int hfi1_destroy_qp(struct ib_qp *ibqp);
-
-/**
  * hfi1_get_credit - flush the send work queue of a QP
  * @qp: the qp who's send work queue to flush
  * @aeth: the Acknowledge Extended Transport Header
  *
  * The QP s_lock should be held.
  */
-void hfi1_get_credit(struct hfi1_qp *qp, u32 aeth);
-
-/**
- * hfi1_qp_init - allocate QP tables
- * @dev: a pointer to the hfi1_ibdev
- */
-int hfi1_qp_init(struct hfi1_ibdev *dev);
-
-/**
- * hfi1_qp_exit - free the QP related structures
- * @dev: a pointer to the hfi1_ibdev
- */
-void hfi1_qp_exit(struct hfi1_ibdev *dev);
+void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
 
 /**
  * hfi1_qp_wakeup - wake up on the indicated event
  * @qp: the QP
  * @flag: flag the qp on which the qp is stalled
  */
-void hfi1_qp_wakeup(struct hfi1_qp *qp, u32 flag);
+void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag);
 
-struct sdma_engine *qp_to_sdma_engine(struct hfi1_qp *qp, u8 sc5);
+struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5);
+struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
 
 struct qp_iter;
 
@@ -244,43 +133,28 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
  * qp_comm_est - handle trap with QP established
  * @qp: the QP
  */
-void qp_comm_est(struct hfi1_qp *qp);
+void qp_comm_est(struct rvt_qp *qp);
 
-/**
- * _hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress w/o regard to the s_flags.
- *
- * It is only used in the post send, which doesn't hold
- * the s_lock.
- */
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	struct hfi1_ibport *ibp =
-		to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+void _hfi1_schedule_send(struct rvt_qp *qp);
+void hfi1_schedule_send(struct rvt_qp *qp);
 
-	iowait_schedule(&qp->s_iowait, ppd->hfi1_wq,
-			qp->s_sde ?
-			qp->s_sde->cpu :
-			cpumask_first(cpumask_of_node(dd->assigned_node_id)));
-}
-
-/**
- * hfi1_schedule_send - schedule progress
- * @qp: the QP
- *
- * This schedules qp progress and caller should hold
- * the s_lock.
- */
-static inline void hfi1_schedule_send(struct hfi1_qp *qp)
-{
-	if (hfi1_send_ok(qp))
-		_hfi1_schedule_send(qp);
-}
-
-void hfi1_migrate_qp(struct hfi1_qp *qp);
+void hfi1_migrate_qp(struct rvt_qp *qp);
 
+/*
+ * Functions provided by hfi1 driver for rdmavt to use
+ */
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		    gfp_t gfp);
+void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+unsigned free_all_qps(struct rvt_dev_info *rdi);
+void notify_qp_reset(struct rvt_qp *qp);
+int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+		       struct ib_qp_attr *attr);
+void flush_qp_waiters(struct rvt_qp *qp);
+void notify_error_qp(struct rvt_qp *qp);
+void stop_send_queue(struct rvt_qp *qp);
+void quiesce_qp(struct rvt_qp *qp);
+u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+int mtu_to_path_mtu(u32 mtu);
+void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
 #endif /* _QP_H */
diff --git a/drivers/staging/rdma/hfi1/qsfp.c b/drivers/staging/rdma/hfi1/qsfp.c
index 6326a915d7fd..9ed1963010fe 100644
--- a/drivers/staging/rdma/hfi1/qsfp.c
+++ b/drivers/staging/rdma/hfi1/qsfp.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -62,7 +59,7 @@
 #define I2C_MAX_RETRY 4
 
 /*
- * Unlocked i2c write.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c write.  No set-up or lock checking.
  */
 static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		       int offset, void *bp, int len)
@@ -71,14 +68,6 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	int ret, cnt;
 	u8 *buff = bp;
 
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for write failed\n");
-		return -EIO;
-	}
-
 	cnt = 0;
 	while (cnt < len) {
 		int wlen = len - cnt;
@@ -99,48 +88,45 @@ static int __i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	return cnt;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_write(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	      void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_write(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d write interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_write(ppd, target, i2c_addr, offset, bp, len);
 }
 
 /*
- * Unlocked i2c read.  Must hold dd->qsfp_i2c_mutex.
+ * Raw i2c read.  No set-up or lock checking.
  */
 static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 		      int offset, void *bp, int len)
 {
 	struct hfi1_devdata *dd = ppd->dd;
 	int ret, cnt, pass = 0;
-	int stuck = 0;
-	u8 *buff = bp;
-
-	/* Make sure TWSI bus is in sane state. */
-	ret = hfi1_twsi_reset(dd, target);
-	if (ret) {
-		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C interface Reset for read failed\n");
-		ret = -EIO;
-		stuck = 1;
-		goto exit;
-	}
+	int orig_offset = offset;
 
 	cnt = 0;
 	while (cnt < len) {
 		int rlen = len - cnt;
 
 		ret = hfi1_twsi_blk_rd(dd, target, i2c_addr, offset,
-				       buff + cnt, rlen);
+				       bp + cnt, rlen);
 		/* Some QSFP's fail first try. Retry as experiment */
 		if (ret && cnt == 0 && ++pass < I2C_MAX_RETRY)
 			continue;
@@ -156,14 +142,11 @@ static int __i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr,
 	ret = cnt;
 
 exit:
-	if (stuck)
-		dd_dev_err(dd, "I2C interface bus stuck non-idle\n");
-
-	if (pass >= I2C_MAX_RETRY && ret)
+	if (ret < 0) {
 		hfi1_dev_porterr(dd, ppd->port,
-				 "I2C failed even retrying\n");
-	else if (pass)
-		hfi1_dev_porterr(dd, ppd->port, "I2C retries: %d\n", pass);
+				 "I2C chain %d read failed, addr 0x%x, offset 0x%x, len %d\n",
+				 target, i2c_addr, orig_offset, len);
+	}
 
 	/* Must wait min 20us between qsfp i2c transactions */
 	udelay(20);
@@ -171,21 +154,35 @@ exit:
 	return ret;
 }
 
+/*
+ * Caller must hold the i2c chain resource.
+ */
 int i2c_read(struct hfi1_pportdata *ppd, u32 target, int i2c_addr, int offset,
 	     void *bp, int len)
 {
-	struct hfi1_devdata *dd = ppd->dd;
 	int ret;
 
-	ret = mutex_lock_interruptible(&dd->qsfp_i2c_mutex);
-	if (!ret) {
-		ret = __i2c_read(ppd, target, i2c_addr, offset, bp, len);
-		mutex_unlock(&dd->qsfp_i2c_mutex);
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "I2C chain %d read interface reset failed\n",
+				 target);
+		return ret;
 	}
 
-	return ret;
+	return __i2c_read(ppd, target, i2c_addr, offset, bp, len);
 }
 
+/*
+ * Write page n, offset m of QSFP memory as defined by SFF 8636
+ * by writing @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	       int len)
 {
@@ -195,50 +192,81 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d write interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
-		 * Set the qsfp page based on a zero-based addresss
+		 * Set the qsfp page based on a zero-based address
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate write to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nwrite = len - count;
-		if ((offset + nwrite) > QSFP_PAGESIZE)
-			nwrite = QSFP_PAGESIZE - offset;
+		/* truncate write to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nwrite) > QSFP_RW_BOUNDARY)
+			nwrite = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
 
-		ret = __i2c_write(ppd, target, QSFP_DEV, offset, bp + count,
-					nwrite);
-		if (ret <= 0)	/* stop on error or nothing read */
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  offset, bp + count, nwrite);
+		if (ret <= 0)	/* stop on error or nothing written */
 			break;
 
 		count += ret;
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
+/*
+ * Perform a stand-alone single QSFP write.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_write(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * by reading @addr = ((256 * n) + m)
+ *
+ * Caller must hold the i2c chain resource.
+ */
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len)
 {
@@ -248,9 +276,17 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	int ret;
 	u8 page;
 
-	ret = mutex_lock_interruptible(&ppd->dd->qsfp_i2c_mutex);
-	if (ret)
+	if (!check_chip_resource(ppd->dd, qsfp_resource(ppd->dd), __func__))
+		return -EACCES;
+
+	/* make sure the TWSI bus is in a sane state */
+	ret = hfi1_twsi_reset(ppd->dd, target);
+	if (ret) {
+		hfi1_dev_porterr(ppd->dd, ppd->port,
+				 "QSFP chain %d read interface reset failed\n",
+				 target);
 		return ret;
+	}
 
 	while (count < len) {
 		/*
@@ -258,25 +294,26 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 		 * and a page size of QSFP_PAGESIZE bytes.
 		 */
 		page = (u8)(addr / QSFP_PAGESIZE);
-		ret = __i2c_write(ppd, target, QSFP_DEV,
-					QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
+		ret = __i2c_write(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				  QSFP_PAGE_SELECT_BYTE_OFFS, &page, 1);
 		if (ret != 1) {
-			hfi1_dev_porterr(
-			ppd->dd,
-			ppd->port,
-			"can't write QSFP_PAGE_SELECT_BYTE: %d\n", ret);
+			hfi1_dev_porterr(ppd->dd, ppd->port,
+					 "QSFP chain %d can't write QSFP_PAGE_SELECT_BYTE: %d\n",
+					 target, ret);
 			ret = -EIO;
 			break;
 		}
 
-		/* truncate read to end of page if crossing page boundary */
 		offset = addr % QSFP_PAGESIZE;
 		nread = len - count;
-		if ((offset + nread) > QSFP_PAGESIZE)
-			nread = QSFP_PAGESIZE - offset;
-
-		ret = __i2c_read(ppd, target, QSFP_DEV, offset, bp + count,
-					nread);
+		/* truncate read to boundary if crossing boundary */
+		if (((addr % QSFP_RW_BOUNDARY) + nread) > QSFP_RW_BOUNDARY)
+			nread = QSFP_RW_BOUNDARY - (addr % QSFP_RW_BOUNDARY);
+
+		/* QSFPs require a 5-10msec delay after write operations */
+		mdelay(5);
+		ret = __i2c_read(ppd, target, QSFP_DEV | QSFP_OFFSET_SIZE,
+				 offset, bp + count, nread);
 		if (ret <= 0)	/* stop on error or nothing read */
 			break;
 
@@ -284,17 +321,40 @@ int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 		addr += ret;
 	}
 
-	mutex_unlock(&ppd->dd->qsfp_i2c_mutex);
-
 	if (ret < 0)
 		return ret;
 	return count;
 }
 
 /*
+ * Perform a stand-alone single QSFP read.  Acquire the resource, do the
+ * read, then release the resource.
+ */
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len)
+{
+	struct hfi1_devdata *dd = ppd->dd;
+	u32 resource = qsfp_resource(dd);
+	int ret;
+
+	ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
+	if (ret)
+		return ret;
+	ret = qsfp_read(ppd, target, addr, bp, len);
+	release_chip_resource(dd, resource);
+
+	return ret;
+}
+
+/*
  * This function caches the QSFP memory range in 128 byte chunks.
  * As an example, the next byte after address 255 is byte 128 from
  * upper page 01H (if existing) rather than byte 0 from lower page 00H.
+ * Access page n, offset m of QSFP memory as defined by SFF 8636
+ * in the cache by reading byte ((128 * n) + m)
+ * The calls to qsfp_{read,write} in this function correctly handle the
+ * address map difference between this mapping and the mapping implemented
+ * by those functions
  */
 int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 {
@@ -304,79 +364,84 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 	u8 *cache = &cp->cache[0];
 
 	/* ensure sane contents on invalid reads, for cable swaps */
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
-	dd_dev_info(ppd->dd, "%s: called\n", __func__);
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
+	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
+	ppd->qsfp_info.cache_valid = 0;
+	spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
+
 	if (!qsfp_mod_present(ppd)) {
 		ret = -ENODEV;
-		goto bail;
+		goto bail_no_release;
 	}
 
-	ret = qsfp_read(ppd, target, 0, cache, 256);
-	if (ret != 256) {
+	ret = acquire_chip_resource(ppd->dd, qsfp_resource(ppd->dd), QSFP_WAIT);
+	if (ret)
+		goto bail_no_release;
+
+	ret = qsfp_read(ppd, target, 0, cache, QSFP_PAGESIZE);
+	if (ret != QSFP_PAGESIZE) {
 		dd_dev_info(ppd->dd,
-			"%s: Read of pages 00H failed, expected 256, got %d\n",
-			__func__, ret);
+			    "%s: Page 0 read failed, expected %d, got %d\n",
+			    __func__, QSFP_PAGESIZE, ret);
 		goto bail;
 	}
 
-	if (cache[0] != 0x0C && cache[0] != 0x0D)
-		goto bail;
-
 	/* Is paging enabled? */
 	if (!(cache[2] & 4)) {
-
 		/* Paging enabled, page 03 required */
 		if ((cache[195] & 0xC0) == 0xC0) {
 			/* all */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x80) == 0x80) {
 			/* only page 2 and 3 */
 			ret = qsfp_read(ppd, target, 640, cache + 384, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else if ((cache[195] & 0x40) == 0x40) {
 			/* only page 1 and 3 */
 			ret = qsfp_read(ppd, target, 384, cache + 256, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		} else {
 			/* only page 3 */
 			ret = qsfp_read(ppd, target, 896, cache + 512, 128);
 			if (ret <= 0 || ret != 128) {
-				dd_dev_info(ppd->dd, "%s: failed\n", __func__);
+				dd_dev_info(ppd->dd, "%s failed\n", __func__);
 				goto bail;
 			}
 		}
 	}
 
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+
 	spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
 	ppd->qsfp_info.cache_valid = 1;
 	ppd->qsfp_info.cache_refresh_required = 0;
@@ -385,7 +450,9 @@ int refresh_qsfp_cache(struct hfi1_pportdata *ppd, struct qsfp_data *cp)
 	return 0;
 
 bail:
-	memset(cache, 0, (QSFP_MAX_NUM_PAGES*128));
+	release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+bail_no_release:
+	memset(cache, 0, (QSFP_MAX_NUM_PAGES * 128));
 	return ret;
 }
 
@@ -434,7 +501,7 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len,
 
 	if (port_num > dd->num_pports || port_num < 1) {
 		dd_dev_info(dd, "%s: Invalid port number %d\n",
-				__func__, port_num);
+			    __func__, port_num);
 		ret = -EINVAL;
 		goto set_zeroes;
 	}
@@ -485,7 +552,6 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 	lenstr[1] = '\0';
 
 	if (ppd->qsfp_info.cache_valid) {
-
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
 			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
 
@@ -529,7 +595,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 			memcpy(bin_buff, &cache[bidx], QSFP_DUMP_CHUNK);
 			for (iidx = 0; iidx < QSFP_DUMP_CHUNK; ++iidx) {
-				sofar += scnprintf(buf + sofar, len-sofar,
+				sofar += scnprintf(buf + sofar, len - sofar,
 					" %02X", bin_buff[iidx]);
 			}
 			sofar += scnprintf(buf + sofar, len - sofar, "\n");
diff --git a/drivers/staging/rdma/hfi1/qsfp.h b/drivers/staging/rdma/hfi1/qsfp.h
index d30c2a6baa0b..831fe4cf1345 100644
--- a/drivers/staging/rdma/hfi1/qsfp.h
+++ b/drivers/staging/rdma/hfi1/qsfp.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,23 +56,28 @@
  * Below are masks for QSFP pins.  Pins are the same for HFI0 and HFI1.
  * _N means asserted low
  */
-#define QSFP_HFI0_I2CCLK    (1 << 0)
-#define QSFP_HFI0_I2CDAT    (1 << 1)
-#define QSFP_HFI0_RESET_N   (1 << 2)
-#define QSFP_HFI0_INT_N	    (1 << 3)
-#define QSFP_HFI0_MODPRST_N (1 << 4)
+#define QSFP_HFI0_I2CCLK    BIT(0)
+#define QSFP_HFI0_I2CDAT    BIT(1)
+#define QSFP_HFI0_RESET_N   BIT(2)
+#define QSFP_HFI0_INT_N	    BIT(3)
+#define QSFP_HFI0_MODPRST_N BIT(4)
 
 /* QSFP is paged at 256 bytes */
 #define QSFP_PAGESIZE 256
+/* Reads/writes cannot cross 128 byte boundaries */
+#define QSFP_RW_BOUNDARY 128
+
+/* number of bytes in i2c offset for QSFP devices */
+#define __QSFP_OFFSET_SIZE 1                           /* num address bytes */
+#define QSFP_OFFSET_SIZE (__QSFP_OFFSET_SIZE << 8)     /* shifted value */
 
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
-/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
-/*
- * Rest of first 128 not used, although 127 is reserved for page select
- * if module is not "Flat memory".
- */
+#define QSFP_TX_CTRL_BYTE_OFFS 86
+#define QSFP_PWR_CTRL_BYTE_OFFS 93
+#define QSFP_CDR_CTRL_BYTE_OFFS 98
+
 #define QSFP_PAGE_SELECT_BYTE_OFFS 127
 /* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
 #define QSFP_MOD_ID_OFFS 128
@@ -87,7 +89,8 @@
 /* Byte 130 is Connector type. Not Intel req'd */
 /* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
 /* Byte 139 is encoding. code 0x01 is 8b10b. Not Intel req'd */
-/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not Intel req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec */
+#define QSFP_NOM_BIT_RATE_100_OFFS 140
 /* Byte 141 is Extended Rate Select. Not Intel req'd */
 /* Bytes 142..145 are lengths for various fiber types. Not Intel req'd */
 /* Byte 146 is length for Copper. Units of 1 meter */
@@ -135,11 +138,18 @@ extern const char *const hfi1_qsfp_devtech[16];
  */
 #define QSFP_ATTEN_OFFS 186
 #define QSFP_ATTEN_LEN 2
-/* Bytes 188,189 are Wavelength tolerance, not Intel req'd */
+/*
+ * Bytes 188,189 are Wavelength tolerance, if optical
+ * If copper, they are attenuation in dB:
+ * Byte 188 is at 12.5 Gb/s, Byte 189 at 25 Gb/s
+ */
+#define QSFP_CU_ATTEN_7G_OFFS 188
+#define QSFP_CU_ATTEN_12G_OFFS 189
 /* Byte 190 is Max Case Temp. Not Intel req'd */
 /* Byte 191 is LSB of sum of bytes 128..190. Not Intel req'd */
 #define QSFP_CC_OFFS 191
-/* Bytes 192..195 are Options implemented in qsfp. Not Intel req'd */
+#define QSFP_EQ_INFO_OFFS 193
+#define QSFP_CDR_INFO_OFFS 194
 /* Bytes 196..211 are Serial Number, String */
 #define QSFP_SN_OFFS 196
 #define QSFP_SN_LEN 16
@@ -150,6 +160,8 @@ extern const char *const hfi1_qsfp_devtech[16];
 #define QSFP_LOT_OFFS 218
 #define QSFP_LOT_LEN 2
 /* Bytes 220, 221 indicate monitoring options, Not Intel req'd */
+/* Byte 222 indicates nominal bitrate in units of 250Mbits/sec */
+#define QSFP_NOM_BIT_RATE_250_OFFS 222
 /* Byte 223 is LSB of sum of bytes 192..222 */
 #define QSFP_CC_EXT_OFFS 223
 
@@ -191,6 +203,7 @@ extern const char *const hfi1_qsfp_devtech[16];
  */
 
 #define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_HIGH_PWR(pbyte) (((pbyte) & 3) | 4)
 #define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
 #define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
 
@@ -198,10 +211,12 @@ struct qsfp_data {
 	/* Helps to find our way */
 	struct hfi1_pportdata *ppd;
 	struct work_struct qsfp_work;
-	u8 cache[QSFP_MAX_NUM_PAGES*128];
+	u8 cache[QSFP_MAX_NUM_PAGES * 128];
+	/* protect qsfp data */
 	spinlock_t qsfp_lock;
 	u8 check_interrupt_flags;
-	u8 qsfp_interrupt_functional;
+	u8 reset_needed;
+	u8 limiting_active;
 	u8 cache_valid;
 	u8 cache_refresh_required;
 };
@@ -220,3 +235,7 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	       int len);
 int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
 	      int len);
+int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		   int len);
+int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
+		  int len);
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 6f4a155f7931..0d7e1017f3cb 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,18 +46,151 @@
  */
 
 #include <linux/io.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
 #include "hfi.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "trace.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_RC_##x
 
-static void rc_timeout(unsigned long arg);
+/**
+ * hfi1_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ *
+ * add a retry timer on the QP
+ */
+static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+			      rdi->busy_jiffies;
+	add_timer(&qp->s_timer);
+}
+
+/**
+ * hfi1_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @to - timeout in usecs
+ *
+ * add an rnr timer on the QP
+ */
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	qp->s_flags |= RVT_S_WAIT_RNR;
+	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+	add_timer(&priv->s_rnr_timer);
+}
+
+/**
+ * hfi1_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ *
+ * Modify a potentially already running retry
+ * timer
+ */
+static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
+		  rdi->busy_jiffies);
+}
+
+/**
+ * hfi1_stop_retry_timer - stop a retry timer
+ * @qp - the QP
+ *
+ * stop a retry timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+
+	/* Remove QP from retry */
+	if (qp->s_flags & RVT_S_TIMER) {
+		qp->s_flags &= ~RVT_S_TIMER;
+		rval = del_timer(&qp->s_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_stop_rc_timers - stop all timers
+ * @qp - the QP
+ *
+ * stop any pending timers
+ */
+void hfi1_stop_rc_timers(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from all timers */
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+		del_timer(&priv->s_rnr_timer);
+	}
+}
+
+/**
+ * hfi1_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return if the timer
+ * had been pending.
+ */
+static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
+{
+	int rval = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	/* Remove QP from rnr timer */
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
+		rval = del_timer(&priv->s_rnr_timer);
+	}
+	return rval;
+}
+
+/**
+ * hfi1_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void hfi1_del_timers_sync(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	del_timer_sync(&qp->s_timer);
+	del_timer_sync(&priv->s_rnr_timer);
+}
 
-static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
+/* only opcode mask for adaptive pio */
+const u32 rc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
+	BIT(OP(ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
+	BIT(OP(COMPARE_SWAP & 0x1f)) |
+	BIT(OP(FETCH_ADD & 0x1f));
+
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
 	u32 len;
@@ -74,38 +204,32 @@ static u32 restart_sge(struct hfi1_sge_state *ss, struct hfi1_swqe *wqe,
 	return wqe->length - len;
 }
 
-static void start_timer(struct hfi1_qp *qp)
-{
-	qp->s_flags |= HFI1_S_TIMER;
-	qp->s_timer.function = rc_timeout;
-	/* 4.096 usec. * (1 << qp->timeout) */
-	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
-	add_timer(&qp->s_timer);
-}
-
 /**
  * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
  * @dev: the device for this QP
  * @qp: a pointer to the QP
  * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
+ * @ps: the xmit packet state
  *
  * Return 1 if constructed; otherwise, return 0.
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
-		       struct hfi1_other_headers *ohdr, u32 pmtu)
+static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
+		       struct hfi1_other_headers *ohdr,
+		       struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 len;
 	u32 bth0;
 	u32 bth2;
 	int middle = 0;
+	u32 pmtu = qp->pmtu;
+	struct hfi1_qp_priv *priv = qp->priv;
 
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -116,7 +240,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		/* FALLTHROUGH */
@@ -133,7 +257,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & HFI1_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 			goto bail;
 		}
@@ -152,9 +276,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 				goto bail;
 			}
 			/* Copy SGE state in case we need to resend */
-			qp->s_rdma_mr = e->rdma_sge.mr;
-			if (qp->s_rdma_mr)
-				hfi1_get_mr(qp->s_rdma_mr);
+			ps->s_txreq->mr = e->rdma_sge.mr;
+			if (ps->s_txreq->mr)
+				rvt_get_mr(ps->s_txreq->mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -191,9 +315,9 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct hfi1_qp *qp,
 		/* FALLTHROUGH */
 	case OP(RDMA_READ_RESPONSE_MIDDLE):
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
-		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
-		if (qp->s_rdma_mr)
-			hfi1_get_mr(qp->s_rdma_mr);
+		ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
+		if (ps->s_txreq->mr)
+			rvt_get_mr(ps->s_txreq->mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu) {
 			len = pmtu;
@@ -218,7 +342,7 @@ normal:
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~HFI1_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
@@ -234,20 +358,23 @@ normal:
 	}
 	qp->s_rdma_ack_cnt++;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_size = len;
-	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle);
+	hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
 	return 1;
 
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 	/*
 	 * Ensure s_rdma_ack_cnt changes are committed prior to resetting
-	 * HFI1_S_RESP_PENDING
+	 * RVT_S_RESP_PENDING
 	 */
 	smp_wmb();
-	qp->s_flags &= ~(HFI1_S_RESP_PENDING
-				| HFI1_S_ACK_PENDING
-				| HFI1_S_AHG_VALID);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING
+				| RVT_S_ACK_PENDING
+				| RVT_S_AHG_VALID);
 	return 0;
 }
 
@@ -255,14 +382,17 @@ bail:
  * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  *
+ * Assumes s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_rc_req(struct hfi1_qp *qp)
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_sge_state *ss;
-	struct hfi1_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	u32 hwords = 5;
 	u32 len;
@@ -270,51 +400,48 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	char newreq;
-	unsigned long flags;
-	int ret = 0;
 	int middle = 0;
 	int delta;
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout re-sends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
+	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & HFI1_S_RESP_PENDING) &&
-	    make_rc_ack(dev, qp, ohdr, pmtu))
-		goto done;
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
+	    make_rc_ack(dev, qp, ohdr, ps))
+		return 1;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_flags & (HFI1_S_WAIT_RNR | HFI1_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 
 	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= HFI1_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 		}
 		qp->s_sending_psn = qp->s_psn;
@@ -322,10 +449,10 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	}
 
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/*
 		 * Resend an old request or start a new one.
@@ -347,11 +474,11 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= HFI1_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		/*
 		 * Note that we have to be careful not to modify the
@@ -365,21 +492,19 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
 				ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -393,14 +518,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			break;
 
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= HFI1_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
 			ohdr->u.rc.reth.vaddr =
@@ -409,16 +534,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after RETH */
@@ -440,19 +563,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->rdma_wr.remote_addr);
@@ -477,13 +593,12 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= HFI1_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 				}
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & HFI1_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
@@ -526,11 +641,8 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
-			if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -550,8 +662,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -559,9 +669,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -592,8 +702,6 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = mask_psn(qp->s_psn++);
-		if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
@@ -601,9 +709,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -648,13 +756,14 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 	delta = delta_psn(bth2, wqe->psn);
 	if (delta && delta % HFI1_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & HFI1_S_SEND_ONE) {
-		qp->s_flags &= ~HFI1_S_SEND_ONE;
-		qp->s_flags |= HFI1_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = ss;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(
@@ -662,16 +771,25 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
 		ohdr,
 		bth0 | (qp->s_state << 24),
 		bth2,
-		middle);
-done:
-	ret = 1;
-	goto unlock;
+		middle,
+		ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -682,7 +800,7 @@ unlock:
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  */
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
 		      int is_fecn)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -700,7 +818,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 	unsigned long flags;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if (qp->s_flags & HFI1_S_RESP_PENDING)
+	if (qp->s_flags & RVT_S_RESP_PENDING)
 		goto queue_ack;
 
 	/* Ensure s_rdma_ack_cnt changes are committed */
@@ -763,7 +881,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 		goto queue_ack;
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+	trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
 
 	/* write the pbc and data */
 	ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -771,13 +889,13 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct hfi1_qp *qp,
 	return;
 
 queue_ack:
-	this_cpu_inc(*ibp->rc_qacks);
+	this_cpu_inc(*ibp->rvp.rc_qacks);
 	spin_lock_irqsave(&qp->s_lock, flags);
-	qp->s_flags |= HFI1_S_ACK_PENDING | HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 	qp->s_nak_state = qp->r_nak_state;
 	qp->s_ack_psn = qp->r_ack_psn;
 	if (is_fecn)
-		qp->s_flags |= HFI1_S_ECN;
+		qp->s_flags |= RVT_S_ECN;
 
 	/* Schedule the send tasklet. */
 	hfi1_schedule_send(qp);
@@ -793,10 +911,10 @@ queue_ack:
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  */
-static void reset_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 	u32 n = qp->s_acked;
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
 	qp->s_cur = n;
@@ -819,7 +937,7 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
 			n = 0;
 		if (n == qp->s_tail)
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = cmp_psn(psn, wqe->psn);
 		if (diff < 0)
 			break;
@@ -865,23 +983,23 @@ static void reset_psn(struct hfi1_qp *qp, u32 psn)
 done:
 	qp->s_psn = psn;
 	/*
-	 * Set HFI1_S_WAIT_PSN as rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in hfi1_make_rc_req() is too late.
 	 */
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= HFI1_S_WAIT_PSN;
-	qp->s_flags &= ~HFI1_S_AHG_VALID;
+		qp->s_flags |= RVT_S_WAIT_PSN;
+	qp->s_flags &= ~RVT_S_AHG_VALID;
 }
 
 /*
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
-static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
+static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
-	struct hfi1_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct hfi1_ibport *ibp;
 
 	if (qp->s_retry == 0) {
@@ -890,42 +1008,44 @@ static void restart_rc(struct hfi1_qp *qp, u32 psn, int wait)
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 			hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
-		} else /* need to handle delayed completion */
+		} else { /* need to handle delayed completion */
 			return;
-	} else
+		}
+	} else {
 		qp->s_retry--;
+	}
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
-	qp->s_flags &= ~(HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR |
-			 HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_PSN |
-			 HFI1_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
-		qp->s_flags |= HFI1_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 }
 
 /*
  * This is called from s_timer for missing responses.
  */
-static void rc_timeout(unsigned long arg)
+void hfi1_rc_timeout(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct hfi1_ibport *ibp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & HFI1_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~HFI1_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		trace_hfi1_rc_timeout(qp, qp->s_last_psn + 1);
 		restart_rc(qp, qp->s_last_psn + 1, 1);
@@ -940,15 +1060,12 @@ static void rc_timeout(unsigned long arg)
  */
 void hfi1_rc_rnr_retry(unsigned long arg)
 {
-	struct hfi1_qp *qp = (struct hfi1_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & HFI1_S_WAIT_RNR) {
-		qp->s_flags &= ~HFI1_S_WAIT_RNR;
-		del_timer(&qp->s_timer);
-		hfi1_schedule_send(qp);
-	}
+	hfi1_stop_rnr_timer(qp);
+	hfi1_schedule_send(qp);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
@@ -956,14 +1073,14 @@ void hfi1_rc_rnr_retry(unsigned long arg)
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  */
-static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (cmp_psn(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -981,16 +1098,16 @@ static void reset_sending_psn(struct hfi1_qp *qp, u32 psn)
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
 {
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
 	/* Find out where the BTH is */
@@ -1016,22 +1133,30 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 	    !(qp->s_flags &
-		(HFI1_S_TIMER | HFI1_S_WAIT_RNR | HFI1_S_WAIT_PSN)) &&
-		(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK))
-		start_timer(qp);
+		(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+		(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+		hfi1_add_retry_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			hfi1_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1039,26 +1164,24 @@ void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr)
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	/*
 	 * If we were waiting for sends to complete before re-sending,
 	 * and they are now complete, restart sending.
 	 */
 	trace_hfi1_rc_sendcomplete(qp, psn);
-	if (qp->s_flags & HFI1_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~HFI1_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		hfi1_schedule_send(qp);
 	}
 }
 
-static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 	qp->s_last_psn = psn;
 }
@@ -1068,9 +1191,9 @@ static inline void update_last_psn(struct hfi1_qp *qp, u32 psn)
  * This is similar to hfi1_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
-					  struct hfi1_swqe *wqe,
-					  struct hfi1_ibport *ibp)
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
+					 struct hfi1_ibport *ibp)
 {
 	struct ib_wc wc;
 	unsigned i;
@@ -1082,13 +1205,21 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 	 */
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		u32 s_last;
+
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct hfi1_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
-			hfi1_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
@@ -1096,14 +1227,12 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 			wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
-			hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else {
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-		this_cpu_inc(*ibp->rc_delayed_comp);
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 		/*
 		 * If send progress not running attempt to progress
 		 * SDMA queue.
@@ -1131,7 +1260,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
@@ -1141,7 +1270,7 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	return wqe;
 }
@@ -1157,21 +1286,16 @@ static struct hfi1_swqe *do_rc_completion(struct hfi1_qp *qp,
  * May be called at interrupt level, with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp;
 	enum ib_wc_status status;
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
-
-	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	unsigned long to;
 
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
@@ -1182,7 +1306,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 	ack_psn = psn;
 	if (aeth >> 29)
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 	/*
@@ -1200,7 +1324,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
 		    diff == 0) {
 			ret = 1;
-			goto bail;
+			goto bail_stop;
 		}
 		/*
 		 * If this request is a RDMA read or atomic, and the ACK is
@@ -1217,11 +1341,11 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
-			if (!(qp->r_flags & HFI1_R_RDMAR_SEQ)) {
-				qp->r_flags |= HFI1_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= HFI1_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
@@ -1231,7 +1355,7 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
 			 */
-			goto bail;
+			goto bail_stop;
 		}
 		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
@@ -1244,14 +1368,14 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & HFI1_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(HFI1_S_WAIT_FENCE |
-						 HFI1_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
-			} else if (qp->s_flags & HFI1_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(HFI1_S_WAIT_RDMAR |
-						 HFI1_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				hfi1_schedule_send(qp);
 			}
 		}
@@ -1262,40 +1386,43 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
-		this_cpu_inc(*ibp->rc_acks);
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			 * We are expecting more ACKs so
-			 * reset the re-transmit timer.
+			 * mod the retry timer.
 			 */
-			start_timer(qp);
+			hfi1_mod_retry_timer(qp);
 			/*
 			 * We can stop re-sending the earlier packets and
 			 * continue with the next packet the receiver wants.
 			 */
 			if (cmp_psn(qp->s_psn, psn) <= 0)
 				reset_psn(qp, psn + 1);
-		} else if (cmp_psn(qp->s_psn, psn) <= 0) {
-			qp->s_state = OP(SEND_LAST);
-			qp->s_psn = psn + 1;
+		} else {
+			/* No more acks - kill all timers */
+			hfi1_stop_rc_timers(qp);
+			if (cmp_psn(qp->s_psn, psn) <= 0) {
+				qp->s_state = OP(SEND_LAST);
+				qp->s_psn = psn + 1;
+			}
 		}
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 		hfi1_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
 		qp->s_retry = qp->s_retry_cnt;
 		update_last_psn(qp, psn);
-		ret = 1;
-		goto bail;
+		return 1;
 
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
-		if (qp->s_flags & HFI1_S_WAIT_RNR)
-			goto bail;
+			goto bail_stop;
+		if (qp->s_flags & RVT_S_WAIT_RNR)
+			goto bail_stop;
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
@@ -1306,28 +1433,27 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 
-		ibp->n_rc_resends += delta_psn(qp->s_psn, psn);
+		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
 
 		reset_psn(qp, psn);
 
-		qp->s_flags &= ~(HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_ACK);
-		qp->s_flags |= HFI1_S_WAIT_RNR;
-		qp->s_timer.function = hfi1_rc_rnr_retry;
-		qp->s_timer.expires = jiffies + usecs_to_jiffies(
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		hfi1_stop_rc_timers(qp);
+		to =
 			ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
-					   HFI1_AETH_CREDIT_MASK]);
-		add_timer(&qp->s_timer);
-		goto bail;
+					   HFI1_AETH_CREDIT_MASK];
+		hfi1_add_rnr_timer(qp, to);
+		return 0;
 
 	case 3:         /* NAK */
 		if (qp->s_acked == qp->s_tail)
-			goto bail;
+			goto bail_stop;
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 		switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
 			HFI1_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
@@ -1340,21 +1466,21 @@ static int do_rc_ack(struct hfi1_qp *qp, u32 aeth, u32 psn, int opcode,
 
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 			if (qp->s_last == qp->s_acked) {
 				hfi1_send_complete(qp, wqe, status);
-				hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			break;
 
@@ -1364,15 +1490,16 @@ class_b:
 		}
 		qp->s_retry = qp->s_retry_cnt;
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
-		goto bail;
+		goto bail_stop;
 
 	default:                /* 2: reserved */
 reserved:
 		/* Ignore reserved NAK codes. */
-		goto bail;
+		goto bail_stop;
 	}
-
-bail:
+	return ret;
+bail_stop:
+	hfi1_stop_rc_timers(qp);
 	return ret;
 }
 
@@ -1380,18 +1507,15 @@ bail:
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
-static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 			 struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (HFI1_S_TIMER | HFI1_S_WAIT_RNR)) {
-		qp->s_flags &= ~(HFI1_S_TIMER | HFI1_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
+	hfi1_stop_rc_timers(qp);
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 	while (cmp_psn(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1401,11 +1525,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= HFI1_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -1429,11 +1553,11 @@ static void rdma_seq_err(struct hfi1_qp *qp, struct hfi1_ibport *ibp, u32 psn,
  */
 static void rc_rcv_resp(struct hfi1_ibport *ibp,
 			struct hfi1_other_headers *ohdr,
-			void *data, u32 tlen, struct hfi1_qp *qp,
+			void *data, u32 tlen, struct rvt_qp *qp,
 			u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
 			struct hfi1_ctxtdata *rcd)
 {
-	struct hfi1_swqe *wqe;
+	struct rvt_swqe *wqe;
 	enum ib_wc_status status;
 	unsigned long flags;
 	int diff;
@@ -1446,7 +1570,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 	trace_hfi1_rc_ack(qp, psn);
 
 	/* Ignore invalid responses. */
-	if (cmp_psn(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;
 
 	/* Ignore duplicate responses. */
@@ -1465,15 +1590,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & HFI1_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~HFI1_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}
 
 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;
 
 	switch (opcode) {
@@ -1484,14 +1609,15 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
 		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
 			__be32 *p = ohdr->u.at.atomic_ack_eth;
 
-			val = ((u64) be32_to_cpu(p[0]) << 32) |
+			val = ((u64)be32_to_cpu(p[0]) << 32) |
 				be32_to_cpu(p[1]);
-		} else
+		} else {
 			val = 0;
+		}
 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1519,10 +1645,10 @@ read_middle:
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= HFI1_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & HFI1_S_WAIT_ACK) {
-			qp->s_flags &= ~HFI1_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			hfi1_schedule_send(qp);
 		}
 
@@ -1536,7 +1662,7 @@ read_middle:
 		qp->s_rdma_read_len -= pmtu;
 		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1556,7 +1682,7 @@ read_middle:
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1580,9 +1706,9 @@ read_last:
 		if (unlikely(tlen != qp->s_rdma_read_len))
 			goto ack_len_err;
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+		hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
 		WARN_ON(qp->s_rdma_read_sge.num_sge);
-		(void) do_rc_ack(qp, aeth, psn,
+		(void)do_rc_ack(qp, aeth, psn,
 				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
 		goto ack_done;
 	}
@@ -1600,7 +1726,7 @@ ack_len_err:
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		hfi1_send_complete(qp, wqe, status);
-		hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1609,22 +1735,24 @@ bail:
 }
 
 static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
-				  struct hfi1_qp *qp)
+				  struct rvt_qp *qp)
 {
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= HFI1_R_RSP_DEFERED_ACK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
 
-static inline void rc_cancel_ack(struct hfi1_qp *qp)
+static inline void rc_cancel_ack(struct rvt_qp *qp)
 {
-	qp->r_adefered = 0;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	priv->r_adefered = 0;
 	if (list_empty(&qp->rspwait))
 		return;
 	list_del_init(&qp->rspwait);
-	qp->r_flags &= ~HFI1_R_RSP_DEFERED_ACK;
+	qp->r_flags &= ~RVT_R_RSP_NAK;
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 }
@@ -1645,11 +1773,11 @@ static inline void rc_cancel_ack(struct hfi1_qp *qp)
  * schedule a response to be sent.
  */
 static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
-			struct hfi1_qp *qp, u32 opcode, u32 psn, int diff,
-			struct hfi1_ctxtdata *rcd)
+				 struct rvt_qp *qp, u32 opcode, u32 psn,
+				 int diff, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1662,7 +1790,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1694,7 +1822,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1747,7 +1875,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1755,8 +1883,8 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;
 
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
-					  IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
 		} else {
@@ -1778,7 +1906,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		 * or the send tasklet is already backed up to send an
 		 * earlier entry, we can ignore this request.
 		 */
-		if (!e || e->opcode != (u8) opcode || old_req)
+		if (!e || e->opcode != (u8)opcode || old_req)
 			goto unlock_done;
 		qp->s_tail_ack_queue = prev;
 		break;
@@ -1810,7 +1938,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= HFI1_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	hfi1_schedule_send(qp);
 
@@ -1823,13 +1951,13 @@ send_ack:
 	return 0;
 }
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = hfi1_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	if (lastwqe) {
@@ -1842,7 +1970,7 @@ void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err)
 	}
 }
 
-static inline void update_ack_queue(struct hfi1_qp *qp, unsigned n)
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;
 
@@ -1864,14 +1992,14 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
 
 	spin_lock_irqsave(&ppd->cc_log_lock, flags);
 
-	ppd->threshold_cong_event_map[sl/8] |= 1 << (sl % 8);
+	ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8);
 	ppd->threshold_event_counter++;
 
 	cc_event = &ppd->cc_events[ppd->cc_log_idx++];
 	if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS)
 		ppd->cc_log_idx = 0;
-	cc_event->lqpn = lqpn & HFI1_QPN_MASK;
-	cc_event->rqpn = rqpn & HFI1_QPN_MASK;
+	cc_event->lqpn = lqpn & RVT_QPN_MASK;
+	cc_event->rqpn = rqpn & RVT_QPN_MASK;
 	cc_event->sl = sl;
 	cc_event->svc_type = svc_type;
 	cc_event->rlid = rlid;
@@ -1897,7 +2025,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
 
 	cc_state = get_cc_state(ppd);
 
-	if (cc_state == NULL)
+	if (!cc_state)
 		return;
 
 	/*
@@ -1957,7 +2085,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_other_headers *ohdr = packet->ohdr;
@@ -1972,6 +2100,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 	unsigned long flags;
 	u32 bth1;
 	int ret, is_fecn = 0;
+	int copy_last = 0;
 
 	bth0 = be32_to_cpu(ohdr->bth[0]);
 	if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
@@ -2054,13 +2183,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 		break;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
 	switch (opcode) {
 	case OP(SEND_FIRST):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2076,12 +2205,12 @@ send_middle:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		/* consume RWQE */
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2090,7 +2219,7 @@ send_middle:
 
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2104,8 +2233,10 @@ send_last_imm:
 		wc.ex.imm_data = ohdr->u.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		goto send_last;
-	case OP(SEND_LAST):
 	case OP(RDMA_WRITE_LAST):
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+		/* fall through */
+	case OP(SEND_LAST):
 no_immediate_data:
 		wc.wc_flags = 0;
 		wc.ex.imm_data = 0;
@@ -2121,10 +2252,10 @@ send_last:
 		wc.byte_len = tlen + qp->r_rcv_len;
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2154,12 +2285,14 @@ send_last:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (bth0 & IB_BTH_SOLICITED) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (bth0 & IB_BTH_SOLICITED) != 0);
 		break;
 
-	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY):
+		copy_last = 1;
+		/* fall through */
+	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto nack_inv;
@@ -2174,8 +2307,8 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
 			qp->r_sge.num_sge = 1;
@@ -2190,7 +2323,7 @@ send_last:
 			goto send_middle;
 		else if (opcode == OP(RDMA_WRITE_ONLY))
 			goto no_immediate_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto nack_op_err;
 		if (!ret)
@@ -2200,7 +2333,7 @@ send_last:
 		goto send_last;
 
 	case OP(RDMA_READ_REQUEST): {
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;
 
@@ -2218,7 +2351,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2229,8 +2362,8 @@ send_last:
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = hfi1_rkey_ok(qp, &e->rdma_sge, len, vaddr,
-					  rkey, IB_ACCESS_REMOTE_READ);
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
 			/*
@@ -2261,7 +2394,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2273,7 +2406,7 @@ send_last:
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct hfi1_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2293,29 +2426,29 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			hfi1_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+		vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
 			be32_to_cpu(ateth->vaddr[1]);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   vaddr, rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  vaddr, rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = be64_to_cpu(ateth->swap_data);
 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2328,7 +2461,7 @@ send_last:
 		qp->r_head_ack_queue = next;
 
 		/* Schedule the send tasklet. */
-		qp->s_flags |= HFI1_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -2347,11 +2480,13 @@ send_last:
 	qp->r_nak_state = 0;
 	/* Send an ACK if requested or required. */
 	if (psn & IB_BTH_REQ_ACK) {
+		struct hfi1_qp_priv *priv = qp->priv;
+
 		if (packet->numpkt == 0) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		if (qp->r_adefered >= HFI1_PSN_CREDIT) {
+		if (priv->r_adefered >= HFI1_PSN_CREDIT) {
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
@@ -2359,13 +2494,13 @@ send_last:
 			rc_cancel_ack(qp);
 			goto send_ack;
 		}
-		qp->r_adefered++;
+		priv->r_adefered++;
 		rc_defered_ack(rcd, qp);
 	}
 	return;
 
 rnr_nak:
-	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK;
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	rc_defered_ack(rcd, qp);
@@ -2403,7 +2538,7 @@ void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp)
+	struct rvt_qp *qp)
 {
 	int has_grh = rcv_flags & HFI1_HAS_GRH;
 	struct hfi1_other_headers *ohdr;
@@ -2428,7 +2563,7 @@ void hfi1_rc_hdrerr(
 	if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 		diff = delta_psn(psn, qp->r_psn);
 		if (!qp->r_nak_state && diff >= 0) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 4a91975b68d7..08813cdbd475 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,8 @@
 #include "hfi.h"
 #include "mad.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
+#include "trace.h"
 
 /*
  * Convert the AETH RNR timeout code into the number of microseconds.
@@ -97,16 +95,16 @@ const u32 ib_hfi1_rnr_table[32] = {
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
+static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;
 
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -114,8 +112,8 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!hfi1_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
-				  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
 		j++;
@@ -127,9 +125,9 @@ static int init_sge(struct hfi1_qp *qp, struct hfi1_rwqe *wqe)
 
 bad_lkey:
 	while (j) {
-		struct hfi1_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -138,14 +136,14 @@ bad_lkey:
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
 }
 
 /**
- * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE
+ * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
  * @qp: the QP
  * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
  *
@@ -154,19 +152,19 @@ bail:
  *
  * Can be called from interrupt level.
  */
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct hfi1_rq *rq;
-	struct hfi1_rwq *wq;
-	struct hfi1_srq *srq;
-	struct hfi1_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;
 
 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -176,7 +174,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	}
 
 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -192,7 +190,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -208,7 +206,7 @@ int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only)
 	qp->r_wr_id = wqe->wr_id;
 
 	ret = 1;
-	set_bit(HFI1_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;
 
@@ -265,7 +263,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
  * The s_lock will be acquired around the hfi1_migrate_qp() call.
  */
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0)
+		       int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,11 +277,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->alt_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+				&hdr->u.l.grh.sgid,
+				qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+				qp->alt_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -312,11 +312,13 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+				    guid))
 				goto err;
-			if (!gid_ok(&hdr->u.l.grh.sgid,
-			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
-			    qp->remote_ah_attr.grh.dgid.global.interface_id))
+			if (!gid_ok(
+			     &hdr->u.l.grh.sgid,
+			     qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+			     qp->remote_ah_attr.grh.dgid.global.interface_id))
 				goto err;
 		}
 		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
@@ -355,12 +357,12 @@ err:
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void ruc_loopback(struct hfi1_qp *sqp)
+static void ruc_loopback(struct rvt_qp *sqp)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct hfi1_qp *qp;
-	struct hfi1_swqe *wqe;
-	struct hfi1_sge *sge;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -368,6 +370,8 @@ static void ruc_loopback(struct hfi1_qp *sqp)
 	enum ib_wc_status send_status;
 	int release;
 	int ret;
+	int copy_last = 0;
+	u32 to;
 
 	rcu_read_lock();
 
@@ -375,25 +379,27 @@ static void ruc_loopback(struct hfi1_qp *sqp)
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    sqp->remote_qpn);
 
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT)) ||
-	    !(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
-	sqp->s_flags |= HFI1_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;
 
 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
 
 	/* Return if it is not OK to start a new work request. */
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[sqp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -411,9 +417,9 @@ again:
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 
-	if (!qp || !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -439,7 +445,7 @@ again:
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
@@ -451,21 +457,24 @@ again:
 			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.ex.imm_data = wqe->wr.ex.imm_data;
-		ret = hfi1_get_rwqe(qp, 1);
+		ret = hfi1_rvt_get_rwqe(qp, 1);
 		if (ret < 0)
 			goto op_err;
 		if (!ret)
 			goto rnr_nak;
-		/* FALLTHROUGH */
+		/* skip copy_last set and qp_access_flags recheck */
+		goto do_write;
 	case IB_WR_RDMA_WRITE:
+		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto inv_err;
+do_write:
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_WRITE)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_WRITE)))
 			goto acc_err;
 		qp->r_sge.sg_list = NULL;
 		qp->r_sge.num_sge = 1;
@@ -475,10 +484,10 @@ again:
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					   wqe->rdma_wr.remote_addr,
-					   wqe->rdma_wr.rkey,
-					   IB_ACCESS_REMOTE_READ)))
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
+					  IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
 		release = 0;
 		sqp->s_sge.sg_list = NULL;
@@ -493,20 +502,20 @@ again:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					   wqe->atomic_wr.remote_addr,
-					   wqe->atomic_wr.rkey,
-					   IB_ACCESS_REMOTE_ATOMIC)))
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  wqe->atomic_wr.remote_addr,
+					  wqe->atomic_wr.rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
-		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
 		sdata = wqe->atomic_wr.compare_add;
-		*(u64 *) sqp->s_sge.sge.vaddr =
+		*(u64 *)sqp->s_sge.sge.vaddr =
 			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
-			(u64) atomic64_add_return(sdata, maddr) - sdata :
-			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+			(u64)atomic64_add_return(sdata, maddr) - sdata :
+			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		hfi1_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
 
@@ -524,17 +533,17 @@ again:
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -547,9 +556,9 @@ again:
 		sqp->s_len -= len;
 	}
 	if (release)
-		hfi1_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;
 
 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -565,12 +574,12 @@ again:
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	hfi1_send_complete(sqp, wqe, send_status);
@@ -580,7 +589,7 @@ rnr_nak:
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -592,13 +601,10 @@ rnr_nak:
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_hfi1_state_ops[sqp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= HFI1_S_WAIT_RNR;
-	sqp->s_timer.function = hfi1_rc_rnr_retry;
-	sqp->s_timer.expires = jiffies +
-		usecs_to_jiffies(ib_hfi1_rnr_table[qp->r_min_rnr_timer]);
-	add_timer(&sqp->s_timer);
+	to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
+	hfi1_add_rnr_timer(sqp, to);
 	goto clr_busy;
 
 op_err:
@@ -622,9 +628,9 @@ serr:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	hfi1_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
 
-		sqp->s_flags &= ~HFI1_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -637,7 +643,7 @@ serr:
 		goto done;
 	}
 clr_busy:
-	sqp->s_flags &= ~HFI1_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
 unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
 done:
@@ -666,7 +672,7 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id =
 		grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
 		ibp->guids[grh->sgid_index - 1] :
@@ -690,29 +696,31 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
  * Subsequent middles use the copied entry, editing the
  * PSN with 1 or 2 edits.
  */
-static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
+static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 {
-	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
 		clear_ahg(qp);
-	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
+	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
 		/* first middle that needs copy  */
 		if (qp->s_ahgidx < 0)
-			qp->s_ahgidx = sdma_ahg_alloc(qp->s_sde);
+			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
 		if (qp->s_ahgidx >= 0) {
 			qp->s_ahgpsn = npsn;
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
 			/* save to protect a change in another thread */
-			qp->s_hdr->sde = qp->s_sde;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_flags |= HFI1_S_AHG_VALID;
+			priv->s_hdr->sde = priv->s_sde;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			qp->s_flags |= RVT_S_AHG_VALID;
 		}
 	} else {
 		/* subsequent middle after valid */
 		if (qp->s_ahgidx >= 0) {
-			qp->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
-			qp->s_hdr->ahgidx = qp->s_ahgidx;
-			qp->s_hdr->ahgcount++;
-			qp->s_hdr->ahgdesc[0] =
+			priv->s_hdr->tx_flags |= SDMA_TXREQ_F_USE_AHG;
+			priv->s_hdr->ahgidx = qp->s_ahgidx;
+			priv->s_hdr->ahgcount++;
+			priv->s_hdr->ahgdesc[0] =
 				sdma_build_ahg_descriptor(
 					(__force u16)cpu_to_be16((u16)npsn),
 					BTH2_OFFSET,
@@ -720,8 +728,8 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
 					16);
 			if ((npsn & 0xffff0000) !=
 					(qp->s_ahgpsn & 0xffff0000)) {
-				qp->s_hdr->ahgcount++;
-				qp->s_hdr->ahgdesc[1] =
+				priv->s_hdr->ahgcount++;
+				priv->s_hdr->ahgdesc[1] =
 					sdma_build_ahg_descriptor(
 						(__force u16)cpu_to_be16(
 							(u16)(npsn >> 16)),
@@ -733,10 +741,12 @@ static inline void build_ahg(struct hfi1_qp *qp, u32 npsn)
 	}
 }
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle)
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps)
 {
-	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ibport *ibp = ps->ibp;
 	u16 lrh0;
 	u32 nwords;
 	u32 extra_bytes;
@@ -747,13 +757,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = HFI1_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &qp->remote_ah_attr.grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&qp->remote_ah_attr.grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
 		middle = 0;
 	}
-	lrh0 |= (qp->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+	lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
 	/*
 	 * reset s_hdr/AHG fields
 	 *
@@ -765,10 +776,10 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	 * build_ahg() will modify as appropriate
 	 * to use the AHG feature.
 	 */
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->sde = NULL;
 	if (qp->s_mig_state == IB_MIG_MIGRATED)
 		bth0 |= IB_BTH_MIG_REQ;
 	else
@@ -776,19 +787,19 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~HFI1_S_AHG_VALID;
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->ibh.lrh[2] =
+		qp->s_flags &= ~RVT_S_AHG_VALID;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->ibh.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	bth1 = qp->remote_qpn;
-	if (qp->s_flags & HFI1_S_ECN) {
-		qp->s_flags &= ~HFI1_S_ECN;
+	if (qp->s_flags & RVT_S_ECN) {
+		qp->s_flags &= ~RVT_S_ECN;
 		/* we recently received a FECN, so return a BECN */
 		bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
 	}
@@ -799,6 +810,14 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
 /* when sending, force a reschedule every one of these periods */
 #define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
 
+void _hfi1_do_send(struct work_struct *work)
+{
+	struct iowait *wait = container_of(work, struct iowait, iowork);
+	struct rvt_qp *qp = iowait_to_qp(wait);
+
+	hfi1_do_send(qp);
+}
+
 /**
  * hfi1_do_send - perform a send on a QP
  * @work: contains a pointer to the QP
@@ -807,34 +826,45 @@ void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
  * Otherwise, two threads could send packets out of order.
  */
-void hfi1_do_send(struct work_struct *work)
+void hfi1_do_send(struct rvt_qp *qp)
 {
-	struct iowait *wait = container_of(work, struct iowait, iowork);
-	struct hfi1_qp *qp = container_of(wait, struct hfi1_qp, s_iowait);
 	struct hfi1_pkt_state ps;
-	int (*make_req)(struct hfi1_qp *qp);
+	struct hfi1_qp_priv *priv = qp->priv;
+	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 	unsigned long flags;
 	unsigned long timeout;
+	unsigned long timeout_int;
+	int cpu;
 
 	ps.dev = to_idev(qp->ibqp.device);
 	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ps.ppd = ppd_from_ibp(ps.ibp);
 
-	if ((qp->ibqp.qp_type == IB_QPT_RC ||
-	     qp->ibqp.qp_type == IB_QPT_UC) &&
-	    !loopback &&
-	    (qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc) - 1)) ==
-	    ps.ppd->lid) {
-		ruc_loopback(qp);
-		return;
-	}
-
-	if (qp->ibqp.qp_type == IB_QPT_RC)
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_rc_req;
-	else if (qp->ibqp.qp_type == IB_QPT_UC)
+		timeout_int = (qp->timeout_jiffies);
+		break;
+	case IB_QPT_UC:
+		if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
+								) - 1)) ==
+				 ps.ppd->lid)) {
+			ruc_loopback(qp);
+			return;
+		}
 		make_req = hfi1_make_uc_req;
-	else
+		timeout_int = SEND_RESCHED_TIMEOUT;
+		break;
+	default:
 		make_req = hfi1_make_ud_req;
+		timeout_int = SEND_RESCHED_TIMEOUT;
+	}
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -844,57 +874,83 @@ void hfi1_do_send(struct work_struct *work)
 		return;
 	}
 
-	qp->s_flags |= HFI1_S_BUSY;
+	qp->s_flags |= RVT_S_BUSY;
 
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	timeout = jiffies + SEND_RESCHED_TIMEOUT;
+	timeout = jiffies + (timeout_int) / 8;
+	cpu = priv->s_sde ? priv->s_sde->cpu :
+			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+	/* insure a pre-built packet is handled  */
+	ps.s_txreq = get_waiting_verbs_txreq(qp);
 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
 			if (hfi1_verbs_send(qp, &ps))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			/* allow other tasks to run */
+			if (unlikely(time_after(jiffies, timeout))) {
+				if (workqueue_congested(cpu,
+							ps.ppd->hfi1_wq)) {
+					spin_lock_irqsave(&qp->s_lock, flags);
+					qp->s_flags &= ~RVT_S_BUSY;
+					hfi1_schedule_send(qp);
+					spin_unlock_irqrestore(&qp->s_lock,
+							       flags);
+					this_cpu_inc(
+						*ps.ppd->dd->send_schedule);
+					return;
+				}
+				if (!irqs_disabled()) {
+					cond_resched();
+					this_cpu_inc(
+					   *ps.ppd->dd->send_schedule);
+				}
+				timeout = jiffies + (timeout_int) / 8;
+			}
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
+	} while (make_req(qp, &ps));
 
-		/* allow other tasks to run */
-		if (unlikely(time_after(jiffies, timeout))) {
-			cond_resched();
-			ps.ppd->dd->verbs_dev.n_send_schedule++;
-			timeout = jiffies + SEND_RESCHED_TIMEOUT;
-		}
-	} while (make_req(qp));
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /*
  * This should be called with s_lock held.
  */
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct hfi1_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];
 
-		hfi1_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -906,15 +962,10 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		hfi1_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
-			      status != IB_WC_SUCCESS);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+			     status != IB_WC_SUCCESS);
 	}
 
-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)
diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c
index 9a15f1f32b45..abb8ebc1fcac 100644
--- a/drivers/staging/rdma/hfi1/sdma.c
+++ b/drivers/staging/rdma/hfi1/sdma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -112,10 +109,10 @@ MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt");
 	| SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
 
 /* sdma_sendctrl operations */
-#define SDMA_SENDCTRL_OP_ENABLE    (1U << 0)
-#define SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
-#define SDMA_SENDCTRL_OP_HALT      (1U << 2)
-#define SDMA_SENDCTRL_OP_CLEANUP   (1U << 3)
+#define SDMA_SENDCTRL_OP_ENABLE    BIT(0)
+#define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
+#define SDMA_SENDCTRL_OP_HALT      BIT(2)
+#define SDMA_SENDCTRL_OP_CLEANUP   BIT(3)
 
 /* handle long defines */
 #define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
@@ -325,9 +322,9 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
 		if (lcnt++ > 500) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-				  __func__, sde->this_idx, (u32)reg);
+				   __func__, sde->this_idx, (u32)reg);
 			queue_work(dd->pport->hfi1_wq,
-				&dd->pport->link_bounce_work);
+				   &dd->pport->link_bounce_work);
 			break;
 		}
 		udelay(1);
@@ -361,6 +358,28 @@ static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
 	write_sde_csr(sde, SD(DESC_CNT), reg);
 }
 
+static inline void complete_tx(struct sdma_engine *sde,
+			       struct sdma_txreq *tx,
+			       int res)
+{
+	/* protect against complete modifying */
+	struct iowait *wait = tx->wait;
+	callback_t complete = tx->complete;
+
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	trace_hfi1_sdma_out_sn(sde, tx->sn);
+	if (WARN_ON_ONCE(sde->head_sn != tx->sn))
+		dd_dev_err(sde->dd, "expected %llu got %llu\n",
+			   sde->head_sn, tx->sn);
+	sde->head_sn++;
+#endif
+	sdma_txclean(sde->dd, tx);
+	if (complete)
+		(*complete)(tx, res);
+	if (iowait_sdma_dec(wait) && wait)
+		iowait_drain_wakeup(wait);
+}
+
 /*
  * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
  *
@@ -395,27 +414,8 @@ static void sdma_flush(struct sdma_engine *sde)
 	}
 	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
 	/* flush from flush list */
-	list_for_each_entry_safe(txp, txp_next, &flushlist, list) {
-		int drained = 0;
-		/* protect against complete modifying */
-		struct iowait *wait = txp->wait;
-
-		list_del_init(&txp->list);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-		trace_hfi1_sdma_out_sn(sde, txp->sn);
-		if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-			dd_dev_err(sde->dd, "expected %llu got %llu\n",
-				sde->head_sn, txp->sn);
-		sde->head_sn++;
-#endif
-		sdma_txclean(sde->dd, txp);
-		if (wait)
-			drained = atomic_dec_and_test(&wait->sdma_busy);
-		if (txp->complete)
-			(*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained);
-		if (wait && drained)
-			iowait_drain_wakeup(wait);
-	}
+	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
+		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 }
 
 /*
@@ -455,8 +455,8 @@ static void sdma_err_halt_wait(struct work_struct *work)
 			break;
 		if (time_after(jiffies, timeout)) {
 			dd_dev_err(sde->dd,
-				"SDMA engine %d - timeout waiting for engine to halt\n",
-				sde->this_idx);
+				   "SDMA engine %d - timeout waiting for engine to halt\n",
+				   sde->this_idx);
 			/*
 			 * Continue anyway.  This could happen if there was
 			 * an uncorrectable error in the wrong spot.
@@ -472,7 +472,6 @@ static void sdma_err_halt_wait(struct work_struct *work)
 static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
 {
 	if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
-
 		unsigned index;
 		struct hfi1_devdata *dd = sde->dd;
 
@@ -531,7 +530,7 @@ static void sdma_err_progress_check(unsigned long data)
 
 static void sdma_hw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	u64 statuscsr;
 
 	while (1) {
@@ -577,31 +576,10 @@ static void sdma_flush_descq(struct sdma_engine *sde)
 		head = ++sde->descq_head & sde->sdma_mask;
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == head) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
+			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
 			trace_hfi1_sdma_progress(sde, head, tail, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_ABORTED,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
-			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
 		progress++;
@@ -612,7 +590,7 @@ static void sdma_flush_descq(struct sdma_engine *sde)
 
 static void sdma_sw_clean_up_task(unsigned long opaque)
 {
-	struct sdma_engine *sde = (struct sdma_engine *) opaque;
+	struct sdma_engine *sde = (struct sdma_engine *)opaque;
 	unsigned long flags;
 
 	spin_lock_irqsave(&sde->tail_lock, flags);
@@ -627,7 +605,6 @@ static void sdma_sw_clean_up_task(unsigned long opaque)
 	 *   descq are ours to play with.
 	 */
 
-
 	/*
 	 * In the error clean up sequence, software clean must be called
 	 * before the hardware clean so we can use the hardware head in
@@ -676,7 +653,7 @@ static void sdma_start_hw_clean_up(struct sdma_engine *sde)
 }
 
 static void sdma_set_state(struct sdma_engine *sde,
-	enum sdma_states next_state)
+			   enum sdma_states next_state)
 {
 	struct sdma_state *ss = &sde->state;
 	const struct sdma_set_state_action *action = sdma_action_table;
@@ -692,8 +669,8 @@ static void sdma_set_state(struct sdma_engine *sde,
 	ss->previous_op = ss->current_op;
 	ss->current_state = next_state;
 
-	if (ss->previous_state != sdma_state_s99_running
-		&& next_state == sdma_state_s99_running)
+	if (ss->previous_state != sdma_state_s99_running &&
+	    next_state == sdma_state_s99_running)
 		sdma_flush(sde);
 
 	if (action[next_state].op_enable)
@@ -890,6 +867,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 	newmap->actual_vls = num_vls;
 	newmap->vls = roundup_pow_of_two(num_vls);
 	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
+	/* initialize back-map */
+	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
+		newmap->engine_to_vl[i] = -1;
 	for (i = 0; i < newmap->vls; i++) {
 		/* save for wrap around */
 		int first_engine = engine;
@@ -913,6 +893,9 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 					/* wrap back to first engine */
 					engine = first_engine;
 			}
+			/* assign back-map */
+			for (j = 0; j < vl_engines[i]; j++)
+				newmap->engine_to_vl[first_engine + j] = i;
 		} else {
 			/* just re-use entry without allocating */
 			newmap->map[i] = newmap->map[i % num_vls];
@@ -922,7 +905,7 @@ int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
 	/* newmap in hand, save old map */
 	spin_lock_irq(&dd->sde_map_lock);
 	oldmap = rcu_dereference_protected(dd->sdma_map,
-			lockdep_is_held(&dd->sde_map_lock));
+					   lockdep_is_held(&dd->sde_map_lock));
 
 	/* publish newmap */
 	rcu_assign_pointer(dd->sdma_map, newmap);
@@ -983,7 +966,7 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
 		sde->tx_ring = NULL;
 	}
 	spin_lock_irq(&dd->sde_map_lock);
-	kfree(rcu_access_pointer(dd->sdma_map));
+	sdma_map_free(rcu_access_pointer(dd->sdma_map));
 	RCU_INIT_POINTER(dd->sdma_map, NULL);
 	spin_unlock_irq(&dd->sde_map_lock);
 	synchronize_rcu();
@@ -1020,19 +1003,19 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 		return 0;
 	}
 	if (mod_num_sdma &&
-		/* can't exceed chip support */
-		mod_num_sdma <= dd->chip_sdma_engines &&
-		/* count must be >= vls */
-		mod_num_sdma >= num_vls)
+	    /* can't exceed chip support */
+	    mod_num_sdma <= dd->chip_sdma_engines &&
+	    /* count must be >= vls */
+	    mod_num_sdma >= num_vls)
 		num_engines = mod_num_sdma;
 
 	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
 	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
 	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
-		dd->chip_sdma_mem_size);
+		    dd->chip_sdma_mem_size);
 
 	per_sdma_credits =
-		dd->chip_sdma_mem_size/(num_engines * SDMA_BLOCK_SIZE);
+		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
 
 	/* set up freeze waitqueue */
 	init_waitqueue_head(&dd->sdma_unfreeze_wq);
@@ -1040,7 +1023,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 
 	descq_cnt = sdma_get_descq_cnt();
 	dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
-		num_engines, descq_cnt);
+		    num_engines, descq_cnt);
 
 	/* alloc memory for array of send engines */
 	dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
@@ -1061,18 +1044,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 		sde->desc_avail = sdma_descq_freecnt(sde);
 		sde->sdma_shift = ilog2(descq_cnt);
 		sde->sdma_mask = (1 << sde->sdma_shift) - 1;
-		sde->descq_full_count = 0;
-
-		/* Create a mask for all 3 chip interrupt sources */
-		sde->imask = (u64)1 << (0*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (1*TXE_NUM_SDMA_ENGINES + this_idx)
-			| (u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_idle */
-		sde->idle_mask =
-			(u64)1 << (2*TXE_NUM_SDMA_ENGINES + this_idx);
-		/* Create a mask specifically for sdma_progress */
-		sde->progress_mask =
-			(u64)1 << (TXE_NUM_SDMA_ENGINES + this_idx);
+
+		/* Create a mask specifically for each interrupt source */
+		sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
+					   this_idx);
+		sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
+						this_idx);
+		sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
+					    this_idx);
+		/* Create a combined mask to cover all 3 interrupt sources */
+		sde->imask = sde->int_mask | sde->progress_mask |
+			     sde->idle_mask;
+
 		spin_lock_init(&sde->tail_lock);
 		seqlock_init(&sde->head_lock);
 		spin_lock_init(&sde->senddmactrl_lock);
@@ -1100,10 +1083,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 				SDMA_DESC1_INT_REQ_FLAG;
 
 		tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 
 		tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
-			(unsigned long)sde);
+			     (unsigned long)sde);
 		INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
 		INIT_WORK(&sde->flush_worker, sdma_field_flush);
 
@@ -1251,11 +1234,10 @@ void sdma_exit(struct hfi1_devdata *dd)
 
 	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
 			++this_idx) {
-
 		sde = &dd->per_sdma[this_idx];
 		if (!list_empty(&sde->dmawait))
 			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
-				sde->this_idx);
+				   sde->this_idx);
 		sdma_process_event(sde, sdma_event_e00_go_hw_down);
 
 		del_timer_sync(&sde->err_progress_check_timer);
@@ -1358,8 +1340,8 @@ retry:
 	use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
 					(dd->flags & HFI1_HAS_SDMA_TIMEOUT);
 	hwhead = use_dmahead ?
-		(u16) le64_to_cpu(*sde->head_dma) :
-		(u16) read_sde_csr(sde, SD(HEAD));
+		(u16)le64_to_cpu(*sde->head_dma) :
+		(u16)read_sde_csr(sde, SD(HEAD));
 
 	if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
 		u16 cnt;
@@ -1385,9 +1367,9 @@ retry:
 
 		if (unlikely(!sane)) {
 			dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
-				sde->this_idx,
-				use_dmahead ? "dma" : "kreg",
-				hwhead, swhead, swtail, cnt);
+				   sde->this_idx,
+				   use_dmahead ? "dma" : "kreg",
+				   hwhead, swhead, swtail, cnt);
 			if (use_dmahead) {
 				/* try one more time, using csr */
 				use_dmahead = 0;
@@ -1464,7 +1446,7 @@ static void sdma_make_progress(struct sdma_engine *sde, u64 status)
 {
 	struct sdma_txreq *txp = NULL;
 	int progress = 0;
-	u16 hwhead, swhead, swtail;
+	u16 hwhead, swhead;
 	int idle_check_done = 0;
 
 	hwhead = sdma_gethead(sde);
@@ -1485,29 +1467,9 @@ retry:
 
 		/* if now past this txp's descs, do the callback */
 		if (txp && txp->next_descq_idx == swhead) {
-			int drained = 0;
-			/* protect against complete modifying */
-			struct iowait *wait = txp->wait;
-
 			/* remove from list */
 			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
-			if (wait)
-				drained = atomic_dec_and_test(&wait->sdma_busy);
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-			trace_hfi1_sdma_out_sn(sde, txp->sn);
-			if (WARN_ON_ONCE(sde->head_sn != txp->sn))
-				dd_dev_err(sde->dd, "expected %llu got %llu\n",
-					sde->head_sn, txp->sn);
-			sde->head_sn++;
-#endif
-			sdma_txclean(sde->dd, txp);
-			if (txp->complete)
-				(*txp->complete)(
-					txp,
-					SDMA_TXREQ_S_OK,
-					drained);
-			if (wait && drained)
-				iowait_drain_wakeup(wait);
+			complete_tx(sde, txp, SDMA_TXREQ_S_OK);
 			/* see if there is another txp */
 			txp = get_txhead(sde);
 		}
@@ -1525,6 +1487,8 @@ retry:
 	 * of sdma_make_progress(..) which is ensured by idle_check_done flag
 	 */
 	if ((status & sde->idle_mask) && !idle_check_done) {
+		u16 swtail;
+
 		swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 		if (swtail != hwhead) {
 			hwhead = (u16)read_sde_csr(sde, SD(HEAD));
@@ -1552,6 +1516,12 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
 	trace_hfi1_sdma_engine_interrupt(sde, status);
 	write_seqlock(&sde->head_lock);
 	sdma_set_desc_cnt(sde, sdma_desct_intr);
+	if (status & sde->idle_mask)
+		sde->idle_int_cnt++;
+	else if (status & sde->progress_mask)
+		sde->progress_int_cnt++;
+	else if (status & sde->int_mask)
+		sde->sdma_int_cnt++;
 	sdma_make_progress(sde, status);
 	write_sequnlock(&sde->head_lock);
 }
@@ -1577,10 +1547,10 @@ void sdma_engine_error(struct sdma_engine *sde, u64 status)
 		__sdma_process_event(sde, sdma_event_e60_hw_halted);
 	if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
 		dd_dev_err(sde->dd,
-			"SDMA (%u) engine error: 0x%llx state %s\n",
-			sde->this_idx,
-			(unsigned long long)status,
-			sdma_state_names[sde->state.current_state]);
+			   "SDMA (%u) engine error: 0x%llx state %s\n",
+			   sde->this_idx,
+			   (unsigned long long)status,
+			   sdma_state_names[sde->state.current_state]);
 		dump_sdma_state(sde);
 	}
 	write_sequnlock(&sde->head_lock);
@@ -1624,8 +1594,8 @@ static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
 
 	if (op & SDMA_SENDCTRL_OP_CLEANUP)
 		write_sde_csr(sde, SD(CTRL),
-			sde->p_senddmactrl |
-			SD(CTRL_SDMA_CLEANUP_SMASK));
+			      sde->p_senddmactrl |
+			      SD(CTRL_SDMA_CLEANUP_SMASK));
 	else
 		write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
 
@@ -1649,12 +1619,10 @@ static void sdma_setlengen(struct sdma_engine *sde)
 	 * generation counter.
 	 */
 	write_sde_csr(sde, SD(LEN_GEN),
-		(sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT)
-	);
+		      (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
 	write_sde_csr(sde, SD(LEN_GEN),
-		((sde->descq_cnt/64) << SD(LEN_GEN_LENGTH_SHIFT))
-		| (4ULL << SD(LEN_GEN_GENERATION_SHIFT))
-	);
+		      ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
+		      (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
 }
 
 static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
@@ -1714,7 +1682,6 @@ static void set_sdma_integrity(struct sdma_engine *sde)
 	write_sde_csr(sde, SD(CHECK_ENABLE), reg);
 }
 
-
 static void init_sdma_regs(
 	struct sdma_engine *sde,
 	u32 credits,
@@ -1735,17 +1702,16 @@ static void init_sdma_regs(
 	write_sde_csr(sde, SD(DESC_CNT), 0);
 	write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
 	write_sde_csr(sde, SD(MEMORY),
-		((u64)credits <<
-			SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
-		((u64)(credits * sde->this_idx) <<
-			SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
+		      ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
+		      ((u64)(credits * sde->this_idx) <<
+		       SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
 	write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
 	set_sdma_integrity(sde);
 	opmask = OPCODE_CHECK_MASK_DISABLED;
 	opval = OPCODE_CHECK_VAL_DISABLED;
 	write_sde_csr(sde, SD(CHECK_OPCODE),
-		(opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
-		(opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
+		      (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
+		      (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
 }
 
 #ifdef CONFIG_SDMA_VERBOSITY
@@ -1824,12 +1790,9 @@ static void dump_sdma_state(struct sdma_engine *sde)
 	descq = sde->descq;
 
 	dd_dev_err(sde->dd,
-		"SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
-		sde->this_idx,
-		head,
-		tail,
-		cnt,
-		!list_empty(&sde->flushlist));
+		   "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
+		   sde->this_idx, head, tail, cnt,
+		   !list_empty(&sde->flushlist));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1850,20 +1813,23 @@ static void dump_sdma_state(struct sdma_engine *sde)
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		dd_dev_err(sde->dd,
-			"SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			 head, flags, addr, gen, len);
+			   "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		dd_dev_err(sde->dd,
-			"\tdesc0:0x%016llx desc1 0x%016llx\n",
-			 desc[0], desc[1]);
+			   "\tdesc0:0x%016llx desc1 0x%016llx\n",
+			   desc[0], desc[1]);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			dd_dev_err(sde->dd,
-				"\taidx: %u amode: %u alen: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK)
-					>> SDMA_DESC1_HEADER_DWS_SHIFT));
+				   "\taidx: %u amode: %u alen: %u\n",
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_DWS_SMASK) >>
+					SDMA_DESC1_HEADER_DWS_SHIFT));
 		head++;
 		head &= sde->sdma_mask;
 	}
@@ -1890,29 +1856,26 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
 	head = sde->descq_head & sde->sdma_mask;
 	tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
 	seq_printf(s, SDE_FMT, sde->this_idx,
-		sde->cpu,
-		sdma_state_name(sde->state.current_state),
-		(unsigned long long)read_sde_csr(sde, SD(CTRL)),
-		(unsigned long long)read_sde_csr(sde, SD(STATUS)),
-		(unsigned long long)read_sde_csr(sde,
-			SD(ENG_ERR_STATUS)),
-		(unsigned long long)read_sde_csr(sde, SD(TAIL)),
-		tail,
-		(unsigned long long)read_sde_csr(sde, SD(HEAD)),
-		head,
-		(unsigned long long)le64_to_cpu(*sde->head_dma),
-		(unsigned long long)read_sde_csr(sde, SD(MEMORY)),
-		(unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
-		(unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
-		(unsigned long long)sde->last_status,
-		(unsigned long long)sde->ahg_bits,
-		sde->tx_tail,
-		sde->tx_head,
-		sde->descq_tail,
-		sde->descq_head,
+		   sde->cpu,
+		   sdma_state_name(sde->state.current_state),
+		   (unsigned long long)read_sde_csr(sde, SD(CTRL)),
+		   (unsigned long long)read_sde_csr(sde, SD(STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
+		   (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
+		   (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
+		   (unsigned long long)le64_to_cpu(*sde->head_dma),
+		   (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
+		   (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
+		   (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
+		   (unsigned long long)sde->last_status,
+		   (unsigned long long)sde->ahg_bits,
+		   sde->tx_tail,
+		   sde->tx_head,
+		   sde->descq_tail,
+		   sde->descq_head,
 		   !list_empty(&sde->flushlist),
-		sde->descq_full_count,
-		(unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
+		   sde->descq_full_count,
+		   (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
 
 	/* print info for each entry in the descriptor queue */
 	while (head != tail) {
@@ -1933,14 +1896,16 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
 		len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
 			& SDMA_DESC0_BYTE_COUNT_MASK;
 		seq_printf(s,
-			"\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
-			head, flags, addr, gen, len);
+			   "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
+			   head, flags, addr, gen, len);
 		if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
 			seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
-				(u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK)
-					>> SDMA_DESC1_HEADER_INDEX_SHIFT),
-				(u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK)
-					>> SDMA_DESC1_HEADER_MODE_SHIFT));
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_INDEX_SMASK) >>
+					SDMA_DESC1_HEADER_INDEX_SHIFT),
+				   (u8)((desc[1] &
+					 SDMA_DESC1_HEADER_MODE_SMASK) >>
+					SDMA_DESC1_HEADER_MODE_SHIFT));
 		head = (head + 1) & sde->sdma_mask;
 	}
 }
@@ -2041,8 +2006,9 @@ static int sdma_check_progress(
 		ret = wait->sleep(sde, wait, tx, seq);
 		if (ret == -EAGAIN)
 			sde->desc_avail = sdma_descq_freecnt(sde);
-	} else
+	} else {
 		ret = -EBUSY;
+	}
 	return ret;
 }
 
@@ -2080,14 +2046,14 @@ retry:
 		goto nodesc;
 	tail = submit_tx(sde, tx);
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	sdma_update_tail(sde, tail);
 unlock:
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
 	return ret;
 unlock_noconn:
 	if (wait)
-		atomic_inc(&wait->sdma_busy);
+		iowait_sdma_inc(wait);
 	tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 	tx->sn = sde->tail_sn++;
@@ -2132,13 +2098,12 @@ nodesc:
  * side locking.
  *
  * Return:
- * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring
- * (wait == NULL)
+ * > 0 - Success (value is number of sdma_txreq's submitted),
+ * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
-int sdma_send_txlist(struct sdma_engine *sde,
-		    struct iowait *wait,
-		    struct list_head *tx_list)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
+		     struct list_head *tx_list)
 {
 	struct sdma_txreq *tx, *tx_next;
 	int ret = 0;
@@ -2169,18 +2134,18 @@ retry:
 	}
 update_tail:
 	if (wait)
-		atomic_add(count, &wait->sdma_busy);
+		iowait_sdma_add(wait, count);
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
-	return ret;
+	return ret == 0 ? count : ret;
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
 		tx->wait = wait;
 		list_del_init(&tx->list);
 		if (wait)
-			atomic_inc(&wait->sdma_busy);
+			iowait_sdma_inc(wait);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 		tx->sn = sde->tail_sn++;
@@ -2206,8 +2171,7 @@ nodesc:
 	goto update_tail;
 }
 
-static void sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
 {
 	unsigned long flags;
 
@@ -2224,7 +2188,7 @@ static void sdma_process_event(struct sdma_engine *sde,
 }
 
 static void __sdma_process_event(struct sdma_engine *sde,
-	enum sdma_events event)
+				 enum sdma_events event)
 {
 	struct sdma_state *ss = &sde->state;
 	int need_progress = 0;
@@ -2247,14 +2211,15 @@ static void __sdma_process_event(struct sdma_engine *sde,
 			 * of link up, then we need to start up.
 			 * This can happen when hw down is requested while
 			 * bringing the link up with traffic active on
-			 * 7220, e.g. */
+			 * 7220, e.g.
+			 */
 			ss->go_s99_running = 1;
 			/* fall through and start dma engine */
 		case sdma_event_e10_go_hw_start:
 			/* This reference means the state machine is started */
 			sdma_get(&sde->state);
 			sdma_set_state(sde,
-				sdma_state_s10_hw_start_up_halt_wait);
+				       sdma_state_s10_hw_start_up_halt_wait);
 			break;
 		case sdma_event_e15_hw_halt_done:
 			break;
@@ -2292,7 +2257,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
 			break;
 		case sdma_event_e15_hw_halt_done:
 			sdma_set_state(sde,
-				sdma_state_s15_hw_start_up_clean_wait);
+				       sdma_state_s15_hw_start_up_clean_wait);
 			sdma_start_hw_clean_up(sde);
 			break;
 		case sdma_event_e25_hw_clean_up_done:
@@ -2767,7 +2732,7 @@ enomem:
  * This function calls _extend_sdma_tx_descs to extend or allocate
  * coalesce buffer. If there is a allocated coalesce buffer, it will
  * copy the input packet data into the coalesce buffer. It also adds
- * coalesce buffer descriptor once whe whole packet is received.
+ * coalesce buffer descriptor once when whole packet is received.
  *
  * Return:
  * <0 - error
@@ -3030,7 +2995,8 @@ void sdma_freeze(struct hfi1_devdata *dd)
 	 * continuing.
 	 */
 	ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
-				atomic_read(&dd->sdma_unfreeze_count) <= 0);
+				       atomic_read(&dd->sdma_unfreeze_count) <=
+				       0);
 	/* interrupted or count is negative, then unloading - just exit */
 	if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
 		return;
@@ -3047,7 +3013,7 @@ void sdma_freeze(struct hfi1_devdata *dd)
 	 * software clean will read engine CSRs, so must be completed before
 	 * the next step, which will clear the engine CSRs.
 	 */
-	(void) wait_event_interruptible(dd->sdma_unfreeze_wq,
+	(void)wait_event_interruptible(dd->sdma_unfreeze_wq,
 				atomic_read(&dd->sdma_unfreeze_count) <= 0);
 	/* no need to check results - done no matter what */
 }
@@ -3067,7 +3033,7 @@ void sdma_unfreeze(struct hfi1_devdata *dd)
 	/* tell all engines start freeze clean up */
 	for (i = 0; i < dd->num_sdma; i++)
 		sdma_process_event(&dd->per_sdma[i],
-					sdma_event_e82_hw_unfreeze);
+				   sdma_event_e82_hw_unfreeze);
 }
 
 /**
@@ -3081,5 +3047,6 @@ void _sdma_engine_progress_schedule(
 	trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
 	/* assume we have selected a good cpu */
 	write_csr(sde->dd,
-		  CCE_INT_FORCE + (8*(IS_SDMA_START/64)), sde->progress_mask);
+		  CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
+		  sde->progress_mask);
 }
diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h
index da89e6458162..8f50c99fe711 100644
--- a/drivers/staging/rdma/hfi1/sdma.h
+++ b/drivers/staging/rdma/hfi1/sdma.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_SDMA_H
 #define _HFI1_SDMA_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -58,15 +55,13 @@
 
 #include "hfi.h"
 #include "verbs.h"
+#include "sdma_txreq.h"
 
-/* increased for AHG */
-#define NUM_DESC 6
 /* Hardware limit */
 #define MAX_DESC 64
 /* Hardware limit for SDMA packet size */
 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
 
-
 #define SDMA_TXREQ_S_OK        0
 #define SDMA_TXREQ_S_SENDERROR 1
 #define SDMA_TXREQ_S_ABORTED   2
@@ -109,8 +104,8 @@
 /*
  * Bits defined in the send DMA descriptor.
  */
-#define SDMA_DESC0_FIRST_DESC_FLAG      (1ULL << 63)
-#define SDMA_DESC0_LAST_DESC_FLAG       (1ULL << 62)
+#define SDMA_DESC0_FIRST_DESC_FLAG      BIT_ULL(63)
+#define SDMA_DESC0_LAST_DESC_FLAG       BIT_ULL(62)
 #define SDMA_DESC0_BYTE_COUNT_SHIFT     48
 #define SDMA_DESC0_BYTE_COUNT_WIDTH     14
 #define SDMA_DESC0_BYTE_COUNT_MASK \
@@ -154,8 +149,8 @@
 	((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
 #define SDMA_DESC1_GENERATION_SMASK \
 	(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
-#define SDMA_DESC1_INT_REQ_FLAG         (1ULL << 1)
-#define SDMA_DESC1_HEAD_TO_HOST_FLAG    (1ULL << 0)
+#define SDMA_DESC1_INT_REQ_FLAG         BIT_ULL(1)
+#define SDMA_DESC1_HEAD_TO_HOST_FLAG    BIT_ULL(0)
 
 enum sdma_states {
 	sdma_state_s00_hw_down,
@@ -311,83 +306,6 @@ struct hw_sdma_desc {
 	__le64 qw[2];
 };
 
-/*
- * struct sdma_desc - canonical fragment descriptor
- *
- * This is the descriptor carried in the tx request
- * corresponding to each fragment.
- *
- */
-struct sdma_desc {
-	/* private:  don't use directly */
-	u64 qw[2];
-};
-
-struct sdma_txreq;
-typedef void (*callback_t)(struct sdma_txreq *, int, int);
-
-/**
- * struct sdma_txreq - the sdma_txreq structure (one per packet)
- * @list: for use by user and by queuing for wait
- *
- * This is the representation of a packet which consists of some
- * number of fragments.   Storage is provided to within the structure.
- * for all fragments.
- *
- * The storage for the descriptors are automatically extended as needed
- * when the currently allocation is exceeded.
- *
- * The user (Verbs or PSM) may overload this structure with fields
- * specific to their use by putting this struct first in their struct.
- * The method of allocation of the overloaded structure is user dependent
- *
- * The list is the only public field in the structure.
- *
- */
-
-struct sdma_txreq {
-	struct list_head list;
-	/* private: */
-	struct sdma_desc *descp;
-	/* private: */
-	void *coalesce_buf;
-	/* private: */
-	u16 coalesce_idx;
-	/* private: */
-	struct iowait *wait;
-	/* private: */
-	callback_t                  complete;
-#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
-	u64 sn;
-#endif
-	/* private: - used in coalesce/pad processing */
-	u16                         packet_len;
-	/* private: - down-counted to trigger last */
-	u16                         tlen;
-	/* private: flags */
-	u16                         flags;
-	/* private: */
-	u16                         num_desc;
-	/* private: */
-	u16                         desc_limit;
-	/* private: */
-	u16                         next_descq_idx;
-	/* private: */
-	struct sdma_desc descs[NUM_DESC];
-};
-
-struct verbs_txreq {
-	struct hfi1_pio_header	phdr;
-	struct sdma_txreq       txreq;
-	struct hfi1_qp           *qp;
-	struct hfi1_swqe         *wqe;
-	struct hfi1_mregion	*mr;
-	struct hfi1_sge_state    *ss;
-	struct sdma_engine     *sde;
-	u16                     hdr_dwords;
-	u16                     hdr_inx;
-};
-
 /**
  * struct sdma_engine - Data pertaining to each SDMA engine.
  * @dd: a back-pointer to the device data
@@ -409,6 +327,7 @@ struct sdma_engine {
 	u64 imask;			/* clear interrupt mask */
 	u64 idle_mask;
 	u64 progress_mask;
+	u64 int_mask;
 	/* private: */
 	volatile __le64      *head_dma; /* DMA'ed by chip */
 	/* private: */
@@ -465,6 +384,12 @@ struct sdma_engine {
 	u16                   tx_head;
 	/* private: */
 	u64                   last_status;
+	/* private */
+	u64                     err_cnt;
+	/* private */
+	u64                     sdma_int_cnt;
+	u64                     idle_int_cnt;
+	u64                     progress_int_cnt;
 
 	/* private: */
 	struct list_head      dmawait;
@@ -484,12 +409,12 @@ struct sdma_engine {
 	u32                   progress_check_head;
 	/* private: */
 	struct work_struct flush_worker;
+	/* protect flush list */
 	spinlock_t flushlist_lock;
 	/* private: */
 	struct list_head flushlist;
 };
 
-
 int sdma_init(struct hfi1_devdata *dd, u8 port);
 void sdma_start(struct hfi1_devdata *dd);
 void sdma_exit(struct hfi1_devdata *dd);
@@ -535,7 +460,6 @@ static inline int __sdma_running(struct sdma_engine *engine)
 	return engine->state.current_state == sdma_state_s99_running;
 }
 
-
 /**
  * sdma_running() - state suitability test
  * @engine: sdma engine
@@ -565,7 +489,6 @@ void _sdma_txreq_ahgadd(
 	u32 *ahg,
 	u8 ahg_hlen);
 
-
 /**
  * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
  * @tx: tx request to initialize
@@ -626,7 +549,7 @@ static inline int sdma_txinit_ahg(
 	u8 num_ahg,
 	u32 *ahg,
 	u8 ahg_hlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	if (tlen == 0)
 		return -ENODATA;
@@ -640,7 +563,8 @@ static inline int sdma_txinit_ahg(
 	tx->complete = cb;
 	tx->coalesce_buf = NULL;
 	tx->wait = NULL;
-	tx->tlen = tx->packet_len = tlen;
+	tx->packet_len = tlen;
+	tx->tlen = tx->packet_len;
 	tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
 	tx->descs[0].qw[1] = 0;
 	if (flags & SDMA_TXREQ_F_AHG_COPY)
@@ -689,7 +613,7 @@ static inline int sdma_txinit(
 	struct sdma_txreq *tx,
 	u16 flags,
 	u16 tlen,
-	void (*cb)(struct sdma_txreq *, int, int))
+	void (*cb)(struct sdma_txreq *, int))
 {
 	return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
 }
@@ -753,7 +677,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
 		dd->default_desc1;
 	if (tx->flags & SDMA_TXREQ_F_URGENT)
 		tx->descp[tx->num_desc].qw[1] |=
-			(SDMA_DESC1_HEAD_TO_HOST_FLAG|
+			(SDMA_DESC1_HEAD_TO_HOST_FLAG |
 			 SDMA_DESC1_INT_REQ_FLAG);
 }
 
@@ -1080,6 +1004,7 @@ struct sdma_map_elem {
 
 /**
  * struct sdma_map_el - mapping for a vl
+ * @engine_to_vl - map of an engine to a vl
  * @list - rcu head for free callback
  * @mask - vl mask to "mod" the vl to produce an index to map array
  * @actual_vls - number of vls
@@ -1091,6 +1016,7 @@ struct sdma_map_elem {
  * in turn point to an array of sde's for that vl.
  */
 struct sdma_vl_map {
+	s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
 	struct rcu_head list;
 	u32 mask;
 	u8 actual_vls;
diff --git a/drivers/staging/rdma/hfi1/sdma_txreq.h b/drivers/staging/rdma/hfi1/sdma_txreq.h
new file mode 100644
index 000000000000..bf7d777d756e
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/sdma_txreq.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_SDMA_TXREQ_H
+#define HFI1_SDMA_TXREQ_H
+
+/* increased for AHG */
+#define NUM_DESC 6
+
+/*
+ * struct sdma_desc - canonical fragment descriptor
+ *
+ * This is the descriptor carried in the tx request
+ * corresponding to each fragment.
+ *
+ */
+struct sdma_desc {
+	/* private:  don't use directly */
+	u64 qw[2];
+};
+
+/**
+ * struct sdma_txreq - the sdma_txreq structure (one per packet)
+ * @list: for use by user and by queuing for wait
+ *
+ * This is the representation of a packet which consists of some
+ * number of fragments.   Storage is provided to within the structure.
+ * for all fragments.
+ *
+ * The storage for the descriptors are automatically extended as needed
+ * when the currently allocation is exceeded.
+ *
+ * The user (Verbs or PSM) may overload this structure with fields
+ * specific to their use by putting this struct first in their struct.
+ * The method of allocation of the overloaded structure is user dependent
+ *
+ * The list is the only public field in the structure.
+ *
+ */
+
+#define SDMA_TXREQ_S_OK        0
+#define SDMA_TXREQ_S_SENDERROR 1
+#define SDMA_TXREQ_S_ABORTED   2
+#define SDMA_TXREQ_S_SHUTDOWN  3
+
+/* flags bits */
+#define SDMA_TXREQ_F_URGENT       0x0001
+#define SDMA_TXREQ_F_AHG_COPY     0x0002
+#define SDMA_TXREQ_F_USE_AHG      0x0004
+
+struct sdma_txreq;
+typedef void (*callback_t)(struct sdma_txreq *, int);
+
+struct iowait;
+struct sdma_txreq {
+	struct list_head list;
+	/* private: */
+	struct sdma_desc *descp;
+	/* private: */
+	void *coalesce_buf;
+	/* private: */
+	struct iowait *wait;
+	/* private: */
+	callback_t                  complete;
+#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
+	u64 sn;
+#endif
+	/* private: - used in coalesce/pad processing */
+	u16                         packet_len;
+	/* private: - down-counted to trigger last */
+	u16                         tlen;
+	/* private: */
+	u16                         num_desc;
+	/* private: */
+	u16                         desc_limit;
+	/* private: */
+	u16                         next_descq_idx;
+	/* private: */
+	u16 coalesce_idx;
+	/* private: flags */
+	u16                         flags;
+	/* private: */
+	struct sdma_desc descs[NUM_DESC];
+};
+
+static inline int sdma_txreq_built(struct sdma_txreq *tx)
+{
+	return tx->num_desc;
+}
+
+#endif                          /* HFI1_SDMA_TXREQ_H */
diff --git a/drivers/staging/rdma/hfi1/sysfs.c b/drivers/staging/rdma/hfi1/sysfs.c
index 1dd6727dd5ef..c7f1271190af 100644
--- a/drivers/staging/rdma/hfi1/sysfs.c
+++ b/drivers/staging/rdma/hfi1/sysfs.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,7 +50,6 @@
 #include "mad.h"
 #include "trace.h"
 
-
 /*
  * Start of per-port congestion control structures and support code
  */
@@ -62,8 +58,8 @@
  * Congestion control table size followed by table entries
  */
 static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				 struct bin_attribute *bin_attr,
+				 char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -84,7 +80,7 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -99,10 +95,6 @@ static void port_release(struct kobject *kobj)
 	/* nothing to do since memory is freed by hfi1_free_devdata() */
 }
 
-static struct kobj_type port_cc_ktype = {
-	.release = port_release,
-};
-
 static struct bin_attribute cc_table_bin_attr = {
 	.attr = {.name = "cc_table_bin", .mode = 0444},
 	.read = read_cc_table_bin,
@@ -115,8 +107,8 @@ static struct bin_attribute cc_table_bin_attr = {
  * trigger threshold and the minimum injection rate delay.
  */
 static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *bin_attr,
-		char *buf, loff_t pos, size_t count)
+				   struct bin_attribute *bin_attr,
+				   char *buf, loff_t pos, size_t count)
 {
 	int ret;
 	struct hfi1_pportdata *ppd =
@@ -135,7 +127,7 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
 
 	rcu_read_lock();
 	cc_state = get_cc_state(ppd);
-	if (cc_state == NULL) {
+	if (!cc_state) {
 		rcu_read_unlock();
 		return -EINVAL;
 	}
@@ -151,6 +143,68 @@ static struct bin_attribute cc_setting_bin_attr = {
 	.size = PAGE_SIZE,
 };
 
+struct hfi1_port_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct hfi1_pportdata *, char *);
+	ssize_t	(*store)(struct hfi1_pportdata *, const char *, size_t);
+};
+
+static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+{
+	return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+}
+
+static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+				size_t count)
+{
+	if (!memcmp(buf, "on", 2))
+		ppd->cc_prescan = true;
+	else if (!memcmp(buf, "off", 3))
+		ppd->cc_prescan = false;
+
+	return count;
+}
+
+static struct hfi1_port_attr cc_prescan_attr =
+		__ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
+
+static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
+			    char *buf)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->show(ppd, buf);
+}
+
+static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct hfi1_port_attr *port_attr =
+		container_of(attr, struct hfi1_port_attr, attr);
+	struct hfi1_pportdata *ppd =
+		container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+
+	return port_attr->store(ppd, buf, count);
+}
+
+static const struct sysfs_ops port_cc_sysfs_ops = {
+	.show = cc_attr_show,
+	.store = cc_attr_store
+};
+
+static struct attribute *port_cc_default_attributes[] = {
+	&cc_prescan_attr.attr
+};
+
+static struct kobj_type port_cc_ktype = {
+	.release = port_release,
+	.sysfs_ops = &port_cc_sysfs_ops,
+	.default_attrs = port_cc_default_attributes
+};
+
 /* Start sc2vl */
 #define HFI1_SC2VL_ATTR(N)				    \
 	static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
@@ -196,7 +250,6 @@ HFI1_SC2VL_ATTR(29);
 HFI1_SC2VL_ATTR(30);
 HFI1_SC2VL_ATTR(31);
 
-
 static struct attribute *sc2vl_default_attributes[] = {
 	&hfi1_sc2vl_attr_0.attr,
 	&hfi1_sc2vl_attr_1.attr,
@@ -302,7 +355,6 @@ HFI1_SL2SC_ATTR(29);
 HFI1_SL2SC_ATTR(30);
 HFI1_SL2SC_ATTR(31);
 
-
 static struct attribute *sl2sc_default_attributes[] = {
 	&hfi1_sl2sc_attr_0.attr,
 	&hfi1_sl2sc_attr_1.attr,
@@ -435,7 +487,6 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
 	.default_attrs = vl2mtu_default_attributes
 };
 
-
 /* end of per-port file structures and support code */
 
 /*
@@ -446,7 +497,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -455,7 +506,7 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -470,19 +521,18 @@ static ssize_t show_boardversion(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
 }
 
-
 static ssize_t show_nctxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/*
@@ -497,10 +547,10 @@ static ssize_t show_nctxts(struct device *device,
 }
 
 static ssize_t show_nfreectxts(struct device *device,
-			   struct device_attribute *attr, char *buf)
+			       struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -511,11 +561,10 @@ static ssize_t show_serial(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
-
 }
 
 static ssize_t store_chip_reset(struct device *device,
@@ -523,7 +572,7 @@ static ssize_t store_chip_reset(struct device *device,
 				size_t count)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -552,7 +601,7 @@ static ssize_t show_tempsense(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, ibdev.dev);
+		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	struct hfi1_temp temp;
 	int ret;
@@ -608,8 +657,8 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 
 	if (!port_num || port_num > dd->num_pports) {
 		dd_dev_err(dd,
-			"Skipping infiniband class with invalid port %u\n",
-			port_num);
+			   "Skipping infiniband class with invalid port %u\n",
+			   port_num);
 		return -ENODEV;
 	}
 	ppd = &dd->pport[port_num - 1];
@@ -644,39 +693,36 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
 	}
 	kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
 
-
 	ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
 				   kobj, "CCMgtA");
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_vl2mtu;
 	}
 
 	kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_setting_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc;
 	}
 
-	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
-				&cc_table_bin_attr);
+	ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
 	if (ret) {
 		dd_dev_err(dd,
-		 "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
-		 ret, port_num);
+			   "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+			   ret, port_num);
 		goto bail_cc_entry_bin;
 	}
 
 	dd_dev_info(dd,
-		"IB%u: Congestion Control Agent enabled for port %d\n",
-		dd->unit, port_num);
+		    "IB%u: Congestion Control Agent enabled for port %d\n",
+		    dd->unit, port_num);
 
 	return 0;
 
@@ -700,7 +746,7 @@ bail:
  */
 int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c
index 10122e84cb2f..8b62fefcf903 100644
--- a/drivers/staging/rdma/hfi1/trace.c
+++ b/drivers/staging/rdma/hfi1/trace.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -109,17 +106,17 @@ const char *parse_everbs_hdrs(
 	case OP(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE):
 		trace_seq_printf(p, IMM_PRN,
-			be32_to_cpu(eh->imm_data));
+				 be32_to_cpu(eh->imm_data));
 		break;
 	/* reth + imm */
 	case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, RETH_PRN " " IMM_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length),
-			be32_to_cpu(eh->rc.imm_data));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length),
+				 be32_to_cpu(eh->rc.imm_data));
 		break;
 	/* reth */
 	case OP(RC, RDMA_READ_REQUEST):
@@ -128,10 +125,10 @@ const char *parse_everbs_hdrs(
 	case OP(RC, RDMA_WRITE_ONLY):
 	case OP(UC, RDMA_WRITE_ONLY):
 		trace_seq_printf(p, RETH_PRN,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->rc.reth.vaddr),
-			be32_to_cpu(eh->rc.reth.rkey),
-			be32_to_cpu(eh->rc.reth.length));
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->rc.reth.vaddr),
+				 be32_to_cpu(eh->rc.reth.rkey),
+				 be32_to_cpu(eh->rc.reth.length));
 		break;
 	case OP(RC, RDMA_READ_RESPONSE_FIRST):
 	case OP(RC, RDMA_READ_RESPONSE_LAST):
@@ -154,19 +151,20 @@ const char *parse_everbs_hdrs(
 	case OP(RC, COMPARE_SWAP):
 	case OP(RC, FETCH_ADD):
 		trace_seq_printf(p, ATOMICETH_PRN,
-			(unsigned long long)ib_u64_get(eh->atomic_eth.vaddr),
-			eh->atomic_eth.rkey,
-			(unsigned long long)ib_u64_get(
-				(__be32 *)&eh->atomic_eth.swap_data),
-			(unsigned long long) ib_u64_get(
+				 (unsigned long long)ib_u64_get(
+				 eh->atomic_eth.vaddr),
+				 eh->atomic_eth.rkey,
+				 (unsigned long long)ib_u64_get(
+				 (__be32 *)&eh->atomic_eth.swap_data),
+				 (unsigned long long)ib_u64_get(
 				 (__be32 *)&eh->atomic_eth.compare_data));
 		break;
 	/* deth */
 	case OP(UD, SEND_ONLY):
 	case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, DETH_PRN,
-			be32_to_cpu(eh->ud.deth[0]),
-			be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK);
+				 be32_to_cpu(eh->ud.deth[0]),
+				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK);
 		break;
 	}
 	trace_seq_putc(p, 0);
@@ -187,12 +185,12 @@ const char *parse_sdma_flags(
 	trace_seq_printf(p, "%s", flags);
 	if (desc0 & SDMA_DESC0_FIRST_DESC_FLAG)
 		trace_seq_printf(p, " amode:%u aidx:%u alen:%u",
-			(u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT)
-				& SDMA_DESC1_HEADER_MODE_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT)
-				& SDMA_DESC1_HEADER_INDEX_MASK),
-			(u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT)
-				& SDMA_DESC1_HEADER_DWS_MASK));
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_MODE_SHIFT) &
+				      SDMA_DESC1_HEADER_MODE_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_INDEX_SHIFT) &
+				      SDMA_DESC1_HEADER_INDEX_MASK),
+				 (u8)((desc1 >> SDMA_DESC1_HEADER_DWS_SHIFT) &
+				      SDMA_DESC1_HEADER_DWS_MASK));
 	return ret;
 }
 
@@ -234,3 +232,4 @@ __hfi1_trace_fn(DC8051);
 __hfi1_trace_fn(FIRMWARE);
 __hfi1_trace_fn(RCVCTRL);
 __hfi1_trace_fn(TID);
+__hfi1_trace_fn(MMU);
diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h
index 86c12ebfd4f0..963dc948c38a 100644
--- a/drivers/staging/rdma/hfi1/trace.h
+++ b/drivers/staging/rdma/hfi1/trace.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -76,304 +73,295 @@ __print_symbolic(etype,                         \
 #define TRACE_SYSTEM hfi1_rx
 
 TRACE_EVENT(hfi1_rcvhdr,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 u64 eflags,
-		 u32 ctxt,
-		 u32 etype,
-		 u32 hlen,
-		 u32 tlen,
-		 u32 updegr,
-		 u32 etail),
-	TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u64, eflags)
-		__field(u32, ctxt)
-		__field(u32, etype)
-		__field(u32, hlen)
-		__field(u32, tlen)
-		__field(u32, updegr)
-		__field(u32, etail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->eflags = eflags;
-		__entry->ctxt = ctxt;
-		__entry->etype = etype;
-		__entry->hlen = hlen;
-		__entry->tlen = tlen;
-		__entry->updegr = updegr;
-		__entry->etail = etail;
-	),
-	TP_printk(
-"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->eflags,
-		__entry->etype, show_packettype(__entry->etype),
-		__entry->hlen,
-		__entry->tlen,
-		__entry->updegr,
-		__entry->etail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     u64 eflags,
+		     u32 ctxt,
+		     u32 etype,
+		     u32 hlen,
+		     u32 tlen,
+		     u32 updegr,
+		     u32 etail
+		     ),
+	    TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u64, eflags)
+			     __field(u32, ctxt)
+			     __field(u32, etype)
+			     __field(u32, hlen)
+			     __field(u32, tlen)
+			     __field(u32, updegr)
+			     __field(u32, etail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->eflags = eflags;
+			   __entry->ctxt = ctxt;
+			   __entry->etype = etype;
+			   __entry->hlen = hlen;
+			   __entry->tlen = tlen;
+			   __entry->updegr = updegr;
+			   __entry->etail = etail;
+			   ),
+	    TP_printk(
+		      "[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->eflags,
+		      __entry->etype, show_packettype(__entry->etype),
+		      __entry->hlen,
+		      __entry->tlen,
+		      __entry->updegr,
+		      __entry->etail
+		      )
 );
 
 TRACE_EVENT(hfi1_receive_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
-	TP_ARGS(dd, ctxt),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, ctxt)
-		__field(u8, slow_path)
-		__field(u8, dma_rtail)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->ctxt = ctxt;
-		if (dd->rcd[ctxt]->do_interrupt ==
-		    &handle_receive_interrupt) {
-			__entry->slow_path = 1;
-			__entry->dma_rtail = 0xFF;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			&handle_receive_interrupt_dma_rtail){
-			__entry->dma_rtail = 1;
-			__entry->slow_path = 0;
-		} else if (dd->rcd[ctxt]->do_interrupt ==
-			 &handle_receive_interrupt_nodma_rtail) {
-			__entry->dma_rtail = 0;
-			__entry->slow_path = 0;
-		}
-	),
-	TP_printk(
-		"[%s] ctxt %d SlowPath: %d DmaRtail: %d",
-		__get_str(dev),
-		__entry->ctxt,
-		__entry->slow_path,
-		__entry->dma_rtail
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
+	    TP_ARGS(dd, ctxt),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, ctxt)
+			     __field(u8, slow_path)
+			     __field(u8, dma_rtail)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = ctxt;
+			   if (dd->rcd[ctxt]->do_interrupt ==
+			       &handle_receive_interrupt) {
+				__entry->slow_path = 1;
+				__entry->dma_rtail = 0xFF;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_dma_rtail){
+				__entry->dma_rtail = 1;
+				__entry->slow_path = 0;
+			   } else if (dd->rcd[ctxt]->do_interrupt ==
+				      &handle_receive_interrupt_nodma_rtail) {
+				__entry->dma_rtail = 0;
+				__entry->slow_path = 0;
+			   }
+			   ),
+	    TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->slow_path,
+		      __entry->dma_rtail
+		      )
 );
 
-const char *print_u64_array(struct trace_seq *, u64 *, int);
+TRACE_EVENT(hfi1_exp_tid_reg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr,
+		     u32 npages, unsigned long va, unsigned long pa,
+		     dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+	    TP_STRUCT__entry(
+		    __field(unsigned, ctxt)
+		    __field(u16, subctxt)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
+		    ),
+	    TP_fast_assign(
+		    __entry->ctxt = ctxt;
+		    __entry->subctxt = subctxt;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
+		    ),
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
+		    )
+	);
 
-TRACE_EVENT(hfi1_exp_tid_map,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, int dir,
-		     unsigned long *maps, u16 count),
-	    TP_ARGS(ctxt, subctxt, dir, maps, count),
+TRACE_EVENT(hfi1_exp_tid_unreg,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 rarr, u32 npages,
+		     unsigned long va, unsigned long pa, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(int, dir)
-		    __field(u16, count)
-		    __dynamic_array(unsigned long, maps, sizeof(*maps) * count)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(unsigned long, va)
+		    __field(unsigned long, pa)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->dir = dir;
-		    __entry->count = count;
-		    memcpy(__get_dynamic_array(maps), maps,
-			   sizeof(*maps) * count);
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->va = va;
+		    __entry->pa = pa;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%3u:%02u] %s tidmaps %s",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      (__entry->dir ? ">" : "<"),
-		      print_u64_array(p, __get_dynamic_array(maps),
-				      __entry->count)
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->pa,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_set,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long vaddr, u64 phys_addr, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, vaddr, phys_addr, page),
+TRACE_EVENT(hfi1_exp_tid_inval,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, unsigned long va, u32 rarr,
+		     u32 npages, dma_addr_t dma),
+	    TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, vaddr)
-		    __field(u64, phys_addr)
-		    __field(void *, page)
+		    __field(unsigned long, va)
+		    __field(u32, rarr)
+		    __field(u32, npages)
+		    __field(dma_addr_t, dma)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->vaddr = vaddr;
-		    __entry->phys_addr = phys_addr;
-		    __entry->page = page;
+		    __entry->va = va;
+		    __entry->rarr = rarr;
+		    __entry->npages = npages;
+		    __entry->dma = dma;
 		    ),
-	    TP_printk("[%u:%u] TID %u, vaddrs 0x%lx, physaddr 0x%llx, pgp %p",
+	    TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->vaddr,
-		      __entry->phys_addr,
-		      __entry->page
+		      __entry->rarr,
+		      __entry->npages,
+		      __entry->va,
+		      __entry->dma
 		    )
 	);
 
-TRACE_EVENT(hfi1_exp_rcv_free,
-	    TP_PROTO(unsigned ctxt, u16 subctxt, u32 tid,
-		     unsigned long phys, void *page),
-	    TP_ARGS(ctxt, subctxt, tid, phys, page),
+TRACE_EVENT(hfi1_mmu_invalidate,
+	    TP_PROTO(unsigned ctxt, u16 subctxt, const char *type,
+		     unsigned long start, unsigned long end),
+	    TP_ARGS(ctxt, subctxt, type, start, end),
 	    TP_STRUCT__entry(
 		    __field(unsigned, ctxt)
 		    __field(u16, subctxt)
-		    __field(u32, tid)
-		    __field(unsigned long, phys)
-		    __field(void *, page)
+		    __string(type, type)
+		    __field(unsigned long, start)
+		    __field(unsigned long, end)
 		    ),
 	    TP_fast_assign(
 		    __entry->ctxt = ctxt;
 		    __entry->subctxt = subctxt;
-		    __entry->tid = tid;
-		    __entry->phys = phys;
-		    __entry->page = page;
+		    __assign_str(type, type);
+		    __entry->start = start;
+		    __entry->end = end;
 		    ),
-	    TP_printk("[%u:%u] freeing TID %u, 0x%lx, pgp %p",
+	    TP_printk("[%3u:%02u] MMU Invalidate (%s) 0x%lx - 0x%lx",
 		      __entry->ctxt,
 		      __entry->subctxt,
-		      __entry->tid,
-		      __entry->phys,
-		      __entry->page
+		      __get_str(type),
+		      __entry->start,
+		      __entry->end
 		    )
 	);
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_tx
 
 TRACE_EVENT(hfi1_piofree,
-	TP_PROTO(struct send_context *sc, int extra),
-	TP_ARGS(sc, extra),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(int, extra)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->extra = extra;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) extra %d",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->extra
-	)
+	    TP_PROTO(struct send_context *sc, int extra),
+	    TP_ARGS(sc, extra),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(int, extra)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->extra = extra;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) extra %d",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->extra
+		      )
 );
 
 TRACE_EVENT(hfi1_wantpiointr,
-	TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
-	TP_ARGS(sc, needint, credit_ctrl),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sc->dd)
-		__field(u32, sw_index)
-		__field(u32, hw_context)
-		__field(u32, needint)
-		__field(u64, credit_ctrl)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sc->dd);
-		__entry->sw_index = sc->sw_index;
-		__entry->hw_context = sc->hw_context;
-		__entry->needint = needint;
-		__entry->credit_ctrl = credit_ctrl;
-	),
-	TP_printk(
-		"[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
-		__get_str(dev),
-		__entry->sw_index,
-		__entry->hw_context,
-		__entry->needint,
-		(unsigned long long)__entry->credit_ctrl
-	)
+	    TP_PROTO(struct send_context *sc, u32 needint, u64 credit_ctrl),
+	    TP_ARGS(sc, needint, credit_ctrl),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sc->dd)
+			     __field(u32, sw_index)
+			     __field(u32, hw_context)
+			     __field(u32, needint)
+			     __field(u64, credit_ctrl)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sc->dd);
+			   __entry->sw_index = sc->sw_index;
+			   __entry->hw_context = sc->hw_context;
+			   __entry->needint = needint;
+			   __entry->credit_ctrl = credit_ctrl;
+			   ),
+	    TP_printk("[%s] ctxt %u(%u) on %d credit_ctrl 0x%llx",
+		      __get_str(dev),
+		      __entry->sw_index,
+		      __entry->hw_context,
+		      __entry->needint,
+		      (unsigned long long)__entry->credit_ctrl
+		       )
 );
 
 DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 flags),
-	TP_ARGS(qp, flags),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, flags)
-		__field(u32, s_flags)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->flags = flags;
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->flags,
-		__entry->s_flags
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 flags),
+		    TP_ARGS(qp, flags),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			    __field(u32, qpn)
+			    __field(u32, flags)
+			    __field(u32, s_flags)
+			    ),
+		    TP_fast_assign(
+			    DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			    __entry->flags = flags;
+			    __entry->qpn = qp->ibqp.qp_num;
+			    __entry->s_flags = qp->s_flags;
+			    ),
+		    TP_printk(
+			    "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+			    __get_str(dev),
+			    __entry->qpn,
+			    __entry->flags,
+			    __entry->s_flags
+			    )
 );
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpwakeup,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep,
-	     TP_PROTO(struct hfi1_qp *qp, u32 flags),
+	     TP_PROTO(struct rvt_qp *qp, u32 flags),
 	     TP_ARGS(qp, flags));
 
 #undef TRACE_SYSTEM
-#define TRACE_SYSTEM hfi1_qphash
-DECLARE_EVENT_CLASS(hfi1_qphash_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, bucket)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->bucket = bucket;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x bucket %u",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->bucket
-	)
-);
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove,
-	TP_PROTO(struct hfi1_qp *qp, u32 bucket),
-	TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_ibhdrs
 
 u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
-const char *parse_everbs_hdrs(
-	struct trace_seq *p,
-	u8 opcode,
-	void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
 
 #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
 
-const char *parse_sdma_flags(
-	struct trace_seq *p,
-	u64 desc0, u64 desc1);
+const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
 
 #define __parse_sdma_flags(desc0, desc1) parse_sdma_flags(p, desc0, desc1)
 
-
 #define lrh_name(lrh) { HFI1_##lrh, #lrh }
 #define show_lnh(lrh)                    \
 __print_symbolic(lrh,                    \
@@ -420,7 +408,6 @@ __print_symbolic(opcode,                                   \
 	ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE),       \
 	ib_opcode_name(CNP))
 
-
 #define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
 #define BTH_PRN \
 	"op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
@@ -428,124 +415,130 @@ __print_symbolic(opcode,                                   \
 #define EHDR_PRN "%s"
 
 DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 struct hfi1_ib_header *hdr),
-	TP_ARGS(dd, hdr),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		/* LRH */
-		__field(u8, vl)
-		__field(u8, lver)
-		__field(u8, sl)
-		__field(u8, lnh)
-		__field(u16, dlid)
-		__field(u16, len)
-		__field(u16, slid)
-		/* BTH */
-		__field(u8, opcode)
-		__field(u8, se)
-		__field(u8, m)
-		__field(u8, pad)
-		__field(u8, tver)
-		__field(u16, pkey)
-		__field(u8, f)
-		__field(u8, b)
-		__field(u32, qpn)
-		__field(u8, a)
-		__field(u32, psn)
-		/* extended headers */
-		__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
-	),
-	TP_fast_assign(
-		struct hfi1_other_headers *ohdr;
-
-		DD_DEV_ASSIGN(dd);
-		/* LRH */
-		__entry->vl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
-		__entry->lver =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
-		__entry->sl =
-			(u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-		__entry->lnh =
-			(u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-		__entry->dlid =
-			be16_to_cpu(hdr->lrh[1]);
-		/* allow for larger len */
-		__entry->len =
-			be16_to_cpu(hdr->lrh[2]);
-		__entry->slid =
-			be16_to_cpu(hdr->lrh[3]);
-		/* BTH */
-		if (__entry->lnh == HFI1_LRH_BTH)
-			ohdr = &hdr->u.oth;
-		else
-			ohdr = &hdr->u.l.oth;
-		__entry->opcode =
-			(be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-		__entry->se =
-			(be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
-		__entry->m =
-			 (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
-		__entry->pad =
-			(be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
-		__entry->tver =
-			(be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
-		__entry->pkey =
-			be32_to_cpu(ohdr->bth[0]) & 0xffff;
-		__entry->f =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT)
-			& HFI1_FECN_MASK;
-		__entry->b =
-			(be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT)
-			& HFI1_BECN_MASK;
-		__entry->qpn =
-			be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
-		__entry->a =
-			(be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
-		/* allow for larger PSN */
-		__entry->psn =
-			be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
-		/* extended headers */
-		 memcpy(
-			__get_dynamic_array(ehdrs),
-			&ohdr->u,
-			ibhdr_exhdr_len(hdr));
-	),
-	TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
-		__get_str(dev),
-		/* LRH */
-		__entry->vl,
-		__entry->lver,
-		__entry->sl,
-		__entry->lnh, show_lnh(__entry->lnh),
-		__entry->dlid,
-		__entry->len,
-		__entry->slid,
-		/* BTH */
-		__entry->opcode, show_ib_opcode(__entry->opcode),
-		__entry->se,
-		__entry->m,
-		__entry->pad,
-		__entry->tver,
-		__entry->pkey,
-		__entry->f,
-		__entry->b,
-		__entry->qpn,
-		__entry->a,
-		__entry->psn,
-		/* extended headers */
-		__parse_ib_ehdrs(
-			__entry->opcode,
-			(void *)__get_dynamic_array(ehdrs))
-	)
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct hfi1_ib_header *hdr),
+		    TP_ARGS(dd, hdr),
+		    TP_STRUCT__entry(
+			    DD_DEV_ENTRY(dd)
+			    /* LRH */
+			    __field(u8, vl)
+			    __field(u8, lver)
+			    __field(u8, sl)
+			    __field(u8, lnh)
+			    __field(u16, dlid)
+			    __field(u16, len)
+			    __field(u16, slid)
+			    /* BTH */
+			    __field(u8, opcode)
+			    __field(u8, se)
+			    __field(u8, m)
+			    __field(u8, pad)
+			    __field(u8, tver)
+			    __field(u16, pkey)
+			    __field(u8, f)
+			    __field(u8, b)
+			    __field(u32, qpn)
+			    __field(u8, a)
+			    __field(u32, psn)
+			    /* extended headers */
+			    __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+			    ),
+		    TP_fast_assign(
+			   struct hfi1_other_headers *ohdr;
+
+			   DD_DEV_ASSIGN(dd);
+			   /* LRH */
+			   __entry->vl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
+			   __entry->lver =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
+			   __entry->sl =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
+			   __entry->lnh =
+			   (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
+			   __entry->dlid =
+			   be16_to_cpu(hdr->lrh[1]);
+			   /* allow for larger len */
+			   __entry->len =
+			   be16_to_cpu(hdr->lrh[2]);
+			   __entry->slid =
+			   be16_to_cpu(hdr->lrh[3]);
+			   /* BTH */
+			   if (__entry->lnh == HFI1_LRH_BTH)
+				ohdr = &hdr->u.oth;
+			   else
+				ohdr = &hdr->u.l.oth;
+			  __entry->opcode =
+			  (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+			  __entry->se =
+			  (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
+			  __entry->m =
+			  (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
+			  __entry->pad =
+			  (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+			  __entry->tver =
+			  (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
+			  __entry->pkey =
+			  be32_to_cpu(ohdr->bth[0]) & 0xffff;
+			  __entry->f =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
+			  HFI1_FECN_MASK;
+			  __entry->b =
+			  (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
+			  HFI1_BECN_MASK;
+			  __entry->qpn =
+			  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
+			  __entry->a =
+			  (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
+			  /* allow for larger PSN */
+			  __entry->psn =
+			  be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+			  /* extended headers */
+			  memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+				 ibhdr_exhdr_len(hdr));
+			 ),
+		    TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
+			      __get_str(dev),
+			      /* LRH */
+			      __entry->vl,
+			      __entry->lver,
+			      __entry->sl,
+			      __entry->lnh, show_lnh(__entry->lnh),
+			      __entry->dlid,
+			      __entry->len,
+			      __entry->slid,
+			      /* BTH */
+			      __entry->opcode, show_ib_opcode(__entry->opcode),
+			      __entry->se,
+			      __entry->m,
+			      __entry->pad,
+			      __entry->tver,
+			      __entry->pkey,
+			      __entry->f,
+			      __entry->b,
+			      __entry->qpn,
+			      __entry->a,
+			      __entry->psn,
+			      /* extended headers */
+			      __parse_ib_ehdrs(
+					__entry->opcode,
+					(void *)__get_dynamic_array(ehdrs))
+			     )
 );
 
 DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
-DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
+DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
+	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_ARGS(dd, hdr));
+
+DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
 	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
@@ -556,15 +549,14 @@ DEFINE_EVENT(hfi1_ibhdr_template, output_ibhdr,
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_snoop
 
-
 TRACE_EVENT(snoop_capture,
-	TP_PROTO(struct hfi1_devdata *dd,
-		 int hdr_len,
-		 struct hfi1_ib_header *hdr,
-		 int data_len,
-		 void *data),
-	TP_ARGS(dd, hdr_len, hdr, data_len, data),
-	TP_STRUCT__entry(
+	    TP_PROTO(struct hfi1_devdata *dd,
+		     int hdr_len,
+		     struct hfi1_ib_header *hdr,
+		     int data_len,
+		     void *data),
+	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
+	    TP_STRUCT__entry(
 		DD_DEV_ENTRY(dd)
 		__field(u16, slid)
 		__field(u16, dlid)
@@ -577,8 +569,8 @@ TRACE_EVENT(snoop_capture,
 		__field(u8, lnh)
 		__dynamic_array(u8, raw_hdr, hdr_len)
 		__dynamic_array(u8, raw_pkt, data_len)
-	),
-	TP_fast_assign(
+		),
+	    TP_fast_assign(
 		struct hfi1_other_headers *ohdr;
 
 		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -589,7 +581,7 @@ TRACE_EVENT(snoop_capture,
 		DD_DEV_ASSIGN(dd);
 		__entry->slid = be16_to_cpu(hdr->lrh[3]);
 		__entry->dlid = be16_to_cpu(hdr->lrh[1]);
-		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		__entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		__entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
 		__entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
 		__entry->pkey =	be32_to_cpu(ohdr->bth[0]) & 0xffff;
@@ -597,8 +589,9 @@ TRACE_EVENT(snoop_capture,
 		__entry->data_len = data_len;
 		memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
 		memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-	),
-	TP_printk("[%s] " SNOOP_PRN,
+		),
+	    TP_printk(
+		"[%s] " SNOOP_PRN,
 		__get_str(dev),
 		__entry->slid,
 		__entry->dlid,
@@ -609,7 +602,7 @@ TRACE_EVENT(snoop_capture,
 		__entry->pkey,
 		__entry->hdr_len,
 		__entry->data_len
-	)
+		)
 );
 
 #undef TRACE_SYSTEM
@@ -621,41 +614,39 @@ TRACE_EVENT(snoop_capture,
 TRACE_EVENT(hfi1_uctxtdata,
 	    TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
 	    TP_ARGS(dd, uctxt),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(u32, credits)
-		    __field(u64, hw_free)
-		    __field(u64, piobase)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u64, rcvhdrq_phys)
-		    __field(u32, eager_cnt)
-		    __field(u64, rcvegr_phys)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = uctxt->ctxt;
-		    __entry->credits = uctxt->sc->credits;
-		    __entry->hw_free = (u64)uctxt->sc->hw_free;
-		    __entry->piobase = (u64)uctxt->sc->base_addr;
-		    __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
-		    __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
-		    __entry->eager_cnt = uctxt->egrbufs.alloced;
-		    __entry->rcvegr_phys = uctxt->egrbufs.rcvtids[0].phys;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u " UCTXT_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->credits,
-		    __entry->hw_free,
-		    __entry->piobase,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_phys,
-		    __entry->eager_cnt,
-		    __entry->rcvegr_phys
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(u32, credits)
+			     __field(u64, hw_free)
+			     __field(u64, piobase)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u64, rcvhdrq_phys)
+			     __field(u32, eager_cnt)
+			     __field(u64, rcvegr_phys)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->ctxt = uctxt->ctxt;
+			   __entry->credits = uctxt->sc->credits;
+			   __entry->hw_free = (u64)uctxt->sc->hw_free;
+			   __entry->piobase = (u64)uctxt->sc->base_addr;
+			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+			   __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+			   __entry->eager_cnt = uctxt->egrbufs.alloced;
+			   __entry->rcvegr_phys =
+			   uctxt->egrbufs.rcvtids[0].phys;
+			   ),
+	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->credits,
+		      __entry->hw_free,
+		      __entry->piobase,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_phys,
+		      __entry->eager_cnt,
+		      __entry->rcvegr_phys
+		      )
+);
 
 #define CINFO_FMT \
 	"egrtids:%u, egr_size:%u, hdrq_cnt:%u, hdrq_size:%u, sdma_ring_size:%u"
@@ -663,38 +654,35 @@ TRACE_EVENT(hfi1_ctxt_info,
 	    TP_PROTO(struct hfi1_devdata *dd, unsigned ctxt, unsigned subctxt,
 		     struct hfi1_ctxt_info cinfo),
 	    TP_ARGS(dd, ctxt, subctxt, cinfo),
-	    TP_STRUCT__entry(
-		    DD_DEV_ENTRY(dd)
-		    __field(unsigned, ctxt)
-		    __field(unsigned, subctxt)
-		    __field(u16, egrtids)
-		    __field(u16, rcvhdrq_cnt)
-		    __field(u16, rcvhdrq_size)
-		    __field(u16, sdma_ring_size)
-		    __field(u32, rcvegr_size)
-		    ),
-	    TP_fast_assign(
-		    DD_DEV_ASSIGN(dd);
-		    __entry->ctxt = ctxt;
-		    __entry->subctxt = subctxt;
-		    __entry->egrtids = cinfo.egrtids;
-		    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
-		    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
-		    __entry->sdma_ring_size = cinfo.sdma_ring_size;
-		    __entry->rcvegr_size = cinfo.rcvegr_size;
-		    ),
-	    TP_printk(
-		    "[%s] ctxt %u:%u " CINFO_FMT,
-		    __get_str(dev),
-		    __entry->ctxt,
-		    __entry->subctxt,
-		    __entry->egrtids,
-		    __entry->rcvegr_size,
-		    __entry->rcvhdrq_cnt,
-		    __entry->rcvhdrq_size,
-		    __entry->sdma_ring_size
-		    )
-	);
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(unsigned, ctxt)
+			     __field(unsigned, subctxt)
+			     __field(u16, egrtids)
+			     __field(u16, rcvhdrq_cnt)
+			     __field(u16, rcvhdrq_size)
+			     __field(u16, sdma_ring_size)
+			     __field(u32, rcvegr_size)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			    __entry->ctxt = ctxt;
+			    __entry->subctxt = subctxt;
+			    __entry->egrtids = cinfo.egrtids;
+			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
+			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
+			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
+			    __entry->rcvegr_size = cinfo.rcvegr_size;
+			    ),
+	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
+		      __get_str(dev),
+		      __entry->ctxt,
+		      __entry->subctxt,
+		      __entry->egrtids,
+		      __entry->rcvegr_size,
+		      __entry->rcvhdrq_cnt,
+		      __entry->rcvhdrq_size,
+		      __entry->sdma_ring_size
+		      )
+);
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_sma
@@ -708,52 +696,48 @@ TRACE_EVENT(hfi1_ctxt_info,
 	)
 
 DECLARE_EVENT_CLASS(hfi1_bct_template,
-	TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
-	TP_ARGS(dd, bc),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__dynamic_array(u8, bct, sizeof(*bc))
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		memcpy(
-			__get_dynamic_array(bct),
-			bc,
-			sizeof(*bc));
-	),
-	TP_printk(BCT_FORMAT,
-		BCT(overall_shared_limit),
-
-		BCT(vl[0].dedicated),
-		BCT(vl[0].shared),
-
-		BCT(vl[1].dedicated),
-		BCT(vl[1].shared),
-
-		BCT(vl[2].dedicated),
-		BCT(vl[2].shared),
-
-		BCT(vl[3].dedicated),
-		BCT(vl[3].shared),
-
-		BCT(vl[4].dedicated),
-		BCT(vl[4].shared),
-
-		BCT(vl[5].dedicated),
-		BCT(vl[5].shared),
-
-		BCT(vl[6].dedicated),
-		BCT(vl[6].shared),
-
-		BCT(vl[7].dedicated),
-		BCT(vl[7].shared),
-
-		BCT(vl[15].dedicated),
-		BCT(vl[15].shared)
-	)
+		    TP_PROTO(struct hfi1_devdata *dd,
+			     struct buffer_control *bc),
+		    TP_ARGS(dd, bc),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+				     __dynamic_array(u8, bct, sizeof(*bc))
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(dd);
+				   memcpy(__get_dynamic_array(bct), bc,
+					  sizeof(*bc));
+				   ),
+		    TP_printk(BCT_FORMAT,
+			      BCT(overall_shared_limit),
+
+			      BCT(vl[0].dedicated),
+			      BCT(vl[0].shared),
+
+			      BCT(vl[1].dedicated),
+			      BCT(vl[1].shared),
+
+			      BCT(vl[2].dedicated),
+			      BCT(vl[2].shared),
+
+			      BCT(vl[3].dedicated),
+			      BCT(vl[3].shared),
+
+			      BCT(vl[4].dedicated),
+			      BCT(vl[4].shared),
+
+			      BCT(vl[5].dedicated),
+			      BCT(vl[5].shared),
+
+			      BCT(vl[6].dedicated),
+			      BCT(vl[6].shared),
+
+			      BCT(vl[7].dedicated),
+			      BCT(vl[7].shared),
+
+			      BCT(vl[15].dedicated),
+			      BCT(vl[15].shared)
+			      )
 );
 
-
 DEFINE_EVENT(hfi1_bct_template, bct_set,
 	     TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
 	     TP_ARGS(dd, bc));
@@ -766,252 +750,209 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
 #define TRACE_SYSTEM hfi1_sdma
 
 TRACE_EVENT(hfi1_sdma_descriptor,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 desc0,
-		u64 desc1,
-		u16 e,
-		void *descp),
+	    TP_PROTO(struct sdma_engine *sde,
+		     u64 desc0,
+		     u64 desc1,
+		     u16 e,
+		     void *descp),
 	TP_ARGS(sde, desc0, desc1, e, descp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(void *, descp)
-		__field(u64, desc0)
-		__field(u64, desc1)
-		__field(u16, e)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->desc0 = desc0;
-		__entry->desc1 = desc1;
-		__entry->idx = sde->this_idx;
-		__entry->descp = descp;
-		__entry->e = e;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(void *, descp)
+			 __field(u64, desc0)
+			 __field(u64, desc1)
+			 __field(u16, e)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->desc0 = desc0;
+		       __entry->desc1 = desc1;
+		       __entry->idx = sde->this_idx;
+		       __entry->descp = descp;
+		       __entry->e = e;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
-		__get_str(dev),
-		__entry->idx,
-		__parse_sdma_flags(__entry->desc0, __entry->desc1),
-		(__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT)
-			& SDMA_DESC0_PHY_ADDR_MASK,
-		(u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT)
-			& SDMA_DESC1_GENERATION_MASK),
-		(u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT)
-			& SDMA_DESC0_BYTE_COUNT_MASK),
-		__entry->desc0,
-		__entry->desc1,
-		__entry->descp,
-		__entry->e
-	)
+		  "[%s] SDE(%u) flags:%s addr:0x%016llx gen:%u len:%u d0:%016llx d1:%016llx to %p,%u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __parse_sdma_flags(__entry->desc0, __entry->desc1),
+		  (__entry->desc0 >> SDMA_DESC0_PHY_ADDR_SHIFT) &
+		  SDMA_DESC0_PHY_ADDR_MASK,
+		  (u8)((__entry->desc1 >> SDMA_DESC1_GENERATION_SHIFT) &
+		       SDMA_DESC1_GENERATION_MASK),
+		  (u16)((__entry->desc0 >> SDMA_DESC0_BYTE_COUNT_SHIFT) &
+			SDMA_DESC0_BYTE_COUNT_MASK),
+		  __entry->desc0,
+		  __entry->desc1,
+		  __entry->descp,
+		  __entry->e
+		  )
 );
 
 TRACE_EVENT(hfi1_sdma_engine_select,
-	TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
-	TP_ARGS(dd, sel, vl, idx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__field(u32, sel)
-		__field(u8, vl)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd);
-		__entry->sel = sel;
-		__entry->vl = vl;
-		__entry->idx = idx;
-	),
-	TP_printk(
-		"[%s] selecting SDE %u sel 0x%x vl %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sel,
-		__entry->vl
-	)
+	    TP_PROTO(struct hfi1_devdata *dd, u32 sel, u8 vl, u8 idx),
+	    TP_ARGS(dd, sel, vl, idx),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __field(u32, sel)
+			     __field(u8, vl)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd);
+			   __entry->sel = sel;
+			   __entry->vl = vl;
+			   __entry->idx = idx;
+			   ),
+	    TP_printk("[%s] selecting SDE %u sel 0x%x vl %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sel,
+		      __entry->vl
+		      )
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, status)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->status = status;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) status %llx",
-		__get_str(dev),
-		__entry->idx,
-		(unsigned long long)__entry->status
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 status),
+		    TP_ARGS(sde, status),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, status)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->status = status;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) status %llx",
+			      __get_str(dev),
+			      __entry->idx,
+			      (unsigned long long)__entry->status
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_interrupt,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DEFINE_EVENT(hfi1_sdma_engine_class, hfi1_sdma_engine_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 status
-	),
-	TP_ARGS(sde, status)
+	     TP_PROTO(struct sdma_engine *sde, u64 status),
+	     TP_ARGS(sde, status)
 );
 
 DECLARE_EVENT_CLASS(hfi1_sdma_ahg_ad,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	),
-	TP_ARGS(sde, aidx),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(int, aidx)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->idx = sde->this_idx;
-		__entry->aidx = aidx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) aidx %d",
-		__get_str(dev),
-		__entry->idx,
-		__entry->aidx
-	)
+		    TP_PROTO(struct sdma_engine *sde, int aidx),
+		    TP_ARGS(sde, aidx),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(int, aidx)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->idx = sde->this_idx;
+				   __entry->aidx = aidx;
+				   ),
+		    TP_printk("[%s] SDE(%u) aidx %d",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->aidx
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_allocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 DEFINE_EVENT(hfi1_sdma_ahg_ad, hfi1_ahg_deallocate,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		int aidx
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, int aidx),
 	     TP_ARGS(sde, aidx));
 
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 TRACE_EVENT(hfi1_sdma_progress,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
-	),
-	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-		__entry->sn = txp ? txp->sn : ~0;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead,
+		     u16 swhead,
+		     struct sdma_txreq *txp
+		     ),
+	    TP_ARGS(sde, hwhead, swhead, txp),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __field(u64, sn)
+			     __field(u16, hwhead)
+			     __field(u16, swhead)
+			     __field(u16, txnext)
+			     __field(u16, tx_tail)
+			     __field(u16, tx_head)
+			     __field(u8, idx)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+			   __entry->hwhead = hwhead;
+			   __entry->swhead = swhead;
+			   __entry->tx_tail = sde->tx_tail;
+			   __entry->tx_head = sde->tx_head;
+			   __entry->txnext = txp ? txp->next_descq_idx : ~0;
+			   __entry->idx = sde->this_idx;
+			   __entry->sn = txp ? txp->sn : ~0;
+			   ),
+	    TP_printk(
+		      "[%s] SDE(%u) sn %llu hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		      __get_str(dev),
+		      __entry->idx,
+		      __entry->sn,
+		      __entry->hwhead,
+		      __entry->swhead,
+		      __entry->txnext,
+		      __entry->tx_head,
+		      __entry->tx_tail
+		      )
 );
 #else
 TRACE_EVENT(hfi1_sdma_progress,
-	    TP_PROTO(
-		struct sdma_engine *sde,
-		u16 hwhead,
-		u16 swhead,
-		struct sdma_txreq *txp
+	    TP_PROTO(struct sdma_engine *sde,
+		     u16 hwhead, u16 swhead,
+		     struct sdma_txreq *txp
 	    ),
 	TP_ARGS(sde, hwhead, swhead, txp),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u16, hwhead)
-		__field(u16, swhead)
-		__field(u16, txnext)
-		__field(u16, tx_tail)
-		__field(u16, tx_head)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->hwhead = hwhead;
-		__entry->swhead = swhead;
-		__entry->tx_tail = sde->tx_tail;
-		__entry->tx_head = sde->tx_head;
-		__entry->txnext = txp ? txp->next_descq_idx : ~0;
-		__entry->idx = sde->this_idx;
-	),
+	TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			 __field(u16, hwhead)
+			 __field(u16, swhead)
+			 __field(u16, txnext)
+			 __field(u16, tx_tail)
+			 __field(u16, tx_head)
+			 __field(u8, idx)
+			 ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __entry->hwhead = hwhead;
+		       __entry->swhead = swhead;
+		       __entry->tx_tail = sde->tx_tail;
+		       __entry->tx_head = sde->tx_head;
+		       __entry->txnext = txp ? txp->next_descq_idx : ~0;
+		       __entry->idx = sde->this_idx;
+		       ),
 	TP_printk(
-		"[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
-		__get_str(dev),
-		__entry->idx,
-		__entry->hwhead,
-		__entry->swhead,
-		__entry->txnext,
-		__entry->tx_head,
-		__entry->tx_tail
-	)
+		  "[%s] SDE(%u) hwhead %u swhead %u next_descq_idx %u tx_head %u tx_tail %u",
+		  __get_str(dev),
+		  __entry->idx,
+		  __entry->hwhead,
+		  __entry->swhead,
+		  __entry->txnext,
+		  __entry->tx_head,
+		  __entry->tx_tail
+		  )
 );
 #endif
 
 DECLARE_EVENT_CLASS(hfi1_sdma_sn,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	),
-	TP_ARGS(sde, sn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__field(u64, sn)
-		__field(u8, idx)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__entry->sn = sn;
-		__entry->idx = sde->this_idx;
-	),
-	TP_printk(
-		"[%s] SDE(%u) sn %llu",
-		__get_str(dev),
-		__entry->idx,
-		__entry->sn
-	)
+		    TP_PROTO(struct sdma_engine *sde, u64 sn),
+		    TP_ARGS(sde, sn),
+		    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+				     __field(u64, sn)
+				     __field(u8, idx)
+				     ),
+		    TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+				   __entry->sn = sn;
+				   __entry->idx = sde->this_idx;
+				   ),
+		    TP_printk("[%s] SDE(%u) sn %llu",
+			      __get_str(dev),
+			      __entry->idx,
+			      __entry->sn
+			      )
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
@@ -1023,10 +964,7 @@ DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_out_sn,
 );
 
 DEFINE_EVENT(hfi1_sdma_sn, hfi1_sdma_in_sn,
-	     TP_PROTO(
-		struct sdma_engine *sde,
-		u64 sn
-	     ),
+	     TP_PROTO(struct sdma_engine *sde, u64 sn),
 	     TP_ARGS(sde, sn)
 );
 
@@ -1227,88 +1165,85 @@ TRACE_EVENT(hfi1_sdma_user_header_ahg,
 	);
 
 TRACE_EVENT(hfi1_sdma_state,
-	TP_PROTO(
-		struct sdma_engine *sde,
-		const char *cstate,
-		const char *nstate
-	),
-	TP_ARGS(sde, cstate, nstate),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(sde->dd)
-		__string(curstate, cstate)
-		__string(newstate, nstate)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(sde->dd);
-		__assign_str(curstate, cstate);
-		__assign_str(newstate, nstate);
-	),
+	    TP_PROTO(struct sdma_engine *sde,
+		     const char *cstate,
+		     const char *nstate
+		     ),
+	    TP_ARGS(sde, cstate, nstate),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(sde->dd)
+			     __string(curstate, cstate)
+			     __string(newstate, nstate)
+			     ),
+	TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
+		       __assign_str(curstate, cstate);
+		       __assign_str(newstate, nstate);
+		       ),
 	TP_printk("[%s] current state %s new state %s",
-		__get_str(dev),
-		__get_str(curstate),
-		__get_str(newstate)
-	)
+		  __get_str(dev),
+		  __get_str(curstate),
+		  __get_str(newstate)
+		  )
 );
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_rc
 
 DECLARE_EVENT_CLASS(hfi1_rc_template,
-	TP_PROTO(struct hfi1_qp *qp, u32 psn),
-	TP_ARGS(qp, psn),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
-		__field(u32, qpn)
-		__field(u32, s_flags)
-		__field(u32, psn)
-		__field(u32, s_psn)
-		__field(u32, s_next_psn)
-		__field(u32, s_sending_psn)
-		__field(u32, s_sending_hpsn)
-		__field(u32, r_psn)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
-		__entry->qpn = qp->ibqp.qp_num;
-		__entry->s_flags = qp->s_flags;
-		__entry->psn = psn;
-		__entry->s_psn = qp->s_psn;
-		__entry->s_next_psn = qp->s_next_psn;
-		__entry->s_sending_psn = qp->s_sending_psn;
-		__entry->s_sending_hpsn = qp->s_sending_hpsn;
-		__entry->r_psn = qp->r_psn;
-	),
-	TP_printk(
-		"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
-		__get_str(dev),
-		__entry->qpn,
-		__entry->s_flags,
-		__entry->psn,
-		__entry->s_psn,
-		__entry->s_next_psn,
-		__entry->s_sending_psn,
-		__entry->s_sending_hpsn,
-		__entry->r_psn
-	)
+		    TP_PROTO(struct rvt_qp *qp, u32 psn),
+		    TP_ARGS(qp, psn),
+		    TP_STRUCT__entry(
+			DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+			__field(u32, qpn)
+			__field(u32, s_flags)
+			__field(u32, psn)
+			__field(u32, s_psn)
+			__field(u32, s_next_psn)
+			__field(u32, s_sending_psn)
+			__field(u32, s_sending_hpsn)
+			__field(u32, r_psn)
+			),
+		    TP_fast_assign(
+			DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+			__entry->qpn = qp->ibqp.qp_num;
+			__entry->s_flags = qp->s_flags;
+			__entry->psn = psn;
+			__entry->s_psn = qp->s_psn;
+			__entry->s_next_psn = qp->s_next_psn;
+			__entry->s_sending_psn = qp->s_sending_psn;
+			__entry->s_sending_hpsn = qp->s_sending_hpsn;
+			__entry->r_psn = qp->r_psn;
+			),
+		    TP_printk(
+			"[%s] qpn 0x%x s_flags 0x%x psn 0x%x s_psn 0x%x s_next_psn 0x%x s_sending_psn 0x%x sending_hpsn 0x%x r_psn 0x%x",
+			__get_str(dev),
+			__entry->qpn,
+			__entry->s_flags,
+			__entry->psn,
+			__entry->s_psn,
+			__entry->s_next_psn,
+			__entry->s_sending_psn,
+			__entry->s_sending_hpsn,
+			__entry->r_psn
+			)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_sendcomplete,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_ack,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_timeout,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
 DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
-	     TP_PROTO(struct hfi1_qp *qp, u32 psn),
+	     TP_PROTO(struct rvt_qp *qp, u32 psn),
 	     TP_ARGS(qp, psn)
 );
 
@@ -1316,21 +1251,20 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rc_rcv_error,
 #define TRACE_SYSTEM hfi1_misc
 
 TRACE_EVENT(hfi1_interrupt,
-	TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
-		 int src),
-	TP_ARGS(dd, is_entry, src),
-	TP_STRUCT__entry(
-		DD_DEV_ENTRY(dd)
-		__array(char, buf, 64)
-		__field(int, src)
-	),
-	TP_fast_assign(
-		DD_DEV_ASSIGN(dd)
-		is_entry->is_name(__entry->buf, 64, src - is_entry->start);
-		__entry->src = src;
-	),
-	TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
-		  __entry->src)
+	    TP_PROTO(struct hfi1_devdata *dd, const struct is_table *is_entry,
+		     int src),
+	    TP_ARGS(dd, is_entry, src),
+	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+			     __array(char, buf, 64)
+			     __field(int, src)
+			     ),
+	    TP_fast_assign(DD_DEV_ASSIGN(dd)
+			   is_entry->is_name(__entry->buf, 64,
+					     src - is_entry->start);
+			   __entry->src = src;
+			   ),
+	    TP_printk("[%s] source: %s [%d]", __get_str(dev), __entry->buf,
+		      __entry->src)
 );
 
 /*
@@ -1345,21 +1279,21 @@ TRACE_EVENT(hfi1_interrupt,
 #define MAX_MSG_LEN 512
 
 DECLARE_EVENT_CLASS(hfi1_trace_template,
-	TP_PROTO(const char *function, struct va_format *vaf),
-	TP_ARGS(function, vaf),
-	TP_STRUCT__entry(
-		__string(function, function)
-		__dynamic_array(char, msg, MAX_MSG_LEN)
-	),
-	TP_fast_assign(
-		__assign_str(function, function);
-		WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
-		     MAX_MSG_LEN, vaf->fmt,
-		     *vaf->va) >= MAX_MSG_LEN);
-	),
-	TP_printk("(%s) %s",
-		  __get_str(function),
-		  __get_str(msg))
+		    TP_PROTO(const char *function, struct va_format *vaf),
+		    TP_ARGS(function, vaf),
+		    TP_STRUCT__entry(__string(function, function)
+				     __dynamic_array(char, msg, MAX_MSG_LEN)
+				     ),
+		    TP_fast_assign(__assign_str(function, function);
+				   WARN_ON_ONCE(vsnprintf
+						(__get_dynamic_array(msg),
+						 MAX_MSG_LEN, vaf->fmt,
+						 *vaf->va) >=
+						MAX_MSG_LEN);
+				   ),
+		    TP_printk("(%s) %s",
+			      __get_str(function),
+			      __get_str(msg))
 );
 
 /*
@@ -1406,6 +1340,7 @@ __hfi1_trace_def(DC8051);
 __hfi1_trace_def(FIRMWARE);
 __hfi1_trace_def(RCVCTRL);
 __hfi1_trace_def(TID);
+__hfi1_trace_def(MMU);
 
 #define hfi1_cdbg(which, fmt, ...) \
 	__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
diff --git a/drivers/staging/rdma/hfi1/twsi.c b/drivers/staging/rdma/hfi1/twsi.c
index ea54fd2700ad..e82e52a63d35 100644
--- a/drivers/staging/rdma/hfi1/twsi.c
+++ b/drivers/staging/rdma/hfi1/twsi.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -119,9 +116,9 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 	 * Allow for slow slaves by simple
 	 * delay for falling edge, sampling on rise.
 	 */
-	if (!bit)
+	if (!bit) {
 		udelay(2);
-	else {
+	} else {
 		int rise_usec;
 
 		for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
@@ -131,11 +128,24 @@ static void scl_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 		}
 		if (rise_usec <= 0)
 			dd_dev_err(dd, "SCL interface stuck low > %d uSec\n",
-				    SCL_WAIT_USEC);
+				   SCL_WAIT_USEC);
 	}
 	i2c_wait_for_writes(dd, target);
 }
 
+static u8 scl_in(struct hfi1_devdata *dd, u32 target, int wait)
+{
+	u32 read_val, mask;
+
+	mask = QSFP_HFI0_I2CCLK;
+	/* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+	hfi1_gpio_mod(dd, target, 0, 0, mask);
+	read_val = hfi1_gpio_mod(dd, target, 0, 0, 0);
+	if (wait)
+		i2c_wait_for_writes(dd, target);
+	return (read_val & mask) >> GPIO_SCL_NUM;
+}
+
 static void sda_out(struct hfi1_devdata *dd, u32 target, u8 bit)
 {
 	u32 mask;
@@ -274,13 +284,12 @@ static void stop_cmd(struct hfi1_devdata *dd, u32 target)
 /**
  * hfi1_twsi_reset - reset I2C communication
  * @dd: the hfi1_ib device
+ * returns 0 if ok, -EIO on error
  */
-
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
 {
 	int clock_cycles_left = 9;
-	int was_high = 0;
-	u32 pins, mask;
+	u32 mask;
 
 	/* Both SCL and SDA should be high. If not, there
 	 * is something wrong.
@@ -294,43 +303,23 @@ int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target)
 	 */
 	hfi1_gpio_mod(dd, target, 0, 0, mask);
 
-	/*
-	 * Clock nine times to get all listeners into a sane state.
-	 * If SDA does not go high at any point, we are wedged.
-	 * One vendor recommends then issuing START followed by STOP.
-	 * we cannot use our "normal" functions to do that, because
-	 * if SCL drops between them, another vendor's part will
-	 * wedge, dropping SDA and keeping it low forever, at the end of
-	 * the next transaction (even if it was not the device addressed).
-	 * So our START and STOP take place with SCL held high.
+	/* Check if SCL is low, if it is low then we have a slave device
+	 * misbehaving and there is not much we can do.
+	 */
+	if (!scl_in(dd, target, 0))
+		return -EIO;
+
+	/* Check if SDA is low, if it is low then we have to clock SDA
+	 * up to 9 times for the device to release the bus
 	 */
 	while (clock_cycles_left--) {
+		if (sda_in(dd, target, 0))
+			return 0;
 		scl_out(dd, target, 0);
 		scl_out(dd, target, 1);
-		/* Note if SDA is high, but keep clocking to sync slave */
-		was_high |= sda_in(dd, target, 0);
-	}
-
-	if (was_high) {
-		/*
-		 * We saw a high, which we hope means the slave is sync'd.
-		 * Issue START, STOP, pause for T_BUF.
-		 */
-
-		pins = hfi1_gpio_mod(dd, target, 0, 0, 0);
-		if ((pins & mask) != mask)
-			dd_dev_err(dd, "GPIO pins not at rest: %d\n",
-				    pins & mask);
-		/* Drop SDA to issue START */
-		udelay(1); /* Guarantee .6 uSec setup */
-		sda_out(dd, target, 0);
-		udelay(1); /* Guarantee .6 uSec hold */
-		/* At this point, SCL is high, SDA low. Raise SDA for STOP */
-		sda_out(dd, target, 1);
-		udelay(TWSI_BUF_WAIT_USEC);
 	}
 
-	return !was_high;
+	return -EIO;
 }
 
 #define HFI1_TWSI_START 0x100
@@ -365,17 +354,25 @@ static int twsi_wr(struct hfi1_devdata *dd, u32 target, int data, int flags)
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device from which data should
  * be read.
  */
 int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     void *buffer, int len)
 {
-	int ret;
 	u8 *bp = buffer;
+	int ret = 1;
+	int i;
+	int offset_size;
+
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
 
-	ret = 1;
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
 
 	if (dev == HFI1_TWSI_NO_DEV) {
 		/* legacy not-really-I2C */
@@ -383,34 +380,29 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		ret = twsi_wr(dd, target, addr, HFI1_TWSI_START);
 	} else {
 		/* Actual I2C */
-		ret = twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START);
-		if (ret) {
-			stop_cmd(dd, target);
-			ret = 1;
-			goto bail;
-		}
-		/*
-		 * SFF spec claims we do _not_ stop after the addr
-		 * but simply issue a start with the "read" dev-addr.
-		 * Since we are implicitly waiting for ACK here,
-		 * we need t_buf (nominally 20uSec) before that start,
-		 * and cannot rely on the delay built in to the STOP
-		 */
-		ret = twsi_wr(dd, target, addr, 0);
-		udelay(TWSI_BUF_WAIT_USEC);
+		if (offset_size) {
+			ret = twsi_wr(dd, target,
+				      dev | WRITE_CMD, HFI1_TWSI_START);
+			if (ret) {
+				stop_cmd(dd, target);
+				goto bail;
+			}
 
-		if (ret) {
-			dd_dev_err(dd,
-				"Failed to write interface read addr %02X\n",
-				addr);
-			ret = 1;
-			goto bail;
+			for (i = 0; i < offset_size; i++) {
+				ret = twsi_wr(dd, target,
+					      (addr >> (i * 8)) & 0xff, 0);
+				udelay(TWSI_BUF_WAIT_USEC);
+				if (ret) {
+					dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+						   i, addr);
+					goto bail;
+				}
+			}
 		}
 		ret = twsi_wr(dd, target, dev | READ_CMD, HFI1_TWSI_START);
 	}
 	if (ret) {
 		stop_cmd(dd, target);
-		ret = 1;
 		goto bail;
 	}
 
@@ -442,76 +434,55 @@ bail:
  * HFI1_TWSI_NO_DEV and does the correct operation for the legacy part,
  * which responded to all TWSI device codes, interpreting them as
  * address within device. On all other devices found on board handled by
- * this driver, the device is followed by a one-byte "address" which selects
+ * this driver, the device is followed by a N-byte "address" which selects
  * the "register" or "offset" within the device to which data should
  * be written.
  */
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len)
 {
-	int sub_len;
 	const u8 *bp = buffer;
-	int max_wait_time, i;
 	int ret = 1;
+	int i;
+	int offset_size;
 
-	while (len > 0) {
-		if (dev == HFI1_TWSI_NO_DEV) {
-			if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
-				    HFI1_TWSI_START)) {
-				goto failed_write;
-			}
-		} else {
-			/* Real I2C */
-			if (twsi_wr(dd, target,
-				    dev | WRITE_CMD, HFI1_TWSI_START))
-				goto failed_write;
-			ret = twsi_wr(dd, target, addr, 0);
-			if (ret) {
-				dd_dev_err(dd,
-					"Failed to write interface write addr %02X\n",
-					addr);
-				goto failed_write;
-			}
-		}
-
-		sub_len = min(len, 4);
-		addr += sub_len;
-		len -= sub_len;
+	/* obtain the offset size, strip it from the device address */
+	offset_size = (dev >> 8) & 0xff;
+	dev &= 0xff;
 
-		for (i = 0; i < sub_len; i++)
-			if (twsi_wr(dd, target, *bp++, 0))
-				goto failed_write;
+	/* allow at most a 2 byte offset */
+	if (offset_size > 2)
+		goto bail;
 
-		stop_cmd(dd, target);
+	if (dev == HFI1_TWSI_NO_DEV) {
+		if (twsi_wr(dd, target, (addr << 1) | WRITE_CMD,
+			    HFI1_TWSI_START)) {
+			goto failed_write;
+		}
+	} else {
+		/* Real I2C */
+		if (twsi_wr(dd, target, dev | WRITE_CMD, HFI1_TWSI_START))
+			goto failed_write;
+	}
 
-		/*
-		 * Wait for write complete by waiting for a successful
-		 * read (the chip replies with a zero after the write
-		 * cmd completes, and before it writes to the eeprom.
-		 * The startcmd for the read will fail the ack until
-		 * the writes have completed.   We do this inline to avoid
-		 * the debug prints that are in the real read routine
-		 * if the startcmd fails.
-		 * We also use the proper device address, so it doesn't matter
-		 * whether we have real eeprom_dev. Legacy likes any address.
-		 */
-		max_wait_time = 100;
-		while (twsi_wr(dd, target,
-			       dev | READ_CMD, HFI1_TWSI_START)) {
-			stop_cmd(dd, target);
-			if (!--max_wait_time)
-				goto failed_write;
+	for (i = 0; i < offset_size; i++) {
+		ret = twsi_wr(dd, target, (addr >> (i * 8)) & 0xff, 0);
+		udelay(TWSI_BUF_WAIT_USEC);
+		if (ret) {
+			dd_dev_err(dd, "Failed to write byte %d of offset 0x%04X\n",
+				   i, addr);
+			goto bail;
 		}
-		/* now read (and ignore) the resulting byte */
-		rd_byte(dd, target, 1);
 	}
 
+	for (i = 0; i < len; i++)
+		if (twsi_wr(dd, target, *bp++, 0))
+			goto failed_write;
+
 	ret = 0;
-	goto bail;
 
 failed_write:
 	stop_cmd(dd, target);
-	ret = 1;
 
 bail:
 	return ret;
diff --git a/drivers/staging/rdma/hfi1/twsi.h b/drivers/staging/rdma/hfi1/twsi.h
index 5907e029613d..5b8a5b5e7eae 100644
--- a/drivers/staging/rdma/hfi1/twsi.h
+++ b/drivers/staging/rdma/hfi1/twsi.h
@@ -1,14 +1,13 @@
 #ifndef _TWSI_H
 #define _TWSI_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -54,8 +51,9 @@
 
 struct hfi1_devdata;
 
-/* Bit position of SDA pin in ASIC_QSFP* registers  */
+/* Bit position of SDA/SCL pins in ASIC_QSFP* registers  */
 #define  GPIO_SDA_NUM 1
+#define  GPIO_SCL_NUM 0
 
 /* these functions must be called with qsfp_lock held */
 int hfi1_twsi_reset(struct hfi1_devdata *dd, u32 target);
@@ -64,5 +62,4 @@ int hfi1_twsi_blk_rd(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 int hfi1_twsi_blk_wr(struct hfi1_devdata *dd, u32 target, int dev, int addr,
 		     const void *buffer, int len);
 
-
 #endif /* _TWSI_H */
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index 4f2a7889a852..df773d433297 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -49,71 +46,82 @@
  */
 
 #include "hfi.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
 
+/* only opcode mask for adaptive pio */
+const u32 uc_only_opcode =
+	BIT(OP(SEND_ONLY) & 0x1f) |
+	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
+	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+
 /**
  * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_uc_req(struct hfi1_qp *qp)
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords = 5;
 	u32 bth0 = 0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
-	int ret = 0;
 	int middle = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
 		clear_ahg(qp);
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	ohdr = &qp->s_hdr->ibh.u.oth;
+	ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_hfi1_state_ops[qp->state] &
-		    HFI1_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head) {
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
 			clear_ahg(qp);
 			goto bail;
 		}
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -128,9 +136,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_SEND)
+			if (wqe->wr.opcode == IB_WR_SEND) {
 				qp->s_state = OP(SEND_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(SEND_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the BTH */
@@ -157,9 +165,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 				qp->s_state = OP(RDMA_WRITE_ONLY);
-			else {
+			} else {
 				qp->s_state =
 					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after the RETH */
@@ -188,9 +196,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_SEND)
+		if (wqe->wr.opcode == IB_WR_SEND) {
 			qp->s_state = OP(SEND_LAST);
-		else {
+		} else {
 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
@@ -213,9 +221,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
 			break;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
 			qp->s_state = OP(RDMA_WRITE_LAST);
-		else {
+		} else {
 			qp->s_state =
 				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
 			/* Immediate data comes after the BTH */
@@ -231,19 +239,28 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
 	}
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
+	ps->s_txreq->sde = priv->s_sde;
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			     mask_psn(qp->s_next_psn++), middle);
-done:
-	ret = 1;
-	goto unlock;
+			     mask_psn(qp->s_psn++), middle, ps);
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+	return 1;
+
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /**
@@ -266,7 +283,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	struct hfi1_other_headers *ohdr = packet->ohdr;
 	u32 bth0, opcode;
 	u32 hdrsize = packet->hlen;
@@ -291,14 +308,14 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 			u16 rlid = be16_to_cpu(hdr->lrh[3]);
 			u8 sl, sc5;
 
-			lqpn = bth1 & HFI1_QPN_MASK;
+			lqpn = bth1 & RVT_QPN_MASK;
 			rqpn = qp->remote_qpn;
 
 			sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
 			sl = ibp->sc_to_sl[sc5];
 
 			process_becn(ppd, sl, rlid, lqpn, rqpn,
-					IB_CC_SVCTYPE_UC);
+				     IB_CC_SVCTYPE_UC);
 		}
 
 		if (bth1 & HFI1_FECN_SMASK) {
@@ -331,10 +348,11 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
-		} else
-			hfi1_put_ss(&qp->r_sge);
+		} else {
+			rvt_put_ss(&qp->r_sge);
+		}
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		case OP(SEND_FIRST):
@@ -381,7 +399,7 @@ inv:
 		goto inv;
 	}
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & HFI1_R_COMM_EST))
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
 		qp_comm_est(qp);
 
 	/* OK, process the packet. */
@@ -390,10 +408,10 @@ inv:
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
 			qp->r_sge = qp->s_rdma_read_sge;
-		else {
-			ret = hfi1_get_rwqe(qp, 0);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 0);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -417,7 +435,7 @@ send_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto rewind;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
 		break;
 
 	case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -442,8 +460,8 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 0);
-		hfi1_put_ss(&qp->s_rdma_read_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -468,9 +486,9 @@ last_imm:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-			      (ohdr->bth[0] &
-				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+			     (ohdr->bth[0] &
+			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
 
 	case OP(RDMA_WRITE_FIRST):
@@ -491,8 +509,8 @@ rdma_first:
 			int ok;
 
 			/* Check rkey */
-			ok = hfi1_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
-					  vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto drop;
 			qp->r_sge.num_sge = 1;
@@ -503,9 +521,9 @@ rdma_first:
 			qp->r_sge.sge.length = 0;
 			qp->r_sge.sge.sge_length = 0;
 		}
-		if (opcode == OP(RDMA_WRITE_ONLY))
+		if (opcode == OP(RDMA_WRITE_ONLY)) {
 			goto rdma_last;
-		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
+		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
 			wc.ex.imm_data = ohdr->u.rc.imm_data;
 			goto rdma_last_imm;
 		}
@@ -517,7 +535,7 @@ rdma_first:
 		qp->r_rcv_len += pmtu;
 		if (unlikely(qp->r_rcv_len > qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1);
+		hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
 		break;
 
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -535,10 +553,10 @@ rdma_last_imm:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		if (test_and_clear_bit(HFI1_R_REWIND_SGE, &qp->r_aflags))
-			hfi1_put_ss(&qp->s_rdma_read_sge);
-		else {
-			ret = hfi1_get_rwqe(qp, 1);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
+			rvt_put_ss(&qp->s_rdma_read_sge);
+		} else {
+			ret = hfi1_rvt_get_rwqe(qp, 1);
 			if (ret < 0)
 				goto op_err;
 			if (!ret)
@@ -546,8 +564,8 @@ rdma_last_imm:
 		}
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 
 	case OP(RDMA_WRITE_LAST):
@@ -562,8 +580,8 @@ rdma_last:
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
-		hfi1_copy_sge(&qp->r_sge, data, tlen, 1);
-		hfi1_put_ss(&qp->r_sge);
+		hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+		rvt_put_ss(&qp->r_sge);
 		break;
 
 	default:
@@ -575,14 +593,12 @@ rdma_last:
 	return;
 
 rewind:
-	set_bit(HFI1_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 
 op_err:
 	hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-	return;
-
 }
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index 25e6053c38db..ae8a70f703eb 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -53,6 +50,7 @@
 
 #include "hfi.h"
 #include "mad.h"
+#include "verbs_txreq.h"
 #include "qp.h"
 
 /**
@@ -65,24 +63,25 @@
  * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
  * while this is being called.
  */
-static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
+static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
 	struct hfi1_pportdata *ppd;
-	struct hfi1_qp *qp;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
-	struct hfi1_sge_state ssge;
-	struct hfi1_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
 	rcu_read_lock();
 
-	qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
+			    swqe->ud_wr.remote_qpn);
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		rcu_read_unlock();
 		return;
 	}
@@ -93,12 +92,12 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 	if (dqptype != sqptype ||
-	    !(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -161,35 +160,36 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			goto bail_unlock;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 	}
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+	}
 	ssge.sg_list = swqe->sg_list + 1;
 	ssge.sge = *swqe->sg_list;
 	ssge.num_sge = swqe->wr.num_sge;
@@ -202,7 +202,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -210,7 +210,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -222,8 +222,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 		}
 		length -= len;
 	}
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -242,14 +242,14 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
 	/* Check for loopback when the port lid is not set */
 	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
-		wc.slid = HFI1_PERMISSIVE_LID;
+		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
 	wc.sl = ah_attr->sl;
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     swqe->wr.send_flags & IB_SEND_SOLICITED);
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
@@ -260,47 +260,53 @@ drop:
  * hfi1_make_ud_req - construct a UD request packet
  * @qp: the QP
  *
+ * Assume s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int hfi1_make_ud_req(struct hfi1_qp *qp)
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_ibport *ibp;
-	struct hfi1_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 bth0;
 	u16 lrh0;
 	u16 lid;
-	int ret = 0;
 	int next_cur;
 	u8 sc5;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
+	ps->s_txreq = get_txreq(ps->dev, qp);
+	if (IS_ERR(ps->s_txreq))
+		goto bail_no_tx;
 
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_hfi1_state_ops[qp->state] & HFI1_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_iowait.sdma_busy)) {
-			qp->s_flags |= HFI1_S_WAIT_DMA;
+		if (iowait_sdma_pending(&priv->s_iowait)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
-		goto done;
+		goto done_free_tx;
 	}
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
@@ -308,13 +314,15 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
-	    ah_attr->dlid == HFI1_PERMISSIVE_LID) {
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+	    ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
-		if (unlikely(!loopback && (lid == ppd->lid ||
-		    (lid == HFI1_PERMISSIVE_LID &&
-		     qp->ibqp.qp_type == IB_QPT_GSI)))) {
+		if (unlikely(!loopback &&
+			     (lid == ppd->lid ||
+			      (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
+			      qp->ibqp.qp_type == IB_QPT_GSI)))) {
+			unsigned long flags;
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
@@ -322,16 +330,17 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 			 * Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 */
-			if (atomic_read(&qp->s_iowait.sdma_busy)) {
-				qp->s_flags |= HFI1_S_WAIT_DMA;
+			if (iowait_sdma_pending(&priv->s_iowait)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 			}
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
 			hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
-			goto done;
+			goto done_free_tx;
 		}
 	}
 
@@ -353,11 +362,12 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += hfi1_make_grh(ibp, &qp->s_hdr->ibh.u.l.grh,
-					       &ah_attr->grh,
-					       qp->s_hdrwords, nwords);
+		qp->s_hdrwords += hfi1_make_grh(ibp,
+						&ps->s_txreq->phdr.hdr.u.l.grh,
+						&ah_attr->grh,
+						qp->s_hdrwords, nwords);
 		lrh0 = HFI1_LRH_GRH;
-		ohdr = &qp->s_hdr->ibh.u.l.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -365,37 +375,42 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 	} else {
 		/* Header size in 32-bit words. */
 		lrh0 = HFI1_LRH_BTH;
-		ohdr = &qp->s_hdr->ibh.u.oth;
+		ohdr = &ps->s_txreq->phdr.hdr.u.oth;
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
 		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
 		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-	} else
+	} else {
 		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+	}
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	lrh0 |= (ah_attr->sl & 0xf) << 4;
 	if (qp->ibqp.qp_type == IB_QPT_SMI) {
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
-		qp->s_sc = 0xf;
+		priv->s_sc = 0xf;
 	} else {
 		lrh0 |= (sc5 & 0xf) << 12;
-		qp->s_sc = sc5;
+		priv->s_sc = sc5;
 	}
-	qp->s_sde = qp_to_sdma_engine(qp, qp->s_sc);
-	qp->s_hdr->ibh.lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->ibh.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->ibh.lrh[2] =
+	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+	ps->s_txreq->sde = priv->s_sde;
+	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+	ps->s_txreq->psc = priv->s_sendcontext;
+	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
+	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
+	ps->s_txreq->phdr.hdr.lrh[2] =
 		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE))
-		qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
-	else {
+	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+		ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+	} else {
 		lid = ppd->lid;
 		if (lid) {
 			lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-			qp->s_hdr->ibh.lrh[3] = cpu_to_be16(lid);
-		} else
-			qp->s_hdr->ibh.lrh[3] = IB_LID_PERMISSIVE;
+			ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
+		} else {
+			ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
+		}
 	}
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
@@ -406,7 +421,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 		bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
+	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -415,20 +430,28 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
 					 qp->qkey : wqe->ud_wr.remote_qkey);
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 	/* disarm any ahg */
-	qp->s_hdr->ahgcount = 0;
-	qp->s_hdr->ahgidx = 0;
-	qp->s_hdr->tx_flags = 0;
-	qp->s_hdr->sde = NULL;
+	priv->s_hdr->ahgcount = 0;
+	priv->s_hdr->ahgidx = 0;
+	priv->s_hdr->tx_flags = 0;
+	priv->s_hdr->sde = NULL;
+	/* pbc */
+	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+
+	return 1;
 
-done:
-	ret = 1;
-	goto unlock;
+done_free_tx:
+	hfi1_put_txreq(ps->s_txreq);
+	ps->s_txreq = NULL;
+	return 1;
 
 bail:
-	qp->s_flags &= ~HFI1_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
+	hfi1_put_txreq(ps->s_txreq);
+
+bail_no_tx:
+	ps->s_txreq = NULL;
+	qp->s_flags &= ~RVT_S_BUSY;
+	qp->s_hdrwords = 0;
+	return 0;
 }
 
 /*
@@ -476,7 +499,7 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
 	return -1;
 }
 
-void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
+void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
 		u32 pkey, u32 slid, u32 dlid, u8 sc5,
 		const struct ib_grh *old_grh)
 {
@@ -550,7 +573,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct hfi1_qp *qp, u32 remote_qpn,
  * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
  */
 static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
-			 struct hfi1_qp *qp, u16 slid, struct opa_smp *smp)
+			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
 {
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
@@ -607,7 +630,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
 	case IB_MGMT_METHOD_TRAP:
 	case IB_MGMT_METHOD_GET_RESP:
 	case IB_MGMT_METHOD_REPORT_RESP:
-		if (ibp->port_cap_flags & IB_PORT_SM)
+		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return 0;
 		if (pkey == FULL_MGMT_P_KEY) {
 			smp->status |= IB_SMP_UNSUP_METHOD;
@@ -624,7 +647,6 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
 	return 0;
 }
 
-
 /**
  * hfi1_ud_rcv - receive an incoming UD packet
  * @ibp: the port the packet came in on
@@ -654,7 +676,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
 	bool sc4_bit = has_sc4_bit(packet);
 	u8 sc;
@@ -663,10 +685,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	struct ib_grh *grh = NULL;
 
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 	dlid = be16_to_cpu(hdr->lrh[1]);
-	is_mcast = (dlid > HFI1_MULTICAST_LID_BASE) &&
-			(dlid != HFI1_PERMISSIVE_LID);
+	is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+			(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
 	bth1 = be32_to_cpu(ohdr->bth[1]);
 	if (unlikely(bth1 & HFI1_BECN_SMASK)) {
 		/*
@@ -674,7 +696,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		 * error path.
 		 */
 		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK;
+		u32 lqpn =  be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		u8 sl, sc5;
 
 		sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -750,7 +772,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 			if (mgmt_pkey_idx < 0)
 				goto drop;
-
 		}
 		if (unlikely(qkey != qp->qkey)) {
 			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
@@ -788,7 +809,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
 		if (mgmt_pkey_idx < 0)
 			goto drop;
-
 	}
 
 	if (qp->ibqp.qp_num > 1 &&
@@ -799,8 +819,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
 		wc.ex.imm_data = 0;
 		wc.wc_flags = 0;
-	} else
+	} else {
 		goto drop;
+	}
 
 	/*
 	 * A GRH is expected to precede the data even if not
@@ -811,36 +832,38 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 */
-	if (qp->r_flags & HFI1_R_REUSE_SGE)
-		qp->r_flags &= ~HFI1_R_REUSE_SGE;
-	else {
+	if (qp->r_flags & RVT_R_REUSE_SGE) {
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
+	} else {
 		int ret;
 
-		ret = hfi1_get_rwqe(qp, 0);
+		ret = hfi1_rvt_get_rwqe(qp, 0);
 		if (ret < 0) {
 			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
 			return;
 		}
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 		}
 	}
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= HFI1_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 	}
 	if (has_grh) {
 		hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
-			      sizeof(struct ib_grh), 1);
+			      sizeof(struct ib_grh), 1, 0);
 		wc.wc_flags |= IB_WC_GRH;
-	} else
+	} else {
 		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
-	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	hfi1_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(HFI1_R_WRID_VALID, &qp->r_aflags))
+	}
+	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+		      1, 0);
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
@@ -862,8 +885,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 			}
 		}
 		wc.pkey_index = (unsigned)mgmt_pkey_idx;
-	} else
+	} else {
 		wc.pkey_index = 0;
+	}
 
 	wc.slid = be16_to_cpu(hdr->lrh[3]);
 	sc = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf;
@@ -873,15 +897,15 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
-	wc.dlid_path_bits = dlid >= HFI1_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
-	hfi1_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		      (ohdr->bth[0] &
-			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     (ohdr->bth[0] &
+		      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.c b/drivers/staging/rdma/hfi1/user_exp_rcv.c
new file mode 100644
index 000000000000..0861e095df8d
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.c
@@ -0,0 +1,1044 @@
+/*
+ * Copyright(c) 2015, 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <asm/page.h>
+
+#include "user_exp_rcv.h"
+#include "trace.h"
+#include "mmu_rb.h"
+
+struct tid_group {
+	struct list_head list;
+	unsigned base;
+	u8 size;
+	u8 used;
+	u8 map;
+};
+
+struct tid_rb_node {
+	struct mmu_rb_node mmu;
+	unsigned long phys;
+	struct tid_group *grp;
+	u32 rcventry;
+	dma_addr_t dma_addr;
+	bool freed;
+	unsigned npages;
+	struct page *pages[0];
+};
+
+struct tid_pageset {
+	u16 idx;
+	u16 count;
+};
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define num_user_pages(vaddr, len)				       \
+	(1 + (((((unsigned long)(vaddr) +			       \
+		 (unsigned long)(len) - 1) & PAGE_MASK) -	       \
+	       ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
+			    struct rb_root *);
+static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
+static int set_rcvarray_entry(struct file *, unsigned long, u32,
+			      struct tid_group *, struct page **, unsigned);
+static int mmu_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void mmu_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int mmu_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
+			    struct tid_pageset *, unsigned, u16, struct page **,
+			    u32 *, unsigned *, unsigned *);
+static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static void clear_tid_node(struct hfi1_filedata *, u16, struct tid_rb_node *);
+
+static struct mmu_rb_ops tid_rb_ops = {
+	.insert = mmu_rb_insert,
+	.remove = mmu_rb_remove,
+	.invalidate = mmu_rb_invalidate
+};
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+	u32 pair = rcventry & ~0x1;
+
+	return EXP_TID_SET(IDX, pair >> 1) |
+		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+static inline void exp_tid_group_init(struct exp_tid_set *set)
+{
+	INIT_LIST_HEAD(&set->list);
+	set->count = 0;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+				    struct exp_tid_set *set)
+{
+	list_del_init(&grp->list);
+	set->count--;
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+				      struct exp_tid_set *set)
+{
+	list_add_tail(&grp->list, &set->list);
+	set->count++;
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+	struct tid_group *grp =
+		list_first_entry(&set->list, struct tid_group, list);
+	list_del_init(&grp->list);
+	set->count--;
+	return grp;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+				  struct exp_tid_set *s1,
+				  struct exp_tid_set *s2)
+{
+	tid_group_remove(group, s1);
+	tid_group_add_tail(group, s2);
+}
+
+/*
+ * Initialize context and file private data needed for Expected
+ * receive caching. This needs to be done after the context has
+ * been configured with the eager/expected RcvEntry counts.
+ */
+int hfi1_user_exp_rcv_init(struct file *fp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned tidbase;
+	int i, ret = 0;
+
+	spin_lock_init(&fd->tid_lock);
+	spin_lock_init(&fd->invalid_lock);
+	fd->tid_rb_root = RB_ROOT;
+
+	if (!uctxt->subctxt_cnt || !fd->subctxt) {
+		exp_tid_group_init(&uctxt->tid_group_list);
+		exp_tid_group_init(&uctxt->tid_used_list);
+		exp_tid_group_init(&uctxt->tid_full_list);
+
+		tidbase = uctxt->expected_base;
+		for (i = 0; i < uctxt->expected_count /
+			     dd->rcv_entries.group_size; i++) {
+			struct tid_group *grp;
+
+			grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+			if (!grp) {
+				/*
+				 * If we fail here, the groups already
+				 * allocated will be freed by the close
+				 * call.
+				 */
+				ret = -ENOMEM;
+				goto done;
+			}
+			grp->size = dd->rcv_entries.group_size;
+			grp->base = tidbase;
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			tidbase += dd->rcv_entries.group_size;
+		}
+	}
+
+	fd->entry_to_rb = kcalloc(uctxt->expected_count,
+				     sizeof(struct rb_node *),
+				     GFP_KERNEL);
+	if (!fd->entry_to_rb)
+		return -ENOMEM;
+
+	if (!HFI1_CAP_IS_USET(TID_UNMAP)) {
+		fd->invalid_tid_idx = 0;
+		fd->invalid_tids = kzalloc(uctxt->expected_count *
+					   sizeof(u32), GFP_KERNEL);
+		if (!fd->invalid_tids) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		/*
+		 * Register MMU notifier callbacks. If the registration
+		 * fails, continue but turn off the TID caching for
+		 * all user contexts.
+		 */
+		ret = hfi1_mmu_rb_register(&fd->tid_rb_root, &tid_rb_ops);
+		if (ret) {
+			dd_dev_info(dd,
+				    "Failed MMU notifier registration %d\n",
+				    ret);
+			HFI1_CAP_USET(TID_UNMAP);
+			ret = 0;
+		}
+	}
+
+	/*
+	 * PSM does not have a good way to separate, count, and
+	 * effectively enforce a limit on RcvArray entries used by
+	 * subctxts (when context sharing is used) when TID caching
+	 * is enabled. To help with that, we calculate a per-process
+	 * RcvArray entry share and enforce that.
+	 * If TID caching is not in use, PSM deals with usage on its
+	 * own. In that case, we allow any subctxt to take all of the
+	 * entries.
+	 *
+	 * Make sure that we set the tid counts only after successful
+	 * init.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (uctxt->subctxt_cnt && !HFI1_CAP_IS_USET(TID_UNMAP)) {
+		u16 remainder;
+
+		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
+		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
+		if (remainder && fd->subctxt < remainder)
+			fd->tid_limit++;
+	} else {
+		fd->tid_limit = uctxt->expected_count;
+	}
+	spin_unlock(&fd->tid_lock);
+done:
+	return ret;
+}
+
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_group *grp, *gptr;
+
+	/*
+	 * The notifier would have been removed when the process'es mm
+	 * was freed.
+	 */
+	if (!HFI1_CAP_IS_USET(TID_UNMAP))
+		hfi1_mmu_rb_unregister(&fd->tid_rb_root);
+
+	kfree(fd->invalid_tids);
+
+	if (!uctxt->cnt) {
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_full_list,
+					&fd->tid_rb_root);
+		if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+			unlock_exp_tids(uctxt, &uctxt->tid_used_list,
+					&fd->tid_rb_root);
+		list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
+					 list) {
+			list_del_init(&grp->list);
+			kfree(grp);
+		}
+		hfi1_clear_tids(uctxt);
+	}
+
+	kfree(fd->entry_to_rb);
+	return 0;
+}
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+	/*
+	 * Doing the WC fill writes only makes sense if the device is
+	 * present and the RcvArray has been mapped as WC memory.
+	 */
+	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+		writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+/*
+ * RcvArray entry allocation for Expected Receives is done by the
+ * following algorithm:
+ *
+ * The context keeps 3 lists of groups of RcvArray entries:
+ *   1. List of empty groups - tid_group_list
+ *      This list is created during user context creation and
+ *      contains elements which describe sets (of 8) of empty
+ *      RcvArray entries.
+ *   2. List of partially used groups - tid_used_list
+ *      This list contains sets of RcvArray entries which are
+ *      not completely used up. Another mapping request could
+ *      use some of all of the remaining entries.
+ *   3. List of full groups - tid_full_list
+ *      This is the list where sets that are completely used
+ *      up go.
+ *
+ * An attempt to optimize the usage of RcvArray entries is
+ * made by finding all sets of physically contiguous pages in a
+ * user's buffer.
+ * These physically contiguous sets are further split into
+ * sizes supported by the receive engine of the HFI. The
+ * resulting sets of pages are stored in struct tid_pageset,
+ * which describes the sets as:
+ *    * .count - number of pages in this set
+ *    * .idx - starting index into struct page ** array
+ *                    of this set
+ *
+ * From this point on, the algorithm deals with the page sets
+ * described above. The number of pagesets is divided by the
+ * RcvArray group size to produce the number of full groups
+ * needed.
+ *
+ * Groups from the 3 lists are manipulated using the following
+ * rules:
+ *   1. For each set of 8 pagesets, a complete group from
+ *      tid_group_list is taken, programmed, and moved to
+ *      the tid_full_list list.
+ *   2. For all remaining pagesets:
+ *      2.1 If the tid_used_list is empty and the tid_group_list
+ *          is empty, stop processing pageset and return only
+ *          what has been programmed up to this point.
+ *      2.2 If the tid_used_list is empty and the tid_group_list
+ *          is not empty, move a group from tid_group_list to
+ *          tid_used_list.
+ *      2.3 For each group is tid_used_group, program as much as
+ *          can fit into the group. If the group becomes fully
+ *          used, move it to tid_full_list.
+ */
+int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0, need_group = 0, pinned;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+		tididx = 0, mapped, mapped_pages = 0;
+	unsigned long vaddr = tinfo->vaddr;
+	struct page **pages = NULL;
+	u32 *tidlist = NULL;
+	struct tid_pageset *pagesets = NULL;
+
+	/* Get the number of pages the user buffer spans */
+	npages = num_user_pages(vaddr, tinfo->length);
+	if (!npages)
+		return -EINVAL;
+
+	if (npages > uctxt->expected_count) {
+		dd_dev_err(dd, "Expected buffer too big\n");
+		return -EINVAL;
+	}
+
+	/* Verify that access is OK for the user buffer */
+	if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
+		       npages * PAGE_SIZE)) {
+		dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
+			   (void *)vaddr, npages);
+		return -EFAULT;
+	}
+
+	pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
+			   GFP_KERNEL);
+	if (!pagesets)
+		return -ENOMEM;
+
+	/* Allocate the array of struct page pointers needed for pinning */
+	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	/*
+	 * Pin all the pages of the user buffer. If we can't pin all the
+	 * pages, accept the amount pinned so far and program only that.
+	 * User space knows how to deal with partially programmed buffers.
+	 */
+	if (!hfi1_can_pin_pages(dd, fd->tid_n_pinned, npages))
+		return -ENOMEM;
+	pinned = hfi1_acquire_user_pages(vaddr, npages, true, pages);
+	if (pinned <= 0) {
+		ret = pinned;
+		goto bail;
+	}
+	fd->tid_n_pinned += npages;
+
+	/* Find sets of physically contiguous pages */
+	npagesets = find_phys_blocks(pages, pinned, pagesets);
+
+	/*
+	 * We don't need to access this under a lock since tid_used is per
+	 * process and the same process cannot be in hfi1_user_exp_rcv_clear()
+	 * and hfi1_user_exp_rcv_setup() at the same time.
+	 */
+	spin_lock(&fd->tid_lock);
+	if (fd->tid_used + npagesets > fd->tid_limit)
+		pageset_count = fd->tid_limit - fd->tid_used;
+	else
+		pageset_count = npagesets;
+	spin_unlock(&fd->tid_lock);
+
+	if (!pageset_count)
+		goto bail;
+
+	ngroups = pageset_count / dd->rcv_entries.group_size;
+	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
+	if (!tidlist) {
+		ret = -ENOMEM;
+		goto nomem;
+	}
+
+	tididx = 0;
+
+	/*
+	 * From this point on, we are going to be using shared (between master
+	 * and subcontexts) context resources. We need to take the lock.
+	 */
+	mutex_lock(&uctxt->exp_lock);
+	/*
+	 * The first step is to program the RcvArray entries which are complete
+	 * groups.
+	 */
+	while (ngroups && uctxt->tid_group_list.count) {
+		struct tid_group *grp =
+			tid_group_pop(&uctxt->tid_group_list);
+
+		ret = program_rcvarray(fp, vaddr, grp, pagesets,
+				       pageidx, dd->rcv_entries.group_size,
+				       pages, tidlist, &tididx, &mapped);
+		/*
+		 * If there was a failure to program the RcvArray
+		 * entries for the entire group, reset the grp fields
+		 * and add the grp back to the free group list.
+		 */
+		if (ret <= 0) {
+			tid_group_add_tail(grp, &uctxt->tid_group_list);
+			hfi1_cdbg(TID,
+				  "Failed to program RcvArray group %d", ret);
+			goto unlock;
+		}
+
+		tid_group_add_tail(grp, &uctxt->tid_full_list);
+		ngroups--;
+		pageidx += ret;
+		mapped_pages += mapped;
+	}
+
+	while (pageidx < pageset_count) {
+		struct tid_group *grp, *ptr;
+		/*
+		 * If we don't have any partially used tid groups, check
+		 * if we have empty groups. If so, take one from there and
+		 * put in the partially used list.
+		 */
+		if (!uctxt->tid_used_list.count || need_group) {
+			if (!uctxt->tid_group_list.count)
+				goto unlock;
+
+			grp = tid_group_pop(&uctxt->tid_group_list);
+			tid_group_add_tail(grp, &uctxt->tid_used_list);
+			need_group = 0;
+		}
+		/*
+		 * There is an optimization opportunity here - instead of
+		 * fitting as many page sets as we can, check for a group
+		 * later on in the list that could fit all of them.
+		 */
+		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
+					 list) {
+			unsigned use = min_t(unsigned, pageset_count - pageidx,
+					     grp->size - grp->used);
+
+			ret = program_rcvarray(fp, vaddr, grp, pagesets,
+					       pageidx, use, pages, tidlist,
+					       &tididx, &mapped);
+			if (ret < 0) {
+				hfi1_cdbg(TID,
+					  "Failed to program RcvArray entries %d",
+					  ret);
+				ret = -EFAULT;
+				goto unlock;
+			} else if (ret > 0) {
+				if (grp->used == grp->size)
+					tid_group_move(grp,
+						       &uctxt->tid_used_list,
+						       &uctxt->tid_full_list);
+				pageidx += ret;
+				mapped_pages += mapped;
+				need_group = 0;
+				/* Check if we are done so we break out early */
+				if (pageidx >= pageset_count)
+					break;
+			} else if (WARN_ON(ret == 0)) {
+				/*
+				 * If ret is 0, we did not program any entries
+				 * into this group, which can only happen if
+				 * we've screwed up the accounting somewhere.
+				 * Warn and try to continue.
+				 */
+				need_group = 1;
+			}
+		}
+	}
+unlock:
+	mutex_unlock(&uctxt->exp_lock);
+nomem:
+	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
+		  mapped_pages, ret);
+	if (tididx) {
+		spin_lock(&fd->tid_lock);
+		fd->tid_used += tididx;
+		spin_unlock(&fd->tid_lock);
+		tinfo->tidcnt = tididx;
+		tinfo->length = mapped_pages * PAGE_SIZE;
+
+		if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
+				 tidlist, sizeof(tidlist[0]) * tididx)) {
+			/*
+			 * On failure to copy to the user level, we need to undo
+			 * everything done so far so we don't leak resources.
+			 */
+			tinfo->tidlist = (unsigned long)&tidlist;
+			hfi1_user_exp_rcv_clear(fp, tinfo);
+			tinfo->tidlist = 0;
+			ret = -EFAULT;
+			goto bail;
+		}
+	}
+
+	/*
+	 * If not everything was mapped (due to insufficient RcvArray entries,
+	 * for example), unpin all unmapped pages so we can pin them nex time.
+	 */
+	if (mapped_pages != pinned) {
+		hfi1_release_user_pages(current->mm, &pages[mapped_pages],
+					pinned - mapped_pages,
+					false);
+		fd->tid_n_pinned -= pinned - mapped_pages;
+	}
+bail:
+	kfree(pagesets);
+	kfree(pages);
+	kfree(tidlist);
+	return ret > 0 ? 0 : ret;
+}
+
+int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	int ret = 0;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	u32 *tidinfo;
+	unsigned tididx;
+
+	tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
+	if (!tidinfo)
+		return -ENOMEM;
+
+	if (copy_from_user(tidinfo, (void __user *)(unsigned long)
+			   tinfo->tidlist, sizeof(tidinfo[0]) *
+			   tinfo->tidcnt)) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	mutex_lock(&uctxt->exp_lock);
+	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
+		ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+		if (ret) {
+			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
+				  ret);
+			break;
+		}
+	}
+	spin_lock(&fd->tid_lock);
+	fd->tid_used -= tididx;
+	spin_unlock(&fd->tid_lock);
+	tinfo->tidcnt = tididx;
+	mutex_unlock(&uctxt->exp_lock);
+done:
+	kfree(tidinfo);
+	return ret;
+}
+
+int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	unsigned long *ev = uctxt->dd->events +
+		(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+		  HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
+	u32 *array;
+	int ret = 0;
+
+	if (!fd->invalid_tids)
+		return -EINVAL;
+
+	/*
+	 * copy_to_user() can sleep, which will leave the invalid_lock
+	 * locked and cause the MMU notifier to be blocked on the lock
+	 * for a long time.
+	 * Copy the data to a local buffer so we can release the lock.
+	 */
+	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
+	if (!array)
+		return -EFAULT;
+
+	spin_lock(&fd->invalid_lock);
+	if (fd->invalid_tid_idx) {
+		memcpy(array, fd->invalid_tids, sizeof(*array) *
+		       fd->invalid_tid_idx);
+		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
+		       fd->invalid_tid_idx);
+		tinfo->tidcnt = fd->invalid_tid_idx;
+		fd->invalid_tid_idx = 0;
+		/*
+		 * Reset the user flag while still holding the lock.
+		 * Otherwise, PSM can miss events.
+		 */
+		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+	} else {
+		tinfo->tidcnt = 0;
+	}
+	spin_unlock(&fd->invalid_lock);
+
+	if (tinfo->tidcnt) {
+		if (copy_to_user((void __user *)tinfo->tidlist,
+				 array, sizeof(*array) * tinfo->tidcnt))
+			ret = -EFAULT;
+	}
+	kfree(array);
+
+	return ret;
+}
+
+static u32 find_phys_blocks(struct page **pages, unsigned npages,
+			    struct tid_pageset *list)
+{
+	unsigned pagecount, pageidx, setcount = 0, i;
+	unsigned long pfn, this_pfn;
+
+	if (!npages)
+		return 0;
+
+	/*
+	 * Look for sets of physically contiguous pages in the user buffer.
+	 * This will allow us to optimize Expected RcvArray entry usage by
+	 * using the bigger supported sizes.
+	 */
+	pfn = page_to_pfn(pages[0]);
+	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;
+
+		/*
+		 * If the pfn's are not sequential, pages are not physically
+		 * contiguous.
+		 */
+		if (this_pfn != ++pfn) {
+			/*
+			 * At this point we have to loop over the set of
+			 * physically contiguous pages and break them down it
+			 * sizes supported by the HW.
+			 * There are two main constraints:
+			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
+			 *        If the total set size is bigger than that
+			 *        program only a MAX_EXPECTED_BUFFER chunk.
+			 *     2. The buffer size has to be a power of two. If
+			 *        it is not, round down to the closes power of
+			 *        2 and program that size.
+			 */
+			while (pagecount) {
+				int maxpages = pagecount;
+				u32 bufsize = pagecount * PAGE_SIZE;
+
+				if (bufsize > MAX_EXPECTED_BUFFER)
+					maxpages =
+						MAX_EXPECTED_BUFFER >>
+						PAGE_SHIFT;
+				else if (!is_power_of_2(bufsize))
+					maxpages =
+						rounddown_pow_of_two(bufsize) >>
+						PAGE_SHIFT;
+
+				list[setcount].idx = pageidx;
+				list[setcount].count = maxpages;
+				pagecount -= maxpages;
+				pageidx += maxpages;
+				setcount++;
+			}
+			pageidx = i;
+			pagecount = 1;
+			pfn = this_pfn;
+		} else {
+			pagecount++;
+		}
+	}
+	return setcount;
+}
+
+/**
+ * program_rcvarray() - program an RcvArray group with receive buffers
+ * @fp: file pointer
+ * @vaddr: starting user virtual address
+ * @grp: RcvArray group
+ * @sets: array of struct tid_pageset holding information on physically
+ *        contiguous chunks from the user buffer
+ * @start: starting index into sets array
+ * @count: number of struct tid_pageset's to program
+ * @pages: an array of struct page * for the user buffer
+ * @tidlist: the array of u32 elements when the information about the
+ *           programmed RcvArray entries is to be encoded.
+ * @tididx: starting offset into tidlist
+ * @pmapped: (output parameter) number of pages programmed into the RcvArray
+ *           entries.
+ *
+ * This function will program up to 'count' number of RcvArray entries from the
+ * group 'grp'. To make best use of write-combining writes, the function will
+ * perform writes to the unused RcvArray entries which will be ignored by the
+ * HW. Each RcvArray entry will be programmed with a physically contiguous
+ * buffer chunk from the user's virtual buffer.
+ *
+ * Return:
+ * -EINVAL if the requested count is larger than the size of the group,
+ * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
+ * number of RcvArray entries programmed.
+ */
+static int program_rcvarray(struct file *fp, unsigned long vaddr,
+			    struct tid_group *grp,
+			    struct tid_pageset *sets,
+			    unsigned start, u16 count, struct page **pages,
+			    u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	u16 idx;
+	u32 tidinfo = 0, rcventry, useidx = 0;
+	int mapped = 0;
+
+	/* Count should never be larger than the group size */
+	if (count > grp->size)
+		return -EINVAL;
+
+	/* Find the first unused entry in the group */
+	for (idx = 0; idx < grp->size; idx++) {
+		if (!(grp->map & (1 << idx))) {
+			useidx = idx;
+			break;
+		}
+		rcv_array_wc_fill(dd, grp->base + idx);
+	}
+
+	idx = 0;
+	while (idx < count) {
+		u16 npages, pageidx, setidx = start + idx;
+		int ret = 0;
+
+		/*
+		 * If this entry in the group is used, move to the next one.
+		 * If we go past the end of the group, exit the loop.
+		 */
+		if (useidx >= grp->size) {
+			break;
+		} else if (grp->map & (1 << useidx)) {
+			rcv_array_wc_fill(dd, grp->base + useidx);
+			useidx++;
+			continue;
+		}
+
+		rcventry = grp->base + useidx;
+		npages = sets[setidx].count;
+		pageidx = sets[setidx].idx;
+
+		ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
+					 rcventry, grp, pages + pageidx,
+					 npages);
+		if (ret)
+			return ret;
+		mapped += npages;
+
+		tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
+			EXP_TID_SET(LEN, npages);
+		tidlist[(*tididx)++] = tidinfo;
+		grp->used++;
+		grp->map |= 1 << useidx++;
+		idx++;
+	}
+
+	/* Fill the rest of the group with "blank" writes */
+	for (; useidx < grp->size; useidx++)
+		rcv_array_wc_fill(dd, grp->base + useidx);
+	*pmapped = mapped;
+	return idx;
+}
+
+static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+			      u32 rcventry, struct tid_group *grp,
+			      struct page **pages, unsigned npages)
+{
+	int ret;
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct tid_rb_node *node;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct rb_root *root = &fd->tid_rb_root;
+	dma_addr_t phys;
+
+	/*
+	 * Allocate the node first so we can handle a potential
+	 * failure before we've programmed anything.
+	 */
+	node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
+		       GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	phys = pci_map_single(dd->pcidev,
+			      __va(page_to_phys(pages[0])),
+			      npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+	if (dma_mapping_error(&dd->pcidev->dev, phys)) {
+		dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
+			   phys);
+		kfree(node);
+		return -EFAULT;
+	}
+
+	node->mmu.addr = vaddr;
+	node->mmu.len = npages * PAGE_SIZE;
+	node->phys = page_to_phys(pages[0]);
+	node->npages = npages;
+	node->rcventry = rcventry;
+	node->dma_addr = phys;
+	node->grp = grp;
+	node->freed = false;
+	memcpy(node->pages, pages, sizeof(struct page *) * npages);
+
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		ret = mmu_rb_insert(root, &node->mmu);
+	else
+		ret = hfi1_mmu_rb_insert(root, &node->mmu);
+
+	if (ret) {
+		hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+			  node->rcventry, node->mmu.addr, node->phys, ret);
+		pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
+				 PCI_DMA_FROMDEVICE);
+		kfree(node);
+		return -EFAULT;
+	}
+	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
+	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
+			       node->mmu.addr, node->phys, phys);
+	return 0;
+}
+
+static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
+			      struct tid_group **grp)
+{
+	struct hfi1_filedata *fd = fp->private_data;
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+	struct tid_rb_node *node;
+	u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+	u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
+
+	if (tididx >= uctxt->expected_count) {
+		dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+			   tididx, uctxt->ctxt);
+		return -EINVAL;
+	}
+
+	if (tidctrl == 0x3)
+		return -EINVAL;
+
+	rcventry = tididx + (tidctrl - 1);
+
+	node = fd->entry_to_rb[rcventry];
+	if (!node || node->rcventry != (uctxt->expected_base + rcventry))
+		return -EBADF;
+	if (HFI1_CAP_IS_USET(TID_UNMAP))
+		mmu_rb_remove(&fd->tid_rb_root, &node->mmu, false);
+	else
+		hfi1_mmu_rb_remove(&fd->tid_rb_root, &node->mmu);
+
+	if (grp)
+		*grp = node->grp;
+	clear_tid_node(fd, fd->subctxt, node);
+	return 0;
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, u16 subctxt,
+			   struct tid_rb_node *node)
+{
+	struct hfi1_ctxtdata *uctxt = fd->uctxt;
+	struct hfi1_devdata *dd = uctxt->dd;
+
+	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
+				 node->npages, node->mmu.addr, node->phys,
+				 node->dma_addr);
+
+	hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
+	/*
+	 * Make sure device has seen the write before we unpin the
+	 * pages.
+	 */
+	flush_wc();
+
+	pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
+			 PCI_DMA_FROMDEVICE);
+	hfi1_release_user_pages(current->mm, node->pages, node->npages, true);
+	fd->tid_n_pinned -= node->npages;
+
+	node->grp->used--;
+	node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
+
+	if (node->grp->used == node->grp->size - 1)
+		tid_group_move(node->grp, &uctxt->tid_full_list,
+			       &uctxt->tid_used_list);
+	else if (!node->grp->used)
+		tid_group_move(node->grp, &uctxt->tid_used_list,
+			       &uctxt->tid_group_list);
+	kfree(node);
+}
+
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+			    struct exp_tid_set *set, struct rb_root *root)
+{
+	struct tid_group *grp, *ptr;
+	struct hfi1_filedata *fd = container_of(root, struct hfi1_filedata,
+						tid_rb_root);
+	int i;
+
+	list_for_each_entry_safe(grp, ptr, &set->list, list) {
+		list_del_init(&grp->list);
+
+		for (i = 0; i < grp->size; i++) {
+			if (grp->map & (1 << i)) {
+				u16 rcventry = grp->base + i;
+				struct tid_rb_node *node;
+
+				node = fd->entry_to_rb[rcventry -
+							  uctxt->expected_base];
+				if (!node || node->rcventry != rcventry)
+					continue;
+				if (HFI1_CAP_IS_USET(TID_UNMAP))
+					mmu_rb_remove(&fd->tid_rb_root,
+						      &node->mmu, false);
+				else
+					hfi1_mmu_rb_remove(&fd->tid_rb_root,
+							   &node->mmu);
+				clear_tid_node(fd, -1, node);
+			}
+		}
+	}
+}
+
+static int mmu_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
+	struct tid_rb_node *node =
+		container_of(mnode, struct tid_rb_node, mmu);
+
+	if (node->freed)
+		return 0;
+
+	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+				 node->rcventry, node->npages, node->dma_addr);
+	node->freed = true;
+
+	spin_lock(&fdata->invalid_lock);
+	if (fdata->invalid_tid_idx < uctxt->expected_count) {
+		fdata->invalid_tids[fdata->invalid_tid_idx] =
+			rcventry2tidinfo(node->rcventry - uctxt->expected_base);
+		fdata->invalid_tids[fdata->invalid_tid_idx] |=
+			EXP_TID_SET(LEN, node->npages);
+		if (!fdata->invalid_tid_idx) {
+			unsigned long *ev;
+
+			/*
+			 * hfi1_set_uevent_bits() sets a user event flag
+			 * for all processes. Because calling into the
+			 * driver to process TID cache invalidations is
+			 * expensive and TID cache invalidations are
+			 * handled on a per-process basis, we can
+			 * optimize this to set the flag only for the
+			 * process in question.
+			 */
+			ev = uctxt->dd->events +
+				(((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
+				  HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+			set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
+		}
+		fdata->invalid_tid_idx++;
+	}
+	spin_unlock(&fdata->invalid_lock);
+	return 0;
+}
+
+static int mmu_rb_insert(struct rb_root *root, struct mmu_rb_node *node)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
+	return 0;
+}
+
+static void mmu_rb_remove(struct rb_root *root, struct mmu_rb_node *node,
+			  bool notifier)
+{
+	struct hfi1_filedata *fdata =
+		container_of(root, struct hfi1_filedata, tid_rb_root);
+	struct tid_rb_node *tnode =
+		container_of(node, struct tid_rb_node, mmu);
+	u32 base = fdata->uctxt->expected_base;
+
+	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/user_exp_rcv.h b/drivers/staging/rdma/hfi1/user_exp_rcv.h
index 4f4876e1d353..9bc8d9fba87e 100644
--- a/drivers/staging/rdma/hfi1/user_exp_rcv.h
+++ b/drivers/staging/rdma/hfi1/user_exp_rcv.h
@@ -1,14 +1,13 @@
 #ifndef _HFI1_USER_EXP_RCV_H
 #define _HFI1_USER_EXP_RCV_H
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -20,8 +19,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -50,6 +47,8 @@
  *
  */
 
+#include "hfi.h"
+
 #define EXP_TID_TIDLEN_MASK   0x7FFULL
 #define EXP_TID_TIDLEN_SHIFT  0
 #define EXP_TID_TIDCTRL_MASK  0x3ULL
@@ -71,4 +70,10 @@
 		(tid) |= EXP_TID_SET(field, (value));			\
 	} while (0)
 
+int hfi1_user_exp_rcv_init(struct file *);
+int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
+int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
+int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+
 #endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/staging/rdma/hfi1/user_pages.c b/drivers/staging/rdma/hfi1/user_pages.c
index 8ebfe9ee0d76..88e10b5f55f1 100644
--- a/drivers/staging/rdma/hfi1/user_pages.c
+++ b/drivers/staging/rdma/hfi1/user_pages.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -51,32 +48,62 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/module.h>
 
 #include "hfi.h"
 
-/**
- * hfi1_map_page - a safety wrapper around pci_map_page()
+static unsigned long cache_size = 256;
+module_param(cache_size, ulong, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
+
+/*
+ * Determine whether the caller can pin pages.
+ *
+ * This function should be used in the implementation of buffer caches.
+ * The cache implementation should call this function prior to attempting
+ * to pin buffer pages in order to determine whether they should do so.
+ * The function computes cache limits based on the configured ulimit and
+ * cache size. Use of this function is especially important for caches
+ * which are not limited in any other way (e.g. by HW resources) and, thus,
+ * could keeping caching buffers.
  *
  */
-dma_addr_t hfi1_map_page(struct pci_dev *hwdev, struct page *page,
-			 unsigned long offset, size_t size, int direction)
+bool hfi1_can_pin_pages(struct hfi1_devdata *dd, u32 nlocked, u32 npages)
 {
-	return pci_map_page(hwdev, page, offset, size, direction);
-}
-
-int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
-			    struct page **pages)
-{
-	unsigned long pinned, lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
+		size = (cache_size * (1UL << 20)); /* convert to bytes */
+	unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
 	bool can_lock = capable(CAP_IPC_LOCK);
-	int ret;
+
+	/*
+	 * Calculate per-cache size. The calculation below uses only a quarter
+	 * of the available per-context limit. This leaves space for other
+	 * pinning. Should we worry about shared ctxts?
+	 */
+	cache_limit = (ulimit / usr_ctxts) / 4;
+
+	/* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
+	if (ulimit != (-1UL) && size > cache_limit)
+		size = cache_limit;
+
+	/* Convert to number of pages */
+	size = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	down_read(&current->mm->mmap_sem);
 	pinned = current->mm->pinned_vm;
 	up_read(&current->mm->mmap_sem);
 
-	if (pinned + npages > lock_limit && !can_lock)
-		return -ENOMEM;
+	/* First, check the absolute limit against all pinned pages. */
+	if (pinned + npages >= ulimit && !can_lock)
+		return false;
+
+	return ((nlocked + npages) <= size) || can_lock;
+}
+
+int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
+			    struct page **pages)
+{
+	int ret;
 
 	ret = get_user_pages_fast(vaddr, npages, writable, pages);
 	if (ret < 0)
@@ -89,7 +116,8 @@ int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
 	return ret;
 }
 
-void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
+void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
+			     size_t npages, bool dirty)
 {
 	size_t i;
 
@@ -99,9 +127,9 @@ void hfi1_release_user_pages(struct page **p, size_t npages, bool dirty)
 		put_page(p[i]);
 	}
 
-	if (current->mm) { /* during close after signal, mm can be NULL */
-		down_write(&current->mm->mmap_sem);
-		current->mm->pinned_vm -= npages;
-		up_write(&current->mm->mmap_sem);
+	if (mm) { /* during close after signal, mm can be NULL */
+		down_write(&mm->mmap_sem);
+		mm->pinned_vm -= npages;
+		up_write(&mm->mmap_sem);
 	}
 }
diff --git a/drivers/staging/rdma/hfi1/user_sdma.c b/drivers/staging/rdma/hfi1/user_sdma.c
index 9d4f5d6aaf33..ab6b6a42000f 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.c
+++ b/drivers/staging/rdma/hfi1/user_sdma.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -70,6 +67,7 @@
 #include "verbs.h"  /* for the headers */
 #include "common.h" /* for struct hfi1_tid_info */
 #include "trace.h"
+#include "mmu_rb.h"
 
 static uint hfi1_sdma_comp_ring_size = 128;
 module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
@@ -146,7 +144,6 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
 
 /* Last packet in the request */
 #define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
-#define TXREQ_FLAGS_IOVEC_LAST_PKT BIT(0)
 
 #define SDMA_REQ_IN_USE     0
 #define SDMA_REQ_FOR_THREAD 1
@@ -170,16 +167,28 @@ static unsigned initial_pkt_count = 8;
 #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
 
 struct user_sdma_iovec {
+	struct list_head list;
 	struct iovec iov;
 	/* number of pages in this vector */
 	unsigned npages;
 	/* array of pinned pages for this vector */
 	struct page **pages;
-	/* offset into the virtual address space of the vector at
-	 * which we last left off. */
+	/*
+	 * offset into the virtual address space of the vector at
+	 * which we last left off.
+	 */
 	u64 offset;
 };
 
+struct sdma_mmu_node {
+	struct mmu_rb_node rb;
+	struct list_head list;
+	struct hfi1_user_sdma_pkt_q *pq;
+	atomic_t refcount;
+	struct page **pages;
+	unsigned npages;
+};
+
 struct user_sdma_request {
 	struct sdma_req_info info;
 	struct hfi1_user_sdma_pkt_q *pq;
@@ -213,15 +222,6 @@ struct user_sdma_request {
 	 */
 	u8 omfactor;
 	/*
-	 * pointer to the user's mm_struct. We are going to
-	 * get a reference to it so it doesn't get freed
-	 * since we might not be in process context when we
-	 * are processing the iov's.
-	 * Using this mm_struct, we can get vma based on the
-	 * iov's address (find_vma()).
-	 */
-	struct mm_struct *user_mm;
-	/*
 	 * We copy the iovs for this request (based on
 	 * info.iovcnt). These are only the data vectors
 	 */
@@ -238,13 +238,12 @@ struct user_sdma_request {
 	u16 tididx;
 	u32 sent;
 	u64 seqnum;
+	u64 seqcomp;
+	u64 seqsubmitted;
 	struct list_head txps;
-	spinlock_t txcmp_lock;  /* protect txcmp list */
-	struct list_head txcmp;
 	unsigned long flags;
 	/* status of the last txreq completed */
 	int status;
-	struct work_struct worker;
 };
 
 /*
@@ -259,11 +258,6 @@ struct user_sdma_txreq {
 	struct sdma_txreq txreq;
 	struct list_head list;
 	struct user_sdma_request *req;
-	struct {
-		struct user_sdma_iovec *vec;
-		u8 flags;
-	} iovecs[3];
-	int idx;
 	u16 flags;
 	unsigned busycount;
 	u64 seqnum;
@@ -279,21 +273,21 @@ struct user_sdma_txreq {
 
 static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
 static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
-static void user_sdma_delayed_completion(struct work_struct *);
-static void user_sdma_free_request(struct user_sdma_request *);
+static void user_sdma_txreq_cb(struct sdma_txreq *, int);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
+static void user_sdma_free_request(struct user_sdma_request *, bool);
 static int pin_vector_pages(struct user_sdma_request *,
 			    struct user_sdma_iovec *);
-static void unpin_vector_pages(struct user_sdma_request *,
-			       struct user_sdma_iovec *);
+static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned);
 static int check_header_template(struct user_sdma_request *,
 				 struct hfi1_pkt_header *, u32, u32);
 static int set_txreq_header(struct user_sdma_request *,
 			    struct user_sdma_txreq *, u32);
 static int set_txreq_header_ahg(struct user_sdma_request *,
 				struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct user_sdma_request *,
-					enum hfi1_sdma_comp_state, int);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
+				  struct hfi1_user_sdma_comp_q *,
+				  u16, enum hfi1_sdma_comp_state, int);
 static inline u32 set_pkt_bth_psn(__be32, u8, u32);
 static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
 
@@ -303,6 +297,17 @@ static int defer_packet_queue(
 	struct sdma_txreq *,
 	unsigned seq);
 static void activate_packet_queue(struct iowait *, int);
+static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
+static int sdma_rb_insert(struct rb_root *, struct mmu_rb_node *);
+static void sdma_rb_remove(struct rb_root *, struct mmu_rb_node *, bool);
+static int sdma_rb_invalidate(struct rb_root *, struct mmu_rb_node *);
+
+static struct mmu_rb_ops sdma_rb_ops = {
+	.filter = sdma_rb_filter,
+	.insert = sdma_rb_insert,
+	.remove = sdma_rb_remove,
+	.invalidate = sdma_rb_invalidate
+};
 
 static int defer_packet_queue(
 	struct sdma_engine *sde,
@@ -380,7 +385,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 		goto pq_nomem;
 
 	memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
-	pq->reqs = kmalloc(memsize, GFP_KERNEL);
+	pq->reqs = kzalloc(memsize, GFP_KERNEL);
 	if (!pq->reqs)
 		goto pq_reqs_nomem;
 
@@ -392,9 +397,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	pq->state = SDMA_PKT_Q_INACTIVE;
 	atomic_set(&pq->n_reqs, 0);
 	init_waitqueue_head(&pq->wait);
+	pq->sdma_rb_root = RB_ROOT;
+	INIT_LIST_HEAD(&pq->evict);
+	spin_lock_init(&pq->evict_lock);
 
 	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
-		    activate_packet_queue);
+		    activate_packet_queue, NULL);
 	pq->reqidx = 0;
 	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
 		 fd->subctxt);
@@ -421,6 +429,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
 	cq->nentries = hfi1_sdma_comp_ring_size;
 	fd->cq = cq;
 
+	ret = hfi1_mmu_rb_register(&pq->sdma_rb_root, &sdma_rb_ops);
+	if (ret) {
+		dd_dev_err(dd, "Failed to register with MMU %d", ret);
+		goto done;
+	}
+
 	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 	list_add(&pq->list, &uctxt->sdma_queues);
 	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
@@ -450,6 +464,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 	hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
 		  uctxt->ctxt, fd->subctxt);
 	pq = fd->pq;
+	hfi1_mmu_rb_unregister(&pq->sdma_rb_root);
 	if (pq) {
 		spin_lock_irqsave(&uctxt->sdma_qlock, flags);
 		if (!list_empty(&pq->list))
@@ -476,7 +491,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
 int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 				   unsigned long dim, unsigned long *count)
 {
-	int ret = 0, i = 0, sent;
+	int ret = 0, i = 0;
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
@@ -502,9 +517,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
 		return -EFAULT;
 	}
+
 	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
 				     (u16 *)&info);
-	if (cq->comps[info.comp_idx].status == QUEUED) {
+	if (cq->comps[info.comp_idx].status == QUEUED ||
+	    test_bit(SDMA_REQ_IN_USE, &pq->reqs[info.comp_idx].flags)) {
 		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in QUEUED state",
 			  dd->unit, uctxt->ctxt, fd->subctxt,
 			  info.comp_idx);
@@ -531,10 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	req->cq = cq;
 	req->status = -1;
 	INIT_LIST_HEAD(&req->txps);
-	INIT_LIST_HEAD(&req->txcmp);
-	INIT_WORK(&req->worker, user_sdma_delayed_completion);
 
-	spin_lock_init(&req->txcmp_lock);
 	memcpy(&req->info, &info, sizeof(info));
 
 	if (req_opcode(info.ctrl) == EXPECTED)
@@ -593,8 +607,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 	}
 
 	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
-	/* Calculate the initial TID offset based on the values of
-	   KDETH.OFFSET and KDETH.OM that are passed in. */
+	/*
+	 * Calculate the initial TID offset based on the values of
+	 * KDETH.OFFSET and KDETH.OM that are passed in.
+	 */
 	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
 		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
 		 KDETH_OM_LARGE : KDETH_OM_SMALL);
@@ -603,8 +619,13 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 
 	/* Save all the IO vector structures */
 	while (i < req->data_iovs) {
+		INIT_LIST_HEAD(&req->iovs[i].list);
 		memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
-		req->iovs[i].offset = 0;
+		ret = pin_vector_pages(req, &req->iovs[i]);
+		if (ret) {
+			req->status = ret;
+			goto free_req;
+		}
 		req->data_len += req->iovs[i++].iov.iov_len;
 	}
 	SDMA_DBG(req, "total data length %u", req->data_len);
@@ -668,52 +689,59 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
 		}
 	}
 
-	set_comp_state(req, QUEUED, 0);
+	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
+	atomic_inc(&pq->n_reqs);
 	/* Send the first N packets in the request to buy us some time */
-	sent = user_sdma_send_pkts(req, pcount);
-	if (unlikely(sent < 0)) {
-		if (sent != -EBUSY) {
-			req->status = sent;
-			set_comp_state(req, ERROR, req->status);
-			return sent;
-		} else
-			sent = 0;
+	ret = user_sdma_send_pkts(req, pcount);
+	if (unlikely(ret < 0 && ret != -EBUSY)) {
+		req->status = ret;
+		goto free_req;
 	}
-	atomic_inc(&pq->n_reqs);
-	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
 
-	if (sent < req->info.npkts) {
-		/*
-		 * This is a somewhat blocking send implementation.
-		 * The driver will block the caller until all packets of the
-		 * request have been submitted to the SDMA engine. However, it
-		 * will not wait for send completions.
-		 */
-		while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
-			ret = user_sdma_send_pkts(req, pcount);
-			if (ret < 0) {
-				if (ret != -EBUSY) {
-					req->status = ret;
-					return ret;
-				}
-				wait_event_interruptible_timeout(
-					pq->busy.wait_dma,
-					(pq->state == SDMA_PKT_Q_ACTIVE),
-					msecs_to_jiffies(
-						SDMA_IOWAIT_TIMEOUT));
+	/*
+	 * It is possible that the SDMA engine would have processed all the
+	 * submitted packets by the time we get here. Therefore, only set
+	 * packet queue state to ACTIVE if there are still uncompleted
+	 * requests.
+	 */
+	if (atomic_read(&pq->n_reqs))
+		xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+
+	/*
+	 * This is a somewhat blocking send implementation.
+	 * The driver will block the caller until all packets of the
+	 * request have been submitted to the SDMA engine. However, it
+	 * will not wait for send completions.
+	 */
+	while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+		ret = user_sdma_send_pkts(req, pcount);
+		if (ret < 0) {
+			if (ret != -EBUSY) {
+				req->status = ret;
+				set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+				if (ACCESS_ONCE(req->seqcomp) ==
+				    req->seqsubmitted - 1)
+					goto free_req;
+				return ret;
 			}
+			wait_event_interruptible_timeout(
+				pq->busy.wait_dma,
+				(pq->state == SDMA_PKT_Q_ACTIVE),
+				msecs_to_jiffies(
+					SDMA_IOWAIT_TIMEOUT));
 		}
-
 	}
 	*count += idx;
 	return 0;
 free_req:
-	user_sdma_free_request(req);
+	user_sdma_free_request(req, true);
+	pq_update(pq);
+	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
 	return ret;
 }
 
 static inline u32 compute_data_length(struct user_sdma_request *req,
-					    struct user_sdma_txreq *tx)
+				      struct user_sdma_txreq *tx)
 {
 	/*
 	 * Determine the proper size of the packet data.
@@ -731,8 +759,10 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
 	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
 		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
 			PAGE_SIZE;
-		/* Get the data length based on the remaining space in the
-		 * TID pair. */
+		/*
+		 * Get the data length based on the remaining space in the
+		 * TID pair.
+		 */
 		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
 		/* If we've filled up the TID pair, move to the next one. */
 		if (unlikely(!len) && ++req->tididx < req->n_tids &&
@@ -742,12 +772,15 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
 			req->tidoffset = 0;
 			len = min_t(u32, tidlen, req->info.fragsize);
 		}
-		/* Since the TID pairs map entire pages, make sure that we
+		/*
+		 * Since the TID pairs map entire pages, make sure that we
 		 * are not going to try to send more data that we have
-		 * remaining. */
+		 * remaining.
+		 */
 		len = min(len, req->data_len - req->sent);
-	} else
+	} else {
 		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
+	}
 	SDMA_DBG(req, "Data Length = %u", len);
 	return len;
 }
@@ -810,9 +843,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		tx->flags = 0;
 		tx->req = req;
 		tx->busycount = 0;
-		tx->idx = -1;
 		INIT_LIST_HEAD(&tx->list);
-		memset(tx->iovecs, 0, sizeof(tx->iovecs));
 
 		if (req->seqnum == req->info.npkts - 1)
 			tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
@@ -833,18 +864,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 				WARN_ON(iovec->offset);
 			}
 
-			/*
-			 * This request might include only a header and no user
-			 * data, so pin pages only if there is data and it the
-			 * pages have not been pinned already.
-			 */
-			if (unlikely(!iovec->pages && iovec->iov.iov_len)) {
-				ret = pin_vector_pages(req, iovec);
-				if (ret)
-					goto free_tx;
-			}
-
-			tx->iovecs[++tx->idx].vec = iovec;
 			datalen = compute_data_length(req, tx);
 			if (!datalen) {
 				SDMA_DBG(req,
@@ -934,16 +953,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 					      iovec->pages[pageidx],
 					      offset, len);
 			if (ret) {
-				int i;
-
 				SDMA_DBG(req, "SDMA txreq add page failed %d\n",
 					 ret);
-				/* Mark all assigned vectors as complete so they
-				 * are unpinned in the callback. */
-				for (i = tx->idx; i >= 0; i--) {
-					tx->iovecs[i].flags |=
-						TXREQ_FLAGS_IOVEC_LAST_PKT;
-				}
 				goto free_txreq;
 			}
 			iov_offset += len;
@@ -951,19 +962,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 			data_sent += len;
 			if (unlikely(queued < datalen &&
 				     pageidx == iovec->npages &&
-				     req->iov_idx < req->data_iovs - 1 &&
-				     tx->idx < ARRAY_SIZE(tx->iovecs))) {
+				     req->iov_idx < req->data_iovs - 1)) {
 				iovec->offset += iov_offset;
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
 				iovec = &req->iovs[++req->iov_idx];
-				if (!iovec->pages) {
-					ret = pin_vector_pages(req, iovec);
-					if (ret)
-						goto free_txreq;
-				}
 				iov_offset = 0;
-				tx->iovecs[++tx->idx].vec = iovec;
 			}
 		}
 		/*
@@ -974,28 +976,21 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 		if (req_opcode(req->info.ctrl) == EXPECTED)
 			req->tidoffset += datalen;
 		req->sent += data_sent;
-		if (req->data_len) {
-			tx->iovecs[tx->idx].vec->offset += iov_offset;
-			/* If we've reached the end of the io vector, mark it
-			 * so the callback can unpin the pages and free it. */
-			if (tx->iovecs[tx->idx].vec->offset ==
-			    tx->iovecs[tx->idx].vec->iov.iov_len)
-				tx->iovecs[tx->idx].flags |=
-					TXREQ_FLAGS_IOVEC_LAST_PKT;
-		}
-
+		if (req->data_len)
+			iovec->offset += iov_offset;
+		list_add_tail(&tx->txreq.list, &req->txps);
 		/*
 		 * It is important to increment this here as it is used to
 		 * generate the BTH.PSN and, therefore, can't be bulk-updated
 		 * outside of the loop.
 		 */
 		tx->seqnum = req->seqnum++;
-		list_add_tail(&tx->txreq.list, &req->txps);
 		npkts++;
 	}
 dosend:
 	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-	if (list_empty(&req->txps))
+	if (list_empty(&req->txps)) {
+		req->seqsubmitted = req->seqnum;
 		if (req->seqnum == req->info.npkts) {
 			set_bit(SDMA_REQ_SEND_DONE, &req->flags);
 			/*
@@ -1007,6 +1002,10 @@ dosend:
 			if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
 				sdma_ahg_free(req->sde, req->ahg_idx);
 		}
+	} else if (ret > 0) {
+		req->seqsubmitted += ret;
+		ret = 0;
+	}
 	return ret;
 
 free_txreq:
@@ -1021,7 +1020,7 @@ free_tx:
  */
 static inline int num_user_pages(const struct iovec *iov)
 {
-	const unsigned long addr  = (unsigned long) iov->iov_base;
+	const unsigned long addr  = (unsigned long)iov->iov_base;
 	const unsigned long len   = iov->iov_len;
 	const unsigned long spage = addr & PAGE_MASK;
 	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
@@ -1029,64 +1028,129 @@ static inline int num_user_pages(const struct iovec *iov)
 	return 1 + ((epage - spage) >> PAGE_SHIFT);
 }
 
-static int pin_vector_pages(struct user_sdma_request *req,
-			    struct user_sdma_iovec *iovec) {
-	int pinned, npages;
+/* Caller must hold pq->evict_lock */
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+	u32 cleared = 0;
+	struct sdma_mmu_node *node, *ptr;
 
-	npages = num_user_pages(&iovec->iov);
-	iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
-	if (!iovec->pages) {
-		SDMA_DBG(req, "Failed page array alloc");
-		return -ENOMEM;
+	list_for_each_entry_safe_reverse(node, ptr, &pq->evict, list) {
+		/* Make sure that no one is still using the node. */
+		if (!atomic_read(&node->refcount)) {
+			/*
+			 * Need to use the page count now as the remove callback
+			 * will free the node.
+			 */
+			cleared += node->npages;
+			spin_unlock(&pq->evict_lock);
+			hfi1_mmu_rb_remove(&pq->sdma_rb_root, &node->rb);
+			spin_lock(&pq->evict_lock);
+			if (cleared >= npages)
+				break;
+		}
 	}
+	return cleared;
+}
 
-	/*
-	 * Get a reference to the process's mm so we can use it when
-	 * unpinning the io vectors.
-	 */
-	req->pq->user_mm = get_task_mm(current);
+static int pin_vector_pages(struct user_sdma_request *req,
+			    struct user_sdma_iovec *iovec) {
+	int ret = 0, pinned, npages, cleared;
+	struct page **pages;
+	struct hfi1_user_sdma_pkt_q *pq = req->pq;
+	struct sdma_mmu_node *node = NULL;
+	struct mmu_rb_node *rb_node;
+
+	rb_node = hfi1_mmu_rb_search(&pq->sdma_rb_root,
+				     (unsigned long)iovec->iov.iov_base,
+				     iovec->iov.iov_len);
+	if (rb_node)
+		node = container_of(rb_node, struct sdma_mmu_node, rb);
+
+	if (!node) {
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		if (!node)
+			return -ENOMEM;
 
-	pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
-					 npages, 0, iovec->pages);
+		node->rb.addr = (unsigned long)iovec->iov.iov_base;
+		node->rb.len = iovec->iov.iov_len;
+		node->pq = pq;
+		atomic_set(&node->refcount, 0);
+		INIT_LIST_HEAD(&node->list);
+	}
 
-	if (pinned < 0)
-		return pinned;
+	npages = num_user_pages(&iovec->iov);
+	if (node->npages < npages) {
+		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+		if (!pages) {
+			SDMA_DBG(req, "Failed page array alloc");
+			ret = -ENOMEM;
+			goto bail;
+		}
+		memcpy(pages, node->pages, node->npages * sizeof(*pages));
+
+		npages -= node->npages;
+retry:
+		if (!hfi1_can_pin_pages(pq->dd, pq->n_locked, npages)) {
+			spin_lock(&pq->evict_lock);
+			cleared = sdma_cache_evict(pq, npages);
+			spin_unlock(&pq->evict_lock);
+			if (cleared >= npages)
+				goto retry;
+		}
+		pinned = hfi1_acquire_user_pages(
+			((unsigned long)iovec->iov.iov_base +
+			 (node->npages * PAGE_SIZE)), npages, 0,
+			pages + node->npages);
+		if (pinned < 0) {
+			kfree(pages);
+			ret = pinned;
+			goto bail;
+		}
+		if (pinned != npages) {
+			unpin_vector_pages(current->mm, pages, pinned);
+			ret = -EFAULT;
+			goto bail;
+		}
+		kfree(node->pages);
+		node->pages = pages;
+		node->npages += pinned;
+		npages = node->npages;
+		spin_lock(&pq->evict_lock);
+		if (!rb_node)
+			list_add(&node->list, &pq->evict);
+		else
+			list_move(&node->list, &pq->evict);
+		pq->n_locked += pinned;
+		spin_unlock(&pq->evict_lock);
+	}
+	iovec->pages = node->pages;
+	iovec->npages = npages;
 
-	iovec->npages = pinned;
-	if (pinned != npages) {
-		SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
-		unpin_vector_pages(req, iovec);
-		return -EFAULT;
+	if (!rb_node) {
+		ret = hfi1_mmu_rb_insert(&req->pq->sdma_rb_root, &node->rb);
+		if (ret) {
+			spin_lock(&pq->evict_lock);
+			list_del(&node->list);
+			pq->n_locked -= node->npages;
+			spin_unlock(&pq->evict_lock);
+			ret = 0;
+			goto bail;
+		}
+	} else {
+		atomic_inc(&node->refcount);
 	}
 	return 0;
+bail:
+	if (!rb_node)
+		kfree(node);
+	return ret;
 }
 
-static void unpin_vector_pages(struct user_sdma_request *req,
-			       struct user_sdma_iovec *iovec)
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+			       unsigned npages)
 {
-	/*
-	 * Unpinning is done through the workqueue so use the
-	 * process's mm if we have a reference to it.
-	 */
-	if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
-		use_mm(req->pq->user_mm);
-
-	hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
-
-	/*
-	 * Unuse the user's mm (see above) and release the
-	 * reference to it.
-	 */
-	if (req->pq->user_mm) {
-		if (current->flags & PF_KTHREAD)
-			unuse_mm(req->pq->user_mm);
-		mmput(req->pq->user_mm);
-	}
-
-	kfree(iovec->pages);
-	iovec->pages = NULL;
-	iovec->npages = 0;
-	iovec->offset = 0;
+	hfi1_release_user_pages(mm, pages, npages, 0);
+	kfree(pages);
 }
 
 static int check_header_template(struct user_sdma_request *req,
@@ -1209,7 +1273,6 @@ static int set_txreq_header(struct user_sdma_request *req,
 		if (ret)
 			return ret;
 		goto done;
-
 	}
 
 	hdr->bth[2] = cpu_to_be32(
@@ -1219,7 +1282,7 @@ static int set_txreq_header(struct user_sdma_request *req,
 
 	/* Set ACK request on last packet */
 	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
-		hdr->bth[2] |= cpu_to_be32(1UL<<31);
+		hdr->bth[2] |= cpu_to_be32(1UL << 31);
 
 	/* Set the new offset */
 	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
@@ -1233,8 +1296,10 @@ static int set_txreq_header(struct user_sdma_request *req,
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1315,8 +1380,10 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
 					 PAGE_SIZE)) {
 			req->tidoffset = 0;
-			/* Since we don't copy all the TIDs, all at once,
-			 * we have to check again. */
+			/*
+			 * Since we don't copy all the TIDs, all at once,
+			 * we have to check again.
+			 */
 			if (++req->tididx > req->n_tids - 1 ||
 			    !req->tids[req->tididx]) {
 				return -EINVAL;
@@ -1340,8 +1407,9 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
 								INTR) >> 16);
 			val &= cpu_to_le16(~(1U << 13));
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
-		} else
+		} else {
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+		}
 	}
 
 	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
@@ -1356,113 +1424,62 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
  * tx request have been processed by the DMA engine. Called in
  * interrupt context.
  */
-static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
-			       int drain)
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
 {
 	struct user_sdma_txreq *tx =
 		container_of(txreq, struct user_sdma_txreq, txreq);
 	struct user_sdma_request *req;
-	bool defer;
-	int i;
+	struct hfi1_user_sdma_pkt_q *pq;
+	struct hfi1_user_sdma_comp_q *cq;
+	u16 idx;
 
 	if (!tx->req)
 		return;
 
 	req = tx->req;
-	/*
-	 * If this is the callback for the last packet of the request,
-	 * queue up the request for clean up.
-	 */
-	defer = (tx->seqnum == req->info.npkts - 1);
-
-	/*
-	 * If we have any io vectors associated with this txreq,
-	 * check whether they need to be 'freed'. We can't free them
-	 * here because the unpin function needs to be able to sleep.
-	 */
-	for (i = tx->idx; i >= 0; i--) {
-		if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
-			defer = true;
-			break;
-		}
-	}
+	pq = req->pq;
+	cq = req->cq;
 
-	req->status = status;
 	if (status != SDMA_TXREQ_S_OK) {
 		SDMA_DBG(req, "SDMA completion with error %d",
 			 status);
 		set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
-		defer = true;
 	}
 
-	/*
-	 * Defer the clean up of the iovectors and the request until later
-	 * so it can be done outside of interrupt context.
-	 */
-	if (defer) {
-		spin_lock(&req->txcmp_lock);
-		list_add_tail(&tx->list, &req->txcmp);
-		spin_unlock(&req->txcmp_lock);
-		schedule_work(&req->worker);
+	req->seqcomp = tx->seqnum;
+	kmem_cache_free(pq->txreq_cache, tx);
+	tx = NULL;
+
+	idx = req->info.comp_idx;
+	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
+		if (req->seqcomp == req->info.npkts - 1) {
+			req->status = 0;
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, COMPLETE, 0);
+		}
 	} else {
-		kmem_cache_free(req->pq->txreq_cache, tx);
+		if (status != SDMA_TXREQ_S_OK)
+			req->status = status;
+		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
+		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
+		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+			user_sdma_free_request(req, false);
+			pq_update(pq);
+			set_comp_state(pq, cq, idx, ERROR, req->status);
+		}
 	}
 }
 
-static void user_sdma_delayed_completion(struct work_struct *work)
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
 {
-	struct user_sdma_request *req =
-		container_of(work, struct user_sdma_request, worker);
-	struct hfi1_user_sdma_pkt_q *pq = req->pq;
-	struct user_sdma_txreq *tx = NULL;
-	unsigned long flags;
-	u64 seqnum;
-	int i;
-
-	while (1) {
-		spin_lock_irqsave(&req->txcmp_lock, flags);
-		if (!list_empty(&req->txcmp)) {
-			tx = list_first_entry(&req->txcmp,
-					      struct user_sdma_txreq, list);
-			list_del(&tx->list);
-		}
-		spin_unlock_irqrestore(&req->txcmp_lock, flags);
-		if (!tx)
-			break;
-
-		for (i = tx->idx; i >= 0; i--)
-			if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
-				unpin_vector_pages(req, tx->iovecs[i].vec);
-
-		seqnum = tx->seqnum;
-		kmem_cache_free(pq->txreq_cache, tx);
-		tx = NULL;
-
-		if (req->status != SDMA_TXREQ_S_OK) {
-			if (seqnum == ACCESS_ONCE(req->seqnum) &&
-			    test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, ERROR, req->status);
-				user_sdma_free_request(req);
-				break;
-			}
-		} else {
-			if (seqnum == req->info.npkts - 1) {
-				atomic_dec(&pq->n_reqs);
-				set_comp_state(req, COMPLETE, 0);
-				user_sdma_free_request(req);
-				break;
-			}
-		}
-	}
-
-	if (!atomic_read(&pq->n_reqs)) {
+	if (atomic_dec_and_test(&pq->n_reqs)) {
 		xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
 		wake_up(&pq->wait);
 	}
 }
 
-static void user_sdma_free_request(struct user_sdma_request *req)
+static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
 {
 	if (!list_empty(&req->txps)) {
 		struct sdma_txreq *t, *p;
@@ -1476,25 +1493,87 @@ static void user_sdma_free_request(struct user_sdma_request *req)
 		}
 	}
 	if (req->data_iovs) {
+		struct sdma_mmu_node *node;
+		struct mmu_rb_node *mnode;
 		int i;
 
-		for (i = 0; i < req->data_iovs; i++)
-			if (req->iovs[i].npages && req->iovs[i].pages)
-				unpin_vector_pages(req, &req->iovs[i]);
+		for (i = 0; i < req->data_iovs; i++) {
+			mnode = hfi1_mmu_rb_search(
+				&req->pq->sdma_rb_root,
+				(unsigned long)req->iovs[i].iov.iov_base,
+				req->iovs[i].iov.iov_len);
+			if (!mnode)
+				continue;
+
+			node = container_of(mnode, struct sdma_mmu_node, rb);
+			if (unpin)
+				hfi1_mmu_rb_remove(&req->pq->sdma_rb_root,
+						   &node->rb);
+			else
+				atomic_dec(&node->refcount);
+		}
 	}
 	kfree(req->tids);
 	clear_bit(SDMA_REQ_IN_USE, &req->flags);
 }
 
-static inline void set_comp_state(struct user_sdma_request *req,
-					enum hfi1_sdma_comp_state state,
-					int ret)
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+				  struct hfi1_user_sdma_comp_q *cq,
+				  u16 idx, enum hfi1_sdma_comp_state state,
+				  int ret)
 {
-	SDMA_DBG(req, "Setting completion status %u %d", state, ret);
-	req->cq->comps[req->info.comp_idx].status = state;
+	hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
+		  pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
+	cq->comps[idx].status = state;
 	if (state == ERROR)
-		req->cq->comps[req->info.comp_idx].errcode = -ret;
-	trace_hfi1_sdma_user_completion(req->pq->dd, req->pq->ctxt,
-					req->pq->subctxt, req->info.comp_idx,
-					state, ret);
+		cq->comps[idx].errcode = -ret;
+	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
+					idx, state, ret);
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+			   unsigned long len)
+{
+	return (bool)(node->addr == addr);
+}
+
+static int sdma_rb_insert(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	atomic_inc(&node->refcount);
+	return 0;
+}
+
+static void sdma_rb_remove(struct rb_root *root, struct mmu_rb_node *mnode,
+			   bool notifier)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	spin_lock(&node->pq->evict_lock);
+	list_del(&node->list);
+	node->pq->n_locked -= node->npages;
+	spin_unlock(&node->pq->evict_lock);
+
+	unpin_vector_pages(notifier ? NULL : current->mm, node->pages,
+			   node->npages);
+	/*
+	 * If called by the MMU notifier, we have to adjust the pinned
+	 * page count ourselves.
+	 */
+	if (notifier)
+		current->mm->pinned_vm -= node->npages;
+	kfree(node);
+}
+
+static int sdma_rb_invalidate(struct rb_root *root, struct mmu_rb_node *mnode)
+{
+	struct sdma_mmu_node *node =
+		container_of(mnode, struct sdma_mmu_node, rb);
+
+	if (!atomic_read(&node->refcount))
+		return 1;
+	return 0;
 }
diff --git a/drivers/staging/rdma/hfi1/user_sdma.h b/drivers/staging/rdma/hfi1/user_sdma.h
index 0afa28508a8a..b9240e351161 100644
--- a/drivers/staging/rdma/hfi1/user_sdma.h
+++ b/drivers/staging/rdma/hfi1/user_sdma.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -69,7 +66,11 @@ struct hfi1_user_sdma_pkt_q {
 	struct iowait busy;
 	unsigned state;
 	wait_queue_head_t wait;
-	struct mm_struct *user_mm;
+	unsigned long unpinned;
+	struct rb_root sdma_rb_root;
+	u32 n_locked;
+	struct list_head evict;
+	spinlock_t evict_lock; /* protect evict and n_locked */
 };
 
 struct hfi1_user_sdma_comp_q {
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index 176168614b5a..89f2aad45c1b 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -63,9 +60,9 @@
 #include "device.h"
 #include "trace.h"
 #include "qp.h"
-#include "sdma.h"
+#include "verbs_txreq.h"
 
-unsigned int hfi1_lkey_table_size = 16;
+static unsigned int hfi1_lkey_table_size = 16;
 module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
@@ -124,45 +121,181 @@ unsigned int hfi1_max_srq_wrs = 0x1FFFF;
 module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
 
+unsigned short piothreshold = 256;
+module_param(piothreshold, ushort, S_IRUGO);
+MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
+
+#define COPY_CACHELESS 1
+#define COPY_ADAPTIVE  2
+static unsigned int sge_copy_mode;
+module_param(sge_copy_mode, uint, S_IRUGO);
+MODULE_PARM_DESC(sge_copy_mode,
+		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");
+
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained);
+	int status);
+
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag);
 
 /* Length of buffer to create verbs txreq cache name */
 #define TXREQ_NAME_LEN 24
 
+static uint wss_threshold;
+module_param(wss_threshold, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
+static uint wss_clean_period = 256;
+module_param(wss_clean_period, uint, S_IRUGO);
+MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
+
+/* memory working set size */
+struct hfi1_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
+};
+
+static struct hfi1_wss wss;
+
+int hfi1_wss_init(void)
+{
+	long llc_size;
+	long llc_bits;
+	long table_size;
+	long table_bits;
+
+	/* check for a valid percent range - default to 80 if none or invalid */
+	if (wss_threshold < 1 || wss_threshold > 100)
+		wss_threshold = 80;
+	/* reject a wildly large period */
+	if (wss_clean_period > 1000000)
+		wss_clean_period = 256;
+	/* reject a zero period */
+	if (wss_clean_period == 0)
+		wss_clean_period = 1;
+
+	/*
+	 * Calculate the table size - the next power of 2 larger than the
+	 * LLC size.  LLC size is in KiB.
+	 */
+	llc_size = wss_llc_size() * 1024;
+	table_size = roundup_pow_of_two(llc_size);
+
+	/* one bit per page in rounded up table */
+	llc_bits = llc_size / PAGE_SIZE;
+	table_bits = table_size / PAGE_SIZE;
+	wss.pages_mask = table_bits - 1;
+	wss.num_entries = table_bits / BITS_PER_LONG;
+
+	wss.threshold = (llc_bits * wss_threshold) / 100;
+	if (wss.threshold == 0)
+		wss.threshold = 1;
+
+	atomic_set(&wss.clean_counter, wss_clean_period);
+
+	wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
+			      GFP_KERNEL);
+	if (!wss.entries) {
+		hfi1_wss_exit();
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void hfi1_wss_exit(void)
+{
+	/* coded to handle partially initialized and repeat callers */
+	kfree(wss.entries);
+	wss.entries = NULL;
+}
+
 /*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; hfi1_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
+ * Advance the clean counter.  When the clean period has expired,
+ * clean an entry.
+ *
+ * This is implemented in atomics to avoid locking.  Because multiple
+ * variables are involved, it can be racy which can lead to slightly
+ * inaccurate information.  Since this is only a heuristic, this is
+ * OK.  Any innaccuracies will clean themselves out as the counter
+ * advances.  That said, it is unlikely the entry clean operation will
+ * race - the next possible racer will not start until the next clean
+ * period.
+ *
+ * The clean counter is implemented as a decrement to zero.  When zero
+ * is reached an entry is cleaned.
  */
-const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = HFI1_POST_RECV_OK,
-	[IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK |
-	    HFI1_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-	[IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV |
-	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
-};
+static void wss_advance_clean_counter(void)
+{
+	int entry;
+	int weight;
+	unsigned long bits;
 
-struct hfi1_ucontext {
-	struct ib_ucontext ibucontext;
-};
+	/* become the cleaner if we decrement the counter to zero */
+	if (atomic_dec_and_test(&wss.clean_counter)) {
+		/*
+		 * Set, not add, the clean period.  This avoids an issue
+		 * where the counter could decrement below the clean period.
+		 * Doing a set can result in lost decrements, slowing the
+		 * clean advance.  Since this a heuristic, this possible
+		 * slowdown is OK.
+		 *
+		 * An alternative is to loop, advancing the counter by a
+		 * clean period until the result is > 0. However, this could
+		 * lead to several threads keeping another in the clean loop.
+		 * This could be mitigated by limiting the number of times
+		 * we stay in the loop.
+		 */
+		atomic_set(&wss.clean_counter, wss_clean_period);
 
-static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
+		/*
+		 * Uniquely grab the entry to clean and move to next.
+		 * The current entry is always the lower bits of
+		 * wss.clean_entry.  The table size, wss.num_entries,
+		 * is always a power-of-2.
+		 */
+		entry = (atomic_inc_return(&wss.clean_entry) - 1)
+			& (wss.num_entries - 1);
+
+		/* clear the entry and count the bits */
+		bits = xchg(&wss.entries[entry], 0);
+		weight = hweight64((u64)bits);
+		/* only adjust the contended total count if needed */
+		if (weight)
+			atomic_sub(weight, &wss.total_count);
+	}
+}
+
+/*
+ * Insert the given address into the working set array.
+ */
+static void wss_insert(void *address)
 {
-	return container_of(ibucontext, struct hfi1_ucontext, ibucontext);
+	u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
+	u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
+	u32 nr = page & (BITS_PER_LONG - 1);
+
+	if (!test_and_set_bit(nr, &wss.entries[entry]))
+		atomic_inc(&wss.total_count);
+
+	wss_advance_clean_counter();
 }
 
-static inline void _hfi1_schedule_send(struct hfi1_qp *qp);
+/*
+ * Is the working set larger than the threshold?
+ */
+static inline int wss_exceeds_threshold(void)
+{
+	return atomic_read(&wss.total_count) >= wss.threshold;
+}
 
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
@@ -274,14 +407,47 @@ __be64 ib_hfi1_sys_image_guid;
  * @ss: the SGE state
  * @data: the data to copy
  * @length: the length of the data
+ * @copy_last: do a separate copy of the last 8 bytes
  */
 void hfi1_copy_sge(
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	void *data, u32 length,
-	int release)
+	int release,
+	int copy_last)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
+	int in_last = 0;
+	int i;
+	int cacheless_copy = 0;
+
+	if (sge_copy_mode == COPY_CACHELESS) {
+		cacheless_copy = length >= PAGE_SIZE;
+	} else if (sge_copy_mode == COPY_ADAPTIVE) {
+		if (length >= PAGE_SIZE) {
+			/*
+			 * NOTE: this *assumes*:
+			 * o The first vaddr is the dest.
+			 * o If multiple pages, then vaddr is sequential.
+			 */
+			wss_insert(sge->vaddr);
+			if (length >= (2 * PAGE_SIZE))
+				wss_insert(sge->vaddr + PAGE_SIZE);
+
+			cacheless_copy = wss_exceeds_threshold();
+		} else {
+			wss_advance_clean_counter();
+		}
+	}
+	if (copy_last) {
+		if (length > 8) {
+			length -= 8;
+		} else {
+			copy_last = 0;
+			in_last = 1;
+		}
+	}
 
+again:
 	while (length) {
 		u32 len = sge->length;
 
@@ -290,17 +456,25 @@ void hfi1_copy_sge(
 		if (len > sge->sge_length)
 			len = sge->sge_length;
 		WARN_ON_ONCE(len == 0);
-		memcpy(sge->vaddr, data, len);
+		if (unlikely(in_last)) {
+			/* enforce byte transfer ordering */
+			for (i = 0; i < len; i++)
+				((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
+		} else if (cacheless_copy) {
+			cacheless_memcpy(sge->vaddr, data, len);
+		} else {
+			memcpy(sge->vaddr, data, len);
+		}
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -313,6 +487,13 @@ void hfi1_copy_sge(
 		data += len;
 		length -= len;
 	}
+
+	if (copy_last) {
+		copy_last = 0;
+		in_last = 1;
+		length = 8;
+		goto again;
+	}
 }
 
 /**
@@ -320,9 +501,9 @@ void hfi1_copy_sge(
  * @ss: the SGE state
  * @length: the number of bytes to skip
  */
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	while (length) {
 		u32 len = sge->length;
@@ -337,11 +518,11 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (release)
-				hfi1_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= HFI1_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -355,231 +536,6 @@ void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
 	}
 }
 
-/**
- * post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
-	struct hfi1_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	struct hfi1_lkey_table *rkt;
-	struct hfi1_pd *pd;
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd;
-	struct hfi1_ibport *ibp;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (unlikely(wr->num_sge > qp->s_max_sge))
-		return -EINVAL;
-
-	ppd = &dd->pport[qp->port_num - 1];
-	ibp = &ppd->ibport_data;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			return -EINVAL;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			return -EINVAL;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			return -EINVAL;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		return -EINVAL;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		return -EINVAL;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last)
-		return -ENOMEM;
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					  &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-	} else {
-		struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
-
-		atomic_inc(&ah->refcount);
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	return 0;
-
-bail_inval_free:
-	/* release mr holds */
-	while (j) {
-		struct hfi1_sge *sge = &wqe->sg_list[--j];
-
-		hfi1_put_mr(sge->mr);
-	}
-	return -EINVAL;
-}
-
-/**
- * post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int call_send;
-	unsigned long flags;
-	unsigned nreq = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		return -EINVAL;
-	}
-
-	/* sq empty and not list -> call send */
-	call_send = qp->s_head == qp->s_last && !wr->next;
-
-	for (; wr; wr = wr->next) {
-		err = post_one_send(qp, wr);
-		if (unlikely(err)) {
-			*bad_wr = wr;
-			goto bail;
-		}
-		nreq++;
-	}
-bail:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	if (nreq && !call_send)
-		_hfi1_schedule_send(qp);
-	if (nreq && call_send)
-		hfi1_do_send(&qp->s_iowait.iowork);
-	return err;
-}
-
-/**
- * post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			struct ib_recv_wr **bad_wr)
-{
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct hfi1_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
 /*
  * Make sure the QP is ready and able to accept the given opcode.
  */
@@ -587,18 +543,17 @@ static inline int qp_ok(int opcode, struct hfi1_packet *packet)
 {
 	struct hfi1_ibport *ibp;
 
-	if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
 		goto dropit;
 	if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
 	    (opcode == IB_OPCODE_CNP))
 		return 1;
 dropit:
 	ibp = &packet->rcd->ppd->ibport_data;
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return 0;
 }
 
-
 /**
  * hfi1_ib_rcv - process an incoming packet
  * @packet: data packet information
@@ -614,6 +569,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	u32 tlen = packet->tlen;
 	struct hfi1_pportdata *ppd = rcd->ppd;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
+	struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
 	unsigned long flags;
 	u32 qp_num;
 	int lnh;
@@ -622,9 +578,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 
 	/* Check for GRH */
 	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
-	if (lnh == HFI1_LRH_BTH)
+	if (lnh == HFI1_LRH_BTH) {
 		packet->ohdr = &hdr->u.oth;
-	else if (lnh == HFI1_LRH_GRH) {
+	} else if (lnh == HFI1_LRH_GRH) {
 		u32 vtf;
 
 		packet->ohdr = &hdr->u.l.oth;
@@ -634,8 +590,9 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 			goto drop;
 		packet->rcv_flags |= HFI1_HAS_GRH;
-	} else
+	} else {
 		goto drop;
+	}
 
 	trace_input_ibhdr(rcd->dd, hdr);
 
@@ -643,17 +600,17 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	inc_opstats(tlen, &rcd->opstats->stats[opcode]);
 
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK;
+	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) &&
-	    (lid != HFI1_PERMISSIVE_LID))) {
-		struct hfi1_mcast *mcast;
-		struct hfi1_mcast_qp *p;
+	if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+		     (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 		if (lnh != HFI1_LRH_GRH)
 			goto drop;
-		mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid);
-		if (mcast == NULL)
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+		if (!mcast)
 			goto drop;
 		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
 			packet->qp = p->qp;
@@ -663,14 +620,14 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 			spin_unlock_irqrestore(&packet->qp->r_lock, flags);
 		}
 		/*
-		 * Notify hfi1_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 	} else {
 		rcu_read_lock();
-		packet->qp = hfi1_lookup_qpn(ibp, qp_num);
+		packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
 		if (!packet->qp) {
 			rcu_read_unlock();
 			goto drop;
@@ -684,7 +641,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
 	return;
 
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 
 /*
@@ -695,15 +652,17 @@ static void mem_timer(unsigned long data)
 {
 	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
 	struct list_head *list = &dev->memwait;
-	struct hfi1_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
 	struct iowait *wait;
 	unsigned long flags;
+	struct hfi1_qp_priv *priv;
 
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
 	if (!list_empty(list)) {
 		wait = list_first_entry(list, struct iowait, list);
-		qp = container_of(wait, struct hfi1_qp, s_iowait);
-		list_del_init(&qp->s_iowait.list);
+		qp = iowait_to_qp(wait);
+		priv = qp->priv;
+		list_del_init(&priv->s_iowait.list);
 		/* refcount held until actual wake up */
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
@@ -711,12 +670,12 @@ static void mem_timer(unsigned long data)
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 
 	if (qp)
-		hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM);
+		hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
 }
 
-void update_sge(struct hfi1_sge_state *ss, u32 length)
+void update_sge(struct rvt_sge_state *ss, u32 length)
 {
-	struct hfi1_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 	sge->vaddr += length;
 	sge->length -= length;
@@ -725,7 +684,7 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= HFI1_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 			sge->n = 0;
@@ -735,143 +694,55 @@ void update_sge(struct hfi1_sge_state *ss, u32 length)
 	}
 }
 
-static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
-						struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-	unsigned long flags;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		spin_lock_irqsave(&qp->s_lock, flags);
-		write_seqlock(&dev->iowait_lock);
-		if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK &&
-		    list_empty(&qp->s_iowait.list)) {
-			dev->n_txwait++;
-			qp->s_flags |= HFI1_S_WAIT_TX;
-			list_add_tail(&qp->s_iowait.list, &dev->txwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX);
-			atomic_inc(&qp->refcount);
-		}
-		qp->s_flags &= ~HFI1_S_BUSY;
-		write_sequnlock(&dev->iowait_lock);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		tx = ERR_PTR(-EBUSY);
-	}
-	return tx;
-}
-
-static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
-					    struct hfi1_qp *qp)
-{
-	struct verbs_txreq *tx;
-
-	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
-	if (!tx) {
-		/* call slow path to get the lock */
-		tx =  __get_txreq(dev, qp);
-		if (IS_ERR(tx))
-			return tx;
-	}
-	tx->qp = qp;
-	return tx;
-}
-
-void hfi1_put_txreq(struct verbs_txreq *tx)
-{
-	struct hfi1_ibdev *dev;
-	struct hfi1_qp *qp;
-	unsigned long flags;
-	unsigned int seq;
-
-	qp = tx->qp;
-	dev = to_idev(qp->ibqp.device);
-
-	if (tx->mr) {
-		hfi1_put_mr(tx->mr);
-		tx->mr = NULL;
-	}
-	sdma_txclean(dd_from_dev(dev), &tx->txreq);
-
-	/* Free verbs_txreq and return to slab cache */
-	kmem_cache_free(dev->verbs_txreq_cache, tx);
-
-	do {
-		seq = read_seqbegin(&dev->iowait_lock);
-		if (!list_empty(&dev->txwait)) {
-			struct iowait *wait;
-
-			write_seqlock_irqsave(&dev->iowait_lock, flags);
-			/* Wake up first QP wanting a free struct */
-			wait = list_first_entry(&dev->txwait, struct iowait,
-						list);
-			qp = container_of(wait, struct hfi1_qp, s_iowait);
-			list_del_init(&qp->s_iowait.list);
-			/* refcount held until actual wake up */
-			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
-			hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX);
-			break;
-		}
-	} while (read_seqretry(&dev->iowait_lock, seq));
-}
-
 /*
  * This is called with progress side lock held.
  */
 /* New API */
 static void verbs_sdma_complete(
 	struct sdma_txreq *cookie,
-	int status,
-	int drained)
+	int status)
 {
 	struct verbs_txreq *tx =
 		container_of(cookie, struct verbs_txreq, txreq);
-	struct hfi1_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
 
 	spin_lock(&qp->s_lock);
-	if (tx->wqe)
+	if (tx->wqe) {
 		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
-	else if (qp->ibqp.qp_type == IB_QPT_RC) {
+	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		struct hfi1_ib_header *hdr;
 
 		hdr = &tx->phdr.hdr;
 		hfi1_rc_send_complete(qp, hdr);
 	}
-	if (drained) {
-		/*
-		 * This happens when the send engine notes
-		 * a QP in the error state and cannot
-		 * do the flush work until that QP's
-		 * sdma work has finished.
-		 */
-		if (qp->s_flags & HFI1_S_WAIT_DMA) {
-			qp->s_flags &= ~HFI1_S_WAIT_DMA;
-			hfi1_schedule_send(qp);
-		}
-	}
 	spin_unlock(&qp->s_lock);
 
 	hfi1_put_txreq(tx);
 }
 
-static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
+static int wait_kmem(struct hfi1_ibdev *dev,
+		     struct rvt_qp *qp,
+		     struct hfi1_pkt_state *ps)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= HFI1_S_WAIT_KMEM;
-			list_add_tail(&qp->s_iowait.list, &dev->memwait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->s_iowait.list, &dev->memwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
 			atomic_inc(&qp->refcount);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -884,14 +755,14 @@ static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
  *
  * Add failures will revert the sge cursor
  */
-static int build_verbs_ulp_payload(
+static noinline int build_verbs_ulp_payload(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx)
 {
-	struct hfi1_sge *sg_list = ss->sg_list;
-	struct hfi1_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u32 len;
 	int ret = 0;
@@ -928,23 +799,21 @@ bail_txadd:
  * NOTE: DMA mapping is held in the tx until completed in the ring or
  *       the tx desc is freed without having been submitted to the ring
  *
- * This routine insures the following all the helper routine
- * calls succeed.
+ * This routine ensures all the helper routine calls succeed.
  */
 /* New API */
 static int build_verbs_tx_desc(
 	struct sdma_engine *sde,
-	struct hfi1_sge_state *ss,
+	struct rvt_sge_state *ss,
 	u32 length,
 	struct verbs_txreq *tx,
 	struct ahg_ib_header *ahdr,
 	u64 pbc)
 {
 	int ret = 0;
-	struct hfi1_pio_header *phdr;
+	struct hfi1_pio_header *phdr = &tx->phdr;
 	u16 hdrbytes = tx->hdr_dwords << 2;
 
-	phdr = &tx->phdr;
 	if (!ahdr->ahgcount) {
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
@@ -958,29 +827,14 @@ static int build_verbs_tx_desc(
 		if (ret)
 			goto bail_txadd;
 		phdr->pbc = cpu_to_le64(pbc);
-		memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc));
-		/* add the header */
 		ret = sdma_txadd_kvaddr(
 			sde->dd,
 			&tx->txreq,
-			&tx->phdr,
-			tx->hdr_dwords << 2);
+			phdr,
+			hdrbytes);
 		if (ret)
 			goto bail_txadd;
 	} else {
-		struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth;
-		struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth;
-
-		/* needed in rc_send_complete() */
-		phdr->hdr.lrh[0] = ahdr->ibh.lrh[0];
-		if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) {
-			sohdr = &ahdr->ibh.u.l.oth;
-			dohdr = &phdr->hdr.u.l.oth;
-		}
-		/* opcode */
-		dohdr->bth[0] = sohdr->bth[0];
-		/* PSN/ACK  */
-		dohdr->bth[2] = sohdr->bth[2];
 		ret = sdma_txinit_ahg(
 			&tx->txreq,
 			ahdr->tx_flags,
@@ -1001,80 +855,75 @@ bail_txadd:
 	return ret;
 }
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct ahg_ib_header *ahdr = priv->s_hdr;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
 	struct hfi1_ibdev *dev = ps->dev;
 	struct hfi1_pportdata *ppd = ps->ppd;
 	struct verbs_txreq *tx;
-	struct sdma_txreq *stx;
 	u64 pbc_flags = 0;
-	u8 sc5 = qp->s_sc;
+	u8 sc5 = priv->s_sc;
+
 	int ret;
 
-	if (!list_empty(&qp->s_iowait.tx_head)) {
-		stx = list_first_entry(
-			&qp->s_iowait.tx_head,
-			struct sdma_txreq,
-			list);
-		list_del_init(&stx->list);
-		tx = container_of(stx, struct verbs_txreq, txreq);
-		ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx);
-		if (unlikely(ret == -ECOMM))
+	tx = ps->s_txreq;
+	if (!sdma_txreq_built(&tx->txreq)) {
+		if (likely(pbc == 0)) {
+			u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+			/* No vl15 here */
+			/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
+			pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+
+			pbc = create_pbc(ppd,
+					 pbc_flags,
+					 qp->srate_mbps,
+					 vl,
+					 plen);
+		}
+		tx->wqe = qp->s_wqe;
+		ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
+		if (unlikely(ret))
+			goto bail_build;
+	}
+	ret =  sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+	if (unlikely(ret < 0)) {
+		if (ret == -ECOMM)
 			goto bail_ecomm;
 		return ret;
 	}
-
-	tx = get_txreq(dev, qp);
-	if (IS_ERR(tx))
-		goto bail_tx;
-
-	tx->sde = qp->s_sde;
-
-	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
-		/* No vl15 here */
-		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
-		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
-
-		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
-	}
-	tx->wqe = qp->s_wqe;
-	tx->mr = qp->s_rdma_mr;
-	if (qp->s_rdma_mr)
-		qp->s_rdma_mr = NULL;
-	tx->hdr_dwords = hdrwords + 2;
-	ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahdr, pbc);
-	if (unlikely(ret))
-		goto bail_build;
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-	ret =  sdma_send_txreq(tx->sde, &qp->s_iowait, &tx->txreq);
-	if (unlikely(ret == -ECOMM))
-		goto bail_ecomm;
+	trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+				&ps->s_txreq->phdr.hdr);
 	return ret;
 
 bail_ecomm:
 	/* The current one got "sent" */
 	return 0;
 bail_build:
-	/* kmalloc or mapping fail */
-	hfi1_put_txreq(tx);
-	return wait_kmem(dev, qp);
-bail_tx:
-	return PTR_ERR(tx);
+	ret = wait_kmem(dev, qp, ps);
+	if (!ret) {
+		/* free txreq - bad state */
+		hfi1_put_txreq(ps->s_txreq);
+		ps->s_txreq = NULL;
+	}
+	return ret;
 }
 
 /*
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
+static int pio_wait(struct rvt_qp *qp,
+		    struct send_context *sc,
+		    struct hfi1_pkt_state *ps,
+		    u32 flag)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_devdata *dd = sc->dd;
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
 	unsigned long flags;
@@ -1087,74 +936,89 @@ static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		write_seqlock(&dev->iowait_lock);
-		if (list_empty(&qp->s_iowait.list)) {
+		list_add_tail(&ps->s_txreq->txreq.list,
+			      &priv->s_iowait.tx_head);
+		if (list_empty(&priv->s_iowait.list)) {
 			struct hfi1_ibdev *dev = &dd->verbs_dev;
 			int was_empty;
 
+			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
+			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
 			dev->n_piowait++;
-			qp->s_flags |= HFI1_S_WAIT_PIO;
+			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
-			list_add_tail(&qp->s_iowait.list, &sc->piowait);
-			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO);
+			list_add_tail(&priv->s_iowait.list, &sc->piowait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
 			atomic_inc(&qp->refcount);
 			/* counting: only call wantpiobuf_intr if first user */
 			if (was_empty)
 				hfi1_sc_wantpiobuf_intr(sc, 1);
 		}
 		write_sequnlock(&dev->iowait_lock);
-		qp->s_flags &= ~HFI1_S_BUSY;
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5)
+static void verbs_pio_complete(void *arg, int code)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1);
-	u8 vl;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
+	struct hfi1_qp_priv *priv = qp->priv;
 
-	vl = sc_to_vlt(dd, sc5);
-	if (vl >= ppd->vls_supported && vl != 15)
-		return NULL;
-	return dd->vld[vl].sc;
+	if (iowait_pio_dec(&priv->s_iowait))
+		iowait_drain_wakeup(&priv->s_iowait);
 }
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc)
 {
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
 	u32 hdrwords = qp->s_hdrwords;
-	struct hfi1_sge_state *ss = qp->s_cur_sge;
+	struct rvt_sge_state *ss = qp->s_cur_sge;
 	u32 len = qp->s_cur_size;
 	u32 dwords = (len + 3) >> 2;
 	u32 plen = hdrwords + dwords + 2; /* includes pbc */
 	struct hfi1_pportdata *ppd = ps->ppd;
-	u32 *hdr = (u32 *)&ahdr->ibh;
+	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
 	u64 pbc_flags = 0;
-	u32 sc5;
+	u8 sc5;
 	unsigned long flags = 0;
 	struct send_context *sc;
 	struct pio_buf *pbuf;
 	int wc_status = IB_WC_SUCCESS;
+	int ret = 0;
+	pio_release_cb cb = NULL;
+
+	/* only RC/UC use complete */
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		cb = verbs_pio_complete;
+		break;
+	default:
+		break;
+	}
 
 	/* vl15 special case taken care of in ud.c */
-	sc5 = qp->s_sc;
-	sc = qp_to_send_context(qp, sc5);
+	sc5 = priv->s_sc;
+	sc = ps->s_txreq->psc;
 
-	if (!sc)
-		return -EINVAL;
 	if (likely(pbc == 0)) {
-		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
 		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
 		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
 		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
 	}
-	pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
-	if (unlikely(pbuf == NULL)) {
+	if (cb)
+		iowait_pio_inc(&priv->s_iowait);
+	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
+	if (unlikely(!pbuf)) {
+		if (cb)
+			verbs_pio_complete(qp, 0);
 		if (ppd->host_link_state != HLS_UP_ACTIVE) {
 			/*
 			 * If we have filled the PIO buffers to capacity and are
@@ -1174,7 +1038,12 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 			 * so lets continue to queue the request.
 			 */
 			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
-			return no_bufs_available(qp, sc);
+			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
+			if (!ret)
+				/* txreq not queued - free */
+				goto bail;
+			/* tx consumed in wait */
+			return ret;
 		}
 	}
 
@@ -1182,7 +1051,7 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
 	} else {
 		if (ss) {
-			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords*4);
+			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
 			while (len) {
 				void *addr = ss->sge.vaddr;
 				u32 slen = ss->sge.length;
@@ -1197,12 +1066,8 @@ int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
 		}
 	}
 
-	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
-
-	if (qp->s_rdma_mr) {
-		hfi1_put_mr(qp->s_rdma_mr);
-		qp->s_rdma_mr = NULL;
-	}
+	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+			       &ps->s_txreq->phdr.hdr);
 
 pio_bail:
 	if (qp->s_wqe) {
@@ -1211,10 +1076,15 @@ pio_bail:
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
 		spin_lock_irqsave(&qp->s_lock, flags);
-		hfi1_rc_send_complete(qp, &ahdr->ibh);
+		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 	}
-	return 0;
+
+	ret = 0;
+
+bail:
+	hfi1_put_txreq(ps->s_txreq);
+	return ret;
 }
 
 /*
@@ -1247,13 +1117,14 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
  */
 static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
 				    struct hfi1_ib_header *hdr,
-				    struct hfi1_qp *qp)
+				    struct rvt_qp *qp)
 {
+	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_other_headers *ohdr;
 	struct hfi1_devdata *dd;
 	int i = 0;
 	u16 pkey;
-	u8 lnh, sc5 = qp->s_sc;
+	u8 lnh, sc5 = priv->s_sc;
 
 	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
 		return 0;
@@ -1271,14 +1142,14 @@ static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
 	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
 		goto bad;
 
-
 	/* Is the pkey = 0x0, or 0x8000? */
 	if ((pkey & PKEY_LOW_15_MASK) == 0)
 		goto bad;
 
 	/* The most likely matching pkey has index qp->s_pkey_index */
 	if (unlikely(!egress_pkey_matches_entry(pkey,
-					ppd->pkeys[qp->s_pkey_index]))) {
+						ppd->pkeys
+						[qp->s_pkey_index]))) {
 		/* no match - try the entire table */
 		for (; i < MAX_PKEY_VALUES; i++) {
 			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
@@ -1302,31 +1173,65 @@ bad:
 }
 
 /**
+ * get_send_routine - choose an egress routine
+ *
+ * Choose an egress routine based on QP type
+ * and size
+ */
+static inline send_routine get_send_routine(struct rvt_qp *qp,
+					    struct verbs_txreq *tx)
+{
+	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_ib_header *h = &tx->phdr.hdr;
+
+	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
+		return dd->process_pio_send;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+		return dd->process_pio_send;
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		break;
+	case IB_QPT_RC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	case IB_QPT_UC:
+		if (piothreshold &&
+		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
+		    !sdma_txreq_built(&tx->txreq))
+			return dd->process_pio_send;
+		break;
+	default:
+		break;
+	}
+	return dd->process_dma_send;
+}
+
+/**
  * hfi1_verbs_send - send a packet
  * @qp: the QP to send on
  * @ps: the state of the packet to send
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
-	struct ahg_ib_header *ahdr = qp->s_hdr;
+	struct hfi1_qp_priv *priv = qp->priv;
+	send_routine sr;
 	int ret;
-	int pio = 0;
-	unsigned long flags = 0;
-
-	/*
-	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
-	 * can defer SDMA restart until link goes ACTIVE without
-	 * worrying about just how we got there.
-	 */
-	if ((qp->ibqp.qp_type == IB_QPT_SMI) ||
-	    !(dd->flags & HFI1_HAS_SEND_DMA))
-		pio = 1;
 
-	ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp);
+	sr = get_send_routine(qp, ps->s_txreq);
+	ret = egress_pkey_check(dd->pport, &ps->s_txreq->phdr.hdr, qp);
 	if (unlikely(ret)) {
 		/*
 		 * The value we are returning here does not get propagated to
@@ -1336,7 +1241,9 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
 		 * mechanism for handling the errors. So for SDMA we can just
 		 * return.
 		 */
-		if (pio) {
+		if (sr == dd->process_pio_send) {
+			unsigned long flags;
+
 			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
 				  __func__);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -1345,71 +1252,57 @@ int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps)
 		}
 		return -EINVAL;
 	}
-
-	if (pio) {
-		ret = dd->process_pio_send(qp, ps, 0);
-	} else {
-#ifdef CONFIG_SDMA_VERBOSITY
-		dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n",
-			   slashstrip(__FILE__), __LINE__, __func__);
-		dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", qp->s_hdrwords,
-			   qp->s_cur_size);
-#endif
-		ret = dd->process_dma_send(qp, ps, 0);
-	}
-
-	return ret;
+	if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait))
+		return pio_wait(qp,
+				ps->s_txreq->psc,
+				ps,
+				RVT_S_WAIT_PIO_DRAIN);
+	return sr(qp, ps, 0);
 }
 
-static int query_device(struct ib_device *ibdev,
-			struct ib_device_attr *props,
-			struct ib_udata *uhw)
+/**
+ * hfi1_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
-	props->vendor_part_id = dd->pcidev->device;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_hfi1_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = hfi1_max_qps;
-	props->max_qp_wr = hfi1_max_qp_wrs;
-	props->max_sge = hfi1_max_sges;
-	props->max_sge_rd = hfi1_max_sges;
-	props->max_cq = hfi1_max_cqs;
-	props->max_ah = hfi1_max_ahs;
-	props->max_cqe = hfi1_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = hfi1_max_pds;
-	props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = hfi1_max_srqs;
-	props->max_srq_wr = hfi1_max_srq_wrs;
-	props->max_srq_sge = hfi1_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = hfi1_get_npkeys(dd);
-	props->max_mcast_grp = hfi1_max_mcast_grps;
-	props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
+
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+			IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+			IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+			IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
+	rdi->dparms.props.vendor_part_id = dd->pcidev->device;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = hfi1_max_qps;
+	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+	rdi->dparms.props.max_sge = hfi1_max_sges;
+	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
+	rdi->dparms.props.max_cq = hfi1_max_cqs;
+	rdi->dparms.props.max_ah = hfi1_max_ahs;
+	rdi->dparms.props.max_cqe = hfi1_max_cqes;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_pd = hfi1_max_pds;
+	rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = hfi1_max_srqs;
+	rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 
 static inline u16 opa_speed_to_ib(u16 in)
@@ -1443,33 +1336,24 @@ static inline u16 opa_width_to_ib(u16 in)
 	}
 }
 
-static int query_port(struct ib_device *ibdev, u8 port,
+static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 		      struct ib_port_attr *props)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : 0;
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	/* OPA logical states match IB logical states */
 	props->state = driver_lstate(ppd);
 	props->phys_state = hfi1_ibphys_portstate(ppd);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = hfi1_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
 	/* see rate_show() in ib core/sysfs.c */
 	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
 	props->max_vl_num = ppd->vls_supported;
-	props->init_type_reply = 0;
 
 	/* Once we are a "first class" citizen and have added the OPA MTUs to
 	 * the core we can advertise the larger MTU enum to the ULPs, for now
@@ -1483,27 +1367,6 @@ static int query_port(struct ib_device *ibdev, u8 port,
 				      4096 : hfi1_max_mtu), IB_MTU_4096);
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
-	props->subnet_timeout = ibp->subnet_timeout;
-
-	return 0;
-}
-
-static int port_immutable(struct ib_device *ibdev, u8 port_num,
-			  struct ib_port_immutable *immutable)
-{
-	struct ib_port_attr attr;
-	int err;
-
-	err = query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
-
-	memset(immutable, 0, sizeof(*immutable));
-
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
-	immutable->max_mad_size = OPA_MGMT_MAD_SIZE;
 
 	return 0;
 }
@@ -1547,102 +1410,31 @@ bail:
 	return ret;
 }
 
-static int modify_port(struct ib_device *ibdev, u8 port,
-		       int port_modify_mask, struct ib_port_modify *props)
-{
-	struct hfi1_ibport *ibp = to_iport(ibdev, port);
-	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	int ret = 0;
-
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		hfi1_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN) {
-		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
-		  OPA_LINKDOWN_REASON_UNKNOWN);
-		ret = set_link_state(ppd, HLS_DN_DOWNDEF);
-	}
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
-	return ret;
-}
-
-static int query_gid(struct ib_device *ibdev, u8 port,
-		     int index, union ib_gid *gid)
+static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
-
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct hfi1_ibport *ibp = to_iport(ibdev, port);
-		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = cpu_to_be64(ppd->guid);
-		else if (index < HFI1_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static struct ib_pd *alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
-{
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_pd *pd;
-	struct ib_pd *ret;
-
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == hfi1_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+	struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+	int ret;
 
-bail:
+	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
+			     OPA_LINKDOWN_REASON_UNKNOWN);
+	ret = set_link_state(ppd, HLS_DN_DOWNDEF);
 	return ret;
 }
 
-static int dealloc_pd(struct ib_pd *ibpd)
+static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			    int guid_index, __be64 *guid)
 {
-	struct hfi1_pd *pd = to_ipd(ibpd);
-	struct hfi1_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
+	struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
+	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = cpu_to_be64(ppd->guid);
+	else if (guid_index < HFI1_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -1657,101 +1449,57 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
 	return ibp->sl_to_sc[ah->sl];
 }
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_devdata *dd;
 	u8 sc5;
 
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != HFI1_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
-		goto bail;
-	if (ah_attr->sl >= OPA_MAX_SLS)
-		goto bail;
 	/* test the mapping for validity */
 	ibp = to_iport(ibdev, ah_attr->port_num);
 	ppd = ppd_from_ibp(ibp);
 	sc5 = ibp->sl_to_sc[ah_attr->sl];
 	dd = dd_from_ppd(ppd);
 	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
-		goto bail;
+		return -EINVAL;
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *create_ah(struct ib_pd *pd,
-			       struct ib_ah_attr *ah_attr)
+static void hfi1_notify_new_ah(struct ib_device *ibdev,
+			       struct ib_ah_attr *ah_attr,
+			       struct rvt_ah *ah)
 {
-	struct hfi1_ah *ah;
-	struct ib_ah *ret;
-	struct hfi1_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
-
-	if (hfi1_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == hfi1_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
+	struct hfi1_ibport *ibp;
+	struct hfi1_pportdata *ppd;
+	struct hfi1_devdata *dd;
+	u8 sc5;
 
-	ret = &ah->ibah;
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	sc5 = ibp->sl_to_sc[ah->attr.sl];
+	dd = dd_from_ppd(ppd);
+	ah->vl = sc_to_vlt(dd, sc5);
+	if (ah->vl < num_vls || ah->vl == 15)
+		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
 }
 
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct hfi1_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp[0]);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
@@ -1759,51 +1507,6 @@ struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
 }
 
 /**
- * destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int destroy_ah(struct ib_ah *ibah)
-{
-	struct hfi1_ibdev *dev = to_idev(ibah->device);
-	struct hfi1_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	if (hfi1_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct hfi1_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
-/**
  * hfi1_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the hfi1_ib device
  */
@@ -1812,54 +1515,6 @@ unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
 	return ARRAY_SIZE(dd->pport[0].pkeys);
 }
 
-static int query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-		      u16 *pkey)
-{
-	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= hfi1_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = hfi1_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the driver
- */
-
-static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata)
-{
-	struct hfi1_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -1871,28 +1526,21 @@ static void init_ibport(struct hfi1_pportdata *ppd)
 		ibp->sc_to_sl[i] = i;
 	}
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = 0;
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = 0;
 	/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
-	ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
+	ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_CAP_MASK_NOTICE_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
-
-	RCU_INIT_POINTER(ibp->qp[0], NULL);
-	RCU_INIT_POINTER(ibp->qp[1], NULL);
-}
-
-static void verbs_txreq_kmem_cache_ctor(void *obj)
-{
-	struct verbs_txreq *tx = obj;
-
-	memset(tx, 0, sizeof(*tx));
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
 /**
@@ -1903,74 +1551,26 @@ static void verbs_txreq_kmem_cache_ctor(void *obj)
 int hfi1_register_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct hfi1_pportdata *ppd = dd->pport;
-	unsigned i, lk_tab_size;
+	unsigned i;
 	int ret;
 	size_t lcpysz = IB_DEVICE_NAME_MAX;
-	u16 descq_cnt;
-	char buf[TXREQ_NAME_LEN];
-
-	ret = hfi1_qp_init(dev);
-	if (ret)
-		goto err_qp_init;
-
 
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
+
 	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
-	/*
-	 * The top hfi1_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	dev->lk_table.max = 1 << hfi1_lkey_table_size;
-	/* ensure generation is at least 4 bits (keys.c) */
-	if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			      hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct hfi1_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
 	seqlock_init(&dev->iowait_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->txwait);
 	INIT_LIST_HEAD(&dev->memwait);
 
-	descq_cnt = sdma_get_descq_cnt();
-
-	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
-	/* SLAB_HWCACHE_ALIGN for AHG */
-	dev->verbs_txreq_cache = kmem_cache_create(buf,
-						   sizeof(struct verbs_txreq),
-						   0, SLAB_HWCACHE_ALIGN,
-						   verbs_txreq_kmem_cache_ctor);
-	if (!dev->verbs_txreq_cache) {
-		ret = -ENOMEM;
+	ret = verbs_txreq_init(dev);
+	if (ret)
 		goto err_verbs_txreq;
-	}
 
 	/*
 	 * The system image GUID is supposed to be the same for all
@@ -1983,142 +1583,119 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = cpu_to_be64(ppd->guid);
-	ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = query_device;
 	ibdev->modify_device = modify_device;
-	ibdev->query_port = query_port;
-	ibdev->modify_port = modify_port;
-	ibdev->query_pkey = query_pkey;
-	ibdev->query_gid = query_gid;
-	ibdev->alloc_ucontext = alloc_ucontext;
-	ibdev->dealloc_ucontext = dealloc_ucontext;
-	ibdev->alloc_pd = alloc_pd;
-	ibdev->dealloc_pd = dealloc_pd;
-	ibdev->create_ah = create_ah;
-	ibdev->destroy_ah = destroy_ah;
-	ibdev->modify_ah = modify_ah;
-	ibdev->query_ah = query_ah;
-	ibdev->create_srq = hfi1_create_srq;
-	ibdev->modify_srq = hfi1_modify_srq;
-	ibdev->query_srq = hfi1_query_srq;
-	ibdev->destroy_srq = hfi1_destroy_srq;
-	ibdev->create_qp = hfi1_create_qp;
-	ibdev->modify_qp = hfi1_modify_qp;
-	ibdev->query_qp = hfi1_query_qp;
-	ibdev->destroy_qp = hfi1_destroy_qp;
-	ibdev->post_send = post_send;
-	ibdev->post_recv = post_receive;
-	ibdev->post_srq_recv = hfi1_post_srq_receive;
-	ibdev->create_cq = hfi1_create_cq;
-	ibdev->destroy_cq = hfi1_destroy_cq;
-	ibdev->resize_cq = hfi1_resize_cq;
-	ibdev->poll_cq = hfi1_poll_cq;
-	ibdev->req_notify_cq = hfi1_req_notify_cq;
-	ibdev->get_dma_mr = hfi1_get_dma_mr;
-	ibdev->reg_user_mr = hfi1_reg_user_mr;
-	ibdev->dereg_mr = hfi1_dereg_mr;
-	ibdev->alloc_mr = hfi1_alloc_mr;
-	ibdev->alloc_fmr = hfi1_alloc_fmr;
-	ibdev->map_phys_fmr = hfi1_map_phys_fmr;
-	ibdev->unmap_fmr = hfi1_unmap_fmr;
-	ibdev->dealloc_fmr = hfi1_dealloc_fmr;
-	ibdev->attach_mcast = hfi1_multicast_attach;
-	ibdev->detach_mcast = hfi1_multicast_detach;
+
+	/* keep process mad in the driver */
 	ibdev->process_mad = hfi1_process_mad;
-	ibdev->mmap = hfi1_mmap;
-	ibdev->dma_ops = &hfi1_dma_mapping_ops;
-	ibdev->get_port_immutable = port_immutable;
 
 	strncpy(ibdev->node_desc, init_utsname()->nodename,
 		sizeof(ibdev->node_desc));
 
-	ret = ib_register_device(ibdev, hfi1_create_port_files);
-	if (ret)
-		goto err_reg;
-
-	ret = hfi1_create_agents(dev);
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be;
+	dd->verbs_dev.rdi.driver_f.query_port_state = query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg;
+	/*
+	 * Fill in rvt info device attributes.
+	 */
+	hfi1_fill_device_attr(dd);
+
+	/* queue pair */
+	dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 0;
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
+	dd->verbs_dev.rdi.dparms.qpn_res_end =
+	dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
+	dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
+
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
+	dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+
+	/* completeion queue */
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "hfi1_cq%d", dd->unit);
+	dd->verbs_dev.rdi.dparms.node = dd->node;
+
+	/* misc settings */
+	dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */
+	dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++)
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      ppd->pkeys);
+
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_verbs_txreq;
 
 	ret = hfi1_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	hfi1_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_verbs_txreq:
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
-err_lk:
-	hfi1_qp_exit(dev);
-err_qp_init:
+	verbs_txreq_exit(dev);
 	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
 {
 	struct hfi1_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
 
 	hfi1_verbs_unregister_sysfs(dd);
 
-	hfi1_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->txwait))
 		dd_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		dd_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		dd_dev_err(dd, "DMA MR not NULL!\n");
 
-	hfi1_qp_exit(dev);
 	del_timer_sync(&dev->mem_timer);
-	kmem_cache_destroy(dev->verbs_txreq_cache);
-	vfree(dev->lk_table.table);
+	verbs_txreq_exit(dev);
 }
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet)
@@ -2126,7 +1703,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
 	struct hfi1_ib_header *hdr = packet->hdr;
-	struct hfi1_qp *qp = packet->qp;
+	struct rvt_qp *qp = packet->qp;
 	u32 lqpn, rqpn = 0;
 	u16 rlid = 0;
 	u8 sl, sc5, sc4_bit, svc_type;
@@ -2149,7 +1726,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
 		svc_type = IB_CC_SVCTYPE_UD;
 		break;
 	default:
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		return;
 	}
 
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 286e468b0479..6c4670fffdbb 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2015, 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -59,9 +56,13 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
+#include <linux/slab.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_mad.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
+#include <rdma/rdmavt_cq.h>
 
 struct hfi1_ctxtdata;
 struct hfi1_pportdata;
@@ -79,12 +80,6 @@ struct hfi1_packet;
  */
 #define HFI1_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -95,17 +90,6 @@ struct hfi1_packet;
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_hfi1_state_ops[]) */
-#define HFI1_POST_SEND_OK                0x01
-#define HFI1_POST_RECV_OK                0x02
-#define HFI1_PROCESS_RECV_OK             0x04
-#define HFI1_PROCESS_SEND_OK             0x08
-#define HFI1_PROCESS_NEXT_SEND_OK        0x10
-#define HFI1_FLUSH_SEND			0x20
-#define HFI1_FLUSH_RECV			0x40
-#define HFI1_PROCESS_OR_FLUSH_SEND \
-	(HFI1_PROCESS_SEND_OK | HFI1_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -208,341 +192,18 @@ struct hfi1_pio_header {
 } __packed;
 
 /*
- * used for force cacheline alignment for AHG
- */
-struct tx_pio_header {
-	struct hfi1_pio_header phdr;
-} ____cacheline_aligned;
-
-/*
- * There is one struct hfi1_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct hfi1_mcast_qp.
- */
-struct hfi1_mcast_qp {
-	struct list_head list;
-	struct hfi1_qp *qp;
-};
-
-struct hfi1_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct hfi1_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct hfi1_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by hfi1_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct hfi1_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct hfi1_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct hfi1_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct hfi1_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct hfi1_cq_wc *queue;
-	struct hfi1_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * Used by the verbs layer.
- */
-struct hfi1_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of hfi1_segs that fit in a page. */
-#define HFI1_SEGSZ     (PAGE_SIZE / sizeof(struct hfi1_seg))
-
-struct hfi1_segarray {
-	struct hfi1_seg segs[HFI1_SEGSZ];
-};
-
-struct hfi1_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of hfi1_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	atomic_t refcount;
-	struct hfi1_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct hfi1_sge {
-	struct hfi1_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct hfi1_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	struct hfi1_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct hfi1_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-		struct ib_ud_wr ud_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct hfi1_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct hfi1_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
+ * hfi1 specific data structures that will be hidden from rvt after the queue
+ * pair is made common
  */
-struct hfi1_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct hfi1_rwqe wq[0];
-};
-
-struct hfi1_rq {
-	struct hfi1_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	/* protect changes in this struct */
-	spinlock_t lock ____cacheline_aligned_in_smp;
-};
-
-struct hfi1_srq {
-	struct ib_srq ibsrq;
-	struct hfi1_rq rq;
-	struct hfi1_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct hfi1_sge_state {
-	struct hfi1_sge *sg_list;      /* next SGE to be used if any */
-	struct hfi1_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct hfi1_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct hfi1_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct hfi1_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct hfi1_qp __rcu *next;           /* link list for QPN hash table */
-	struct hfi1_swqe *s_wq;  /* send work queue */
-	struct hfi1_mmap_info *ip;
-	struct ahg_ib_header *s_hdr;     /* next packet header to send */
-	/* sc for UC/RC QPs - based on ah for UD */
-	u8 s_sc;
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
-
-	u8 state;               /* QP state */
-	u8 allowed_ops;		/* high order bits of allowed opcodes */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct hfi1_ack_entry s_ack_queue[HFI1_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct hfi1_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_adefered;         /* number of acks defered */
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waiting to respond */
-
-	struct hfi1_sge_state r_sge;     /* current receive data */
-	struct hfi1_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct hfi1_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct hfi1_swqe *s_wqe;
-	struct hfi1_sge_state s_sge;     /* current send request data */
-	struct hfi1_mregion *s_rdma_mr;
-	struct sdma_engine *s_sde; /* current sde */
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	s8 s_ahgidx;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct hfi1_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
-
+struct hfi1_qp_priv {
+	struct ahg_ib_header *s_hdr;              /* next header to send */
+	struct sdma_engine *s_sde;                /* current sde */
+	struct send_context *s_sendcontext;       /* current sendcontext */
+	u8 s_sc;		                  /* SC[0..4] for next packet */
+	u8 r_adefered;                            /* number of acks defered */
 	struct iowait s_iowait;
-
-	struct hfi1_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct timer_list s_rnr_timer;
+	struct rvt_qp *owner;
 };
 
 /*
@@ -553,123 +214,11 @@ struct hfi1_pkt_state {
 	struct hfi1_ibdev *dev;
 	struct hfi1_ibport *ibp;
 	struct hfi1_pportdata *ppd;
+	struct verbs_txreq *s_txreq;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define HFI1_R_WRID_VALID        0
-#define HFI1_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define HFI1_R_REUSE_SGE       0x01
-#define HFI1_R_RDMAR_SEQ       0x02
-/* defer ack until end of interrupt session */
-#define HFI1_R_RSP_DEFERED_ACK 0x04
-/* relay ack to send engine */
-#define HFI1_R_RSP_SEND        0x08
-#define HFI1_R_COMM_EST        0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * HFI1_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * HFI1_S_BUSY - send tasklet is processing the QP
- * HFI1_S_TIMER - the RC retry timer is active
- * HFI1_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * HFI1_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * HFI1_S_WAIT_RNR - waiting for RNR timeout
- * HFI1_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * HFI1_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * HFI1_S_WAIT_PIO - waiting for a send buffer to be available
- * HFI1_S_WAIT_TX - waiting for a struct verbs_txreq to be available
- * HFI1_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * HFI1_S_WAIT_KMEM - waiting for kernel memory to be available
- * HFI1_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * HFI1_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * HFI1_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- * HFI1_S_ECN - a BECN was queued to the send engine
- */
-#define HFI1_S_SIGNAL_REQ_WR	0x0001
-#define HFI1_S_BUSY		0x0002
-#define HFI1_S_TIMER		0x0004
-#define HFI1_S_RESP_PENDING	0x0008
-#define HFI1_S_ACK_PENDING	0x0010
-#define HFI1_S_WAIT_FENCE	0x0020
-#define HFI1_S_WAIT_RDMAR	0x0040
-#define HFI1_S_WAIT_RNR		0x0080
-#define HFI1_S_WAIT_SSN_CREDIT	0x0100
-#define HFI1_S_WAIT_DMA		0x0200
-#define HFI1_S_WAIT_PIO		0x0400
-#define HFI1_S_WAIT_TX		0x0800
-#define HFI1_S_WAIT_DMA_DESC	0x1000
-#define HFI1_S_WAIT_KMEM		0x2000
-#define HFI1_S_WAIT_PSN		0x4000
-#define HFI1_S_WAIT_ACK		0x8000
-#define HFI1_S_SEND_ONE		0x10000
-#define HFI1_S_UNLIMITED_CREDIT	0x20000
-#define HFI1_S_AHG_VALID		0x40000
-#define HFI1_S_AHG_CLEAR		0x80000
-#define HFI1_S_ECN		0x100000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define HFI1_S_ANY_WAIT_IO (HFI1_S_WAIT_PIO | HFI1_S_WAIT_TX | \
-	HFI1_S_WAIT_DMA_DESC | HFI1_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define HFI1_S_ANY_WAIT_SEND (HFI1_S_WAIT_FENCE | HFI1_S_WAIT_RDMAR | \
-	HFI1_S_WAIT_RNR | HFI1_S_WAIT_SSN_CREDIT | HFI1_S_WAIT_DMA | \
-	HFI1_S_WAIT_PSN | HFI1_S_WAIT_ACK)
-
-#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | HFI1_S_ANY_WAIT_SEND)
-
 #define HFI1_PSN_CREDIT  16
 
-/*
- * Since struct hfi1_swqe is not a fixed size, we can't simply index into
- * struct hfi1_qp.s_wq.  This function does the array index computation.
- */
-static inline struct hfi1_swqe *get_swqe_ptr(struct hfi1_qp *qp,
-					     unsigned n)
-{
-	return (struct hfi1_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct hfi1_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct hfi1_sge)) * n);
-}
-
-/*
- * Since struct hfi1_rwqe is not a fixed size, we can't simply index into
- * struct hfi1_rwq.wq.  This function does the array index computation.
- */
-static inline struct hfi1_rwqe *get_rwqe_ptr(struct hfi1_rq *rq, unsigned n)
-{
-	return (struct hfi1_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct hfi1_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct hfi1_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct hfi1_mregion __rcu **table;
-};
-
 struct hfi1_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -690,75 +239,20 @@ static inline void inc_opstats(
 }
 
 struct hfi1_ibport {
-	struct hfi1_qp __rcu *qp[2];
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct hfi1_ah *sm_ah;
-	struct hfi1_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
-
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
+	struct rvt_qp __rcu *qp[2];
+	struct rvt_ibport rvp;
+
 	__be64 guids[HFI1_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
-	u64 n_rc_resends;
-	u64 n_seq_naks;
-	u64 n_rdma_seq;
-	u64 n_rnr_naks;
-	u64 n_other_naks;
-	u64 n_loop_pkts;
-	u64 n_pkt_drops;
-	u64 n_vl15_dropped;
-	u64 n_rc_timeouts;
-	u64 n_dmawait;
-	u64 n_unaligned;
-	u64 n_rc_dupreq;
-	u64 n_rc_seqnak;
-
-	/* Hot-path per CPU counters to avoid cacheline trading to update */
-	u64 z_rc_acks;
-	u64 z_rc_qacks;
-	u64 z_rc_delayed_comp;
-	u64 __percpu *rc_acks;
-	u64 __percpu *rc_qacks;
-	u64 __percpu *rc_delayed_comp;
-
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
+
 	/* the first 16 entries are sl_to_vl for !OPA */
 	u8 sl_to_sc[32];
 	u8 sc_to_sl[32];
 };
 
-
-struct hfi1_qp_ibdev;
 struct hfi1_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct hfi1_mregion __rcu *dma_mr;
-
-	struct hfi1_qp_ibdev *qp_dev;
+	struct rvt_dev_info rdi; /* Must be first */
 
 	/* QP numbers are shared by all IB ports */
-	struct hfi1_lkey_table lk_table;
 	/* protect wait lists */
 	seqlock_t iowait_lock;
 	struct list_head txwait;        /* list for wait verbs_txreq */
@@ -767,26 +261,11 @@ struct hfi1_ibdev {
 	struct kmem_cache *verbs_txreq_cache;
 	struct timer_list mem_timer;
 
-	/* other waiters */
-	spinlock_t pending_lock;
-
 	u64 n_piowait;
+	u64 n_piodrain;
 	u64 n_txwait;
 	u64 n_kmem_wait;
-	u64 n_send_schedule;
-
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
+
 #ifdef CONFIG_DEBUG_FS
 	/* per HFI debugfs */
 	struct dentry *hfi1_ibdev_dbg;
@@ -795,66 +274,31 @@ struct hfi1_ibdev {
 #endif
 };
 
-struct hfi1_verbs_counters {
-	u64 symbol_error_counter;
-	u64 link_error_recovery_counter;
-	u64 link_downed_counter;
-	u64 port_rcv_errors;
-	u64 port_rcv_remphys_errors;
-	u64 port_xmit_discards;
-	u64 port_xmit_data;
-	u64 port_rcv_data;
-	u64 port_xmit_packets;
-	u64 port_rcv_packets;
-	u32 local_link_integrity_errors;
-	u32 excessive_buffer_overrun_errors;
-	u32 vl15_dropped;
-};
-
-static inline struct hfi1_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct hfi1_mr, ibmr);
-}
-
-static inline struct hfi1_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct hfi1_pd, ibpd);
-}
-
-static inline struct hfi1_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct hfi1_ah, ibah);
-}
-
-static inline struct hfi1_cq *to_icq(struct ib_cq *ibcq)
+static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibcq, struct hfi1_cq, ibcq);
-}
+	struct rvt_dev_info *rdi;
 
-static inline struct hfi1_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct hfi1_srq, ibsrq);
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct hfi1_ibdev, rdi);
 }
 
-static inline struct hfi1_qp *to_iqp(struct ib_qp *ibqp)
+static inline struct rvt_qp *iowait_to_qp(struct  iowait *s_iowait)
 {
-	return container_of(ibqp, struct hfi1_qp, ibqp);
-}
+	struct hfi1_qp_priv *priv;
 
-static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
-{
-	return container_of(ibdev, struct hfi1_ibdev, ibdev);
+	priv = container_of(s_iowait, struct hfi1_qp_priv, s_iowait);
+	return priv->owner;
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int hfi1_send_ok(struct hfi1_qp *qp)
+static inline int hfi1_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (HFI1_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & HFI1_S_RESP_PENDING) ||
-		 !(qp->s_flags & HFI1_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
 /*
@@ -862,7 +306,7 @@ static inline int hfi1_send_ok(struct hfi1_qp *qp)
  */
 void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		    u32 qp1, u32 qp2, u16 lid1, u16 lid2);
-void hfi1_cap_mask_chg(struct hfi1_ibport *ibp);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
 void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
 int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
@@ -870,8 +314,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
 		     const struct ib_mad_hdr *in_mad, size_t in_mad_size,
 		     struct ib_mad_hdr *out_mad, size_t *out_mad_size,
 		     u16 *out_mad_pkey_index);
-int hfi1_create_agents(struct hfi1_ibdev *dev);
-void hfi1_free_agents(struct hfi1_ibdev *dev);
 
 /*
  * The PSN_MASK and PSN_SHIFT allow for
@@ -901,7 +343,7 @@ void hfi1_free_agents(struct hfi1_ibdev *dev);
  */
 static inline int cmp_msn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << 8;
+	return (((int)a) - ((int)b)) << 8;
 }
 
 /*
@@ -910,7 +352,7 @@ static inline int cmp_msn(u32 a, u32 b)
  */
 static inline int cmp_psn(u32 a, u32 b)
 {
-	return (((int) a) - ((int) b)) << PSN_SHIFT;
+	return (((int)a) - ((int)b)) << PSN_SHIFT;
 }
 
 /*
@@ -929,23 +371,15 @@ static inline u32 delta_psn(u32 a, u32 b)
 	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
 }
 
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid);
-
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp);
-
 struct verbs_txreq;
 void hfi1_put_txreq(struct verbs_txreq *tx);
 
-int hfi1_verbs_send(struct hfi1_qp *qp, struct hfi1_pkt_state *ps);
+int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-void hfi1_copy_sge(struct hfi1_sge_state *ss, void *data, u32 length,
-		   int release);
+void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
+		   int release, int copy_last);
 
-void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release);
+void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 void hfi1_cnp_rcv(struct hfi1_packet *packet);
 
@@ -957,147 +391,75 @@ void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
 	struct hfi1_ib_header *hdr,
 	u32 rcv_flags,
-	struct hfi1_qp *qp);
+	struct rvt_qp *qp);
 
 u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
-int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
-
 struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
 
 void hfi1_rc_rnr_retry(unsigned long arg);
+void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
+void hfi1_rc_timeout(unsigned long arg);
+void hfi1_del_timers_sync(struct rvt_qp *qp);
+void hfi1_stop_rc_timers(struct rvt_qp *qp);
 
-void hfi1_rc_send_complete(struct hfi1_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
 
-void hfi1_rc_error(struct hfi1_qp *qp, enum ib_wc_status err);
+void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
 void hfi1_ud_rcv(struct hfi1_packet *packet);
 
 int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
 
-int hfi1_alloc_lkey(struct hfi1_mregion *mr, int dma_region);
-
-void hfi1_free_lkey(struct hfi1_mregion *mr);
-
-int hfi1_lkey_ok(struct hfi1_lkey_table *rkt, struct hfi1_pd *pd,
-		 struct hfi1_sge *isge, struct ib_sge *sge, int acc);
-
-int hfi1_rkey_ok(struct hfi1_qp *qp, struct hfi1_sge *sge,
-		 u32 len, u64 vaddr, u32 rkey, int acc);
-
-int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
-
-struct ib_srq *hfi1_create_srq(struct ib_pd *ibpd,
-			       struct ib_srq_init_attr *srq_init_attr,
-			       struct ib_udata *udata);
-
-int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		    enum ib_srq_attr_mask attr_mask,
-		    struct ib_udata *udata);
-
-int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int hfi1_destroy_srq(struct ib_srq *ibsrq);
-
-int hfi1_cq_init(struct hfi1_devdata *dd);
-
-void hfi1_cq_exit(struct hfi1_devdata *dd);
-
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int sig);
-
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata);
-
-int hfi1_destroy_cq(struct ib_cq *ibcq);
-
-int hfi1_req_notify_cq(
-	struct ib_cq *ibcq,
-	enum ib_cq_notify_flags notify_flags);
-
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			       u64 virt_addr, int mr_access_flags,
-			       struct ib_udata *udata);
+int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
-int hfi1_dereg_mr(struct ib_mr *ibmr);
+void hfi1_migrate_qp(struct rvt_qp *qp);
 
-struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
-			    enum ib_mr_type mr_type,
-			    u32 max_entries);
+int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			 int attr_mask, struct ib_udata *udata);
 
-struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			      struct ib_fmr_attr *fmr_attr);
+void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
+		    int attr_mask, struct ib_udata *udata);
 
-int hfi1_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		      int list_len, u64 iova);
+int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
 
-int hfi1_unmap_fmr(struct list_head *fmr_list);
+extern const u32 rc_only_opcode;
+extern const u32 uc_only_opcode;
 
-int hfi1_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void hfi1_get_mr(struct hfi1_mregion *mr)
-{
-	atomic_inc(&mr->refcount);
-}
-
-static inline void hfi1_put_mr(struct hfi1_mregion *mr)
+static inline u8 get_opcode(struct hfi1_ib_header *h)
 {
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
-}
+	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
 
-static inline void hfi1_put_ss(struct hfi1_sge_state *ss)
-{
-	while (ss->num_sge) {
-		hfi1_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
+	if (lnh == IB_LNH_IBA_LOCAL)
+		return be32_to_cpu(h->u.oth.bth[0]) >> 24;
+	else
+		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
 }
 
-void hfi1_release_mmap_info(struct kref *ref);
-
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev, u32 size,
-					     struct ib_ucontext *context,
-					     void *obj);
-
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj);
-
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int hfi1_get_rwqe(struct hfi1_qp *qp, int wr_id_only);
-
 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
-		       int has_grh, struct hfi1_qp *qp, u32 bth0);
+		       int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 		  struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void hfi1_make_ruc_header(struct hfi1_qp *qp, struct hfi1_other_headers *ohdr,
-			  u32 bth0, u32 bth2, int middle);
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+			  u32 bth0, u32 bth2, int middle,
+			  struct hfi1_pkt_state *ps);
 
-void hfi1_do_send(struct work_struct *work);
+void _hfi1_do_send(struct work_struct *work);
 
-void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
+void hfi1_do_send(struct rvt_qp *qp);
+
+void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 			enum ib_wc_status status);
 
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct hfi1_qp *qp, int is_fecn);
+void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);
 
-int hfi1_make_rc_req(struct hfi1_qp *qp);
+int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_uc_req(struct hfi1_qp *qp);
+int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
-int hfi1_make_ud_req(struct hfi1_qp *qp);
+int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
 
 int hfi1_register_ib_device(struct hfi1_devdata *);
 
@@ -1107,24 +469,42 @@ void hfi1_ib_rcv(struct hfi1_packet *packet);
 
 unsigned hfi1_get_npkeys(struct hfi1_devdata *);
 
-int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct hfi1_pkt_state *ps,
+int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
 			u64 pbc);
 
-struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5);
+int hfi1_wss_init(void);
+void hfi1_wss_exit(void);
+
+/* platform specific: return the lowest level cache (llc) size, in KiB */
+static inline int wss_llc_size(void)
+{
+	/* assume that the boot CPU value is universal for all CPUs */
+	return boot_cpu_data.x86_cache_size;
+}
+
+/* platform specific: cacheless copy */
+static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+{
+	/*
+	 * Use the only available X64 cacheless copy.  Add a __user cast
+	 * to quiet sparse.  The src agument is already in the kernel so
+	 * there are no security issues.  The extra fault recovery machinery
+	 * is not invoked.
+	 */
+	__copy_user_nocache(dst, (void __user *)src, n, 0);
+}
 
 extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
 
 extern const u8 hdr_len_by_opcode[];
 
-extern const int ib_hfi1_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 extern __be64 ib_hfi1_sys_image_guid;    /* in network order */
 
-extern unsigned int hfi1_lkey_table_size;
-
 extern unsigned int hfi1_max_cqes;
 
 extern unsigned int hfi1_max_cqs;
@@ -1145,8 +525,8 @@ extern unsigned int hfi1_max_srq_sges;
 
 extern unsigned int hfi1_max_srq_wrs;
 
-extern const u32 ib_hfi1_rnr_table[];
+extern unsigned short piothreshold;
 
-extern struct ib_dma_mapping_ops hfi1_dma_mapping_ops;
+extern const u32 ib_hfi1_rnr_table[];
 
 #endif                          /* HFI1_VERBS_H */
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.c b/drivers/staging/rdma/hfi1/verbs_txreq.c
new file mode 100644
index 000000000000..bc95c4112c61
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs_txreq.h"
+#include "qp.h"
+#include "trace.h"
+
+#define TXREQ_LEN 24
+
+void hfi1_put_txreq(struct verbs_txreq *tx)
+{
+	struct hfi1_ibdev *dev;
+	struct rvt_qp *qp;
+	unsigned long flags;
+	unsigned int seq;
+	struct hfi1_qp_priv *priv;
+
+	qp = tx->qp;
+	dev = to_idev(qp->ibqp.device);
+
+	if (tx->mr)
+		rvt_put_mr(tx->mr);
+
+	sdma_txclean(dd_from_dev(dev), &tx->txreq);
+
+	/* Free verbs_txreq and return to slab cache */
+	kmem_cache_free(dev->verbs_txreq_cache, tx);
+
+	do {
+		seq = read_seqbegin(&dev->iowait_lock);
+		if (!list_empty(&dev->txwait)) {
+			struct iowait *wait;
+
+			write_seqlock_irqsave(&dev->iowait_lock, flags);
+			wait = list_first_entry(&dev->txwait, struct iowait,
+						list);
+			qp = iowait_to_qp(wait);
+			priv = qp->priv;
+			list_del_init(&priv->s_iowait.list);
+			/* refcount held until actual wake up */
+			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+			hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
+			break;
+		}
+	} while (read_seqretry(&dev->iowait_lock, seq));
+}
+
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	write_seqlock(&dev->iowait_lock);
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		struct hfi1_qp_priv *priv;
+
+		tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+		if (tx)
+			goto out;
+		priv = qp->priv;
+		if (list_empty(&priv->s_iowait.list)) {
+			dev->n_txwait++;
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->s_iowait.list, &dev->txwait);
+			trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
+			atomic_inc(&qp->refcount);
+		}
+		qp->s_flags &= ~RVT_S_BUSY;
+	}
+out:
+	write_sequnlock(&dev->iowait_lock);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return tx;
+}
+
+static void verbs_txreq_kmem_cache_ctor(void *obj)
+{
+	struct verbs_txreq *tx = (struct verbs_txreq *)obj;
+
+	memset(tx, 0, sizeof(*tx));
+}
+
+int verbs_txreq_init(struct hfi1_ibdev *dev)
+{
+	char buf[TXREQ_LEN];
+	struct hfi1_devdata *dd = dd_from_dev(dev);
+
+	snprintf(buf, sizeof(buf), "hfi1_%u_vtxreq_cache", dd->unit);
+	dev->verbs_txreq_cache = kmem_cache_create(buf,
+						   sizeof(struct verbs_txreq),
+						   0, SLAB_HWCACHE_ALIGN,
+						   verbs_txreq_kmem_cache_ctor);
+	if (!dev->verbs_txreq_cache)
+		return -ENOMEM;
+	return 0;
+}
+
+void verbs_txreq_exit(struct hfi1_ibdev *dev)
+{
+	kmem_cache_destroy(dev->verbs_txreq_cache);
+	dev->verbs_txreq_cache = NULL;
+}
diff --git a/drivers/staging/rdma/hfi1/verbs_txreq.h b/drivers/staging/rdma/hfi1/verbs_txreq.h
new file mode 100644
index 000000000000..1cf69b2fe4a5
--- /dev/null
+++ b/drivers/staging/rdma/hfi1/verbs_txreq.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef HFI1_VERBS_TXREQ_H
+#define HFI1_VERBS_TXREQ_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+
+#include "verbs.h"
+#include "sdma_txreq.h"
+#include "iowait.h"
+
+struct verbs_txreq {
+	struct hfi1_pio_header	phdr;
+	struct sdma_txreq       txreq;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
+	struct sdma_engine     *sde;
+	struct send_context     *psc;
+	u16                     hdr_dwords;
+};
+
+struct hfi1_ibdev;
+struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
+				struct rvt_qp *qp);
+
+static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
+					    struct rvt_qp *qp)
+{
+	struct verbs_txreq *tx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+	if (unlikely(!tx)) {
+		/* call slow path to get the lock */
+		tx = __get_txreq(dev, qp);
+		if (IS_ERR(tx))
+			return tx;
+	}
+	tx->qp = qp;
+	tx->mr = NULL;
+	tx->sde = priv->s_sde;
+	tx->psc = priv->s_sendcontext;
+	/* so that we can test if the sdma decriptors are there */
+	tx->txreq.num_desc = 0;
+	return tx;
+}
+
+static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
+{
+	return &tx->txreq;
+}
+
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+{
+	struct sdma_txreq *stx;
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	stx = iowait_get_txhead(&priv->s_iowait);
+	if (stx)
+		return container_of(stx, struct verbs_txreq, txreq);
+	return NULL;
+}
+
+void hfi1_put_txreq(struct verbs_txreq *tx);
+int verbs_txreq_init(struct hfi1_ibdev *dev);
+void verbs_txreq_exit(struct hfi1_ibdev *dev);
+
+#endif                         /* HFI1_VERBS_TXREQ_H */
diff --git a/drivers/staging/rtl8712/TODO b/drivers/staging/rtl8712/TODO
index d8dfe5bfe702..847c8c41f4f7 100644
--- a/drivers/staging/rtl8712/TODO
+++ b/drivers/staging/rtl8712/TODO
@@ -4,10 +4,10 @@ TODO:
 - switch to use MAC80211
 - checkpatch.pl fixes - only a few remain
 
-Please send any patches to Greg Kroah-Hartman <greg@kroah.com>,
-Larry Finger <Larry.Finger@lwfinger.net> and
-Florian Schilhabel <florian.c.schilhabel@googlemail.com>.
-
-
-
+A replacement for this driver with MAC80211 support is available
+at https://github.com/chunkeey/rtl8192su
 
+Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
+Larry Finger <Larry.Finger@lwfinger.net>,
+Florian Schilhabel <florian.c.schilhabel@googlemail.com> and
+Linux Driver Project Developer List <driverdev-devel@linuxdriverproject.org>.
diff --git a/drivers/staging/rtl8723au/TODO b/drivers/staging/rtl8723au/TODO
index f5d57d32fae6..42b86e478df8 100644
--- a/drivers/staging/rtl8723au/TODO
+++ b/drivers/staging/rtl8723au/TODO
@@ -9,5 +9,8 @@ TODO:
 - merge Realtek's bugfixes and new features into the driver
 - switch to use MAC80211
 
+A mac80211 driver for this hardware already was integrated at
+drivers/net/wireless/realtek/rtl8xxxu/
+
 Please send any patches to Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
 Jes Sorensen <Jes.Sorensen@redhat.com>, and Larry Finger <Larry.Finger@lwfinger.net>.
diff --git a/drivers/staging/ste_rmi4/Kconfig b/drivers/staging/ste_rmi4/Kconfig
deleted file mode 100644
index e8679509e525..000000000000
--- a/drivers/staging/ste_rmi4/Kconfig
+++ /dev/null
@@ -1,9 +0,0 @@
-config TOUCHSCREEN_SYNAPTICS_I2C_RMI4
-	tristate "Synaptics i2c rmi4 touchscreen"
-	depends on I2C && INPUT
-	help
-	  Say Y here if you have a Synaptics RMI4 and
-	  want to enable support for the built-in touchscreen.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called synaptics_rmi4_ts.
diff --git a/drivers/staging/ste_rmi4/Makefile b/drivers/staging/ste_rmi4/Makefile
deleted file mode 100644
index 6cce2ed187ef..000000000000
--- a/drivers/staging/ste_rmi4/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-#
-# Makefile for the RMI4 touchscreen driver.
-#
-obj-$(CONFIG_TOUCHSCREEN_SYNAPTICS_I2C_RMI4) += synaptics_i2c_rmi4.o
diff --git a/drivers/staging/ste_rmi4/TODO b/drivers/staging/ste_rmi4/TODO
deleted file mode 100644
index 9be2437da85f..000000000000
--- a/drivers/staging/ste_rmi4/TODO
+++ /dev/null
@@ -1,7 +0,0 @@
-TODO
-----
-
-Wait for the official upstream synaptics rmi4 clearpad drivers as promised over the past few months
-Merge any device support needed from this driver into it
-Delete this driver
-
diff --git a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c b/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c
deleted file mode 100644
index 774958a8ce02..000000000000
--- a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.c
+++ /dev/null
@@ -1,1137 +0,0 @@
-/**
- *
- * Synaptics Register Mapped Interface (RMI4) I2C Physical Layer Driver.
- * Copyright (c) 2007-2010, Synaptics Incorporated
- *
- * Author: Js HA <js.ha@stericsson.com> for ST-Ericsson
- * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
- * Copyright 2010 (c) ST-Ericsson AB
- */
-/*
- * This file is licensed under the GPL2 license.
- *
- *#############################################################################
- * GPL
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- *#############################################################################
- */
-
-#include <linux/input.h>
-#include <linux/slab.h>
-#include <linux/i2c.h>
-#include <linux/interrupt.h>
-#include <linux/regulator/consumer.h>
-#include <linux/module.h>
-#include <linux/input/mt.h>
-#include "synaptics_i2c_rmi4.h"
-
-/* TODO: for multiple device support will need a per-device mutex */
-#define DRIVER_NAME "synaptics_rmi4_i2c"
-
-#define MAX_ERROR_REPORT	6
-#define MAX_TOUCH_MAJOR		15
-#define MAX_RETRY_COUNT		5
-#define STD_QUERY_LEN		21
-#define PAGE_LEN		2
-#define DATA_BUF_LEN		32
-#define BUF_LEN			37
-#define QUERY_LEN		9
-#define DATA_LEN		12
-#define HAS_TAP			0x01
-#define HAS_PALMDETECT		0x01
-#define HAS_ROTATE		0x02
-#define HAS_TAPANDHOLD		0x02
-#define HAS_DOUBLETAP		0x04
-#define HAS_EARLYTAP		0x08
-#define HAS_RELEASE		0x08
-#define HAS_FLICK		0x10
-#define HAS_PRESS		0x20
-#define HAS_PINCH		0x40
-
-#define MASK_16BIT		0xFFFF
-#define MASK_8BIT		0xFF
-#define MASK_7BIT		0x7F
-#define MASK_5BIT		0x1F
-#define MASK_4BIT		0x0F
-#define MASK_3BIT		0x07
-#define MASK_2BIT		0x03
-#define TOUCHPAD_CTRL_INTR	0x8
-#define PDT_START_SCAN_LOCATION (0x00E9)
-#define PDT_END_SCAN_LOCATION	(0x000A)
-#define PDT_ENTRY_SIZE		(0x0006)
-#define SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM	(0x11)
-#define SYNAPTICS_RMI4_DEVICE_CONTROL_FUNC_NUM	(0x01)
-
-/**
- * struct synaptics_rmi4_fn_desc - contains the function descriptor information
- * @query_base_addr: base address for query
- * @cmd_base_addr: base address for command
- * @ctrl_base_addr: base address for control
- * @data_base_addr: base address for data
- * @intr_src_count: count for the interrupt source
- * @fn_number: function number
- *
- * This structure is used to gives the function descriptor information
- * of the particular functionality.
- */
-struct synaptics_rmi4_fn_desc {
-	unsigned char	query_base_addr;
-	unsigned char	cmd_base_addr;
-	unsigned char	ctrl_base_addr;
-	unsigned char	data_base_addr;
-	unsigned char	intr_src_count;
-	unsigned char	fn_number;
-};
-
-/**
- * struct synaptics_rmi4_fn - contains the function information
- * @fn_number: function number
- * @num_of_data_sources: number of data sources
- * @num_of_data_points: number of fingers touched
- * @size_of_data_register_block: data register block size
- * @index_to_intr_reg: index for interrupt register
- * @intr_mask: interrupt mask value
- * @fn_desc: variable for function descriptor structure
- * @link: linked list for function descriptors
- *
- * This structure gives information about the number of data sources and
- * the number of data registers associated with the function.
- */
-struct synaptics_rmi4_fn {
-	unsigned char		fn_number;
-	unsigned char		num_of_data_sources;
-	unsigned char		num_of_data_points;
-	unsigned char		size_of_data_register_block;
-	unsigned char		index_to_intr_reg;
-	unsigned char		intr_mask;
-	struct synaptics_rmi4_fn_desc	fn_desc;
-	struct list_head	link;
-};
-
-/**
- * struct synaptics_rmi4_device_info - contains the rmi4 device information
- * @version_major: protocol major version number
- * @version_minor: protocol minor version number
- * @manufacturer_id: manufacturer identification byte
- * @product_props: product properties information
- * @product_info: product info array
- * @date_code: device manufacture date
- * @tester_id: tester id array
- * @serial_number: serial number for that device
- * @product_id_string: product id for the device
- * @support_fn_list: linked list for device information
- *
- * This structure gives information about the number of data sources and
- * the number of data registers associated with the function.
- */
-struct synaptics_rmi4_device_info {
-	unsigned int		version_major;
-	unsigned int		version_minor;
-	unsigned char		manufacturer_id;
-	unsigned char		product_props;
-	unsigned char		product_info[2];
-	unsigned char		date_code[3];
-	unsigned short		tester_id;
-	unsigned short		serial_number;
-	unsigned char		product_id_string[11];
-	struct list_head	support_fn_list;
-};
-
-/**
- * struct synaptics_rmi4_data - contains the rmi4 device data
- * @rmi4_mod_info: structure variable for rmi4 device info
- * @input_dev: pointer for input device
- * @i2c_client: pointer for i2c client
- * @board: constant pointer for touch platform data
- * @fn_list_mutex: mutex for function list
- * @rmi4_page_mutex: mutex for rmi4 page
- * @current_page: variable for integer
- * @number_of_interrupt_register: interrupt registers count
- * @fn01_ctrl_base_addr: control base address for fn01
- * @fn01_query_base_addr: query base address for fn01
- * @fn01_data_base_addr: data base address for fn01
- * @sensor_max_x: sensor maximum x value
- * @sensor_max_y: sensor maximum y value
- * @regulator: pointer to the regulator structure
- * @wait: wait queue structure variable
- * @touch_stopped: flag to stop the thread function
- * @fingers_supported: maximum supported fingers
- *
- * This structure gives the device data information.
- */
-struct synaptics_rmi4_data {
-	struct synaptics_rmi4_device_info rmi4_mod_info;
-	struct input_dev	*input_dev;
-	struct i2c_client	*i2c_client;
-	const struct synaptics_rmi4_platform_data *board;
-	struct mutex		fn_list_mutex;
-	struct mutex		rmi4_page_mutex;
-	int			current_page;
-	unsigned int		number_of_interrupt_register;
-	unsigned short		fn01_ctrl_base_addr;
-	unsigned short		fn01_query_base_addr;
-	unsigned short		fn01_data_base_addr;
-	int			sensor_max_x;
-	int			sensor_max_y;
-	struct regulator	*regulator;
-	wait_queue_head_t	wait;
-	bool			touch_stopped;
-	unsigned char		fingers_supported;
-};
-
-/**
- * synaptics_rmi4_set_page() - sets the page
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: set the address of the page
- *
- * This function is used to set the page and returns integer.
- */
-static int synaptics_rmi4_set_page(struct synaptics_rmi4_data *pdata,
-					unsigned int address)
-{
-	unsigned char	txbuf[PAGE_LEN];
-	int		retval;
-	unsigned int	page;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	page	= ((address >> 8) & MASK_8BIT);
-	if (page != pdata->current_page) {
-		txbuf[0]	= MASK_8BIT;
-		txbuf[1]	= page;
-		retval	= i2c_master_send(i2c, txbuf, PAGE_LEN);
-		if (retval != PAGE_LEN)
-			dev_err(&i2c->dev, "failed:%d\n", retval);
-		else
-			pdata->current_page = page;
-	} else
-		retval = PAGE_LEN;
-	return retval;
-}
-/**
- * synaptics_rmi4_i2c_block_read() - read the block of data
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: read the block of data from this offset
- * @valp: pointer to a buffer containing the data to be read
- * @size: number of bytes to read
- *
- * This function is to read the block of data and returns integer.
- */
-static int synaptics_rmi4_i2c_block_read(struct synaptics_rmi4_data *pdata,
-						unsigned short address,
-						unsigned char *valp, int size)
-{
-	int retval = 0;
-	int retry_count = 0;
-	int index;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	mutex_lock(&(pdata->rmi4_page_mutex));
-	retval = synaptics_rmi4_set_page(pdata, address);
-	if (retval != PAGE_LEN)
-		goto exit;
-	index = address & MASK_8BIT;
-retry:
-	retval = i2c_smbus_read_i2c_block_data(i2c, index, size, valp);
-	if (retval != size) {
-		if (++retry_count == MAX_RETRY_COUNT)
-			dev_err(&i2c->dev,
-				"%s:address 0x%04x size %d failed:%d\n",
-					__func__, address, size, retval);
-		else {
-			synaptics_rmi4_set_page(pdata, address);
-			goto retry;
-		}
-	}
-exit:
-	mutex_unlock(&(pdata->rmi4_page_mutex));
-	return retval;
-}
-
-/**
- * synaptics_rmi4_i2c_byte_write() - write the single byte data
- * @pdata: pointer to synaptics_rmi4_data structure
- * @address: write the block of data from this offset
- * @data: data to be write
- *
- * This function is to write the single byte data and returns integer.
- */
-static int synaptics_rmi4_i2c_byte_write(struct synaptics_rmi4_data *pdata,
-						unsigned short address,
-						unsigned char data)
-{
-	unsigned char txbuf[2];
-	int retval = 0;
-	struct i2c_client *i2c = pdata->i2c_client;
-
-	/* Can't have anyone else changing the page behind our backs */
-	mutex_lock(&(pdata->rmi4_page_mutex));
-
-	retval = synaptics_rmi4_set_page(pdata, address);
-	if (retval != PAGE_LEN)
-		goto exit;
-	txbuf[0]	= address & MASK_8BIT;
-	txbuf[1]	= data;
-	retval		= i2c_master_send(pdata->i2c_client, txbuf, 2);
-	/* Add in retry on writes only in certain error return values */
-	if (retval != 2) {
-		dev_err(&i2c->dev, "failed:%d\n", retval);
-		retval = -EIO;
-	} else
-		retval = 1;
-exit:
-	mutex_unlock(&(pdata->rmi4_page_mutex));
-	return retval;
-}
-
-/**
- * synpatics_rmi4_touchpad_report() - reports for the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- *
- * This function calls to reports for the rmi4 touchpad device
- */
-static int synpatics_rmi4_touchpad_report(struct synaptics_rmi4_data *pdata,
-						struct synaptics_rmi4_fn *rfi)
-{
-	/* number of touch points - fingers down in this case */
-	int	touch_count = 0;
-	int	finger;
-	int	finger_registers;
-	int	reg;
-	int	finger_shift;
-	int	finger_status;
-	int	retval;
-	int	x, y;
-	int	wx, wy;
-	unsigned short	data_base_addr;
-	unsigned short	data_offset;
-	unsigned char	data_reg_blk_size;
-	unsigned char	values[2];
-	unsigned char	data[DATA_LEN];
-	unsigned char	fingers_supported = pdata->fingers_supported;
-	struct	i2c_client *client = pdata->i2c_client;
-	struct	input_dev *input_dev = pdata->input_dev;
-
-	/* get 2D sensor finger data */
-	/*
-	 * First get the finger status field - the size of the finger status
-	 * field is determined by the number of finger supporte - 2 bits per
-	 * finger, so the number of registers to read is:
-	 * registerCount = ceil(numberOfFingers/4).
-	 * Read the required number of registers and check each 2 bit field to
-	 * determine if a finger is down:
-	 *	00 = finger not present,
-	 *	01 = finger present and data accurate,
-	 *	10 = finger present but data may not be accurate,
-	 *	11 = reserved for product use.
-	 */
-	finger_registers	= (fingers_supported + 3) / 4;
-	data_base_addr		= rfi->fn_desc.data_base_addr;
-	retval = synaptics_rmi4_i2c_block_read(pdata, data_base_addr, values,
-							finger_registers);
-	if (retval != finger_registers) {
-		dev_err(&client->dev, "%s:read status registers failed\n",
-								__func__);
-		return 0;
-	}
-	/*
-	 * For each finger present, read the proper number of registers
-	 * to get absolute data.
-	 */
-	data_reg_blk_size = rfi->size_of_data_register_block;
-	for (finger = 0; finger < fingers_supported; finger++) {
-		/* determine which data byte the finger status is in */
-		reg = finger / 4;
-		/* bit shift to get finger's status */
-		finger_shift	= (finger % 4) * 2;
-		finger_status	= (values[reg] >> finger_shift) & 3;
-		/*
-		 * if finger status indicates a finger is present then
-		 * read the finger data and report it
-		 */
-		input_mt_slot(input_dev, finger);
-		input_mt_report_slot_state(input_dev, MT_TOOL_FINGER,
-							finger_status != 0);
-
-		if (finger_status) {
-			/* Read the finger data */
-			data_offset = data_base_addr +
-					((finger * data_reg_blk_size) +
-					finger_registers);
-			retval = synaptics_rmi4_i2c_block_read(pdata,
-						data_offset, data,
-						data_reg_blk_size);
-			if (retval != data_reg_blk_size) {
-				dev_err(&client->dev, "%s:read data failed\n",
-								__func__);
-				return 0;
-			}
-			x = (data[0] << 4) | (data[2] & MASK_4BIT);
-			y = (data[1] << 4) | ((data[2] >> 4) & MASK_4BIT);
-			wy = (data[3] >> 4) & MASK_4BIT;
-			wx = (data[3] & MASK_4BIT);
-
-			if (pdata->board->x_flip)
-				x = pdata->sensor_max_x - x;
-			if (pdata->board->y_flip)
-				y = pdata->sensor_max_y - y;
-
-			input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR,
-								max(wx, wy));
-			input_report_abs(input_dev, ABS_MT_POSITION_X, x);
-			input_report_abs(input_dev, ABS_MT_POSITION_Y, y);
-
-			/* number of active touch points */
-			touch_count++;
-		}
-	}
-
-	/* sync after groups of events */
-	input_mt_sync_frame(input_dev);
-	input_sync(input_dev);
-	/* return the number of touch points */
-	return touch_count;
-}
-
-/**
- * synaptics_rmi4_report_device() - reports the rmi4 device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn
- *
- * This function is used to call the report function of the rmi4 device.
- */
-static int synaptics_rmi4_report_device(struct synaptics_rmi4_data *pdata,
-					struct synaptics_rmi4_fn *rfi)
-{
-	int touch = 0;
-	struct	i2c_client *client = pdata->i2c_client;
-	static int num_error_reports;
-
-	if (rfi->fn_number != SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM) {
-		num_error_reports++;
-		if (num_error_reports < MAX_ERROR_REPORT)
-			dev_err(&client->dev, "%s:report not supported\n",
-								__func__);
-	} else
-		touch = synpatics_rmi4_touchpad_report(pdata, rfi);
-	return touch;
-}
-/**
- * synaptics_rmi4_sensor_report() - reports to input subsystem
- * @pdata: pointer to synaptics_rmi4_data structure
- *
- * This function is used to reads in all data sources and reports
- * them to the input subsystem.
- */
-static int synaptics_rmi4_sensor_report(struct synaptics_rmi4_data *pdata)
-{
-	unsigned char	intr_status[4];
-	/* number of touch points - fingers or buttons */
-	int touch = 0;
-	unsigned int retval;
-	struct synaptics_rmi4_fn		*rfi;
-	struct synaptics_rmi4_device_info	*rmi;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/*
-	 * Get the interrupt status from the function $01
-	 * control register+1 to find which source(s) were interrupting
-	 * so we can read the data from the source(s) (2D sensor, buttons..)
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-					pdata->fn01_data_base_addr + 1,
-					intr_status,
-					pdata->number_of_interrupt_register);
-	if (retval != pdata->number_of_interrupt_register) {
-		dev_err(&client->dev,
-				"could not read interrupt status registers\n");
-		return 0;
-	}
-	/*
-	 * check each function that has data sources and if the interrupt for
-	 * that triggered then call that RMI4 functions report() function to
-	 * gather data and report it to the input subsystem
-	 */
-	rmi = &(pdata->rmi4_mod_info);
-	list_for_each_entry(rfi, &rmi->support_fn_list, link) {
-		if (rfi->num_of_data_sources) {
-			if (intr_status[rfi->index_to_intr_reg] &
-							rfi->intr_mask)
-				touch = synaptics_rmi4_report_device(pdata,
-									rfi);
-		}
-	}
-	/* return the number of touch points */
-	return touch;
-}
-
-/**
- * synaptics_rmi4_irq() - thread function for rmi4 attention line
- * @irq: irq value
- * @data: void pointer
- *
- * This function is interrupt thread function. It just notifies the
- * application layer that attention is required.
- */
-static irqreturn_t synaptics_rmi4_irq(int irq, void *data)
-{
-	struct synaptics_rmi4_data *pdata = data;
-	int touch_count;
-
-	do {
-		touch_count = synaptics_rmi4_sensor_report(pdata);
-		if (touch_count)
-			wait_event_timeout(pdata->wait, pdata->touch_stopped,
-							msecs_to_jiffies(1));
-		else
-			break;
-	} while (!pdata->touch_stopped);
-	return IRQ_HANDLED;
-}
-
-/**
- * synpatics_rmi4_touchpad_detect() - detects the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- * @fd: pointer to synaptics_rmi4_fn_desc structure
- * @interruptcount: count the number of interrupts
- *
- * This function calls to detects the rmi4 touchpad device
- */
-static int synpatics_rmi4_touchpad_detect(struct synaptics_rmi4_data *pdata,
-					struct synaptics_rmi4_fn *rfi,
-					struct synaptics_rmi4_fn_desc *fd,
-					unsigned int interruptcount)
-{
-	unsigned char	queries[QUERY_LEN];
-	unsigned short	intr_offset;
-	unsigned char	abs_data_size;
-	unsigned char	abs_data_blk_size;
-	unsigned char	egr_0, egr_1;
-	unsigned int	all_data_blk_size;
-	int	has_pinch, has_flick, has_tap;
-	int	has_tapandhold, has_doubletap;
-	int	has_earlytap, has_press;
-	int	has_palmdetect, has_rotate;
-	int	has_rel;
-	int	i;
-	int	retval;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	rfi->fn_desc.query_base_addr	= fd->query_base_addr;
-	rfi->fn_desc.data_base_addr	= fd->data_base_addr;
-	rfi->fn_desc.intr_src_count	= fd->intr_src_count;
-	rfi->fn_desc.fn_number		= fd->fn_number;
-	rfi->fn_number			= fd->fn_number;
-	rfi->num_of_data_sources	= fd->intr_src_count;
-	rfi->fn_desc.ctrl_base_addr	= fd->ctrl_base_addr;
-	rfi->fn_desc.cmd_base_addr	= fd->cmd_base_addr;
-
-	/*
-	 * need to get number of fingers supported, data size, etc.
-	 * to be used when getting data since the number of registers to
-	 * read depends on the number of fingers supported and data size.
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata, fd->query_base_addr,
-							queries,
-							sizeof(queries));
-	if (retval != sizeof(queries)) {
-		dev_err(&client->dev, "%s:read function query registers\n",
-							__func__);
-		return retval;
-	}
-	/*
-	 * 2D data sources have only 3 bits for the number of fingers
-	 * supported - so the encoding is a bit weird.
-	 */
-	if ((queries[1] & MASK_3BIT) <= 4)
-		/* add 1 since zero based */
-		rfi->num_of_data_points = (queries[1] & MASK_3BIT) + 1;
-	else {
-		/*
-		 * a value of 5 is up to 10 fingers - 6 and 7 are reserved
-		 * (shouldn't get these i int retval;n a normal 2D source).
-		 */
-		if ((queries[1] & MASK_3BIT) == 5)
-			rfi->num_of_data_points = 10;
-	}
-	pdata->fingers_supported = rfi->num_of_data_points;
-	/* Need to get interrupt info for handling interrupts */
-	rfi->index_to_intr_reg = (interruptcount + 7) / 8;
-	if (rfi->index_to_intr_reg != 0)
-		rfi->index_to_intr_reg -= 1;
-	/*
-	 * loop through interrupts for each source in fn $11
-	 * and or in a bit to the interrupt mask for each.
-	 */
-	intr_offset = interruptcount % 8;
-	rfi->intr_mask = 0;
-	for (i = intr_offset;
-		i < ((fd->intr_src_count & MASK_3BIT) + intr_offset); i++)
-		rfi->intr_mask |= 1 << i;
-
-	/* Size of just the absolute data for one finger */
-	abs_data_size	= queries[5] & MASK_2BIT;
-	/* One each for X and Y, one for LSB for X & Y, one for W, one for Z */
-	abs_data_blk_size = 3 + (2 * (abs_data_size == 0 ? 1 : 0));
-	rfi->size_of_data_register_block = abs_data_blk_size;
-
-	/*
-	 * need to determine the size of data to read - this depends on
-	 * conditions such as whether Relative data is reported and if Gesture
-	 * data is reported.
-	 */
-	egr_0 = queries[7];
-	egr_1 = queries[8];
-
-	/*
-	 * Get info about what EGR data is supported, whether it has
-	 * Relative data supported, etc.
-	 */
-	has_pinch	= egr_0 & HAS_PINCH;
-	has_flick	= egr_0 & HAS_FLICK;
-	has_tap		= egr_0 & HAS_TAP;
-	has_earlytap	= egr_0 & HAS_EARLYTAP;
-	has_press	= egr_0 & HAS_PRESS;
-	has_rotate	= egr_1 & HAS_ROTATE;
-	has_rel		= queries[1] & HAS_RELEASE;
-	has_tapandhold	= egr_0 & HAS_TAPANDHOLD;
-	has_doubletap	= egr_0 & HAS_DOUBLETAP;
-	has_palmdetect	= egr_1 & HAS_PALMDETECT;
-
-	/*
-	 * Size of all data including finger status, absolute data for each
-	 * finger, relative data and EGR data
-	 */
-	all_data_blk_size =
-		/* finger status, four fingers per register */
-		((rfi->num_of_data_points + 3) / 4) +
-		/* absolute data, per finger times number of fingers */
-		(abs_data_blk_size * rfi->num_of_data_points) +
-		/*
-		 * two relative registers (if relative is being reported)
-		 */
-		2 * has_rel +
-		/*
-		 * F11_2D_data8 is only present if the egr_0
-		 * register is non-zero.
-		 */
-		!!(egr_0) +
-		/*
-		 * F11_2D_data9 is only present if either egr_0 or
-		 * egr_1 registers are non-zero.
-		 */
-		(egr_0 || egr_1) +
-		/*
-		 * F11_2D_data10 is only present if EGR_PINCH or EGR_FLICK of
-		 * egr_0 reports as 1.
-		 */
-		!!(has_pinch | has_flick) +
-		/*
-		 * F11_2D_data11 and F11_2D_data12 are only present if
-		 * EGR_FLICK of egr_0 reports as 1.
-		 */
-		2 * !!(has_flick);
-	return retval;
-}
-
-/**
- * synaptics_rmi4_touchpad_config() - configures the rmi4 touchpad device
- * @pdata: pointer to synaptics_rmi4_data structure
- * @rfi: pointer to synaptics_rmi4_fn structure
- *
- * This function calls to configures the rmi4 touchpad device
- */
-static int synaptics_rmi4_touchpad_config(struct synaptics_rmi4_data *pdata,
-						struct synaptics_rmi4_fn *rfi)
-{
-	/*
-	 * For the data source - print info and do any
-	 * source specific configuration.
-	 */
-	unsigned char data[BUF_LEN];
-	int retval = 0;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/* Get and print some info about the data source... */
-	/* To Query 2D devices we need to read from the address obtained
-	 * from the function descriptor stored in the RMI function info.
-	 */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-						rfi->fn_desc.query_base_addr,
-						data, QUERY_LEN);
-	if (retval != QUERY_LEN)
-		dev_err(&client->dev, "%s:read query registers failed\n",
-								__func__);
-	else {
-		retval = synaptics_rmi4_i2c_block_read(pdata,
-						rfi->fn_desc.ctrl_base_addr,
-						data, DATA_BUF_LEN);
-		if (retval != DATA_BUF_LEN) {
-			dev_err(&client->dev,
-				"%s:read control registers failed\n",
-								__func__);
-			return retval;
-		}
-		/* Store these for use later*/
-		pdata->sensor_max_x = ((data[6] & MASK_8BIT) << 0) |
-						((data[7] & MASK_4BIT) << 8);
-		pdata->sensor_max_y = ((data[8] & MASK_5BIT) << 0) |
-						((data[9] & MASK_4BIT) << 8);
-	}
-	return retval;
-}
-
-/**
- * synaptics_rmi4_i2c_query_device() - query the rmi4 device
- * @pdata: pointer to synaptics_rmi4_data structure
- *
- * This function is used to query the rmi4 device.
- */
-static int synaptics_rmi4_i2c_query_device(struct synaptics_rmi4_data *pdata)
-{
-	int i;
-	int retval;
-	unsigned char std_queries[STD_QUERY_LEN];
-	unsigned char intr_count = 0;
-	int data_sources = 0;
-	unsigned int ctrl_offset;
-	struct synaptics_rmi4_fn *rfi;
-	struct synaptics_rmi4_fn_desc	rmi_fd;
-	struct synaptics_rmi4_device_info *rmi;
-	struct	i2c_client *client = pdata->i2c_client;
-
-	/*
-	 * init the physical drivers RMI module
-	 * info list of functions
-	 */
-	INIT_LIST_HEAD(&pdata->rmi4_mod_info.support_fn_list);
-
-	/*
-	 * Read the Page Descriptor Table to determine what functions
-	 * are present
-	 */
-	for (i = PDT_START_SCAN_LOCATION; i > PDT_END_SCAN_LOCATION;
-						i -= PDT_ENTRY_SIZE) {
-		retval = synaptics_rmi4_i2c_block_read(pdata, i,
-						(unsigned char *)&rmi_fd,
-						sizeof(rmi_fd));
-		if (retval != sizeof(rmi_fd)) {
-			/* failed to read next PDT entry */
-			dev_err(&client->dev, "%s: read error\n", __func__);
-			return -EIO;
-		}
-		rfi = NULL;
-		if (rmi_fd.fn_number) {
-			switch (rmi_fd.fn_number & MASK_8BIT) {
-			case SYNAPTICS_RMI4_DEVICE_CONTROL_FUNC_NUM:
-				pdata->fn01_query_base_addr =
-						rmi_fd.query_base_addr;
-				pdata->fn01_ctrl_base_addr =
-						rmi_fd.ctrl_base_addr;
-				pdata->fn01_data_base_addr =
-						rmi_fd.data_base_addr;
-				break;
-			case SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM:
-				if (rmi_fd.intr_src_count) {
-					rfi = kmalloc(sizeof(*rfi),
-						      GFP_KERNEL);
-					if (!rfi)
-						return -ENOMEM;
-					retval = synpatics_rmi4_touchpad_detect
-								(pdata,	rfi,
-								&rmi_fd,
-								intr_count);
-					if (retval < 0) {
-						kfree(rfi);
-						return retval;
-					}
-				}
-				break;
-			}
-			/* interrupt count for next iteration */
-			intr_count += (rmi_fd.intr_src_count & MASK_3BIT);
-			/*
-			 * We only want to add functions to the list
-			 * that have data associated with them.
-			 */
-			if (rfi && rmi_fd.intr_src_count) {
-				/* link this function info to the RMI module */
-				mutex_lock(&(pdata->fn_list_mutex));
-				list_add_tail(&rfi->link,
-					&pdata->rmi4_mod_info.support_fn_list);
-				mutex_unlock(&(pdata->fn_list_mutex));
-			}
-		} else {
-			/*
-			 * A zero in the function number
-			 * signals the end of the PDT
-			 */
-			dev_dbg(&client->dev,
-				"%s:end of PDT\n", __func__);
-			break;
-		}
-	}
-	/*
-	 * calculate the interrupt register count - used in the
-	 * ISR to read the correct number of interrupt registers
-	 */
-	pdata->number_of_interrupt_register = (intr_count + 7) / 8;
-	/*
-	 * Function $01 will be used to query the product properties,
-	 * and product ID  so we had to read the PDT above first to get
-	 * the Fn $01 query address and prior to filling in the product
-	 * info. NOTE: Even an unflashed device will still have FN $01.
-	 */
-
-	/* Load up the standard queries and get the RMI4 module info */
-	retval = synaptics_rmi4_i2c_block_read(pdata,
-					pdata->fn01_query_base_addr,
-					std_queries,
-					sizeof(std_queries));
-	if (retval != sizeof(std_queries)) {
-		dev_err(&client->dev, "%s:Failed reading queries\n",
-							__func__);
-		 return -EIO;
-	}
-
-	/* Currently supported RMI version is 4.0 */
-	pdata->rmi4_mod_info.version_major	= 4;
-	pdata->rmi4_mod_info.version_minor	= 0;
-	/*
-	 * get manufacturer id, product_props, product info,
-	 * date code, tester id, serial num and product id (name)
-	 */
-	pdata->rmi4_mod_info.manufacturer_id	= std_queries[0];
-	pdata->rmi4_mod_info.product_props	= std_queries[1];
-	pdata->rmi4_mod_info.product_info[0]	= std_queries[2];
-	pdata->rmi4_mod_info.product_info[1]	= std_queries[3];
-	/* year - 2001-2032 */
-	pdata->rmi4_mod_info.date_code[0]	= std_queries[4] & MASK_5BIT;
-	/* month - 1-12 */
-	pdata->rmi4_mod_info.date_code[1]	= std_queries[5] & MASK_4BIT;
-	/* day - 1-31 */
-	pdata->rmi4_mod_info.date_code[2]	= std_queries[6] & MASK_5BIT;
-	pdata->rmi4_mod_info.tester_id = ((std_queries[7] & MASK_7BIT) << 8) |
-						(std_queries[8] & MASK_7BIT);
-	pdata->rmi4_mod_info.serial_number =
-		((std_queries[9] & MASK_7BIT) << 8) |
-				(std_queries[10] & MASK_7BIT);
-	memcpy(pdata->rmi4_mod_info.product_id_string, &std_queries[11], 10);
-
-	/* Check if this is a Synaptics device - report if not. */
-	if (pdata->rmi4_mod_info.manufacturer_id != 1)
-		dev_err(&client->dev, "non-Synaptics mfg id:%d\n",
-			pdata->rmi4_mod_info.manufacturer_id);
-
-	list_for_each_entry(rfi, &pdata->rmi4_mod_info.support_fn_list, link)
-		data_sources += rfi->num_of_data_sources;
-	if (data_sources) {
-		rmi = &(pdata->rmi4_mod_info);
-		list_for_each_entry(rfi, &rmi->support_fn_list, link) {
-			if (rfi->num_of_data_sources) {
-				if (rfi->fn_number ==
-					SYNAPTICS_RMI4_TOUCHPAD_FUNC_NUM) {
-					retval = synaptics_rmi4_touchpad_config
-								(pdata, rfi);
-					if (retval < 0)
-						return retval;
-				} else
-					dev_err(&client->dev,
-						"%s:fn_number not supported\n",
-								__func__);
-				/*
-				 * Turn on interrupts for this
-				 * function's data sources.
-				 */
-				ctrl_offset = pdata->fn01_ctrl_base_addr + 1 +
-							rfi->index_to_intr_reg;
-				retval = synaptics_rmi4_i2c_byte_write(pdata,
-							ctrl_offset,
-							rfi->intr_mask);
-				if (retval < 0)
-					return retval;
-			}
-		}
-	}
-	return 0;
-}
-
-/*
- * Descriptor structure.
- * Describes the number of i2c devices on the bus that speak RMI.
- */
-static const struct synaptics_rmi4_platform_data synaptics_rmi4_platformdata = {
-	.irq_type       = (IRQF_TRIGGER_FALLING | IRQF_SHARED),
-	.x_flip		= false,
-	.y_flip		= true,
-};
-
-/**
- * synaptics_rmi4_probe() - Initialze the i2c-client touchscreen driver
- * @i2c: i2c client structure pointer
- * @id:i2c device id pointer
- *
- * This function will allocate and initialize the instance
- * data and request the irq and set the instance data as the clients
- * platform data then register the physical driver which will do a scan of
- * the rmi4 Physical Device Table and enumerate any rmi4 functions that
- * have data sources associated with them.
- */
-static int synaptics_rmi4_probe
-	(struct i2c_client *client, const struct i2c_device_id *dev_id)
-{
-	int retval;
-	unsigned char intr_status[4];
-	struct synaptics_rmi4_data *rmi4_data;
-	const struct synaptics_rmi4_platform_data *platformdata =
-						client->dev.platform_data;
-
-	if (!i2c_check_functionality(client->adapter,
-					I2C_FUNC_SMBUS_BYTE_DATA)) {
-		dev_err(&client->dev, "i2c smbus byte data not supported\n");
-		return -EIO;
-	}
-
-	if (!platformdata)
-		platformdata = &synaptics_rmi4_platformdata;
-
-	/* Allocate and initialize the instance data for this client */
-	rmi4_data = kcalloc(2, sizeof(struct synaptics_rmi4_data),
-			    GFP_KERNEL);
-	if (!rmi4_data)
-		return -ENOMEM;
-
-	rmi4_data->input_dev = input_allocate_device();
-	if (!rmi4_data->input_dev) {
-		retval = -ENOMEM;
-		goto err_input;
-	}
-
-	rmi4_data->regulator = regulator_get(&client->dev, "vdd");
-	if (IS_ERR(rmi4_data->regulator)) {
-		dev_err(&client->dev, "%s:get regulator failed\n",
-							__func__);
-		retval = PTR_ERR(rmi4_data->regulator);
-		goto err_get_regulator;
-	}
-	retval = regulator_enable(rmi4_data->regulator);
-	if (retval < 0) {
-		dev_err(&client->dev, "%s:regulator enable failed\n",
-							__func__);
-		goto err_regulator_enable;
-	}
-	init_waitqueue_head(&rmi4_data->wait);
-	/*
-	 * Copy i2c_client pointer into RTID's i2c_client pointer for
-	 * later use in rmi4_read, rmi4_write, etc.
-	 */
-	rmi4_data->i2c_client		= client;
-	/* So we set the page correctly the first time */
-	rmi4_data->current_page		= MASK_16BIT;
-	rmi4_data->board		= platformdata;
-	rmi4_data->touch_stopped	= false;
-
-	/* init the mutexes for maintain the lists */
-	mutex_init(&(rmi4_data->fn_list_mutex));
-	mutex_init(&(rmi4_data->rmi4_page_mutex));
-
-	/*
-	 * Register physical driver - this will call the detect function that
-	 * will then scan the device and determine the supported
-	 * rmi4 functions.
-	 */
-	retval = synaptics_rmi4_i2c_query_device(rmi4_data);
-	if (retval) {
-		dev_err(&client->dev, "%s: rmi4 query device failed\n",
-							__func__);
-		goto err_query_dev;
-	}
-
-	/* Store the instance data in the i2c_client */
-	i2c_set_clientdata(client, rmi4_data);
-
-	/*initialize the input device parameters */
-	rmi4_data->input_dev->name	= DRIVER_NAME;
-	rmi4_data->input_dev->phys	= "Synaptics_Clearpad";
-	rmi4_data->input_dev->id.bustype = BUS_I2C;
-	rmi4_data->input_dev->dev.parent = &client->dev;
-	input_set_drvdata(rmi4_data->input_dev, rmi4_data);
-
-	/* Initialize the function handlers for rmi4 */
-	set_bit(EV_SYN, rmi4_data->input_dev->evbit);
-	set_bit(EV_KEY, rmi4_data->input_dev->evbit);
-	set_bit(EV_ABS, rmi4_data->input_dev->evbit);
-
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_POSITION_X, 0,
-					rmi4_data->sensor_max_x, 0, 0);
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_POSITION_Y, 0,
-					rmi4_data->sensor_max_y, 0, 0);
-	input_set_abs_params(rmi4_data->input_dev, ABS_MT_TOUCH_MAJOR, 0,
-						MAX_TOUCH_MAJOR, 0, 0);
-	input_mt_init_slots(rmi4_data->input_dev,
-				rmi4_data->fingers_supported, 0);
-
-	/* Clear interrupts */
-	synaptics_rmi4_i2c_block_read(rmi4_data,
-			rmi4_data->fn01_data_base_addr + 1, intr_status,
-				rmi4_data->number_of_interrupt_register);
-	retval = request_threaded_irq(client->irq, NULL,
-					synaptics_rmi4_irq,
-					platformdata->irq_type,
-					DRIVER_NAME, rmi4_data);
-	if (retval) {
-		dev_err(&client->dev, "Unable to get attn irq %d\n",
-			client->irq);
-		goto err_query_dev;
-	}
-
-	retval = input_register_device(rmi4_data->input_dev);
-	if (retval) {
-		dev_err(&client->dev, "%s:input register failed\n", __func__);
-		goto err_free_irq;
-	}
-
-	return retval;
-
-err_free_irq:
-	free_irq(client->irq, rmi4_data);
-err_query_dev:
-	regulator_disable(rmi4_data->regulator);
-err_regulator_enable:
-	regulator_put(rmi4_data->regulator);
-err_get_regulator:
-	input_free_device(rmi4_data->input_dev);
-	rmi4_data->input_dev = NULL;
-err_input:
-	kfree(rmi4_data);
-
-	return retval;
-}
-/**
- * synaptics_rmi4_remove() - Removes the i2c-client touchscreen driver
- * @client: i2c client structure pointer
- *
- * This function uses to remove the i2c-client
- * touchscreen driver and returns integer.
- */
-static int synaptics_rmi4_remove(struct i2c_client *client)
-{
-	struct synaptics_rmi4_data *rmi4_data = i2c_get_clientdata(client);
-
-	rmi4_data->touch_stopped = true;
-	wake_up(&rmi4_data->wait);
-	free_irq(client->irq, rmi4_data);
-	input_unregister_device(rmi4_data->input_dev);
-	regulator_disable(rmi4_data->regulator);
-	regulator_put(rmi4_data->regulator);
-	kfree(rmi4_data);
-
-	return 0;
-}
-
-/**
- * synaptics_rmi4_suspend() - suspend the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to suspend the
- * touch panel controller and returns integer
- */
-static int __maybe_unused synaptics_rmi4_suspend(struct device *dev)
-{
-	/* Touch sleep mode */
-	int retval;
-	unsigned char intr_status;
-	struct synaptics_rmi4_data *rmi4_data = dev_get_drvdata(dev);
-
-	rmi4_data->touch_stopped = true;
-	disable_irq(rmi4_data->i2c_client->irq);
-
-	retval = synaptics_rmi4_i2c_block_read(rmi4_data,
-				rmi4_data->fn01_data_base_addr + 1,
-				&intr_status,
-				rmi4_data->number_of_interrupt_register);
-	if (retval < 0)
-		return retval;
-
-	retval = synaptics_rmi4_i2c_byte_write(rmi4_data,
-					rmi4_data->fn01_ctrl_base_addr + 1,
-					(intr_status & ~TOUCHPAD_CTRL_INTR));
-	if (retval < 0)
-		return retval;
-
-	regulator_disable(rmi4_data->regulator);
-
-	return 0;
-}
-/**
- * synaptics_rmi4_resume() - resume the touch screen controller
- * @dev: pointer to device structure
- *
- * This function is used to resume the touch panel
- * controller and returns integer.
- */
-static int __maybe_unused synaptics_rmi4_resume(struct device *dev)
-{
-	int retval;
-	unsigned char intr_status;
-	struct synaptics_rmi4_data *rmi4_data = dev_get_drvdata(dev);
-
-	retval = regulator_enable(rmi4_data->regulator);
-	if (retval) {
-		dev_err(dev, "Regulator enable failed (%d)\n", retval);
-		return retval;
-	}
-
-	enable_irq(rmi4_data->i2c_client->irq);
-	rmi4_data->touch_stopped = false;
-
-	retval = synaptics_rmi4_i2c_block_read(rmi4_data,
-				rmi4_data->fn01_data_base_addr + 1,
-				&intr_status,
-				rmi4_data->number_of_interrupt_register);
-	if (retval < 0)
-		return retval;
-
-	retval = synaptics_rmi4_i2c_byte_write(rmi4_data,
-					rmi4_data->fn01_ctrl_base_addr + 1,
-					(intr_status | TOUCHPAD_CTRL_INTR));
-	if (retval < 0)
-		return retval;
-
-	return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(synaptics_rmi4_dev_pm_ops, synaptics_rmi4_suspend,
-			 synaptics_rmi4_resume);
-
-static const struct i2c_device_id synaptics_rmi4_id_table[] = {
-	{ DRIVER_NAME, 0 },
-	{ },
-};
-MODULE_DEVICE_TABLE(i2c, synaptics_rmi4_id_table);
-
-static struct i2c_driver synaptics_rmi4_driver = {
-	.driver = {
-		.name	=	DRIVER_NAME,
-		.pm	=	&synaptics_rmi4_dev_pm_ops,
-	},
-	.probe		=	synaptics_rmi4_probe,
-	.remove		=	synaptics_rmi4_remove,
-	.id_table	=	synaptics_rmi4_id_table,
-};
-
-module_i2c_driver(synaptics_rmi4_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("naveen.gaddipati@stericsson.com, js.ha@stericsson.com");
-MODULE_DESCRIPTION("synaptics rmi4 i2c touch Driver");
diff --git a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h b/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h
deleted file mode 100644
index 8c9166ba71c7..000000000000
--- a/drivers/staging/ste_rmi4/synaptics_i2c_rmi4.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- *
- * Synaptics Register Mapped Interface (RMI4) I2C Physical Layer Driver.
- * Copyright (c) 2007-2010, Synaptics Incorporated
- *
- * Author: Js HA <js.ha@stericsson.com> for ST-Ericsson
- * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
- * Copyright 2010 (c) ST-Ericsson AB
- */
-/*
- * This file is licensed under the GPL2 license.
- *
- *#############################################################################
- * GPL
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- *#############################################################################
- */
-
-#ifndef _SYNAPTICS_RMI4_H_INCLUDED_
-#define _SYNAPTICS_RMI4_H_INCLUDED_
-
-/**
- * struct synaptics_rmi4_platform_data - contains the rmi4 platform data
- * @irq_number: irq number
- * @irq_type: irq type
- * @x flip: x flip flag
- * @y flip: y flip flag
- *
- * This structure gives platform data for rmi4.
- */
-struct synaptics_rmi4_platform_data {
-	int irq_type;
-	bool x_flip;
-	bool y_flip;
-};
-
-#endif
diff --git a/drivers/staging/wilc1000/coreconfigurator.c b/drivers/staging/wilc1000/coreconfigurator.c
index 2c4ae1fc8435..4b51c0ac27ac 100644
--- a/drivers/staging/wilc1000/coreconfigurator.c
+++ b/drivers/staging/wilc1000/coreconfigurator.c
@@ -338,8 +338,10 @@ s32 wilc_parse_network_info(u8 *msg_buffer,
 
 		if (ies_len > 0) {
 			network_info->ies = kmemdup(ies, ies_len, GFP_KERNEL);
-			if (!network_info->ies)
+			if (!network_info->ies) {
+				kfree(network_info);
 				return -ENOMEM;
+			}
 		}
 		network_info->ies_len = ies_len;
 	}
@@ -373,8 +375,10 @@ s32 wilc_parse_assoc_resp_info(u8 *buffer, u32 buffer_len,
 					    AID_LEN);
 
 		connect_resp_info->ies = kmemdup(ies, ies_len, GFP_KERNEL);
-		if (!connect_resp_info->ies)
+		if (!connect_resp_info->ies) {
+			kfree(connect_resp_info);
 			return -ENOMEM;
+		}
 
 		connect_resp_info->ies_len = ies_len;
 	}
diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
index b76622d1adc3..448a5c8c4514 100644
--- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
+++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
@@ -2170,6 +2170,13 @@ static int get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev,
 	int ret;
 	struct wilc_priv *priv = wiphy_priv(wiphy);
 	struct wilc_vif *vif = netdev_priv(priv->dev);
+	struct wilc *wl;
+
+	wl = vif->wilc;
+
+	/* If firmware is not started, return. */
+	if (!wl->initialized)
+		return -EIO;
 
 	ret = wilc_get_tx_power(vif, (u8 *)dbm);
 	if (ret)
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index d41a5c300e31..0ad5ac541a7f 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -802,58 +802,48 @@ static struct configfs_attribute *tcm_loop_tpg_attrib_attrs[] = {
 
 /* Start items for tcm_loop_nexus_cit */
 
+static int tcm_loop_alloc_sess_cb(struct se_portal_group *se_tpg,
+				  struct se_session *se_sess, void *p)
+{
+	struct tcm_loop_tpg *tl_tpg = container_of(se_tpg,
+					struct tcm_loop_tpg, tl_se_tpg);
+
+	tl_tpg->tl_nexus = p;
+	return 0;
+}
+
 static int tcm_loop_make_nexus(
 	struct tcm_loop_tpg *tl_tpg,
 	const char *name)
 {
-	struct se_portal_group *se_tpg;
 	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
 	struct tcm_loop_nexus *tl_nexus;
-	int ret = -ENOMEM;
+	int ret;
 
 	if (tl_tpg->tl_nexus) {
 		pr_debug("tl_tpg->tl_nexus already exists\n");
 		return -EEXIST;
 	}
-	se_tpg = &tl_tpg->tl_se_tpg;
 
 	tl_nexus = kzalloc(sizeof(struct tcm_loop_nexus), GFP_KERNEL);
 	if (!tl_nexus) {
 		pr_err("Unable to allocate struct tcm_loop_nexus\n");
 		return -ENOMEM;
 	}
-	/*
-	 * Initialize the struct se_session pointer
-	 */
-	tl_nexus->se_sess = transport_init_session(
-				TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
+
+	tl_nexus->se_sess = target_alloc_session(&tl_tpg->tl_se_tpg, 0, 0,
+					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
+					name, tl_nexus, tcm_loop_alloc_sess_cb);
 	if (IS_ERR(tl_nexus->se_sess)) {
 		ret = PTR_ERR(tl_nexus->se_sess);
-		goto out;
-	}
-	/*
-	 * Since we are running in 'demo mode' this call with generate a
-	 * struct se_node_acl for the tcm_loop struct se_portal_group with the SCSI
-	 * Initiator port name of the passed configfs group 'name'.
-	 */
-	tl_nexus->se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
-				se_tpg, (unsigned char *)name);
-	if (!tl_nexus->se_sess->se_node_acl) {
-		transport_free_session(tl_nexus->se_sess);
-		goto out;
+		kfree(tl_nexus);
+		return ret;
 	}
-	/* Now, register the I_T Nexus as active. */
-	transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
-			tl_nexus->se_sess, tl_nexus);
-	tl_tpg->tl_nexus = tl_nexus;
+
 	pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
 		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
 		name);
 	return 0;
-
-out:
-	kfree(tl_nexus);
-	return ret;
 }
 
 static int tcm_loop_drop_nexus(
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 3072f1aca8ec..c57e7884973d 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -196,45 +196,30 @@ static struct sbp_session *sbp_session_create(
 	struct sbp_session *sess;
 	int ret;
 	char guid_str[17];
-	struct se_node_acl *se_nacl;
+
+	snprintf(guid_str, sizeof(guid_str), "%016llx", guid);
 
 	sess = kmalloc(sizeof(*sess), GFP_KERNEL);
 	if (!sess) {
 		pr_err("failed to allocate session descriptor\n");
 		return ERR_PTR(-ENOMEM);
 	}
+	spin_lock_init(&sess->lock);
+	INIT_LIST_HEAD(&sess->login_list);
+	INIT_DELAYED_WORK(&sess->maint_work, session_maintenance_work);
+	sess->guid = guid;
 
-	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
+	sess->se_sess = target_alloc_session(&tpg->se_tpg, 128,
+					     sizeof(struct sbp_target_request),
+					     TARGET_PROT_NORMAL, guid_str,
+					     sess, NULL);
 	if (IS_ERR(sess->se_sess)) {
 		pr_err("failed to init se_session\n");
-
 		ret = PTR_ERR(sess->se_sess);
 		kfree(sess);
 		return ERR_PTR(ret);
 	}
 
-	snprintf(guid_str, sizeof(guid_str), "%016llx", guid);
-
-	se_nacl = core_tpg_check_initiator_node_acl(&tpg->se_tpg, guid_str);
-	if (!se_nacl) {
-		pr_warn("Node ACL not found for %s\n", guid_str);
-
-		transport_free_session(sess->se_sess);
-		kfree(sess);
-
-		return ERR_PTR(-EPERM);
-	}
-
-	sess->se_sess->se_node_acl = se_nacl;
-
-	spin_lock_init(&sess->lock);
-	INIT_LIST_HEAD(&sess->login_list);
-	INIT_DELAYED_WORK(&sess->maint_work, session_maintenance_work);
-
-	sess->guid = guid;
-
-	transport_register_session(&tpg->se_tpg, se_nacl, sess->se_sess, sess);
-
 	return sess;
 }
 
@@ -908,7 +893,6 @@ static void tgt_agent_process_work(struct work_struct *work)
 					STATUS_BLOCK_SBP_STATUS(
 						SBP_STATUS_REQ_TYPE_NOTSUPP));
 			sbp_send_status(req);
-			sbp_free_request(req);
 			return;
 		case 3: /* Dummy ORB */
 			req->status.status |= cpu_to_be32(
@@ -919,7 +903,6 @@ static void tgt_agent_process_work(struct work_struct *work)
 					STATUS_BLOCK_SBP_STATUS(
 						SBP_STATUS_DUMMY_ORB_COMPLETE));
 			sbp_send_status(req);
-			sbp_free_request(req);
 			return;
 		default:
 			BUG();
@@ -938,6 +921,25 @@ static inline bool tgt_agent_check_active(struct sbp_target_agent *agent)
 	return active;
 }
 
+static struct sbp_target_request *sbp_mgt_get_req(struct sbp_session *sess,
+	struct fw_card *card, u64 next_orb)
+{
+	struct se_session *se_sess = sess->se_sess;
+	struct sbp_target_request *req;
+	int tag;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC);
+	if (tag < 0)
+		return ERR_PTR(-ENOMEM);
+
+	req = &((struct sbp_target_request *)se_sess->sess_cmd_map)[tag];
+	memset(req, 0, sizeof(*req));
+	req->se_cmd.map_tag = tag;
+	req->se_cmd.tag = next_orb;
+
+	return req;
+}
+
 static void tgt_agent_fetch_work(struct work_struct *work)
 {
 	struct sbp_target_agent *agent =
@@ -949,8 +951,8 @@ static void tgt_agent_fetch_work(struct work_struct *work)
 	u64 next_orb = agent->orb_pointer;
 
 	while (next_orb && tgt_agent_check_active(agent)) {
-		req = kzalloc(sizeof(*req), GFP_KERNEL);
-		if (!req) {
+		req = sbp_mgt_get_req(sess, sess->card, next_orb);
+		if (IS_ERR(req)) {
 			spin_lock_bh(&agent->lock);
 			agent->state = AGENT_STATE_DEAD;
 			spin_unlock_bh(&agent->lock);
@@ -985,7 +987,6 @@ static void tgt_agent_fetch_work(struct work_struct *work)
 			spin_unlock_bh(&agent->lock);
 
 			sbp_send_status(req);
-			sbp_free_request(req);
 			return;
 		}
 
@@ -1232,7 +1233,7 @@ static void sbp_handle_command(struct sbp_target_request *req)
 	req->se_cmd.tag = req->orb_pointer;
 	if (target_submit_cmd(&req->se_cmd, sess->se_sess, req->cmd_buf,
 			      req->sense_buf, unpacked_lun, data_length,
-			      TCM_SIMPLE_TAG, data_dir, 0))
+			      TCM_SIMPLE_TAG, data_dir, TARGET_SCF_ACK_KREF))
 		goto err;
 
 	return;
@@ -1244,7 +1245,6 @@ err:
 		STATUS_BLOCK_LEN(1) |
 		STATUS_BLOCK_SBP_STATUS(SBP_STATUS_UNSPECIFIED_ERROR));
 	sbp_send_status(req);
-	sbp_free_request(req);
 }
 
 /*
@@ -1343,22 +1343,29 @@ static int sbp_rw_data(struct sbp_target_request *req)
 
 static int sbp_send_status(struct sbp_target_request *req)
 {
-	int ret, length;
+	int rc, ret = 0, length;
 	struct sbp_login_descriptor *login = req->login;
 
 	length = (((be32_to_cpu(req->status.status) >> 24) & 0x07) + 1) * 4;
 
-	ret = sbp_run_request_transaction(req, TCODE_WRITE_BLOCK_REQUEST,
+	rc = sbp_run_request_transaction(req, TCODE_WRITE_BLOCK_REQUEST,
 			login->status_fifo_addr, &req->status, length);
-	if (ret != RCODE_COMPLETE) {
-		pr_debug("sbp_send_status: write failed: 0x%x\n", ret);
-		return -EIO;
+	if (rc != RCODE_COMPLETE) {
+		pr_debug("sbp_send_status: write failed: 0x%x\n", rc);
+		ret = -EIO;
+		goto put_ref;
 	}
 
 	pr_debug("sbp_send_status: status write complete for ORB: 0x%llx\n",
 			req->orb_pointer);
-
-	return 0;
+	/*
+	 * Drop the extra ACK_KREF reference taken by target_submit_cmd()
+	 * ahead of sbp_check_stop_free() -> transport_generic_free_cmd()
+	 * final se_cmd->cmd_kref put.
+	 */
+put_ref:
+	target_put_sess_cmd(&req->se_cmd);
+	return ret;
 }
 
 static void sbp_sense_mangle(struct sbp_target_request *req)
@@ -1447,9 +1454,13 @@ static int sbp_send_sense(struct sbp_target_request *req)
 
 static void sbp_free_request(struct sbp_target_request *req)
 {
+	struct se_cmd *se_cmd = &req->se_cmd;
+	struct se_session *se_sess = se_cmd->se_sess;
+
 	kfree(req->pg_tbl);
 	kfree(req->cmd_buf);
-	kfree(req);
+
+	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
 
 static void sbp_mgt_agent_process(struct work_struct *work)
@@ -1609,7 +1620,6 @@ static void sbp_mgt_agent_rw(struct fw_card *card,
 			rcode = RCODE_CONFLICT_ERROR;
 			goto out;
 		}
-
 		req = kzalloc(sizeof(*req), GFP_ATOMIC);
 		if (!req) {
 			rcode = RCODE_CONFLICT_ERROR;
@@ -1815,8 +1825,7 @@ static int sbp_check_stop_free(struct se_cmd *se_cmd)
 	struct sbp_target_request *req = container_of(se_cmd,
 			struct sbp_target_request, se_cmd);
 
-	transport_generic_free_cmd(&req->se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(&req->se_cmd, 0);
 }
 
 static int sbp_count_se_tpg_luns(struct se_portal_group *tpg)
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index da457e25717a..a4046ca6e60d 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -86,7 +86,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun)
 		se_cmd->lun_ref_active = true;
 
 		if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
-		    (deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY)) {
+		    deve->lun_access_ro) {
 			pr_err("TARGET_CORE[%s]: Detected WRITE_PROTECTED LUN"
 				" Access for 0x%08llx\n",
 				se_cmd->se_tfo->get_fabric_name(),
@@ -199,7 +199,7 @@ bool target_lun_is_rdonly(struct se_cmd *cmd)
 
 	rcu_read_lock();
 	deve = target_nacl_find_deve(se_sess->se_node_acl, cmd->orig_fe_lun);
-	ret = (deve && deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY);
+	ret = deve && deve->lun_access_ro;
 	rcu_read_unlock();
 
 	return ret;
@@ -258,22 +258,15 @@ void core_free_device_list_for_node(
 
 void core_update_device_list_access(
 	u64 mapped_lun,
-	u32 lun_access,
+	bool lun_access_ro,
 	struct se_node_acl *nacl)
 {
 	struct se_dev_entry *deve;
 
 	mutex_lock(&nacl->lun_entry_mutex);
 	deve = target_nacl_find_deve(nacl, mapped_lun);
-	if (deve) {
-		if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
-			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
-			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
-		} else {
-			deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_WRITE;
-			deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
-		}
-	}
+	if (deve)
+		deve->lun_access_ro = lun_access_ro;
 	mutex_unlock(&nacl->lun_entry_mutex);
 }
 
@@ -319,7 +312,7 @@ int core_enable_device_list_for_node(
 	struct se_lun *lun,
 	struct se_lun_acl *lun_acl,
 	u64 mapped_lun,
-	u32 lun_access,
+	bool lun_access_ro,
 	struct se_node_acl *nacl,
 	struct se_portal_group *tpg)
 {
@@ -340,11 +333,7 @@ int core_enable_device_list_for_node(
 	kref_init(&new->pr_kref);
 	init_completion(&new->pr_comp);
 
-	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE)
-		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
-	else
-		new->lun_flags |= TRANSPORT_LUNFLAGS_READ_ONLY;
-
+	new->lun_access_ro = lun_access_ro;
 	new->creation_time = get_jiffies_64();
 	new->attach_count++;
 
@@ -433,7 +422,7 @@ void core_disable_device_list_for_node(
 
 	hlist_del_rcu(&orig->link);
 	clear_bit(DEF_PR_REG_ACTIVE, &orig->deve_flags);
-	orig->lun_flags = 0;
+	orig->lun_access_ro = false;
 	orig->creation_time = 0;
 	orig->attach_count--;
 	/*
@@ -558,8 +547,7 @@ int core_dev_add_lun(
 {
 	int rc;
 
-	rc = core_tpg_add_lun(tpg, lun,
-				TRANSPORT_LUNFLAGS_READ_WRITE, dev);
+	rc = core_tpg_add_lun(tpg, lun, false, dev);
 	if (rc < 0)
 		return rc;
 
@@ -635,7 +623,7 @@ int core_dev_add_initiator_node_lun_acl(
 	struct se_portal_group *tpg,
 	struct se_lun_acl *lacl,
 	struct se_lun *lun,
-	u32 lun_access)
+	bool lun_access_ro)
 {
 	struct se_node_acl *nacl = lacl->se_lun_nacl;
 	/*
@@ -647,20 +635,19 @@ int core_dev_add_initiator_node_lun_acl(
 	if (!nacl)
 		return -EINVAL;
 
-	if ((lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) &&
-	    (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE))
-		lun_access = TRANSPORT_LUNFLAGS_READ_ONLY;
+	if (lun->lun_access_ro)
+		lun_access_ro = true;
 
 	lacl->se_lun = lun;
 
 	if (core_enable_device_list_for_node(lun, lacl, lacl->mapped_lun,
-			lun_access, nacl, tpg) < 0)
+			lun_access_ro, nacl, tpg) < 0)
 		return -EINVAL;
 
 	pr_debug("%s_TPG[%hu]_LUN[%llu->%llu] - Added %s ACL for "
 		" InitiatorNode: %s\n", tpg->se_tpg_tfo->get_fabric_name(),
 		tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun, lacl->mapped_lun,
-		(lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) ? "RW" : "RO",
+		lun_access_ro ? "RO" : "RW",
 		nacl->initiatorname);
 	/*
 	 * Check to see if there are any existing persistent reservation APTPL
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 8caef31da415..1bd5c72b663e 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -78,7 +78,7 @@ static int target_fabric_mappedlun_link(
 			struct se_lun_acl, se_lun_group);
 	struct se_portal_group *se_tpg;
 	struct config_item *nacl_ci, *tpg_ci, *tpg_ci_s, *wwn_ci, *wwn_ci_s;
-	int lun_access;
+	bool lun_access_ro;
 
 	if (lun->lun_link_magic != SE_LUN_LINK_MAGIC) {
 		pr_err("Bad lun->lun_link_magic, not a valid lun_ci pointer:"
@@ -115,19 +115,18 @@ static int target_fabric_mappedlun_link(
 	}
 	/*
 	 * If this struct se_node_acl was dynamically generated with
-	 * tpg_1/attrib/generate_node_acls=1, use the existing deve->lun_flags,
-	 * which be will write protected (READ-ONLY) when
+	 * tpg_1/attrib/generate_node_acls=1, use the existing
+	 * deve->lun_access_ro value, which will be true when
 	 * tpg_1/attrib/demo_mode_write_protect=1
 	 */
 	rcu_read_lock();
 	deve = target_nacl_find_deve(lacl->se_lun_nacl, lacl->mapped_lun);
 	if (deve)
-		lun_access = deve->lun_flags;
+		lun_access_ro = deve->lun_access_ro;
 	else
-		lun_access =
+		lun_access_ro =
 			(se_tpg->se_tpg_tfo->tpg_check_prod_mode_write_protect(
-				se_tpg)) ? TRANSPORT_LUNFLAGS_READ_ONLY :
-					   TRANSPORT_LUNFLAGS_READ_WRITE;
+				se_tpg)) ? true : false;
 	rcu_read_unlock();
 	/*
 	 * Determine the actual mapped LUN value user wants..
@@ -135,7 +134,7 @@ static int target_fabric_mappedlun_link(
 	 * This value is what the SCSI Initiator actually sees the
 	 * $FABRIC/$WWPN/$TPGT/lun/lun_* as on their SCSI Initiator Ports.
 	 */
-	return core_dev_add_initiator_node_lun_acl(se_tpg, lacl, lun, lun_access);
+	return core_dev_add_initiator_node_lun_acl(se_tpg, lacl, lun, lun_access_ro);
 }
 
 static int target_fabric_mappedlun_unlink(
@@ -167,8 +166,7 @@ static ssize_t target_fabric_mappedlun_write_protect_show(
 	rcu_read_lock();
 	deve = target_nacl_find_deve(se_nacl, lacl->mapped_lun);
 	if (deve) {
-		len = sprintf(page, "%d\n",
-			(deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ? 1 : 0);
+		len = sprintf(page, "%d\n", deve->lun_access_ro);
 	}
 	rcu_read_unlock();
 
@@ -181,25 +179,23 @@ static ssize_t target_fabric_mappedlun_write_protect_store(
 	struct se_lun_acl *lacl = item_to_lun_acl(item);
 	struct se_node_acl *se_nacl = lacl->se_lun_nacl;
 	struct se_portal_group *se_tpg = se_nacl->se_tpg;
-	unsigned long op;
+	unsigned long wp;
 	int ret;
 
-	ret = kstrtoul(page, 0, &op);
+	ret = kstrtoul(page, 0, &wp);
 	if (ret)
 		return ret;
 
-	if ((op != 1) && (op != 0))
+	if ((wp != 1) && (wp != 0))
 		return -EINVAL;
 
-	core_update_device_list_access(lacl->mapped_lun, (op) ?
-			TRANSPORT_LUNFLAGS_READ_ONLY :
-			TRANSPORT_LUNFLAGS_READ_WRITE,
-			lacl->se_lun_nacl);
+	/* wp=1 means lun_access_ro=true */
+	core_update_device_list_access(lacl->mapped_lun, wp, lacl->se_lun_nacl);
 
 	pr_debug("%s_ConfigFS: Changed Initiator ACL: %s"
 		" Mapped LUN: %llu Write Protect bit to %s\n",
 		se_tpg->se_tpg_tfo->get_fabric_name(),
-		se_nacl->initiatorname, lacl->mapped_lun, (op) ? "ON" : "OFF");
+		se_nacl->initiatorname, lacl->mapped_lun, (wp) ? "ON" : "OFF");
 
 	return count;
 
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index abe4eb997a84..026a758e5778 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -413,8 +413,39 @@ iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
 }
 
 static sense_reason_t
+iblock_execute_write_same_direct(struct block_device *bdev, struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+	struct scatterlist *sg = &cmd->t_data_sg[0];
+	struct page *page = NULL;
+	int ret;
+
+	if (sg->offset) {
+		page = alloc_page(GFP_KERNEL);
+		if (!page)
+			return TCM_OUT_OF_RESOURCES;
+		sg_copy_to_buffer(sg, cmd->t_data_nents, page_address(page),
+				  dev->dev_attrib.block_size);
+	}
+
+	ret = blkdev_issue_write_same(bdev,
+				target_to_linux_sector(dev, cmd->t_task_lba),
+				target_to_linux_sector(dev,
+					sbc_get_write_same_sectors(cmd)),
+				GFP_KERNEL, page ? page : sg_page(sg));
+	if (page)
+		__free_page(page);
+	if (ret)
+		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+	target_complete_cmd(cmd, GOOD);
+	return 0;
+}
+
+static sense_reason_t
 iblock_execute_write_same(struct se_cmd *cmd)
 {
+	struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
 	struct iblock_req *ibr;
 	struct scatterlist *sg;
 	struct bio *bio;
@@ -439,6 +470,9 @@ iblock_execute_write_same(struct se_cmd *cmd)
 		return TCM_INVALID_CDB_FIELD;
 	}
 
+	if (bdev_write_same(bdev))
+		return iblock_execute_write_same_direct(bdev, cmd);
+
 	ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
 	if (!ibr)
 		goto fail;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 4a7cf499cdfa..86b4a8375628 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -59,10 +59,10 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
 void	target_pr_kref_release(struct kref *);
 void	core_free_device_list_for_node(struct se_node_acl *,
 		struct se_portal_group *);
-void	core_update_device_list_access(u64, u32, struct se_node_acl *);
+void	core_update_device_list_access(u64, bool, struct se_node_acl *);
 struct se_dev_entry *target_nacl_find_deve(struct se_node_acl *, u64);
 int	core_enable_device_list_for_node(struct se_lun *, struct se_lun_acl *,
-		u64, u32, struct se_node_acl *, struct se_portal_group *);
+		u64, bool, struct se_node_acl *, struct se_portal_group *);
 void	core_disable_device_list_for_node(struct se_lun *, struct se_dev_entry *,
 		struct se_node_acl *, struct se_portal_group *);
 void	core_clear_lun_from_tpg(struct se_lun *, struct se_portal_group *);
@@ -72,7 +72,7 @@ void	core_dev_del_lun(struct se_portal_group *, struct se_lun *);
 struct se_lun_acl *core_dev_init_initiator_node_lun_acl(struct se_portal_group *,
 		struct se_node_acl *, u64, int *);
 int	core_dev_add_initiator_node_lun_acl(struct se_portal_group *,
-		struct se_lun_acl *, struct se_lun *lun, u32);
+		struct se_lun_acl *, struct se_lun *lun, bool);
 int	core_dev_del_initiator_node_lun_acl(struct se_lun *,
 		struct se_lun_acl *);
 void	core_dev_free_initiator_node_lun_acl(struct se_portal_group *,
@@ -118,7 +118,7 @@ void	core_tpg_add_node_to_devs(struct se_node_acl *, struct se_portal_group *,
 void	core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *);
 struct se_lun *core_tpg_alloc_lun(struct se_portal_group *, u64);
 int	core_tpg_add_lun(struct se_portal_group *, struct se_lun *,
-		u32, struct se_device *);
+		bool, struct se_device *);
 void core_tpg_remove_lun(struct se_portal_group *, struct se_lun *);
 struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *tpg,
 		const char *initiatorname);
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 0aa47babd16c..2a91ed3ef380 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -997,7 +997,6 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd)
 	int length = 0;
 	int ret;
 	int i;
-	bool read_only = target_lun_is_rdonly(cmd);;
 
 	memset(buf, 0, SE_MODE_PAGE_BUF);
 
@@ -1008,7 +1007,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd)
 	length = ten ? 3 : 2;
 
 	/* DEVICE-SPECIFIC PARAMETER */
-	if ((cmd->se_lun->lun_access & TRANSPORT_LUNFLAGS_READ_ONLY) || read_only)
+	if (cmd->se_lun->lun_access_ro || target_lun_is_rdonly(cmd))
 		spc_modesense_write_protect(&buf[length], type);
 
 	/*
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 3608b1b5ecf7..ddf046080dc3 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -121,7 +121,7 @@ void core_tpg_add_node_to_devs(
 	struct se_portal_group *tpg,
 	struct se_lun *lun_orig)
 {
-	u32 lun_access = 0;
+	bool lun_access_ro = true;
 	struct se_lun *lun;
 	struct se_device *dev;
 
@@ -137,27 +137,26 @@ void core_tpg_add_node_to_devs(
 		 * demo_mode_write_protect is ON, or READ_ONLY;
 		 */
 		if (!tpg->se_tpg_tfo->tpg_check_demo_mode_write_protect(tpg)) {
-			lun_access = TRANSPORT_LUNFLAGS_READ_WRITE;
+			lun_access_ro = false;
 		} else {
 			/*
 			 * Allow only optical drives to issue R/W in default RO
 			 * demo mode.
 			 */
 			if (dev->transport->get_device_type(dev) == TYPE_DISK)
-				lun_access = TRANSPORT_LUNFLAGS_READ_ONLY;
+				lun_access_ro = true;
 			else
-				lun_access = TRANSPORT_LUNFLAGS_READ_WRITE;
+				lun_access_ro = false;
 		}
 
 		pr_debug("TARGET_CORE[%s]->TPG[%u]_LUN[%llu] - Adding %s"
 			" access for LUN in Demo Mode\n",
 			tpg->se_tpg_tfo->get_fabric_name(),
 			tpg->se_tpg_tfo->tpg_get_tag(tpg), lun->unpacked_lun,
-			(lun_access == TRANSPORT_LUNFLAGS_READ_WRITE) ?
-			"READ-WRITE" : "READ-ONLY");
+			lun_access_ro ? "READ-ONLY" : "READ-WRITE");
 
 		core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
-						 lun_access, acl, tpg);
+						 lun_access_ro, acl, tpg);
 		/*
 		 * Check to see if there are any existing persistent reservation
 		 * APTPL pre-registrations that need to be enabled for this dynamic
@@ -522,7 +521,7 @@ int core_tpg_register(
 			return PTR_ERR(se_tpg->tpg_virt_lun0);
 
 		ret = core_tpg_add_lun(se_tpg, se_tpg->tpg_virt_lun0,
-				TRANSPORT_LUNFLAGS_READ_ONLY, g_lun0_dev);
+				true, g_lun0_dev);
 		if (ret < 0) {
 			kfree(se_tpg->tpg_virt_lun0);
 			return ret;
@@ -616,7 +615,7 @@ struct se_lun *core_tpg_alloc_lun(
 int core_tpg_add_lun(
 	struct se_portal_group *tpg,
 	struct se_lun *lun,
-	u32 lun_access,
+	bool lun_access_ro,
 	struct se_device *dev)
 {
 	int ret;
@@ -644,9 +643,9 @@ int core_tpg_add_lun(
 	spin_unlock(&dev->se_port_lock);
 
 	if (dev->dev_flags & DF_READ_ONLY)
-		lun->lun_access = TRANSPORT_LUNFLAGS_READ_ONLY;
+		lun->lun_access_ro = true;
 	else
-		lun->lun_access = lun_access;
+		lun->lun_access_ro = lun_access_ro;
 	if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
 		hlist_add_head_rcu(&lun->link, &tpg->tpg_lun_hlist);
 	mutex_unlock(&tpg->tpg_lun_mutex);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 867bc6d0a68a..ab2bf12975e1 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -281,6 +281,17 @@ struct se_session *transport_init_session_tags(unsigned int tag_num,
 	struct se_session *se_sess;
 	int rc;
 
+	if (tag_num != 0 && !tag_size) {
+		pr_err("init_session_tags called with percpu-ida tag_num:"
+		       " %u, but zero tag_size\n", tag_num);
+		return ERR_PTR(-EINVAL);
+	}
+	if (!tag_num && tag_size) {
+		pr_err("init_session_tags called with percpu-ida tag_size:"
+		       " %u, but zero tag_num\n", tag_size);
+		return ERR_PTR(-EINVAL);
+	}
+
 	se_sess = transport_init_session(sup_prot_ops);
 	if (IS_ERR(se_sess))
 		return se_sess;
@@ -374,6 +385,51 @@ void transport_register_session(
 }
 EXPORT_SYMBOL(transport_register_session);
 
+struct se_session *
+target_alloc_session(struct se_portal_group *tpg,
+		     unsigned int tag_num, unsigned int tag_size,
+		     enum target_prot_op prot_op,
+		     const char *initiatorname, void *private,
+		     int (*callback)(struct se_portal_group *,
+				     struct se_session *, void *))
+{
+	struct se_session *sess;
+
+	/*
+	 * If the fabric driver is using percpu-ida based pre allocation
+	 * of I/O descriptor tags, go ahead and perform that setup now..
+	 */
+	if (tag_num != 0)
+		sess = transport_init_session_tags(tag_num, tag_size, prot_op);
+	else
+		sess = transport_init_session(prot_op);
+
+	if (IS_ERR(sess))
+		return sess;
+
+	sess->se_node_acl = core_tpg_check_initiator_node_acl(tpg,
+					(unsigned char *)initiatorname);
+	if (!sess->se_node_acl) {
+		transport_free_session(sess);
+		return ERR_PTR(-EACCES);
+	}
+	/*
+	 * Go ahead and perform any remaining fabric setup that is
+	 * required before transport_register_session().
+	 */
+	if (callback != NULL) {
+		int rc = callback(tpg, sess, private);
+		if (rc) {
+			transport_free_session(sess);
+			return ERR_PTR(rc);
+		}
+	}
+
+	transport_register_session(tpg, sess->se_node_acl, sess, private);
+	return sess;
+}
+EXPORT_SYMBOL(target_alloc_session);
+
 static void target_release_session(struct kref *kref)
 {
 	struct se_session *se_sess = container_of(kref,
@@ -1941,6 +1997,9 @@ static void transport_complete_qf(struct se_cmd *cmd)
 
 	switch (cmd->data_direction) {
 	case DMA_FROM_DEVICE:
+		if (cmd->scsi_status)
+			goto queue_status;
+
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_data_in(cmd);
 		break;
@@ -1951,6 +2010,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
 		}
 		/* Fall through for DMA_TO_DEVICE */
 	case DMA_NONE:
+queue_status:
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_status(cmd);
 		break;
@@ -2072,6 +2132,9 @@ static void target_complete_ok_work(struct work_struct *work)
 queue_rsp:
 	switch (cmd->data_direction) {
 	case DMA_FROM_DEVICE:
+		if (cmd->scsi_status)
+			goto queue_status;
+
 		atomic_long_add(cmd->data_length,
 				&cmd->se_lun->lun_stats.tx_data_octets);
 		/*
@@ -2111,6 +2174,7 @@ queue_rsp:
 		}
 		/* Fall through for DMA_TO_DEVICE */
 	case DMA_NONE:
+queue_status:
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_status(cmd);
 		if (ret == -EAGAIN || ret == -ENOMEM)
@@ -2596,8 +2660,6 @@ void target_wait_for_sess_cmds(struct se_session *se_sess)
 
 	list_for_each_entry_safe(se_cmd, tmp_cmd,
 				&se_sess->sess_wait_list, se_cmd_list) {
-		list_del_init(&se_cmd->se_cmd_list);
-
 		pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
 			" %d\n", se_cmd, se_cmd->t_state,
 			se_cmd->se_tfo->get_cmd_state(se_cmd));
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 94f5154ac788..62bf4fe5704a 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -26,6 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/uio_driver.h>
 #include <linux/stringify.h>
+#include <linux/bitops.h>
 #include <net/genetlink.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -63,8 +64,11 @@
 
 #define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
 
+#define DATA_BLOCK_BITS 256
+#define DATA_BLOCK_SIZE 4096
+
 #define CMDR_SIZE (16 * 4096)
-#define DATA_SIZE (257 * 4096)
+#define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE)
 
 #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
 
@@ -93,12 +97,11 @@ struct tcmu_dev {
 	u32 cmdr_size;
 	u32 cmdr_last_cleaned;
 	/* Offset of data ring from start of mb */
+	/* Must add data_off and mb_addr to get the address */
 	size_t data_off;
 	size_t data_size;
-	/* Ring head + tail values. */
-	/* Must add data_off and mb_addr to get the address */
-	size_t data_head;
-	size_t data_tail;
+
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
 
 	wait_queue_head_t wait_cmdr;
 	/* TODO should this be a mutex? */
@@ -122,9 +125,9 @@ struct tcmu_cmd {
 
 	uint16_t cmd_id;
 
-	/* Can't use se_cmd->data_length when cleaning up expired cmds, because if
+	/* Can't use se_cmd when cleaning up expired cmds, because if
 	   cmd has been completed then accessing se_cmd is off limits */
-	size_t data_length;
+	DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS);
 
 	unsigned long deadline;
 
@@ -168,13 +171,6 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
 
 	tcmu_cmd->se_cmd = se_cmd;
 	tcmu_cmd->tcmu_dev = udev;
-	tcmu_cmd->data_length = se_cmd->data_length;
-
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-		tcmu_cmd->data_length += se_cmd->t_bidi_data_sg->length;
-	}
-
 	tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
 
 	idr_preload(GFP_KERNEL);
@@ -231,105 +227,126 @@ static inline size_t head_to_end(size_t head, size_t size)
 	return size - head;
 }
 
+static inline void new_iov(struct iovec **iov, int *iov_cnt,
+			   struct tcmu_dev *udev)
+{
+	struct iovec *iovec;
+
+	if (*iov_cnt != 0)
+		(*iov)++;
+	(*iov_cnt)++;
+
+	iovec = *iov;
+	memset(iovec, 0, sizeof(struct iovec));
+}
+
 #define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
 
+/* offset is relative to mb_addr */
+static inline size_t get_block_offset(struct tcmu_dev *dev,
+		int block, int remaining)
+{
+	return dev->data_off + block * DATA_BLOCK_SIZE +
+		DATA_BLOCK_SIZE - remaining;
+}
+
+static inline size_t iov_tail(struct tcmu_dev *udev, struct iovec *iov)
+{
+	return (size_t)iov->iov_base + iov->iov_len;
+}
+
 static void alloc_and_scatter_data_area(struct tcmu_dev *udev,
 	struct scatterlist *data_sg, unsigned int data_nents,
 	struct iovec **iov, int *iov_cnt, bool copy_data)
 {
-	int i;
+	int i, block;
+	int block_remaining = 0;
 	void *from, *to;
-	size_t copy_bytes;
+	size_t copy_bytes, to_offset;
 	struct scatterlist *sg;
 
 	for_each_sg(data_sg, sg, data_nents, i) {
-		copy_bytes = min_t(size_t, sg->length,
-				 head_to_end(udev->data_head, udev->data_size));
+		int sg_remaining = sg->length;
 		from = kmap_atomic(sg_page(sg)) + sg->offset;
-		to = (void *) udev->mb_addr + udev->data_off + udev->data_head;
-
-		if (copy_data) {
-			memcpy(to, from, copy_bytes);
-			tcmu_flush_dcache_range(to, copy_bytes);
-		}
-
-		/* Even iov_base is relative to mb_addr */
-		(*iov)->iov_len = copy_bytes;
-		(*iov)->iov_base = (void __user *) udev->data_off +
-						udev->data_head;
-		(*iov_cnt)++;
-		(*iov)++;
-
-		UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
-
-		/* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
-		if (sg->length != copy_bytes) {
-			void *from_skip = from + copy_bytes;
-
-			copy_bytes = sg->length - copy_bytes;
-
-			(*iov)->iov_len = copy_bytes;
-			(*iov)->iov_base = (void __user *) udev->data_off +
-							udev->data_head;
-
+		while (sg_remaining > 0) {
+			if (block_remaining == 0) {
+				block = find_first_zero_bit(udev->data_bitmap,
+						DATA_BLOCK_BITS);
+				block_remaining = DATA_BLOCK_SIZE;
+				set_bit(block, udev->data_bitmap);
+			}
+			copy_bytes = min_t(size_t, sg_remaining,
+					block_remaining);
+			to_offset = get_block_offset(udev, block,
+					block_remaining);
+			to = (void *)udev->mb_addr + to_offset;
+			if (*iov_cnt != 0 &&
+			    to_offset == iov_tail(udev, *iov)) {
+				(*iov)->iov_len += copy_bytes;
+			} else {
+				new_iov(iov, iov_cnt, udev);
+				(*iov)->iov_base = (void __user *) to_offset;
+				(*iov)->iov_len = copy_bytes;
+			}
 			if (copy_data) {
-				to = (void *) udev->mb_addr +
-					udev->data_off + udev->data_head;
-				memcpy(to, from_skip, copy_bytes);
+				memcpy(to, from + sg->length - sg_remaining,
+					copy_bytes);
 				tcmu_flush_dcache_range(to, copy_bytes);
 			}
-
-			(*iov_cnt)++;
-			(*iov)++;
-
-			UPDATE_HEAD(udev->data_head,
-				copy_bytes, udev->data_size);
+			sg_remaining -= copy_bytes;
+			block_remaining -= copy_bytes;
 		}
-
 		kunmap_atomic(from - sg->offset);
 	}
 }
 
-static void gather_and_free_data_area(struct tcmu_dev *udev,
-	struct scatterlist *data_sg, unsigned int data_nents)
+static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
 {
-	int i;
+	bitmap_xor(udev->data_bitmap, udev->data_bitmap, cmd->data_bitmap,
+		   DATA_BLOCK_BITS);
+}
+
+static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap,
+		struct scatterlist *data_sg, unsigned int data_nents)
+{
+	int i, block;
+	int block_remaining = 0;
 	void *from, *to;
-	size_t copy_bytes;
+	size_t copy_bytes, from_offset;
 	struct scatterlist *sg;
 
-	/* It'd be easier to look at entry's iovec again, but UAM */
 	for_each_sg(data_sg, sg, data_nents, i) {
-		copy_bytes = min_t(size_t, sg->length,
-				 head_to_end(udev->data_tail, udev->data_size));
-
+		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
-		WARN_ON(sg->length + sg->offset > PAGE_SIZE);
-		from = (void *) udev->mb_addr +
-			udev->data_off + udev->data_tail;
-		tcmu_flush_dcache_range(from, copy_bytes);
-		memcpy(to, from, copy_bytes);
-
-		UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
-
-		/* Uh oh, wrapped the data buffer for this sg's data */
-		if (sg->length != copy_bytes) {
-			void *to_skip = to + copy_bytes;
-
-			from = (void *) udev->mb_addr +
-				udev->data_off + udev->data_tail;
-			WARN_ON(udev->data_tail);
-			copy_bytes = sg->length - copy_bytes;
+		while (sg_remaining > 0) {
+			if (block_remaining == 0) {
+				block = find_first_bit(cmd_bitmap,
+						DATA_BLOCK_BITS);
+				block_remaining = DATA_BLOCK_SIZE;
+				clear_bit(block, cmd_bitmap);
+			}
+			copy_bytes = min_t(size_t, sg_remaining,
+					block_remaining);
+			from_offset = get_block_offset(udev, block,
+					block_remaining);
+			from = (void *) udev->mb_addr + from_offset;
 			tcmu_flush_dcache_range(from, copy_bytes);
-			memcpy(to_skip, from, copy_bytes);
+			memcpy(to + sg->length - sg_remaining, from,
+					copy_bytes);
 
-			UPDATE_HEAD(udev->data_tail,
-				copy_bytes, udev->data_size);
+			sg_remaining -= copy_bytes;
+			block_remaining -= copy_bytes;
 		}
 		kunmap_atomic(to - sg->offset);
 	}
 }
 
+static inline size_t spc_bitmap_free(unsigned long *bitmap)
+{
+	return DATA_BLOCK_SIZE * (DATA_BLOCK_BITS -
+			bitmap_weight(bitmap, DATA_BLOCK_BITS));
+}
+
 /*
  * We can't queue a command until we have space available on the cmd ring *and*
  * space available on the data ring.
@@ -339,9 +356,8 @@ static void gather_and_free_data_area(struct tcmu_dev *udev,
 static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t data_needed)
 {
 	struct tcmu_mailbox *mb = udev->mb_addr;
-	size_t space;
+	size_t space, cmd_needed;
 	u32 cmd_head;
-	size_t cmd_needed;
 
 	tcmu_flush_dcache_range(mb, sizeof(*mb));
 
@@ -363,10 +379,10 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
 		return false;
 	}
 
-	space = spc_free(udev->data_head, udev->data_tail, udev->data_size);
+	space = spc_bitmap_free(udev->data_bitmap);
 	if (space < data_needed) {
-		pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
-		       udev->data_tail, udev->data_size);
+		pr_debug("no data space: only %zu available, but ask for %zu\n",
+				space, data_needed);
 		return false;
 	}
 
@@ -385,6 +401,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	uint32_t cmd_head;
 	uint64_t cdb_off;
 	bool copy_to_data_area;
+	size_t data_length;
+	DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
 		return -EINVAL;
@@ -393,12 +411,12 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	 * Must be a certain minimum size for response sense info, but
 	 * also may be larger if the iov array is large.
 	 *
-	 * iovs = sgl_nents+1, for end-of-ring case, plus another 1
-	 * b/c size == offsetof one-past-element.
+	 * We prepare way too many iovs for potential uses here, because it's
+	 * expensive to tell how many regions are freed in the bitmap
 	*/
 	base_command_size = max(offsetof(struct tcmu_cmd_entry,
-					 req.iov[se_cmd->t_bidi_data_nents +
-						 se_cmd->t_data_nents + 2]),
+				req.iov[se_cmd->t_bidi_data_nents +
+					se_cmd->t_data_nents]),
 				sizeof(struct tcmu_cmd_entry));
 	command_size = base_command_size
 		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
@@ -409,13 +427,18 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	mb = udev->mb_addr;
 	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
+	data_length = se_cmd->data_length;
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+		data_length += se_cmd->t_bidi_data_sg->length;
+	}
 	if ((command_size > (udev->cmdr_size / 2))
-	    || tcmu_cmd->data_length > (udev->data_size - 1))
+	    || data_length > udev->data_size)
 		pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
-			"cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
+			"cmd/data ring buffers\n", command_size, data_length,
 			udev->cmdr_size, udev->data_size);
 
-	while (!is_ring_space_avail(udev, command_size, tcmu_cmd->data_length)) {
+	while (!is_ring_space_avail(udev, command_size, data_length)) {
 		int ret;
 		DEFINE_WAIT(__wait);
 
@@ -462,6 +485,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry->hdr.kflags = 0;
 	entry->hdr.uflags = 0;
 
+	bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
+
 	/*
 	 * Fix up iovecs, and handle if allocation in data ring wrapped.
 	 */
@@ -480,6 +505,10 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false);
 	entry->req.iov_bidi_cnt = iov_cnt;
 
+	/* cmd's data_bitmap is what changed in process */
+	bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
+			DATA_BLOCK_BITS);
+
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
 	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
@@ -530,35 +559,42 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 	struct tcmu_dev *udev = cmd->tcmu_dev;
 
 	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-		/* cmd has been completed already from timeout, just reclaim data
-		   ring space */
-		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		/*
+		 * cmd has been completed already from timeout, just reclaim
+		 * data ring space and free cmd
+		 */
+		free_data_area(udev, cmd);
+
+		kmem_cache_free(tcmu_cmd_cache, cmd);
 		return;
 	}
 
 	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
-		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		free_data_area(udev, cmd);
 		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
 			cmd->se_cmd);
 		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
 	} else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
 		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
 			       se_cmd->scsi_sense_length);
-
-		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		/* Discard data_out buffer */
-		UPDATE_HEAD(udev->data_tail,
-			(size_t)se_cmd->t_data_sg->length, udev->data_size);
+		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
 
-		/* Get Data-In buffer */
-		gather_and_free_data_area(udev,
+		/* Get Data-In buffer before clean up */
+		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+		gather_data_area(udev, bitmap,
 			se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
-		gather_and_free_data_area(udev,
+		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
+
+		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+		gather_data_area(udev, bitmap,
 			se_cmd->t_data_sg, se_cmd->t_data_nents);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
-		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction != DMA_NONE) {
 		pr_warn("TCMU: data direction was %d!\n",
 			se_cmd->data_direction);
@@ -894,11 +930,13 @@ static int tcmu_configure_device(struct se_device *dev)
 
 	mb = udev->mb_addr;
 	mb->version = TCMU_MAILBOX_VERSION;
+	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC;
 	mb->cmdr_off = CMDR_OFF;
 	mb->cmdr_size = udev->cmdr_size;
 
 	WARN_ON(!PAGE_ALIGNED(udev->data_off));
 	WARN_ON(udev->data_size % PAGE_SIZE);
+	WARN_ON(udev->data_size % DATA_BLOCK_SIZE);
 
 	info->version = __stringify(TCMU_MAILBOX_VERSION);
 
@@ -942,12 +980,12 @@ err_vzalloc:
 	return ret;
 }
 
-static int tcmu_check_pending_cmd(int id, void *p, void *data)
+static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
 {
-	struct tcmu_cmd *cmd = p;
-
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		kmem_cache_free(tcmu_cmd_cache, cmd);
 		return 0;
+	}
 	return -EINVAL;
 }
 
@@ -962,6 +1000,8 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
 static void tcmu_free_device(struct se_device *dev)
 {
 	struct tcmu_dev *udev = TCMU_DEV(dev);
+	struct tcmu_cmd *cmd;
+	bool all_expired = true;
 	int i;
 
 	del_timer_sync(&udev->timeout);
@@ -970,10 +1010,13 @@ static void tcmu_free_device(struct se_device *dev)
 
 	/* Upper layer should drain all requests before calling this */
 	spin_lock_irq(&udev->commands_lock);
-	i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL);
+	idr_for_each_entry(&udev->commands, cmd, i) {
+		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
+			all_expired = false;
+	}
 	idr_destroy(&udev->commands);
 	spin_unlock_irq(&udev->commands_lock);
-	WARN_ON(i);
+	WARN_ON(!all_expired);
 
 	/* Device was configured */
 	if (udev->uio_info.uio_dev) {
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 064d6dfb5b6d..216e18cc9133 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -107,8 +107,7 @@ void ft_release_cmd(struct se_cmd *se_cmd)
 
 int ft_check_stop_free(struct se_cmd *se_cmd)
 {
-	transport_generic_free_cmd(se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(se_cmd, 0);
 }
 
 /*
@@ -179,6 +178,12 @@ int ft_queue_status(struct se_cmd *se_cmd)
 		return -ENOMEM;
 	}
 	lport->tt.exch_done(cmd->seq);
+	/*
+	 * Drop the extra ACK_KREF reference taken by target_submit_cmd()
+	 * ahead of ft_check_stop_free() -> transport_generic_free_cmd()
+	 * final se_cmd->cmd_kref put.
+	 */
+	target_put_sess_cmd(&cmd->se_cmd);
 	return 0;
 }
 
@@ -387,7 +392,7 @@ static void ft_send_tm(struct ft_cmd *cmd)
 	/* FIXME: Add referenced task tag for ABORT_TASK */
 	rc = target_submit_tmr(&cmd->se_cmd, cmd->sess->se_sess,
 		&cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun),
-		cmd, tm_func, GFP_KERNEL, 0, 0);
+		cmd, tm_func, GFP_KERNEL, 0, TARGET_SCF_ACK_KREF);
 	if (rc < 0)
 		ft_send_resp_code_and_free(cmd, FCP_TMF_FAILED);
 }
@@ -422,6 +427,12 @@ void ft_queue_tm_resp(struct se_cmd *se_cmd)
 	pr_debug("tmr fn %d resp %d fcp code %d\n",
 		  tmr->function, tmr->response, code);
 	ft_send_resp_code(cmd, code);
+	/*
+	 * Drop the extra ACK_KREF reference taken by target_submit_tmr()
+	 * ahead of ft_check_stop_free() -> transport_generic_free_cmd()
+	 * final se_cmd->cmd_kref put.
+	 */
+	target_put_sess_cmd(&cmd->se_cmd);
 }
 
 void ft_aborted_task(struct se_cmd *se_cmd)
@@ -560,7 +571,8 @@ static void ft_send_work(struct work_struct *work)
 	 */
 	if (target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, fcp->fc_cdb,
 			      &cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun),
-			      ntohl(fcp->fc_dl), task_attr, data_dir, 0))
+			      ntohl(fcp->fc_dl), task_attr, data_dir,
+			      TARGET_SCF_ACK_KREF))
 		goto err;
 
 	pr_debug("r_ctl %x alloc target_submit_cmd\n", fh->fh_r_ctl);
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index e19f4c58c6fa..d0c3e1894c61 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -186,6 +186,20 @@ out:
 	return NULL;
 }
 
+static int ft_sess_alloc_cb(struct se_portal_group *se_tpg,
+			    struct se_session *se_sess, void *p)
+{
+	struct ft_sess *sess = p;
+	struct ft_tport *tport = sess->tport;
+	struct hlist_head *head = &tport->hash[ft_sess_hash(sess->port_id)];
+
+	pr_debug("port_id %x sess %p\n", sess->port_id, sess);
+	hlist_add_head_rcu(&sess->hash, head);
+	tport->sess_count++;
+
+	return 0;
+}
+
 /*
  * Allocate session and enter it in the hash for the local port.
  * Caller holds ft_lport_lock.
@@ -194,7 +208,6 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 				      struct fc_rport_priv *rdata)
 {
 	struct se_portal_group *se_tpg = &tport->tpg->se_tpg;
-	struct se_node_acl *se_acl;
 	struct ft_sess *sess;
 	struct hlist_head *head;
 	unsigned char initiatorname[TRANSPORT_IQN_LEN];
@@ -210,31 +223,18 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 	if (!sess)
 		return NULL;
 
-	sess->se_sess = transport_init_session_tags(TCM_FC_DEFAULT_TAGS,
-						    sizeof(struct ft_cmd),
-						    TARGET_PROT_NORMAL);
-	if (IS_ERR(sess->se_sess)) {
-		kfree(sess);
-		return NULL;
-	}
+	kref_init(&sess->kref); /* ref for table entry */
+	sess->tport = tport;
+	sess->port_id = port_id;
 
-	se_acl = core_tpg_get_initiator_node_acl(se_tpg, &initiatorname[0]);
-	if (!se_acl) {
-		transport_free_session(sess->se_sess);
+	sess->se_sess = target_alloc_session(se_tpg, TCM_FC_DEFAULT_TAGS,
+					     sizeof(struct ft_cmd),
+					     TARGET_PROT_NORMAL, &initiatorname[0],
+					     sess, ft_sess_alloc_cb);
+	if (IS_ERR(sess->se_sess)) {
 		kfree(sess);
 		return NULL;
 	}
-	sess->se_sess->se_node_acl = se_acl;
-	sess->tport = tport;
-	sess->port_id = port_id;
-	kref_init(&sess->kref);	/* ref for table entry */
-	hlist_add_head_rcu(&sess->hash, head);
-	tport->sess_count++;
-
-	pr_debug("port_id %x sess %p\n", port_id, sess);
-
-	transport_register_session(&tport->tpg->se_tpg, se_acl,
-				   sess->se_sess, sess);
 	return sess;
 }
 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 7c92c09be213..c37eedc35a24 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -178,6 +178,7 @@ config THERMAL_EMULATION
 config HISI_THERMAL
 	tristate "Hisilicon thermal driver"
 	depends on (ARCH_HISI && CPU_THERMAL && OF) || COMPILE_TEST
+	depends on HAS_IOMEM
 	help
 	  Enable this to plug hisilicon's thermal sensor driver into the Linux
 	  thermal framework. cpufreq is used as the cooling device to throttle
@@ -197,6 +198,7 @@ config IMX_THERMAL
 config SPEAR_THERMAL
 	tristate "SPEAr thermal sensor driver"
 	depends on PLAT_SPEAR || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Enable this to plug the SPEAr thermal sensor driver into the Linux
@@ -206,6 +208,7 @@ config ROCKCHIP_THERMAL
 	tristate "Rockchip thermal driver"
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	depends on RESET_CONTROLLER
+	depends on HAS_IOMEM
 	help
 	  Rockchip thermal driver provides support for Temperature sensor
 	  ADC (TS-ADC) found on Rockchip SoCs. It supports one critical
@@ -214,7 +217,7 @@ config ROCKCHIP_THERMAL
 
 config RCAR_THERMAL
 	tristate "Renesas R-Car thermal driver"
-	depends on ARCH_SHMOBILE || COMPILE_TEST
+	depends on ARCH_RENESAS || COMPILE_TEST
 	depends on HAS_IOMEM
 	help
 	  Enable this to plug the R-Car thermal sensor driver into the Linux
@@ -223,6 +226,7 @@ config RCAR_THERMAL
 config KIRKWOOD_THERMAL
 	tristate "Temperature sensor on Marvell Kirkwood SoCs"
 	depends on MACH_KIRKWOOD || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Support for the Kirkwood thermal sensor driver into the Linux thermal
@@ -231,6 +235,7 @@ config KIRKWOOD_THERMAL
 config DOVE_THERMAL
 	tristate "Temperature sensor on Marvell Dove SoCs"
 	depends on ARCH_DOVE || MACH_DOVE || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Support for the Dove thermal sensor driver in the Linux thermal
@@ -249,6 +254,7 @@ config DB8500_THERMAL
 config ARMADA_THERMAL
 	tristate "Armada 370/XP thermal management"
 	depends on ARCH_MVEBU || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on OF
 	help
 	  Enable this option if you want to have support for thermal management
@@ -266,7 +272,8 @@ config TEGRA_SOCTHERM
 
 config DB8500_CPUFREQ_COOLING
 	tristate "DB8500 cpufreq cooling"
-	depends on ARCH_U8500
+	depends on ARCH_U8500 || COMPILE_TEST
+	depends on HAS_IOMEM
 	depends on CPU_THERMAL
 	default y
 	help
@@ -365,8 +372,18 @@ config INTEL_PCH_THERMAL
 	  Thermal reporting device will provide temperature reading,
 	  programmable trip points and other information.
 
+config MTK_THERMAL
+	tristate "Temperature sensor driver for mediatek SoCs"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	depends on HAS_IOMEM
+	default y
+	help
+	  Enable this option if you want to have support for thermal management
+	  controller present in Mediatek SoCs
+
 menu "Texas Instruments thermal drivers"
 depends on ARCH_HAS_BANDGAP || COMPILE_TEST
+depends on HAS_IOMEM
 source "drivers/thermal/ti-soc-thermal/Kconfig"
 endmenu
 
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index cfae6a654793..8e9cbc3b5679 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -48,3 +48,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL)	+= intel_pch_thermal.o
 obj-$(CONFIG_ST_THERMAL)	+= st/
 obj-$(CONFIG_TEGRA_SOCTHERM)	+= tegra_soctherm.o
 obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
+obj-$(CONFIG_MTK_THERMAL)	+= mtk_thermal.o
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 00d81af648b8..6a6ec1c95a7a 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -24,6 +24,7 @@
 
 /* Intel PCH thermal Device IDs */
 #define PCH_THERMAL_DID_WPT	0x9CA4 /* Wildcat Point */
+#define PCH_THERMAL_DID_SKL	0x9D31 /* Skylake PCH */
 
 /* Wildcat Point-LP  PCH Thermal registers */
 #define WPT_TEMP	0x0000	/* Temperature */
@@ -201,6 +202,10 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev,
 		ptd->ops = &pch_dev_ops_wpt;
 		dev_name = "pch_wildcat_point";
 		break;
+	case PCH_THERMAL_DID_SKL:
+		ptd->ops = &pch_dev_ops_wpt;
+		dev_name = "pch_skylake";
+		break;
 	default:
 		dev_err(&pdev->dev, "unknown pch thermal device\n");
 		return -ENODEV;
@@ -266,6 +271,7 @@ static void intel_pch_thermal_remove(struct pci_dev *pdev)
 
 static struct pci_device_id intel_pch_thermal_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
 	{ 0, },
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c
new file mode 100644
index 000000000000..3d93b1c07cee
--- /dev/null
+++ b/drivers/thermal/mtk_thermal.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2015 MediaTek Inc.
+ * Author: Hanyi Wu <hanyi.wu@mediatek.com>
+ *         Sascha Hauer <s.hauer@pengutronix.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/thermal.h>
+#include <linux/reset.h>
+#include <linux/types.h>
+#include <linux/nvmem-consumer.h>
+
+/* AUXADC Registers */
+#define AUXADC_CON0_V		0x000
+#define AUXADC_CON1_V		0x004
+#define AUXADC_CON1_SET_V	0x008
+#define AUXADC_CON1_CLR_V	0x00c
+#define AUXADC_CON2_V		0x010
+#define AUXADC_DATA(channel)	(0x14 + (channel) * 4)
+#define AUXADC_MISC_V		0x094
+
+#define AUXADC_CON1_CHANNEL(x)	BIT(x)
+
+#define APMIXED_SYS_TS_CON1	0x604
+
+/* Thermal Controller Registers */
+#define TEMP_MONCTL0		0x000
+#define TEMP_MONCTL1		0x004
+#define TEMP_MONCTL2		0x008
+#define TEMP_MONIDET0		0x014
+#define TEMP_MONIDET1		0x018
+#define TEMP_MSRCTL0		0x038
+#define TEMP_AHBPOLL		0x040
+#define TEMP_AHBTO		0x044
+#define TEMP_ADCPNP0		0x048
+#define TEMP_ADCPNP1		0x04c
+#define TEMP_ADCPNP2		0x050
+#define TEMP_ADCPNP3		0x0b4
+
+#define TEMP_ADCMUX		0x054
+#define TEMP_ADCEN		0x060
+#define TEMP_PNPMUXADDR		0x064
+#define TEMP_ADCMUXADDR		0x068
+#define TEMP_ADCENADDR		0x074
+#define TEMP_ADCVALIDADDR	0x078
+#define TEMP_ADCVOLTADDR	0x07c
+#define TEMP_RDCTRL		0x080
+#define TEMP_ADCVALIDMASK	0x084
+#define TEMP_ADCVOLTAGESHIFT	0x088
+#define TEMP_ADCWRITECTRL	0x08c
+#define TEMP_MSR0		0x090
+#define TEMP_MSR1		0x094
+#define TEMP_MSR2		0x098
+#define TEMP_MSR3		0x0B8
+
+#define TEMP_SPARE0		0x0f0
+
+#define PTPCORESEL		0x400
+
+#define TEMP_MONCTL1_PERIOD_UNIT(x)	((x) & 0x3ff)
+
+#define TEMP_MONCTL2_FILTER_INTERVAL(x)	(((x) & 0x3ff) << 16)
+#define TEMP_MONCTL2_SENSOR_INTERVAL(x)	((x) & 0x3ff)
+
+#define TEMP_AHBPOLL_ADC_POLL_INTERVAL(x)	(x)
+
+#define TEMP_ADCWRITECTRL_ADC_PNP_WRITE		BIT(0)
+#define TEMP_ADCWRITECTRL_ADC_MUX_WRITE		BIT(1)
+
+#define TEMP_ADCVALIDMASK_VALID_HIGH		BIT(5)
+#define TEMP_ADCVALIDMASK_VALID_POS(bit)	(bit)
+
+#define MT8173_TS1	0
+#define MT8173_TS2	1
+#define MT8173_TS3	2
+#define MT8173_TS4	3
+#define MT8173_TSABB	4
+
+/* AUXADC channel 11 is used for the temperature sensors */
+#define MT8173_TEMP_AUXADC_CHANNEL	11
+
+/* The total number of temperature sensors in the MT8173 */
+#define MT8173_NUM_SENSORS		5
+
+/* The number of banks in the MT8173 */
+#define MT8173_NUM_ZONES		4
+
+/* The number of sensing points per bank */
+#define MT8173_NUM_SENSORS_PER_ZONE	4
+
+/* Layout of the fuses providing the calibration data */
+#define MT8173_CALIB_BUF0_VALID		BIT(0)
+#define MT8173_CALIB_BUF1_ADC_GE(x)	(((x) >> 22) & 0x3ff)
+#define MT8173_CALIB_BUF0_VTS_TS1(x)	(((x) >> 17) & 0x1ff)
+#define MT8173_CALIB_BUF0_VTS_TS2(x)	(((x) >> 8) & 0x1ff)
+#define MT8173_CALIB_BUF1_VTS_TS3(x)	(((x) >> 0) & 0x1ff)
+#define MT8173_CALIB_BUF2_VTS_TS4(x)	(((x) >> 23) & 0x1ff)
+#define MT8173_CALIB_BUF2_VTS_TSABB(x)	(((x) >> 14) & 0x1ff)
+#define MT8173_CALIB_BUF0_DEGC_CALI(x)	(((x) >> 1) & 0x3f)
+#define MT8173_CALIB_BUF0_O_SLOPE(x)	(((x) >> 26) & 0x3f)
+
+#define THERMAL_NAME    "mtk-thermal"
+
+struct mtk_thermal;
+
+struct mtk_thermal_bank {
+	struct mtk_thermal *mt;
+	int id;
+};
+
+struct mtk_thermal {
+	struct device *dev;
+	void __iomem *thermal_base;
+
+	struct clk *clk_peri_therm;
+	struct clk *clk_auxadc;
+
+	struct mtk_thermal_bank banks[MT8173_NUM_ZONES];
+
+	/* lock: for getting and putting banks */
+	struct mutex lock;
+
+	/* Calibration values */
+	s32 adc_ge;
+	s32 degc_cali;
+	s32 o_slope;
+	s32 vts[MT8173_NUM_SENSORS];
+
+	struct thermal_zone_device *tzd;
+};
+
+struct mtk_thermal_bank_cfg {
+	unsigned int num_sensors;
+	unsigned int sensors[MT8173_NUM_SENSORS_PER_ZONE];
+};
+
+static const int sensor_mux_values[MT8173_NUM_SENSORS] = { 0, 1, 2, 3, 16 };
+
+/*
+ * The MT8173 thermal controller has four banks. Each bank can read up to
+ * four temperature sensors simultaneously. The MT8173 has a total of 5
+ * temperature sensors. We use each bank to measure a certain area of the
+ * SoC. Since TS2 is located centrally in the SoC it is influenced by multiple
+ * areas, hence is used in different banks.
+ *
+ * The thermal core only gets the maximum temperature of all banks, so
+ * the bank concept wouldn't be necessary here. However, the SVS (Smart
+ * Voltage Scaling) unit makes its decisions based on the same bank
+ * data, and this indeed needs the temperatures of the individual banks
+ * for making better decisions.
+ */
+static const struct mtk_thermal_bank_cfg bank_data[] = {
+	{
+		.num_sensors = 2,
+		.sensors = { MT8173_TS2, MT8173_TS3 },
+	}, {
+		.num_sensors = 2,
+		.sensors = { MT8173_TS2, MT8173_TS4 },
+	}, {
+		.num_sensors = 3,
+		.sensors = { MT8173_TS1, MT8173_TS2, MT8173_TSABB },
+	}, {
+		.num_sensors = 1,
+		.sensors = { MT8173_TS2 },
+	},
+};
+
+struct mtk_thermal_sense_point {
+	int msr;
+	int adcpnp;
+};
+
+static const struct mtk_thermal_sense_point
+		sensing_points[MT8173_NUM_SENSORS_PER_ZONE] = {
+	{
+		.msr = TEMP_MSR0,
+		.adcpnp = TEMP_ADCPNP0,
+	}, {
+		.msr = TEMP_MSR1,
+		.adcpnp = TEMP_ADCPNP1,
+	}, {
+		.msr = TEMP_MSR2,
+		.adcpnp = TEMP_ADCPNP2,
+	}, {
+		.msr = TEMP_MSR3,
+		.adcpnp = TEMP_ADCPNP3,
+	},
+};
+
+/**
+ * raw_to_mcelsius - convert a raw ADC value to mcelsius
+ * @mt:		The thermal controller
+ * @raw:	raw ADC value
+ *
+ * This converts the raw ADC value to mcelsius using the SoC specific
+ * calibration constants
+ */
+static int raw_to_mcelsius(struct mtk_thermal *mt, int sensno, s32 raw)
+{
+	s32 tmp;
+
+	raw &= 0xfff;
+
+	tmp = 203450520 << 3;
+	tmp /= 165 + mt->o_slope;
+	tmp /= 10000 + mt->adc_ge;
+	tmp *= raw - mt->vts[sensno] - 3350;
+	tmp >>= 3;
+
+	return mt->degc_cali * 500 - tmp;
+}
+
+/**
+ * mtk_thermal_get_bank - get bank
+ * @bank:	The bank
+ *
+ * The bank registers are banked, we have to select a bank in the
+ * PTPCORESEL register to access it.
+ */
+static void mtk_thermal_get_bank(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+	u32 val;
+
+	mutex_lock(&mt->lock);
+
+	val = readl(mt->thermal_base + PTPCORESEL);
+	val &= ~0xf;
+	val |= bank->id;
+	writel(val, mt->thermal_base + PTPCORESEL);
+}
+
+/**
+ * mtk_thermal_put_bank - release bank
+ * @bank:	The bank
+ *
+ * release a bank previously taken with mtk_thermal_get_bank,
+ */
+static void mtk_thermal_put_bank(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+
+	mutex_unlock(&mt->lock);
+}
+
+/**
+ * mtk_thermal_bank_temperature - get the temperature of a bank
+ * @bank:	The bank
+ *
+ * The temperature of a bank is considered the maximum temperature of
+ * the sensors associated to the bank.
+ */
+static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank)
+{
+	struct mtk_thermal *mt = bank->mt;
+	int i, temp = INT_MIN, max = INT_MIN;
+	u32 raw;
+
+	for (i = 0; i < bank_data[bank->id].num_sensors; i++) {
+		raw = readl(mt->thermal_base + sensing_points[i].msr);
+
+		temp = raw_to_mcelsius(mt, bank_data[bank->id].sensors[i], raw);
+
+		/*
+		 * The first read of a sensor often contains very high bogus
+		 * temperature value. Filter these out so that the system does
+		 * not immediately shut down.
+		 */
+		if (temp > 200000)
+			temp = 0;
+
+		if (temp > max)
+			max = temp;
+	}
+
+	return max;
+}
+
+static int mtk_read_temp(void *data, int *temperature)
+{
+	struct mtk_thermal *mt = data;
+	int i;
+	int tempmax = INT_MIN;
+
+	for (i = 0; i < MT8173_NUM_ZONES; i++) {
+		struct mtk_thermal_bank *bank = &mt->banks[i];
+
+		mtk_thermal_get_bank(bank);
+
+		tempmax = max(tempmax, mtk_thermal_bank_temperature(bank));
+
+		mtk_thermal_put_bank(bank);
+	}
+
+	*temperature = tempmax;
+
+	return 0;
+}
+
+static const struct thermal_zone_of_device_ops mtk_thermal_ops = {
+	.get_temp = mtk_read_temp,
+};
+
+static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num,
+				  u32 apmixed_phys_base, u32 auxadc_phys_base)
+{
+	struct mtk_thermal_bank *bank = &mt->banks[num];
+	const struct mtk_thermal_bank_cfg *cfg = &bank_data[num];
+	int i;
+
+	bank->id = num;
+	bank->mt = mt;
+
+	mtk_thermal_get_bank(bank);
+
+	/* bus clock 66M counting unit is 12 * 15.15ns * 256 = 46.540us */
+	writel(TEMP_MONCTL1_PERIOD_UNIT(12), mt->thermal_base + TEMP_MONCTL1);
+
+	/*
+	 * filt interval is 1 * 46.540us = 46.54us,
+	 * sen interval is 429 * 46.540us = 19.96ms
+	 */
+	writel(TEMP_MONCTL2_FILTER_INTERVAL(1) |
+			TEMP_MONCTL2_SENSOR_INTERVAL(429),
+			mt->thermal_base + TEMP_MONCTL2);
+
+	/* poll is set to 10u */
+	writel(TEMP_AHBPOLL_ADC_POLL_INTERVAL(768),
+	       mt->thermal_base + TEMP_AHBPOLL);
+
+	/* temperature sampling control, 1 sample */
+	writel(0x0, mt->thermal_base + TEMP_MSRCTL0);
+
+	/* exceed this polling time, IRQ would be inserted */
+	writel(0xffffffff, mt->thermal_base + TEMP_AHBTO);
+
+	/* number of interrupts per event, 1 is enough */
+	writel(0x0, mt->thermal_base + TEMP_MONIDET0);
+	writel(0x0, mt->thermal_base + TEMP_MONIDET1);
+
+	/*
+	 * The MT8173 thermal controller does not have its own ADC. Instead it
+	 * uses AHB bus accesses to control the AUXADC. To do this the thermal
+	 * controller has to be programmed with the physical addresses of the
+	 * AUXADC registers and with the various bit positions in the AUXADC.
+	 * Also the thermal controller controls a mux in the APMIXEDSYS register
+	 * space.
+	 */
+
+	/*
+	 * this value will be stored to TEMP_PNPMUXADDR (TEMP_SPARE0)
+	 * automatically by hw
+	 */
+	writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCMUX);
+
+	/* AHB address for auxadc mux selection */
+	writel(auxadc_phys_base + AUXADC_CON1_CLR_V,
+	       mt->thermal_base + TEMP_ADCMUXADDR);
+
+	/* AHB address for pnp sensor mux selection */
+	writel(apmixed_phys_base + APMIXED_SYS_TS_CON1,
+	       mt->thermal_base + TEMP_PNPMUXADDR);
+
+	/* AHB value for auxadc enable */
+	writel(BIT(MT8173_TEMP_AUXADC_CHANNEL), mt->thermal_base + TEMP_ADCEN);
+
+	/* AHB address for auxadc enable (channel 0 immediate mode selected) */
+	writel(auxadc_phys_base + AUXADC_CON1_SET_V,
+	       mt->thermal_base + TEMP_ADCENADDR);
+
+	/* AHB address for auxadc valid bit */
+	writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL),
+	       mt->thermal_base + TEMP_ADCVALIDADDR);
+
+	/* AHB address for auxadc voltage output */
+	writel(auxadc_phys_base + AUXADC_DATA(MT8173_TEMP_AUXADC_CHANNEL),
+	       mt->thermal_base + TEMP_ADCVOLTADDR);
+
+	/* read valid & voltage are at the same register */
+	writel(0x0, mt->thermal_base + TEMP_RDCTRL);
+
+	/* indicate where the valid bit is */
+	writel(TEMP_ADCVALIDMASK_VALID_HIGH | TEMP_ADCVALIDMASK_VALID_POS(12),
+	       mt->thermal_base + TEMP_ADCVALIDMASK);
+
+	/* no shift */
+	writel(0x0, mt->thermal_base + TEMP_ADCVOLTAGESHIFT);
+
+	/* enable auxadc mux write transaction */
+	writel(TEMP_ADCWRITECTRL_ADC_MUX_WRITE,
+	       mt->thermal_base + TEMP_ADCWRITECTRL);
+
+	for (i = 0; i < cfg->num_sensors; i++)
+		writel(sensor_mux_values[cfg->sensors[i]],
+		       mt->thermal_base + sensing_points[i].adcpnp);
+
+	writel((1 << cfg->num_sensors) - 1, mt->thermal_base + TEMP_MONCTL0);
+
+	writel(TEMP_ADCWRITECTRL_ADC_PNP_WRITE |
+	       TEMP_ADCWRITECTRL_ADC_MUX_WRITE,
+	       mt->thermal_base + TEMP_ADCWRITECTRL);
+
+	mtk_thermal_put_bank(bank);
+}
+
+static u64 of_get_phys_base(struct device_node *np)
+{
+	u64 size64;
+	const __be32 *regaddr_p;
+
+	regaddr_p = of_get_address(np, 0, &size64, NULL);
+	if (!regaddr_p)
+		return OF_BAD_ADDR;
+
+	return of_translate_address(np, regaddr_p);
+}
+
+static int mtk_thermal_get_calibration_data(struct device *dev,
+					    struct mtk_thermal *mt)
+{
+	struct nvmem_cell *cell;
+	u32 *buf;
+	size_t len;
+	int i, ret = 0;
+
+	/* Start with default values */
+	mt->adc_ge = 512;
+	for (i = 0; i < MT8173_NUM_SENSORS; i++)
+		mt->vts[i] = 260;
+	mt->degc_cali = 40;
+	mt->o_slope = 0;
+
+	cell = nvmem_cell_get(dev, "calibration-data");
+	if (IS_ERR(cell)) {
+		if (PTR_ERR(cell) == -EPROBE_DEFER)
+			return PTR_ERR(cell);
+		return 0;
+	}
+
+	buf = (u32 *)nvmem_cell_read(cell, &len);
+
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	if (len < 3 * sizeof(u32)) {
+		dev_warn(dev, "invalid calibration data\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (buf[0] & MT8173_CALIB_BUF0_VALID) {
+		mt->adc_ge = MT8173_CALIB_BUF1_ADC_GE(buf[1]);
+		mt->vts[MT8173_TS1] = MT8173_CALIB_BUF0_VTS_TS1(buf[0]);
+		mt->vts[MT8173_TS2] = MT8173_CALIB_BUF0_VTS_TS2(buf[0]);
+		mt->vts[MT8173_TS3] = MT8173_CALIB_BUF1_VTS_TS3(buf[1]);
+		mt->vts[MT8173_TS4] = MT8173_CALIB_BUF2_VTS_TS4(buf[2]);
+		mt->vts[MT8173_TSABB] = MT8173_CALIB_BUF2_VTS_TSABB(buf[2]);
+		mt->degc_cali = MT8173_CALIB_BUF0_DEGC_CALI(buf[0]);
+		mt->o_slope = MT8173_CALIB_BUF0_O_SLOPE(buf[0]);
+	} else {
+		dev_info(dev, "Device not calibrated, using default calibration values\n");
+	}
+
+out:
+	kfree(buf);
+
+	return ret;
+}
+
+static int mtk_thermal_probe(struct platform_device *pdev)
+{
+	int ret, i;
+	struct device_node *auxadc, *apmixedsys, *np = pdev->dev.of_node;
+	struct mtk_thermal *mt;
+	struct resource *res;
+	u64 auxadc_phys_base, apmixed_phys_base;
+
+	mt = devm_kzalloc(&pdev->dev, sizeof(*mt), GFP_KERNEL);
+	if (!mt)
+		return -ENOMEM;
+
+	mt->clk_peri_therm = devm_clk_get(&pdev->dev, "therm");
+	if (IS_ERR(mt->clk_peri_therm))
+		return PTR_ERR(mt->clk_peri_therm);
+
+	mt->clk_auxadc = devm_clk_get(&pdev->dev, "auxadc");
+	if (IS_ERR(mt->clk_auxadc))
+		return PTR_ERR(mt->clk_auxadc);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mt->thermal_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mt->thermal_base))
+		return PTR_ERR(mt->thermal_base);
+
+	ret = mtk_thermal_get_calibration_data(&pdev->dev, mt);
+	if (ret)
+		return ret;
+
+	mutex_init(&mt->lock);
+
+	mt->dev = &pdev->dev;
+
+	auxadc = of_parse_phandle(np, "mediatek,auxadc", 0);
+	if (!auxadc) {
+		dev_err(&pdev->dev, "missing auxadc node\n");
+		return -ENODEV;
+	}
+
+	auxadc_phys_base = of_get_phys_base(auxadc);
+
+	of_node_put(auxadc);
+
+	if (auxadc_phys_base == OF_BAD_ADDR) {
+		dev_err(&pdev->dev, "Can't get auxadc phys address\n");
+		return -EINVAL;
+	}
+
+	apmixedsys = of_parse_phandle(np, "mediatek,apmixedsys", 0);
+	if (!apmixedsys) {
+		dev_err(&pdev->dev, "missing apmixedsys node\n");
+		return -ENODEV;
+	}
+
+	apmixed_phys_base = of_get_phys_base(apmixedsys);
+
+	of_node_put(apmixedsys);
+
+	if (apmixed_phys_base == OF_BAD_ADDR) {
+		dev_err(&pdev->dev, "Can't get auxadc phys address\n");
+		return -EINVAL;
+	}
+
+	ret = clk_prepare_enable(mt->clk_auxadc);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't enable auxadc clk: %d\n", ret);
+		return ret;
+	}
+
+	ret = device_reset(&pdev->dev);
+	if (ret)
+		goto err_disable_clk_auxadc;
+
+	ret = clk_prepare_enable(mt->clk_peri_therm);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't enable peri clk: %d\n", ret);
+		goto err_disable_clk_auxadc;
+	}
+
+	for (i = 0; i < MT8173_NUM_ZONES; i++)
+		mtk_thermal_init_bank(mt, i, apmixed_phys_base,
+				      auxadc_phys_base);
+
+	platform_set_drvdata(pdev, mt);
+
+	mt->tzd = thermal_zone_of_sensor_register(&pdev->dev, 0, mt,
+				&mtk_thermal_ops);
+	if (IS_ERR(mt->tzd))
+		goto err_register;
+
+	return 0;
+
+err_register:
+	clk_disable_unprepare(mt->clk_peri_therm);
+
+err_disable_clk_auxadc:
+	clk_disable_unprepare(mt->clk_auxadc);
+
+	return ret;
+}
+
+static int mtk_thermal_remove(struct platform_device *pdev)
+{
+	struct mtk_thermal *mt = platform_get_drvdata(pdev);
+
+	thermal_zone_of_sensor_unregister(&pdev->dev, mt->tzd);
+
+	clk_disable_unprepare(mt->clk_peri_therm);
+	clk_disable_unprepare(mt->clk_auxadc);
+
+	return 0;
+}
+
+static const struct of_device_id mtk_thermal_of_match[] = {
+	{
+		.compatible = "mediatek,mt8173-thermal",
+	}, {
+	},
+};
+
+static struct platform_driver mtk_thermal_driver = {
+	.probe = mtk_thermal_probe,
+	.remove = mtk_thermal_remove,
+	.driver = {
+		.name = THERMAL_NAME,
+		.of_match_table = mtk_thermal_of_match,
+	},
+};
+
+module_platform_driver(mtk_thermal_driver);
+
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de");
+MODULE_AUTHOR("Hanyi Wu <hanyi.wu@mediatek.com>");
+MODULE_DESCRIPTION("Mediatek thermal driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c
index 9043f8f91852..49ac23d3e776 100644
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -555,6 +555,87 @@ void thermal_zone_of_sensor_unregister(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_of_sensor_unregister);
 
+static void devm_thermal_zone_of_sensor_release(struct device *dev, void *res)
+{
+	thermal_zone_of_sensor_unregister(dev,
+					  *(struct thermal_zone_device **)res);
+}
+
+static int devm_thermal_zone_of_sensor_match(struct device *dev, void *res,
+					     void *data)
+{
+	struct thermal_zone_device **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+/**
+ * devm_thermal_zone_of_sensor_register - Resource managed version of
+ *				thermal_zone_of_sensor_register()
+ * @dev: a valid struct device pointer of a sensor device. Must contain
+ *       a valid .of_node, for the sensor node.
+ * @sensor_id: a sensor identifier, in case the sensor IP has more
+ *	       than one sensors
+ * @data: a private pointer (owned by the caller) that will be passed
+ *	  back, when a temperature reading is needed.
+ * @ops: struct thermal_zone_of_device_ops *. Must contain at least .get_temp.
+ *
+ * Refer thermal_zone_of_sensor_register() for more details.
+ *
+ * Return: On success returns a valid struct thermal_zone_device,
+ * otherwise, it returns a corresponding ERR_PTR(). Caller must
+ * check the return value with help of IS_ERR() helper.
+ * Registered hermal_zone_device device will automatically be
+ * released when device is unbounded.
+ */
+struct thermal_zone_device *devm_thermal_zone_of_sensor_register(
+	struct device *dev, int sensor_id,
+	void *data, const struct thermal_zone_of_device_ops *ops)
+{
+	struct thermal_zone_device **ptr, *tzd;
+
+	ptr = devres_alloc(devm_thermal_zone_of_sensor_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	tzd = thermal_zone_of_sensor_register(dev, sensor_id, data, ops);
+	if (IS_ERR(tzd)) {
+		devres_free(ptr);
+		return tzd;
+	}
+
+	*ptr = tzd;
+	devres_add(dev, ptr);
+
+	return tzd;
+}
+EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_register);
+
+/**
+ * devm_thermal_zone_of_sensor_unregister - Resource managed version of
+ *				thermal_zone_of_sensor_unregister().
+ * @dev: Device for which which resource was allocated.
+ * @tzd: a pointer to struct thermal_zone_device where the sensor is registered.
+ *
+ * This function removes the sensor callbacks and private data from the
+ * thermal zone device registered with devm_thermal_zone_of_sensor_register()
+ * API. It will also silent the zone by remove the .get_temp() and .get_trend()
+ * thermal zone device callbacks.
+ * Normally this function will not need to be called and the resource
+ * management code will ensure that the resource is freed.
+ */
+void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+					    struct thermal_zone_device *tzd)
+{
+	WARN_ON(devres_release(dev, devm_thermal_zone_of_sensor_release,
+			       devm_thermal_zone_of_sensor_match, tzd));
+}
+EXPORT_SYMBOL_GPL(devm_thermal_zone_of_sensor_unregister);
+
 /***   functions parsing device tree nodes   ***/
 
 /**
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 0e735acea33a..82daba09e150 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -430,8 +430,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
 	struct rcar_thermal_priv *priv;
 	struct device *dev = &pdev->dev;
 	struct resource *res, *irq;
-	const struct of_device_id *of_id = of_match_device(rcar_thermal_dt_ids, dev);
-	unsigned long of_data = (unsigned long)of_id->data;
+	unsigned long of_data = (unsigned long)of_device_get_match_data(dev);
 	int mres = 0;
 	int i;
 	int ret = -ENODEV;
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index b58e3fb9b311..233a564442a0 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -58,8 +58,8 @@ enum sensor_id {
 
 /**
  * The conversion table has the adc value and temperature.
- * ADC_DECREMENT: the adc value is of diminishing.(e.g. v2_code_table)
- * ADC_INCREMENT: the adc value is incremental.(e.g. v3_code_table)
+ * ADC_DECREMENT: the adc value is of diminishing.(e.g. rk3288_code_table)
+ * ADC_INCREMENT: the adc value is incremental.(e.g. rk3368_code_table)
  */
 enum adc_sort_mode {
 	ADC_DECREMENT = 0,
@@ -135,7 +135,13 @@ struct rockchip_thermal_data {
 	enum tshut_polarity tshut_polarity;
 };
 
-/* TSADC Sensor info define: */
+/**
+ * TSADC Sensor Register description:
+ *
+ * TSADCV2_* are used for RK3288 SoCs, the other chips can reuse it.
+ * TSADCV3_* are used for newer SoCs than RK3288. (e.g: RK3228, RK3399)
+ *
+ */
 #define TSADCV2_AUTO_CON			0x04
 #define TSADCV2_INT_EN				0x08
 #define TSADCV2_INT_PD				0x0c
@@ -149,13 +155,20 @@ struct rockchip_thermal_data {
 #define TSADCV2_AUTO_EN				BIT(0)
 #define TSADCV2_AUTO_SRC_EN(chn)		BIT(4 + (chn))
 #define TSADCV2_AUTO_TSHUT_POLARITY_HIGH	BIT(8)
+/**
+ * TSADCV1_AUTO_Q_SEL_EN:
+ * whether select (1024 - tsadc_q) as output
+ * 1'b0:use tsadc_q as output(temperature-code is rising sequence)
+ * 1'b1:use(1024 - tsadc_q) as output (temperature-code is falling sequence)
+ */
+#define TSADCV3_AUTO_Q_SEL_EN			BIT(1)
 
 #define TSADCV2_INT_SRC_EN(chn)			BIT(chn)
 #define TSADCV2_SHUT_2GPIO_SRC_EN(chn)		BIT(4 + (chn))
 #define TSADCV2_SHUT_2CRU_SRC_EN(chn)		BIT(8 + (chn))
 
-#define TSADCV1_INT_PD_CLEAR_MASK		~BIT(16)
 #define TSADCV2_INT_PD_CLEAR_MASK		~BIT(8)
+#define TSADCV3_INT_PD_CLEAR_MASK		~BIT(16)
 
 #define TSADCV2_DATA_MASK			0xfff
 #define TSADCV3_DATA_MASK			0x3ff
@@ -177,45 +190,46 @@ struct tsadc_table {
  * linearly interpolated.
  * Code to Temperature mapping should be updated based on sillcon results.
  */
-static const struct tsadc_table v1_code_table[] = {
-	{TSADCV3_DATA_MASK, -40000},
-	{436, -40000},
-	{431, -35000},
-	{426, -30000},
-	{421, -25000},
-	{416, -20000},
-	{411, -15000},
-	{406, -10000},
-	{401, -5000},
-	{395, 0},
-	{390, 5000},
-	{385, 10000},
-	{380, 15000},
-	{375, 20000},
-	{370, 25000},
-	{364, 30000},
-	{359, 35000},
-	{354, 40000},
-	{349, 45000},
-	{343, 50000},
-	{338, 55000},
-	{333, 60000},
-	{328, 65000},
-	{322, 70000},
-	{317, 75000},
-	{312, 80000},
-	{307, 85000},
-	{301, 90000},
-	{296, 95000},
-	{291, 100000},
-	{286, 105000},
-	{280, 110000},
-	{275, 115000},
-	{270, 120000},
-	{264, 125000},
+static const struct tsadc_table rk3228_code_table[] = {
+	{0, -40000},
+	{588, -40000},
+	{593, -35000},
+	{598, -30000},
+	{603, -25000},
+	{608, -20000},
+	{613, -15000},
+	{618, -10000},
+	{623, -5000},
+	{629, 0},
+	{634, 5000},
+	{639, 10000},
+	{644, 15000},
+	{649, 20000},
+	{654, 25000},
+	{660, 30000},
+	{665, 35000},
+	{670, 40000},
+	{675, 45000},
+	{681, 50000},
+	{686, 55000},
+	{691, 60000},
+	{696, 65000},
+	{702, 70000},
+	{707, 75000},
+	{712, 80000},
+	{717, 85000},
+	{723, 90000},
+	{728, 95000},
+	{733, 100000},
+	{738, 105000},
+	{744, 110000},
+	{749, 115000},
+	{754, 120000},
+	{760, 125000},
+	{TSADCV2_DATA_MASK, 125000},
 };
 
-static const struct tsadc_table v2_code_table[] = {
+static const struct tsadc_table rk3288_code_table[] = {
 	{TSADCV2_DATA_MASK, -40000},
 	{3800, -40000},
 	{3792, -35000},
@@ -253,7 +267,7 @@ static const struct tsadc_table v2_code_table[] = {
 	{3421, 125000},
 };
 
-static const struct tsadc_table v3_code_table[] = {
+static const struct tsadc_table rk3368_code_table[] = {
 	{0, -40000},
 	{106, -40000},
 	{108, -35000},
@@ -292,42 +306,43 @@ static const struct tsadc_table v3_code_table[] = {
 	{TSADCV3_DATA_MASK, 125000},
 };
 
-static const struct tsadc_table v4_code_table[] = {
-	{TSADCV3_DATA_MASK, -40000},
-	{431, -40000},
-	{426, -35000},
-	{421, -30000},
-	{415, -25000},
-	{410, -20000},
-	{405, -15000},
-	{399, -10000},
-	{394, -5000},
-	{389, 0},
-	{383, 5000},
-	{378, 10000},
-	{373, 15000},
-	{367, 20000},
-	{362, 25000},
-	{357, 30000},
-	{351, 35000},
-	{346, 40000},
-	{340, 45000},
-	{335, 50000},
-	{330, 55000},
-	{324, 60000},
-	{319, 65000},
-	{313, 70000},
-	{308, 75000},
-	{302, 80000},
-	{297, 85000},
-	{291, 90000},
-	{286, 95000},
-	{281, 100000},
-	{275, 105000},
-	{270, 110000},
-	{264, 115000},
-	{259, 120000},
-	{253, 125000},
+static const struct tsadc_table rk3399_code_table[] = {
+	{0, -40000},
+	{593, -40000},
+	{598, -35000},
+	{603, -30000},
+	{609, -25000},
+	{614, -20000},
+	{619, -15000},
+	{625, -10000},
+	{630, -5000},
+	{635, 0},
+	{641, 5000},
+	{646, 10000},
+	{651, 15000},
+	{657, 20000},
+	{662, 25000},
+	{667, 30000},
+	{673, 35000},
+	{678, 40000},
+	{684, 45000},
+	{689, 50000},
+	{694, 55000},
+	{700, 60000},
+	{705, 65000},
+	{711, 70000},
+	{716, 75000},
+	{722, 80000},
+	{727, 85000},
+	{733, 90000},
+	{738, 95000},
+	{743, 100000},
+	{749, 105000},
+	{754, 110000},
+	{760, 115000},
+	{765, 120000},
+	{771, 125000},
+	{TSADCV3_DATA_MASK, 125000},
 };
 
 static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
@@ -411,7 +426,7 @@ static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,
 	 * temperature between 2 table entries is linear and interpolate
 	 * to produce less granular result.
 	 */
-	num = table.id[mid].temp - v2_code_table[mid - 1].temp;
+	num = table.id[mid].temp - table.id[mid - 1].temp;
 	num *= abs(table.id[mid - 1].code - code);
 	denom = abs(table.id[mid - 1].code - table.id[mid].code);
 	*temp = table.id[mid - 1].temp + (num / denom);
@@ -453,20 +468,20 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
 		       regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
 }
 
-static void rk_tsadcv1_irq_ack(void __iomem *regs)
+static void rk_tsadcv2_irq_ack(void __iomem *regs)
 {
 	u32 val;
 
 	val = readl_relaxed(regs + TSADCV2_INT_PD);
-	writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+	writel_relaxed(val & TSADCV2_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
 }
 
-static void rk_tsadcv2_irq_ack(void __iomem *regs)
+static void rk_tsadcv3_irq_ack(void __iomem *regs)
 {
 	u32 val;
 
 	val = readl_relaxed(regs + TSADCV2_INT_PD);
-	writel_relaxed(val & TSADCV2_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+	writel_relaxed(val & TSADCV3_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
 }
 
 static void rk_tsadcv2_control(void __iomem *regs, bool enable)
@@ -482,6 +497,25 @@ static void rk_tsadcv2_control(void __iomem *regs, bool enable)
 	writel_relaxed(val, regs + TSADCV2_AUTO_CON);
 }
 
+/**
+ * @rk_tsadcv3_control:
+ * TSADC controller works at auto mode, and some SoCs need set the tsadc_q_sel
+ * bit on TSADCV2_AUTO_CON[1]. The (1024 - tsadc_q) as output adc value if
+ * setting this bit to enable.
+ */
+static void rk_tsadcv3_control(void __iomem *regs, bool enable)
+{
+	u32 val;
+
+	val = readl_relaxed(regs + TSADCV2_AUTO_CON);
+	if (enable)
+		val |= TSADCV2_AUTO_EN | TSADCV3_AUTO_Q_SEL_EN;
+	else
+		val &= ~TSADCV2_AUTO_EN;
+
+	writel_relaxed(val, regs + TSADCV2_AUTO_CON);
+}
+
 static int rk_tsadcv2_get_temp(struct chip_tsadc_table table,
 			       int chn, void __iomem *regs, int *temp)
 {
@@ -531,17 +565,17 @@ static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
 	.tshut_temp = 95000,
 
 	.initialize = rk_tsadcv2_initialize,
-	.irq_ack = rk_tsadcv1_irq_ack,
-	.control = rk_tsadcv2_control,
+	.irq_ack = rk_tsadcv3_irq_ack,
+	.control = rk_tsadcv3_control,
 	.get_temp = rk_tsadcv2_get_temp,
 	.set_tshut_temp = rk_tsadcv2_tshut_temp,
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v1_code_table,
-		.length = ARRAY_SIZE(v1_code_table),
+		.id = rk3228_code_table,
+		.length = ARRAY_SIZE(rk3228_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
-		.mode = ADC_DECREMENT,
+		.mode = ADC_INCREMENT,
 	},
 };
 
@@ -562,8 +596,8 @@ static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v2_code_table,
-		.length = ARRAY_SIZE(v2_code_table),
+		.id = rk3288_code_table,
+		.length = ARRAY_SIZE(rk3288_code_table),
 		.data_mask = TSADCV2_DATA_MASK,
 		.mode = ADC_DECREMENT,
 	},
@@ -586,8 +620,8 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v3_code_table,
-		.length = ARRAY_SIZE(v3_code_table),
+		.id = rk3368_code_table,
+		.length = ARRAY_SIZE(rk3368_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
 		.mode = ADC_INCREMENT,
 	},
@@ -603,17 +637,17 @@ static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
 	.tshut_temp = 95000,
 
 	.initialize = rk_tsadcv2_initialize,
-	.irq_ack = rk_tsadcv1_irq_ack,
-	.control = rk_tsadcv2_control,
+	.irq_ack = rk_tsadcv3_irq_ack,
+	.control = rk_tsadcv3_control,
 	.get_temp = rk_tsadcv2_get_temp,
 	.set_tshut_temp = rk_tsadcv2_tshut_temp,
 	.set_tshut_mode = rk_tsadcv2_tshut_mode,
 
 	.table = {
-		.id = v4_code_table,
-		.length = ARRAY_SIZE(v4_code_table),
+		.id = rk3399_code_table,
+		.length = ARRAY_SIZE(rk3399_code_table),
 		.data_mask = TSADCV3_DATA_MASK,
-		.mode = ADC_DECREMENT,
+		.mode = ADC_INCREMENT,
 	},
 };
 
@@ -693,15 +727,14 @@ static int rockchip_configure_from_dt(struct device *dev,
 			 thermal->chip->tshut_temp);
 		thermal->tshut_temp = thermal->chip->tshut_temp;
 	} else {
+		if (shut_temp > INT_MAX) {
+			dev_err(dev, "Invalid tshut temperature specified: %d\n",
+				shut_temp);
+			return -ERANGE;
+		}
 		thermal->tshut_temp = shut_temp;
 	}
 
-	if (thermal->tshut_temp > INT_MAX) {
-		dev_err(dev, "Invalid tshut temperature specified: %d\n",
-			thermal->tshut_temp);
-		return -ERANGE;
-	}
-
 	if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) {
 		dev_warn(dev,
 			 "Missing tshut mode property, using default (%s)\n",
diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
index e0da3865e060..222e644169f0 100644
--- a/drivers/thermal/samsung/Kconfig
+++ b/drivers/thermal/samsung/Kconfig
@@ -1,6 +1,7 @@
 config EXYNOS_THERMAL
 	tristate "Exynos thermal management unit driver"
 	depends on THERMAL_OF
+	depends on HAS_IOMEM
 	help
 	  If you say yes here you get support for the TMU (Thermal Management
 	  Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index fa61eff88496..f3ce94ec73b5 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -184,6 +184,7 @@
  * @temp_error2: fused value of the second point trim.
  * @regulator: pointer to the TMU regulator structure.
  * @reg_conf: pointer to structure to register with core thermal.
+ * @ntrip: number of supported trip points.
  * @tmu_initialize: SoC specific TMU initialization method
  * @tmu_control: SoC specific TMU control method
  * @tmu_read: SoC specific TMU temperature read method
@@ -203,6 +204,7 @@ struct exynos_tmu_data {
 	u16 temp_error1, temp_error2;
 	struct regulator *regulator;
 	struct thermal_zone_device *tzd;
+	unsigned int ntrip;
 
 	int (*tmu_initialize)(struct platform_device *pdev);
 	void (*tmu_control)(struct platform_device *pdev, bool on);
@@ -346,6 +348,14 @@ static int exynos_tmu_initialize(struct platform_device *pdev)
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	int ret;
 
+	if (of_thermal_get_ntrips(data->tzd) > data->ntrip) {
+		dev_info(&pdev->dev,
+			 "More trip points than supported by this TMU.\n");
+		dev_info(&pdev->dev,
+			 "%d trip points should be configured in polling mode.\n",
+			 (of_thermal_get_ntrips(data->tzd) - data->ntrip));
+	}
+
 	mutex_lock(&data->lock);
 	clk_enable(data->clk);
 	if (!IS_ERR(data->clk_sec))
@@ -1210,6 +1220,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		data->tmu_control = exynos4210_tmu_control;
 		data->tmu_read = exynos4210_tmu_read;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS3250:
 	case SOC_ARCH_EXYNOS4412:
@@ -1222,6 +1233,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		data->tmu_read = exynos4412_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS5433:
 		data->tmu_initialize = exynos5433_tmu_initialize;
@@ -1229,6 +1241,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		data->tmu_read = exynos4412_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 8;
 		break;
 	case SOC_ARCH_EXYNOS5440:
 		data->tmu_initialize = exynos5440_tmu_initialize;
@@ -1236,6 +1249,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		data->tmu_read = exynos5440_tmu_read;
 		data->tmu_set_emulation = exynos5440_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos5440_tmu_clear_irqs;
+		data->ntrip = 4;
 		break;
 	case SOC_ARCH_EXYNOS7:
 		data->tmu_initialize = exynos7_tmu_initialize;
@@ -1243,6 +1257,7 @@ static int exynos_map_dt_data(struct platform_device *pdev)
 		data->tmu_read = exynos7_tmu_read;
 		data->tmu_set_emulation = exynos4412_tmu_set_emulation;
 		data->tmu_clear_irqs = exynos4210_tmu_clear_irqs;
+		data->ntrip = 8;
 		break;
 	default:
 		dev_err(&pdev->dev, "Platform not supported\n");
@@ -1295,7 +1310,7 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 	 * TODO: Add regulator as an SOC feature, so that regulator enable
 	 * is a compulsory call.
 	 */
-	data->regulator = devm_regulator_get(&pdev->dev, "vtmu");
+	data->regulator = devm_regulator_get_optional(&pdev->dev, "vtmu");
 	if (!IS_ERR(data->regulator)) {
 		ret = regulator_enable(data->regulator);
 		if (ret) {
@@ -1303,6 +1318,8 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 			return ret;
 		}
 	} else {
+		if (PTR_ERR(data->regulator) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
 		dev_info(&pdev->dev, "Regulator node (vtmu) not found\n");
 	}
 
diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c
index 74ea5765938b..136975220c92 100644
--- a/drivers/thermal/tegra_soctherm.c
+++ b/drivers/thermal/tegra_soctherm.c
@@ -57,7 +57,7 @@
 #define READBACK_VALUE_MASK			0xff00
 #define READBACK_VALUE_SHIFT			8
 #define READBACK_ADD_HALF			BIT(7)
-#define READBACK_NEGATE				BIT(1)
+#define READBACK_NEGATE				BIT(0)
 
 #define FUSE_TSENSOR8_CALIB			0x180
 #define FUSE_SPARE_REALIGNMENT_REG_0		0x1fc
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a0a8fd1235e2..d4b54653ecf8 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -454,6 +454,10 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
 {
 	enum thermal_trip_type type;
 
+	/* Ignore disabled trip points */
+	if (test_bit(trip, &tz->trips_disabled))
+		return;
+
 	tz->ops->get_trip_type(tz, trip, &type);
 
 	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
@@ -1800,6 +1804,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 {
 	struct thermal_zone_device *tz;
 	enum thermal_trip_type trip_type;
+	int trip_temp;
 	int result;
 	int count;
 	int passive = 0;
@@ -1871,9 +1876,15 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 		goto unregister;
 
 	for (count = 0; count < trips; count++) {
-		tz->ops->get_trip_type(tz, count, &trip_type);
+		if (tz->ops->get_trip_type(tz, count, &trip_type))
+			set_bit(count, &tz->trips_disabled);
 		if (trip_type == THERMAL_TRIP_PASSIVE)
 			passive = 1;
+		if (tz->ops->get_trip_temp(tz, count, &trip_temp))
+			set_bit(count, &tz->trips_disabled);
+		/* Check for bogus trip points */
+		if (trip_temp == 0)
+			set_bit(count, &tz->trips_disabled);
 	}
 
 	if (!passive) {
diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index 1e34a1efc554..06ea9766a70a 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -1265,7 +1265,7 @@ static
 int ti_bandgap_probe(struct platform_device *pdev)
 {
 	struct ti_bandgap *bgp;
-	int clk_rate, ret = 0, i;
+	int clk_rate, ret, i;
 
 	bgp = ti_bandgap_build(pdev);
 	if (IS_ERR(bgp)) {
@@ -1288,16 +1288,14 @@ int ti_bandgap_probe(struct platform_device *pdev)
 	}
 
 	bgp->fclock = clk_get(NULL, bgp->conf->fclock_name);
-	ret = IS_ERR(bgp->fclock);
-	if (ret) {
+	if (IS_ERR(bgp->fclock)) {
 		dev_err(&pdev->dev, "failed to request fclock reference\n");
 		ret = PTR_ERR(bgp->fclock);
 		goto free_irqs;
 	}
 
 	bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name);
-	ret = IS_ERR(bgp->div_clk);
-	if (ret) {
+	if (IS_ERR(bgp->div_clk)) {
 		dev_err(&pdev->dev, "failed to request div_ts_ck clock ref\n");
 		ret = PTR_ERR(bgp->div_clk);
 		goto free_irqs;
@@ -1314,7 +1312,7 @@ int ti_bandgap_probe(struct platform_device *pdev)
 		 * may not be accurate
 		 */
 		val = ti_bandgap_readl(bgp, tsr->bgap_efuse);
-		if (ret || !val)
+		if (!val)
 			dev_info(&pdev->dev,
 				 "Non-trimmed BGAP, Temp not accurate\n");
 	}
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index f417fa1ee47c..5e87e4866bcb 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -25,6 +25,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/serial_core.h>
 
 #include <asm/io.h>
 #include <asm/xen/hypervisor.h>
@@ -245,6 +246,18 @@ err:
 	return -ENODEV;
 }
 
+static int xencons_info_pv_init(struct xencons_info *info, int vtermno)
+{
+	info->evtchn = xen_start_info->console.domU.evtchn;
+	/* GFN == MFN for PV guest */
+	info->intf = gfn_to_virt(xen_start_info->console.domU.mfn);
+	info->vtermno = vtermno;
+
+	list_add_tail(&info->list, &xenconsoles);
+
+	return 0;
+}
+
 static int xen_pv_console_init(void)
 {
 	struct xencons_info *info;
@@ -264,13 +277,8 @@ static int xen_pv_console_init(void)
 		/* already configured */
 		return 0;
 	}
-	info->evtchn = xen_start_info->console.domU.evtchn;
-	/* GFN == MFN for PV guest */
-	info->intf = gfn_to_virt(xen_start_info->console.domU.mfn);
-	info->vtermno = HVC_COOKIE;
-
 	spin_lock(&xencons_lock);
-	list_add_tail(&info->list, &xenconsoles);
+	xencons_info_pv_init(info, HVC_COOKIE);
 	spin_unlock(&xencons_lock);
 
 	return 0;
@@ -597,15 +605,39 @@ static int xen_cons_init(void)
 }
 console_initcall(xen_cons_init);
 
+#ifdef CONFIG_X86
+static void xen_hvm_early_write(uint32_t vtermno, const char *str, int len)
+{
+	if (xen_cpuid_base())
+		outsb(0xe9, str, len);
+}
+#else
+static void xen_hvm_early_write(uint32_t vtermno, const char *str, int len) { }
+#endif
+
 #ifdef CONFIG_EARLY_PRINTK
+static int __init xenboot_setup_console(struct console *console, char *string)
+{
+	static struct xencons_info xenboot;
+
+	if (xen_initial_domain())
+		return 0;
+	if (!xen_pv_domain())
+		return -ENODEV;
+
+	return xencons_info_pv_init(&xenboot, 0);
+}
+
 static void xenboot_write_console(struct console *console, const char *string,
 				  unsigned len)
 {
 	unsigned int linelen, off = 0;
 	const char *pos;
 
-	if (!xen_pv_domain())
+	if (!xen_pv_domain()) {
+		xen_hvm_early_write(0, string, len);
 		return;
+	}
 
 	dom0_write_console(0, string, len);
 
@@ -628,6 +660,7 @@ static void xenboot_write_console(struct console *console, const char *string,
 struct console xenboot_console = {
 	.name		= "xenboot",
 	.write		= xenboot_write_console,
+	.setup		= xenboot_setup_console,
 	.flags		= CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
 	.index		= -1,
 };
@@ -640,17 +673,10 @@ void xen_raw_console_write(const char *str)
 
 	if (xen_domain()) {
 		rc = dom0_write_console(0, str, len);
-#ifdef CONFIG_X86
-		if (rc == -ENOSYS && xen_hvm_domain())
-			goto outb_print;
-
-	} else if (xen_cpuid_base()) {
-		int i;
-outb_print:
-		for (i = 0; i < len; i++)
-			outb(str[i], 0xe9);
-#endif
+		if (rc != -ENOSYS || !xen_hvm_domain())
+			return;
 	}
+	xen_hvm_early_write(0, str, len);
 }
 
 void xen_raw_printk(const char *fmt, ...)
@@ -664,3 +690,18 @@ void xen_raw_printk(const char *fmt, ...)
 
 	xen_raw_console_write(buf);
 }
+
+static void xenboot_earlycon_write(struct console *console,
+				  const char *string,
+				  unsigned len)
+{
+	dom0_write_console(0, string, len);
+}
+
+static int __init xenboot_earlycon_setup(struct earlycon_device *device,
+					    const char *opt)
+{
+	device->con->write = xenboot_earlycon_write;
+	return 0;
+}
+EARLYCON_DECLARE(xenboot, xenboot_earlycon_setup);
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index 9c9f74155066..974335377d9f 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -42,6 +42,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev)
 	struct device		*dev = &pdev->dev;
 	struct device_node	*np = dev->of_node;
 
+	unsigned int		count;
 	int			ret;
 	int			i;
 
@@ -49,11 +50,11 @@ static int dwc3_of_simple_probe(struct platform_device *pdev)
 	if (!simple)
 		return -ENOMEM;
 
-	ret = of_clk_get_parent_count(np);
-	if (ret < 0)
-		return ret;
+	count = of_clk_get_parent_count(np);
+	if (!count)
+		return -ENOENT;
 
-	simple->num_clocks = ret;
+	simple->num_clocks = count;
 
 	simple->clks = devm_kcalloc(dev, simple->num_clocks,
 			sizeof(struct clk *), GFP_KERNEL);
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index dfb733047a4c..2ace0295408e 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -41,13 +41,6 @@ static inline struct f_uas *to_f_uas(struct usb_function *f)
 	return container_of(f, struct f_uas, function);
 }
 
-static void usbg_cmd_release(struct kref *);
-
-static inline void usbg_cleanup_cmd(struct usbg_cmd *cmd)
-{
-	kref_put(&cmd->ref, usbg_cmd_release);
-}
-
 /* Start bot.c code */
 
 static int bot_enqueue_cmd_cbw(struct f_uas *fu)
@@ -68,7 +61,7 @@ static void bot_status_complete(struct usb_ep *ep, struct usb_request *req)
 	struct usbg_cmd *cmd = req->context;
 	struct f_uas *fu = cmd->fu;
 
-	usbg_cleanup_cmd(cmd);
+	transport_generic_free_cmd(&cmd->se_cmd, 0);
 	if (req->status < 0) {
 		pr_err("ERR %s(%d)\n", __func__, __LINE__);
 		return;
@@ -605,7 +598,7 @@ static void uasp_status_data_cmpl(struct usb_ep *ep, struct usb_request *req)
 		break;
 
 	case UASP_QUEUE_COMMAND:
-		usbg_cleanup_cmd(cmd);
+		transport_generic_free_cmd(&cmd->se_cmd, 0);
 		usb_ep_queue(fu->ep_cmd, fu->cmd.req, GFP_ATOMIC);
 		break;
 
@@ -615,7 +608,7 @@ static void uasp_status_data_cmpl(struct usb_ep *ep, struct usb_request *req)
 	return;
 
 cleanup:
-	usbg_cleanup_cmd(cmd);
+	transport_generic_free_cmd(&cmd->se_cmd, 0);
 }
 
 static int uasp_send_status_response(struct usbg_cmd *cmd)
@@ -977,7 +970,7 @@ static void usbg_data_write_cmpl(struct usb_ep *ep, struct usb_request *req)
 	return;
 
 cleanup:
-	usbg_cleanup_cmd(cmd);
+	transport_generic_free_cmd(&cmd->se_cmd, 0);
 }
 
 static int usbg_prepare_w_request(struct usbg_cmd *cmd, struct usb_request *req)
@@ -1046,7 +1039,7 @@ static void usbg_cmd_work(struct work_struct *work)
 	struct se_cmd *se_cmd;
 	struct tcm_usbg_nexus *tv_nexus;
 	struct usbg_tpg *tpg;
-	int dir;
+	int dir, flags = (TARGET_SCF_UNKNOWN_SIZE | TARGET_SCF_ACK_KREF);
 
 	se_cmd = &cmd->se_cmd;
 	tpg = cmd->fu->tpg;
@@ -1060,9 +1053,9 @@ static void usbg_cmd_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (target_submit_cmd(se_cmd, tv_nexus->tvn_se_sess,
-			cmd->cmd_buf, cmd->sense_iu.sense, cmd->unpacked_lun,
-			0, cmd->prio_attr, dir, TARGET_SCF_UNKNOWN_SIZE) < 0)
+	if (target_submit_cmd(se_cmd, tv_nexus->tvn_se_sess, cmd->cmd_buf,
+			      cmd->sense_iu.sense, cmd->unpacked_lun, 0,
+			      cmd->prio_attr, dir, flags) < 0)
 		goto out;
 
 	return;
@@ -1070,42 +1063,64 @@ static void usbg_cmd_work(struct work_struct *work)
 out:
 	transport_send_check_condition_and_sense(se_cmd,
 			TCM_UNSUPPORTED_SCSI_OPCODE, 1);
-	usbg_cleanup_cmd(cmd);
+	transport_generic_free_cmd(&cmd->se_cmd, 0);
 }
 
+static struct usbg_cmd *usbg_get_cmd(struct f_uas *fu,
+		struct tcm_usbg_nexus *tv_nexus, u32 scsi_tag)
+{
+	struct se_session *se_sess = tv_nexus->tvn_se_sess;
+	struct usbg_cmd *cmd;
+	int tag;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC);
+	if (tag < 0)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = &((struct usbg_cmd *)se_sess->sess_cmd_map)[tag];
+	memset(cmd, 0, sizeof(*cmd));
+	cmd->se_cmd.map_tag = tag;
+	cmd->se_cmd.tag = cmd->tag = scsi_tag;
+	cmd->fu = fu;
+
+	return cmd;
+}
+
+static void usbg_release_cmd(struct se_cmd *);
+
 static int usbg_submit_command(struct f_uas *fu,
 		void *cmdbuf, unsigned int len)
 {
 	struct command_iu *cmd_iu = cmdbuf;
 	struct usbg_cmd *cmd;
-	struct usbg_tpg *tpg;
-	struct tcm_usbg_nexus *tv_nexus;
+	struct usbg_tpg *tpg = fu->tpg;
+	struct tcm_usbg_nexus *tv_nexus = tpg->tpg_nexus;
 	u32 cmd_len;
+	u16 scsi_tag;
 
 	if (cmd_iu->iu_id != IU_ID_COMMAND) {
 		pr_err("Unsupported type %d\n", cmd_iu->iu_id);
 		return -EINVAL;
 	}
 
-	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
-	if (!cmd)
-		return -ENOMEM;
-
-	cmd->fu = fu;
-
-	/* XXX until I figure out why I can't free in on complete */
-	kref_init(&cmd->ref);
-	kref_get(&cmd->ref);
+	tv_nexus = tpg->tpg_nexus;
+	if (!tv_nexus) {
+		pr_err("Missing nexus, ignoring command\n");
+		return -EINVAL;
+	}
 
-	tpg = fu->tpg;
 	cmd_len = (cmd_iu->len & ~0x3) + 16;
 	if (cmd_len > USBG_MAX_CMD)
-		goto err;
+		return -EINVAL;
 
+	scsi_tag = be16_to_cpup(&cmd_iu->tag);
+	cmd = usbg_get_cmd(fu, tv_nexus, scsi_tag);
+	if (IS_ERR(cmd)) {
+		pr_err("usbg_get_cmd failed\n");
+		return -ENOMEM;
+	}
 	memcpy(cmd->cmd_buf, cmd_iu->cdb, cmd_len);
 
-	cmd->tag = be16_to_cpup(&cmd_iu->tag);
-	cmd->se_cmd.tag = cmd->tag;
 	if (fu->flags & USBG_USE_STREAMS) {
 		if (cmd->tag > UASP_SS_EP_COMP_NUM_STREAMS)
 			goto err;
@@ -1117,12 +1132,6 @@ static int usbg_submit_command(struct f_uas *fu,
 		cmd->stream = &fu->stream[0];
 	}
 
-	tv_nexus = tpg->tpg_nexus;
-	if (!tv_nexus) {
-		pr_err("Missing nexus, ignoring command\n");
-		goto err;
-	}
-
 	switch (cmd_iu->prio_attr & 0x7) {
 	case UAS_HEAD_TAG:
 		cmd->prio_attr = TCM_HEAD_TAG;
@@ -1148,7 +1157,7 @@ static int usbg_submit_command(struct f_uas *fu,
 
 	return 0;
 err:
-	kfree(cmd);
+	usbg_release_cmd(&cmd->se_cmd);
 	return -EINVAL;
 }
 
@@ -1182,7 +1191,7 @@ static void bot_cmd_work(struct work_struct *work)
 out:
 	transport_send_check_condition_and_sense(se_cmd,
 				TCM_UNSUPPORTED_SCSI_OPCODE, 1);
-	usbg_cleanup_cmd(cmd);
+	transport_generic_free_cmd(&cmd->se_cmd, 0);
 }
 
 static int bot_submit_command(struct f_uas *fu,
@@ -1190,7 +1199,7 @@ static int bot_submit_command(struct f_uas *fu,
 {
 	struct bulk_cb_wrap *cbw = cmdbuf;
 	struct usbg_cmd *cmd;
-	struct usbg_tpg *tpg;
+	struct usbg_tpg *tpg = fu->tpg;
 	struct tcm_usbg_nexus *tv_nexus;
 	u32 cmd_len;
 
@@ -1207,28 +1216,20 @@ static int bot_submit_command(struct f_uas *fu,
 	if (cmd_len < 1 || cmd_len > 16)
 		return -EINVAL;
 
-	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
-	if (!cmd)
-		return -ENOMEM;
-
-	cmd->fu = fu;
-
-	/* XXX until I figure out why I can't free in on complete */
-	kref_init(&cmd->ref);
-	kref_get(&cmd->ref);
-
-	tpg = fu->tpg;
-
-	memcpy(cmd->cmd_buf, cbw->CDB, cmd_len);
-
-	cmd->bot_tag = cbw->Tag;
-
 	tv_nexus = tpg->tpg_nexus;
 	if (!tv_nexus) {
 		pr_err("Missing nexus, ignoring command\n");
-		goto err;
+		return -ENODEV;
 	}
 
+	cmd = usbg_get_cmd(fu, tv_nexus, cbw->Tag);
+	if (IS_ERR(cmd)) {
+		pr_err("usbg_get_cmd failed\n");
+		return -ENOMEM;
+	}
+	memcpy(cmd->cmd_buf, cbw->CDB, cmd_len);
+
+	cmd->bot_tag = cbw->Tag;
 	cmd->prio_attr = TCM_SIMPLE_TAG;
 	cmd->unpacked_lun = cbw->Lun;
 	cmd->is_read = cbw->Flags & US_BULK_FLAG_IN ? 1 : 0;
@@ -1239,9 +1240,6 @@ static int bot_submit_command(struct f_uas *fu,
 	queue_work(tpg->workqueue, &cmd->work);
 
 	return 0;
-err:
-	kfree(cmd);
-	return -EINVAL;
 }
 
 /* Start fabric.c code */
@@ -1282,20 +1280,14 @@ static u32 usbg_tpg_get_inst_index(struct se_portal_group *se_tpg)
 	return 1;
 }
 
-static void usbg_cmd_release(struct kref *ref)
-{
-	struct usbg_cmd *cmd = container_of(ref, struct usbg_cmd,
-			ref);
-
-	transport_generic_free_cmd(&cmd->se_cmd, 0);
-}
-
 static void usbg_release_cmd(struct se_cmd *se_cmd)
 {
 	struct usbg_cmd *cmd = container_of(se_cmd, struct usbg_cmd,
 			se_cmd);
+	struct se_session *se_sess = se_cmd->se_sess;
+
 	kfree(cmd->data_buf);
-	kfree(cmd);
+	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
 
 static int usbg_shutdown_session(struct se_session *se_sess)
@@ -1579,55 +1571,48 @@ out:
 	return ret;
 }
 
+static int usbg_alloc_sess_cb(struct se_portal_group *se_tpg,
+			      struct se_session *se_sess, void *p)
+{
+	struct usbg_tpg *tpg = container_of(se_tpg,
+				struct usbg_tpg, se_tpg);
+
+	tpg->tpg_nexus = p;
+	return 0;
+}
+
 static int tcm_usbg_make_nexus(struct usbg_tpg *tpg, char *name)
 {
-	struct se_portal_group *se_tpg;
 	struct tcm_usbg_nexus *tv_nexus;
-	int ret;
+	int ret = 0;
 
 	mutex_lock(&tpg->tpg_mutex);
 	if (tpg->tpg_nexus) {
 		ret = -EEXIST;
 		pr_debug("tpg->tpg_nexus already exists\n");
-		goto err_unlock;
+		goto out_unlock;
 	}
-	se_tpg = &tpg->se_tpg;
 
-	ret = -ENOMEM;
 	tv_nexus = kzalloc(sizeof(*tv_nexus), GFP_KERNEL);
-	if (!tv_nexus)
-		goto err_unlock;
-	tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL);
-	if (IS_ERR(tv_nexus->tvn_se_sess))
-		goto err_free;
+	if (!tv_nexus) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
-	/*
-	 * Since we are running in 'demo mode' this call with generate a
-	 * struct se_node_acl for the tcm_vhost struct se_portal_group with
-	 * the SCSI Initiator port name of the passed configfs group 'name'.
-	 */
-	tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
-			se_tpg, name);
-	if (!tv_nexus->tvn_se_sess->se_node_acl) {
+	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+						     USB_G_DEFAULT_SESSION_TAGS,
+						     sizeof(struct usbg_cmd),
+						     TARGET_PROT_NORMAL, name,
+						     tv_nexus, usbg_alloc_sess_cb);
+	if (IS_ERR(tv_nexus->tvn_se_sess)) {
 #define MAKE_NEXUS_MSG "core_tpg_check_initiator_node_acl() failed for %s\n"
 		pr_debug(MAKE_NEXUS_MSG, name);
 #undef MAKE_NEXUS_MSG
-		goto err_session;
+		ret = PTR_ERR(tv_nexus->tvn_se_sess);
+		kfree(tv_nexus);
 	}
-	/*
-	 * Now register the TCM vHost virtual I_T Nexus as active.
-	 */
-	transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
-			tv_nexus->tvn_se_sess, tv_nexus);
-	tpg->tpg_nexus = tv_nexus;
-	mutex_unlock(&tpg->tpg_mutex);
-	return 0;
 
-err_session:
-	transport_free_session(tv_nexus->tvn_se_sess);
-err_free:
-	kfree(tv_nexus);
-err_unlock:
+out_unlock:
 	mutex_unlock(&tpg->tpg_mutex);
 	return ret;
 }
@@ -1735,11 +1720,7 @@ static void usbg_port_unlink(struct se_portal_group *se_tpg,
 
 static int usbg_check_stop_free(struct se_cmd *se_cmd)
 {
-	struct usbg_cmd *cmd = container_of(se_cmd, struct usbg_cmd,
-			se_cmd);
-
-	kref_put(&cmd->ref, usbg_cmd_release);
-	return 1;
+	return target_put_sess_cmd(se_cmd);
 }
 
 static const struct target_core_fabric_ops usbg_ops = {
diff --git a/drivers/usb/gadget/function/tcm.h b/drivers/usb/gadget/function/tcm.h
index b75c6f3e1980..a27e6e34db0b 100644
--- a/drivers/usb/gadget/function/tcm.h
+++ b/drivers/usb/gadget/function/tcm.h
@@ -23,6 +23,8 @@ enum {
 #define USB_G_ALT_INT_BBB       0
 #define USB_G_ALT_INT_UAS       1
 
+#define USB_G_DEFAULT_SESSION_TAGS	128
+
 struct tcm_usbg_nexus {
 	struct se_session *tvn_se_sess;
 };
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index dbde1149c218..81d42cce885a 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -17,7 +17,9 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/list.h>
+#include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/atmel_usba_udc.h>
@@ -1886,20 +1888,15 @@ static int atmel_usba_stop(struct usb_gadget *gadget)
 #ifdef CONFIG_OF
 static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on)
 {
-	unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
-
-	if (is_on)
-		at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
-	else
-		at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
+	regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
+			   is_on ? AT91_PMC_BIASEN : 0);
 }
 
 static void at91sam9g45_pulse_bias(struct usba_udc *udc)
 {
-	unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR);
-
-	at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN));
-	at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN);
+	regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0);
+	regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
+			   AT91_PMC_BIASEN);
 }
 
 static const struct usba_udc_errata at91sam9rl_errata = {
@@ -1936,6 +1933,9 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
 		return ERR_PTR(-EINVAL);
 
 	udc->errata = match->data;
+	udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
+	if (udc->errata && IS_ERR(udc->pmc))
+		return ERR_CAST(udc->pmc);
 
 	udc->num_ep = 0;
 
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.h b/drivers/usb/gadget/udc/atmel_usba_udc.h
index ea448a344767..3e1c9d589dfa 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.h
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.h
@@ -354,6 +354,8 @@ struct usba_udc {
 	struct dentry *debugfs_root;
 	struct dentry *debugfs_regs;
 #endif
+
+	struct regmap *pmc;
 };
 
 static inline struct usba_ep *to_usba_ep(struct usb_ep *ep)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index f898686cdd93..0e6fd556c982 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1664,8 +1664,7 @@ static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
 	mutex_unlock(&vhost_scsi_mutex);
 }
 
-static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus,
-				       struct se_session *se_sess)
+static void vhost_scsi_free_cmd_map_res(struct se_session *se_sess)
 {
 	struct vhost_scsi_cmd *tv_cmd;
 	unsigned int i;
@@ -1721,98 +1720,82 @@ static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = {
 	NULL,
 };
 
-static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
-				const char *name)
+static int vhost_scsi_nexus_cb(struct se_portal_group *se_tpg,
+			       struct se_session *se_sess, void *p)
 {
-	struct se_portal_group *se_tpg;
-	struct se_session *se_sess;
-	struct vhost_scsi_nexus *tv_nexus;
 	struct vhost_scsi_cmd *tv_cmd;
 	unsigned int i;
 
-	mutex_lock(&tpg->tv_tpg_mutex);
-	if (tpg->tpg_nexus) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
-		pr_debug("tpg->tpg_nexus already exists\n");
-		return -EEXIST;
-	}
-	se_tpg = &tpg->se_tpg;
-
-	tv_nexus = kzalloc(sizeof(struct vhost_scsi_nexus), GFP_KERNEL);
-	if (!tv_nexus) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
-		pr_err("Unable to allocate struct vhost_scsi_nexus\n");
-		return -ENOMEM;
-	}
-	/*
-	 *  Initialize the struct se_session pointer and setup tagpool
-	 *  for struct vhost_scsi_cmd descriptors
-	 */
-	tv_nexus->tvn_se_sess = transport_init_session_tags(
-					VHOST_SCSI_DEFAULT_TAGS,
-					sizeof(struct vhost_scsi_cmd),
-					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
-	if (IS_ERR(tv_nexus->tvn_se_sess)) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
-		kfree(tv_nexus);
-		return -ENOMEM;
-	}
-	se_sess = tv_nexus->tvn_se_sess;
 	for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
 		tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
 
 		tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) *
 					VHOST_SCSI_PREALLOC_SGLS, GFP_KERNEL);
 		if (!tv_cmd->tvc_sgl) {
-			mutex_unlock(&tpg->tv_tpg_mutex);
 			pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
 			goto out;
 		}
 
 		tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
-					VHOST_SCSI_PREALLOC_UPAGES, GFP_KERNEL);
+				VHOST_SCSI_PREALLOC_UPAGES, GFP_KERNEL);
 		if (!tv_cmd->tvc_upages) {
-			mutex_unlock(&tpg->tv_tpg_mutex);
 			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
 			goto out;
 		}
 
 		tv_cmd->tvc_prot_sgl = kzalloc(sizeof(struct scatterlist) *
-					VHOST_SCSI_PREALLOC_PROT_SGLS, GFP_KERNEL);
+				VHOST_SCSI_PREALLOC_PROT_SGLS, GFP_KERNEL);
 		if (!tv_cmd->tvc_prot_sgl) {
-			mutex_unlock(&tpg->tv_tpg_mutex);
 			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
 			goto out;
 		}
 	}
+	return 0;
+out:
+	vhost_scsi_free_cmd_map_res(se_sess);
+	return -ENOMEM;
+}
+
+static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
+				const char *name)
+{
+	struct se_portal_group *se_tpg;
+	struct vhost_scsi_nexus *tv_nexus;
+
+	mutex_lock(&tpg->tv_tpg_mutex);
+	if (tpg->tpg_nexus) {
+		mutex_unlock(&tpg->tv_tpg_mutex);
+		pr_debug("tpg->tpg_nexus already exists\n");
+		return -EEXIST;
+	}
+	se_tpg = &tpg->se_tpg;
+
+	tv_nexus = kzalloc(sizeof(struct vhost_scsi_nexus), GFP_KERNEL);
+	if (!tv_nexus) {
+		mutex_unlock(&tpg->tv_tpg_mutex);
+		pr_err("Unable to allocate struct vhost_scsi_nexus\n");
+		return -ENOMEM;
+	}
 	/*
 	 * Since we are running in 'demo mode' this call with generate a
 	 * struct se_node_acl for the vhost_scsi struct se_portal_group with
 	 * the SCSI Initiator port name of the passed configfs group 'name'.
 	 */
-	tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
-				se_tpg, (unsigned char *)name);
-	if (!tv_nexus->tvn_se_sess->se_node_acl) {
+	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+					VHOST_SCSI_DEFAULT_TAGS,
+					sizeof(struct vhost_scsi_cmd),
+					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
+					(unsigned char *)name, tv_nexus,
+					vhost_scsi_nexus_cb);
+	if (IS_ERR(tv_nexus->tvn_se_sess)) {
 		mutex_unlock(&tpg->tv_tpg_mutex);
-		pr_debug("core_tpg_check_initiator_node_acl() failed"
-				" for %s\n", name);
-		goto out;
+		kfree(tv_nexus);
+		return -ENOMEM;
 	}
-	/*
-	 * Now register the TCM vhost virtual I_T Nexus as active.
-	 */
-	transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
-			tv_nexus->tvn_se_sess, tv_nexus);
 	tpg->tpg_nexus = tv_nexus;
 
 	mutex_unlock(&tpg->tv_tpg_mutex);
 	return 0;
-
-out:
-	vhost_scsi_free_cmd_map_res(tv_nexus, se_sess);
-	transport_free_session(se_sess);
-	kfree(tv_nexus);
-	return -ENOMEM;
 }
 
 static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg)
@@ -1853,7 +1836,7 @@ static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg)
 		" %s Initiator Port: %s\n", vhost_scsi_dump_proto_id(tpg->tport),
 		tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
 
-	vhost_scsi_free_cmd_map_res(tv_nexus, se_sess);
+	vhost_scsi_free_cmd_map_res(se_sess);
 	/*
 	 * Release the SCSI I_T Nexus to the emulated vhost Target Port
 	 */
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dispc.h b/drivers/video/fbdev/omap2/omapfb/dss/dispc.h
index 483744223dd1..e014d0419c58 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dispc.h
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dispc.h
@@ -915,4 +915,5 @@ static inline u16 DISPC_MFLAG_THRESHOLD_OFFSET(enum omap_plane plane)
 		return 0;
 	}
 }
+
 #endif
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dss.h b/drivers/video/fbdev/omap2/omapfb/dss/dss.h
index b9066afee301..0184a8461df1 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/dss.h
+++ b/drivers/video/fbdev/omap2/omapfb/dss/dss.h
@@ -412,6 +412,44 @@ void dispc_wb_set_channel_in(enum dss_writeback_channel channel);
 int dispc_wb_setup(const struct omap_dss_writeback_info *wi,
 		bool mem_to_mem, const struct omap_video_timings *timings);
 
+u32 dispc_read_irqstatus(void);
+void dispc_clear_irqstatus(u32 mask);
+u32 dispc_read_irqenable(void);
+void dispc_write_irqenable(u32 mask);
+
+int dispc_request_irq(irq_handler_t handler, void *dev_id);
+void dispc_free_irq(void *dev_id);
+
+int dispc_runtime_get(void);
+void dispc_runtime_put(void);
+
+void dispc_mgr_enable(enum omap_channel channel, bool enable);
+bool dispc_mgr_is_enabled(enum omap_channel channel);
+u32 dispc_mgr_get_vsync_irq(enum omap_channel channel);
+u32 dispc_mgr_get_framedone_irq(enum omap_channel channel);
+u32 dispc_mgr_get_sync_lost_irq(enum omap_channel channel);
+bool dispc_mgr_go_busy(enum omap_channel channel);
+void dispc_mgr_go(enum omap_channel channel);
+void dispc_mgr_set_lcd_config(enum omap_channel channel,
+		const struct dss_lcd_mgr_config *config);
+void dispc_mgr_set_timings(enum omap_channel channel,
+		const struct omap_video_timings *timings);
+void dispc_mgr_setup(enum omap_channel channel,
+		const struct omap_overlay_manager_info *info);
+
+int dispc_ovl_check(enum omap_plane plane, enum omap_channel channel,
+		const struct omap_overlay_info *oi,
+		const struct omap_video_timings *timings,
+		int *x_predecim, int *y_predecim);
+
+int dispc_ovl_enable(enum omap_plane plane, bool enable);
+bool dispc_ovl_enabled(enum omap_plane plane);
+void dispc_ovl_set_channel_out(enum omap_plane plane,
+		enum omap_channel channel);
+int dispc_ovl_setup(enum omap_plane plane, const struct omap_overlay_info *oi,
+		bool replication, const struct omap_video_timings *mgr_timings,
+		bool mem_to_mem);
+
 /* VENC */
 int venc_init_platform_driver(void) __init;
 void venc_uninit_platform_driver(void);
@@ -465,4 +503,44 @@ int dss_pll_write_config_type_b(struct dss_pll *pll,
 		const struct dss_pll_clock_info *cinfo);
 int dss_pll_wait_reset_done(struct dss_pll *pll);
 
+/* compat */
+
+struct dss_mgr_ops {
+	int (*connect)(struct omap_overlay_manager *mgr,
+		struct omap_dss_device *dst);
+	void (*disconnect)(struct omap_overlay_manager *mgr,
+		struct omap_dss_device *dst);
+
+	void (*start_update)(struct omap_overlay_manager *mgr);
+	int (*enable)(struct omap_overlay_manager *mgr);
+	void (*disable)(struct omap_overlay_manager *mgr);
+	void (*set_timings)(struct omap_overlay_manager *mgr,
+			const struct omap_video_timings *timings);
+	void (*set_lcd_config)(struct omap_overlay_manager *mgr,
+			const struct dss_lcd_mgr_config *config);
+	int (*register_framedone_handler)(struct omap_overlay_manager *mgr,
+			void (*handler)(void *), void *data);
+	void (*unregister_framedone_handler)(struct omap_overlay_manager *mgr,
+			void (*handler)(void *), void *data);
+};
+
+int dss_install_mgr_ops(const struct dss_mgr_ops *mgr_ops);
+void dss_uninstall_mgr_ops(void);
+
+int dss_mgr_connect(struct omap_overlay_manager *mgr,
+		struct omap_dss_device *dst);
+void dss_mgr_disconnect(struct omap_overlay_manager *mgr,
+		struct omap_dss_device *dst);
+void dss_mgr_set_timings(struct omap_overlay_manager *mgr,
+		const struct omap_video_timings *timings);
+void dss_mgr_set_lcd_config(struct omap_overlay_manager *mgr,
+		const struct dss_lcd_mgr_config *config);
+int dss_mgr_enable(struct omap_overlay_manager *mgr);
+void dss_mgr_disable(struct omap_overlay_manager *mgr);
+void dss_mgr_start_update(struct omap_overlay_manager *mgr);
+int dss_mgr_register_framedone_handler(struct omap_overlay_manager *mgr,
+		void (*handler)(void *), void *data);
+void dss_mgr_unregister_framedone_handler(struct omap_overlay_manager *mgr,
+		void (*handler)(void *), void *data);
+
 #endif
diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c
index 0e24eb9c219c..71a923e53f93 100644
--- a/drivers/video/fbdev/pvr2fb.c
+++ b/drivers/video/fbdev/pvr2fb.c
@@ -686,8 +686,8 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf,
 	if (!pages)
 		return -ENOMEM;
 
-	ret = get_user_pages_unlocked(current, current->mm, (unsigned long)buf,
-				      nr_pages, WRITE, 0, pages);
+	ret = get_user_pages_unlocked((unsigned long)buf, nr_pages, WRITE,
+			0, pages);
 
 	if (ret < nr_pages) {
 		nr_pages = ret;
diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c
index 48ccf6db62a2..e9cf19977285 100644
--- a/drivers/video/fbdev/simplefb.c
+++ b/drivers/video/fbdev/simplefb.c
@@ -174,7 +174,7 @@ static int simplefb_parse_pd(struct platform_device *pdev,
 struct simplefb_par {
 	u32 palette[PSEUDO_PALETTE_SIZE];
 #if defined CONFIG_OF && defined CONFIG_COMMON_CLK
-	int clk_count;
+	unsigned int clk_count;
 	struct clk **clks;
 #endif
 #if defined CONFIG_OF && defined CONFIG_REGULATOR
@@ -213,7 +213,7 @@ static int simplefb_clocks_init(struct simplefb_par *par,
 		return 0;
 
 	par->clk_count = of_clk_get_parent_count(np);
-	if (par->clk_count <= 0)
+	if (!par->clk_count)
 		return 0;
 
 	par->clks = kcalloc(par->clk_count, sizeof(struct clk *), GFP_KERNEL);
diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 32c8fc5f7a5c..60bdad3a689b 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -244,9 +244,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
 
 	/* Get the physical addresses of the source buffer */
 	down_read(&current->mm->mmap_sem);
-	num_pinned = get_user_pages(current, current->mm,
-		param.local_vaddr - lb_offset, num_pages,
-		(param.source == -1) ? READ : WRITE,
+	num_pinned = get_user_pages(param.local_vaddr - lb_offset,
+		num_pages, (param.source == -1) ? READ : WRITE,
 		0, pages, NULL);
 	up_read(&current->mm->mmap_sem);
 
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 92443c319e59..8f89bd8a826a 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -37,6 +37,7 @@
 #include <asm/cacheflush.h>
 #endif /* CONFIG_HPWDT_NMI_DECODING */
 #include <asm/nmi.h>
+#include <asm/frame.h>
 
 #define HPWDT_VERSION			"1.3.3"
 #define SECS_TO_TICKS(secs)		((secs) * 1000 / 128)
@@ -353,10 +354,10 @@ static int detect_cru_service(void)
 
 asm(".text                      \n\t"
     ".align 4                   \n\t"
-    ".globl asminline_call	\n"
+    ".globl asminline_call	\n\t"
+    ".type asminline_call, @function \n\t"
     "asminline_call:            \n\t"
-    "pushq      %rbp            \n\t"
-    "movq       %rsp, %rbp      \n\t"
+    FRAME_BEGIN
     "pushq      %rax            \n\t"
     "pushq      %rbx            \n\t"
     "pushq      %rdx            \n\t"
@@ -386,7 +387,7 @@ asm(".text                      \n\t"
     "popq       %rdx            \n\t"
     "popq       %rbx            \n\t"
     "popq       %rax            \n\t"
-    "leave                      \n\t"
+    FRAME_END
     "ret                        \n\t"
     ".previous");
 
@@ -483,7 +484,7 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 	static int die_nmi_called;
 
 	if (!hpwdt_nmi_decoding)
-		goto out;
+		return NMI_DONE;
 
 	spin_lock_irqsave(&rom_lock, rom_pl);
 	if (!die_nmi_called && !is_icru && !is_uefi)
@@ -496,11 +497,11 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 
 	if (!is_icru && !is_uefi) {
 		if (cmn_regs.u1.ral == 0) {
-			panic("An NMI occurred, "
-				"but unable to determine source.\n");
+			nmi_panic(regs, "An NMI occurred, but unable to determine source.\n");
+			return NMI_HANDLED;
 		}
 	}
-	panic("An NMI occurred. Depending on your system the reason "
+	nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
 		"for the NMI is logged in any one of the following "
 		"resources:\n"
 		"1. Integrated Management Log (IML)\n"
@@ -508,8 +509,7 @@ static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 		"3. OA Forward Progress Log\n"
 		"4. iLO Event Log");
 
-out:
-	return NMI_DONE;
+	return NMI_HANDLED;
 }
 #endif /* CONFIG_HPWDT_NMI_DECODING */
 
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 7c8a2cf16f58..9781e0dd59d6 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -42,7 +42,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/bootmem.h>
 #include <linux/pagemap.h>
@@ -760,7 +759,4 @@ static int __init balloon_init(void)
 
 	return 0;
 }
-
 subsys_initcall(balloon_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 403fe3955393..bdff01095f54 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -9,7 +9,6 @@
 #include <linux/linkage.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/module.h>
 
 #include <asm/sync_bitops.h>
 #include <asm/xen/hypercall.h>
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 524c22146429..488017a0806a 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -26,7 +26,7 @@
 #include <linux/linkage.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/string.h>
 #include <linux/bootmem.h>
 #include <linux/slab.h>
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index eff2b88003d9..9289a17712e2 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -36,7 +36,6 @@
 #include <linux/linkage.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/module.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
diff --git a/drivers/xen/features.c b/drivers/xen/features.c
index 99eda169c779..d7d34fdfc993 100644
--- a/drivers/xen/features.c
+++ b/drivers/xen/features.c
@@ -7,7 +7,7 @@
  */
 #include <linux/types.h>
 #include <linux/cache.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include <asm/xen/hypercall.h>
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index effbaf91791f..bb36b1e1dbcc 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -33,7 +33,6 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 3454973dc3bb..cf9666680c8c 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -2,6 +2,9 @@
  * platform-pci.c
  *
  * Xen platform PCI device driver
+ *
+ * Authors: ssmith@xensource.com and stefano.stabellini@eu.citrix.com
+ *
  * Copyright (c) 2005, Intel Corporation.
  * Copyright (c) 2007, XenSource Inc.
  * Copyright (c) 2010, Citrix
@@ -24,7 +27,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/pci.h>
 
 #include <xen/platform_pci.h>
@@ -36,10 +39,6 @@
 
 #define DRV_NAME    "xen-platform-pci"
 
-MODULE_AUTHOR("ssmith@xensource.com and stefano.stabellini@eu.citrix.com");
-MODULE_DESCRIPTION("Xen platform PCI device");
-MODULE_LICENSE("GPL");
-
 static unsigned long platform_mmio;
 static unsigned long platform_mmio_alloc;
 static unsigned long platform_mmiolen;
@@ -101,8 +100,8 @@ static int platform_pci_resume(struct pci_dev *pdev)
 	return 0;
 }
 
-static int platform_pci_init(struct pci_dev *pdev,
-			     const struct pci_device_id *ent)
+static int platform_pci_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *ent)
 {
 	int i, ret;
 	long ioaddr;
@@ -181,20 +180,17 @@ static struct pci_device_id platform_pci_tbl[] = {
 	{0,}
 };
 
-MODULE_DEVICE_TABLE(pci, platform_pci_tbl);
-
 static struct pci_driver platform_driver = {
 	.name =           DRV_NAME,
-	.probe =          platform_pci_init,
+	.probe =          platform_pci_probe,
 	.id_table =       platform_pci_tbl,
 #ifdef CONFIG_PM
 	.resume_early =   platform_pci_resume,
 #endif
 };
 
-static int __init platform_pci_module_init(void)
+static int __init platform_pci_init(void)
 {
 	return pci_register_driver(&platform_driver);
 }
-
-module_init(platform_pci_module_init);
+device_initcall(platform_pci_init);
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
index b5a7342e0ba5..6881b3ceb675 100644
--- a/drivers/xen/sys-hypervisor.c
+++ b/drivers/xen/sys-hypervisor.c
@@ -9,7 +9,7 @@
 
 #include <linux/slab.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/kobject.h>
 #include <linux/err.h>
 
@@ -50,11 +50,6 @@ static int __init xen_sysfs_type_init(void)
 	return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
 }
 
-static void xen_sysfs_type_destroy(void)
-{
-	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
-}
-
 /* xen version attributes */
 static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
 {
@@ -111,11 +106,6 @@ static int __init xen_sysfs_version_init(void)
 	return sysfs_create_group(hypervisor_kobj, &version_group);
 }
 
-static void xen_sysfs_version_destroy(void)
-{
-	sysfs_remove_group(hypervisor_kobj, &version_group);
-}
-
 /* UUID */
 
 static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer)
@@ -157,11 +147,6 @@ static int __init xen_sysfs_uuid_init(void)
 	return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
 }
 
-static void xen_sysfs_uuid_destroy(void)
-{
-	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
-}
-
 /* xen compilation attributes */
 
 static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
@@ -235,11 +220,6 @@ static int __init xen_compilation_init(void)
 	return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
 }
 
-static void xen_compilation_destroy(void)
-{
-	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
-}
-
 /* xen properties info */
 
 static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
@@ -366,11 +346,6 @@ static int __init xen_properties_init(void)
 	return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
 }
 
-static void xen_properties_destroy(void)
-{
-	sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
-}
-
 #ifdef CONFIG_XEN_HAVE_VPMU
 struct pmu_mode {
 	const char *name;
@@ -484,11 +459,6 @@ static int __init xen_pmu_init(void)
 {
 	return sysfs_create_group(hypervisor_kobj, &xen_pmu_group);
 }
-
-static void xen_pmu_destroy(void)
-{
-	sysfs_remove_group(hypervisor_kobj, &xen_pmu_group);
-}
 #endif
 
 static int __init hyper_sysfs_init(void)
@@ -517,7 +487,8 @@ static int __init hyper_sysfs_init(void)
 	if (xen_initial_domain()) {
 		ret = xen_pmu_init();
 		if (ret) {
-			xen_properties_destroy();
+			sysfs_remove_group(hypervisor_kobj,
+					   &xen_properties_group);
 			goto prop_out;
 		}
 	}
@@ -525,31 +496,17 @@ static int __init hyper_sysfs_init(void)
 	goto out;
 
 prop_out:
-	xen_sysfs_uuid_destroy();
+	sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
 uuid_out:
-	xen_compilation_destroy();
+	sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
 comp_out:
-	xen_sysfs_version_destroy();
+	sysfs_remove_group(hypervisor_kobj, &version_group);
 version_out:
-	xen_sysfs_type_destroy();
+	sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
 out:
 	return ret;
 }
-
-static void __exit hyper_sysfs_exit(void)
-{
-#ifdef CONFIG_XEN_HAVE_VPMU
-	xen_pmu_destroy();
-#endif
-	xen_properties_destroy();
-	xen_compilation_destroy();
-	xen_sysfs_uuid_destroy();
-	xen_sysfs_version_destroy();
-	xen_sysfs_type_destroy();
-
-}
-module_init(hyper_sysfs_init);
-module_exit(hyper_sysfs_exit);
+device_initcall(hyper_sysfs_init);
 
 static ssize_t hyp_sysfs_show(struct kobject *kobj,
 			      struct attribute *attr,
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 39e7ef8d3957..79865b8901ba 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -33,7 +33,9 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/mm_types.h>
+#include <linux/init.h>
 #include <linux/capability.h>
 
 #include <xen/xen.h>
@@ -109,14 +111,6 @@ static int __init balloon_init(void)
 }
 subsys_initcall(balloon_init);
 
-static void balloon_exit(void)
-{
-    /* XXX - release balloon here */
-    return;
-}
-
-module_exit(balloon_exit);
-
 #define BALLOON_SHOW(name, format, args...)				\
 	static ssize_t show_##name(struct device *dev,			\
 				   struct device_attribute *attr,	\
@@ -250,5 +244,3 @@ static int register_balloon(struct device *dev)
 
 	return 0;
 }
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 9c234209d8b5..8e67336f8ddd 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include "pciback.h"
 #include "conf_space.h"
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index fb0221434f81..2f19dd7553e6 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -6,7 +6,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/wait.h>
 #include <linux/bitops.h>
 #include <xen/events.h>
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 4843741e703a..c252eb3f0176 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -6,7 +6,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index c46ee189466f..ff932624eaad 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -141,6 +141,8 @@ struct scsiback_tmr {
 	wait_queue_head_t tmr_wait;
 };
 
+#define VSCSI_DEFAULT_SESSION_TAGS	128
+
 struct scsiback_nexus {
 	/* Pointer to TCM session for I_T Nexus */
 	struct se_session *tvn_se_sess;
@@ -190,7 +192,6 @@ module_param_named(max_buffer_pages, scsiback_max_buffer_pages, int, 0644);
 MODULE_PARM_DESC(max_buffer_pages,
 "Maximum number of free pages to keep in backend buffer");
 
-static struct kmem_cache *scsiback_cachep;
 static DEFINE_SPINLOCK(free_pages_lock);
 static int free_pages_num;
 static LIST_HEAD(scsiback_free_pages);
@@ -321,11 +322,11 @@ static void scsiback_free_translation_entry(struct kref *kref)
 	kfree(entry);
 }
 
-static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
-			uint32_t resid, struct vscsibk_pend *pending_req)
+static void scsiback_send_response(struct vscsibk_info *info,
+			char *sense_buffer, int32_t result, uint32_t resid,
+			uint16_t rqid)
 {
 	struct vscsiif_response *ring_res;
-	struct vscsibk_info *info = pending_req->info;
 	int notify;
 	struct scsi_sense_hdr sshdr;
 	unsigned long flags;
@@ -337,7 +338,7 @@ static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
 	info->ring.rsp_prod_pvt++;
 
 	ring_res->rslt   = result;
-	ring_res->rqid   = pending_req->rqid;
+	ring_res->rqid   = rqid;
 
 	if (sense_buffer != NULL &&
 	    scsi_normalize_sense(sense_buffer, VSCSIIF_SENSE_BUFFERSIZE,
@@ -357,6 +358,13 @@ static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
 
 	if (notify)
 		notify_remote_via_irq(info->irq);
+}
+
+static void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
+			uint32_t resid, struct vscsibk_pend *pending_req)
+{
+	scsiback_send_response(pending_req->info, sense_buffer, result,
+			       resid, pending_req->rqid);
 
 	if (pending_req->v2p)
 		kref_put(&pending_req->v2p->kref,
@@ -380,6 +388,12 @@ static void scsiback_cmd_done(struct vscsibk_pend *pending_req)
 	scsiback_fast_flush_area(pending_req);
 	scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
 	scsiback_put(info);
+	/*
+	 * Drop the extra KREF_ACK reference taken by target_submit_cmd_map_sgls()
+	 * ahead of scsiback_check_stop_free() ->  transport_generic_free_cmd()
+	 * final se_cmd->cmd_kref put.
+	 */
+	target_put_sess_cmd(&pending_req->se_cmd);
 }
 
 static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
@@ -388,16 +402,12 @@ static void scsiback_cmd_exec(struct vscsibk_pend *pending_req)
 	struct se_session *sess = pending_req->v2p->tpg->tpg_nexus->tvn_se_sess;
 	int rc;
 
-	memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
-
-	memset(se_cmd, 0, sizeof(*se_cmd));
-
 	scsiback_get(pending_req->info);
 	se_cmd->tag = pending_req->rqid;
 	rc = target_submit_cmd_map_sgls(se_cmd, sess, pending_req->cmnd,
 			pending_req->sense_buffer, pending_req->v2p->lun,
 			pending_req->data_len, 0,
-			pending_req->sc_data_direction, 0,
+			pending_req->sc_data_direction, TARGET_SCF_ACK_KREF,
 			pending_req->sgl, pending_req->n_sg,
 			NULL, 0, NULL, 0);
 	if (rc < 0) {
@@ -586,45 +596,40 @@ static void scsiback_disconnect(struct vscsibk_info *info)
 static void scsiback_device_action(struct vscsibk_pend *pending_req,
 	enum tcm_tmreq_table act, int tag)
 {
-	int rc, err = FAILED;
 	struct scsiback_tpg *tpg = pending_req->v2p->tpg;
+	struct scsiback_nexus *nexus = tpg->tpg_nexus;
 	struct se_cmd *se_cmd = &pending_req->se_cmd;
 	struct scsiback_tmr *tmr;
+	u64 unpacked_lun = pending_req->v2p->lun;
+	int rc, err = FAILED;
 
 	tmr = kzalloc(sizeof(struct scsiback_tmr), GFP_KERNEL);
-	if (!tmr)
-		goto out;
+	if (!tmr) {
+		target_put_sess_cmd(se_cmd);
+		goto err;
+	}
 
 	init_waitqueue_head(&tmr->tmr_wait);
 
-	transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
-		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG,
-		&pending_req->sense_buffer[0]);
-
-	rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);
-	if (rc < 0)
-		goto out;
-
-	se_cmd->se_tmr_req->ref_task_tag = tag;
-
-	if (transport_lookup_tmr_lun(se_cmd, pending_req->v2p->lun) < 0)
-		goto out;
+	rc = target_submit_tmr(&pending_req->se_cmd, nexus->tvn_se_sess,
+			       &pending_req->sense_buffer[0],
+			       unpacked_lun, tmr, act, GFP_KERNEL,
+			       tag, TARGET_SCF_ACK_KREF);
+	if (rc)
+		goto err;
 
-	transport_generic_handle_tmr(se_cmd);
 	wait_event(tmr->tmr_wait, atomic_read(&tmr->tmr_complete));
 
 	err = (se_cmd->se_tmr_req->response == TMR_FUNCTION_COMPLETE) ?
 		SUCCESS : FAILED;
 
-out:
-	if (tmr) {
-		transport_generic_free_cmd(&pending_req->se_cmd, 1);
+	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
+	transport_generic_free_cmd(&pending_req->se_cmd, 1);
+	return;
+err:
+	if (tmr)
 		kfree(tmr);
-	}
-
 	scsiback_do_resp_with_sense(NULL, err, 0, pending_req);
-
-	kmem_cache_free(scsiback_cachep, pending_req);
 }
 
 /*
@@ -653,15 +658,53 @@ out:
 	return entry;
 }
 
-static int prepare_pending_reqs(struct vscsibk_info *info,
-				struct vscsiif_request *ring_req,
-				struct vscsibk_pend *pending_req)
+static struct vscsibk_pend *scsiback_get_pend_req(struct vscsiif_back_ring *ring,
+				struct v2p_entry *v2p)
+{
+	struct scsiback_tpg *tpg = v2p->tpg;
+	struct scsiback_nexus *nexus = tpg->tpg_nexus;
+	struct se_session *se_sess = nexus->tvn_se_sess;
+	struct vscsibk_pend *req;
+	int tag, i;
+
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	if (tag < 0) {
+		pr_err("Unable to obtain tag for vscsiif_request\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	req = &((struct vscsibk_pend *)se_sess->sess_cmd_map)[tag];
+	memset(req, 0, sizeof(*req));
+	req->se_cmd.map_tag = tag;
+
+	for (i = 0; i < VSCSI_MAX_GRANTS; i++)
+		req->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
+
+	return req;
+}
+
+static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
+				struct vscsiif_back_ring *ring,
+				struct vscsiif_request *ring_req)
 {
+	struct vscsibk_pend *pending_req;
 	struct v2p_entry *v2p;
 	struct ids_tuple vir;
 
-	pending_req->rqid       = ring_req->rqid;
-	pending_req->info       = info;
+	/* request range check from frontend */
+	if ((ring_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
+		(ring_req->sc_data_direction != DMA_TO_DEVICE) &&
+		(ring_req->sc_data_direction != DMA_FROM_DEVICE) &&
+		(ring_req->sc_data_direction != DMA_NONE)) {
+		pr_debug("invalid parameter data_dir = %d\n",
+			ring_req->sc_data_direction);
+		return ERR_PTR(-EINVAL);
+	}
+	if (ring_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
+		pr_debug("invalid parameter cmd_len = %d\n",
+			ring_req->cmd_len);
+		return ERR_PTR(-EINVAL);
+	}
 
 	vir.chn = ring_req->channel;
 	vir.tgt = ring_req->id;
@@ -669,33 +712,24 @@ static int prepare_pending_reqs(struct vscsibk_info *info,
 
 	v2p = scsiback_do_translation(info, &vir);
 	if (!v2p) {
-		pending_req->v2p = NULL;
 		pr_debug("the v2p of (chn:%d, tgt:%d, lun:%d) doesn't exist.\n",
-			vir.chn, vir.tgt, vir.lun);
-		return -ENODEV;
+			 vir.chn, vir.tgt, vir.lun);
+		return ERR_PTR(-ENODEV);
 	}
-	pending_req->v2p = v2p;
 
-	/* request range check from frontend */
-	pending_req->sc_data_direction = ring_req->sc_data_direction;
-	if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
-		(pending_req->sc_data_direction != DMA_TO_DEVICE) &&
-		(pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
-		(pending_req->sc_data_direction != DMA_NONE)) {
-		pr_debug("invalid parameter data_dir = %d\n",
-			pending_req->sc_data_direction);
-		return -EINVAL;
+	pending_req = scsiback_get_pend_req(ring, v2p);
+	if (IS_ERR(pending_req)) {
+		kref_put(&v2p->kref, scsiback_free_translation_entry);
+		return ERR_PTR(-ENOMEM);
 	}
-
+	pending_req->rqid = ring_req->rqid;
+	pending_req->info = info;
+	pending_req->v2p = v2p;
+	pending_req->sc_data_direction = ring_req->sc_data_direction;
 	pending_req->cmd_len = ring_req->cmd_len;
-	if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
-		pr_debug("invalid parameter cmd_len = %d\n",
-			pending_req->cmd_len);
-		return -EINVAL;
-	}
 	memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
 
-	return 0;
+	return pending_req;
 }
 
 static int scsiback_do_cmd_fn(struct vscsibk_info *info)
@@ -704,7 +738,7 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
 	struct vscsiif_request ring_req;
 	struct vscsibk_pend *pending_req;
 	RING_IDX rc, rp;
-	int err, more_to_do;
+	int more_to_do;
 	uint32_t result;
 
 	rc = ring->req_cons;
@@ -722,16 +756,13 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
 	while ((rc != rp)) {
 		if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
 			break;
-		pending_req = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
-		if (!pending_req)
-			return 1;
 
 		RING_COPY_REQUEST(ring, rc, &ring_req);
 		ring->req_cons = ++rc;
 
-		err = prepare_pending_reqs(info, &ring_req, pending_req);
-		if (err) {
-			switch (err) {
+		pending_req = prepare_pending_reqs(info, ring, &ring_req);
+		if (IS_ERR(pending_req)) {
+			switch (PTR_ERR(pending_req)) {
 			case -ENODEV:
 				result = DID_NO_CONNECT;
 				break;
@@ -739,9 +770,8 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
 				result = DRIVER_ERROR;
 				break;
 			}
-			scsiback_do_resp_with_sense(NULL, result << 24, 0,
-						    pending_req);
-			kmem_cache_free(scsiback_cachep, pending_req);
+			scsiback_send_response(info, NULL, result << 24, 0,
+					       ring_req.rqid);
 			return 1;
 		}
 
@@ -750,8 +780,8 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
 			if (scsiback_gnttab_data_map(&ring_req, pending_req)) {
 				scsiback_fast_flush_area(pending_req);
 				scsiback_do_resp_with_sense(NULL,
-					DRIVER_ERROR << 24, 0, pending_req);
-				kmem_cache_free(scsiback_cachep, pending_req);
+						DRIVER_ERROR << 24, 0, pending_req);
+				transport_generic_free_cmd(&pending_req->se_cmd, 0);
 			} else {
 				scsiback_cmd_exec(pending_req);
 			}
@@ -765,9 +795,9 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
 			break;
 		default:
 			pr_err_ratelimited("invalid request\n");
-			scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24,
-						    0, pending_req);
-			kmem_cache_free(scsiback_cachep, pending_req);
+			scsiback_do_resp_with_sense(NULL, DRIVER_ERROR << 24, 0,
+						    pending_req);
+			transport_generic_free_cmd(&pending_req->se_cmd, 0);
 			break;
 		}
 
@@ -1353,24 +1383,20 @@ static u32 scsiback_tpg_get_inst_index(struct se_portal_group *se_tpg)
 
 static int scsiback_check_stop_free(struct se_cmd *se_cmd)
 {
-	/*
-	 * Do not release struct se_cmd's containing a valid TMR pointer.
-	 * These will be released directly in scsiback_device_action()
-	 * with transport_generic_free_cmd().
-	 */
-	if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
-		return 0;
-
-	transport_generic_free_cmd(se_cmd, 0);
-	return 1;
+	return transport_generic_free_cmd(se_cmd, 0);
 }
 
 static void scsiback_release_cmd(struct se_cmd *se_cmd)
 {
-	struct vscsibk_pend *pending_req = container_of(se_cmd,
-				struct vscsibk_pend, se_cmd);
+	struct se_session *se_sess = se_cmd->se_sess;
+	struct se_tmr_req *se_tmr = se_cmd->se_tmr_req;
+
+	if (se_tmr && se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) {
+		struct scsiback_tmr *tmr = se_tmr->fabric_tmr_ptr;
+		kfree(tmr);
+	}
 
-	kmem_cache_free(scsiback_cachep, pending_req);
+	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
 
 static int scsiback_shutdown_session(struct se_session *se_sess)
@@ -1494,61 +1520,49 @@ static struct configfs_attribute *scsiback_param_attrs[] = {
 	NULL,
 };
 
+static int scsiback_alloc_sess_cb(struct se_portal_group *se_tpg,
+				  struct se_session *se_sess, void *p)
+{
+	struct scsiback_tpg *tpg = container_of(se_tpg,
+				struct scsiback_tpg, se_tpg);
+
+	tpg->tpg_nexus = p;
+	return 0;
+}
+
 static int scsiback_make_nexus(struct scsiback_tpg *tpg,
 				const char *name)
 {
-	struct se_portal_group *se_tpg;
-	struct se_session *se_sess;
 	struct scsiback_nexus *tv_nexus;
+	int ret = 0;
 
 	mutex_lock(&tpg->tv_tpg_mutex);
 	if (tpg->tpg_nexus) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
 		pr_debug("tpg->tpg_nexus already exists\n");
-		return -EEXIST;
+		ret = -EEXIST;
+		goto out_unlock;
 	}
-	se_tpg = &tpg->se_tpg;
 
 	tv_nexus = kzalloc(sizeof(struct scsiback_nexus), GFP_KERNEL);
 	if (!tv_nexus) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_unlock;
 	}
-	/*
-	 * Initialize the struct se_session pointer
-	 */
-	tv_nexus->tvn_se_sess = transport_init_session(TARGET_PROT_NORMAL);
+
+	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+						     VSCSI_DEFAULT_SESSION_TAGS,
+						     sizeof(struct vscsibk_pend),
+						     TARGET_PROT_NORMAL, name,
+						     tv_nexus, scsiback_alloc_sess_cb);
 	if (IS_ERR(tv_nexus->tvn_se_sess)) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
 		kfree(tv_nexus);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_unlock;
 	}
-	se_sess = tv_nexus->tvn_se_sess;
-	/*
-	 * Since we are running in 'demo mode' this call with generate a
-	 * struct se_node_acl for the scsiback struct se_portal_group with
-	 * the SCSI Initiator port name of the passed configfs group 'name'.
-	 */
-	tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
-				se_tpg, (unsigned char *)name);
-	if (!tv_nexus->tvn_se_sess->se_node_acl) {
-		mutex_unlock(&tpg->tv_tpg_mutex);
-		pr_debug("core_tpg_check_initiator_node_acl() failed for %s\n",
-			 name);
-		goto out;
-	}
-	/* Now register the TCM pvscsi virtual I_T Nexus as active. */
-	transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
-			tv_nexus->tvn_se_sess, tv_nexus);
-	tpg->tpg_nexus = tv_nexus;
 
+out_unlock:
 	mutex_unlock(&tpg->tv_tpg_mutex);
-	return 0;
-
-out:
-	transport_free_session(se_sess);
-	kfree(tv_nexus);
-	return -ENOMEM;
+	return ret;
 }
 
 static int scsiback_drop_nexus(struct scsiback_tpg *tpg)
@@ -1866,16 +1880,6 @@ static struct xenbus_driver scsiback_driver = {
 	.otherend_changed	= scsiback_frontend_changed
 };
 
-static void scsiback_init_pend(void *p)
-{
-	struct vscsibk_pend *pend = p;
-	int i;
-
-	memset(pend, 0, sizeof(*pend));
-	for (i = 0; i < VSCSI_MAX_GRANTS; i++)
-		pend->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
-}
-
 static int __init scsiback_init(void)
 {
 	int ret;
@@ -1886,14 +1890,9 @@ static int __init scsiback_init(void)
 	pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
 		 VSCSI_VERSION, utsname()->sysname, utsname()->machine);
 
-	scsiback_cachep = kmem_cache_create("vscsiif_cache",
-		sizeof(struct vscsibk_pend), 0, 0, scsiback_init_pend);
-	if (!scsiback_cachep)
-		return -ENOMEM;
-
 	ret = xenbus_register_backend(&scsiback_driver);
 	if (ret)
-		goto out_cache_destroy;
+		goto out;
 
 	ret = target_register_template(&scsiback_ops);
 	if (ret)
@@ -1903,8 +1902,7 @@ static int __init scsiback_init(void)
 
 out_unregister_xenbus:
 	xenbus_unregister_driver(&scsiback_driver);
-out_cache_destroy:
-	kmem_cache_destroy(scsiback_cachep);
+out:
 	pr_err("%s: error %d\n", __func__, ret);
 	return ret;
 }
@@ -1920,7 +1918,6 @@ static void __exit scsiback_exit(void)
 	}
 	target_unregister_template(&scsiback_ops);
 	xenbus_unregister_driver(&scsiback_driver);
-	kmem_cache_destroy(scsiback_cachep);
 }
 
 module_init(scsiback_init);
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 3b2bffde534f..53a085fca00c 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -71,7 +71,6 @@
 #include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/module.h>
 #include <linux/workqueue.h>
 #include <linux/device.h>
 #include <xen/balloon.h>
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index ee6d9efd7b76..4a41ac9af966 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -5,7 +5,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/capability.h>
 
 #include <xen/xen.h>
@@ -18,8 +18,6 @@
 
 #include "xenbus_comms.h"
 
-MODULE_LICENSE("GPL");
-
 static int xenbus_backend_open(struct inode *inode, struct file *filp)
 {
 	if (!capable(CAP_SYS_ADMIN))
@@ -132,11 +130,4 @@ static int __init xenbus_backend_init(void)
 		pr_err("Could not register xenbus backend device\n");
 	return err;
 }
-
-static void __exit xenbus_backend_exit(void)
-{
-	misc_deregister(&xenbus_backend_dev);
-}
-
-module_init(xenbus_backend_init);
-module_exit(xenbus_backend_exit);
+device_initcall(xenbus_backend_init);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 912b64edb42b..cacf30d14747 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,7 +55,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/miscdevice.h>
-#include <linux/module.h>
+#include <linux/init.h>
 
 #include "xenbus_comms.h"
 
@@ -63,8 +63,6 @@
 #include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 
-MODULE_LICENSE("GPL");
-
 /*
  * An element of a list of outstanding transactions, for which we're
  * still waiting a reply.
@@ -626,11 +624,4 @@ static int __init xenbus_init(void)
 		pr_err("Could not register xenbus frontend device\n");
 	return err;
 }
-
-static void __exit xenbus_exit(void)
-{
-	misc_deregister(&xenbus_dev);
-}
-
-module_init(xenbus_init);
-module_exit(xenbus_exit);
+device_initcall(xenbus_init);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index ba804f3d8278..374b12af8812 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -44,7 +44,6 @@
 #include <linux/fcntl.h>
 #include <linux/kthread.h>
 #include <linux/rwsem.h>
-#include <linux/module.h>
 #include <linux/mutex.h>
 #include <asm/xen/hypervisor.h>
 #include <xen/xenbus.h>
diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c
index a03f261b12d8..c6e2b4a542ea 100644
--- a/drivers/xen/xenfs/xensyms.c
+++ b/drivers/xen/xenfs/xensyms.c
@@ -1,4 +1,3 @@
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/fs.h>