Diffstat (limited to 'drivers')
73 files changed, 14097 insertions, 853 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index c0cc96bab9e7..6e973b8e3a3b 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -182,4 +182,6 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/nvdimm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 9a02fb7c5106..b64b49f6e01b 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ +obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 35da507411a0..f15db002be8e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -386,6 +386,32 @@ config ACPI_REDUCED_HARDWARE_ONLY If you are unsure what to do, do not enable this option. +config ACPI_NFIT + tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select LIBNVDIMM + help + Infrastructure to probe ACPI 6 compliant platforms for + NVDIMMs (NFIT) and register a libnvdimm device tree. In + addition to storage devices this also enables libnvdimm to pass + ACPI._DSM messages for platform/dimm configuration. + + To compile this driver as a module, choose M here: + the module will be called nfit. + +config ACPI_NFIT_DEBUG + bool "NFIT DSM debug" + depends on ACPI_NFIT + depends on DYNAMIC_DEBUG + default n + help + Enabling this option causes the nfit driver to dump the + input and output buffers of _DSM operations on the ACPI0012 + device and its children. This can be very verbose, so leave + it disabled unless you are debugging a hardware / firmware + issue. + source "drivers/acpi/apei/Kconfig" config ACPI_EXTLOG diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 73d840bef455..8321430d7f24 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-y += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o obj-y += acpi_memhotplug.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_BATTERY) += battery.o diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c new file mode 100644 index 000000000000..2161fa178c8d --- /dev/null +++ b/drivers/acpi/nfit.c @@ -0,0 +1,1587 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/list_sort.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/list.h> +#include <linux/acpi.h> +#include <linux/sort.h> +#include <linux/io.h> +#include "nfit.h" + +/* + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is + * irrelevant. 
+ */ +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +static bool force_enable_dimms; +module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); + +static u8 nfit_uuid[NFIT_UUID_MAX][16]; + +const u8 *to_nfit_uuid(enum nfit_uuids id) +{ + return nfit_uuid[id]; +} +EXPORT_SYMBOL(to_nfit_uuid); + +static struct acpi_nfit_desc *to_acpi_nfit_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + + /* + * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct + * acpi_device. + */ + if (!nd_desc->provider_name + || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0) + return NULL; + + return to_acpi_device(acpi_desc->dev); +} + +static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + const struct nd_cmd_desc *desc = NULL; + union acpi_object in_obj, in_buf, *out_obj; + struct device *dev = acpi_desc->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + acpi_handle handle; + const u8 *uuid; + u32 offset; + int rc, i; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_device *adev = nfit_mem->adev; + + if (!adev) + return -ENOTTY; + dimm_name = nvdimm_name(nvdimm); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nfit_mem->dsm_mask; + desc = nd_cmd_dimm_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_DIMM); + handle = adev->handle; + } else { + struct acpi_device *adev = to_acpi_dev(acpi_desc); + + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + desc = nd_cmd_bus_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_BUS); + handle = adev->handle; + dimm_name = "bus"; + } + + if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) + return -ENOTTY; + + if (!test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_BUFFER; + in_buf.buffer.pointer = buf; + in_buf.buffer.length = 0; + + /* libnvdimm has already validated the input envelope */ + for (i = 0; i < desc->in_num; i++) + in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc, + i, buf); + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__, + dimm_name, cmd_name, in_buf.buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, in_buf.buffer.pointer, min_t(u32, 128, + in_buf.buffer.length), true); + } + + out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj); + if (!out_obj) { + dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, + cmd_name); + return -EINVAL; + } + + if (out_obj->package.type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", + __func__, dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, + dimm_name, cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, out_obj->buffer.pointer, min_t(u32, 128, + out_obj->buffer.length), true); + } + + for (i = 0, offset = 0; i < desc->out_num; i++) { + u32 out_size = 
nd_cmd_out_size(nvdimm, cmd, desc, i, buf, + (u32 *) out_obj->buffer.pointer); + + if (offset + out_size > out_obj->buffer.length) { + dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + break; + } + + if (in_buf.buffer.length + offset + out_size > buf_len) { + dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + rc = -ENXIO; + goto out; + } + memcpy(buf + in_buf.buffer.length + offset, + out_obj->buffer.pointer + offset, out_size); + offset += out_size; + } + if (offset + in_buf.buffer.length < buf_len) { + if (i >= 1) { + /* + * status valid, return the number of bytes left + * unfilled in the output buffer + */ + rc = buf_len - offset - in_buf.buffer.length; + } else { + dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", + __func__, dimm_name, cmd_name, buf_len, + offset); + rc = -ENXIO; + } + } else + rc = 0; + + out: + ACPI_FREE(out_obj); + + return rc; +} + +static const char *spa_type_name(u16 type) +{ + static const char *to_name[] = { + [NFIT_SPA_VOLATILE] = "volatile", + [NFIT_SPA_PM] = "pmem", + [NFIT_SPA_DCR] = "dimm-control-region", + [NFIT_SPA_BDW] = "block-data-window", + [NFIT_SPA_VDISK] = "volatile-disk", + [NFIT_SPA_VCD] = "volatile-cd", + [NFIT_SPA_PDISK] = "persistent-disk", + [NFIT_SPA_PCD] = "persistent-cd", + + }; + + if (type > NFIT_SPA_PCD) + return "unknown"; + + return to_name[type]; +} + +static int nfit_spa_type(struct acpi_nfit_system_address *spa) +{ + int i; + + for (i = 0; i < NFIT_UUID_MAX; i++) + if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0) + return i; + return -1; +} + +static bool add_spa(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct device *dev = acpi_desc->dev; + struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), + GFP_KERNEL); + + if (!nfit_spa) + return false; + INIT_LIST_HEAD(&nfit_spa->list); + nfit_spa->spa = spa; + list_add_tail(&nfit_spa->list, &acpi_desc->spas); + dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, + spa->range_index, + spa_type_name(nfit_spa_type(spa))); + return true; +} + +static bool add_memdev(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_memory_map *memdev) +{ + struct device *dev = acpi_desc->dev; + struct nfit_memdev *nfit_memdev = devm_kzalloc(dev, + sizeof(*nfit_memdev), GFP_KERNEL); + + if (!nfit_memdev) + return false; + INIT_LIST_HEAD(&nfit_memdev->list); + nfit_memdev->memdev = memdev; + list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); + dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n", + __func__, memdev->device_handle, memdev->range_index, + memdev->region_index); + return true; +} + +static bool add_dcr(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_control_region *dcr) +{ + struct device *dev = acpi_desc->dev; + struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), + GFP_KERNEL); + + if (!nfit_dcr) + return false; + INIT_LIST_HEAD(&nfit_dcr->list); + nfit_dcr->dcr = dcr; + list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); + dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, + dcr->region_index, dcr->windows); + return true; +} + +static bool add_bdw(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_data_region *bdw) +{ + struct device *dev = acpi_desc->dev; + struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), + GFP_KERNEL); + + if (!nfit_bdw) + return false; + INIT_LIST_HEAD(&nfit_bdw->list); + nfit_bdw->bdw = bdw; + list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); + 
dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, + bdw->region_index, bdw->windows); + return true; +} + +static bool add_idt(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_interleave *idt) +{ + struct device *dev = acpi_desc->dev; + struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), + GFP_KERNEL); + + if (!nfit_idt) + return false; + INIT_LIST_HEAD(&nfit_idt->list); + nfit_idt->idt = idt; + list_add_tail(&nfit_idt->list, &acpi_desc->idts); + dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__, + idt->interleave_index, idt->line_count); + return true; +} + +static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, + const void *end) +{ + struct device *dev = acpi_desc->dev; + struct acpi_nfit_header *hdr; + void *err = ERR_PTR(-ENOMEM); + + if (table >= end) + return NULL; + + hdr = table; + switch (hdr->type) { + case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: + if (!add_spa(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_MEMORY_MAP: + if (!add_memdev(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_CONTROL_REGION: + if (!add_dcr(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_DATA_REGION: + if (!add_bdw(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_INTERLEAVE: + if (!add_idt(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_FLUSH_ADDRESS: + dev_dbg(dev, "%s: flush\n", __func__); + break; + case ACPI_NFIT_TYPE_SMBIOS: + dev_dbg(dev, "%s: smbios\n", __func__); + break; + default: + dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); + break; + } + + return table + hdr->length; +} + +static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem) +{ + u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + u16 dcr = nfit_mem->dcr->region_index; + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + u16 range_index = nfit_spa->spa->range_index; + int type = nfit_spa_type(nfit_spa->spa); + struct nfit_memdev *nfit_memdev; + + if (type != NFIT_SPA_BDW) + continue; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index) + continue; + if (nfit_memdev->memdev->device_handle != device_handle) + continue; + if (nfit_memdev->memdev->region_index != dcr) + continue; + + nfit_mem->spa_bdw = nfit_spa->spa; + return; + } + } + + dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n", + nfit_mem->spa_dcr->range_index); + nfit_mem->bdw = NULL; +} + +static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa) +{ + u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; + struct nfit_memdev *nfit_memdev; + struct nfit_dcr *nfit_dcr; + struct nfit_bdw *nfit_bdw; + struct nfit_idt *nfit_idt; + u16 idt_idx, range_index; + + list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) { + if (nfit_dcr->dcr->region_index != dcr) + continue; + nfit_mem->dcr = nfit_dcr->dcr; + break; + } + + if (!nfit_mem->dcr) { + dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n", + spa->range_index, __to_nfit_memdev(nfit_mem) + ? "" : " MEMDEV", nfit_mem->dcr ? 
"" : " DCR"); + return -ENODEV; + } + + /* + * We've found enough to create an nvdimm, optionally + * find an associated BDW + */ + list_add(&nfit_mem->list, &acpi_desc->dimms); + + list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) { + if (nfit_bdw->bdw->region_index != dcr) + continue; + nfit_mem->bdw = nfit_bdw->bdw; + break; + } + + if (!nfit_mem->bdw) + return 0; + + nfit_mem_find_spa_bdw(acpi_desc, nfit_mem); + + if (!nfit_mem->spa_bdw) + return 0; + + range_index = nfit_mem->spa_bdw->range_index; + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index || + nfit_memdev->memdev->region_index != dcr) + continue; + nfit_mem->memdev_bdw = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_bdw = nfit_idt->idt; + break; + } + break; + } + + return 0; +} + +static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_mem *nfit_mem, *found; + struct nfit_memdev *nfit_memdev; + int type = nfit_spa_type(spa); + u16 dcr; + + switch (type) { + case NFIT_SPA_DCR: + case NFIT_SPA_PM: + break; + default: + return 0; + } + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + int rc; + + if (nfit_memdev->memdev->range_index != spa->range_index) + continue; + found = NULL; + dcr = nfit_memdev->memdev->region_index; + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->region_index == dcr) { + found = nfit_mem; + break; + } + + if (found) + nfit_mem = found; + else { + nfit_mem = devm_kzalloc(acpi_desc->dev, + sizeof(*nfit_mem), GFP_KERNEL); + if (!nfit_mem) + return -ENOMEM; + INIT_LIST_HEAD(&nfit_mem->list); + } + + if (type == NFIT_SPA_DCR) { + struct nfit_idt *nfit_idt; + u16 idt_idx; + + /* multiple dimms may share a SPA when interleaved */ + nfit_mem->spa_dcr = spa; + nfit_mem->memdev_dcr = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_dcr = nfit_idt->idt; + break; + } + } else { + /* + * A single dimm may belong to multiple SPA-PM + * ranges, record at least one in addition to + * any SPA-DCR range. + */ + nfit_mem->memdev_pmem = nfit_memdev->memdev; + } + + if (found) + continue; + + rc = nfit_mem_add(acpi_desc, nfit_mem, spa); + if (rc) + return rc; + } + + return 0; +} + +static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b) +{ + struct nfit_mem *a = container_of(_a, typeof(*a), list); + struct nfit_mem *b = container_of(_b, typeof(*b), list); + u32 handleA, handleB; + + handleA = __to_nfit_memdev(a)->device_handle; + handleB = __to_nfit_memdev(b)->device_handle; + if (handleA < handleB) + return -1; + else if (handleA > handleB) + return 1; + return 0; +} + +static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + /* + * For each SPA-DCR or SPA-PMEM address range find its + * corresponding MEMDEV(s). From each MEMDEV find the + * corresponding DCR. Then, if we're operating on a SPA-DCR, + * try to find a SPA-BDW and a corresponding BDW that references + * the DCR. Throw it all into an nfit_mem object. Note, that + * BDWs are optional. 
+ */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc; + + rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa); + if (rc) + return rc; + } + + list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp); + + return 0; +} + +static ssize_t revision_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision); +} +static DEVICE_ATTR_RO(revision); + +static struct attribute *acpi_nfit_attributes[] = { + &dev_attr_revision.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_attributes, +}; + +const struct attribute_group *acpi_nfit_attribute_groups[] = { + &nvdimm_bus_attribute_group, + &acpi_nfit_attribute_group, + NULL, +}; +EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); + +static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return __to_nfit_memdev(nfit_mem); +} + +static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return nfit_mem->dcr; +} + +static ssize_t handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->device_handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t rev_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->revision_id); +} +static DEVICE_ATTR_RO(rev_id); + +static ssize_t device_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->device_id); +} +static DEVICE_ATTR_RO(device); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->code); +} +static DEVICE_ATTR_RO(format); + +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->serial_number); +} +static DEVICE_ATTR_RO(serial); + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 flags = to_nfit_memdev(dev)->flags; + + return sprintf(buf, "%s%s%s%s%s\n", + flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + flags & ACPI_NFIT_MEM_ARMED ? 
"arm " : "", + flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : ""); +} +static DEVICE_ATTR_RO(flags); + +static struct attribute *acpi_nfit_dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_phys_id.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, + &dev_attr_format.attr, + &dev_attr_serial.attr, + &dev_attr_rev_id.attr, + &dev_attr_flags.attr, + NULL, +}; + +static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (to_nfit_dcr(dev)) + return a->mode; + else + return 0; +} + +static struct attribute_group acpi_nfit_dimm_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_dimm_attributes, + .is_visible = acpi_nfit_dimm_attr_visible, +}; + +static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { + &nvdimm_attribute_group, + &nd_device_attribute_group, + &acpi_nfit_dimm_attribute_group, + NULL, +}; + +static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, + u32 device_handle) +{ + struct nfit_mem *nfit_mem; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle) + return nfit_mem->nvdimm; + + return NULL; +} + +static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, u32 device_handle) +{ + struct acpi_device *adev, *adev_dimm; + struct device *dev = acpi_desc->dev; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); + unsigned long long sta; + int i, rc = -ENODEV; + acpi_status status; + + nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + adev = to_acpi_dev(acpi_desc); + if (!adev) + return 0; + + adev_dimm = acpi_find_child_device(adev, device_handle, false); + nfit_mem->adev = adev_dimm; + if (!adev_dimm) { + dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", + device_handle); + return force_enable_dimms ? 0 : -ENODEV; + } + + status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) { + dev_dbg(dev, "%s missing _STA, assuming enabled...\n", + dev_name(&adev_dimm->dev)); + rc = 0; + } else if (ACPI_FAILURE(status)) + dev_err(dev, "%s failed to retrieve_STA, disabling...\n", + dev_name(&adev_dimm->dev)); + else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) + dev_info(dev, "%s disabled by firmware\n", + dev_name(&adev_dimm->dev)); + else + rc = 0; + + for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) + if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nfit_mem->dsm_mask); + + return force_enable_dimms ? 
0 : rc; +} + +static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_mem *nfit_mem; + int dimm_count = 0; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct nvdimm *nvdimm; + unsigned long flags = 0; + u32 device_handle; + u16 mem_flags; + int rc; + + device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); + if (nvdimm) { + /* + * If for some reason we find multiple DCRs the + * first one wins + */ + dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n", + nvdimm_name(nvdimm)); + continue; + } + + if (nfit_mem->bdw && nfit_mem->memdev_pmem) + flags |= NDD_ALIASING; + + mem_flags = __to_nfit_memdev(nfit_mem)->flags; + if (mem_flags & ACPI_NFIT_MEM_ARMED) + flags |= NDD_UNARMED; + + rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle); + if (rc) + continue; + + nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, + acpi_nfit_dimm_attribute_groups, + flags, &nfit_mem->dsm_mask); + if (!nvdimm) + return -ENOMEM; + + nfit_mem->nvdimm = nvdimm; + dimm_count++; + + if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0) + continue; + + dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n", + nvdimm_name(nvdimm), + mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : ""); + + } + + return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); +} + +static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS); + struct acpi_device *adev; + int i; + + adev = to_acpi_dev(acpi_desc); + if (!adev) + return; + + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nd_desc->dsm_mask); +} + +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", nfit_spa->spa->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *acpi_nfit_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_region_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_region_attributes, +}; + +static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_mapping_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + &acpi_nfit_region_attribute_group, + NULL, +}; + +/* enough info to uniquely specify an interleave set */ +struct nfit_set_info { + struct nfit_set_info_map { + u64 region_offset; + u32 serial_number; + u32 pad; + } mapping[0]; +}; + +static size_t sizeof_nfit_set_info(int num_mappings) +{ + return sizeof(struct nfit_set_info) + + num_mappings * sizeof(struct nfit_set_info_map); +} + +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return memcmp(&map0->region_offset, &map1->region_offset, + sizeof(u64)); +} + +/* Retrieve the nth entry referencing this spa */ +static struct acpi_nfit_memory_map *memdev_from_spa( + struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) +{ + struct nfit_memdev *nfit_memdev; 
+ + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) + if (nfit_memdev->memdev->range_index == range_index) + if (n-- == 0) + return nfit_memdev->memdev; + return NULL; +} + +static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc, + struct acpi_nfit_system_address *spa) +{ + int i, spa_type = nfit_spa_type(spa); + struct device *dev = acpi_desc->dev; + struct nd_interleave_set *nd_set; + u16 nr = ndr_desc->num_mappings; + struct nfit_set_info *info; + + if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) + /* pass */; + else + return 0; + + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); + if (!info) + return -ENOMEM; + for (i = 0; i < nr; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nfit_set_info_map *map = &info->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, + spa->range_index, i); + + if (!memdev || !nfit_mem->dcr) { + dev_err(dev, "%s: failed to find DCR\n", __func__); + return -ENODEV; + } + + map->region_offset = memdev->region_offset; + map->serial_number = nfit_mem->dcr->serial_number; + } + + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map, NULL); + nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; + devm_kfree(dev, info); + + return 0; +} + +static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) +{ + struct acpi_nfit_interleave *idt = mmio->idt; + u32 sub_line_offset, line_index, line_offset; + u64 line_no, table_skip_count, table_offset; + + line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset); + table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index); + line_offset = idt->line_offset[line_index] + * mmio->line_size; + table_offset = table_skip_count * mmio->table_size; + + return mmio->base_offset + line_offset + table_offset + sub_line_offset; +} + +static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + u64 offset = nfit_blk->stat_offset + mmio->size * bw; + + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + return readq(mmio->base + offset); +} + +static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, + resource_size_t dpa, unsigned int len, unsigned int write) +{ + u64 cmd, offset; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + + enum { + BCW_OFFSET_MASK = (1ULL << 48)-1, + BCW_LEN_SHIFT = 48, + BCW_LEN_MASK = (1ULL << 8) - 1, + BCW_CMD_SHIFT = 56, + }; + + cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK; + len = len >> L1_CACHE_SHIFT; + cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT; + cmd |= ((u64) write) << BCW_CMD_SHIFT; + + offset = nfit_blk->cmd_offset + mmio->size * bw; + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + writeq(cmd, mmio->base + offset); + /* FIXME: conditionally perform read-back if mandated by firmware */ +} + +static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, + resource_size_t dpa, void *iobuf, size_t len, int rw, + unsigned int lane) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + unsigned int copied = 0; + u64 base_offset; + int rc; + + base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + + lane * mmio->size; + /* 
TODO: non-temporal access, flush hints, cache management etc... */ + write_blk_ctl(nfit_blk, lane, dpa, len, rw); + while (len) { + unsigned int c; + u64 offset; + + if (mmio->num_lines) { + u32 line_offset; + + offset = to_interleave_offset(base_offset + copied, + mmio); + div_u64_rem(offset, mmio->line_size, &line_offset); + c = min_t(size_t, len, mmio->line_size - line_offset); + } else { + offset = base_offset + nfit_blk->bdw_offset; + c = len; + } + + if (rw) + memcpy(mmio->aperture + offset, iobuf + copied, c); + else + memcpy(iobuf + copied, mmio->aperture + offset, c); + + copied += c; + len -= c; + } + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; + return rc; +} + +static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr, + resource_size_t dpa, void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = nfit_blk->nd_region; + unsigned int lane, copied = 0; + int rc = 0; + + lane = nd_region_acquire_lane(nd_region); + while (len) { + u64 c = min(len, mmio->size); + + rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied, + iobuf + copied, c, rw, lane); + if (rc) + break; + + copied += c; + len -= c; + } + nd_region_release_lane(nd_region, lane); + + return rc; +} + +static void nfit_spa_mapping_release(struct kref *kref) +{ + struct nfit_spa_mapping *spa_map = to_spa_map(kref); + struct acpi_nfit_system_address *spa = spa_map->spa; + struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); + iounmap(spa_map->iomem); + release_mem_region(spa->address, spa->length); + list_del(&spa_map->list); + kfree(spa_map); +} + +static struct nfit_spa_mapping *find_spa_mapping( + struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + list_for_each_entry(spa_map, &acpi_desc->spa_maps, list) + if (spa_map->spa == spa) + return spa_map; + + return NULL; +} + +static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + mutex_lock(&acpi_desc->spa_map_mutex); + spa_map = find_spa_mapping(acpi_desc, spa); + + if (spa_map) + kref_put(&spa_map->kref, nfit_spa_mapping_release); + mutex_unlock(&acpi_desc->spa_map_mutex); +} + +static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + resource_size_t start = spa->address; + resource_size_t n = spa->length; + struct nfit_spa_mapping *spa_map; + struct resource *res; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + + spa_map = find_spa_mapping(acpi_desc, spa); + if (spa_map) { + kref_get(&spa_map->kref); + return spa_map->iomem; + } + + spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); + if (!spa_map) + return NULL; + + INIT_LIST_HEAD(&spa_map->list); + spa_map->spa = spa; + kref_init(&spa_map->kref); + spa_map->acpi_desc = acpi_desc; + + res = request_mem_region(start, n, dev_name(acpi_desc->dev)); + if (!res) + goto err_mem; + + /* TODO: cacheability based on the spa type */ + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) + goto err_map; + + list_add_tail(&spa_map->list, &acpi_desc->spa_maps); + return spa_map->iomem; + + err_map: + release_mem_region(start, n); + err_mem: + kfree(spa_map); + return NULL; +} + +/** + * 
nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges + * @nvdimm_bus: NFIT-bus that provided the spa table entry + * @nfit_spa: spa table to map + * + * In the case where block-data-window apertures and + * dimm-control-regions are interleaved they will end up sharing a + * single request_mem_region() + ioremap() for the address range. In + * the style of devm nfit_spa_map() mappings are automatically dropped + * when all region devices referencing the same mapping are disabled / + * unbound. + */ +static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + void __iomem *iomem; + + mutex_lock(&acpi_desc->spa_map_mutex); + iomem = __nfit_spa_map(acpi_desc, spa); + mutex_unlock(&acpi_desc->spa_map_mutex); + + return iomem; +} + +static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, + struct acpi_nfit_interleave *idt, u16 interleave_ways) +{ + if (idt) { + mmio->num_lines = idt->line_count; + mmio->line_size = idt->line_size; + if (interleave_ways == 0) + return -ENXIO; + mmio->table_size = mmio->num_lines * interleave_ways + * mmio->line_size; + } + + return 0; +} + +static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk_mmio *mmio; + struct nfit_blk *nfit_blk; + struct nfit_mem *nfit_mem; + struct nvdimm *nvdimm; + int rc; + + nvdimm = nd_blk_region_to_dimm(ndbr); + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { + dev_dbg(dev, "%s: missing%s%s%s\n", __func__, + nfit_mem ? "" : " nfit_mem", + nfit_mem->dcr ? "" : " dcr", + nfit_mem->bdw ? 
"" : " bdw"); + return -ENXIO; + } + + nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL); + if (!nfit_blk) + return -ENOMEM; + nd_blk_region_set_provider_data(ndbr, nfit_blk); + nfit_blk->nd_region = to_nd_region(dev); + + /* map block aperture memory */ + nfit_blk->bdw_offset = nfit_mem->bdw->offset; + mmio = &nfit_blk->mmio[BDW]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->bdw->size; + mmio->base_offset = nfit_mem->memdev_bdw->region_offset; + mmio->idt = nfit_mem->idt_bdw; + mmio->spa = nfit_mem->spa_bdw; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw, + nfit_mem->memdev_bdw->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init bdw interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + /* map block control memory */ + nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; + nfit_blk->stat_offset = nfit_mem->dcr->status_offset; + mmio = &nfit_blk->mmio[DCR]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->dcr->window_size; + mmio->base_offset = nfit_mem->memdev_dcr->region_offset; + mmio->idt = nfit_mem->idt_dcr; + mmio->spa = nfit_mem->spa_dcr; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr, + nfit_mem->memdev_dcr->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init dcr interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + if (mmio->line_size == 0) + return 0; + + if ((u32) nfit_blk->cmd_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "cmd_offset crosses interleave boundary\n"); + return -ENXIO; + } else if ((u32) nfit_blk->stat_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "stat_offset crosses interleave boundary\n"); + return -ENXIO; + } + + return 0; +} + +static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + int i; + + if (!nfit_blk) + return; /* never enabled */ + + /* auto-free BLK spa mappings */ + for (i = 0; i < 2; i++) { + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; + + if (mmio->base) + nfit_spa_unmap(acpi_desc, mmio->spa); + } + nd_blk_region_set_provider_data(ndbr, NULL); + /* devm will free nfit_blk */ +} + +static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, + struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, + struct acpi_nfit_memory_map *memdev, + struct acpi_nfit_system_address *spa) +{ + struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, + memdev->device_handle); + struct nd_blk_region_desc *ndbr_desc; + struct nfit_mem *nfit_mem; + int blk_valid = 0; + + if (!nvdimm) { + dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", + spa->range_index, memdev->device_handle); + return -ENODEV; + } + + nd_mapping->nvdimm = nvdimm; + switch (nfit_spa_type(spa)) { + case NFIT_SPA_PM: + case NFIT_SPA_VOLATILE: + nd_mapping->start = memdev->address; + nd_mapping->size = memdev->region_size; + break; + case NFIT_SPA_DCR: + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->bdw) { + 
dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", + spa->range_index, nvdimm_name(nvdimm)); + } else { + nd_mapping->size = nfit_mem->bdw->capacity; + nd_mapping->start = nfit_mem->bdw->start_address; + ndr_desc->num_lanes = nfit_mem->bdw->windows; + blk_valid = 1; + } + + ndr_desc->nd_mapping = nd_mapping; + ndr_desc->num_mappings = blk_valid; + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr_desc->enable = acpi_nfit_blk_region_enable; + ndbr_desc->disable = acpi_nfit_blk_region_disable; + ndbr_desc->do_io = acpi_desc->blk_do_io; + if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + return -ENOMEM; + break; + } + + return 0; +} + +static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS]; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nd_blk_region_desc ndbr_desc; + struct nd_region_desc *ndr_desc; + struct nfit_memdev *nfit_memdev; + struct nvdimm_bus *nvdimm_bus; + struct resource res; + int count = 0, rc; + + if (spa->range_index == 0) { + dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", + __func__); + return 0; + } + + memset(&res, 0, sizeof(res)); + memset(&nd_mappings, 0, sizeof(nd_mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + res.start = spa->address; + res.end = res.start + spa->length - 1; + ndr_desc = &ndbr_desc.ndr_desc; + ndr_desc->res = &res; + ndr_desc->provider_data = nfit_spa; + ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; + if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) + ndr_desc->numa_node = acpi_map_pxm_to_online_node( + spa->proximity_domain); + else + ndr_desc->numa_node = NUMA_NO_NODE; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; + struct nd_mapping *nd_mapping; + + if (memdev->range_index != spa->range_index) + continue; + if (count >= ND_MAX_MAPPINGS) { + dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n", + spa->range_index, ND_MAX_MAPPINGS); + return -ENXIO; + } + nd_mapping = &nd_mappings[count++]; + rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, + memdev, spa); + if (rc) + return rc; + } + + ndr_desc->nd_mapping = nd_mappings; + ndr_desc->num_mappings = count; + rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); + if (rc) + return rc; + + nvdimm_bus = acpi_desc->nvdimm_bus; + if (nfit_spa_type(spa) == NFIT_SPA_PM) { + if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { + if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } + return 0; +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + + if (rc) + return rc; + } + return 0; +} + +int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) +{ + struct device *dev = acpi_desc->dev; + const void *end; + u8 *data; + int rc; + + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + + data = (u8 *) acpi_desc->nfit; + end = data + sz; + data += sizeof(struct acpi_table_nfit); + while (!IS_ERR_OR_NULL(data)) + data 
= add_table(acpi_desc, data, end); + + if (IS_ERR(data)) { + dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__, + PTR_ERR(data)); + return PTR_ERR(data); + } + + if (nfit_mem_init(acpi_desc) != 0) + return -ENOMEM; + + acpi_nfit_init_dsms(acpi_desc); + + rc = acpi_nfit_register_dimms(acpi_desc); + if (rc) + return rc; + + return acpi_nfit_register_regions(acpi_desc); +} +EXPORT_SYMBOL_GPL(acpi_nfit_init); + +static int acpi_nfit_add(struct acpi_device *adev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &adev->dev; + struct acpi_table_header *tbl; + acpi_status status = AE_OK; + acpi_size sz; + int rc; + + status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to find NFIT\n"); + return -ENXIO; + } + + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + + dev_set_drvdata(dev, acpi_desc); + acpi_desc->dev = dev; + acpi_desc->nfit = (struct acpi_table_nfit *) tbl; + acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->provider_name = "ACPI.NFIT"; + nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + rc = acpi_nfit_init(acpi_desc, sz); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + return 0; +} + +static int acpi_nfit_remove(struct acpi_device *adev) +{ + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return 0; +} + +static const struct acpi_device_id acpi_nfit_ids[] = { + { "ACPI0012", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids); + +static struct acpi_driver acpi_nfit_driver = { + .name = KBUILD_MODNAME, + .ids = acpi_nfit_ids, + .ops = { + .add = acpi_nfit_add, + .remove = acpi_nfit_remove, + }, +}; + +static __init int nfit_init(void) +{ + BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40); + BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56); + BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48); + BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20); + BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); + BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); + BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); + + acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]); + acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]); + acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]); + acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]); + acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); + acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + + return acpi_bus_register_driver(&acpi_nfit_driver); +} + +static __exit void nfit_exit(void) +{ + acpi_bus_unregister_driver(&acpi_nfit_driver); +} + +module_init(nfit_init); +module_exit(nfit_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h new file mode 100644 index 000000000000..81f2e8c5a79c --- /dev/null +++ 
b/drivers/acpi/nfit.h @@ -0,0 +1,158 @@ +/* + * NVDIMM Firmware Interface Table - NFIT + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_H__ +#define __NFIT_H__ +#include <linux/libnvdimm.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/acpi.h> +#include <acpi/acuuid.h> + +#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" +#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" +#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ + | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ + | ACPI_NFIT_MEM_ARMED) + +enum nfit_uuids { + NFIT_SPA_VOLATILE, + NFIT_SPA_PM, + NFIT_SPA_DCR, + NFIT_SPA_BDW, + NFIT_SPA_VDISK, + NFIT_SPA_VCD, + NFIT_SPA_PDISK, + NFIT_SPA_PCD, + NFIT_DEV_BUS, + NFIT_DEV_DIMM, + NFIT_UUID_MAX, +}; + +struct nfit_spa { + struct acpi_nfit_system_address *spa; + struct list_head list; +}; + +struct nfit_dcr { + struct acpi_nfit_control_region *dcr; + struct list_head list; +}; + +struct nfit_bdw { + struct acpi_nfit_data_region *bdw; + struct list_head list; +}; + +struct nfit_idt { + struct acpi_nfit_interleave *idt; + struct list_head list; +}; + +struct nfit_memdev { + struct acpi_nfit_memory_map *memdev; + struct list_head list; +}; + +/* assembled tables for a given dimm/memory-device */ +struct nfit_mem { + struct nvdimm *nvdimm; + struct acpi_nfit_memory_map *memdev_dcr; + struct acpi_nfit_memory_map *memdev_pmem; + struct acpi_nfit_memory_map *memdev_bdw; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + struct acpi_nfit_system_address *spa_dcr; + struct acpi_nfit_system_address *spa_bdw; + struct acpi_nfit_interleave *idt_dcr; + struct acpi_nfit_interleave *idt_bdw; + struct list_head list; + struct acpi_device *adev; + unsigned long dsm_mask; +}; + +struct acpi_nfit_desc { + struct nvdimm_bus_descriptor nd_desc; + struct acpi_table_nfit *nfit; + struct mutex spa_map_mutex; + struct list_head spa_maps; + struct list_head memdevs; + struct list_head dimms; + struct list_head spas; + struct list_head dcrs; + struct list_head bdws; + struct list_head idts; + struct nvdimm_bus *nvdimm_bus; + struct device *dev; + unsigned long dimm_dsm_force_en; + int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); +}; + +enum nd_blk_mmio_selector { + BDW, + DCR, +}; + +struct nfit_blk { + struct nfit_blk_mmio { + union { + void __iomem *base; + void *aperture; + }; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; + struct acpi_nfit_interleave *idt; + struct acpi_nfit_system_address *spa; + } mmio[2]; + struct nd_region *nd_region; + u64 bdw_offset; /* post interleave offset */ + u64 stat_offset; + u64 cmd_offset; +}; + +struct nfit_spa_mapping { + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_system_address *spa; + struct list_head list; + struct kref kref; + void __iomem *iomem; +}; + +static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) +{ + return container_of(kref, struct nfit_spa_mapping, kref); +} + +static 
inline struct acpi_nfit_memory_map *__to_nfit_memdev( + struct nfit_mem *nfit_mem) +{ + if (nfit_mem->memdev_dcr) + return nfit_mem->memdev_dcr; + return nfit_mem->memdev_pmem; +} + +static inline struct acpi_nfit_desc *to_acpi_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +const u8 *to_nfit_uuid(enum nfit_uuids id); +int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); +extern const struct attribute_group *acpi_nfit_attribute_groups[]; +#endif /* __NFIT_H__ */ diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 1333cbdc3ea2..acaa3b4ea504 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -29,6 +29,8 @@ #include <linux/errno.h> #include <linux/acpi.h> #include <linux/numa.h> +#include <linux/nodemask.h> +#include <linux/topology.h> #define PREFIX "ACPI: " @@ -70,7 +72,12 @@ static void __acpi_map_pxm_to_node(int pxm, int node) int acpi_map_pxm_to_node(int pxm) { - int node = pxm_to_node_map[pxm]; + int node; + + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) + return NUMA_NO_NODE; + + node = pxm_to_node_map[pxm]; if (node == NUMA_NO_NODE) { if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) @@ -83,6 +90,45 @@ int acpi_map_pxm_to_node(int pxm) return node; } +/** + * acpi_map_pxm_to_online_node - Map proximity ID to online node + * @pxm: ACPI proximity ID + * + * This is similar to acpi_map_pxm_to_node(), but always returns an online + * node. When the mapped node from a given proximity ID is offline, it + * looks up the node distance table and returns the nearest online node. + * + * ACPI device drivers, which are called after the NUMA initialization has + * completed in the kernel, can call this interface to obtain their device + * NUMA topology from ACPI tables. Such drivers do not have to deal with + * offline nodes. A node may be offline when a device proximity ID is + * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. + * "numa=off" on x86. + */ +int acpi_map_pxm_to_online_node(int pxm) +{ + int node, n, dist, min_dist; + + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) + node = 0; + + if (!node_online(node)) { + min_dist = INT_MAX; + for_each_online_node(n) { + dist = node_distance(node, n); + if (dist < min_dist) { + min_dist = dist; + node = n; + } + } + } + + return node; +} +EXPORT_SYMBOL(acpi_map_pxm_to_online_node); + static void __init acpi_table_print_srat_entry(struct acpi_subtable_header *header) { @@ -328,8 +374,6 @@ int acpi_get_node(acpi_handle handle) int pxm; pxm = acpi_get_pxm(handle); - if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) - return NUMA_NO_NODE; return acpi_map_pxm_to_node(pxm); } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 3ccef9eba6f9..1b8094d4d7af 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX and will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). -config BLK_DEV_PMEM - tristate "Persistent memory block device support" - depends on HAS_IOMEM - help - Saying Y here will allow you to use a contiguous range of reserved - memory as one or more persistent block devices. - - To compile this driver as a module, choose M here: the module will be - called 'pmem'. - - If unsure, say N. 
- config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9cc6c18a1c7e..02b688d1438d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o -obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e5112714188f..34338d7438f5 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -193,6 +193,13 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } +static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + nvmeq->tags = NULL; +} + static int nvme_admin_init_request(void *data, struct request *req, unsigned int hctx_idx, unsigned int rq_idx, unsigned int numa_node) @@ -606,7 +613,10 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, return; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - req->errors = status; + if (cmd_rq->ctx == CMD_CTX_CANCELLED) + req->errors = -EINTR; + else + req->errors = status; } else { req->errors = nvme_error_status(status); } @@ -1161,12 +1171,13 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.cns = cpu_to_le32(1), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify; + c.identify.cns = cpu_to_le32(1); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1181,12 +1192,13 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, struct nvme_id_ns **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.nsid = cpu_to_le32(nsid), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify, + c.identify.nsid = cpu_to_le32(nsid), + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1230,14 +1242,14 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) { - struct nvme_command c = { - .common.opcode = nvme_admin_get_log_page, - .common.nsid = cpu_to_le32(0xFFFFFFFF), - .common.cdw10[0] = cpu_to_le32( + struct nvme_command c = { }; + int error; + + c.common.opcode = nvme_admin_get_log_page, + c.common.nsid = cpu_to_le32(0xFFFFFFFF), + c.common.cdw10[0] = cpu_to_le32( (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | NVME_LOG_SMART), - }; - int error; *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); if (!*log) @@ -1606,6 +1618,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, + .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1648,6 +1661,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev 
*dev) } if (!blk_get_queue(dev->admin_q)) { nvme_dev_remove_admin(dev); + dev->admin_q = NULL; return -ENODEV; } } else @@ -2349,19 +2363,20 @@ static int nvme_dev_add(struct nvme_dev *dev) } kfree(ctrl); - dev->tagset.ops = &nvme_mq_ops; - dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(dev->dev); - dev->tagset.queue_depth = + if (!dev->tagset.tags) { + dev->tagset.ops = &nvme_mq_ops; + dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.timeout = NVME_IO_TIMEOUT; + dev->tagset.numa_node = dev_to_node(dev->dev); + dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - dev->tagset.cmd_size = nvme_cmd_size(dev); - dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; - dev->tagset.driver_data = dev; - - if (blk_mq_alloc_tag_set(&dev->tagset)) - return 0; + dev->tagset.cmd_size = nvme_cmd_size(dev); + dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tagset.driver_data = dev; + if (blk_mq_alloc_tag_set(&dev->tagset)) + return 0; + } schedule_work(&dev->scan_work); return 0; } @@ -2734,8 +2749,10 @@ static void nvme_free_dev(struct kref *kref) put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); - blk_mq_free_tag_set(&dev->tagset); - blk_put_queue(dev->admin_q); + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + if (dev->admin_q) + blk_put_queue(dev->admin_q); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2866,6 +2883,9 @@ static int nvme_dev_start(struct nvme_dev *dev) free_tags: nvme_dev_remove_admin(dev); + blk_put_queue(dev->admin_q); + dev->admin_q = NULL; + dev->queues[0]->tags = NULL; disable: nvme_disable_queue(dev, 0); nvme_dev_list_remove(dev); @@ -2907,25 +2927,43 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); - schedule_work(&dev->scan_work); + nvme_dev_add(dev); nvme_set_irq_hints(dev); } return 0; } +static void nvme_dead_ctrl(struct nvme_dev *dev) +{ + dev_warn(dev->dev, "Device failed to resume\n"); + kref_get(&dev->kref); + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", + dev->instance))) { + dev_err(dev->dev, + "Failed to start controller remove task\n"); + kref_put(&dev->kref, nvme_free_dev); + } +} + static void nvme_dev_reset(struct nvme_dev *dev) { + bool in_probe = work_busy(&dev->probe_work); + nvme_dev_shutdown(dev); - if (nvme_dev_resume(dev)) { - dev_warn(dev->dev, "Device failed to resume\n"); - kref_get(&dev->kref); - if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", - dev->instance))) { - dev_err(dev->dev, - "Failed to start controller remove task\n"); - kref_put(&dev->kref, nvme_free_dev); - } + + /* Synchronize with device probe so that work will see failure status + * and exit gracefully without trying to schedule another reset */ + flush_work(&dev->probe_work); + + /* Fail this device if reset occured during probe to avoid + * infinite initialization loops. 
*/ + if (in_probe) { + nvme_dead_ctrl(dev); + return; } + /* Schedule device resume asynchronously so the reset work is available + * to cleanup errors that may occur during reinitialization */ + schedule_work(&dev->probe_work); } static void nvme_reset_failed_dev(struct work_struct *ws) @@ -2957,6 +2995,7 @@ static int nvme_reset(struct nvme_dev *dev) if (!ret) { flush_work(&dev->reset_work); + flush_work(&dev->probe_work); return 0; } @@ -3053,26 +3092,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_async_probe(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); - int result; - result = nvme_dev_start(dev); - if (result) - goto reset; - - if (dev->online_queues > 1) - result = nvme_dev_add(dev); - if (result) - goto reset; - - nvme_set_irq_hints(dev); - return; - reset: - spin_lock(&dev_list_lock); - if (!work_busy(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); - } - spin_unlock(&dev_list_lock); + if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) + nvme_dead_ctrl(dev); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -3104,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev) flush_work(&dev->reset_work); flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); - nvme_dev_shutdown(dev); nvme_dev_remove(dev); + nvme_dev_shutdown(dev); nvme_dev_remove_admin(dev); device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); nvme_free_queues(dev, 0); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 713fc9ff1149..2126842fb6e8 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); /* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +/* * The LRU mechanism to clean the lists of persistent grants needs to * be executed periodically. The time interval between consecutive executions * of the purge mechanism is set in ms. @@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; + } + rc = xen_blkif_interface_init(); if (rc) goto failed_init; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f620b5d3f77c..8ccc49d01c8e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -44,6 +44,7 @@ #include <xen/interface/io/blkif.h> #include <xen/interface/io/protocols.h> +extern unsigned int xen_blkif_max_ring_order; /* * This is the maximum number of segments that would be allowed in indirect * requests. This value will also be passed to the frontend. 
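Illustrative sketch, not part of the series: with multi-page rings the number of preallocated pending requests scales with the ring size negotiated over xenstore (the backend advertises "max-ring-page-order", the frontend answers with "ring-page-order" plus one "ring-ref%u" per page). The helper below is hypothetical; XEN_BLKIF_REQS_PER_PAGE is the per-page constant introduced in the next hunk.

static unsigned int example_blkif_nr_reqs(unsigned int ring_page_order)
{
	unsigned int nr_grefs = 1 << ring_page_order;	/* pages / grant refs in the shared ring */

	return nr_grefs * XEN_BLKIF_REQS_PER_PAGE;	/* 32 requests per ring page */
}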
@@ -248,7 +249,7 @@ struct backend_info; #define PERSISTENT_GNT_WAS_ACTIVE 1 /* Number of requests that we can fit in a ring */ -#define XEN_BLKIF_REQS 32 +#define XEN_BLKIF_REQS_PER_PAGE 32 struct persistent_gnt { struct page *page; @@ -320,6 +321,7 @@ struct xen_blkif { struct work_struct free_work; /* Thread shutdown wait queue. */ wait_queue_head_t shutdown_wq; + unsigned int nr_ring_pages; }; struct seg_buf { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6ab69ad61ee1..deb3f001791f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -25,6 +25,7 @@ /* Enlarge the array size in order to fully show blkback name. */ #define BLKBACK_NAME_LEN (20) +#define RINGREF_NAME_LEN (20) struct backend_info { struct xenbus_device *dev; @@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; - struct pending_req *req, *n; - int i, j; BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); @@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) INIT_LIST_HEAD(&blkif->pending_free); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); - - for (i = 0; i < XEN_BLKIF_REQS; i++) { - req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - goto fail; - list_add_tail(&req->free_list, - &blkif->pending_free); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - req->segments[j] = kzalloc(sizeof(*req->segments[0]), - GFP_KERNEL); - if (!req->segments[j]) - goto fail; - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), - GFP_KERNEL); - if (!req->indirect_pages[j]) - goto fail; - } - } spin_lock_init(&blkif->pending_free_lock); init_waitqueue_head(&blkif->pending_free_wq); init_waitqueue_head(&blkif->shutdown_wq); return blkif; - -fail: - list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { - list_del(&req->free_list); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - if (!req->segments[j]) - break; - kfree(req->segments[j]); - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - if (!req->indirect_pages[j]) - break; - kfree(req->indirect_pages[j]); - } - kfree(req); - } - - kmem_cache_free(xen_blkif_cachep, blkif); - - return ERR_PTR(-ENOMEM); } -static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, + unsigned int nr_grefs, unsigned int evtchn) { int err; @@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, &blkif->blk_ring); if (err < 0) return err; @@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; sring_x86_64 = (struct 
blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); break; } default: @@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) i++; } - WARN_ON(i != XEN_BLKIF_REQS); + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); kmem_cache_free(xen_blkif_cachep, blkif); } @@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u", + xen_blkif_max_ring_order); + if (err) + pr_warn("%s write out 'max-ring-page-order' failed\n", __func__); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -860,22 +824,66 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; - unsigned int evtchn; + unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int evtchn, nr_grefs, ring_page_order; unsigned int pers_grants; char protocol[64] = ""; - int err; + struct pending_req *req, *n; + int err, i, j; pr_debug("%s %s\n", __func__, dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + pr_info("event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_page_order); + if (err != 1) { + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%u", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + nr_grefs = 1; + pr_info("%s:using single page: ring-ref %d\n", dev->otherend, + ring_ref[0]); + } else { + unsigned int i; + + if (ring_page_order > xen_blkif_max_ring_order) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", + dev->otherend, ring_page_order, + xen_blkif_max_ring_order); + return err; + } + + nr_grefs = 1 << ring_page_order; + for (i = 0; i < nr_grefs; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name, + "%u", &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/%s", + dev->otherend, ring_ref_name); + return err; + } + pr_info("ring-ref%u: %u\n", i, ring_ref[i]); + } + } be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", @@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be) be->blkif->vbd.feature_gnt_persistent = pers_grants; be->blkif->vbd.overflow_max_grants = 0; + be->blkif->nr_ring_pages = nr_grefs; - pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", + nr_grefs, evtchn, be->blkif->blk_protocol, protocol, pers_grants ? 
"persistent grants" : ""); + for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto fail; + list_add_tail(&req->free_list, &be->blkif->pending_free); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); + if (!req->segments[j]) + goto fail; + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), + GFP_KERNEL); + if (!req->indirect_pages[j]) + goto fail; + } + } + /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); return err; } return 0; + +fail: + list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) { + list_del(&req->free_list); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + if (!req->segments[j]) + break; + kfree(req->segments[j]); + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + if (!req->indirect_pages[j]) + break; + kfree(req->indirect_pages[j]); + } + kfree(req); + } + return -ENOMEM; } static const struct xenbus_device_id xen_blkbk_ids[] = { diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c61cf8c6f61..fc770b7d3beb 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32; module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +/* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +static unsigned int xen_blkif_max_ring_order; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); + +#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) +#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) +/* + * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 + * characters are enough. Define to 20 to keep consist with backend. + */ +#define RINGREF_NAME_LEN (20) /* * We have one of these per vbd, whether ide, scsi or 'other'. 
They @@ -114,13 +128,14 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int nr_ring_pages; struct blkif_front_ring ring; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; struct list_head grants; struct list_head indirect_pages; unsigned int persistent_gnts_c; @@ -139,8 +154,6 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 @@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_RING_SIZE(info)); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* * Clear persistent grants present in requests already * on the shared ring @@ -1033,12 +1046,15 @@ free_shadow: flush_work(&info->work); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < info->nr_ring_pages; i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; @@ -1157,7 +1173,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. 
*/ - if (id >= BLK_RING_SIZE) { + if (id >= BLK_RING_SIZE(info)) { WARN(1, "%s: response to %s has incorrect id (%ld)\n", info->gd->disk_name, op_name(bret->operation), id); /* We can't safely get the 'struct request' as @@ -1245,26 +1261,30 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - grant_ref_t gref; - int err; + int err, i; + unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; + grant_ref_t gref[XENBUS_MAX_RING_PAGES]; - info->ring_ref = GRANT_INVALID_REF; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + get_order(ring_size)); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, ring_size); - err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); + err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, get_order(ring_size)); info->ring.sring = NULL; goto fail; } - info->ring_ref = gref; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = gref[i]; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -1292,7 +1312,18 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; - int err; + int err, i; + unsigned int max_page_order = 0; + unsigned int ring_page_order = 0; + + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "max-ring-page-order", "%u", &max_page_order); + if (err != 1) + info->nr_ring_pages = 1; + else { + ring_page_order = min(xen_blkif_max_ring_order, max_page_order); + info->nr_ring_pages = 1 << ring_page_order; + } /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); @@ -1306,11 +1337,32 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (info->nr_ring_pages == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", ring_page_order); + if (err) { + message = "writing ring-page-order"; + goto abort_transaction; + } + + for (i = 0; i < info->nr_ring_pages; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_printf(xbt, dev->nodename, ring_ref_name, + "%u", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); @@ -1338,6 +1390,9 @@ again: goto destroy_blkring; } + for (i = 0; i < BLK_RING_SIZE(info); i++) + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -1361,7 +1416,7 @@ again: static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err, vdevice, i; + int err, vdevice; struct blkfront_info *info; /* FIXME: Use dynamic device id if this is not set. 
*/ @@ -1422,21 +1477,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1476,10 +1520,10 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_RING_SIZE(info); i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; rc = blkfront_setup_indirect(info); if (rc) { @@ -1491,7 +1535,7 @@ static int blkif_recover(struct blkfront_info *info) blk_queue_max_segments(info->rq, segs); bio_list_init(&bio_list); INIT_LIST_HEAD(&requests); - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1697,7 +1741,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) segs = info->max_indirect_segments; } - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); if (err) goto out_of_memory; @@ -1707,7 +1751,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) * grants, we need to allocate a set of pages that can be * used for mapping indirect grefs */ - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE; + int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&info->indirect_pages)); for (i = 0; i < num; i++) { @@ -1718,7 +1762,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { info->shadow[i].grants_used = kzalloc( sizeof(info->shadow[i].grants_used[0]) * segs, GFP_NOIO); @@ -1740,7 +1784,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) return 0; out_of_memory: - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { kfree(info->shadow[i].grants_used); info->shadow[i].grants_used = NULL; kfree(info->shadow[i].sg); @@ -1906,8 +1950,15 @@ static void blkback_changed(struct xenbus_device *dev, dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); switch (backend_state) { - case XenbusStateInitialising: case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (talk_to_blkback(dev, info)) { + kfree(info); + dev_set_drvdata(&dev->dev, NULL); + break; + } + case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -2091,6 +2142,12 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = 0; + } + if (!xen_has_pv_disk_devices()) return -ENODEV; diff --git a/drivers/dma/Kconfig 
b/drivers/dma/Kconfig index bda2cb06dc7a..88d474b78076 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -162,6 +162,17 @@ config MX3_IPU_IRQS To avoid bloating the irq_desc[] array we allocate a sufficient number of IRQ slots and map them dynamically to specific sources. +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. + config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX || MACH_TX39XX @@ -245,6 +256,9 @@ config TI_EDMA Enable support for the TI EDMA controller. This DMA engine is found on TI DaVinci and AM33xx parts. +config TI_DMA_CROSSBAR + bool + config ARCH_HAS_ASYNC_TX_FIND_CHANNEL bool @@ -330,6 +344,7 @@ config DMA_OMAP depends on ARCH_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX config DMA_BCM2835 tristate "BCM2835 DMA engine support" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 69f77d5ba53b..6a4d6f2827da 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_IMX_DMA) += imx-dma.o obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_PXA_DMA) += pxa_dma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_SIRF_DMA) += sirf-dma.o obj-$(CONFIG_TI_EDMA) += edma.o @@ -38,6 +39,7 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 49d396ec06e5..5de3cf453f35 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -474,7 +474,7 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, u32 val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK); + PL080_CONFIG_TC_IRQ_MASK); writel(val, ch->reg_config); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 57b2141ddddc..59892126d175 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -247,6 +247,10 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); + channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); @@ -635,6 +639,104 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) } /** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct 
data_chunk *first = xt->sgl; + struct at_desc *desc = NULL; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + dev_info(chan2dev(chan), + "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. + */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, + xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = ATC_SRC_WIDTH(dwidth) | + ATC_DST_WIDTH(dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_PIP + | ATC_DST_PIP + | ATC_FC_MEM2MEM; + + /* create the transfer */ + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), + "%s: couldn't allocate our descriptor\n", __func__); + return NULL; + } + + desc->lli.saddr = xt->src_start; + desc->lli.daddr = xt->dst_start; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + desc->txd.cookie = -EBUSY; + desc->total_len = desc->len = len; + desc->tx_width = dwidth; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &desc->txd; +} + +/** * atc_prep_dma_memcpy - prepare a memcpy operation * @chan: the channel to prepare operation on * @dest: operation virtual destination address @@ -1609,6 +1711,7 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1713,6 +1816,9 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.dev = &pdev->dev; /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 2727ca560572..bc8d5ebedd19 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -196,6 +196,11 @@ struct at_desc { size_t len; u32 tx_width; size_t total_len; + + /* 
Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; }; static inline struct at_desc * diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7992164ea9ec..cf1213de7865 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -235,6 +235,10 @@ struct at_xdmac_lld { dma_addr_t mbr_sa; /* Source Address Member */ dma_addr_t mbr_da; /* Destination Address Member */ u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; @@ -358,6 +362,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, if (at_xdmac_chan_is_cyclic(atchan)) { reg = AT_XDMAC_CNDC_NDVIEW_NDV1; at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); + } else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) { + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; } else { /* * No need to write AT_XDMAC_CC reg, it will be done when the @@ -465,6 +471,33 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) return desc; } +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) { @@ -656,19 +689,14 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ - | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -748,7 +776,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | AT_XDMAC_MBR_UBC_NDE | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), @@ -756,12 +783,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. 
*/ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -783,6 +806,215 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, return &first->tx_dma_desc; } +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
+ */ + u32 chan_cc = AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%d, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_sa, desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + struct data_chunk *chunk, *prev_chunk = NULL; + dma_addr_t dst_addr, src_addr; + size_t dst_skip, src_skip, len = 0; + size_t prev_dst_icg = 0, prev_src_icg = 0; + int i; + + if (!xt || (xt->numf != 1) || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + for (i = 0; i < xt->frame_size; i++) { + struct at_xdmac_desc *desc; + size_t src_icg, dst_icg; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + __func__, chunk->size, src_icg, dst_icg); + + /* + * Handle the case where we just have the same + * transfer to setup, we can just increase the + * block number and reuse the same descriptor. 
+ */ + if (prev_chunk && prev && + (prev_chunk->size == chunk->size) && + (prev_src_icg == src_icg) && + (prev_dst_icg == dst_icg)) { + dev_dbg(chan2dev(chan), + "%s: same configuration that the previous chunk, merging the descriptors...\n", + __func__); + at_xdmac_increment_block_count(chan, prev); + continue; + } + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev_chunk = chunk; + prev_dst_icg = dst_icg; + prev_src_icg = src_icg; + prev = desc; + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static struct dma_async_tx_descriptor * at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -814,24 +1046,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(!len)) return NULL; - /* - * Check address alignment to select the greater data width we can use. - * Some XDMAC implementations don't provide dword transfer, in this - * case selecting dword has the same behavior as selecting word transfers. - */ - if (!((src_addr | dst_addr) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); /* Prepare descriptors. */ while (remaining_size) { @@ -861,19 +1076,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); /* Check remaining length and change data width if needed. */ - if (!((src_addr | dst_addr | xfer_size) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else if ((src_addr | dst_addr | xfer_size) & 1) { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); ublen = xfer_size >> dwidth; @@ -884,7 +1088,6 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | (remaining_size ? 
AT_XDMAC_MBR_UBC_NDE : 0) | ublen; desc->lld.mbr_cfg = chan_cc; @@ -893,12 +1096,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n", - __func__, prev, prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -915,6 +1114,93 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return &first->tx_dma_desc; } +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + */ + u32 chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = value; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -1445,7 +1731,9 @@ static int at_xdmac_probe(struct platform_device *pdev) } dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + 
dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1458,7 +1746,9 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_tx_status = at_xdmac_tx_status; atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 3ddfd1f6c23c..4a4cce15f25d 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -267,6 +267,13 @@ static void dma_chan_put(struct dma_chan *chan) /* This channel is not in use anymore, free it */ if (!chan->client_count && chan->device->device_free_chan_resources) chan->device->device_free_chan_resources(chan); + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -536,7 +543,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, } /** - * dma_request_slave_channel - try to get specific channel exclusively + * dma_get_slave_channel - try to get specific channel exclusively * @chan: target channel */ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) @@ -648,7 +655,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, EXPORT_SYMBOL_GPL(__dma_request_channel); /** - * dma_request_slave_channel - try to allocate an exclusive slave channel + * dma_request_slave_channel_reason - try to allocate an exclusive slave channel * @dev: pointer to client device structure * @name: slave channel name * @@ -836,6 +843,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 24e5290faa32..57ff46284f15 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1364,7 +1364,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) return ret; } -static struct platform_device_id ep93xx_dma_driver_ids[] = { +static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, { }, diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 09e2842d15ec..915eec3cc279 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -881,10 +881,6 @@ static int fsl_edma_probe(struct platform_device *pdev) } - ret = fsl_edma_irq_init(pdev, fsl_edma); - if (ret) - return ret; - fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); @@ -900,6 +896,11 @@ static int fsl_edma_probe(struct 
platform_device *pdev) fsl_edma_chan_mux(fsl_chan, 0, false); } + edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR); + ret = fsl_edma_irq_init(pdev, fsl_edma); + if (ret) + return ret; + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index eed405976ea9..865501fcc67d 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -193,7 +193,7 @@ struct imxdma_filter_data { int request; }; -static struct platform_device_id imx_dma_devtype[] = { +static const struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", .driver_data = IMX1_DMA, diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 62bbd79338e0..77b6aab04f47 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -420,7 +420,7 @@ static struct sdma_driver_data sdma_imx6q = { .script_addrs = &sdma_script_imx6q, }; -static struct platform_device_id sdma_devtypes[] = { +static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", .driver_data = (unsigned long)&sdma_imx25, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1c56001df676..fbaf1ead2597 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -19,6 +19,7 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/clk.h> @@ -30,6 +31,11 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + static void mv_xor_issue_pending(struct dma_chan *chan); #define to_mv_xor_chan(chan) \ @@ -56,18 +62,30 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, - u32 next_desc_addr) +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; - BUG_ON(hw_desc->phy_next_desc); - hw_desc->phy_next_desc = next_desc_addr; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } } -static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) { struct mv_xor_desc *hw_desc = desc->hw_desc; - hw_desc->phy_next_desc = 0; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; } static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, @@ -104,7 +122,7 @@ static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) return intr_cause; } -static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) { u32 val; @@ -114,14 +132,14 @@ static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) { u32 val = 0xFFFF0000 >> (chan->idx * 16); writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_set_mode(struct mv_xor_chan *chan, - enum dma_transaction_type type) +static void mv_chan_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) { u32 
op_mode; u32 config = readl_relaxed(XOR_CONFIG(chan)); @@ -144,6 +162,25 @@ static void mv_set_mode(struct mv_xor_chan *chan, config &= ~0x7; config |= op_mode; + if (IS_ENABLED(__BIG_ENDIAN)) + config |= XOR_DESCRIPTOR_SWAP; + else + config &= ~XOR_DESCRIPTOR_SWAP; + + writel_relaxed(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan) +{ + u32 op_mode; + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + op_mode = XOR_OPERATION_MODE_IN_DESC; + + config &= ~0x7; + config |= op_mode; + #if defined(__BIG_ENDIAN) config |= XOR_DESCRIPTOR_SWAP; #else @@ -151,7 +188,6 @@ static void mv_set_mode(struct mv_xor_chan *chan, #endif writel_relaxed(config, XOR_CONFIG(chan)); - chan->current_type = type; } static void mv_chan_activate(struct mv_xor_chan *chan) @@ -171,28 +207,13 @@ static char mv_chan_is_busy(struct mv_xor_chan *chan) return (state == 1) ? 1 : 0; } -/** - * mv_xor_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &mv_chan->lock while calling this function - */ -static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *slot) -{ - dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n", - __func__, __LINE__, slot); - - slot->slot_used = 0; - -} - /* - * mv_xor_start_new_chain - program the engine to operate on new chain headed by - * sw_desc + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc * Caller must hold &mv_chan->lock while calling this function */ -static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *sw_desc) +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", __func__, __LINE__, sw_desc); @@ -205,8 +226,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, } static dma_cookie_t -mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) { BUG_ON(desc->async_tx.cookie < 0); @@ -230,93 +252,110 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, } static int -mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { - if (async_tx_test_ack(&iter->async_tx)) { - list_del(&iter->completed_node); - mv_xor_free_slots(mv_chan, iter); - } + if (async_tx_test_ack(&iter->async_tx)) + list_move_tail(&iter->node, &mv_chan->free_slots); } return 0; } static int -mv_xor_clean_slot(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan) +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", __func__, __LINE__, desc, desc->async_tx.flags); - list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations * until 'ack' is set */ - if (!async_tx_test_ack(&desc->async_tx)) { + if (!async_tx_test_ack(&desc->async_tx)) /* move this slot to the completed_slots */ - list_add_tail(&desc->completed_node, &mv_chan->completed_slots); - return 0; - } + list_move_tail(&desc->node, &mv_chan->completed_slots); + 
else + list_move_tail(&desc->node, &mv_chan->free_slots); - mv_xor_free_slots(mv_chan, desc); return 0; } /* This function must be called with the mv_xor_chan spinlock held */ -static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dma_cookie_t cookie = 0; int busy = mv_chan_is_busy(mv_chan); u32 current_desc = mv_chan_get_current_desc(mv_chan); - int seen_current = 0; + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); - mv_xor_clean_completed_slots(mv_chan); + mv_chan_clean_completed_slots(mv_chan); /* free completed slots from the chain starting with * the oldest descriptor */ list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); + node) { - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); - /* stop the search if we reach the current descriptor and the - * channel is busy - */ - if (iter->async_tx.phys == current_desc) { - seen_current = 1; - if (busy) + /* done processing desc, clean slot */ + mv_desc_clean_slot(iter, mv_chan); + + /* break if we did cleaned the current */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; + break; + } } - - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); - - if (mv_xor_clean_slot(iter, mv_chan)) - break; } if ((busy == 0) && !list_empty(&mv_chan->chain)) { - struct mv_xor_desc_slot *chain_head; - chain_head = list_entry(mv_chan->chain.next, - struct mv_xor_desc_slot, - chain_node); - - mv_xor_start_new_chain(mv_chan, chain_head); + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->node.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } } if (cookie > 0) @@ -328,56 +367,35 @@ static void mv_xor_tasklet(unsigned long data) struct mv_xor_chan *chan = (struct mv_xor_chan *) data; spin_lock_bh(&chan->lock); - mv_xor_slot_cleanup(chan); + mv_chan_slot_cleanup(chan); spin_unlock_bh(&chan->lock); } static struct mv_xor_desc_slot * -mv_xor_alloc_slot(struct mv_xor_chan *mv_chan) +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) { - struct mv_xor_desc_slot *iter, *_iter; - int retry = 0; + struct mv_xor_desc_slot *iter; - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - if (retry == 0) - iter = mv_chan->last_used; - else - iter = list_entry(&mv_chan->all_slots, - struct mv_xor_desc_slot, - slot_node); - - 
list_for_each_entry_safe_continue( - iter, _iter, &mv_chan->all_slots, slot_node) { - - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slot_used) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - continue; - } + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); /* pre-ack descriptor */ async_tx_ack(&iter->async_tx); - - iter->slot_used = 1; - INIT_LIST_HEAD(&iter->chain_node); iter->async_tx.cookie = -EBUSY; - mv_chan->last_used = iter; - mv_desc_clear_next_desc(iter); return iter; } - if (!retry++) - goto retry; + + spin_unlock_bh(&mv_chan->lock); /* try to free some slots if the allocation fails */ tasklet_schedule(&mv_chan->irq_tasklet); @@ -403,14 +421,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); if (list_empty(&mv_chan->chain)) - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + list_move_tail(&sw_desc->node, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, - chain_node); - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", &old_chain_tail->async_tx.phys); @@ -431,7 +449,7 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) } if (new_hw_chain) - mv_xor_start_new_chain(mv_chan, sw_desc); + mv_chan_start_new_chain(mv_chan, sw_desc); spin_unlock_bh(&mv_chan->lock); @@ -463,26 +481,20 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = mv_xor_tx_submit; - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->node); dma_desc = mv_chan->dma_desc_pool; slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; slot->idx = idx++; spin_lock_bh(&mv_chan->lock); mv_chan->slots_allocated = idx; - list_add_tail(&slot->slot_node, &mv_chan->all_slots); + list_add_tail(&slot->node, &mv_chan->free_slots); spin_unlock_bh(&mv_chan->lock); } - if (mv_chan->slots_allocated && !mv_chan->last_used) - mv_chan->last_used = list_entry(mv_chan->all_slots.next, - struct mv_xor_desc_slot, - slot_node); - dev_dbg(mv_chan_to_devp(mv_chan), - "allocated %d descriptor slots last_used: %p\n", - mv_chan->slots_allocated, mv_chan->last_used); + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); return mv_chan->slots_allocated ? 
: -ENOMEM; } @@ -503,16 +515,17 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, "%s src_cnt: %d len: %u dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); - spin_lock_bh(&mv_chan->lock); - sw_desc = mv_xor_alloc_slot(mv_chan); + sw_desc = mv_chan_alloc_slot(mv_chan); if (sw_desc) { sw_desc->type = DMA_XOR; sw_desc->async_tx.flags = flags; mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); while (src_cnt--) mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); } - spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan_to_devp(mv_chan), "%s sw_desc %p async_tx %p \n", __func__, sw_desc, &sw_desc->async_tx); @@ -556,25 +569,29 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { in_use_descs++; - list_del(&iter->chain_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { in_use_descs++; - list_del(&iter->completed_node); + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe_reverse( - iter, _iter, &mv_chan->all_slots, slot_node) { - list_del(&iter->slot_node); + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); kfree(iter); mv_chan->slots_allocated--; } - mv_chan->last_used = NULL; dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", __func__, mv_chan->slots_allocated); @@ -603,13 +620,13 @@ static enum dma_status mv_xor_status(struct dma_chan *chan, return ret; spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); spin_unlock_bh(&mv_chan->lock); return dma_cookie_status(chan, cookie, txstate); } -static void mv_dump_xor_regs(struct mv_xor_chan *chan) +static void mv_chan_dump_regs(struct mv_xor_chan *chan) { u32 val; @@ -632,8 +649,8 @@ static void mv_dump_xor_regs(struct mv_xor_chan *chan) dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); } -static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, - u32 intr_cause) +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) { if (intr_cause & XOR_INT_ERR_DECODE) { dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); @@ -643,7 +660,7 @@ static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", chan->idx, intr_cause); - mv_dump_xor_regs(chan); + mv_chan_dump_regs(chan); WARN_ON(1); } @@ -655,11 +672,11 @@ static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); if (intr_cause & XOR_INTR_ERRORS) - mv_xor_err_interrupt_handler(chan, intr_cause); + mv_chan_err_interrupt_handler(chan, intr_cause); tasklet_schedule(&chan->irq_tasklet); - mv_xor_device_clear_eoc_cause(chan); + mv_chan_clear_eoc_cause(chan); return IRQ_HANDLED; } @@ -678,7 +695,7 @@ static void mv_xor_issue_pending(struct dma_chan *chan) * Perform a transaction to verify the HW works. 
*/ -static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) { int i, ret; void *src, *dest; @@ -787,7 +804,7 @@ out: #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ static int -mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) { int i, src_idx, ret; struct page *dest; @@ -951,7 +968,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq) + int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -963,6 +980,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; + mv_chan->op_in_desc = op_in_desc; dma_dev = &mv_chan->dmadev; @@ -1014,7 +1032,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan); /* clear errors before enabling interrupts */ - mv_xor_device_clear_err_status(mv_chan); + mv_chan_clear_err_status(mv_chan); ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, 0, dev_name(&pdev->dev), mv_chan); @@ -1023,32 +1041,37 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan_unmask_interrupts(mv_chan); - mv_set_mode(mv_chan, DMA_XOR); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode_to_desc(mv_chan); + else + mv_chan_set_mode(mv_chan, DMA_XOR); spin_lock_init(&mv_chan->lock); INIT_LIST_HEAD(&mv_chan->chain); INIT_LIST_HEAD(&mv_chan->completed_slots); - INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); mv_chan->dmachan.device = dma_dev; dma_cookie_init(&mv_chan->dmachan); list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = mv_xor_memcpy_self_test(mv_chan); + ret = mv_chan_memcpy_self_test(mv_chan); dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); if (ret) goto err_free_irq; } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = mv_xor_xor_self_test(mv_chan); + ret = mv_chan_xor_self_test(mv_chan); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? 
"intr " : ""); @@ -1097,6 +1120,13 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + {}, +}; +MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; @@ -1104,6 +1134,7 @@ static int mv_xor_probe(struct platform_device *pdev) struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; int i, ret; + int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1148,11 +1179,15 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; + op_in_desc = (int)of_id->data; dma_cap_zero(cap_mask); if (of_property_read_bool(np, "dmacap,memcpy")) @@ -1169,7 +1204,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); + cap_mask, irq, op_in_desc); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1198,7 +1233,8 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); + cd->cap_mask, irq, + XOR_MODE_IN_REG); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; @@ -1244,14 +1280,6 @@ static int mv_xor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", }, - {}, -}; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); -#endif - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, .remove = mv_xor_remove, diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 91958dba39a2..b7455b42137b 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -19,7 +19,7 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> -#define MV_XOR_POOL_SIZE PAGE_SIZE +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 @@ -30,7 +30,13 @@ /* Values for the XOR_CONFIG register */ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 #define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 + +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) @@ -88,13 +94,14 @@ struct mv_xor_device { * @mmr_base: memory mapped register base * @idx: the index of the xor channel * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver * @completed_slots: slots completed by HW but still need to be acked * @device: parent device * @common: common dmaengine channel object members - * @last_used: place holder for allocation to continue from where it left off - * @all_slots: complete domain of slots usable by the channel * @slots_allocated: records the actual size of the descriptor slot pool * @irq_tasklet: bottom half where mv_xor_slot_cleanup 
runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. */ struct mv_xor_chan { int pending; @@ -105,16 +112,17 @@ struct mv_xor_chan { int irq; enum dma_transaction_type current_type; struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; struct list_head completed_slots; dma_addr_t dma_desc_pool; void *dma_desc_pool_virt; size_t pool_size; struct dma_device dmadev; struct dma_chan dmachan; - struct mv_xor_desc_slot *last_used; - struct list_head all_slots; int slots_allocated; struct tasklet_struct irq_tasklet; + int op_in_desc; char dummy_src[MV_XOR_MIN_BYTE_COUNT]; char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; dma_addr_t dummy_src_addr, dummy_dst_addr; @@ -122,9 +130,7 @@ struct mv_xor_chan { /** * struct mv_xor_desc_slot - software descriptor - * @slot_node: node on the mv_xor_chan.all_slots list - * @chain_node: node on the mv_xor_chan.chain list - * @completed_node: node on the mv_xor_chan.completed_slots list + * @node: node on the mv_xor_chan lists * @hw_desc: virtual address of the hardware descriptor chain * @phys: hardware address of the hardware descriptor chain * @slot_used: slot in use or not @@ -133,12 +139,9 @@ struct mv_xor_chan { * @async_tx: support for the async_tx api */ struct mv_xor_desc_slot { - struct list_head slot_node; - struct list_head chain_node; - struct list_head completed_node; + struct list_head node; enum dma_transaction_type type; void *hw_desc; - u16 slot_used; u16 idx; struct dma_async_tx_descriptor async_tx; }; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 829ec686dac3..60de35251da5 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -170,7 +170,7 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static struct platform_device_id mxs_dma_ids[] = { +static const struct platform_device_id mxs_dma_ids[] = { { .name = "imx23-dma-apbh", .driver_data = (kernel_ulong_t) &mxs_dma_types[0], diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 88b77c98365d..2b5a198ac77e 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -1455,7 +1455,7 @@ static int nbpf_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id nbpf_ids[] = { +static const struct platform_device_id nbpf_ids[] = { {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index cbd4a8aff120..1e1f2986eba8 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -45,6 +45,50 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) } /** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. 
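A slave driver requesting a channel behind such a router needs no changes; the translation happens entirely inside of_dma_router_xlate(). A minimal consumer-side sketch (the client device, the "rx" name and fifo_phys are hypothetical; nothing here is specific to this patch):

    struct dma_slave_config cfg = {
            .direction      = DMA_DEV_TO_MEM,
            .src_addr       = fifo_phys,                 /* hypothetical device FIFO */
            .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
            .src_maxburst   = 8,
    };
    struct dma_chan *chan;

    chan = dma_request_slave_channel(dev, "rx");         /* resolved through the router */
    if (!chan)
            return -ENODEV;
    dmaengine_slave_config(chan, &cfg);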
+ */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) + return NULL; + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (chan) { + chan->router = ofdma->dma_router; + chan->route_data = route_data; + } else { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } + + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** * of_dma_controller_register - Register a DMA controller to DT DMA helpers * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle @@ -110,6 +154,51 @@ void of_dma_controller_free(struct device_node *np) EXPORT_SYMBOL_GPL(of_dma_controller_free); /** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. 
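On the provider side, a router driver fills a struct dma_router and hands its route-allocate callback to this helper from its probe. A minimal sketch; the xbar_* names are hypothetical, only of_dma_router_register() and the dev/route_free members of struct dma_router come from this patch, and includes and error paths are trimmed:

    static void xbar_route_free(struct device *dev, void *route_data)
    {
            /* undo whatever xbar_route_allocate() programmed */
            kfree(route_data);
    }

    static void *xbar_route_allocate(struct of_phandle_args *dma_spec,
                                     struct of_dma *ofdma)
    {
            /*
             * Rewrite dma_spec (np and args) so it points at the real DMA
             * controller, program the crossbar, and return router-private
             * data that route_free() gets back later.
             */
            return ERR_PTR(-EINVAL);        /* placeholder */
    }

    static int xbar_probe(struct platform_device *pdev)
    {
            struct dma_router *rtr;

            rtr = devm_kzalloc(&pdev->dev, sizeof(*rtr), GFP_KERNEL);
            if (!rtr)
                    return -ENOMEM;

            rtr->dev = &pdev->dev;
            rtr->route_free = xbar_route_free;

            return of_dma_router_register(pdev->dev.of_node,
                                          xbar_route_allocate, rtr);
    }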
+ */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** * of_dma_match_channel - Check if a DMA specifier matches name * @np: device node to look for DMA channels * @name: channel name to be matched diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 167dbaf65742..249445c8a4c6 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -22,6 +22,9 @@ #include "virt-dma.h" +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + struct omap_dmadev { struct dma_device ddev; spinlock_t lock; @@ -31,9 +34,10 @@ struct omap_dmadev { const struct omap_dma_reg *reg_map; struct omap_system_dma_plat_info *plat; bool legacy; + unsigned dma_requests; spinlock_t irq_lock; uint32_t irq_enable_mask; - struct omap_chan *lch_map[32]; + struct omap_chan *lch_map[OMAP_SDMA_CHANNELS]; }; struct omap_chan { @@ -362,7 +366,7 @@ static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, struct omap_sg *sg = d->sg + idx; unsigned cxsa, cxei, cxfi; - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CDSA; cxei = CDEI; cxfi = CDFI; @@ -408,7 +412,7 @@ static void omap_dma_start_desc(struct omap_chan *c) if (dma_omap1()) omap_dma_chan_write(c, CCR2, d->ccr >> 16); - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CSSA; cxei = CSEI; cxfi = CSFI; @@ -589,6 +593,7 @@ static void omap_dma_free_chan_resources(struct dma_chan *chan) omap_free_dma(c->dma_ch); dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig); + c->dma_sig = 0; } static size_t omap_dma_sg_size(struct omap_sg *sg) @@ -948,6 +953,51 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( return vchan_tx_prep(&c->vc, &d->vd, flags); } +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= 
CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct omap_chan *c = to_omap_dma_chan(chan); @@ -1037,7 +1087,7 @@ static int omap_dma_resume(struct dma_chan *chan) return 0; } -static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) +static int omap_dma_chan_init(struct omap_dmadev *od) { struct omap_chan *c; @@ -1046,7 +1096,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) return -ENOMEM; c->reg_map = od->reg_map; - c->dma_sig = dma_sig; c->vc.desc_free = omap_dma_desc_free; vchan_init(&c->vc, &od->ddev); INIT_LIST_HEAD(&c->node); @@ -1094,12 +1143,14 @@ static int omap_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; od->ddev.device_tx_status = omap_dma_tx_status; od->ddev.device_issue_pending = omap_dma_issue_pending; od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; od->ddev.device_config = omap_dma_slave_config; od->ddev.device_pause = omap_dma_pause; od->ddev.device_resume = omap_dma_resume; @@ -1116,8 +1167,17 @@ static int omap_dma_probe(struct platform_device *pdev) tasklet_init(&od->task, omap_dma_sched, (unsigned long)od); - for (i = 0; i < 127; i++) { - rc = omap_dma_chan_init(od, i); + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + for (i = 0; i < OMAP_SDMA_CHANNELS; i++) { + rc = omap_dma_chan_init(od); if (rc) { omap_dma_free(od); return rc; @@ -1208,10 +1268,14 @@ static struct platform_driver omap_dma_driver = { bool omap_dma_filter_fn(struct dma_chan *chan, void *param) { if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); unsigned req = *(unsigned *)param; - return req == c->dma_sig; + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } } return false; } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 340f9e607cd8..f513f77b1d85 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1424,8 +1424,8 @@ static int pl330_submit_req(struct pl330_thread *thrd, goto xfer_exit; if (ret > pl330->mcbufsz / 2) { - dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", - __func__, __LINE__); + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); ret = -ENOMEM; goto xfer_exit; } @@ -2584,12 +2584,14 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_dmac *pl330 = pch->dmac; + struct pl330_dmac *pl330; int burst; if (unlikely(!pch || !len)) return NULL; + pl330 = pch->dmac; + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c new file mode 100644 index 
000000000000..ddcbbf5cd9e9 --- /dev/null +++ b/drivers/dma/pxa_dma.c @@ -0,0 +1,1467 @@ +/* + * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/platform_data/mmp_dma.h> +#include <linux/dmapool.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of.h> +#include <linux/dma/pxa-dma.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. 
*/ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. + */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry *dbgfs_state; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static int 
dbg_show_requester_chan(struct seq_file *s, void *p) +{ + int pos = 0; + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + pos += seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return pos; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? 
+ "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root) + goto err_root; + + pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root, + pdev, &dbg_fops_state); + if (!pdev->dbgfs_state) + goto err_state; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_state), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + goto err_alloc; + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < pdev->nr_chans; i++) { + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); + if (!pdev->dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(pdev->dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(pdev->dbgfs_root); +err_root: + pr_err("pxad: debugfs is not available\n"); +} + +static void 
pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +/* + * In the transition phase where legacy pxa handling is done at the same time as + * mmp_dma, the DMA physical channel split between the 2 DMA providers is done + * through legacy_reserved. Legacy code reserves DMA channels by settings + * corresponding bits in legacy_reserved. + */ +static u32 legacy_reserved; +static u32 legacy_unavailable; + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + if (i < 32) + legacy_unavailable |= BIT(i); + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + int i; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (i = 0; i < 32; i++) + if (chan->phy == &pdev->phys[i]) + legacy_unavailable &= ~BIT(i); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct 
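The priority table in the lookup_phy() comment above comes from the expression (i & 0xf) >> 2 on the physical channel index; a quick check of a few values, using nothing beyond the code already shown:

    /*
     * i = 2  : (2  & 0xf) >> 2 = 0  -> highest priority (ch 0 - 3, 16 - 19)
     * i = 21 : (21 & 0xf) >> 2 = 1  ->                  (ch 4 - 7, 20 - 23)
     * i = 30 : (30 & 0xf) >> 2 = 3  -> lowest priority  (ch 12 - 15, 28 - 31)
     */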
pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. 
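The "completed after having been hot chained" condition in this comment is evaluated by is_desc_completed(), which in turn relies on the updater descriptor built by set_updater_desc() above: the updater copies the u32 sitting at its own ddadr offset (DDADR_STOP) over its own dtadr field, so the descriptor visibly mutates once the engine has executed it. A condensed restatement, where updater_dma stands for the updater's own DMA address (a name introduced here for readability, not one from the driver):

    /* struct pxad_desc_hw layout: ddadr @0, dsadr @4, dtadr @8, dcmd @12 */
    updater->ddadr = DDADR_STOP;            /* the value that will be copied  */
    updater->dsadr = updater_dma;           /* source: its own ddadr field    */
    updater->dtadr = updater_dma + 8;       /* target: its own dtadr field    */
    updater->dcmd  = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | sizeof(u32);

    /* once the engine has run the updater, dtadr has been overwritten */
    completed = (updater->dtadr != updater->dsadr + 8);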
+ */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + unsigned long flags; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d\n", + __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (is_desc_completed(vd)) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + BUG_ON(sw_desc->nb_desc == 0); + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + 
if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(sizeof(*sw_desc) + + nb_hw_desc * sizeof(struct pxad_desc_hw *), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? 
"cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + *dcmd = 0; + if (chan->cfg.direction == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; + } + if (chan->cfg.direction == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; + } + if (chan->cfg.direction == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; + + /* FIXME: drivers should be ported over to use the filter + * function. Once that's done, the following two lines can + * be removed. 
+ */ + if (chan->cfg.slave_id) + chan->drcmr = chan->cfg.slave_id; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? 
dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0.
+ */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + dma_async_device_unregister(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id const pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = 
dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + } + + return dma_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + struct resource *iores; + int ret, dma_channels = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + pdev->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + else if (pdata && pdata->dma_channels) + dma_channels = pdata->dma_channels; + else + dma_channels = 32; /* default 32 channel */ + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = 
pxad_probe, + .remove = pxad_remove, +}; + +bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} +EXPORT_SYMBOL_GPL(pxad_filter_fn); + +int pxad_toggle_reserved_channel(int legacy_channel) +{ + if (legacy_unavailable & (BIT(legacy_channel))) + return -EBUSY; + legacy_reserved ^= BIT(legacy_channel); + return 0; +} +EXPORT_SYMBOL_GPL(pxad_toggle_reserved_channel); + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 01dcaf21b988..17ccdfd28f37 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -1168,7 +1168,7 @@ static struct soc_data soc_s3c2443 = { .has_clocks = true, }; -static struct platform_device_id s3c24xx_dma_driver_ids[] = { +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { { .name = "s3c2410-dma", .driver_data = (kernel_ulong_t)&soc_s3c2410, diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index e0302c784ba4..7820d07e7bee 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -183,7 +183,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; - unsigned long modules[256 / BITS_PER_LONG]; + DECLARE_BITMAP(modules, 256); }; #define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c index 4fb99970a3ea..96ea3828c3eb 100644 --- a/drivers/dma/sh/shdma-r8a73a4.c +++ b/drivers/dma/sh/shdma-r8a73a4.c @@ -11,7 +11,7 @@ #include "shdma-arm.h" -const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; +static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; static const struct sh_dmae_slave_config dma_slaves[] = { { diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index a1afda43b8ef..8c5186cc9f63 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -23,8 +23,13 @@ #include "dmaengine.h" +#define SIRFSOC_DMA_VER_A7V1 1 +#define SIRFSOC_DMA_VER_A7V2 2 +#define SIRFSOC_DMA_VER_A6 4 + #define SIRFSOC_DMA_DESCRIPTORS 16 #define SIRFSOC_DMA_CHANNELS 16 +#define SIRFSOC_DMA_TABLE_NUM 256 #define SIRFSOC_DMA_CH_ADDR 0x00 #define SIRFSOC_DMA_CH_XLEN 0x04 @@ -35,15 +40,44 @@ #define SIRFSOC_DMA_CH_VALID 0x140 #define SIRFSOC_DMA_CH_INT 0x144 #define SIRFSOC_DMA_INT_EN 0x148 -#define SIRFSOC_DMA_INT_EN_CLR 0x14C +#define SIRFSOC_DMA_INT_EN_CLR 0x14C #define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 -#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x15C +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x154 +#define SIRFSOC_DMA_WIDTH_ATLAS7 0x10 +#define SIRFSOC_DMA_VALID_ATLAS7 0x14 +#define SIRFSOC_DMA_INT_ATLAS7 0x18 +#define SIRFSOC_DMA_INT_EN_ATLAS7 0x1c +#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7 0x20 +#define SIRFSOC_DMA_CUR_DATA_ADDR 0x34 +#define SIRFSOC_DMA_MUL_ATLAS7 0x38 +#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7 0x158 +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7 0x15C +#define SIRFSOC_DMA_IOBG_SCMD_EN 0x800 +#define SIRFSOC_DMA_EARLY_RESP_SET 0x818 +#define SIRFSOC_DMA_EARLY_RESP_CLR 0x81C #define SIRFSOC_DMA_MODE_CTRL_BIT 4 #define SIRFSOC_DMA_DIR_CTRL_BIT 5 +#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7 2 +#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7 3 +#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7 4 
+#define SIRFSOC_DMA_TAB_NUM_ATLAS7 7 +#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7 5 +#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7 25 +#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT 32 + +#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7 BIT(0) +#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7 BIT(1) +#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7 BIT(2) +#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7 BIT(3) +#define SIRFSOC_DMA_INT_INV_INT_ATLAS7 BIT(4) +#define SIRFSOC_DMA_INT_END_INT_ATLAS7 BIT(5) +#define SIRFSOC_DMA_INT_ALL_ATLAS7 0x3F /* xlen and dma_width register is in 4 bytes boundary */ #define SIRFSOC_DMA_WORD_LEN 4 +#define SIRFSOC_DMA_XLEN_MAX_V1 0x800 +#define SIRFSOC_DMA_XLEN_MAX_V2 0x1000 struct sirfsoc_dma_desc { struct dma_async_tx_descriptor desc; @@ -56,7 +90,9 @@ struct sirfsoc_dma_desc { int width; /* DMA width */ int dir; bool cyclic; /* is loop DMA? */ + bool chain; /* is chain DMA? */ u32 addr; /* DMA buffer address */ + u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */ }; struct sirfsoc_dma_chan { @@ -87,10 +123,25 @@ struct sirfsoc_dma { void __iomem *base; int irq; struct clk *clk; - bool is_marco; + int type; + void (*exec_desc)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); struct sirfsoc_dma_regs regs_save; }; +struct sirfsoc_dmadata { + void (*exec)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); + int type; +}; + +enum sirfsoc_dma_chain_flag { + SIRFSOC_DMA_CHAIN_NORMAL = 0x01, + SIRFSOC_DMA_CHAIN_PAUSE = 0x02, + SIRFSOC_DMA_CHAIN_LOOP = 0x03, + SIRFSOC_DMA_CHAIN_END = 0x04 +}; + #define DRV_NAME "sirfsoc_dma" static int sirfsoc_dma_runtime_suspend(struct device *dev); @@ -109,48 +160,105 @@ static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); } +static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + if (sdesc->chain) { + /* DMA v2 HW chain mode */ + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3, + base + SIRFSOC_DMA_CH_CTRL); + } else { + /* DMA v2 legacy mode */ + writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7); + writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)), + base + SIRFSOC_DMA_MUL_ATLAS7); + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + 0x3, base + SIRFSOC_DMA_CH_CTRL); + } + writel_relaxed(sdesc->chain ? 
SIRFSOC_DMA_INT_END_INT_ATLAS7 : + (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 | + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7), + base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) + writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); +} + +static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN); + writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7), + base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + } + +} + +static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL), + base + SIRFSOC_DMA_CH_LOOP_CTRL); + } + +} + /* Execute all queued DMA descriptors */ static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) { struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); int cid = schan->chan.chan_id; struct sirfsoc_dma_desc *sdesc = NULL; + void __iomem *base; /* * lock has been held by functions calling this, so we don't hold * lock again */ - + base = sdma->base; sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, - node); + node); /* Move the first queued descriptor to active list */ list_move_tail(&sdesc->node, &schan->active); - /* Start the DMA transfer */ - writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + - cid * 4); - writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | - (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), - sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_XLEN); - writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_YLEN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | - (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; - /* - * writel has an implict memory write barrier to make sure data is - * flushed into memory before starting DMA - */ - writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + /* Start the DMA transfer */ + sdma->exec_desc(sdesc, cid, schan->mode, base); - if (sdesc->cyclic) { - writel((1 << cid) | 1 << (cid + 16) | - 
readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + if (sdesc->cyclic) schan->happened_cyclic = schan->completed_cyclic = 0; - } } /* Interrupt handler */ @@ -160,27 +268,65 @@ static irqreturn_t sirfsoc_dma_irq(int irq, void *data) struct sirfsoc_dma_chan *schan; struct sirfsoc_dma_desc *sdesc = NULL; u32 is; + bool chain; int ch; + void __iomem *reg; + + switch (sdma->type) { + case SIRFSOC_DMA_VER_A6: + case SIRFSOC_DMA_VER_A7V1: + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + reg = sdma->base + SIRFSOC_DMA_CH_INT; + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, reg); + schan = &sdma->channels[ch]; + spin_lock(&schan->lock); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + spin_unlock(&schan->lock); + } + break; - is = readl(sdma->base + SIRFSOC_DMA_CH_INT); - while ((ch = fls(is) - 1) >= 0) { - is &= ~(1 << ch); - writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); - schan = &sdma->channels[ch]; + case SIRFSOC_DMA_VER_A7V2: + is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7); + reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7; + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg); + schan = &sdma->channels[0]; spin_lock(&schan->lock); - - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); if (!sdesc->cyclic) { - /* Execute queued descriptors */ - list_splice_tail_init(&schan->active, &schan->completed); - if (!list_empty(&schan->queued)) - sirfsoc_dma_execute(schan); - } else + chain = sdesc->chain; + if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) || + (!chain && + (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } + } else if (sdesc->cyclic && (is & + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7)) schan->happened_cyclic++; spin_unlock(&schan->lock); + break; + + default: + break; } /* Schedule tasklet */ @@ -227,16 +373,15 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) schan->chan.completed_cookie = last_cookie; spin_unlock_irqrestore(&schan->lock, flags); } else { - /* for cyclic channel, desc is always in active list */ - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - - if (!sdesc || (sdesc && !sdesc->cyclic)) { - /* without active cyclic DMA */ + if (list_empty(&schan->active)) { spin_unlock_irqrestore(&schan->lock, flags); continue; } + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + /* cyclic DMA */ happened_cyclic = schan->happened_cyclic; spin_unlock_irqrestore(&schan->lock, flags); @@ -307,20 +452,32 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) { - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & - ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - } else { + 
switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + default: + break; } - writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); - list_splice_tail_init(&schan->active, &schan->free); list_splice_tail_init(&schan->queued, &schan->free); @@ -338,13 +495,25 @@ static int sirfsoc_dma_pause_chan(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -359,14 +528,25 @@ static int sirfsoc_dma_resume_chan(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&schan->lock, flags); - - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - | ((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0x10001, + sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) | + ((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -473,14 +653,31 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, spin_lock_irqsave(&schan->lock, flags); - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * - (sdesc->width * SIRFSOC_DMA_WORD_LEN); + if (list_empty(&schan->active)) { + ret = dma_cookie_status(chan, cookie, txstate); + dma_set_residue(txstate, 0); + spin_unlock_irqrestore(&schan->lock, flags); + return ret; + } + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node); + if (sdesc->cyclic) + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); 
+ else + dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN; ret = dma_cookie_status(chan, cookie, txstate); - dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) - << 2; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR); + } else { + dma_pos = readl_relaxed( + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2; + } + residue = dma_request_bytes - (dma_pos - sdesc->addr); dma_set_residue(txstate, residue); @@ -647,6 +844,7 @@ static int sirfsoc_dma_probe(struct platform_device *op) struct dma_device *dma; struct sirfsoc_dma *sdma; struct sirfsoc_dma_chan *schan; + struct sirfsoc_dmadata *data; struct resource res; ulong regs_start, regs_size; u32 id; @@ -657,9 +855,11 @@ static int sirfsoc_dma_probe(struct platform_device *op) dev_err(dev, "Memory exhausted!\n"); return -ENOMEM; } - - if (of_device_is_compatible(dn, "sirf,marco-dmac")) - sdma->is_marco = true; + data = (struct sirfsoc_dmadata *) + (of_match_device(op->dev.driver->of_match_table, + &op->dev)->data); + sdma->exec_desc = data->exec; + sdma->type = data->type; if (of_property_read_u32(dn, "cell-index", &id)) { dev_err(dev, "Fail to get DMAC index\n"); @@ -816,6 +1016,8 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; /* * if we were runtime-suspended before, resume to enable clock @@ -827,11 +1029,19 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) return ret; } + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + } + /* * DMA controller will lose all registers while suspending * so we need to save registers for active channels */ - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -841,7 +1051,7 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) save->ctrl[ch] = readl_relaxed(sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); } - save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN); + save->interrupt_en = readl_relaxed(sdma->base + int_offset); /* Disable clock */ sirfsoc_dma_runtime_suspend(dev); @@ -857,14 +1067,27 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; + u32 width_offset; /* Enable clock before accessing register */ ret = sirfsoc_dma_runtime_resume(dev); if (ret < 0) return ret; - writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN); - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + width_offset = SIRFSOC_DMA_WIDTH_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + width_offset = SIRFSOC_DMA_WIDTH_0; + } + + writel_relaxed(save->interrupt_en, sdma->base + int_offset); + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -872,15 +1095,21 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_desc, node); writel_relaxed(sdesc->width, - sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4); + sdma->base + width_offset + ch * 4); writel_relaxed(sdesc->xlen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN); writel_relaxed(sdesc->ylen, 
sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN); writel_relaxed(save->ctrl[ch], sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->addr >> 2, - sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + writel_relaxed(sdesc->addr, + sdma->base + SIRFSOC_DMA_CH_ADDR); + } else { + writel_relaxed(sdesc->addr >> 2, + sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + + } } /* if we were runtime-suspended before, suspend again */ @@ -896,9 +1125,25 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume) }; +struct sirfsoc_dmadata sirfsoc_dmadata_a6 = { + .exec = sirfsoc_dma_execute_hw_a6, + .type = SIRFSOC_DMA_VER_A6, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = { + .exec = sirfsoc_dma_execute_hw_a7v1, + .type = SIRFSOC_DMA_VER_A7V1, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = { + .exec = sirfsoc_dma_execute_hw_a7v2, + .type = SIRFSOC_DMA_VER_A7V2, +}; + static const struct of_device_id sirfsoc_dma_match[] = { - { .compatible = "sirf,prima2-dmac", }, - { .compatible = "sirf,marco-dmac", }, + { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,}, + { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,}, + { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,}, {}, }; @@ -925,7 +1170,7 @@ static void __exit sirfsoc_dma_exit(void) subsys_initcall(sirfsoc_dma_init); module_exit(sirfsoc_dma_exit); -MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, " - "Barry Song <baohua.song@csr.com>"); +MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>"); +MODULE_AUTHOR("Barry Song <baohua.song@csr.com>"); MODULE_DESCRIPTION("SIRFSOC DMA control driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 11e536586812..842ff97c2cfb 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -891,9 +891,21 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_vchans = 37, }; +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, { /* sentinel */ } }; diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c new file mode 100644 index 000000000000..24f5ca2356bf --- /dev/null +++ b/drivers/dma/ti-dma-crossbar.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/io.h> +#include <linux/idr.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> + +#define TI_XBAR_OUTPUTS 127 +#define TI_XBAR_INPUTS 256 + +static DEFINE_IDR(map_idr); + +struct ti_dma_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ +}; + +struct ti_dma_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dma_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dma_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dma_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dma_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val); + idr_remove(&map_idr, map->xbar_out); + kfree(map); +} + +static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dma_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dma_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->xbar_out = idr_alloc(&map_idr, NULL, 0, xbar->dma_requests, + GFP_KERNEL); + map->xbar_in = (u16)dma_spec->args[0]; + + /* The DMA request is 1 based in sDMA */ + dma_spec->args[0] = map->xbar_out + 1; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +static int ti_dma_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device_node *dma_node; + struct ti_dma_xbar_data *xbar; + struct resource *res; + u32 safe_val; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_XBAR_OUTPUTS); + xbar->dma_requests = TI_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_XBAR_INPUTS); + xbar->xbar_requests = TI_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + iomem = 
devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dma_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, xbar->safe_val); + + ret = of_dma_router_register(node, ti_dma_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, i); + } + + return ret; +} + +static const struct of_device_id ti_dma_xbar_match[] = { + { .compatible = "ti,dra7-dma-crossbar" }, + {}, +}; + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = of_match_ptr(ti_dma_xbar_match), + }, + .probe = ti_dma_xbar_probe, +}; + +int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index 6f80432a3f0a..7d2c17d8d30f 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&vc->lock, flags); cookie = dma_cookie_assign(tx); - list_add_tail(&vd->node, &vc->desc_submitted); + list_move_tail(&vd->node, &vc->desc_submitted); spin_unlock_irqrestore(&vc->lock, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", @@ -83,8 +83,10 @@ static void vchan_complete(unsigned long arg) cb_data = vd->tx.callback_param; list_del(&vd->node); - - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) + list_add(&vd->node, &vc->desc_allocated); + else + vc->desc_free(vd); if (cb) cb(cb_data); @@ -96,9 +98,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) while (!list_empty(head)) { struct virt_dma_desc *vd = list_first_entry(head, struct virt_dma_desc, node); - list_del(&vd->node); - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) { + list_move_tail(&vd->node, &vc->desc_allocated); + } else { + dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); + list_del(&vd->node); + vc->desc_free(vd); + } } } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); @@ -108,6 +114,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) dma_cookie_init(&vc->chan); spin_lock_init(&vc->lock); + INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); INIT_LIST_HEAD(&vc->desc_completed); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h index 181b95267866..189e75dbcb15 100644 --- a/drivers/dma/virt-dma.h +++ b/drivers/dma/virt-dma.h @@ -29,6 +29,7 @@ struct virt_dma_chan { spinlock_t lock; /* protected by vc.lock */ + struct list_head desc_allocated; struct list_head desc_submitted; struct list_head desc_issued; struct list_head desc_completed; @@ -55,11 +56,16 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan struct virt_dma_desc *vd, unsigned long tx_flags) { extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); + unsigned long flags; dma_async_tx_descriptor_init(&vd->tx, &vc->chan); vd->tx.flags = tx_flags; vd->tx.tx_submit = vchan_tx_submit; + spin_lock_irqsave(&vc->lock, flags); + list_add_tail(&vd->node, &vc->desc_allocated); + spin_unlock_irqrestore(&vc->lock, flags); + return &vd->tx;
} @@ -122,7 +128,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) } /** - * vchan_get_all_descriptors - obtain all submitted and issued descriptors + * vchan_get_all_descriptors - obtain all allocated, submitted and issued + * descriptors * vc: virtual channel to get descriptors from * head: list of descriptors found * @@ -134,6 +141,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, struct list_head *head) { + list_splice_tail_init(&vc->desc_allocated, head); list_splice_tail_init(&vc->desc_submitted, head); list_splice_tail_init(&vc->desc_issued, head); list_splice_tail_init(&vc->desc_completed, head); @@ -141,11 +149,14 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) { + struct virt_dma_desc *vd; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&vc->lock, flags); vchan_get_all_descriptors(vc, &head); + list_for_each_entry(vd, &head, node) + async_tx_clear_ack(&vd->tx); spin_unlock_irqrestore(&vc->lock, flags); vchan_dma_desc_free_list(vc, &head); diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index f52e37502254..620fd55ec766 100755..100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -124,32 +124,8 @@ #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 -#define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 - -#define XGENE_DMA_DESC_NV_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) -#define XGENE_DMA_DESC_IN_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) -#define XGENE_DMA_DESC_RTYPE_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) -#define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ - (((u64 *)(m))[0] |= (v)) -#define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) -#define XGENE_DMA_DESC_C_SET(m) \ - (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) -#define XGENE_DMA_DESC_FLYBY_SET(m, v) \ - (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) -#define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ - (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) -#define XGENE_DMA_DESC_DR_SET(m) \ - (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) -#define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ - (((u64 *)(m))[3] |= (v)) -#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ - (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ @@ -158,14 +134,7 @@ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ -#define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL -#define XGENE_DMA_DESC_SET_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) -#define XGENE_DMA_DESC_IS_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 @@ -184,7 +153,7 @@ #define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 -#define XGENE_DMA_INVALID_LEN_CODE 0x7800 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL /* X-Gene DMA descriptor error codes */ #define ERR_DESC_AXI 0x01 @@ -214,10 +183,10 @@ 
#define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ -#define FLYBY_2SRC_XOR 0x8 -#define FLYBY_3SRC_XOR 0x9 -#define FLYBY_4SRC_XOR 0xA -#define FLYBY_5SRC_XOR 0xB +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) @@ -238,10 +207,10 @@ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { - u64 m0; - u64 m1; - u64 m2; - u64 m3; + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; }; enum xgene_dma_ring_cfgsize { @@ -388,18 +357,11 @@ static bool is_pq_enabled(struct xgene_dma *pdma) return !(val & XGENE_DMA_PQ_DISABLE_MASK); } -static void xgene_dma_cpu_to_le64(u64 *desc, int count) -{ - int i; - - for (i = 0; i < count; i++) - desc[i] = cpu_to_le64(desc[i]); -} - -static u16 xgene_dma_encode_len(u32 len) +static u64 xgene_dma_encode_len(size_t len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? - len : XGENE_DMA_16K_BUFFER_LEN_CODE; + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) @@ -424,34 +386,50 @@ static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) return XGENE_DMA_RING_DESC_CNT(ring_state); } -static void xgene_dma_set_src_buffer(void *ext8, size_t *len, +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? *len : XGENE_DMA_MAX_BYTE_CNT; - XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); - XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(void *ext8) +static void xgene_dma_invalidate_buffer(__le64 *ext8) { - XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); + *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); } -static void *xgene_dma_lookup_ext8(u64 *desc, int idx) +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { - return (idx % 2) ? 
(desc + idx - 1) : (desc + idx + 1); + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; } -static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) { - XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ - XGENE_DMA_DESC_IN_SET(desc); - XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); - XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, @@ -459,7 +437,7 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; int i; /* Get 1st descriptor */ @@ -467,23 +445,21 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(dst); /* Set 1st source address */ - xgene_dma_set_src_buffer(desc1 + 8, &len, &src); + xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - if (len <= 0) { - desc2 = NULL; - goto skip_additional_src; - } + if (!len) + return; /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) @@ -496,12 +472,6 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; - -skip_additional_src: - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - if (desc2) - xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, @@ -510,7 +480,7 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, u32 src_cnt, size_t *nbytes, const u8 *scf) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; size_t len = *nbytes; int i; @@ -521,28 +491,24 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); /* We have multiple source addresses, so need to set NV bit*/ - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set flyby opcode */ - XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; - xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : + xgene_dma_set_src_buffer((i == 0) ? 
&desc1->m1 : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); - XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); } - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - xgene_dma_cpu_to_le64(desc2, 4); - /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; @@ -738,7 +704,7 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { @@ -808,7 +774,8 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ - if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) break; if (++ring->head == ring->slots) @@ -842,7 +809,7 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ - XGENE_DMA_DESC_SET_EMPTY(desc_hw); + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); xgene_dma_run_tx_complete_actions(chan, desc_sw); @@ -889,7 +856,7 @@ static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) * @chan: X-Gene DMA channel * @list: the list to free * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) @@ -900,15 +867,6 @@ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, xgene_dma_clean_descriptor(chan, desc); } -static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, - struct list_head *list) -{ - struct xgene_dma_desc_sw *desc, *_desc; - - list_for_each_entry_safe(desc, _desc, list, node) - xgene_dma_clean_descriptor(chan, desc); -} - static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); @@ -985,7 +943,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1093,7 +1051,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1141,7 +1099,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1218,7 +1176,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1316,7 +1274,6 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) { void *ring_cfg = ring->state; u64 addr = ring->desc_paddr; - void *desc; u32 i, val; ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; @@ -1358,8 +1315,10 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) /* Set empty signature to DMA Rx ring descriptors */ for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + desc = &ring->desc_hw[i]; - XGENE_DMA_DESC_SET_EMPTY(desc); + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); } /* Enable DMA Rx ring interrupt */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 8d9f89b4519d..df92d30ca054 100644 --- a/drivers/md/md.c +++ 
b/drivers/md/md.c @@ -2628,13 +2628,14 @@ errors_show(struct md_rdev *rdev, char *page) static ssize_t errors_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&rdev->corrected_errors, n); - return len; - } - return -EINVAL; + unsigned int n; + int rv; + + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&rdev->corrected_errors, n); + return len; } static struct rdev_sysfs_entry rdev_errors = __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); @@ -2651,13 +2652,16 @@ slot_show(struct md_rdev *rdev, char *page) static ssize_t slot_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; + int slot; int err; - int slot = simple_strtoul(buf, &e, 10); + if (strncmp(buf, "none", 4)==0) slot = -1; - else if (e==buf || (*e && *e!= '\n')) - return -EINVAL; + else { + err = kstrtouint(buf, 10, (unsigned int *)&slot); + if (err < 0) + return err; + } if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also * updating the 'rd%d' link, and communicating @@ -3542,12 +3546,12 @@ layout_show(struct mddev *mddev, char *page) static ssize_t layout_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; int err; - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) return err; @@ -3591,12 +3595,12 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks); static ssize_t raid_disks_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; + unsigned int n; int err; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3643,12 +3647,12 @@ chunk_size_show(struct mddev *mddev, char *page) static ssize_t chunk_size_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long n; int err; - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtoul(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3686,19 +3690,24 @@ resync_start_show(struct mddev *mddev, char *page) static ssize_t resync_start_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long long n; int err; - char *e; - unsigned long long n = simple_strtoull(buf, &e, 10); + + if (cmd_match(buf, "none")) + n = MaxSector; + else { + err = kstrtoull(buf, 10, &n); + if (err < 0) + return err; + if (n != (sector_t)n) + return -EINVAL; + } err = mddev_lock(mddev); if (err) return err; if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) err = -EBUSY; - else if (cmd_match(buf, "none")) - n = MaxSector; - else if (!*buf || (*e && *e != '\n')) - err = -EINVAL; if (!err) { mddev->recovery_cp = n; @@ -3934,14 +3943,14 @@ max_corrected_read_errors_show(struct mddev *mddev, char *page) { static ssize_t max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; + int rv; - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&mddev->max_corr_read_errors, n); - return len; - } - return -EINVAL; + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&mddev->max_corr_read_errors, 
n); + return len; } static struct md_sysfs_entry max_corr_read_errors = @@ -4003,8 +4012,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) @@ -4298,15 +4309,18 @@ sync_min_show(struct mddev *mddev, char *page) static ssize_t sync_min_store(struct mddev *mddev, const char *buf, size_t len) { - int min; - char *e; + unsigned int min; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_min = 0; - return len; + min = 0; + } else { + rv = kstrtouint(buf, 10, &min); + if (rv < 0) + return rv; + if (min == 0) + return -EINVAL; } - min = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || min <= 0) - return -EINVAL; mddev->sync_speed_min = min; return len; } @@ -4324,15 +4338,18 @@ sync_max_show(struct mddev *mddev, char *page) static ssize_t sync_max_store(struct mddev *mddev, const char *buf, size_t len) { - int max; - char *e; + unsigned int max; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_max = 0; - return len; + max = 0; + } else { + rv = kstrtouint(buf, 10, &max); + if (rv < 0) + return rv; + if (max == 0) + return -EINVAL; } - max = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || max <= 0) - return -EINVAL; mddev->sync_speed_max = max; return len; } @@ -4515,12 +4532,13 @@ suspend_lo_show(struct mddev *mddev, char *page) static ssize_t suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4557,12 +4575,13 @@ suspend_hi_show(struct mddev *mddev, char *page) static ssize_t suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4604,11 +4623,13 @@ static ssize_t reshape_position_store(struct mddev *mddev, const char *buf, size_t len) { struct md_rdev *rdev; - char *e; + unsigned long long new; int err; - unsigned long long new = simple_strtoull(buf, &e, 10); - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); if (err) @@ -5157,6 +5178,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5292,6 +5314,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5364,6 +5387,7 @@ static void __md_stop(struct mddev *mddev) mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); @@ 
-6373,7 +6397,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) @@ -8104,6 +8128,15 @@ void md_check_recovery(struct mddev *mddev) int spares = 0; if (mddev->ro) { + struct md_rdev *rdev; + if (!mddev->external && mddev->in_sync) + /* 'Blocked' flag not needed as failed devices + * will be recorded if array switched to read/write. + * Leaving it set will prevent the device + * from being removed. + */ + rdev_for_each(rdev, mddev) + clear_bit(Blocked, &rdev->flags); /* On a read-only array we can: * - remove failed devices * - add already-in_sync devices if the array itself @@ -9011,13 +9044,7 @@ static int get_ro(char *buffer, struct kernel_param *kp) } static int set_ro(const char *val, struct kernel_param *kp) { - char *e; - int num = simple_strtoul(val, &e, 10); - if (*val && (*e == '\0' || *e == '\n')) { - start_readonly = num; - return 0; - } - return -EINVAL; + return kstrtouint(val, 10, (unsigned int *)&start_readonly); } module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 188d8e9a6bdc..940f2f365461 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2099,17 +2099,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; - - for (j=0; j < vcnt ; j++) { - tbio->bi_io_vec[j].bv_offset = 0; - tbio->bi_io_vec[j].bv_len = PAGE_SIZE; - - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } tbio->bi_end_io = end_sync_write; + bio_copy_data(tbio, fbio); + d = r10_bio->devs[i].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); @@ -2124,17 +2117,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * that are active */ for (i = 0; i < conf->copies; i++) { - int j, d; + int d; tbio = r10_bio->devs[i].repl_bio; if (!tbio || !tbio->bi_end_io) continue; if (r10_bio->devs[i].bio->bi_end_io != end_sync_write && r10_bio->devs[i].bio != fbio) - for (j = 0; j < vcnt; j++) - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); + bio_copy_data(tbio, fbio); d = r10_bio->devs[i].devnum; atomic_inc(&r10_bio->remaining); md_sync_acct(conf->mirrors[d].replacement->bdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f..59e44e99eef3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - bool do_wakeup = false; + unsigned long do_wakeup = 0; + int i = 0; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup = true; + do_wakeup |= 1 << hash; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + if (do_wakeup & (1 << i)) + 
wake_up(&conf->wait_for_stripe[i]); + } + if (do_wakeup) { - wake_up(&conf->wait_for_stripe); + if (atomic_read(&conf->active_stripes) == 0) + wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) md_wakeup_thread(conf->mddev->thread); } @@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, spin_lock_irq(conf->hash_locks + hash); do { - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0 || noquiesce, *(conf->hash_locks + hash)); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { sh = get_free_stripe(conf, hash); - if (!sh && llist_empty(&conf->released_stripes) && - !test_bit(R5_DID_ALLOC, &conf->cache_state)) + if (!sh && !test_bit(R5_DID_ALLOC, + &conf->cache_state)) set_bit(R5_ALLOC_MORE, &conf->cache_state); } @@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_lock_irq( - conf->wait_for_stripe, + wait_event_exclusive_cmd( + conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - *(conf->hash_locks + hash)); + spin_unlock_irq(conf->hash_locks + hash), + spin_lock_irq(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); + if (!list_empty(conf->inactive_list + hash)) + wake_up(&conf->wait_for_stripe[hash]); + spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2177,7 +2188,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_cmd(conf->wait_for_stripe, + wait_event_exclusive_cmd(conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -4760,7 +4771,7 @@ static void raid5_align_endio(struct bio *bi, int error) raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return; } @@ -4855,7 +4866,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) align_bi->bi_iter.bi_sector += rdev->data_offset; spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, conf->device_lock); atomic_inc(&conf->active_aligned_reads); @@ -5699,7 +5710,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) bio_endio(raid_bio, 0); } if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return handled; } @@ -6433,7 +6444,10 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); - init_waitqueue_head(&conf->wait_for_stripe); + init_waitqueue_head(&conf->wait_for_quiescent); + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + init_waitqueue_head(&conf->wait_for_stripe[i]); + } init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); @@ -7466,7 +7480,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) * active 
stripes can drain */ conf->quiesce = 2; - wait_event_cmd(conf->wait_for_stripe, + wait_event_cmd(conf->wait_for_quiescent, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, unlock_all_device_hash_locks_irq(conf), @@ -7480,7 +7494,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) case 0: /* re-enable writes */ lock_all_device_hash_locks_irq(conf); conf->quiesce = 0; - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_overlap); unlock_all_device_hash_locks_irq(conf); break; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da..02c3bf8fbfe7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -511,7 +511,8 @@ struct r5conf { struct list_head inactive_list[NR_STRIPE_HASH_LOCKS]; atomic_t empty_inactive_list_nr; struct llist_head released_stripes; - wait_queue_head_t wait_for_stripe; + wait_queue_head_t wait_for_quiescent; + wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig new file mode 100644 index 000000000000..72226acb5c0f --- /dev/null +++ b/drivers/nvdimm/Kconfig @@ -0,0 +1,68 @@ +menuconfig LIBNVDIMM + tristate "NVDIMM (Non-Volatile Memory Device) Support" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + help + Generic support for non-volatile memory devices including + ACPI-6-NFIT defined resources. On platforms that define an + NFIT, or otherwise can discover NVDIMM resources, a libnvdimm + bus is registered to advertise PMEM (persistent memory) + namespaces (/dev/pmemX) and BLK (sliding mmio window(s)) + namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a + memory resource that may span multiple DIMMs and support DAX + (see CONFIG_DAX). A BLK namespace refers to an NVDIMM control + region which exposes an mmio register set for windowed access + mode to non-volatile memory. + +if LIBNVDIMM + +config BLK_DEV_PMEM + tristate "PMEM: Persistent memory block device support" + default LIBNVDIMM + depends on HAS_IOMEM + select ND_BTT if BTT + help + Memory ranges for PMEM are described by either an NFIT + (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a + non-standard OEM-specific E820 memory type (type-12, see + CONFIG_X86_PMEM_LEGACY), or it is manually specified by the + 'memmap=nn[KMG]!ss[KMG]' kernel command line (see + Documentation/kernel-parameters.txt). This driver converts + these persistent memory ranges into block devices that are + capable of DAX (direct-access) file system mappings. See + Documentation/nvdimm/nvdimm.txt for more details. + + Say Y if you want to use an NVDIMM + +config ND_BLK + tristate "BLK: Block data window (aperture) device support" + default LIBNVDIMM + select ND_BTT if BTT + help + Support NVDIMMs, or other devices, that implement a BLK-mode + access capability. BLK-mode access uses memory-mapped-i/o + apertures to access persistent media. + + Say Y if your platform firmware emits an ACPI.NFIT table + (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode + capabilities. 
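Taken together, the entries in this Kconfig describe one stack: libnvdimm as the core, PMEM and BLK as the block drivers, and BTT as the optional atomic-sector personality. A minimal, purely illustrative .config fragment for enabling all of it (assuming the BLK_DEV and PHYS_ADDR_T_64BIT dependencies noted above are already satisfied; the =m versus =y choices are only an example):

CONFIG_ACPI_NFIT=m
CONFIG_LIBNVDIMM=m
CONFIG_BLK_DEV_PMEM=m
CONFIG_ND_BLK=m
CONFIG_BTT=y

CONFIG_ND_BTT does not need to be set by hand; it is pulled in through the 'select ND_BTT if BTT' statements on the PMEM and BLK drivers.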
+ +config ND_BTT + tristate + +config BTT + bool "BTT: Block Translation Table (atomic sector updates)" + default y if LIBNVDIMM + help + The Block Translation Table (BTT) provides atomic sector + update semantics for persistent memory devices, so that + applications that rely on sector writes not being torn (a + guarantee that typical disks provide) can continue to do so. + The BTT manifests itself as an alternate personality for an + NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX, + ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys, + etc...). + + Select Y if unsure + +endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile new file mode 100644 index 000000000000..594bb97c867a --- /dev/null +++ b/drivers/nvdimm/Makefile @@ -0,0 +1,20 @@ +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o + +nd_pmem-y := pmem.o + +nd_btt-y := btt.o + +nd_blk-y := blk.o + +libnvdimm-y := core.o +libnvdimm-y += bus.o +libnvdimm-y += dimm_devs.o +libnvdimm-y += dimm.o +libnvdimm-y += region_devs.o +libnvdimm-y += region.o +libnvdimm-y += namespace_devs.o +libnvdimm-y += label.o +libnvdimm-$(CONFIG_BTT) += btt_devs.o diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c new file mode 100644 index 000000000000..4f97b248c236 --- /dev/null +++ b/drivers/nvdimm/blk.c @@ -0,0 +1,384 @@ +/* + * NVDIMM Block Window Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/blkdev.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/nd.h> +#include <linux/sizes.h> +#include "nd.h" + +struct nd_blk_device { + struct request_queue *queue; + struct gendisk *disk; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + size_t disk_size; + u32 sector_size; + u32 internal_lbasize; +}; + +static int nd_blk_major; + +static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +{ + return blk_dev->nsblk->lbasize - blk_dev->sector_size; +} + +static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, + resource_size_t ns_offset, unsigned int len) +{ + int i; + + for (i = 0; i < nsblk->num_resources; i++) { + if (ns_offset < resource_size(nsblk->res[i])) { + if (ns_offset + len > resource_size(nsblk->res[i])) { + dev_WARN_ONCE(&nsblk->common.dev, 1, + "illegal request\n"); + return SIZE_MAX; + } + return nsblk->res[i]->start + ns_offset; + } + ns_offset -= resource_size(nsblk->res[i]); + } + + dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); + return SIZE_MAX; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + unsigned int len = nd_blk_meta_size(blk_dev); + resource_size_t dev_offset, ns_offset; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + int err = 0; + + nsblk = blk_dev->nsblk; + ndbr = blk_dev->ndbr; + ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + dev_offset = to_dev_offset(nsblk, ns_offset, len); + if (dev_offset == SIZE_MAX) + return -EIO; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *iobuf; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + iobuf = kmap_atomic(bv.bv_page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, + cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + len -= cur_len; + dev_offset += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return err; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + return 0; +} +#endif + +static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, + sector_t sector) +{ + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset, ns_offset; + int err = 0; + void *iobuf; + u64 lba; + + while (len) { + unsigned int cur_len; + + /* + * If we don't have an integrity payload, we don't have to + * split the bvec into sectors, as this would cause unnecessary + * Block Window setup/move steps. the do_io routine is capable + * of handling len <= PAGE_SIZE. + */ + cur_len = bip ? 
min(len, blk_dev->sector_size) : len; + + lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); + ns_offset = lba * blk_dev->internal_lbasize; + dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + if (dev_offset == SIZE_MAX) + return -EIO; + + iobuf = kmap_atomic(page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + if (bip) { + err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + if (err) + return err; + } + len -= cur_len; + off += cur_len; + sector += blk_dev->sector_size >> SECTOR_SHIFT; + } + + return err; +} + +static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + struct gendisk *disk = bdev->bd_disk; + struct bio_integrity_payload *bip; + struct nd_blk_device *blk_dev; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + bip = bio_integrity(bio); + blk_dev = disk->private_data; + rw = bio_data_dir(bio); + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); + if (err) { + dev_info(&blk_dev->nsblk->common.dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + + out: + bio_endio(bio, err); +} + +static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *iobuf, size_t n, int rw) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); + struct nd_namespace_blk *nsblk = blk_dev->nsblk; + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset; + + dev_offset = to_dev_offset(nsblk, offset, n); + + if (unlikely(offset + n > blk_dev->disk_size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (dev_offset == SIZE_MAX) + return -EIO; + + return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); +} + +static const struct block_device_operations nd_blk_fops = { + .owner = THIS_MODULE, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int nd_blk_attach_disk(struct nd_namespace_common *ndns, + struct nd_blk_device *blk_dev) +{ + resource_size_t available_disk_size; + struct gendisk *disk; + u64 internal_nlba; + + internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); + available_disk_size = internal_nlba * blk_dev->sector_size; + + blk_dev->queue = blk_alloc_queue(GFP_KERNEL); + if (!blk_dev->queue) + return -ENOMEM; + + blk_queue_make_request(blk_dev->queue, nd_blk_make_request); + blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); + blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); + blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); + + disk = blk_dev->disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(blk_dev->queue); + return -ENOMEM; + } + + disk->driverfs_dev = &ndns->dev; + disk->major = nd_blk_major; + 
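+ /* alloc_disk(0) plus GENHD_FL_EXT_DEVT below: minors come from the extended devt range */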
disk->first_minor = 0; + disk->fops = &nd_blk_fops; + disk->private_data = blk_dev; + disk->queue = blk_dev->queue; + disk->flags = GENHD_FL_EXT_DEVT; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + set_capacity(disk, 0); + add_disk(disk); + + if (nd_blk_meta_size(blk_dev)) { + int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + + if (rc) { + del_gendisk(disk); + put_disk(disk); + blk_cleanup_queue(blk_dev->queue); + return rc; + } + } + + set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + revalidate_disk(disk); + return 0; +} + +static int nd_blk_probe(struct device *dev) +{ + struct nd_namespace_common *ndns; + struct nd_namespace_blk *nsblk; + struct nd_blk_device *blk_dev; + int rc; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + + blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); + if (!blk_dev) + return -ENOMEM; + + nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->disk_size = nvdimm_namespace_capacity(ndns); + blk_dev->ndbr = to_nd_blk_region(dev->parent); + blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->internal_lbasize = roundup(nsblk->lbasize, + INT_LBASIZE_ALIGNMENT); + blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); + dev_set_drvdata(dev, blk_dev); + + ndns->rw_bytes = nd_blk_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, blk_dev) == 0) { + /* we'll come back as btt-blk */ + rc = -ENXIO; + } else + rc = nd_blk_attach_disk(ndns, blk_dev); + if (rc) + kfree(blk_dev); + return rc; +} + +static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) +{ + del_gendisk(blk_dev->disk); + put_disk(blk_dev->disk); + blk_cleanup_queue(blk_dev->queue); +} + +static int nd_blk_remove(struct device *dev) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(dev); + + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + nd_blk_detach_disk(blk_dev); + kfree(blk_dev); + + return 0; +} + +static struct nd_device_driver nd_blk_driver = { + .probe = nd_blk_probe, + .remove = nd_blk_remove, + .drv = { + .name = "nd_blk", + }, + .type = ND_DRIVER_NAMESPACE_BLK, +}; + +static int __init nd_blk_init(void) +{ + int rc; + + rc = register_blkdev(0, "nd_blk"); + if (rc < 0) + return rc; + + nd_blk_major = rc; + rc = nd_driver_register(&nd_blk_driver); + + if (rc < 0) + unregister_blkdev(nd_blk_major, "nd_blk"); + + return rc; +} + +static void __exit nd_blk_exit(void) +{ + driver_unregister(&nd_blk_driver.drv); + unregister_blkdev(nd_blk_major, "nd_blk"); +} + +MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); +module_init(nd_blk_init); +module_exit(nd_blk_exit); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c new file mode 100644 index 000000000000..411c7b2bb37a --- /dev/null +++ b/drivers/nvdimm/btt.c @@ -0,0 +1,1479 @@ +/* + * Block Translation Table + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#include <linux/highmem.h> +#include <linux/debugfs.h> +#include <linux/blkdev.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/fs.h> +#include <linux/nd.h> +#include "btt.h" +#include "nd.h" + +enum log_ent_request { + LOG_NEW_ENT = 0, + LOG_OLD_ENT +}; + +static int btt_major; + +static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_read_bytes(ndns, offset, buf, n); +} + +static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_write_bytes(ndns, offset, buf, n); +} + +static int btt_info_write(struct arena_info *arena, struct btt_sb *super) +{ + int ret; + + ret = arena_write_bytes(arena, arena->info2off, super, + sizeof(struct btt_sb)); + if (ret) + return ret; + + return arena_write_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +static int btt_info_read(struct arena_info *arena, struct btt_sb *super) +{ + WARN_ON(!super); + return arena_read_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +/* + * 'raw' version of btt_map write + * Assumptions: + * mapping is in little-endian + * mapping contains 'E' and 'Z' flags as desired + */ +static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping) +{ + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE); +} + +static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, + u32 z_flag, u32 e_flag) +{ + u32 ze; + __le32 mapping_le; + + /* + * This 'mapping' is supposed to be just the LBA mapping, without + * any flags set, so strip the flag bits. 
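+ * (The Z and E flag bits sit above MAP_LBA_MASK and are re-applied below.)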
+ */ + mapping &= MAP_LBA_MASK; + + ze = (z_flag << 1) + e_flag; + switch (ze) { + case 0: + /* + * We want to set neither of the Z or E flags, and + * in the actual layout, this means setting the bit + * positions of both to '1' to indicate a 'normal' + * map entry + */ + mapping |= MAP_ENT_NORMAL; + break; + case 1: + mapping |= (1 << MAP_ERR_SHIFT); + break; + case 2: + mapping |= (1 << MAP_TRIM_SHIFT); + break; + default: + /* + * The case where Z and E are both sent in as '1' could be + * construed as a valid 'normal' case, but we decide not to, + * to avoid confusion + */ + WARN_ONCE(1, "Invalid use of Z and E flags\n"); + return -EIO; + } + + mapping_le = cpu_to_le32(mapping); + return __btt_map_write(arena, lba, mapping_le); +} + +static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, + int *trim, int *error) +{ + int ret; + __le32 in; + u32 raw_mapping, postmap, ze, z_flag, e_flag; + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + + ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE); + if (ret) + return ret; + + raw_mapping = le32_to_cpu(in); + + z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; + e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; + ze = (z_flag << 1) + e_flag; + postmap = raw_mapping & MAP_LBA_MASK; + + /* Reuse the {z,e}_flag variables for *trim and *error */ + z_flag = 0; + e_flag = 0; + + switch (ze) { + case 0: + /* Initial state. Return postmap = premap */ + *mapping = lba; + break; + case 1: + *mapping = postmap; + e_flag = 1; + break; + case 2: + *mapping = postmap; + z_flag = 1; + break; + case 3: + *mapping = postmap; + break; + default: + return -EIO; + } + + if (trim) + *trim = z_flag; + if (error) + *error = e_flag; + + return ret; +} + +static int btt_log_read_pair(struct arena_info *arena, u32 lane, + struct log_entry *ent) +{ + WARN_ON(!ent); + return arena_read_bytes(arena, + arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, + 2 * LOG_ENT_SIZE); +} + +static struct dentry *debugfs_root; + +static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, + int idx) +{ + char dirname[32]; + struct dentry *d; + + /* If for some reason, parent bttN was not created, exit */ + if (!parent) + return; + + snprintf(dirname, 32, "arena%d", idx); + d = debugfs_create_dir(dirname, parent); + if (IS_ERR_OR_NULL(d)) + return; + a->debugfs_dir = d; + + debugfs_create_x64("size", S_IRUGO, d, &a->size); + debugfs_create_x64("external_lba_start", S_IRUGO, d, + &a->external_lba_start); + debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); + debugfs_create_u32("internal_lbasize", S_IRUGO, d, + &a->internal_lbasize); + debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); + debugfs_create_u32("external_lbasize", S_IRUGO, d, + &a->external_lbasize); + debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); + debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); + debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); + debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); + debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); + debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); + debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); + debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); + debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); + debugfs_create_x32("flags", S_IRUGO, d, &a->flags); +} + +static void btt_debugfs_init(struct btt *btt) +{ + int i = 0; + struct arena_info *arena; + + btt->debugfs_dir = 
debugfs_create_dir(dev_name(&btt->nd_btt->dev), + debugfs_root); + if (IS_ERR_OR_NULL(btt->debugfs_dir)) + return; + + list_for_each_entry(arena, &btt->arena_list, list) { + arena_debugfs_init(arena, btt->debugfs_dir, i); + i++; + } +} + +/* + * This function accepts two log entries, and uses the + * sequence number to find the 'older' entry. + * It also updates the sequence number in this old entry to + * make it the 'new' one if the mark_flag is set. + * Finally, it returns which of the entries was the older one. + * + * TODO The logic feels a bit kludge-y. make it better.. + */ +static int btt_log_get_old(struct log_entry *ent) +{ + int old; + + /* + * the first ever time this is seen, the entry goes into [0] + * the next time, the following logic works out to put this + * (next) entry into [1] + */ + if (ent[0].seq == 0) { + ent[0].seq = cpu_to_le32(1); + return 0; + } + + if (ent[0].seq == ent[1].seq) + return -EINVAL; + if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + return -EINVAL; + + if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { + if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + old = 0; + else + old = 1; + } else { + if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + old = 1; + else + old = 0; + } + + return old; +} + +static struct device *to_dev(struct arena_info *arena) +{ + return &arena->nd_btt->dev; +} + +/* + * This function copies the desired (old/new) log entry into ent if + * it is not NULL. It returns the sub-slot number (0 or 1) + * where the desired log entry was found. Negative return values + * indicate errors. + */ +static int btt_log_read(struct arena_info *arena, u32 lane, + struct log_entry *ent, int old_flag) +{ + int ret; + int old_ent, ret_ent; + struct log_entry log[2]; + + ret = btt_log_read_pair(arena, lane, log); + if (ret) + return -EIO; + + old_ent = btt_log_get_old(log); + if (old_ent < 0 || old_ent > 1) { + dev_info(to_dev(arena), + "log corruption (%d): lane %d seq [%d, %d]\n", + old_ent, lane, log[0].seq, log[1].seq); + /* TODO set error state? */ + return -EIO; + } + + ret_ent = (old_flag ? old_ent : (1 - old_ent)); + + if (ent != NULL) + memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + + return ret_ent; +} + +/* + * This function commits a log entry to media + * It does _not_ prepare the freelist entry for the next write + * btt_flog_write is the wrapper for updating the freelist elements + */ +static int __btt_log_write(struct arena_info *arena, u32 lane, + u32 sub, struct log_entry *ent) +{ + int ret; + /* + * Ignore the padding in log_entry for calculating log_half. + * The entry is 'committed' when we write the sequence number, + * and we want to ensure that that is the last thing written. 
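+ * (The sequence number sits in the second half of the entry, so it is covered by the second of the two writes below.)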
+ * We don't bother writing the padding as that would be extra + * media wear and write amplification + */ + unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; + u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + void *src = ent; + + /* split the 16B write into atomic, durable halves */ + ret = arena_write_bytes(arena, ns_off, src, log_half); + if (ret) + return ret; + + ns_off += log_half; + src += log_half; + return arena_write_bytes(arena, ns_off, src, log_half); +} + +static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, + struct log_entry *ent) +{ + int ret; + + ret = __btt_log_write(arena, lane, sub, ent); + if (ret) + return ret; + + /* prepare the next free entry */ + arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; + if (++(arena->freelist[lane].seq) == 4) + arena->freelist[lane].seq = 1; + arena->freelist[lane].block = le32_to_cpu(ent->old_map); + + return ret; +} + +/* + * This function initializes the BTT map to the initial state, which is + * all-zeroes, and indicates an identity mapping + */ +static int btt_map_init(struct arena_info *arena) +{ + int ret = -EINVAL; + void *zerobuf; + size_t offset = 0; + size_t chunk_size = SZ_2M; + size_t mapsize = arena->logoff - arena->mapoff; + + zerobuf = kzalloc(chunk_size, GFP_KERNEL); + if (!zerobuf) + return -ENOMEM; + + while (mapsize) { + size_t size = min(mapsize, chunk_size); + + ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, + size); + if (ret) + goto free; + + offset += size; + mapsize -= size; + cond_resched(); + } + + free: + kfree(zerobuf); + return ret; +} + +/* + * This function initializes the BTT log with 'fake' entries pointing + * to the initial reserved set of blocks as being free + */ +static int btt_log_init(struct arena_info *arena) +{ + int ret; + u32 i; + struct log_entry log, zerolog; + + memset(&zerolog, 0, sizeof(zerolog)); + + for (i = 0; i < arena->nfree; i++) { + log.lba = cpu_to_le32(i); + log.old_map = cpu_to_le32(arena->external_nlba + i); + log.new_map = cpu_to_le32(arena->external_nlba + i); + log.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &log); + if (ret) + return ret; + ret = __btt_log_write(arena, i, 1, &zerolog); + if (ret) + return ret; + } + + return 0; +} + +static int btt_freelist_init(struct arena_info *arena) +{ + int old, new, ret; + u32 i, map_entry; + struct log_entry log_new, log_old; + + arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), + GFP_KERNEL); + if (!arena->freelist) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) { + old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); + if (old < 0) + return old; + + new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); + if (new < 0) + return new; + + /* sub points to the next one to be overwritten */ + arena->freelist[i].sub = 1 - new; + arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); + arena->freelist[i].block = le32_to_cpu(log_new.old_map); + + /* This implies a newly created or untouched flog entry */ + if (log_new.old_map == log_new.new_map) + continue; + + /* Check if map recovery is needed */ + ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, + NULL, NULL); + if (ret) + return ret; + if ((le32_to_cpu(log_new.new_map) != map_entry) && + (le32_to_cpu(log_new.old_map) == map_entry)) { + /* + * Last transaction wrote the flog, but wasn't able + * to complete the map write. So fix up the map. 
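+ * (the on-media map still holds old_map, so replay the write of new_map)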
+ */ + ret = btt_map_write(arena, le32_to_cpu(log_new.lba), + le32_to_cpu(log_new.new_map), 0, 0); + if (ret) + return ret; + } + + } + + return 0; +} + +static int btt_rtt_init(struct arena_info *arena) +{ + arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); + if (arena->rtt == NULL) + return -ENOMEM; + + return 0; +} + +static int btt_maplocks_init(struct arena_info *arena) +{ + u32 i; + + arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), + GFP_KERNEL); + if (!arena->map_locks) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) + spin_lock_init(&arena->map_locks[i].lock); + + return 0; +} + +static struct arena_info *alloc_arena(struct btt *btt, size_t size, + size_t start, size_t arena_off) +{ + struct arena_info *arena; + u64 logsize, mapsize, datasize; + u64 available = size; + + arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); + if (!arena) + return NULL; + arena->nd_btt = btt->nd_btt; + + if (!size) + return arena; + + arena->size = size; + arena->external_lba_start = start; + arena->external_lbasize = btt->lbasize; + arena->internal_lbasize = roundup(arena->external_lbasize, + INT_LBASIZE_ALIGNMENT); + arena->nfree = BTT_DEFAULT_NFREE; + arena->version_major = 1; + arena->version_minor = 1; + + if (available % BTT_PG_SIZE) + available -= (available % BTT_PG_SIZE); + + /* Two pages are reserved for the super block and its copy */ + available -= 2 * BTT_PG_SIZE; + + /* The log takes a fixed amount of space based on nfree */ + logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), + BTT_PG_SIZE); + available -= logsize; + + /* Calculate optimal split between map and data area */ + arena->internal_nlba = div_u64(available - BTT_PG_SIZE, + arena->internal_lbasize + MAP_ENT_SIZE); + arena->external_nlba = arena->internal_nlba - arena->nfree; + + mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); + datasize = available - mapsize; + + /* 'Absolute' values, relative to start of storage space */ + arena->infooff = arena_off; + arena->dataoff = arena->infooff + BTT_PG_SIZE; + arena->mapoff = arena->dataoff + datasize; + arena->logoff = arena->mapoff + mapsize; + arena->info2off = arena->logoff + logsize; + return arena; +} + +static void free_arenas(struct btt *btt) +{ + struct arena_info *arena, *next; + + list_for_each_entry_safe(arena, next, &btt->arena_list, list) { + list_del(&arena->list); + kfree(arena->rtt); + kfree(arena->map_locks); + kfree(arena->freelist); + debugfs_remove_recursive(arena->debugfs_dir); + kfree(arena); + } +} + +/* + * This function checks if the metadata layout is valid and error free + */ +static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, + u8 *uuid, u32 lbasize) +{ + u64 checksum; + + if (memcmp(super->uuid, uuid, 16)) + return 0; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_btt_sb_checksum(super)) + return 0; + super->checksum = cpu_to_le64(checksum); + + if (lbasize != le32_to_cpu(super->external_lbasize)) + return 0; + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(to_dev(arena), "Found arena with an error flag\n"); + + return 1; +} + +/* + * This function reads an existing valid btt superblock and + * populates the corresponding arena_info struct + */ +static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, + u64 arena_off) +{ + arena->internal_nlba = le32_to_cpu(super->internal_nlba); + arena->internal_lbasize = 
le32_to_cpu(super->internal_lbasize); + arena->external_nlba = le32_to_cpu(super->external_nlba); + arena->external_lbasize = le32_to_cpu(super->external_lbasize); + arena->nfree = le32_to_cpu(super->nfree); + arena->version_major = le16_to_cpu(super->version_major); + arena->version_minor = le16_to_cpu(super->version_minor); + + arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + + le64_to_cpu(super->nextoff)); + arena->infooff = arena_off; + arena->dataoff = arena_off + le64_to_cpu(super->dataoff); + arena->mapoff = arena_off + le64_to_cpu(super->mapoff); + arena->logoff = arena_off + le64_to_cpu(super->logoff); + arena->info2off = arena_off + le64_to_cpu(super->info2off); + + arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : + (arena->info2off - arena->infooff + BTT_PG_SIZE); + + arena->flags = le32_to_cpu(super->flags); +} + +static int discover_arenas(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + struct btt_sb *super; + size_t remaining = btt->rawsize; + u64 cur_nlba = 0; + size_t cur_off = 0; + int num_arenas = 0; + + super = kzalloc(sizeof(*super), GFP_KERNEL); + if (!super) + return -ENOMEM; + + while (remaining) { + /* Alloc memory for arena */ + arena = alloc_arena(btt, 0, 0, 0); + if (!arena) { + ret = -ENOMEM; + goto out_super; + } + + arena->infooff = cur_off; + ret = btt_info_read(arena, super); + if (ret) + goto out; + + if (!arena_is_valid(arena, super, btt->nd_btt->uuid, + btt->lbasize)) { + if (remaining == btt->rawsize) { + btt->init_state = INIT_NOTFOUND; + dev_info(to_dev(arena), "No existing arenas\n"); + goto out; + } else { + dev_info(to_dev(arena), + "Found corrupted metadata!\n"); + ret = -ENODEV; + goto out; + } + } + + arena->external_lba_start = cur_nlba; + parse_arena_meta(arena, super, cur_off); + + ret = btt_freelist_init(arena); + if (ret) + goto out; + + ret = btt_rtt_init(arena); + if (ret) + goto out; + + ret = btt_maplocks_init(arena); + if (ret) + goto out; + + list_add_tail(&arena->list, &btt->arena_list); + + remaining -= arena->size; + cur_off += arena->size; + cur_nlba += arena->external_nlba; + num_arenas++; + + if (arena->nextoff == 0) + break; + } + btt->num_arenas = num_arenas; + btt->nlba = cur_nlba; + btt->init_state = INIT_READY; + + kfree(super); + return ret; + + out: + kfree(arena); + free_arenas(btt); + out_super: + kfree(super); + return ret; +} + +static int create_arenas(struct btt *btt) +{ + size_t remaining = btt->rawsize; + size_t cur_off = 0; + + while (remaining) { + struct arena_info *arena; + size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); + + remaining -= arena_size; + if (arena_size < ARENA_MIN_SIZE) + break; + + arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); + if (!arena) { + free_arenas(btt); + return -ENOMEM; + } + btt->nlba += arena->external_nlba; + if (remaining >= ARENA_MIN_SIZE) + arena->nextoff = arena->size; + else + arena->nextoff = 0; + cur_off += arena_size; + list_add_tail(&arena->list, &btt->arena_list); + } + + return 0; +} + +/* + * This function completes arena initialization by writing + * all the metadata. + * It is only called for an uninitialized arena when a write + * to that arena occurs for the first time. 
+ */ +static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +{ + int ret; + struct btt_sb *super; + + ret = btt_map_init(arena); + if (ret) + return ret; + + ret = btt_log_init(arena); + if (ret) + return ret; + + super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); + if (!super) + return -ENOMEM; + + strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); + memcpy(super->uuid, uuid, 16); + super->flags = cpu_to_le32(arena->flags); + super->version_major = cpu_to_le16(arena->version_major); + super->version_minor = cpu_to_le16(arena->version_minor); + super->external_lbasize = cpu_to_le32(arena->external_lbasize); + super->external_nlba = cpu_to_le32(arena->external_nlba); + super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); + super->internal_nlba = cpu_to_le32(arena->internal_nlba); + super->nfree = cpu_to_le32(arena->nfree); + super->infosize = cpu_to_le32(sizeof(struct btt_sb)); + super->nextoff = cpu_to_le64(arena->nextoff); + /* + * Subtract arena->infooff (arena start) so numbers are relative + * to 'this' arena + */ + super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); + super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); + super->logoff = cpu_to_le64(arena->logoff - arena->infooff); + super->info2off = cpu_to_le64(arena->info2off - arena->infooff); + + super->flags = 0; + super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + + ret = btt_info_write(arena, super); + + kfree(super); + return ret; +} + +/* + * This function completes the initialization for the BTT namespace + * such that it is ready to accept IOs + */ +static int btt_meta_init(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + + mutex_lock(&btt->init_lock); + list_for_each_entry(arena, &btt->arena_list, list) { + ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + if (ret) + goto unlock; + + ret = btt_freelist_init(arena); + if (ret) + goto unlock; + + ret = btt_rtt_init(arena); + if (ret) + goto unlock; + + ret = btt_maplocks_init(arena); + if (ret) + goto unlock; + } + + btt->init_state = INIT_READY; + + unlock: + mutex_unlock(&btt->init_lock); + return ret; +} + +static u32 btt_meta_size(struct btt *btt) +{ + return btt->lbasize - btt->sector_size; +} + +/* + * This function calculates the arena in which the given LBA lies + * by doing a linear walk. This is acceptable since we expect only + * a few arenas. If we have backing devices that get much larger, + * we can construct a balanced binary tree of arenas at init time + * so that this range search becomes faster. 
+ */ +static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, + struct arena_info **arena) +{ + struct arena_info *arena_list; + __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); + + list_for_each_entry(arena_list, &btt->arena_list, list) { + if (lba < arena_list->external_nlba) { + *arena = arena_list; + *premap = lba; + return 0; + } + lba -= arena_list->external_nlba; + } + + return -EIO; +} + +/* + * The following (lock_map, unlock_map) are mostly just to improve + * readability, since they index into an array of locks + */ +static void lock_map(struct arena_info *arena, u32 premap) + __acquires(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_lock(&arena->map_locks[idx].lock); +} + +static void unlock_map(struct arena_info *arena, u32 premap) + __releases(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_unlock(&arena->map_locks[idx].lock); +} + +static u64 to_namespace_offset(struct arena_info *arena, u64 lba) +{ + return arena->dataoff + ((u64)lba * arena->internal_lbasize); +} + +static int btt_data_read(struct arena_info *arena, struct page *page, + unsigned int off, u32 lba, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_read_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static int btt_data_write(struct arena_info *arena, u32 lba, + struct page *page, unsigned int off, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_write_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static void zero_fill_data(struct page *page, unsigned int off, u32 len) +{ + void *mem = kmap_atomic(page); + + memset(mem + off, 0, len); + kunmap_atomic(mem); +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + unsigned int len = btt_meta_size(btt); + u64 meta_nsoff; + int ret = 0; + + if (bip == NULL) + return 0; + + meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *mem; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + mem = kmap_atomic(bv.bv_page); + if (rw) + ret = arena_write_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + else + ret = arena_read_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + + kunmap_atomic(mem); + if (ret) + return ret; + + len -= cur_len; + meta_nsoff += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return ret; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + return 0; +} +#endif + +static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int off, sector_t sector, + unsigned int len) +{ + int ret = 0; + int t_flag, e_flag; + struct arena_info *arena = NULL; + u32 lane = 0, premap, postmap; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, 
sector, &premap, &arena); + if (ret) + goto out_lane; + + cur_len = min(btt->sector_size, len); + + ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag); + if (ret) + goto out_lane; + + /* + * We loop to make sure that the post map LBA didn't change + * from under us between writing the RTT and doing the actual + * read. + */ + while (1) { + u32 new_map; + + if (t_flag) { + zero_fill_data(page, off, cur_len); + goto out_lane; + } + + if (e_flag) { + ret = -EIO; + goto out_lane; + } + + arena->rtt[lane] = RTT_VALID | postmap; + /* + * Barrier to make sure this write is not reordered + * to do the verification map_read before the RTT store + */ + barrier(); + + ret = btt_map_read(arena, premap, &new_map, &t_flag, + &e_flag); + if (ret) + goto out_rtt; + + if (postmap == new_map) + break; + + postmap = new_map; + } + + ret = btt_data_read(arena, page, off, postmap, cur_len); + if (ret) + goto out_rtt; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, postmap, READ); + if (ret) + goto out_rtt; + } + + arena->rtt[lane] = RTT_INVALID; + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_rtt: + arena->rtt[lane] = RTT_INVALID; + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, + sector_t sector, struct page *page, unsigned int off, + unsigned int len) +{ + int ret = 0; + struct arena_info *arena = NULL; + u32 premap = 0, old_postmap, new_postmap, lane = 0, i; + struct log_entry log; + int sub; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, sector, &premap, &arena); + if (ret) + goto out_lane; + cur_len = min(btt->sector_size, len); + + if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { + ret = -EIO; + goto out_lane; + } + + new_postmap = arena->freelist[lane].block; + + /* Wait if the new block is being read from */ + for (i = 0; i < arena->nfree; i++) + while (arena->rtt[i] == (RTT_VALID | new_postmap)) + cpu_relax(); + + + if (new_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_lane; + } + + ret = btt_data_write(arena, new_postmap, page, off, cur_len); + if (ret) + goto out_lane; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, new_postmap, + WRITE); + if (ret) + goto out_lane; + } + + lock_map(arena, premap); + ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL); + if (ret) + goto out_map; + if (old_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_map; + } + + log.lba = cpu_to_le32(premap); + log.old_map = cpu_to_le32(old_postmap); + log.new_map = cpu_to_le32(new_postmap); + log.seq = cpu_to_le32(arena->freelist[lane].seq); + sub = arena->freelist[lane].sub; + ret = btt_flog_write(arena, lane, sub, &log); + if (ret) + goto out_map; + + ret = btt_map_write(arena, premap, new_postmap, 0, 0); + if (ret) + goto out_map; + + unlock_map(arena, premap); + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_map: + unlock_map(arena, premap); + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int len, unsigned int off, + int rw, sector_t sector) +{ + int ret; + + if (rw == READ) { + ret = btt_read_pg(btt, bip, page, off, sector, len); + 
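+ /* keep user mappings of the page coherent with the data just read */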
flush_dcache_page(page); + } else { + flush_dcache_page(page); + ret = btt_write_pg(btt, bip, sector, page, off, len); + } + + return ret; +} + +static void btt_make_request(struct request_queue *q, struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct btt *btt = q->queuedata; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + do_acct = nd_iostat_start(bio, &start); + rw = bio_data_dir(bio); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + /* Make sure len is in multiples of sector size. */ + /* XXX is this right? */ + BUG_ON(len < btt->sector_size); + BUG_ON(len % btt->sector_size); + + err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, + rw, iter.bi_sector); + if (err) { + dev_info(&btt->nd_btt->dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + +out: + bio_endio(bio, err); +} + +static int btt_rw_page(struct block_device *bdev, sector_t sector, + struct page *page, int rw) +{ + struct btt *btt = bdev->bd_disk->private_data; + + btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); + page_endio(page, rw & WRITE, 0); + return 0; +} + + +static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + +static const struct block_device_operations btt_fops = { + .owner = THIS_MODULE, + .rw_page = btt_rw_page, + .getgeo = btt_getgeo, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int btt_blk_init(struct btt *btt) +{ + struct nd_btt *nd_btt = btt->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* create a new disk and request queue for btt */ + btt->btt_queue = blk_alloc_queue(GFP_KERNEL); + if (!btt->btt_queue) + return -ENOMEM; + + btt->btt_disk = alloc_disk(0); + if (!btt->btt_disk) { + blk_cleanup_queue(btt->btt_queue); + return -ENOMEM; + } + + nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); + btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; + btt->btt_disk->major = btt_major; + btt->btt_disk->first_minor = 0; + btt->btt_disk->fops = &btt_fops; + btt->btt_disk->private_data = btt; + btt->btt_disk->queue = btt->btt_queue; + btt->btt_disk->flags = GENHD_FL_EXT_DEVT; + + blk_queue_make_request(btt->btt_queue, btt_make_request); + blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); + blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); + blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); + btt->btt_queue->queuedata = btt; + + set_capacity(btt->btt_disk, 0); + add_disk(btt->btt_disk); + if (btt_meta_size(btt)) { + int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); + + if (rc) { + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); + return rc; + } + } + set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + 
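+ /* the disk was added with zero capacity; re-read it now that the real size is known */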
revalidate_disk(btt->btt_disk); + + return 0; +} + +static void btt_blk_cleanup(struct btt *btt) +{ + blk_integrity_unregister(btt->btt_disk); + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); +} + +/** + * btt_init - initialize a block translation table for the given device + * @nd_btt: device with BTT geometry and backing device info + * @rawsize: raw size in bytes of the backing device + * @lbasize: lba size of the backing device + * @uuid: A uuid for the backing device - this is stored on media + * @maxlane: maximum number of parallel requests the device can handle + * + * Initialize a Block Translation Table on a backing device to provide + * single sector power fail atomicity. + * + * Context: + * Might sleep. + * + * Returns: + * Pointer to a new struct btt on success, NULL on failure. + */ +static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, + u32 lbasize, u8 *uuid, struct nd_region *nd_region) +{ + int ret; + struct btt *btt; + struct device *dev = &nd_btt->dev; + + btt = kzalloc(sizeof(struct btt), GFP_KERNEL); + if (!btt) + return NULL; + + btt->nd_btt = nd_btt; + btt->rawsize = rawsize; + btt->lbasize = lbasize; + btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); + INIT_LIST_HEAD(&btt->arena_list); + mutex_init(&btt->init_lock); + btt->nd_region = nd_region; + + ret = discover_arenas(btt); + if (ret) { + dev_err(dev, "init: error in arena_discover: %d\n", ret); + goto out_free; + } + + if (btt->init_state != INIT_READY && nd_region->ro) { + dev_info(dev, "%s is read-only, unable to init btt metadata\n", + dev_name(&nd_region->dev)); + goto out_free; + } else if (btt->init_state != INIT_READY) { + btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + + ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); + dev_dbg(dev, "init: %d arenas for %llu rawsize\n", + btt->num_arenas, rawsize); + + ret = create_arenas(btt); + if (ret) { + dev_info(dev, "init: create_arenas: %d\n", ret); + goto out_free; + } + + ret = btt_meta_init(btt); + if (ret) { + dev_err(dev, "init: error in meta_init: %d\n", ret); + goto out_free; + } + } + + ret = btt_blk_init(btt); + if (ret) { + dev_err(dev, "init: error in blk_init: %d\n", ret); + goto out_free; + } + + btt_debugfs_init(btt); + + return btt; + + out_free: + kfree(btt); + return NULL; +} + +/** + * btt_fini - de-initialize a BTT + * @btt: the BTT handle that was generated by btt_init + * + * De-initialize a Block Translation Table on device removal + * + * Context: + * Might sleep. 
+ */ +static void btt_fini(struct btt *btt) +{ + if (btt) { + btt_blk_cleanup(btt); + free_arenas(btt); + debugfs_remove_recursive(btt->debugfs_dir); + kfree(btt); + } +} + +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct nd_region *nd_region; + struct btt *btt; + size_t rawsize; + + if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) + return -ENODEV; + + rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; + if (rawsize < ARENA_MIN_SIZE) { + return -ENXIO; + } + nd_region = to_nd_region(nd_btt->dev.parent); + btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, + nd_region); + if (!btt) + return -ENOMEM; + nd_btt->btt = btt; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_attach_btt); + +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct btt *btt = nd_btt->btt; + + btt_fini(btt); + nd_btt->btt = NULL; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_detach_btt); + +static int __init nd_btt_init(void) +{ + int rc; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + + btt_major = register_blkdev(0, "btt"); + if (btt_major < 0) + return btt_major; + + debugfs_root = debugfs_create_dir("btt", NULL); + if (IS_ERR_OR_NULL(debugfs_root)) { + rc = -ENXIO; + goto err_debugfs; + } + + return 0; + + err_debugfs: + unregister_blkdev(btt_major, "btt"); + + return rc; +} + +static void __exit nd_btt_exit(void) +{ + debugfs_remove_recursive(debugfs_root); + unregister_blkdev(btt_major, "btt"); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); +MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>"); +MODULE_LICENSE("GPL v2"); +module_init(nd_btt_init); +module_exit(nd_btt_exit); diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h new file mode 100644 index 000000000000..75b0d80a6bd9 --- /dev/null +++ b/drivers/nvdimm/btt.h @@ -0,0 +1,185 @@ +/* + * Block Translation Table library + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _LINUX_BTT_H +#define _LINUX_BTT_H + +#include <linux/types.h> + +#define BTT_SIG_LEN 16 +#define BTT_SIG "BTT_ARENA_INFO\0" +#define MAP_ENT_SIZE 4 +#define MAP_TRIM_SHIFT 31 +#define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT) +#define MAP_ERR_SHIFT 30 +#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) +#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) +#define MAP_ENT_NORMAL 0xC0000000 +#define LOG_ENT_SIZE sizeof(struct log_entry) +#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ +#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ +#define RTT_VALID (1UL << 31) +#define RTT_INVALID 0 +#define BTT_PG_SIZE 4096 +#define BTT_DEFAULT_NFREE ND_MAX_LANES +#define LOG_SEQ_INIT 1 + +#define IB_FLAG_ERROR 0x00000001 +#define IB_FLAG_ERROR_MASK 0x00000001 + +enum btt_init_state { + INIT_UNCHECKED = 0, + INIT_NOTFOUND, + INIT_READY +}; + +struct log_entry { + __le32 lba; + __le32 old_map; + __le32 new_map; + __le32 seq; + __le64 padding[2]; +}; + +struct btt_sb { + u8 signature[BTT_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le32 external_lbasize; + __le32 external_nlba; + __le32 internal_lbasize; + __le32 internal_nlba; + __le32 nfree; + __le32 infosize; + __le64 nextoff; + __le64 dataoff; + __le64 mapoff; + __le64 logoff; + __le64 info2off; + u8 padding[3968]; + __le64 checksum; +}; + +struct free_entry { + u32 block; + u8 sub; + u8 seq; +}; + +struct aligned_lock { + union { + spinlock_t lock; + u8 cacheline_padding[L1_CACHE_BYTES]; + }; +}; + +/** + * struct arena_info - handle for an arena + * @size: Size in bytes this arena occupies on the raw device. + * This includes arena metadata. + * @external_lba_start: The first external LBA in this arena. + * @internal_nlba: Number of internal blocks available in the arena + * including nfree reserved blocks + * @internal_lbasize: Internal and external lba sizes may be different as + * we can round up 'odd' external lbasizes such as 520B + * to be aligned. + * @external_nlba: Number of blocks contributed by the arena to the number + * reported to upper layers. (internal_nlba - nfree) + * @external_lbasize: LBA size as exposed to upper layers. + * @nfree: A reserve number of 'free' blocks that is used to + * handle incoming writes. + * @version_major: Metadata layout version major. + * @version_minor: Metadata layout version minor. + * @nextoff: Offset in bytes to the start of the next arena. + * @infooff: Offset in bytes to the info block of this arena. + * @dataoff: Offset in bytes to the data area of this arena. + * @mapoff: Offset in bytes to the map area of this arena. + * @logoff: Offset in bytes to the log area of this arena. + * @info2off: Offset in bytes to the backup info block of this arena. + * @freelist: Pointer to in-memory list of free blocks + * @rtt: Pointer to in-memory "Read Tracking Table" + * @map_locks: Spinlocks protecting concurrent map writes + * @nd_btt: Pointer to parent nd_btt structure. + * @list: List head for list of arenas + * @debugfs_dir: Debugfs dentry + * @flags: Arena flags - may signify error states. + * + * arena_info is a per-arena handle. Once an arena is narrowed down for an + * IO, this struct is passed around for the duration of the IO. 
+ */ +struct arena_info { + u64 size; /* Total bytes for this arena */ + u64 external_lba_start; + u32 internal_nlba; + u32 internal_lbasize; + u32 external_nlba; + u32 external_lbasize; + u32 nfree; + u16 version_major; + u16 version_minor; + /* Byte offsets to the different on-media structures */ + u64 nextoff; + u64 infooff; + u64 dataoff; + u64 mapoff; + u64 logoff; + u64 info2off; + /* Pointers to other in-memory structures for this arena */ + struct free_entry *freelist; + u32 *rtt; + struct aligned_lock *map_locks; + struct nd_btt *nd_btt; + struct list_head list; + struct dentry *debugfs_dir; + /* Arena flags */ + u32 flags; +}; + +/** + * struct btt - handle for a BTT instance + * @btt_disk: Pointer to the gendisk for BTT device + * @btt_queue: Pointer to the request queue for the BTT device + * @arena_list: Head of the list of arenas + * @debugfs_dir: Debugfs dentry + * @nd_btt: Parent nd_btt struct + * @nlba: Number of logical blocks exposed to the upper layers + * after removing the amount of space needed by metadata + * @rawsize: Total size in bytes of the available backing device + * @lbasize: LBA size as requested and presented to upper layers. + * This is sector_size + size of any metadata. + * @sector_size: The Linux sector size - 512 or 4096 + * @lanes: Per-lane spinlocks + * @init_lock: Mutex used for the BTT initialization + * @init_state: Flag describing the initialization state for the BTT + * @num_arenas: Number of arenas in the BTT instance + */ +struct btt { + struct gendisk *btt_disk; + struct request_queue *btt_queue; + struct list_head arena_list; + struct dentry *debugfs_dir; + struct nd_btt *nd_btt; + u64 nlba; + unsigned long long rawsize; + u32 lbasize; + u32 sector_size; + struct nd_region *nd_region; + struct mutex init_lock; + int init_state; + int num_arenas; +}; +#endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c new file mode 100644 index 000000000000..6ac8c0fea3ec --- /dev/null +++ b/drivers/nvdimm/btt_devs.c @@ -0,0 +1,425 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "btt.h" +#include "nd.h" + +static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != &nd_btt->dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + nd_btt->ndns = NULL; + put_device(&ndns->dev); +} + +static void nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_btt_detach_ndns(nd_btt); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + if (ndns->claim) + return false; + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || nd_btt->ndns, + "%s: invalid claim\n", __func__); + ndns->claim = &nd_btt->dev; + nd_btt->ndns = ndns; + get_device(&ndns->dev); + return true; +} + +static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + bool claimed; + + device_lock(&ndns->dev); + claimed = __nd_btt_attach_ndns(nd_btt, ndns); + device_unlock(&ndns->dev); + return claimed; +} + +static void nd_btt_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_btt_detach_ndns(nd_btt); + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + kfree(nd_btt->uuid); + kfree(nd_btt); +} + +static struct device_type nd_btt_device_type = { + .name = "nd_btt", + .release = nd_btt_release, +}; + +bool is_nd_btt(struct device *dev) +{ + return dev->type == &nd_btt_device_type; +} +EXPORT_SYMBOL(is_nd_btt); + +struct nd_btt *to_nd_btt(struct device *dev) +{ + struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev); + + WARN_ON(!is_nd_btt(dev)); + return nd_btt; +} +EXPORT_SYMBOL(to_nd_btt); + +static const unsigned long btt_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize, + btt_lbasize_supported); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? 
rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_btt->uuid) + return sprintf(buf, "%pUb\n", nd_btt->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_btt->ndns + ? dev_name(&nd_btt->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_nd_btt_idle(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) + return false; + return true; +} + +static ssize_t __namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = nd_btt->ndns; + if (strcmp(name, "") == 0) { + /* detach the namespace and destroy / reset the btt device */ + nd_btt_detach_ndns(nd_btt); + if (is_nd_btt_idle(dev)) + nd_device_unregister(dev, ND_ASYNC); + else { + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host btt\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); + if (!nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = __namespace_store(dev, attr, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? 
"" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_btt_attributes[] = { + &dev_attr_sector_size.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_btt_attribute_group = { + .attrs = nd_btt_attributes, +}; + +static const struct attribute_group *nd_btt_attribute_groups[] = { + &nd_btt_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_btt_create(struct nd_region *nd_region, + unsigned long lbasize, u8 *uuid, + struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt; + struct device *dev; + + nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL); + if (!nd_btt) + return NULL; + + nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); + if (nd_btt->id < 0) { + kfree(nd_btt); + return NULL; + } + + nd_btt->lbasize = lbasize; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_btt->uuid = uuid; + dev = &nd_btt->dev; + dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); + dev->parent = &nd_region->dev; + dev->type = &nd_btt_device_type; + dev->groups = nd_btt_attribute_groups; + device_initialize(&nd_btt->dev); + if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_btt_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +/* + * nd_btt_sb_checksum: compute checksum for btt info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) +{ + u64 sum; + __le64 sum_save; + + sum_save = btt_sb->checksum; + btt_sb->checksum = 0; + sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); + btt_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_btt_sb_checksum); + +static int __nd_btt_probe(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns, struct btt_sb *btt_sb) +{ + u64 checksum; + + if (!btt_sb || !ndns || !nd_btt) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb))) + return -ENXIO; + + if (nvdimm_namespace_capacity(ndns) < SZ_16M) + return -ENXIO; + + if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(btt_sb->checksum); + btt_sb->checksum = 0; + if (checksum != nd_btt_sb_checksum(btt_sb)) + return -ENODEV; + btt_sb->checksum = cpu_to_le64(checksum); + + nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); + nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); + if (!nd_btt->uuid) + return -ENOMEM; + + __nd_device_register(&nd_btt->dev); + + return 0; +} + +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct btt_sb *btt_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_btt_create(nd_region, 0, NULL, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL); + rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb); + kfree(btt_sb); + dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, + rc == 0 ? 
dev_name(dev) : "<none>"); + if (rc < 0) { + __nd_btt_detach_ndns(to_nd_btt(dev)); + put_device(dev); + } + + return rc; +} +EXPORT_SYMBOL(nd_btt_probe); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c new file mode 100644 index 000000000000..8eb22c0ca7ce --- /dev/null +++ b/drivers/nvdimm/bus.c @@ -0,0 +1,730 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/fcntl.h> +#include <linux/async.h> +#include <linux/genhd.h> +#include <linux/ndctl.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +int nvdimm_major; +static int nvdimm_bus_major; +static struct class *nd_class; + +static int to_nd_device_type(struct device *dev) +{ + if (is_nvdimm(dev)) + return ND_DEVICE_DIMM; + else if (is_nd_pmem(dev)) + return ND_DEVICE_REGION_PMEM; + else if (is_nd_blk(dev)) + return ND_DEVICE_REGION_BLK; + else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + return nd_region_to_nstype(to_nd_region(dev->parent)); + + return 0; +} + +static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + /* + * Ensure that region devices always have their numa node set as + * early as possible. 
+ */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + set_dev_node(dev, to_nd_region(dev)->numa_node); + return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, + to_nd_device_type(dev)); +} + +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + return test_bit(to_nd_device_type(dev), &nd_drv->type); +} + +static struct module *to_bus_provider(struct device *dev) +{ + /* pin bus providers while regions are enabled */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + return nvdimm_bus->module; + } + return NULL; +} + +static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + nvdimm_bus->probe_active++; + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + if (--nvdimm_bus->probe_active == 0) + wake_up(&nvdimm_bus->probe_wait); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static int nvdimm_bus_probe(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + if (!try_module_get(provider)) + return -ENXIO; + + nvdimm_bus_probe_start(nvdimm_bus); + rc = nd_drv->probe(dev); + if (rc == 0) + nd_region_probe_success(nvdimm_bus, dev); + else + nd_region_disable(nvdimm_bus, dev); + nvdimm_bus_probe_end(nvdimm_bus); + + dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + + if (rc != 0) + module_put(provider); + return rc; +} + +static int nvdimm_bus_remove(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->remove(dev); + nd_region_disable(nvdimm_bus, dev); + + dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + module_put(provider); + return rc; +} + +static struct bus_type nvdimm_bus_type = { + .name = "nd", + .uevent = nvdimm_bus_uevent, + .match = nvdimm_bus_match, + .probe = nvdimm_bus_probe, + .remove = nvdimm_bus_remove, +}; + +static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); + +void nd_synchronize(void) +{ + async_synchronize_full_domain(&nd_async_domain); +} +EXPORT_SYMBOL_GPL(nd_synchronize); + +static void nd_async_device_register(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + if (device_add(dev) != 0) { + dev_err(dev, "%s: failed\n", __func__); + put_device(dev); + } + put_device(dev); +} + +static void nd_async_device_unregister(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + /* flush bus operations before delete */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + + device_unregister(dev); + put_device(dev); +} + +void __nd_device_register(struct device *dev) +{ + dev->bus = &nvdimm_bus_type; + get_device(dev); + async_schedule_domain(nd_async_device_register, dev, + &nd_async_domain); +} + +void nd_device_register(struct device *dev) +{ + device_initialize(dev); + __nd_device_register(dev); +} +EXPORT_SYMBOL(nd_device_register); + +void nd_device_unregister(struct device *dev, enum nd_async_mode mode) +{ + switch (mode) { + case ND_ASYNC: + get_device(dev); + async_schedule_domain(nd_async_device_unregister, dev, + &nd_async_domain); + break; + case 
ND_SYNC: + nd_synchronize(); + device_unregister(dev); + break; + } +} +EXPORT_SYMBOL(nd_device_unregister); + +/** + * __nd_driver_register() - register a region or a namespace driver + * @nd_drv: driver to register + * @owner: automatically set by nd_driver_register() macro + * @mod_name: automatically set by nd_driver_register() macro + */ +int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &nd_drv->drv; + + if (!nd_drv->type) { + pr_debug("driver type bitmask not set (%pf)\n", + __builtin_return_address(0)); + return -EINVAL; + } + + if (!nd_drv->probe || !nd_drv->remove) { + pr_debug("->probe() and ->remove() must be specified\n"); + return -EINVAL; + } + + drv->bus = &nvdimm_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL(__nd_driver_register); + +int nvdimm_revalidate_disk(struct gendisk *disk) +{ + struct device *dev = disk->driverfs_dev; + struct nd_region *nd_region = to_nd_region(dev->parent); + const char *pol = nd_region->ro ? "only" : "write"; + + if (nd_region->ro == get_disk_ro(disk)) + return 0; + + dev_info(dev, "%s read-%s, marking %s read-%s\n", + dev_name(&nd_region->dev), pol, disk->disk_name, pol); + set_disk_ro(disk, nd_region->ro); + + return 0; + +} +EXPORT_SYMBOL(nvdimm_revalidate_disk); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n", + to_nd_device_type(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *nd_device_attributes[] = { + &dev_attr_modalias.attr, + &dev_attr_devtype.attr, + NULL, +}; + +/** + * nd_device_attribute_group - generic attributes for all devices on an nd bus + */ +struct attribute_group nd_device_attribute_group = { + .attrs = nd_device_attributes, +}; +EXPORT_SYMBOL_GPL(nd_device_attribute_group); + +static ssize_t numa_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + +static struct attribute *nd_numa_attributes[] = { + &dev_attr_numa_node.attr, + NULL, +}; + +static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA)) + return 0; + + return a->mode; +} + +/** + * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus + */ +struct attribute_group nd_numa_attribute_group = { + .attrs = nd_numa_attributes, + .is_visible = nd_numa_attr_visible, +}; +EXPORT_SYMBOL_GPL(nd_numa_attribute_group); + +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); + struct device *dev; + + dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, + "ndctl%d", nvdimm_bus->id); + + if (IS_ERR(dev)) { + dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", + nvdimm_bus->id, PTR_ERR(dev)); + return PTR_ERR(dev); + } + return 0; +} + +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id)); +} + +static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_SMART] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + 
[ND_CMD_SMART_THRESHOLD] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_DIMM_FLAGS] = { + .out_num = 2, + .out_sizes = { 4, 4 }, + }, + [ND_CMD_GET_CONFIG_SIZE] = { + .out_num = 3, + .out_sizes = { 4, 4, 4, }, + }, + [ND_CMD_GET_CONFIG_DATA] = { + .in_num = 2, + .in_sizes = { 4, 4, }, + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, + [ND_CMD_SET_CONFIG_DATA] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_VENDOR] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 3, + .out_sizes = { 4, 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs)) + return &__nd_cmd_dimm_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc); + +static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_ARS_CAP] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 2, + .out_sizes = { 4, 4, }, + }, + [ND_CMD_ARS_START] = { + .in_num = 4, + .in_sizes = { 8, 8, 2, 6, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_ARS_STATUS] = { + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs)) + return &__nd_cmd_bus_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_bus_desc); + +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf) +{ + if (idx >= desc->in_num) + return UINT_MAX; + + if (desc->in_sizes[idx] < UINT_MAX) + return desc->in_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) { + struct nd_cmd_set_config_hdr *hdr = buf; + + return hdr->in_length; + } else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) { + struct nd_cmd_vendor_hdr *hdr = buf; + + return hdr->in_length; + } + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_in_size); + +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field) +{ + if (idx >= desc->out_num) + return UINT_MAX; + + if (desc->out_sizes[idx] < UINT_MAX) + return desc->out_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1) + return in_field[1]; + else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) + return out_field[1]; + else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1) + return ND_CMD_ARS_STATUS_MAX; + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_out_size); + +void wait_nvdimm_bus_probe_idle(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + do { + if (nvdimm_bus->probe_active == 0) + break; + nvdimm_bus_unlock(&nvdimm_bus->dev); + wait_event(nvdimm_bus->probe_wait, + nvdimm_bus->probe_active == 0); + nvdimm_bus_lock(&nvdimm_bus->dev); + } while (true); +} + +/* set_config requires an idle interleave set */ +static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +{ + struct nvdimm_bus *nvdimm_bus; + + if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) + return 0; + + nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); + + if (atomic_read(&nvdimm->busy)) + return -EBUSY; + return 0; +} + +static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, + int read_only, unsigned int ioctl_cmd, unsigned long arg) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + size_t buf_len = 0, in_len = 0, out_len = 0; + static char out_env[ND_CMD_MAX_ENVELOPE]; + 
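+ /*
+ * out_env / in_env are scratch copies of the fixed-size leading fields of
+ * the command payload; nd_cmd_out_size() / nd_cmd_in_size() inspect them
+ * to size any variable-length trailing field before the full buffer is
+ * copied in from user space.
+ */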
static char in_env[ND_CMD_MAX_ENVELOPE]; + const struct nd_cmd_desc *desc = NULL; + unsigned int cmd = _IOC_NR(ioctl_cmd); + void __user *p = (void __user *) arg; + struct device *dev = &nvdimm_bus->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + void *buf; + int rc, i; + + if (nvdimm) { + desc = nd_cmd_dimm_desc(cmd); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + dimm_name = dev_name(&nvdimm->dev); + } else { + desc = nd_cmd_bus_desc(cmd); + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + dimm_name = "bus"; + } + + if (!desc || (desc->out_num + desc->in_num == 0) || + !test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + /* fail write commands (when read-only) */ + if (read_only) + switch (ioctl_cmd) { + case ND_IOCTL_VENDOR: + case ND_IOCTL_SET_CONFIG_DATA: + case ND_IOCTL_ARS_START: + dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", + nvdimm ? nvdimm_cmd_name(cmd) + : nvdimm_bus_cmd_name(cmd)); + return -EPERM; + default: + break; + } + + /* process an input envelope */ + for (i = 0; i < desc->in_num; i++) { + u32 in_size, copy; + + in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env); + if (in_size == UINT_MAX) { + dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -ENXIO; + } + if (!access_ok(VERIFY_READ, p + in_len, in_size)) + return -EFAULT; + if (in_len < sizeof(in_env)) + copy = min_t(u32, sizeof(in_env) - in_len, in_size); + else + copy = 0; + if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) + return -EFAULT; + in_len += in_size; + } + + /* process an output envelope */ + for (i = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, + (u32 *) in_env, (u32 *) out_env); + u32 copy; + + if (out_size == UINT_MAX) { + dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -EFAULT; + } + if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) + return -EFAULT; + if (out_len < sizeof(out_env)) + copy = min_t(u32, sizeof(out_env) - out_len, out_size); + else + copy = 0; + if (copy && copy_from_user(&out_env[out_len], + p + in_len + out_len, copy)) + return -EFAULT; + out_len += out_size; + } + + buf_len = out_len + in_len; + if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) + return -EFAULT; + + if (buf_len > ND_IOCTL_MAX_BUFLEN) { + dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, + dimm_name, cmd_name, buf_len, + ND_IOCTL_MAX_BUFLEN); + return -EINVAL; + } + + buf = vmalloc(buf_len); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, p, buf_len)) { + rc = -EFAULT; + goto out; + } + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = nd_cmd_clear_to_send(nvdimm, cmd); + if (rc) + goto out_unlock; + + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + if (rc < 0) + goto out_unlock; + if (copy_to_user(p, buf, buf_len)) + rc = -EFAULT; + out_unlock: + nvdimm_bus_unlock(&nvdimm_bus->dev); + out: + vfree(buf); + return rc; +} + +static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long id = (long) file->private_data; + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + if (nvdimm_bus->id == id) { + rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + break; + } + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; 
+} + +static int match_dimm(struct device *dev, void *data) +{ + long id = (long) data; + + if (is_nvdimm(dev)) { + struct nvdimm *nvdimm = to_nvdimm(dev); + + return nvdimm->id == id; + } + + return 0; +} + +static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + struct device *dev = device_find_child(&nvdimm_bus->dev, + file->private_data, match_dimm); + struct nvdimm *nvdimm; + + if (!dev) + continue; + + nvdimm = to_nvdimm(dev); + rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + put_device(dev); + break; + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int nd_open(struct inode *inode, struct file *file) +{ + long minor = iminor(inode); + + file->private_data = (void *) minor; + return 0; +} + +static const struct file_operations nvdimm_bus_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nd_ioctl, + .compat_ioctl = nd_ioctl, + .llseek = noop_llseek, +}; + +static const struct file_operations nvdimm_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nvdimm_ioctl, + .compat_ioctl = nvdimm_ioctl, + .llseek = noop_llseek, +}; + +int __init nvdimm_bus_init(void) +{ + int rc; + + rc = bus_register(&nvdimm_bus_type); + if (rc) + return rc; + + rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops); + if (rc < 0) + goto err_bus_chrdev; + nvdimm_bus_major = rc; + + rc = register_chrdev(0, "dimmctl", &nvdimm_fops); + if (rc < 0) + goto err_dimm_chrdev; + nvdimm_major = rc; + + nd_class = class_create(THIS_MODULE, "nd"); + if (IS_ERR(nd_class)) + goto err_class; + + return 0; + + err_class: + unregister_chrdev(nvdimm_major, "dimmctl"); + err_dimm_chrdev: + unregister_chrdev(nvdimm_bus_major, "ndctl"); + err_bus_chrdev: + bus_unregister(&nvdimm_bus_type); + + return rc; +} + +void nvdimm_bus_exit(void) +{ + class_destroy(nd_class); + unregister_chrdev(nvdimm_bus_major, "ndctl"); + unregister_chrdev(nvdimm_major, "dimmctl"); + bus_unregister(&nvdimm_bus_type); +} diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c new file mode 100644 index 000000000000..cb62ec6a12d0 --- /dev/null +++ b/drivers/nvdimm/core.c @@ -0,0 +1,465 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/libnvdimm.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/ctype.h> +#include <linux/ndctl.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include "nd-core.h" +#include "nd.h" + +LIST_HEAD(nvdimm_bus_list); +DEFINE_MUTEX(nvdimm_bus_list_mutex); +static DEFINE_IDA(nd_ida); + +void nvdimm_bus_lock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_lock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_lock); + +void nvdimm_bus_unlock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_unlock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_unlock); + +bool is_nvdimm_bus_locked(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + return mutex_is_locked(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(is_nvdimm_bus_locked); + +u64 nd_fletcher64(void *addr, size_t len, bool le) +{ + u32 *buf = addr; + u32 lo32 = 0; + u64 hi32 = 0; + int i; + + for (i = 0; i < len / sizeof(u32); i++) { + lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i]; + hi32 += lo32; + } + + return hi32 << 32 | lo32; +} +EXPORT_SYMBOL_GPL(nd_fletcher64); + +static void nvdimm_bus_release(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + ida_simple_remove(&nd_ida, nvdimm_bus->id); + kfree(nvdimm_bus); +} + +struct nvdimm_bus *to_nvdimm_bus(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); + return nvdimm_bus; +} +EXPORT_SYMBOL_GPL(to_nvdimm_bus); + +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) +{ + /* struct nvdimm_bus definition is private to libnvdimm */ + return nvdimm_bus->nd_desc; +} +EXPORT_SYMBOL_GPL(to_nd_desc); + +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) +{ + struct device *dev; + + for (dev = nd_dev; dev; dev = dev->parent) + if (dev->release == nvdimm_bus_release) + break; + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); + if (dev) + return to_nvdimm_bus(dev); + return NULL; +} + +static bool is_uuid_sep(char sep) +{ + if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') + return true; + return false; +} + +static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, + size_t len) +{ + const char *str = buf; + u8 uuid[16]; + int i; + + for (i = 0; i < 16; i++) { + if (!isxdigit(str[0]) || !isxdigit(str[1])) { + dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", + __func__, i, str - buf, str[0], + str + 1 - buf, str[1]); + return -EINVAL; + } + + uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); + str += 2; + if (is_uuid_sep(*str)) + str++; + } + + memcpy(uuid_out, uuid, sizeof(uuid)); + return 0; +} + +/** + * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes + * @dev: container device for the uuid property + * @uuid_out: uuid buffer to replace + * @buf: raw sysfs buffer to parse + * + * Enforce that uuids can only be changed while the device is disabled + * (driver detached) + * LOCKING: expects device_lock() is held on entry + */ +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len) +{ + u8 uuid[16]; + int rc; + + if (dev->driver) + return -EBUSY; + 
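+ /*
+ * Parse into an on-stack buffer first so that a malformed input string
+ * leaves the existing uuid untouched.
+ */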
+ rc = nd_uuid_parse(dev, uuid, buf, len); + if (rc) + return rc; + + kfree(*uuid_out); + *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); + if (!(*uuid_out)) + return -ENOMEM; + + return 0; +} + +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf) +{ + ssize_t len = 0; + int i; + + for (i = 0; supported[i]; i++) + if (current_lbasize == supported[i]) + len += sprintf(buf + len, "[%ld] ", supported[i]); + else + len += sprintf(buf + len, "%ld ", supported[i]); + len += sprintf(buf + len, "\n"); + return len; +} + +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported) +{ + unsigned long lbasize; + int rc, i; + + if (dev->driver) + return -EBUSY; + + rc = kstrtoul(buf, 0, &lbasize); + if (rc) + return rc; + + for (i = 0; supported[i]; i++) + if (lbasize == supported[i]) + break; + + if (supported[i]) { + *current_lbasize = lbasize; + return 0; + } else { + return -EINVAL; + } +} + +void __nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + *start = jiffies; + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(__nd_iostat_start); + +void nd_iostat_end(struct bio *bio, unsigned long start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + unsigned long duration = jiffies - start; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(nd_iostat_end); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cmd, len = 0; + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + struct device *parent = nvdimm_bus->dev.parent; + + if (nd_desc->provider_name) + return nd_desc->provider_name; + else if (parent) + return dev_name(parent); + else + return "unknown"; +} + +static ssize_t provider_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + + return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus)); +} +static DEVICE_ATTR_RO(provider); + +static int flush_namespaces(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + return 0; +} + +static int flush_regions_dimms(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + device_for_each_child(dev, NULL, flush_namespaces); + return 0; +} + +static ssize_t wait_probe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + nd_synchronize(); + device_for_each_child(dev, NULL, flush_regions_dimms); + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR_RO(wait_probe); + +static struct attribute *nvdimm_bus_attributes[] = { + 
&dev_attr_commands.attr, + &dev_attr_wait_probe.attr, + &dev_attr_provider.attr, + NULL, +}; + +struct attribute_group nvdimm_bus_attribute_group = { + .attrs = nvdimm_bus_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); + +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc, struct module *module) +{ + struct nvdimm_bus *nvdimm_bus; + int rc; + + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); + if (!nvdimm_bus) + return NULL; + INIT_LIST_HEAD(&nvdimm_bus->list); + init_waitqueue_head(&nvdimm_bus->probe_wait); + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); + if (nvdimm_bus->id < 0) { + kfree(nvdimm_bus); + return NULL; + } + nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->module = module; + nvdimm_bus->dev.parent = parent; + nvdimm_bus->dev.release = nvdimm_bus_release; + nvdimm_bus->dev.groups = nd_desc->attr_groups; + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); + rc = device_register(&nvdimm_bus->dev); + if (rc) { + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); + goto err; + } + + rc = nvdimm_bus_create_ndctl(nvdimm_bus); + if (rc) + goto err; + + mutex_lock(&nvdimm_bus_list_mutex); + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); + mutex_unlock(&nvdimm_bus_list_mutex); + + return nvdimm_bus; + err: + put_device(&nvdimm_bus->dev); + return NULL; +} +EXPORT_SYMBOL_GPL(__nvdimm_bus_register); + +static int child_unregister(struct device *dev, void *data) +{ + /* + * the singular ndctl class device per bus needs to be + * "device_destroy"ed, so skip it here + * + * i.e. remove classless children + */ + if (dev->class) + /* pass */; + else + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) +{ + if (!nvdimm_bus) + return; + + mutex_lock(&nvdimm_bus_list_mutex); + list_del_init(&nvdimm_bus->list); + mutex_unlock(&nvdimm_bus_list_mutex); + + nd_synchronize(); + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + nvdimm_bus_destroy_ndctl(nvdimm_bus); + + device_unregister(&nvdimm_bus->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter) +{ + return 0; +} + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + struct blk_integrity integrity = { + .name = "ND-PI-NOP", + .generate_fn = nd_pi_nop_generate_verify, + .verify_fn = nd_pi_nop_generate_verify, + .tuple_size = meta_size, + .tag_size = meta_size, + }; + int ret; + + if (meta_size == 0) + return 0; + + ret = blk_integrity_register(disk, &integrity); + if (ret) + return ret; + + blk_queue_max_integrity_segments(disk->queue, 1); + + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#endif + +static __init int libnvdimm_init(void) +{ + int rc; + + rc = nvdimm_bus_init(); + if (rc) + return rc; + rc = nvdimm_init(); + if (rc) + goto err_dimm; + rc = nd_region_init(); + if (rc) + goto err_region; + return 0; + err_region: + nvdimm_exit(); + err_dimm: + nvdimm_bus_exit(); + return rc; +} + +static __exit void libnvdimm_exit(void) +{ + WARN_ON(!list_empty(&nvdimm_bus_list)); + nd_region_exit(); + nvdimm_exit(); + nvdimm_bus_exit(); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); 
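+/*
+ * Register the 'nd' bus type and the core drivers at subsys_initcall time
+ * so the bus exists before provider modules (for example the ACPI NFIT
+ * driver) register an nvdimm_bus from their own, later, initcalls.
+ */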
+subsys_initcall(libnvdimm_init); +module_exit(libnvdimm_exit); diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c new file mode 100644 index 000000000000..71d12bb67339 --- /dev/null +++ b/drivers/nvdimm/dimm.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "label.h" +#include "nd.h" + +static int nvdimm_probe(struct device *dev) +{ + struct nvdimm_drvdata *ndd; + int rc; + + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); + if (!ndd) + return -ENOMEM; + + dev_set_drvdata(dev, ndd); + ndd->dpa.name = dev_name(dev); + ndd->ns_current = -1; + ndd->ns_next = -1; + ndd->dpa.start = 0; + ndd->dpa.end = -1; + ndd->dev = dev; + get_device(dev); + kref_init(&ndd->kref); + + rc = nvdimm_init_nsarea(ndd); + if (rc) + goto err; + + rc = nvdimm_init_config_data(ndd); + if (rc) + goto err; + + dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + + nvdimm_bus_lock(dev); + ndd->ns_current = nd_label_validate(ndd); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + nd_label_copy(ndd, to_next_namespace_index(ndd), + to_current_namespace_index(ndd)); + rc = nd_label_reserve_dpa(ndd); + nvdimm_bus_unlock(dev); + + if (rc) + goto err; + + return 0; + + err: + put_ndd(ndd); + return rc; +} + +static int nvdimm_remove(struct device *dev) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + put_ndd(ndd); + + return 0; +} + +static struct nd_device_driver nvdimm_driver = { + .probe = nvdimm_probe, + .remove = nvdimm_remove, + .drv = { + .name = "nvdimm", + }, + .type = ND_DRIVER_DIMM, +}; + +int __init nvdimm_init(void) +{ + return nd_driver_register(&nvdimm_driver); +} + +void nvdimm_exit(void) +{ + driver_unregister(&nvdimm_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c new file mode 100644 index 000000000000..c05eb807d674 --- /dev/null +++ b/drivers/nvdimm/dimm_devs.c @@ -0,0 +1,551 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static DEFINE_IDA(dimm_ida); + +/* + * Retrieve bus and dimm handle and return if this bus supports + * get_config_data commands + */ +static int __validate_dimm(struct nvdimm_drvdata *ndd) +{ + struct nvdimm *nvdimm; + + if (!ndd) + return -EINVAL; + + nvdimm = to_nvdimm(ndd->dev); + + if (!nvdimm->dsm_mask) + return -ENXIO; + if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + return -ENXIO; + + return 0; +} + +static int validate_dimm(struct nvdimm_drvdata *ndd) +{ + int rc = __validate_dimm(ndd); + + if (rc && ndd) + dev_dbg(ndd->dev, "%pf: %s error: %d\n", + __builtin_return_address(0), __func__, rc); + return rc; +} + +/** + * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area + * @nvdimm: dimm to initialize + */ +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) +{ + struct nd_cmd_get_config_size *cmd = &ndd->nsarea; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + + if (rc) + return rc; + + if (cmd->config_size) + return 0; /* already valid */ + + memset(cmd, 0, sizeof(*cmd)); + nd_desc = nvdimm_bus->nd_desc; + return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); +} + +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nd_cmd_get_config_data_hdr *cmd; + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + u32 max_cmd_size, config_size; + size_t offset; + + if (rc) + return rc; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 + || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { + dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); + return -ENXIO; + } + + ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); + if (!ndd->data) + ndd->data = vmalloc(ndd->nsarea.config_size); + + if (!ndd->data) + return -ENOMEM; + + max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + nd_desc = nvdimm_bus->nd_desc; + for (config_size = ndd->nsarea.config_size, offset = 0; + config_size; config_size -= cmd->in_length, + offset += cmd->in_length) { + cmd->in_length = min(config_size, max_cmd_size); + cmd->in_offset = offset; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_DATA, cmd, + cmd->in_length + sizeof(*cmd)); + if (rc || cmd->status) { + rc = -ENXIO; + break; + } + memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + } + dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + kfree(cmd); + + return rc; +} + +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len) +{ + int rc = validate_dimm(ndd); + size_t max_cmd_size, buf_offset; + struct nd_cmd_set_config_hdr *cmd; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + if (rc) + return rc; + + if (!ndd->data) + return -ENXIO; + + if (offset + len > ndd->nsarea.config_size) + return -ENXIO; + + max_cmd_size = min_t(u32, PAGE_SIZE, len); + max_cmd_size = 
min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + for (buf_offset = 0; len; len -= cmd->in_length, + buf_offset += cmd->in_length) { + size_t cmd_size; + u32 *status; + + cmd->in_offset = offset + buf_offset; + cmd->in_length = min(max_cmd_size, len); + memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length); + + /* status is output in the last 4-bytes of the command buffer */ + cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); + status = ((void *) cmd) + cmd_size - sizeof(u32); + + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + if (rc || *status) { + rc = rc ? rc : -ENXIO; + break; + } + } + kfree(cmd); + + return rc; +} + +static void nvdimm_release(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + ida_simple_remove(&dimm_ida, nvdimm->id); + kfree(nvdimm); +} + +static struct device_type nvdimm_device_type = { + .name = "nvdimm", + .release = nvdimm_release, +}; + +bool is_nvdimm(struct device *dev) +{ + return dev->type == &nvdimm_device_type; +} + +struct nvdimm *to_nvdimm(struct device *dev) +{ + struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev); + + WARN_ON(!is_nvdimm(dev)); + return nvdimm; +} +EXPORT_SYMBOL_GPL(to_nvdimm); + +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) +{ + struct nd_region *nd_region = &ndbr->nd_region; + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + return nd_mapping->nvdimm; +} +EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); + +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + return dev_get_drvdata(&nvdimm->dev); +} +EXPORT_SYMBOL(to_ndd); + +void nvdimm_drvdata_release(struct kref *kref) +{ + struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); + struct device *dev = ndd->dev; + struct resource *res, *_r; + + dev_dbg(dev, "%s\n", __func__); + + nvdimm_bus_lock(dev); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); + put_device(dev); +} + +void get_ndd(struct nvdimm_drvdata *ndd) +{ + kref_get(&ndd->kref); +} + +void put_ndd(struct nvdimm_drvdata *ndd) +{ + if (ndd) + kref_put(&ndd->kref, nvdimm_drvdata_release); +} + +const char *nvdimm_name(struct nvdimm *nvdimm) +{ + return dev_name(&nvdimm->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_name); + +void *nvdimm_provider_data(struct nvdimm *nvdimm) +{ + if (nvdimm) + return nvdimm->provider_data; + return NULL; +} +EXPORT_SYMBOL_GPL(nvdimm_provider_data); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + int cmd, len = 0; + + if (!nvdimm->dsm_mask) + return sprintf(buf, "\n"); + + for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static ssize_t state_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + /* + * The state may be in the process of changing, userspace should + * quiesce probing if it wants a static answer + */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + return sprintf(buf, "%s\n", 
atomic_read(&nvdimm->busy) + ? "active" : "idle"); +} +static DEVICE_ATTR_RO(state); + +static ssize_t available_slots_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + ssize_t rc; + u32 nfree; + + if (!ndd) + return -ENXIO; + + nvdimm_bus_lock(dev); + nfree = nd_label_nfree(ndd); + if (nfree - 1 > nfree) { + dev_WARN_ONCE(dev, 1, "we ate our last label?\n"); + nfree = 0; + } else + nfree--; + rc = sprintf(buf, "%d\n", nfree); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(available_slots); + +static struct attribute *nvdimm_attributes[] = { + &dev_attr_state.attr, + &dev_attr_commands.attr, + &dev_attr_available_slots.attr, + NULL, +}; + +struct attribute_group nvdimm_attribute_group = { + .attrs = nvdimm_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_attribute_group); + +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask) +{ + struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); + struct device *dev; + + if (!nvdimm) + return NULL; + + nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL); + if (nvdimm->id < 0) { + kfree(nvdimm); + return NULL; + } + nvdimm->provider_data = provider_data; + nvdimm->flags = flags; + nvdimm->dsm_mask = dsm_mask; + atomic_set(&nvdimm->busy, 0); + dev = &nvdimm->dev; + dev_set_name(dev, "nmem%d", nvdimm->id); + dev->parent = &nvdimm_bus->dev; + dev->type = &nvdimm_device_type; + dev->devt = MKDEV(nvdimm_major, nvdimm->id); + dev->groups = groups; + nd_device_register(dev); + + return nvdimm; +} +EXPORT_SYMBOL_GPL(nvdimm_create); + +/** + * nd_blk_available_dpa - account the unused dpa of BLK region + * @nd_mapping: container of dpa-resource-root + labels + * + * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges. + */ +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t map_end, busy = 0, available; + struct resource *res; + + if (!ndd) + return 0; + + map_end = nd_mapping->start + nd_mapping->size - 1; + for_each_dpa_resource(ndd, res) + if (res->start >= nd_mapping->start && res->start < map_end) { + resource_size_t end = min(map_end, res->end); + + busy += end - res->start + 1; + } else if (res->end >= nd_mapping->start + && res->end <= map_end) { + busy += res->end - nd_mapping->start; + } else if (nd_mapping->start > res->start + && nd_mapping->start < res->end) { + /* total eclipse of the BLK region mapping */ + busy += nd_mapping->size; + } + + available = map_end - nd_mapping->start + 1; + if (busy < available) + return available - busy; + return 0; +} + +/** + * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa + * @nd_mapping: container of dpa-resource-root + labels + * @nd_region: constrain available space check to this reference region + * @overlap: calculate available space assuming this level of overlap + * + * Validate that a PMEM label, if present, aligns with the start of an + * interleave set and truncate the available size at the lowest BLK + * overlap point. + * + * The expectation is that this routine is called multiple times as it + * probes for the largest BLK encroachment for any single member DIMM of + * the interleave set. Once that value is determined the PMEM-limit for + * the set can be established. 
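+ * Returns: the number of bytes still available for a new PMEM allocation + * in this mapping, i.e. the interleave set extent truncated at the first + * BLK allocation, minus any existing PMEM allocation (0 if nothing remains + * or on error).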
+ */ +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap) +{ + resource_size_t map_start, map_end, busy = 0, available, blk_start; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + const char *reason; + + if (!ndd) + return 0; + + map_start = nd_mapping->start; + map_end = map_start + nd_mapping->size - 1; + blk_start = max(map_start, map_end + 1 - *overlap); + for_each_dpa_resource(ndd, res) + if (res->start >= map_start && res->start < map_end) { + if (strncmp(res->name, "blk", 3) == 0) + blk_start = min(blk_start, res->start); + else if (res->start != map_start) { + reason = "misaligned to iset"; + goto err; + } else { + if (busy) { + reason = "duplicate overlapping PMEM reservations?"; + goto err; + } + busy += resource_size(res); + continue; + } + } else if (res->end >= map_start && res->end <= map_end) { + if (strncmp(res->name, "blk", 3) == 0) { + /* + * If a BLK allocation overlaps the start of + * PMEM the entire interleave set may now only + * be used for BLK. + */ + blk_start = map_start; + } else { + reason = "misaligned to iset"; + goto err; + } + } else if (map_start > res->start && map_start < res->end) { + /* total eclipse of the mapping */ + busy += nd_mapping->size; + blk_start = map_start; + } + + *overlap = map_end + 1 - blk_start; + available = blk_start - map_start; + if (busy < available) + return available - busy; + return 0; + + err: + /* + * Something is wrong, PMEM must align with the start of the + * interleave set, and there can only be one allocation per set. + */ + nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); + return 0; +} + +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) +{ + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + kfree(res->name); + __release_region(&ndd->dpa, res->start, resource_size(res)); +} + +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n) +{ + char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); + struct resource *res; + + if (!name) + return NULL; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + res = __request_region(&ndd->dpa, start, n, name, 0); + if (!res) + kfree(name); + return res; +} + +/** + * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id + * @nvdimm: container of dpa-resource-root + labels + * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid> + */ +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id) +{ + resource_size_t allocated = 0; + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + allocated += resource_size(res); + + return allocated; +} + +static int count_dimms(struct device *dev, void *c) +{ + int *count = c; + + if (is_nvdimm(dev)) + (*count)++; + return 0; +} + +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) +{ + int count = 0; + /* Flush any possible dimm registration failures */ + nd_synchronize(); + + device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); + dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + if (count != dimm_count) + return -ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c new file mode 100644 index 000000000000..96526dcfdd37 --- /dev/null +++ b/drivers/nvdimm/label.c @@ -0,0 +1,927 @@ 
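Editor's illustration, not part of the patch: the namespace index blocks that label.c validates and writes below are protected by a Fletcher-64 checksum, computed through the libnvdimm core helper nd_fletcher64() (not shown in this hunk). A minimal userspace sketch of that style of checksum over 32-bit words, assuming a little-endian host, is shown here; the standalone fletcher64() name and form are hypothetical, for illustration only.

#include <stdint.h>
#include <stddef.h>

/* Fletcher-64 over 32-bit words; callers zero the on-media 'checksum'
 * field before summing and restore it afterwards, as nd_label_validate()
 * and nd_label_write_index() do with nd_fletcher64(). */
uint64_t fletcher64(const void *addr, size_t len)
{
	const uint32_t *buf = addr;
	uint32_t lo32 = 0;
	uint64_t hi32 = 0;
	size_t i;

	for (i = 0; i < len / sizeof(uint32_t); i++) {
		lo32 += buf[i];	/* running sum of words */
		hi32 += lo32;	/* running sum of running sums */
	}
	return hi32 << 32 | lo32;
}

Because hi32 accumulates the running lo32 sum, the result is position sensitive, so transposed words are caught in addition to simple bit flips.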
+/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) + return b; + else if (b == 0) + return a; + else if (nd_inc_seq(a) == b) + return b; + else + return a; +} + +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 index_span; + + if (ndd->nsindex_size) + return ndd->nsindex_size; + + /* + * The minimum index space is 512 bytes, with that amount of + * index we can describe ~1400 labels which is less than a byte + * of overhead per label. Round up to a byte of overhead per + * label and determine the size of the index region. Yes, this + * starts to waste space at larger config_sizes, but it's + * unlikely we'll ever see anything but 128K. + */ + index_span = ndd->nsarea.config_size / 129; + index_span /= NSINDEX_ALIGN * 2; + ndd->nsindex_size = index_span * NSINDEX_ALIGN; + + return ndd->nsindex_size; +} + +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) +{ + return ndd->nsarea.config_size / 129; +} + +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. + * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... 
+ * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct nd_namespace_index *nsindex[] = { + to_namespace_index(ndd, 0), + to_namespace_index(ndd, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + struct device *dev = ndd->dev; + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + dev_dbg(dev, "%s: nsindex%d signature invalid\n", + __func__, i); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); + nsindex[i]->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(dev, "%s: nsindex%d checksum invalid\n", + __func__, i); + continue; + } + + seq = __le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", + __func__, i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (__le64_to_cpu(nsindex[i]->myoff) + != i * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (__le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = __le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(ndd) + || size < sizeof(struct nd_namespace_index)) { + dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", + __func__, i, size); + continue; + } + + nslot = __le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof(struct nd_namespace_label) + + 2 * sizeof_namespace_index(ndd) + > ndd->nsarea.config_size) { + dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", + __func__, i, nslot, + ndd->nsarea.config_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) + return i; + /* can't have num_valid > 0 but valid[] = { false, false } */ + WARN_ON(1); + break; + default: + /* pick the best index... 
*/ + seq = best_seq(__le32_to_cpu(nsindex[0]->seq), + __le32_to_cpu(nsindex[1]->seq)); + if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) + return 1; + else + return 0; + break; + } + + return -1; +} + +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src) +{ + if (dst && src) + /* pass */; + else + return; + + memcpy(dst, src, sizeof_namespace_index(ndd)); +} + +static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) +{ + void *base = to_namespace_index(ndd, 0); + + return base + 2 * sizeof_namespace_index(ndd); +} + +static int to_slot(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return nd_label - nd_label_base(ndd); +} + +#define for_each_clear_bit_le(bit, addr, size) \ + for ((bit) = find_next_zero_bit_le((addr), (size), 0); \ + (bit) < (size); \ + (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1)) + +/** + * preamble_index - common variable initialization for nd_label_* routines + * @ndd: dimm container for the relevant label set + * @idx: namespace_index index + * @nsindex_out: on return set to the currently active namespace index + * @free: on return set to the free label bitmap in the index + * @nslot: on return set to the number of slots in the label space + */ +static bool preamble_index(struct nvdimm_drvdata *ndd, int idx, + struct nd_namespace_index **nsindex_out, + unsigned long **free, u32 *nslot) +{ + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, idx); + if (nsindex == NULL) + return false; + + *free = (unsigned long *) nsindex->free; + *nslot = __le32_to_cpu(nsindex->nslot); + *nsindex_out = nsindex; + + return true; +} + +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +{ + if (!label_id || !uuid) + return NULL; + snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", + flags & NSLABEL_FLAG_LOCAL ? 
"blk" : "pmem", uuid); + return label_id->id; +} + +static bool preamble_current(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_current, nsindex, + free, nslot); +} + +static bool preamble_next(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_next, nsindex, + free, nslot); +} + +static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +{ + /* check that we are written where we expect to be written */ + if (slot != __le32_to_cpu(nd_label->slot)) + return false; + + /* check that DPA allocations are page aligned */ + if ((__le64_to_cpu(nd_label->dpa) + | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) + return false; + + return true; +} + +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; /* no label, nothing to reserve */ + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + struct nd_region *nd_region = NULL; + u8 label_uuid[NSLABEL_UUID_LEN]; + struct nd_label_id label_id; + struct resource *res; + u32 flags; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) + continue; + + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + flags = __le32_to_cpu(nd_label->flags); + nd_label_gen_id(&label_id, label_uuid, flags); + res = nvdimm_allocate_dpa(ndd, &label_id, + __le64_to_cpu(nd_label->dpa), + __le64_to_cpu(nd_label->rawsize)); + nd_dbg_dpa(nd_region, ndd, res, "reserve\n"); + if (!res) + return -EBUSY; + } + + return 0; +} + +int nd_label_active_count(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + int count = 0; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) { + u32 label_slot = __le32_to_cpu(nd_label->slot); + u64 size = __le64_to_cpu(nd_label->rawsize); + u64 dpa = __le64_to_cpu(nd_label->dpa); + + dev_dbg(ndd->dev, + "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", + __func__, slot, label_slot, dpa, size); + continue; + } + count++; + } + return count; +} + +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return NULL; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + if (!slot_valid(nd_label, slot)) + continue; + + if (n-- == 0) + return nd_label_base(ndd) + slot; + } + + return NULL; +} + +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return UINT_MAX; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + slot = find_next_bit_le(free, nslot, 0); + if (slot == nslot) + return UINT_MAX; + + clear_bit_le(slot, free); + + return slot; +} + +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return 
false; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (slot < nslot) + return !test_and_set_bit_le(slot, free); + return false; +} + +u32 nd_label_nfree(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return nvdimm_num_label_slots(ndd); + + return bitmap_weight(free, nslot); +} + +static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, + unsigned long flags) +{ + struct nd_namespace_index *nsindex; + unsigned long offset; + u64 checksum; + u32 nslot; + int rc; + + nsindex = to_namespace_index(ndd, index); + if (flags & ND_NSINDEX_INIT) + nslot = nvdimm_num_label_slots(ndd); + else + nslot = __le32_to_cpu(nsindex->nslot); + + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); + nsindex->flags = __cpu_to_le32(0); + nsindex->seq = __cpu_to_le32(seq); + offset = (unsigned long) nsindex + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->myoff = __cpu_to_le64(offset); + nsindex->mysize = __cpu_to_le64(sizeof_namespace_index(ndd)); + offset = (unsigned long) to_namespace_index(ndd, + nd_label_next_nsindex(index)) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->otheroff = __cpu_to_le64(offset); + offset = (unsigned long) nd_label_base(ndd) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->labeloff = __cpu_to_le64(offset); + nsindex->nslot = __cpu_to_le32(nslot); + nsindex->major = __cpu_to_le16(1); + nsindex->minor = __cpu_to_le16(1); + nsindex->checksum = __cpu_to_le64(0); + if (flags & ND_NSINDEX_INIT) { + unsigned long *free = (unsigned long *) nsindex->free; + u32 nfree = ALIGN(nslot, BITS_PER_LONG); + int last_bits, i; + + memset(nsindex->free, 0xff, nfree / 8); + for (i = 0, last_bits = nfree - nslot; i < last_bits; i++) + clear_bit_le(nslot + i, free); + } + checksum = nd_fletcher64(nsindex, sizeof_namespace_index(ndd), 1); + nsindex->checksum = __cpu_to_le64(checksum); + rc = nvdimm_set_config_data(ndd, __le64_to_cpu(nsindex->myoff), + nsindex, sizeof_namespace_index(ndd)); + if (rc < 0) + return rc; + + if (flags & ND_NSINDEX_INIT) + return 0; + + /* copy the index we just wrote to the new 'next' */ + WARN_ON(index != ndd->ns_next); + nd_label_copy(ndd, to_current_namespace_index(ndd), nsindex); + ndd->ns_current = nd_label_next_nsindex(ndd->ns_current); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_next); + WARN_ON(ndd->ns_current == ndd->ns_next); + + return 0; +} + +static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return (unsigned long) nd_label + - (unsigned long) to_namespace_index(ndd, 0); +} + +static int __pmem_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, + int pos) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *victim_label; + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + size_t offset; + int rc; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + /* allocate and write the label to the staging (next) index */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + return -ENXIO; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + 
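/* fill the staging slot; the new label only goes live once the 'next' index is committed below */ +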
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); + if (nspm->alt_name) + memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); + nd_label->position = __cpu_to_le16(pos); + nd_label->isetcookie = __cpu_to_le64(cookie); + rawsize = div_u64(resource_size(&nspm->nsio.res), + nd_region->ndr_mappings); + nd_label->rawsize = __cpu_to_le64(rawsize); + nd_label->dpa = __cpu_to_le64(nd_mapping->start); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + return rc; + + /* Garbage collect the previous label */ + victim_label = nd_mapping->labels[0]; + if (victim_label) { + slot = to_slot(ndd, victim_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc < 0) + return rc; + + nd_mapping->labels[0] = nd_label; + + return 0; +} + +static void del_label(struct nd_mapping *nd_mapping, int l) +{ + struct nd_namespace_label *next_label, *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + unsigned int slot; + int j; + + nd_label = nd_mapping->labels[l]; + slot = to_slot(ndd, nd_label); + dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot); + + for (j = l; (next_label = nd_mapping->labels[j + 1]); j++) + nd_mapping->labels[j] = next_label; + nd_mapping->labels[j] = NULL; +} + +static bool is_old_resource(struct resource *res, struct resource **list, int n) +{ + int i; + + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + for (i = 0; i < n; i++) + if (res == list[i]) + return true; + return false; +} + +static struct resource *to_resource(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + struct resource *res; + + for_each_dpa_resource(ndd, res) { + if (res->start != __le64_to_cpu(nd_label->dpa)) + continue; + if (resource_size(res) != __le64_to_cpu(nd_label->rawsize)) + continue; + return res; + } + + return NULL; +} + +/* + * 1/ Account all the labels that can be freed after this update + * 2/ Allocate and write the label to the staging (next) index + * 3/ Record the resources in the namespace device + */ +static int __blk_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk, + int num_labels) +{ + int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free, *victim_map = NULL; + struct resource *res, **old_res_list; + struct nd_label_id label_id; + u8 uuid[NSLABEL_UUID_LEN]; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + old_res_list = nsblk->res; + nfree = nd_label_nfree(ndd); + old_num_resources = nsblk->num_resources; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + + /* + * We need to loop over the old resources a few times, which seems a + * bit inefficient, but we need to know that we have the label + * space before we start mutating the tracking structures. + * Otherwise the recovery method of last resort for userspace is + * disable and re-enable the parent region. 
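+ * The passes below: count brand new allocations, build a victim map of + * stale label slots, write a label for each new resource, then commit the + * updated index.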
+ */ + alloc = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!is_old_resource(res, old_res_list, old_num_resources)) + alloc++; + } + + victims = 0; + if (old_num_resources) { + /* convert old local-label-map to dimm-slot victim-map */ + victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long), + GFP_KERNEL); + if (!victim_map) + return -ENOMEM; + + /* mark unused labels for garbage collection */ + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + if (res && is_old_resource(res, old_res_list, + old_num_resources)) + continue; + slot = to_slot(ndd, nd_label); + set_bit(slot, victim_map); + victims++; + } + } + + /* don't allow updates that consume the last label */ + if (nfree - alloc < 0 || nfree - alloc + victims < 1) { + dev_info(&nsblk->common.dev, "insufficient label space\n"); + kfree(victim_map); + return -ENOSPC; + } + /* from here on we need to abort on error */ + + + /* assign all resources to the namespace before writing the labels */ + nsblk->res = NULL; + nsblk->num_resources = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) { + rc = -ENOMEM; + goto abort; + } + } + + for (i = 0; i < nsblk->num_resources; i++) { + size_t offset; + + res = nsblk->res[i]; + if (is_old_resource(res, old_res_list, old_num_resources)) + continue; /* carry-over */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + goto abort; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); + if (nsblk->alt_name) + memcpy(nd_label->name, nsblk->alt_name, + NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); + nd_label->nlabel = __cpu_to_le16(0); /* N/A */ + nd_label->position = __cpu_to_le16(0); /* N/A */ + nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + nd_label->dpa = __cpu_to_le64(res->start); + nd_label->rawsize = __cpu_to_le64(resource_size(res)); + nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + goto abort; + } + + /* free up now unused slots in the new index */ + for_each_set_bit(slot, victim_map, victim_map ? 
nslot : 0) { + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + nd_label_free_slot(ndd, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc) + goto abort; + + /* + * Now that the on-dimm labels are up to date, fix up the tracking + * entries in nd_mapping->labels + */ + nlabel = 0; + for_each_label(l, nd_label, nd_mapping->labels) { + nlabel++; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + nlabel--; + del_label(nd_mapping, l); + l--; /* retry with the new label at this index */ + } + if (nlabel + nsblk->num_resources > num_labels) { + /* + * Bug, we can't end up with more resources than + * available labels + */ + WARN_ON_ONCE(1); + rc = -ENXIO; + goto out; + } + + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + res->flags &= ~DPA_RESOURCE_ADJUSTED; + dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n", + l, slot); + nd_mapping->labels[l++] = nd_label; + } + nd_mapping->labels[l] = NULL; + + out: + kfree(old_res_list); + kfree(victim_map); + return rc; + + abort: + /* + * 1/ repair the allocated label bitmap in the index + * 2/ restore the resource list + */ + nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd)); + kfree(nsblk->res); + nsblk->res = old_res_list; + nsblk->num_resources = old_num_resources; + old_res_list = NULL; + goto out; +} + +static int init_labels(struct nd_mapping *nd_mapping, int num_labels) +{ + int i, l, old_num_labels = 0; + struct nd_namespace_index *nsindex; + struct nd_namespace_label *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *); + + for_each_label(l, nd_label, nd_mapping->labels) + old_num_labels++; + + /* + * We need to preserve all the old labels for the mapping so + * they can be garbage collected after writing the new labels. 
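+ * If the dimm has no valid index block yet (ns_current or ns_next is -1), + * both index blocks are written from scratch at the end of this routine.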
+ */ + if (num_labels > old_num_labels) { + struct nd_namespace_label **labels; + + labels = krealloc(nd_mapping->labels, size, GFP_KERNEL); + if (!labels) + return -ENOMEM; + nd_mapping->labels = labels; + } + if (!nd_mapping->labels) + return -ENOMEM; + + for (i = old_num_labels; i <= num_labels; i++) + nd_mapping->labels[i] = NULL; + + if (ndd->ns_current == -1 || ndd->ns_next == -1) + /* pass */; + else + return max(num_labels, old_num_labels); + + nsindex = to_namespace_index(ndd, 0); + memset(nsindex, 0, ndd->nsarea.config_size); + for (i = 0; i < 2; i++) { + int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); + + if (rc) + return rc; + } + ndd->ns_next = 1; + ndd->ns_current = 0; + + return max(num_labels, old_num_labels); +} + +static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + u8 label_uuid[NSLABEL_UUID_LEN]; + int l, num_freed = 0; + unsigned long *free; + u32 nslot, slot; + + if (!uuid) + return 0; + + /* no index || no labels == nothing to delete */ + if (!preamble_next(ndd, &nsindex, &free, &nslot) + || !nd_mapping->labels) + return 0; + + for_each_label(l, nd_label, nd_mapping->labels) { + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + slot = to_slot(ndd, nd_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + del_label(nd_mapping, l); + num_freed++; + l--; /* retry with new label at this index */ + } + + if (num_freed > l) { + /* + * num_freed will only ever be > l when we delete the last + * label + */ + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + dev_dbg(ndd->dev, "%s: no more labels\n", __func__); + } + + return nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); +} + +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + if (size == 0) { + rc = del_labels(nd_mapping, nspm->uuid); + if (rc) + return rc; + continue; + } + + rc = init_labels(nd_mapping, 1); + if (rc < 0) + return rc; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + if (rc) + return rc; + } + + return 0; +} + +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct resource *res; + int count = 0; + + if (size == 0) + return del_labels(nd_mapping, nsblk->uuid); + + for_each_dpa_resource(to_ndd(nd_mapping), res) + count++; + + count = init_labels(nd_mapping, count); + if (count < 0) + return count; + + return __blk_label_update(nd_region, nd_mapping, nsblk, count); +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h new file mode 100644 index 000000000000..a59ef6eef2a3 --- /dev/null +++ b/drivers/nvdimm/label.h @@ -0,0 +1,141 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LABEL_H__ +#define __LABEL_H__ + +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/io.h> + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ + BTT_ALIGN = 4096, /* all btt structures */ + BTTINFO_SIG_LEN = 16, + BTTINFO_UUID_LEN = 16, + BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ + BTTINFO_MAJOR_VERSION = 1, + ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_ID_SIZE = 50, + ND_NSINDEX_INIT = 0x1, +}; + +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +/** + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct nd_namespace_index { + u8 sig[NSINDEX_SIG_LEN]; + __le32 flags; + __le32 seq; + __le64 myoff; + __le64 mysize; + __le64 otheroff; + __le64 labeloff; + __le32 nslot; + __le16 major; + __le16 minor; + __le64 checksum; + u8 free[0]; +}; + +/** + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct nd_namespace_label { + u8 uuid[NSLABEL_UUID_LEN]; + u8 name[NSLABEL_NAME_LEN]; + __le32 flags; + __le16 nlabel; + __le16 position; + __le64 isetcookie; + __le64 lbasize; + __le64 dpa; + __le64 rawsize; + __le32 slot; + __le32 unused; +}; + +/** + * struct nd_label_id - identifier string for dpa allocation + * @id: "{blk|pmem}-<namespace uuid>" + */ +struct nd_label_id { + char id[ND_LABEL_ID_SIZE]; +}; + +/* + * If the 'best' index is invalid, so is the 'next' index. 
Otherwise, + * the next index is MOD(index+1, 2) + */ +static inline int nd_label_next_nsindex(int index) +{ + if (index < 0) + return -1; + + return (index + 1) % 2; +} + +struct nvdimm_drvdata; +int nd_label_validate(struct nvdimm_drvdata *ndd); +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src); +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +int nd_label_active_count(struct nvdimm_drvdata *ndd); +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); +u32 nd_label_nfree(struct nvdimm_drvdata *ndd); +struct nd_region; +struct nd_namespace_pmem; +struct nd_namespace_blk; +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size); +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size); +#endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c new file mode 100644 index 000000000000..fef0dd80d4ad --- /dev/null +++ b/drivers/nvdimm/namespace_devs.c @@ -0,0 +1,1870 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static void namespace_io_release(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + kfree(nsio); +} + +static void namespace_pmem_release(struct device *dev) +{ + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + kfree(nspm->alt_name); + kfree(nspm->uuid); + kfree(nspm); +} + +static void namespace_blk_release(struct device *dev) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (nsblk->id >= 0) + ida_simple_remove(&nd_region->ns_ida, nsblk->id); + kfree(nsblk->alt_name); + kfree(nsblk->uuid); + kfree(nsblk->res); + kfree(nsblk); +} + +static struct device_type namespace_io_device_type = { + .name = "nd_namespace_io", + .release = namespace_io_release, +}; + +static struct device_type namespace_pmem_device_type = { + .name = "nd_namespace_pmem", + .release = namespace_pmem_release, +}; + +static struct device_type namespace_blk_device_type = { + .name = "nd_namespace_blk", + .release = namespace_blk_release, +}; + +static bool is_namespace_pmem(struct device *dev) +{ + return dev ? dev->type == &namespace_pmem_device_type : false; +} + +static bool is_namespace_blk(struct device *dev) +{ + return dev ? dev->type == &namespace_blk_device_type : false; +} + +static bool is_namespace_io(struct device *dev) +{ + return dev ? 
dev->type == &namespace_io_device_type : false; +} + +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name) +{ + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + const char *suffix = ""; + + if (ndns->claim && is_nd_btt(ndns->claim)) + suffix = "s"; + + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) + sprintf(name, "pmem%d%s", nd_region->id, suffix); + else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + } else { + return NULL; + } + + return name; +} +EXPORT_SYMBOL(nvdimm_namespace_disk_name); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t __alt_name_store(struct device *dev, const char *buf, + const size_t len) +{ + char *input, *pos, *alt_name, **ns_altname; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = &nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = &nsblk->alt_name; + } else + return -ENXIO; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + input = kmemdup(buf, len + 1, GFP_KERNEL); + if (!input) + return -ENOMEM; + + input[len] = '\0'; + pos = strim(input); + if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { + rc = -EINVAL; + goto out; + } + + alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL); + if (!alt_name) { + rc = -ENOMEM; + goto out; + } + kfree(*ns_altname); + *ns_altname = alt_name; + sprintf(*ns_altname, "%s", pos); + rc = len; + +out: + kfree(input); + return rc; +} + +static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + resource_size_t size = 0; + struct resource *res; + + if (!nsblk->uuid) + return 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + size += resource_size(res); + return size; +} + +static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + struct resource *res; + int count, i; + + if (!nsblk->uuid || !nsblk->lbasize || !ndd) + return false; + + count = 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + /* + * Resources with unacknowledged adjustments indicate a + * failure to update labels + */ + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + count++; + } + + /* These values match after a successful label update */ + if (count != nsblk->num_resources) + return false; + + for (i = 0; i < nsblk->num_resources; i++) { + struct resource *found = NULL; + + for_each_dpa_resource(ndd, res) + if (res == nsblk->res[i]) { + found = res; + break; + } + /* stale resource */ + if (!found) + return false; + } 
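+ /* every namespace resource was found in the dpa tree; the label set is consistent */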
+ + return true; +} + +resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + resource_size_t size; + + nvdimm_bus_lock(&nsblk->common.dev); + size = __nd_namespace_blk_validate(nsblk); + nvdimm_bus_unlock(&nsblk->common.dev); + + return size; +} +EXPORT_SYMBOL(nd_namespace_blk_validate); + + +static int nd_namespace_label_update(struct nd_region *nd_region, + struct device *dev) +{ + dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim, + "namespace must be idle during label update\n"); + if (dev->driver || to_ndns(dev)->claim) + return 0; + + /* + * Only allow label writes that will result in a valid namespace + * or deletion of an existing namespace. + */ + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + resource_size_t size = resource_size(&nspm->nsio.res); + + if (size == 0 && nspm->uuid) + /* delete allocation */; + else if (!nspm->uuid) + return 0; + + return nd_pmem_namespace_label_update(nd_region, nspm, size); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + resource_size_t size = nd_namespace_blk_size(nsblk); + + if (size == 0 && nsblk->uuid) + /* delete allocation */; + else if (!nsblk->uuid || !nsblk->lbasize) + return 0; + + return nd_blk_namespace_label_update(nd_region, nsblk, size); + } else + return -ENXIO; +} + +static ssize_t alt_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __alt_name_store(dev, buf, len); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} + +static ssize_t alt_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char *ns_altname; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = nsblk->alt_name; + } else + return -ENXIO; + + return sprintf(buf, "%s\n", ns_altname ? 
ns_altname : ""); +} +static DEVICE_ATTR_RW(alt_name); + +static int scan_free(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int rc = 0; + + while (n) { + struct resource *res, *last; + resource_size_t new_start; + + last = NULL; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + last = res; + res = last; + if (!res) + return 0; + + if (n >= resource_size(res)) { + n -= resource_size(res); + nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc); + nvdimm_free_dpa(ndd, res); + /* retry with last resource deleted */ + continue; + } + + /* + * Keep BLK allocations relegated to high DPA as much as + * possible + */ + if (is_blk) + new_start = res->start + n; + else + new_start = res->start; + + rc = adjust_resource(res, new_start, resource_size(res) - n); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc); + break; + } + + return rc; +} + +/** + * shrink_dpa_allocation - for each dimm in region free n bytes for label_id + * @nd_region: the set of dimms to reclaim @n bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to release + * + * Assumes resources are ordered. Starting from the end try to + * adjust_resource() the allocation to @n, but if @n is larger than the + * allocation delete it and find the 'new' last allocation in the label + * set. + */ +static int shrink_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_free(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, + struct nd_region *nd_region, struct nd_mapping *nd_mapping, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t first_dpa; + struct resource *res; + int rc = 0; + + /* allocate blk from highest dpa first */ + if (is_blk) + first_dpa = nd_mapping->start + nd_mapping->size - n; + else + first_dpa = nd_mapping->start; + + /* first resource allocation for this label-id or dimm */ + res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n); + if (!res) + rc = -EBUSY; + + nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc); + return rc ? n : 0; +} + +static bool space_valid(bool is_pmem, bool is_reserve, + struct nd_label_id *label_id, struct resource *res) +{ + /* + * For BLK-space any space is valid, for PMEM-space, it must be + * contiguous with an existing allocation unless we are + * reserving pmem. 
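+ * (A NULL @res denotes the free space at the very start of the mapping.)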
+ */ + if (is_reserve || !is_pmem) + return true; + if (!res || strcmp(res->name, label_id->id) == 0) + return true; + return false; +} + +enum alloc_loc { + ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER, +}; + +static resource_size_t scan_allocate(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; + bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + const resource_size_t to_allocate = n; + struct resource *res; + int first; + + retry: + first = 0; + for_each_dpa_resource(ndd, res) { + resource_size_t allocate, available = 0, free_start, free_end; + struct resource *next = res->sibling, *new_res = NULL; + enum alloc_loc loc = ALLOC_ERR; + const char *action; + int rc = 0; + + /* ignore resources outside this nd_mapping */ + if (res->start > mapping_end) + continue; + if (res->end < nd_mapping->start) + continue; + + /* space at the beginning of the mapping */ + if (!first++ && res->start > nd_mapping->start) { + free_start = nd_mapping->start; + available = res->start - free_start; + if (space_valid(is_pmem, is_reserve, label_id, NULL)) + loc = ALLOC_BEFORE; + } + + /* space between allocations */ + if (!loc && next) { + free_start = res->start + resource_size(res); + free_end = min(mapping_end, next->start - 1); + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_MID; + } + } + + /* space at the end of the mapping */ + if (!loc && !next) { + free_start = res->start + resource_size(res); + free_end = mapping_end; + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_AFTER; + } + } + + if (!loc || !available) + continue; + allocate = min(available, n); + switch (loc) { + case ALLOC_BEFORE: + if (strcmp(res->name, label_id->id) == 0) { + /* adjust current resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(res, res->start - allocate, + resource_size(res) + allocate); + action = "cur grow up"; + } else + action = "allocate"; + break; + case ALLOC_MID: + if (strcmp(next->name, label_id->id) == 0) { + /* adjust next resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(next, next->start + - allocate, resource_size(next) + + allocate); + new_res = next; + action = "next grow up"; + } else if (strcmp(res->name, label_id->id) == 0) { + action = "grow down"; + } else + action = "allocate"; + break; + case ALLOC_AFTER: + if (strcmp(res->name, label_id->id) == 0) + action = "grow down"; + else + action = "allocate"; + break; + default: + return n; + } + + if (strcmp(action, "allocate") == 0) { + /* BLK allocate bottom up */ + if (!is_pmem) + free_start += available - allocate; + else if (!is_reserve && free_start != nd_mapping->start) + return n; + + new_res = nvdimm_allocate_dpa(ndd, label_id, + free_start, allocate); + if (!new_res) + rc = -EBUSY; + } else if (strcmp(action, "grow down") == 0) { + /* adjust current resource down */ + rc = adjust_resource(res, res->start, resource_size(res) + + allocate); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + } + + if (!new_res) + new_res = res; + + nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n", + action, loc, rc); + + if (rc) + return n; + + n -= allocate; + if 
(n) { + /* + * Retry scan with newly inserted resources. + * For example, if we did an ALLOC_BEFORE + * insertion there may also have been space + * available for an ALLOC_AFTER insertion, so we + * need to check this same resource again + */ + goto retry; + } else + return 0; + } + + /* + * If we allocated nothing in the BLK case it may be because we are in + * an initial "pmem-reserve pass". Only do an initial BLK allocation + * when none of the DPA space is reserved. + */ + if ((is_pmem || !ndd->dpa.child) && n == to_allocate) + return init_dpa_allocation(label_id, nd_region, nd_mapping, n); + return n; +} + +static int merge_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + if (strncmp("pmem", label_id->id, 4) == 0) + return 0; + retry: + for_each_dpa_resource(ndd, res) { + int rc; + struct resource *next = res->sibling; + resource_size_t end = res->start + resource_size(res); + + if (!next || strcmp(res->name, label_id->id) != 0 + || strcmp(next->name, label_id->id) != 0 + || end != next->start) + continue; + end += resource_size(next); + nvdimm_free_dpa(ndd, next); + rc = adjust_resource(res, res->start, end - res->start); + nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc); + if (rc) + return rc; + res->flags |= DPA_RESOURCE_ADJUSTED; + goto retry; + } + + return 0; +} + +static int __reserve_free_pmem(struct device *dev, void *data) +{ + struct nvdimm *nvdimm = data; + struct nd_region *nd_region; + struct nd_label_id label_id; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region->ndr_mappings == 0) + return 0; + + memset(&label_id, 0, sizeof(label_id)); + strcat(label_id.id, "pmem-reserve"); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t n, rem = 0; + + if (nd_mapping->nvdimm != nvdimm) + continue; + + n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem); + if (n == 0) + return 0; + rem = scan_allocate(nd_region, nd_mapping, &label_id, n); + dev_WARN_ONCE(&nd_region->dev, rem, + "pmem reserve underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + return rem ? -ENXIO : 0; + } + + return 0; +} + +static void release_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res, *_res; + + for_each_dpa_resource_safe(ndd, res, _res) + if (strcmp(res->name, "pmem-reserve") == 0) + nvdimm_free_dpa(ndd, res); +} + +static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int rc; + + rc = device_for_each_child(&nvdimm_bus->dev, nvdimm, + __reserve_free_pmem); + if (rc) + release_free_pmem(nvdimm_bus, nd_mapping); + return rc; +} + +/** + * grow_dpa_allocation - for each dimm allocate n bytes for @label_id + * @nd_region: the set of dimms to allocate @n more bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to add to the existing allocation + * + * Assumes resources are ordered. For BLK regions, first consume + * BLK-only available DPA free space, then consume PMEM-aliased DPA + * space starting at the highest DPA. 
For PMEM regions start + * allocations from the start of an interleave set and end at the first + * BLK allocation or the end of the interleave set, whichever comes + * first. + */ +static int grow_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t rem = n; + int rc, j; + + /* + * In the BLK case try once with all unallocated PMEM + * reserved, and once without + */ + for (j = is_pmem; j < 2; j++) { + bool blk_only = j == 0; + + if (blk_only) { + rc = reserve_free_pmem(nvdimm_bus, nd_mapping); + if (rc) + return rc; + } + rem = scan_allocate(nd_region, nd_mapping, + label_id, rem); + if (blk_only) + release_free_pmem(nvdimm_bus, nd_mapping); + + /* try again and allow encroachments into PMEM */ + if (rem == 0) + break; + } + + dev_WARN_ONCE(&nd_region->dev, rem, + "allocation underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + if (rem) + return -ENXIO; + + rc = merge_dpa(nd_region, nd_mapping, label_id); + if (rc) + return rc; + } + + return 0; +} + +static void nd_namespace_pmem_set_size(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + struct resource *res = &nspm->nsio.res; + + res->start = nd_region->ndr_start; + res->end = nd_region->ndr_start + size - 1; +} + +static ssize_t __size_store(struct device *dev, unsigned long long val) +{ + resource_size_t allocated = 0, available = 0; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_mapping *nd_mapping; + struct nvdimm_drvdata *ndd; + struct nd_label_id label_id; + u32 flags = 0, remainder; + u8 *uuid = NULL; + int rc, i; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + /* + * We need a uuid for the allocation-label and dimm(s) on which + * to store the label. + */ + if (!uuid || nd_region->ndr_mappings == 0) + return -ENXIO; + + div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + if (remainder) { + dev_dbg(dev, "%llu is not %dK aligned\n", val, + (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + return -EINVAL; + } + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + ndd = to_ndd(nd_mapping); + + /* + * All dimms in an interleave set, or the base dimm for a blk + * region, need to be enabled for the size to be changed. 
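+ * (to_ndd() returns NULL for a disabled dimm, so a NULL ndd fails the + * resize with -ENXIO below)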
+ */ + if (!ndd) + return -ENXIO; + + allocated += nvdimm_allocated_dpa(ndd, &label_id); + } + available = nd_region_available_dpa(nd_region); + + if (val > available + allocated) + return -ENOSPC; + + if (val == allocated) + return 0; + + val = div_u64(val, nd_region->ndr_mappings); + allocated = div_u64(allocated, nd_region->ndr_mappings); + if (val < allocated) + rc = shrink_dpa_allocation(nd_region, &label_id, + allocated - val); + else + rc = grow_dpa_allocation(nd_region, &label_id, val - allocated); + + if (rc) + return rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + nd_namespace_pmem_set_size(nd_region, nspm, + val * nd_region->ndr_mappings); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + /* + * Try to delete the namespace if we deleted all of its + * allocation, this is not the seed device for the + * region, and it is not actively claimed by a btt + * instance. + */ + if (val == 0 && nd_region->ns_seed != dev + && !nsblk->common.claim) + nd_device_unregister(dev, ND_ASYNC); + } + + return rc; +} + +static ssize_t size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + unsigned long long val; + u8 **uuid = NULL; + int rc; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __size_store(dev, val); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = &nsblk->uuid; + } + + if (rc == 0 && val == 0 && uuid) { + /* setting size zero == 'delete namespace' */ + kfree(*uuid); + *uuid = NULL; + } + + dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 + ? "fail" : "success", rc); + + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? 
rc : len; +} + +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + struct device *dev = &ndns->dev; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return resource_size(&nspm->nsio.res); + } else if (is_namespace_blk(dev)) { + return nd_namespace_blk_size(to_nd_namespace_blk(dev)); + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return resource_size(&nsio->res); + } else + WARN_ONCE(1, "unknown namespace type\n"); + return 0; +} + +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + resource_size_t size; + + nvdimm_bus_lock(&ndns->dev); + size = __nvdimm_namespace_capacity(ndns); + nvdimm_bus_unlock(&ndns->dev); + + return size; +} +EXPORT_SYMBOL(nvdimm_namespace_capacity); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%llu\n", (unsigned long long) + nvdimm_namespace_capacity(to_ndns(dev))); +} +static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + } else + return -ENXIO; + + if (uuid) + return sprintf(buf, "%pUb\n", uuid); + return sprintf(buf, "\n"); +} + +/** + * namespace_update_uuid - check for a unique uuid and whether we're "renaming" + * @nd_region: parent region so we can updates all dimms in the set + * @dev: namespace type for generating label_id + * @new_uuid: incoming uuid + * @old_uuid: reference to the uuid storage location in the namespace object + */ +static int namespace_update_uuid(struct nd_region *nd_region, + struct device *dev, u8 *new_uuid, u8 **old_uuid) +{ + u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0; + struct nd_label_id old_label_id; + struct nd_label_id new_label_id; + int i; + + if (!nd_is_uuid_unique(dev, new_uuid)) + return -EINVAL; + + if (*old_uuid == NULL) + goto out; + + /* + * If we've already written a label with this uuid, then it's + * too late to rename because we can't reliably update the uuid + * without losing the old namespace. Userspace must delete this + * namespace to abandon the old uuid. + */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + /* + * This check by itself is sufficient because old_uuid + * would be NULL above if this uuid did not exist in the + * currently written set. + * + * FIXME: can we delete uuid with zero dpa allocated? 
+ */ + if (nd_mapping->labels) + return -EBUSY; + } + + nd_label_gen_id(&old_label_id, *old_uuid, flags); + nd_label_gen_id(&new_label_id, new_uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, old_label_id.id) == 0) + sprintf((void *) res->name, "%s", + new_label_id.id); + } + kfree(*old_uuid); + out: + *old_uuid = new_uuid; + return 0; +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = NULL; + ssize_t rc = 0; + u8 **ns_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_uuid = &nsblk->uuid; + } else + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_uuid_store(dev, &uuid, buf, len); + if (rc >= 0) + rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + else + kfree(uuid); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct resource *res; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + res = &nspm->nsio.res; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + res = &nsio->res; + } else + return -ENXIO; + + /* no address to convey if the namespace has no allocation */ + if (resource_size(res) == 0) + return -ENXIO; + return sprintf(buf, "%#llx\n", (unsigned long long) res->start); +} +static DEVICE_ATTR_RO(resource); + +static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!is_namespace_blk(dev)) + return -ENXIO; + + return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc = 0; + + if (!is_namespace_blk(dev)) + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, + ns_lbasize_supported); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, + rc, rc < 0 ? "tried" : "wrote", buf, + buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? 
rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t dpa_extents_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_label_id label_id; + int count = 0, i; + u8 *uuid = NULL; + u32 flags = 0; + + nvdimm_bus_lock(dev); + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + flags = 0; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + if (!uuid) + goto out; + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + count++; + } + out: + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%d\n", count); +} +static DEVICE_ATTR_RO(dpa_extents); + +static ssize_t holder_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_common *ndns = to_ndns(dev); + ssize_t rc; + + device_lock(dev); + rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(holder); + +static ssize_t force_raw_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool force_raw; + int rc = strtobool(buf, &force_raw); + + if (rc) + return rc; + + to_ndns(dev)->force_raw = force_raw; + return len; +} + +static ssize_t force_raw_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_ndns(dev)->force_raw); +} +static DEVICE_ATTR_RW(force_raw); + +static struct attribute *nd_namespace_attributes[] = { + &dev_attr_nstype.attr, + &dev_attr_size.attr, + &dev_attr_uuid.attr, + &dev_attr_holder.attr, + &dev_attr_resource.attr, + &dev_attr_alt_name.attr, + &dev_attr_force_raw.attr, + &dev_attr_sector_size.attr, + &dev_attr_dpa_extents.attr, + NULL, +}; + +static umode_t namespace_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_resource.attr) { + if (is_namespace_blk(dev)) + return 0; + return a->mode; + } + + if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { + if (a == &dev_attr_size.attr) + return S_IWUSR | S_IRUGO; + + if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr) + return 0; + + return a->mode; + } + + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr + || a == &dev_attr_holder.attr + || a == &dev_attr_force_raw.attr) + return a->mode; + + return 0; +} + +static struct attribute_group nd_namespace_attribute_group = { + .attrs = nd_namespace_attributes, + .is_visible = namespace_visible, +}; + +static const struct attribute_group *nd_namespace_attribute_groups[] = { + &nd_device_attribute_group, + &nd_namespace_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) +{ + struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_namespace_common *ndns; + resource_size_t size; + + if (nd_btt) { + ndns = nd_btt->ndns; + if (!ndns) + return ERR_PTR(-ENODEV); + + /* + * Flush any in-progess probes / removals in the driver + * for the raw personality of this namespace. 
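+ * (the back-to-back device_lock()/device_unlock() pair below simply + * waits for any in-flight probe or removal to complete before + * ndns->dev.driver is checked)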
+ */ + device_lock(&ndns->dev); + device_unlock(&ndns->dev); + if (ndns->dev.driver) { + dev_dbg(&ndns->dev, "is active, can't bind %s\n", + dev_name(&nd_btt->dev)); + return ERR_PTR(-EBUSY); + } + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + "host (%s) vs claim (%s) mismatch\n", + dev_name(&nd_btt->dev), + dev_name(ndns->claim))) + return ERR_PTR(-ENXIO); + } else { + ndns = to_ndns(dev); + if (ndns->claim) { + dev_dbg(dev, "claimed by %s, failing probe\n", + dev_name(ndns->claim)); + + return ERR_PTR(-ENXIO); + } + } + + size = nvdimm_namespace_capacity(ndns); + if (size < ND_MIN_NAMESPACE_SIZE) { + dev_dbg(&ndns->dev, "%pa, too small must be at least %#x\n", + &size, ND_MIN_NAMESPACE_SIZE); + return ERR_PTR(-ENODEV); + } + + if (is_namespace_pmem(&ndns->dev)) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(&ndns->dev); + if (!nspm->uuid) { + dev_dbg(&ndns->dev, "%s: uuid not set\n", __func__); + return ERR_PTR(-ENODEV); + } + } else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + if (!nd_namespace_blk_validate(nsblk)) + return ERR_PTR(-ENODEV); + } + + return ndns; +} +EXPORT_SYMBOL(nvdimm_namespace_common_probe); + +static struct device **create_namespace_io(struct nd_region *nd_region) +{ + struct nd_namespace_io *nsio; + struct device *dev, **devs; + struct resource *res; + + nsio = kzalloc(sizeof(*nsio), GFP_KERNEL); + if (!nsio) + return NULL; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) { + kfree(nsio); + return NULL; + } + + dev = &nsio->common.dev; + dev->type = &namespace_io_device_type; + dev->parent = &nd_region->dev; + res = &nsio->res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + res->start = nd_region->ndr_start; + res->end = res->start + nd_region->ndr_size - 1; + + devs[0] = dev; + return devs; +} + +static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, + u64 cookie, u16 pos) +{ + struct nd_namespace_label *found = NULL; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + bool found_uuid = false; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + u16 position = __le16_to_cpu(nd_label->position); + u16 nlabel = __le16_to_cpu(nd_label->nlabel); + + if (isetcookie != cookie) + continue; + + if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + + if (found_uuid) { + dev_dbg(to_ndd(nd_mapping)->dev, + "%s duplicate entry for uuid\n", + __func__); + return false; + } + found_uuid = true; + if (nlabel != nd_region->ndr_mappings) + continue; + if (position != pos) + continue; + found = nd_label; + break; + } + if (found) + break; + } + return found != NULL; +} + +static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) +{ + struct nd_namespace_label *select = NULL; + int i; + + if (!pmem_id) + return -ENODEV; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + u64 hw_start, hw_end, pmem_start, pmem_end; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) + if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) + break; + + if (!nd_label) { + WARN_ON(1); + return -EINVAL; + } + + select = nd_label; + /* + * Check that this label is compliant with the dpa + * range published in NFIT + */ + 
hw_start = nd_mapping->start; + hw_end = hw_start + nd_mapping->size; + pmem_start = __le64_to_cpu(select->dpa); + pmem_end = pmem_start + __le64_to_cpu(select->rawsize); + if (pmem_start == hw_start && pmem_end <= hw_end) + /* pass */; + else + return -EINVAL; + + nd_mapping->labels[0] = select; + nd_mapping->labels[1] = NULL; + } + return 0; +} + +/** + * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * @nd_region: region with mappings to validate + */ +static int find_pmem_label_set(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region); + struct nd_namespace_label *nd_label; + u8 select_id[NSLABEL_UUID_LEN]; + resource_size_t size = 0; + u8 *pmem_id = NULL; + int rc = -ENODEV, l; + u16 i; + + if (cookie == 0) + return -ENXIO; + + /* + * Find a complete set of labels by uuid. By definition we can start + * with any mapping as the reference label + */ + for_each_label(l, nd_label, nd_region->mapping[0].labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + + if (isetcookie != cookie) + continue; + + for (i = 0; i < nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, + cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + /* + * Give up if we don't find an instance of a + * uuid at each position (from 0 to + * nd_region->ndr_mappings - 1), or if we find a + * dimm with two instances of the same uuid. + */ + rc = -EINVAL; + goto err; + } else if (pmem_id) { + /* + * If there is more than one valid uuid set, we + * need userspace to clean this up. + */ + rc = -EBUSY; + goto err; + } + memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); + pmem_id = select_id; + } + + /* + * Fix up each mapping's 'labels' to have the validated pmem label for + * that position at labels[0], and NULL at labels[1]. In the process, + * check that the namespace aligns with the interleave set. We know + * that it does not overlap with any blk namespaces by virtue of + * the dimm being enabled (i.e. nd_label_reserve_dpa() + * succeeded). 
+ */ + rc = select_pmem_id(nd_region, pmem_id); + if (rc) + goto err; + + /* Calculate total size and populate namespace properties from label0 */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *label0 = nd_mapping->labels[0]; + + size += __le64_to_cpu(label0->rawsize); + if (__le16_to_cpu(label0->position) != 0) + continue; + WARN_ON(nspm->alt_name || nspm->uuid); + nspm->alt_name = kmemdup((void __force *) label0->name, + NSLABEL_NAME_LEN, GFP_KERNEL); + nspm->uuid = kmemdup((void __force *) label0->uuid, + NSLABEL_UUID_LEN, GFP_KERNEL); + } + + if (!nspm->alt_name || !nspm->uuid) { + rc = -ENOMEM; + goto err; + } + + nd_namespace_pmem_set_size(nd_region, nspm, size); + + return 0; + err: + switch (rc) { + case -EINVAL: + dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + break; + case -ENODEV: + dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + break; + default: + dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", + __func__, rc); + break; + } + return rc; +} + +static struct device **create_namespace_pmem(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct device *dev, **devs; + struct resource *res; + int rc; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + rc = find_pmem_label_set(nd_region, nspm); + if (rc == -ENODEV) { + int i; + + /* Pass, try to permit namespace creation... */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + /* Publish a zero-sized namespace for userspace to configure. 
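+ * A zero size means no capacity has been provisioned yet; userspace + * can allocate DPA later by writing the namespace 'size' attribute.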
*/ + nd_namespace_pmem_set_size(nd_region, nspm, 0); + + rc = 0; + } else if (rc) + goto err; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) + goto err; + + devs[0] = dev; + return devs; + + err: + namespace_pmem_release(&nspm->nsio.common.dev); + return NULL; +} + +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start) +{ + struct nd_label_id label_id; + struct resource *res; + + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + res = krealloc(nsblk->res, + sizeof(void *) * (nsblk->num_resources + 1), + GFP_KERNEL); + if (!res) + return NULL; + nsblk->res = (struct resource **) res; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0 + && res->start == start) { + nsblk->res[nsblk->num_resources++] = res; + return res; + } + return NULL; +} + +static struct device *nd_namespace_blk_create(struct nd_region *nd_region) +{ + struct nd_namespace_blk *nsblk; + struct device *dev; + + if (!is_nd_blk(&nd_region->dev)) + return NULL; + + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + return NULL; + + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL); + if (nsblk->id < 0) { + kfree(nsblk); + return NULL; + } + dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id); + dev->parent = &nd_region->dev; + dev->groups = nd_namespace_attribute_groups; + + return &nsblk->common.dev; +} + +void nd_region_create_blk_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->ns_seed = nd_namespace_blk_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->ns_seed) + dev_err(&nd_region->dev, "failed to create blk namespace\n"); + else + nd_device_register(nd_region->ns_seed); +} + +void nd_region_create_btt_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->btt_seed = nd_btt_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->btt_seed) + dev_err(&nd_region->dev, "failed to create btt namespace\n"); +} + +static struct device **create_namespace_blk(struct nd_region *nd_region) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nd_namespace_label *nd_label; + struct device *dev, **devs = NULL; + struct nd_namespace_blk *nsblk; + struct nvdimm_drvdata *ndd; + int i, l, count = 0; + struct resource *res; + + if (nd_region->ndr_mappings == 0) + return NULL; + + ndd = to_ndd(nd_mapping); + for_each_label(l, nd_label, nd_mapping->labels) { + u32 flags = __le32_to_cpu(nd_label->flags); + char *name[NSLABEL_NAME_LEN]; + struct device **__devs; + + if (flags & NSLABEL_FLAG_LOCAL) + /* pass */; + else + continue; + + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + if (memcmp(nsblk->uuid, nd_label->uuid, + NSLABEL_UUID_LEN) == 0) { + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + break; + } + } + if (i < count) + continue; + __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); + if (!__devs) + goto err; + memcpy(__devs, devs, sizeof(dev) * count); + kfree(devs); + devs = __devs; + + nsblk 
= kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + dev_set_name(dev, "namespace%d.%d", nd_region->id, count); + devs[count++] = dev; + nsblk->id = -1; + nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); + nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, + GFP_KERNEL); + if (!nsblk->uuid) + goto err; + memcpy(name, nd_label->name, NSLABEL_NAME_LEN); + if (name[0]) + nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, + GFP_KERNEL); + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + } + + dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n", + __func__, count, count == 1 ? "" : "s"); + + if (count == 0) { + /* Publish a zero-sized namespace for userspace to configure. */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + devs = kcalloc(2, sizeof(dev), GFP_KERNEL); + if (!devs) + goto err; + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + devs[count++] = dev; + } + + return devs; + +err: + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + namespace_blk_release(&nsblk->common.dev); + } + kfree(devs); + return NULL; +} + +static int init_active_labels(struct nd_region *nd_region) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int count, j; + + /* + * If the dimm is disabled then prevent the region from + * being activated if it aliases DPA. 
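+ * A disabled dimm that does not set NDD_ALIASING is harmless here, so + * only the aliasing case fails the region probe with -ENXIO.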
+ */ + if (!ndd) { + if ((nvdimm->flags & NDD_ALIASING) == 0) + return 0; + dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", + dev_name(&nd_mapping->nvdimm->dev)); + return -ENXIO; + } + nd_mapping->ndd = ndd; + atomic_inc(&nvdimm->busy); + get_ndd(ndd); + + count = nd_label_active_count(ndd); + dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + if (!count) + continue; + nd_mapping->labels = kcalloc(count + 1, sizeof(void *), + GFP_KERNEL); + if (!nd_mapping->labels) + return -ENOMEM; + for (j = 0; j < count; j++) { + struct nd_namespace_label *label; + + label = nd_label_active(ndd, j); + nd_mapping->labels[j] = label; + } + } + + return 0; +} + +int nd_region_register_namespaces(struct nd_region *nd_region, int *err) +{ + struct device **devs = NULL; + int i, rc = 0, type; + + *err = 0; + nvdimm_bus_lock(&nd_region->dev); + rc = init_active_labels(nd_region); + if (rc) { + nvdimm_bus_unlock(&nd_region->dev); + return rc; + } + + type = nd_region_to_nstype(nd_region); + switch (type) { + case ND_DEVICE_NAMESPACE_IO: + devs = create_namespace_io(nd_region); + break; + case ND_DEVICE_NAMESPACE_PMEM: + devs = create_namespace_pmem(nd_region); + break; + case ND_DEVICE_NAMESPACE_BLK: + devs = create_namespace_blk(nd_region); + break; + default: + break; + } + nvdimm_bus_unlock(&nd_region->dev); + + if (!devs) + return -ENODEV; + + for (i = 0; devs[i]; i++) { + struct device *dev = devs[i]; + int id; + + if (type == ND_DEVICE_NAMESPACE_BLK) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(dev); + id = ida_simple_get(&nd_region->ns_ida, 0, 0, + GFP_KERNEL); + nsblk->id = id; + } else + id = i; + + if (id < 0) + break; + dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + dev->groups = nd_namespace_attribute_groups; + nd_device_register(dev); + } + if (i) + nd_region->ns_seed = devs[0]; + + if (devs[i]) { + int j; + + for (j = i; devs[j]; j++) { + struct device *dev = devs[j]; + + device_initialize(dev); + put_device(dev); + } + *err = j - i; + /* + * All of the namespaces we tried to register failed, so + * fail region activation. + */ + if (*err == 0) + rc = -ENODEV; + } + kfree(devs); + + if (rc == -ENODEV) + return rc; + + return i; +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h new file mode 100644 index 000000000000..e1970c71ad1c --- /dev/null +++ b/drivers/nvdimm/nd-core.h @@ -0,0 +1,83 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef __ND_CORE_H__ +#define __ND_CORE_H__ +#include <linux/libnvdimm.h> +#include <linux/device.h> +#include <linux/libnvdimm.h> +#include <linux/sizes.h> +#include <linux/mutex.h> +#include <linux/nd.h> + +extern struct list_head nvdimm_bus_list; +extern struct mutex nvdimm_bus_list_mutex; +extern int nvdimm_major; + +struct nvdimm_bus { + struct nvdimm_bus_descriptor *nd_desc; + wait_queue_head_t probe_wait; + struct module *module; + struct list_head list; + struct device dev; + int id, probe_active; + struct mutex reconfig_mutex; +}; + +struct nvdimm { + unsigned long flags; + void *provider_data; + unsigned long *dsm_mask; + struct device dev; + atomic_t busy; + int id; +}; + +bool is_nvdimm(struct device *dev); +bool is_nd_pmem(struct device *dev); +bool is_nd_blk(struct device *dev); +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); +int __init nvdimm_bus_init(void); +void nvdimm_bus_exit(void); +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); +struct nd_region; +void nd_region_create_blk_seed(struct nd_region *nd_region); +void nd_region_create_btt_seed(struct nd_region *nd_region); +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); +void nd_synchronize(void); +int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus); +void __nd_device_register(struct device *dev); +int nd_match_dimm(struct device *dev, void *data); +struct nd_label_id; +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags); +bool nd_is_uuid_unique(struct device *dev, u8 *uuid); +struct nd_region; +struct nvdimm_drvdata; +struct nd_mapping; +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap); +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id); +struct nd_mapping; +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start); +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); +void get_ndd(struct nvdimm_drvdata *ndd); +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +#endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h new file mode 100644 index 000000000000..c41f53e74277 --- /dev/null +++ b/drivers/nvdimm/nd.h @@ -0,0 +1,220 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef __ND_H__ +#define __ND_H__ +#include <linux/libnvdimm.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/types.h> +#include "label.h" + +enum { + /* + * Limits the maximum number of block apertures a dimm can + * support and is an input to the geometry/on-disk-format of a + * BTT instance + */ + ND_MAX_LANES = 256, + SECTOR_SHIFT = 9, + INT_LBASIZE_ALIGNMENT = 64, +}; + +struct nvdimm_drvdata { + struct device *dev; + int nsindex_size; + struct nd_cmd_get_config_size nsarea; + void *data; + int ns_current, ns_next; + struct resource dpa; + struct kref kref; +}; + +struct nd_region_namespaces { + int count; + int active; +}; + +static inline struct nd_namespace_index *to_namespace_index( + struct nvdimm_drvdata *ndd, int i) +{ + if (i < 0) + return NULL; + + return ndd->data + sizeof_namespace_index(ndd) * i; +} + +static inline struct nd_namespace_index *to_current_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_current); +} + +static inline struct nd_namespace_index *to_next_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_next); +} + +#define nd_dbg_dpa(r, d, res, fmt, arg...) \ + dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ + (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ + (unsigned long long) (res ? resource_size(res) : 0), \ + (unsigned long long) (res ? res->start : 0), ##arg) + +#define for_each_label(l, label, labels) \ + for (l = 0; (label = labels ? labels[l] : NULL); l++) + +#define for_each_dpa_resource(ndd, res) \ + for (res = (ndd)->dpa.child; res; res = res->sibling) + +#define for_each_dpa_resource_safe(ndd, res, next) \ + for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ + res; res = next, next = next ? next->sibling : NULL) + +struct nd_percpu_lane { + int count; + spinlock_t lock; +}; + +struct nd_region { + struct device dev; + struct ida ns_ida; + struct ida btt_ida; + struct device *ns_seed; + struct device *btt_seed; + u16 ndr_mappings; + u64 ndr_size; + u64 ndr_start; + int id, num_lanes, ro, numa_node; + void *provider_data; + struct nd_interleave_set *nd_set; + struct nd_percpu_lane __percpu *lane; + struct nd_mapping mapping[0]; +}; + +struct nd_blk_region { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + void *blk_provider_data; + struct nd_region nd_region; +}; + +/* + * Lookup next in the repeating sequence of 01, 10, and 11. 
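+ * For example, nd_inc_seq(1) == 2, nd_inc_seq(2) == 3 and + * nd_inc_seq(3) == 1, while the unused 0 entry maps back to 0.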
+ */ +static inline unsigned nd_inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} + +struct btt; +struct nd_btt { + struct device dev; + struct nd_namespace_common *ndns; + struct btt *btt; + unsigned long lbasize; + u8 *uuid; + int id; +}; + +enum nd_async_mode { + ND_SYNC, + ND_ASYNC, +}; + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); +void wait_nvdimm_bus_probe_idle(struct device *dev); +void nd_device_register(struct device *dev); +void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len); +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf); +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported); +int __init nvdimm_init(void); +int __init nd_region_init(void); +void nvdimm_exit(void); +void nd_region_exit(void); +struct nvdimm; +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len); +struct nd_btt *to_nd_btt(struct device *dev); +struct btt_sb; +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); +#if IS_ENABLED(CONFIG_BTT) +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_btt(struct device *dev); +struct device *nd_btt_create(struct nd_region *nd_region); +#else +static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_btt(struct device *dev) +{ + return false; +} + +static inline struct device *nd_btt_create(struct nd_region *nd_region) +{ + return NULL; +} + +#endif +struct nd_region *to_nd_region(struct device *dev); +int nd_region_to_nstype(struct nd_region *nd_region); +int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +void nvdimm_bus_lock(struct device *dev); +void nvdimm_bus_unlock(struct device *dev); +bool is_nvdimm_bus_locked(struct device *dev); +int nvdimm_revalidate_disk(struct gendisk *disk); +void nvdimm_drvdata_release(struct kref *kref); +void put_ndd(struct nvdimm_drvdata *ndd); +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n); +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev); +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns); +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name); +int nd_blk_region_init(struct nd_region *nd_region); +void __nd_iostat_start(struct bio *bio, unsigned long *start); +static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + + if (!blk_queue_io_stat(disk->queue)) + return false; + + __nd_iostat_start(bio, start); + return true; +} +void nd_iostat_end(struct bio *bio, unsigned long start); +resource_size_t 
nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +#endif /* __ND_H__ */ diff --git a/drivers/block/pmem.c b/drivers/nvdimm/pmem.c index 095dfaadcaa5..ade9eb917a4d 100644 --- a/drivers/block/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -1,7 +1,7 @@ /* * Persistent Memory Driver * - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014-2015, Intel Corporation. * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>. * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>. * @@ -23,8 +23,9 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> - -#define PMEM_MINORS 16 +#include <linux/pmem.h> +#include <linux/nd.h> +#include "nd.h" struct pmem_device { struct request_queue *pmem_queue; @@ -32,12 +33,11 @@ struct pmem_device { /* One contiguous memory region per device */ phys_addr_t phys_addr; - void *virt_addr; + void __pmem *virt_addr; size_t size; }; static int pmem_major; -static atomic_t pmem_index; static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, @@ -45,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, { void *mem = kmap_atomic(page); size_t pmem_off = sector << 9; + void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { - memcpy(mem + off, pmem->virt_addr + pmem_off, len); + memcpy_from_pmem(mem + off, pmem_addr, len); flush_dcache_page(page); } else { flush_dcache_page(page); - memcpy(pmem->virt_addr + pmem_off, mem + off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); } kunmap_atomic(mem); @@ -59,31 +60,24 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, static void pmem_make_request(struct request_queue *q, struct bio *bio) { - struct block_device *bdev = bio->bi_bdev; - struct pmem_device *pmem = bdev->bd_disk->private_data; - int rw; + bool do_acct; + unsigned long start; struct bio_vec bvec; - sector_t sector; struct bvec_iter iter; - int err = 0; - - if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) { - err = -EIO; - goto out; - } - - BUG_ON(bio->bi_rw & REQ_DISCARD); + struct block_device *bdev = bio->bi_bdev; + struct pmem_device *pmem = bdev->bd_disk->private_data; - rw = bio_data_dir(bio); - sector = bio->bi_iter.bi_sector; - bio_for_each_segment(bvec, bio, iter) { + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset, - rw, sector); - sector += bvec.bv_len >> 9; - } + bio_data_dir(bio), iter.bi_sector); + if (do_acct) + nd_iostat_end(bio, start); -out: - bio_endio(bio, err); + if (bio_data_dir(bio)) + wmb_pmem(); + + bio_endio(bio, 0); } static int pmem_rw_page(struct block_device *bdev, sector_t sector, @@ -106,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, if (!pmem) return -ENODEV; - *kaddr = pmem->virt_addr + offset; + /* FIXME convert DAX to comprehend that this mapping has a lifetime */ + *kaddr = (void __force *) pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; @@ -116,124 +111,165 @@ static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, .rw_page = pmem_rw_page, .direct_access = pmem_direct_access, + .revalidate_disk = nvdimm_revalidate_disk, }; -static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res) +static struct pmem_device *pmem_alloc(struct device *dev, + struct resource *res, int id) { struct pmem_device *pmem; - struct gendisk *disk; - int idx, 
err; - err = -ENOMEM; pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); if (!pmem) - goto out; + return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); + if (!arch_has_pmem_api()) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + + if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", + &pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-EBUSY); + } - err = -EINVAL; - if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) { - dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - goto out_free_dev; + pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); + if (!pmem->virt_addr) { + release_mem_region(pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-ENXIO); } - /* - * Map the memory as write-through, as we can't write back the contents - * of the CPU caches in case of a crash. - */ - err = -ENOMEM; - pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) - goto out_release_region; + return pmem; +} + +static void pmem_detach_disk(struct pmem_device *pmem) +{ + del_gendisk(pmem->pmem_disk); + put_disk(pmem->pmem_disk); + blk_cleanup_queue(pmem->pmem_queue); +} + +static int pmem_attach_disk(struct nd_namespace_common *ndns, + struct pmem_device *pmem) +{ + struct gendisk *disk; pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); if (!pmem->pmem_queue) - goto out_unmap; + return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); - blk_queue_max_hw_sectors(pmem->pmem_queue, 1024); + blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); - disk = alloc_disk(PMEM_MINORS); - if (!disk) - goto out_free_queue; - - idx = atomic_inc_return(&pmem_index) - 1; + disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(pmem->pmem_queue); + return -ENOMEM; + } disk->major = pmem_major; - disk->first_minor = PMEM_MINORS * idx; + disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "pmem%d", idx); - disk->driverfs_dev = dev; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + disk->driverfs_dev = &ndns->dev; set_capacity(disk, pmem->size >> 9); pmem->pmem_disk = disk; add_disk(disk); + revalidate_disk(disk); - return pmem; + return 0; +} -out_free_queue: - blk_cleanup_queue(pmem->pmem_queue); -out_unmap: - iounmap(pmem->virt_addr); -out_release_region: - release_mem_region(pmem->phys_addr, pmem->size); -out_free_dev: - kfree(pmem); -out: - return ERR_PTR(err); +static int pmem_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct pmem_device *pmem = dev_get_drvdata(ndns->claim); + + if (unlikely(offset + size > pmem->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) + memcpy_from_pmem(buf, pmem->virt_addr + offset, size); + else { + memcpy_to_pmem(pmem->virt_addr + offset, buf, size); + wmb_pmem(); + } + + return 0; } static void pmem_free(struct pmem_device *pmem) { - del_gendisk(pmem->pmem_disk); - put_disk(pmem->pmem_disk); - blk_cleanup_queue(pmem->pmem_queue); - iounmap(pmem->virt_addr); + memunmap_pmem(pmem->virt_addr); release_mem_region(pmem->phys_addr, pmem->size); kfree(pmem); } -static int pmem_probe(struct 
platform_device *pdev) +static int nd_pmem_probe(struct device *dev) { + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns; + struct nd_namespace_io *nsio; struct pmem_device *pmem; - struct resource *res; - - if (WARN_ON(pdev->num_resources > 1)) - return -ENXIO; + int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENXIO; + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); - pmem = pmem_alloc(&pdev->dev, res); + nsio = to_nd_namespace_io(&ndns->dev); + pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); - platform_set_drvdata(pdev, pmem); - - return 0; + dev_set_drvdata(dev, pmem); + ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, pmem) == 0) { + /* we'll come back as btt-pmem */ + rc = -ENXIO; + } else + rc = pmem_attach_disk(ndns, pmem); + if (rc) + pmem_free(pmem); + return rc; } -static int pmem_remove(struct platform_device *pdev) +static int nd_pmem_remove(struct device *dev) { - struct pmem_device *pmem = platform_get_drvdata(pdev); + struct pmem_device *pmem = dev_get_drvdata(dev); + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + pmem_detach_disk(pmem); pmem_free(pmem); + return 0; } -static struct platform_driver pmem_driver = { - .probe = pmem_probe, - .remove = pmem_remove, - .driver = { - .owner = THIS_MODULE, - .name = "pmem", +MODULE_ALIAS("pmem"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); +static struct nd_device_driver nd_pmem_driver = { + .probe = nd_pmem_probe, + .remove = nd_pmem_remove, + .drv = { + .name = "nd_pmem", }, + .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) @@ -244,16 +280,19 @@ static int __init pmem_init(void) if (pmem_major < 0) return pmem_major; - error = platform_driver_register(&pmem_driver); - if (error) + error = nd_driver_register(&nd_pmem_driver); + if (error) { unregister_blkdev(pmem_major, "pmem"); - return error; + return error; + } + + return 0; } module_init(pmem_init); static void pmem_exit(void) { - platform_driver_unregister(&pmem_driver); + driver_unregister(&nd_pmem_driver.drv); unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c new file mode 100644 index 000000000000..f28f78ccff19 --- /dev/null +++ b/drivers/nvdimm/region.c @@ -0,0 +1,114 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/nd.h> +#include "nd.h" + +static int nd_region_probe(struct device *dev) +{ + int err, rc; + static unsigned long once; + struct nd_region_namespaces *num_ns; + struct nd_region *nd_region = to_nd_region(dev); + + if (nd_region->num_lanes > num_online_cpus() + && nd_region->num_lanes < num_possible_cpus() + && !test_and_set_bit(0, &once)) { + dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", + num_online_cpus(), nd_region->num_lanes, + num_possible_cpus()); + dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", + nd_region->num_lanes); + } + + rc = nd_blk_region_init(nd_region); + if (rc) + return rc; + + rc = nd_region_register_namespaces(nd_region, &err); + num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); + if (!num_ns) + return -ENOMEM; + + if (rc < 0) + return rc; + + num_ns->active = rc; + num_ns->count = rc + err; + dev_set_drvdata(dev, num_ns); + + if (rc && err && rc == err) + return -ENODEV; + + nd_region->btt_seed = nd_btt_create(nd_region); + if (err == 0) + return 0; + + /* + * Given multiple namespaces per region, we do not want to + * disable all the successfully registered peer namespaces upon + * a single registration failure. If userspace is missing a + * namespace that it expects it can disable/re-enable the region + * to retry discovery after correcting the failure. + * <regionX>/namespaces returns the current + * "<async-registered>/<total>" namespace count. + */ + dev_err(dev, "failed to register %d namespace%s, continuing...\n", + err, err == 1 ? "" : "s"); + return 0; +} + +static int child_unregister(struct device *dev, void *data) +{ + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static int nd_region_remove(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + /* flush attribute readers and disable */ + nvdimm_bus_lock(dev); + nd_region->ns_seed = NULL; + nd_region->btt_seed = NULL; + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + + device_for_each_child(dev, NULL, child_unregister); + return 0; +} + +static struct nd_device_driver nd_region_driver = { + .probe = nd_region_probe, + .remove = nd_region_remove, + .drv = { + .name = "nd_region", + }, + .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM, +}; + +int __init nd_region_init(void) +{ + return nd_driver_register(&nd_region_driver); +} + +void nd_region_exit(void) +{ + driver_unregister(&nd_region_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c new file mode 100644 index 000000000000..a5233422f9dc --- /dev/null +++ b/drivers/nvdimm/region_devs.c @@ -0,0 +1,787 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static DEFINE_IDA(region_ida); + +static void nd_region_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + u16 i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + put_device(&nvdimm->dev); + } + free_percpu(nd_region->lane); + ida_simple_remove(&region_ida, nd_region->id); + if (is_nd_blk(dev)) + kfree(to_nd_blk_region(dev)); + else + kfree(nd_region); +} + +static struct device_type nd_blk_device_type = { + .name = "nd_blk", + .release = nd_region_release, +}; + +static struct device_type nd_pmem_device_type = { + .name = "nd_pmem", + .release = nd_region_release, +}; + +static struct device_type nd_volatile_device_type = { + .name = "nd_volatile", + .release = nd_region_release, +}; + +bool is_nd_pmem(struct device *dev) +{ + return dev ? dev->type == &nd_pmem_device_type : false; +} + +bool is_nd_blk(struct device *dev) +{ + return dev ? dev->type == &nd_blk_device_type : false; +} + +struct nd_region *to_nd_region(struct device *dev) +{ + struct nd_region *nd_region = container_of(dev, struct nd_region, dev); + + WARN_ON(dev->type->release != nd_region_release); + return nd_region; +} +EXPORT_SYMBOL_GPL(to_nd_region); + +struct nd_blk_region *to_nd_blk_region(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + WARN_ON(!is_nd_blk(dev)); + return container_of(nd_region, struct nd_blk_region, nd_region); +} +EXPORT_SYMBOL_GPL(to_nd_blk_region); + +void *nd_region_provider_data(struct nd_region *nd_region) +{ + return nd_region->provider_data; +} +EXPORT_SYMBOL_GPL(nd_region_provider_data); + +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr) +{ + return ndbr->blk_provider_data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_provider_data); + +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data) +{ + ndbr->blk_provider_data = data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data); + +/** + * nd_region_to_nstype() - region to an integer namespace type + * @nd_region: region-device to interrogate + * + * This is the 'nstype' attribute of a region as well, an input to the + * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match + * namespace devices with namespace drivers. 
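+ * A PMEM region with at least one NDD_ALIASING dimm reports + * ND_DEVICE_NAMESPACE_PMEM, a PMEM region with no aliasing reports + * ND_DEVICE_NAMESPACE_IO, and a BLK region reports + * ND_DEVICE_NAMESPACE_BLK.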
+ */ +int nd_region_to_nstype(struct nd_region *nd_region) +{ + if (is_nd_pmem(&nd_region->dev)) { + u16 i, alias; + + for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (nvdimm->flags & NDD_ALIASING) + alias++; + } + if (alias) + return ND_DEVICE_NAMESPACE_PMEM; + else + return ND_DEVICE_NAMESPACE_IO; + } else if (is_nd_blk(&nd_region->dev)) { + return ND_DEVICE_NAMESPACE_BLK; + } + + return 0; +} +EXPORT_SYMBOL(nd_region_to_nstype); + +static int is_uuid_busy(struct device *dev, void *data) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = data; + + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_PMEM: { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) + break; + if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + case ND_DEVICE_NAMESPACE_BLK: { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!nsblk->uuid) + break; + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + default: + break; + } + + return 0; +} + +static int is_namespace_uuid_busy(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + return device_for_each_child(dev, data, is_uuid_busy); + return 0; +} + +/** + * nd_is_uuid_unique - verify that no other namespace has @uuid + * @dev: any device on a nvdimm_bus + * @uuid: uuid to check + */ +bool nd_is_uuid_unique(struct device *dev, u8 *uuid) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev)); + if (device_for_each_child(&nvdimm_bus->dev, uuid, + is_namespace_uuid_busy) != 0) + return false; + return true; +} + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long size = 0; + + if (is_nd_pmem(dev)) { + size = nd_region->ndr_size; + } else if (nd_region->ndr_mappings == 1) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + size = nd_mapping->size; + } + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t mappings_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ndr_mappings); +} +static DEVICE_ATTR_RO(mappings); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t set_cookie_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (is_nd_pmem(dev) && nd_set) + /* pass, should be precluded by region_visible */; + else + return -ENXIO; + + return sprintf(buf, "%#llx\n", nd_set->cookie); +} +static DEVICE_ATTR_RO(set_cookie); + +resource_size_t nd_region_available_dpa(struct nd_region *nd_region) +{ + resource_size_t blk_max_overlap = 0, available, overlap; + int i; + + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + + retry: + available = 0; + overlap = blk_max_overlap; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = 
&nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + /* if a dimm is disabled the available capacity is zero */ + if (!ndd) + return 0; + + if (is_nd_pmem(&nd_region->dev)) { + available += nd_pmem_available_dpa(nd_region, + nd_mapping, &overlap); + if (overlap > blk_max_overlap) { + blk_max_overlap = overlap; + goto retry; + } + } else if (is_nd_blk(&nd_region->dev)) { + available += nd_blk_available_dpa(nd_mapping); + } + } + + return available; +} + +static ssize_t available_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long available = 0; + + /* + * Flush in-flight updates and grab a snapshot of the available + * size. Of course, this value is potentially invalidated the + * memory nvdimm_bus_lock() is dropped, but that's userspace's + * problem to not race itself. + */ + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + available = nd_region_available_dpa(nd_region); + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%llu\n", available); +} +static DEVICE_ATTR_RO(available_size); + +static ssize_t init_namespaces_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (num_ns) + rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + else + rc = -ENXIO; + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(init_namespaces); + +static ssize_t namespace_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->ns_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(namespace_seed); + +static ssize_t btt_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->btt_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(btt_seed); + +static ssize_t read_only_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ro); +} + +static ssize_t read_only_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool ro; + int rc = strtobool(buf, &ro); + struct nd_region *nd_region = to_nd_region(dev); + + if (rc) + return rc; + + nd_region->ro = ro; + return len; +} +static DEVICE_ATTR_RW(read_only); + +static struct attribute *nd_region_attributes[] = { + &dev_attr_size.attr, + &dev_attr_nstype.attr, + &dev_attr_mappings.attr, + &dev_attr_btt_seed.attr, + &dev_attr_read_only.attr, + &dev_attr_set_cookie.attr, + &dev_attr_available_size.attr, + &dev_attr_namespace_seed.attr, + &dev_attr_init_namespaces.attr, + NULL, +}; + +static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + int type = nd_region_to_nstype(nd_region); + + if (a != &dev_attr_set_cookie.attr + && a != &dev_attr_available_size.attr) + return a->mode; + 
+ if ((type == ND_DEVICE_NAMESPACE_PMEM + || type == ND_DEVICE_NAMESPACE_BLK) + && a == &dev_attr_available_size.attr) + return a->mode; + else if (is_nd_pmem(dev) && nd_set) + return a->mode; + + return 0; +} + +struct attribute_group nd_region_attribute_group = { + .attrs = nd_region_attributes, + .is_visible = region_visible, +}; +EXPORT_SYMBOL_GPL(nd_region_attribute_group); + +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->cookie; + return 0; +} + +/* + * Upon successful probe/remove, take/release a reference on the + * associated interleave set (if present), and plant new btt + namespace + * seeds. Also, on the removal of a BLK region, notify the provider to + * disable the region. + */ +static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, + struct device *dev, bool probe) +{ + struct nd_region *nd_region; + + if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { + int i; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + put_ndd(ndd); + nd_mapping->ndd = NULL; + if (ndd) + atomic_dec(&nvdimm->busy); + } + + if (is_nd_pmem(dev)) + return; + + to_nd_blk_region(dev)->disable(nvdimm_bus, dev); + } + if (dev->parent && is_nd_blk(dev->parent) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->ns_seed == dev) + nd_region_create_blk_seed(nd_region); + nvdimm_bus_unlock(dev); + } + if (is_nd_btt(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->btt_seed == dev) + nd_region_create_btt_seed(nd_region); + nvdimm_bus_unlock(dev); + } +} + +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, true); +} + +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, false); +} + +static ssize_t mappingN(struct device *dev, char *buf, int n) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_mapping *nd_mapping; + struct nvdimm *nvdimm; + + if (n >= nd_region->ndr_mappings) + return -ENXIO; + nd_mapping = &nd_region->mapping[n]; + nvdimm = nd_mapping->nvdimm; + + return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev), + nd_mapping->start, nd_mapping->size); +} + +#define REGION_MAPPING(idx) \ +static ssize_t mapping##idx##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return mappingN(dev, buf, idx); \ +} \ +static DEVICE_ATTR_RO(mapping##idx) + +/* + * 32 should be enough for a while, even in the presence of socket + * interleave a 32-way interleave set is a degenerate case. 
+ */ +REGION_MAPPING(0); +REGION_MAPPING(1); +REGION_MAPPING(2); +REGION_MAPPING(3); +REGION_MAPPING(4); +REGION_MAPPING(5); +REGION_MAPPING(6); +REGION_MAPPING(7); +REGION_MAPPING(8); +REGION_MAPPING(9); +REGION_MAPPING(10); +REGION_MAPPING(11); +REGION_MAPPING(12); +REGION_MAPPING(13); +REGION_MAPPING(14); +REGION_MAPPING(15); +REGION_MAPPING(16); +REGION_MAPPING(17); +REGION_MAPPING(18); +REGION_MAPPING(19); +REGION_MAPPING(20); +REGION_MAPPING(21); +REGION_MAPPING(22); +REGION_MAPPING(23); +REGION_MAPPING(24); +REGION_MAPPING(25); +REGION_MAPPING(26); +REGION_MAPPING(27); +REGION_MAPPING(28); +REGION_MAPPING(29); +REGION_MAPPING(30); +REGION_MAPPING(31); + +static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nd_region *nd_region = to_nd_region(dev); + + if (n < nd_region->ndr_mappings) + return a->mode; + return 0; +} + +static struct attribute *mapping_attributes[] = { + &dev_attr_mapping0.attr, + &dev_attr_mapping1.attr, + &dev_attr_mapping2.attr, + &dev_attr_mapping3.attr, + &dev_attr_mapping4.attr, + &dev_attr_mapping5.attr, + &dev_attr_mapping6.attr, + &dev_attr_mapping7.attr, + &dev_attr_mapping8.attr, + &dev_attr_mapping9.attr, + &dev_attr_mapping10.attr, + &dev_attr_mapping11.attr, + &dev_attr_mapping12.attr, + &dev_attr_mapping13.attr, + &dev_attr_mapping14.attr, + &dev_attr_mapping15.attr, + &dev_attr_mapping16.attr, + &dev_attr_mapping17.attr, + &dev_attr_mapping18.attr, + &dev_attr_mapping19.attr, + &dev_attr_mapping20.attr, + &dev_attr_mapping21.attr, + &dev_attr_mapping22.attr, + &dev_attr_mapping23.attr, + &dev_attr_mapping24.attr, + &dev_attr_mapping25.attr, + &dev_attr_mapping26.attr, + &dev_attr_mapping27.attr, + &dev_attr_mapping28.attr, + &dev_attr_mapping29.attr, + &dev_attr_mapping30.attr, + &dev_attr_mapping31.attr, + NULL, +}; + +struct attribute_group nd_mapping_attribute_group = { + .is_visible = mapping_visible, + .attrs = mapping_attributes, +}; +EXPORT_SYMBOL_GPL(nd_mapping_attribute_group); + +int nd_blk_region_init(struct nd_region *nd_region) +{ + struct device *dev = &nd_region->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!is_nd_blk(dev)) + return 0; + + if (nd_region->ndr_mappings < 1) { + dev_err(dev, "invalid BLK region\n"); + return -ENXIO; + } + + return to_nd_blk_region(dev)->enable(nvdimm_bus, dev); +} + +/** + * nd_region_acquire_lane - allocate and lock a lane + * @nd_region: region id and number of lanes possible + * + * A lane correlates to a BLK-data-window and/or a log slot in the BTT. + * We optimize for the common case where there are 256 lanes, one + * per-cpu. For larger systems we need to lock to share lanes. For now + * this implementation assumes the cost of maintaining an allocator for + * free lanes is on the order of the lock hold time, so it implements a + * static lane = cpu % num_lanes mapping. + * + * In the case of a BTT instance on top of a BLK namespace a lane may be + * acquired recursively. We lock on the first instance. + * + * In the case of a BTT instance on top of PMEM, we only acquire a lane + * for the BTT metadata updates. 
+ */ +unsigned int nd_region_acquire_lane(struct nd_region *nd_region) +{ + unsigned int cpu, lane; + + cpu = get_cpu(); + if (nd_region->num_lanes < nr_cpu_ids) { + struct nd_percpu_lane *ndl_lock, *ndl_count; + + lane = cpu % nd_region->num_lanes; + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (ndl_count->count++ == 0) + spin_lock(&ndl_lock->lock); + } else + lane = cpu; + + return lane; +} +EXPORT_SYMBOL(nd_region_acquire_lane); + +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane) +{ + if (nd_region->num_lanes < nr_cpu_ids) { + unsigned int cpu = get_cpu(); + struct nd_percpu_lane *ndl_lock, *ndl_count; + + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (--ndl_count->count == 0) + spin_unlock(&ndl_lock->lock); + put_cpu(); + } + put_cpu(); +} +EXPORT_SYMBOL(nd_region_release_lane); + +static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc, struct device_type *dev_type, + const char *caller) +{ + struct nd_region *nd_region; + struct device *dev; + void *region_buf; + unsigned int i; + int ro = 0; + + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if ((nd_mapping->start | nd_mapping->size) % SZ_4K) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n", + caller, dev_name(&nvdimm->dev), i); + + return NULL; + } + + if (nvdimm->flags & NDD_UNARMED) + ro = 1; + } + + if (dev_type == &nd_blk_device_type) { + struct nd_blk_region_desc *ndbr_desc; + struct nd_blk_region *ndbr; + + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + if (ndbr) { + nd_region = &ndbr->nd_region; + ndbr->enable = ndbr_desc->enable; + ndbr->disable = ndbr_desc->disable; + ndbr->do_io = ndbr_desc->do_io; + } + region_buf = ndbr; + } else { + nd_region = kzalloc(sizeof(struct nd_region) + + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + region_buf = nd_region; + } + + if (!region_buf) + return NULL; + nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL); + if (nd_region->id < 0) + goto err_id; + + nd_region->lane = alloc_percpu(struct nd_percpu_lane); + if (!nd_region->lane) + goto err_percpu; + + for (i = 0; i < nr_cpu_ids; i++) { + struct nd_percpu_lane *ndl; + + ndl = per_cpu_ptr(nd_region->lane, i); + spin_lock_init(&ndl->lock); + ndl->count = 0; + } + + memcpy(nd_region->mapping, ndr_desc->nd_mapping, + sizeof(struct nd_mapping) * ndr_desc->num_mappings); + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + get_device(&nvdimm->dev); + } + nd_region->ndr_mappings = ndr_desc->num_mappings; + nd_region->provider_data = ndr_desc->provider_data; + nd_region->nd_set = ndr_desc->nd_set; + nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->ro = ro; + nd_region->numa_node = ndr_desc->numa_node; + ida_init(&nd_region->ns_ida); + ida_init(&nd_region->btt_ida); + dev = &nd_region->dev; + dev_set_name(dev, "region%d", nd_region->id); + dev->parent = &nvdimm_bus->dev; + dev->type = dev_type; + dev->groups = ndr_desc->attr_groups; + nd_region->ndr_size = resource_size(ndr_desc->res); + nd_region->ndr_start = ndr_desc->res->start; + nd_device_register(dev); + + return nd_region; + + 
err_percpu: + ida_simple_remove(®ion_ida, nd_region->id); + err_id: + kfree(region_buf); + return NULL; +} + +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create); + +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + if (ndr_desc->num_mappings > 1) + return NULL; + ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES); + return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_blk_region_create); + +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e9851add6f4e..964ad572aaee 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1056,19 +1056,21 @@ struct vfio_devices { static int vfio_pci_get_devs(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - - if (pci_drv != &vfio_pci_driver) - return -EBUSY; + struct vfio_device *device; if (devs->cur_index == devs->max_index) return -ENOSPC; - devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev); - if (!devs->devices[devs->cur_index]) + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) return -EINVAL; - devs->cur_index++; + if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vfio_device_put(device); + return -EBUSY; + } + + devs->devices[devs->cur_index++] = device; return 0; } diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 9a4403e2a36c..bb30128782aa 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,6 +1,6 @@ config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && EVENTFD && ARM + depends on VFIO && EVENTFD && (ARM || ARM64) select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make @@ -18,3 +18,5 @@ config VFIO_AMBA framework. If you don't know what to do here, say N. + +source "drivers/vfio/platform/reset/Kconfig" diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 81de144c0eaa..9ce8afe28450 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -2,7 +2,9 @@ vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o +obj-$(CONFIG_VFIO_PLATFORM) += reset/ vfio-amba-y := vfio_amba.o obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o +obj-$(CONFIG_VFIO_AMBA) += reset/ diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig new file mode 100644 index 000000000000..746b96b0003b --- /dev/null +++ b/drivers/vfio/platform/reset/Kconfig @@ -0,0 +1,7 @@ +config VFIO_PLATFORM_CALXEDAXGMAC_RESET + tristate "VFIO support for calxeda xgmac reset" + depends on VFIO_PLATFORM + help + Enables the VFIO platform driver to handle reset for Calxeda xgmac + + If you don't know what to do here, say N. 
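For context, a minimal provider-side sketch of how the nvdimm_pmem_region_create() export above might be used. This is not part of the patch: the helper name, and the assumption that the caller has already registered the nvdimm_bus and nvdimm and resolved the persistent-memory range into 'res', are illustrative only; the declarations are assumed to come from <linux/libnvdimm.h> as they do for the in-tree providers.

/*
 * Illustrative sketch, not part of this patch: register a single-DIMM
 * PMEM region against an existing nvdimm_bus.  Field names mirror what
 * nd_region_create() consumes above.
 */
static struct nd_region *example_register_pmem(struct nvdimm_bus *bus,
		struct nvdimm *nvdimm, struct resource *res)
{
	struct nd_mapping mapping = {
		.nvdimm = nvdimm,
		.start = res->start,		/* must be 4K aligned */
		.size = resource_size(res),	/* must be 4K aligned */
	};
	struct nd_region_desc ndr_desc = {
		.res = res,
		.nd_mapping = &mapping,
		.num_mappings = 1,
		.numa_node = NUMA_NO_NODE,
	};

	/* returns NULL on failure, e.g. an unaligned mapping or ENOMEM */
	return nvdimm_pmem_region_create(bus, &ndr_desc);
}

Note that nd_region_create() copies the mapping array into the new region, so a stack-allocated descriptor like the one above is sufficient for the registration call.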
diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile new file mode 100644 index 000000000000..2a486af9f8fa --- /dev/null +++ b/drivers/vfio/platform/reset/Makefile @@ -0,0 +1,5 @@ +vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o + +ccflags-y += -Idrivers/vfio/platform + +obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c new file mode 100644 index 000000000000..619dc7d22082 --- /dev/null +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -0,0 +1,86 @@ +/* + * VFIO platform driver specialized for Calxeda xgmac reset + * reset code is inherited from calxeda xgmac native driver + * + * Copyright 2010-2011 Calxeda, Inc. + * Copyright (c) 2015 Linaro Ltd. + * www.linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Eric Auger <eric.auger@linaro.org>" +#define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device" + +#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac" + +/* XGMAC Register definitions */ +#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ + +/* DMA Control and Status Registers */ +#define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */ +#define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */ + +/* DMA Control registe defines */ +#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ +#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ + +/* Common MAC defines */ +#define MAC_ENABLE_TX 0x00000008 /* Transmitter Enable */ +#define MAC_ENABLE_RX 0x00000004 /* Receiver Enable */ + +static inline void xgmac_mac_disable(void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + XGMAC_DMA_CONTROL); + + value &= ~(DMA_CONTROL_ST | DMA_CONTROL_SR); + writel(value, ioaddr + XGMAC_DMA_CONTROL); + + value = readl(ioaddr + XGMAC_CONTROL); + value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX); + writel(value, ioaddr + XGMAC_CONTROL); +} + +int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) +{ + struct vfio_platform_region reg = vdev->regions[0]; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + if (!reg.ioaddr) + return -ENOMEM; + } + + /* disable IRQ */ + writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA); + + /* Disable the MAC core */ + xgmac_mac_disable(reg.ioaddr); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index abcff7a1aa66..e43efb5e92bf 100644 --- 
a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -25,6 +25,44 @@ static DEFINE_MUTEX(driver_lock); +static const struct vfio_platform_reset_combo reset_lookup_table[] = { + { + .compat = "calxeda,hb-xgmac", + .reset_function_name = "vfio_platform_calxedaxgmac_reset", + .module_name = "vfio-platform-calxedaxgmac", + }, +}; + +static void vfio_platform_get_reset(struct vfio_platform_device *vdev, + struct device *dev) +{ + const char *compat; + int (*reset)(struct vfio_platform_device *); + int ret, i; + + ret = device_property_read_string(dev, "compatible", &compat); + if (ret) + return; + + for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { + if (!strcmp(reset_lookup_table[i].compat, compat)) { + request_module(reset_lookup_table[i].module_name); + reset = __symbol_get( + reset_lookup_table[i].reset_function_name); + if (reset) { + vdev->reset = reset; + return; + } + } + } +} + +static void vfio_platform_put_reset(struct vfio_platform_device *vdev) +{ + if (vdev->reset) + symbol_put_addr(vdev->reset); +} + static int vfio_platform_regions_init(struct vfio_platform_device *vdev) { int cnt = 0, i; @@ -100,6 +138,8 @@ static void vfio_platform_release(void *device_data) mutex_lock(&driver_lock); if (!(--vdev->refcnt)) { + if (vdev->reset) + vdev->reset(vdev); vfio_platform_regions_cleanup(vdev); vfio_platform_irq_cleanup(vdev); } @@ -127,6 +167,9 @@ static int vfio_platform_open(void *device_data) ret = vfio_platform_irq_init(vdev); if (ret) goto err_irq; + + if (vdev->reset) + vdev->reset(vdev); } vdev->refcnt++; @@ -159,6 +202,8 @@ static long vfio_platform_ioctl(void *device_data, if (info.argsz < minsz) return -EINVAL; + if (vdev->reset) + vdev->flags |= VFIO_DEVICE_FLAGS_RESET; info.flags = vdev->flags; info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; @@ -252,8 +297,12 @@ static long vfio_platform_ioctl(void *device_data, return ret; - } else if (cmd == VFIO_DEVICE_RESET) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_RESET) { + if (vdev->reset) + return vdev->reset(vdev); + else + return -EINVAL; + } return -ENOTTY; } @@ -502,6 +551,8 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return ret; } + vfio_platform_get_reset(vdev, dev); + mutex_init(&vdev->igate); return 0; @@ -513,8 +564,11 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) struct vfio_platform_device *vdev; vdev = vfio_del_group_dev(dev); - if (vdev) + + if (vdev) { + vfio_platform_put_reset(vdev); iommu_group_put(dev->iommu_group); + } return vdev; } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 5d31e0473406..1c9b3d59543c 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -67,6 +67,13 @@ struct vfio_platform_device { struct resource* (*get_resource)(struct vfio_platform_device *vdev, int i); int (*get_irq)(struct vfio_platform_device *vdev, int i); + int (*reset)(struct vfio_platform_device *vdev); +}; + +struct vfio_platform_reset_combo { + const char *compat; + const char *reset_function_name; + const char *module_name; }; extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index e1278fe04b1e..2fb29dfeffbd 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -661,18 +661,29 @@ int vfio_add_group_dev(struct device *dev, EXPORT_SYMBOL_GPL(vfio_add_group_dev); /** - * Get a 
reference to the vfio_device for a device that is known to - * be bound to a vfio driver. The driver implicitly holds a - * vfio_device reference between vfio_add_group_dev and - * vfio_del_group_dev. We can therefore use drvdata to increment - * that reference from the struct device. This additional - * reference must be released by calling vfio_device_put. + * Get a reference to the vfio_device for a device. Even if the + * caller thinks they own the device, they could be racing with a + * release call path, so we can't trust drvdata for the shortcut. + * Go the long way around, from the iommu_group to the vfio_group + * to the vfio_device. */ struct vfio_device *vfio_device_get_from_dev(struct device *dev) { - struct vfio_device *device = dev_get_drvdata(dev); + struct iommu_group *iommu_group; + struct vfio_group *group; + struct vfio_device *device; + + iommu_group = iommu_group_get(dev); + if (!iommu_group) + return NULL; - vfio_device_get(device); + group = vfio_group_get_from_iommu(iommu_group); + iommu_group_put(iommu_group); + if (!group) + return NULL; + + device = vfio_group_get_device(group, dev); + vfio_group_put(group); return device; }
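To make the reworked lookup path concrete, here is a usage sketch that mirrors the vfio_pci_get_devs() hunk earlier in this patch. The helper name and the expected_drv parameter are invented for illustration; only vfio_device_get_from_dev(), vfio_device_put() (from <linux/vfio.h>) and pci_dev_driver() (from <linux/pci.h>) are real interfaces.

/*
 * Illustrative only: take a vfio_device reference via the iommu_group
 * walk, then re-check the bound driver, since drvdata can no longer be
 * trusted as a shortcut.
 */
static int example_get_vfio_pci_device(struct pci_dev *pdev,
		struct pci_driver *expected_drv)
{
	struct vfio_device *device = vfio_device_get_from_dev(&pdev->dev);

	if (!device)
		return -EINVAL;	/* no iommu_group, vfio_group, or vfio_device */

	if (pci_dev_driver(pdev) != expected_drv) {
		/* raced with an unbind; drop the reference we just took */
		vfio_device_put(device);
		return -EBUSY;
	}

	/* ... use 'device' ... */
	vfio_device_put(device);
	return 0;
}

Because the reference now comes from the iommu_group walk rather than drvdata, the driver check performed after the get is what protects callers against a concurrent release/unbind.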