57 files changed, 13842 insertions, 155 deletions
diff --git a/Documentation/nvdimm/btt.txt b/Documentation/nvdimm/btt.txt
new file mode 100644
index 000000000000..b91443f577dc
--- /dev/null
+++ b/Documentation/nvdimm/btt.txt
@@ -0,0 +1,283 @@
+BTT - Block Translation Table
+=============================
+
+
+1. Introduction
+---------------
+
+Persistent memory based storage is able to perform IO at byte (or more
+accurately, cache line) granularity. However, we often want to expose such
+storage as traditional block devices. The block drivers for persistent memory
+will do exactly this. However, they do not provide any atomicity guarantees.
+Traditional SSDs typically provide protection against torn sectors in hardware,
+using stored energy in capacitors to complete in-flight block writes, or perhaps
+in firmware. We don't have this luxury with persistent memory - if a write is in
+progress, and we experience a power failure, the block will contain a mix of old
+and new data. Applications may not be prepared to handle such a scenario.
+
+The Block Translation Table (BTT) provides atomic sector update semantics for
+persistent memory devices, so that applications that rely on sector writes not
+being torn can continue to do so. The BTT manifests itself as a stacked block
+device, and reserves a portion of the underlying storage for its metadata. At
+the heart of it, is an indirection table that re-maps all the blocks on the
+volume. It can be thought of as an extremely simple file system that only
+provides atomic sector updates.
+
+
+2. Static Layout
+----------------
+
+The underlying storage on which a BTT can be laid out is not limited in any way.
+The BTT, however, splits the available space into chunks of up to 512 GiB,
+called "Arenas".
+
+Each arena follows the same layout for its metadata, and all references in an
+arena are internal to it (with the exception of one field that points to the
+next arena). The following depicts the "On-disk" metadata layout:
+
+
+  Backing Store     +------->  Arena
++---------------+   |   +------------------+
+|               |   |   | Arena info block |
+|    Arena 0    +---+   |       4K         |
+|     512G      |       +------------------+
+|               |       |                  |
++---------------+       |                  |
+|               |       |                  |
+|    Arena 1    |       |   Data Blocks    |
+|     512G      |       |                  |
+|               |       |                  |
++---------------+       |                  |
+|       .       |       |                  |
+|       .       |       |                  |
+|       .       |       |                  |
+|               |       |                  |
+|               |       |                  |
++---------------+       +------------------+
+                        |                  |
+                        |     BTT Map      |
+                        |                  |
+                        |                  |
+                        +------------------+
+                        |                  |
+                        |     BTT Flog     |
+                        |                  |
+                        +------------------+
+                        | Info block copy  |
+                        |       4K         |
+                        +------------------+
+
+
+3. Theory of Operation
+----------------------
+
+
+a. The BTT Map
+--------------
+
+The map is a simple lookup/indirection table that maps an LBA to an internal
+block. Each map entry is 32 bits. The two most significant bits are special
+flags, and the remaining form the internal block number.
+
+Bit      Description
+31 - 30	: Error and Zero flags - Used in the following way:
+	 Bit		      Description
+	31 30
+	-----------------------------------------------------------------------
+	 00	Initial state. Reads return zeroes; Premap = Postmap
+	 01	Zero state: Reads return zeroes
+	 10	Error state: Reads fail; Writes clear 'E' bit
+	 11	Normal Block – has valid postmap
+
+
+29 - 0	: Mappings to internal 'postmap' blocks
+
+
+Some of the terminology that will be subsequently used:
+
+External LBA  : LBA as made visible to upper layers.
+ABA           : Arena Block Address - Block offset/number within an arena
+Premap ABA    : The block offset into an arena, which was decided upon by range
+		checking the External LBA
+Postmap ABA   : The block number in the "Data Blocks" area obtained after
+		indirection from the map
+nfree	      : The number of free blocks that are maintained at any given time.
+		This is the number of concurrent writes that can happen to the
+		arena.
+
+
+For example, after adding a BTT, we surface a disk of 1024G. We get a read for
+the external LBA at 768G. This falls into the second arena, and of the 512G
+worth of blocks that this arena contributes, this block is at 256G. Thus, the
+premap ABA is 256G. We now refer to the map, and find out the mapping for block
+'X' (256G) points to block 'Y', say '64'. Thus the postmap ABA is 64.
+
+
+b. The BTT Flog
+---------------
+
+The BTT provides sector atomicity by making every write an "allocating write",
+i.e. Every write goes to a "free" block. A running list of free blocks is
+maintained in the form of the BTT flog. 'Flog' is a combination of the words
+"free list" and "log". The flog contains 'nfree' entries, and an entry contains:
+
+lba     : The premap ABA that is being written to
+old_map : The old postmap ABA - after 'this' write completes, this will be a
+	  free block.
+new_map : The new postmap ABA. The map will up updated to reflect this
+	  lba->postmap_aba mapping, but we log it here in case we have to
+	  recover.
+seq	: Sequence number to mark which of the 2 sections of this flog entry is
+	  valid/newest. It cycles between 01->10->11->01 (binary) under normal
+	  operation, with 00 indicating an uninitialized state.
+lba'	: alternate lba entry
+old_map': alternate old postmap entry
+new_map': alternate new postmap entry
+seq'	: alternate sequence number.
+
+Each of the above fields is 32-bit, making one entry 32 bytes. Entries are also
+padded to 64 bytes to avoid cache line sharing or aliasing. Flog updates are
+done such that for any entry being written, it:
+a. overwrites the 'old' section in the entry based on sequence numbers
+b. writes the 'new' section such that the sequence number is written last.
+
+
+c. The concept of lanes
+-----------------------
+
+While 'nfree' describes the number of concurrent IOs an arena can process
+concurrently, 'nlanes' is the number of IOs the BTT device as a whole can
+process.
+ nlanes = min(nfree, num_cpus)
+A lane number is obtained at the start of any IO, and is used for indexing into
+all the on-disk and in-memory data structures for the duration of the IO. If
+there are more CPUs than the max number of available lanes, than lanes are
+protected by spinlocks.
+
+
+d. In-memory data structure: Read Tracking Table (RTT)
+------------------------------------------------------
+
+Consider a case where we have two threads, one doing reads and the other,
+writes. We can hit a condition where the writer thread grabs a free block to do
+a new IO, but the (slow) reader thread is still reading from it. In other words,
+the reader consulted a map entry, and started reading the corresponding block. A
+writer started writing to the same external LBA, and finished the write updating
+the map for that external LBA to point to its new postmap ABA. At this point the
+internal, postmap block that the reader is (still) reading has been inserted
+into the list of free blocks. If another write comes in for the same LBA, it can
+grab this free block, and start writing to it, causing the reader to read
+incorrect data. To prevent this, we introduce the RTT.
+
+The RTT is a simple, per arena table with 'nfree' entries. Every reader inserts
+into rtt[lane_number], the postmap ABA it is reading, and clears it after the
+read is complete. Every writer thread, after grabbing a free block, checks the
+RTT for its presence. If the postmap free block is in the RTT, it waits till the
+reader clears the RTT entry, and only then starts writing to it.
+
+
+e. In-memory data structure: map locks
+--------------------------------------
+
+Consider a case where two writer threads are writing to the same LBA. There can
+be a race in the following sequence of steps:
+
+free[lane] = map[premap_aba]
+map[premap_aba] = postmap_aba
+
+Both threads can update their respective free[lane] with the same old, freed
+postmap_aba. This has made the layout inconsistent by losing a free entry, and
+at the same time, duplicating another free entry for two lanes.
+
+To solve this, we could have a single map lock (per arena) that has to be taken
+before performing the above sequence, but we feel that could be too contentious.
+Instead we use an array of (nfree) map_locks that is indexed by
+(premap_aba modulo nfree).
+
+
+f. Reconstruction from the Flog
+-------------------------------
+
+On startup, we analyze the BTT flog to create our list of free blocks. We walk
+through all the entries, and for each lane, of the set of two possible
+'sections', we always look at the most recent one only (based on the sequence
+number). The reconstruction rules/steps are simple:
+- Read map[log_entry.lba].
+- If log_entry.new matches the map entry, then log_entry.old is free.
+- If log_entry.new does not match the map entry, then log_entry.new is free.
+  (This case can only be caused by power-fails/unsafe shutdowns)
+
+
+g. Summarizing - Read and Write flows
+-------------------------------------
+
+Read:
+
+1.  Convert external LBA to arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Read map to get the entry for this pre-map ABA
+4.  Enter post-map ABA into RTT[lane]
+5.  If TRIM flag set in map, return zeroes, and end IO (go to step 8)
+6.  If ERROR flag set in map, end IO with EIO (go to step 8)
+7.  Read data from this block
+8.  Remove post-map ABA entry from RTT[lane]
+9.  Release lane (and lane_lock)
+
+Write:
+
+1.  Convert external LBA to Arena number + pre-map ABA
+2.  Get a lane (and take lane_lock)
+3.  Use lane to index into in-memory free list and obtain a new block, next flog
+        index, next sequence number
+4.  Scan the RTT to check if free block is present, and spin/wait if it is.
+5.  Write data to this free block
+6.  Read map to get the existing post-map ABA entry for this pre-map ABA
+7.  Write flog entry: [premap_aba / old postmap_aba / new postmap_aba / seq_num]
+8.  Write new post-map ABA into map.
+9.  Write old post-map entry into the free list
+10. Calculate next sequence number and write into the free list entry
+11. Release lane (and lane_lock)
+
+
+4. Error Handling
+=================
+
+An arena would be in an error state if any of the metadata is corrupted
+irrecoverably, either due to a bug or a media error. The following conditions
+indicate an error:
+- Info block checksum does not match (and recovering from the copy also fails)
+- All internal available blocks are not uniquely and entirely addressed by the
+  sum of mapped blocks and free blocks (from the BTT flog).
+- Rebuilding free list from the flog reveals missing/duplicate/impossible
+  entries
+- A map entry is out of bounds
+
+If any of these error conditions are encountered, the arena is put into a read
+only state using a flag in the info block.
+
+
+5. In-kernel usage
+==================
+
+Any block driver that supports byte granularity IO to the storage may register
+with the BTT. It will have to provide the rw_bytes interface in its
+block_device_operations struct:
+
+	int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
+
+It may register with the BTT after it adds its own gendisk, using btt_init:
+
+	struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
+			u32 lbasize, u8 uuid[], int maxlane);
+
+note that maxlane is the maximum amount of concurrency the driver wishes to
+allow the BTT to use.
+
+The BTT 'disk' appears as a stacked block device that grabs the underlying block
+device in the O_EXCL mode.
+
+When the driver wishes to remove the backing disk, it should similarly call
+btt_fini using the same struct btt* handle that was provided to it by btt_init.
+
+	void btt_fini(struct btt *btt);
+
diff --git a/Documentation/nvdimm/nvdimm.txt b/Documentation/nvdimm/nvdimm.txt
new file mode 100644
index 000000000000..197a0b6b0582
--- /dev/null
+++ b/Documentation/nvdimm/nvdimm.txt
@@ -0,0 +1,808 @@
+			  LIBNVDIMM: Non-Volatile Devices
+	      libnvdimm - kernel / libndctl - userspace helper library
+			   linux-nvdimm@lists.01.org
+				      v13
+
+
+	Glossary
+	Overview
+	    Supporting Documents
+	    Git Trees
+	LIBNVDIMM PMEM and BLK
+	Why BLK?
+	    PMEM vs BLK
+	        BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+	Example NVDIMM Platform
+	LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+	    LIBNDCTL: Context
+	        libndctl: instantiate a new library context example
+	    LIBNVDIMM/LIBNDCTL: Bus
+	        libnvdimm: control class device in /sys/class
+	        libnvdimm: bus
+	        libndctl: bus enumeration example
+	    LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+	        libnvdimm: DIMM (NMEM)
+	        libndctl: DIMM enumeration example
+	    LIBNVDIMM/LIBNDCTL: Region
+	        libnvdimm: region
+	        libndctl: region enumeration example
+	        Why Not Encode the Region Type into the Region Name?
+	        How Do I Determine the Major Type of a Region?
+	    LIBNVDIMM/LIBNDCTL: Namespace
+	        libnvdimm: namespace
+	        libndctl: namespace enumeration example
+	        libndctl: namespace creation example
+	        Why the Term "namespace"?
+	    LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+	        libnvdimm: btt layout
+	        libndctl: btt creation example
+	Summary LIBNDCTL Diagram
+
+
+Glossary
+--------
+
+PMEM: A system-physical-address range where writes are persistent.  A
+block device composed of PMEM is capable of DAX.  A PMEM address range
+may span an interleave of several DIMMs.
+
+BLK: A set of one or more programmable memory mapped apertures provided
+by a DIMM to access its media.  This indirection precludes the
+performance benefit of interleaving, but enables DIMM-bounded failure
+modes.
+
+DPA: DIMM Physical Address, is a DIMM-relative offset.  With one DIMM in
+the system there would be a 1:1 system-physical-address:DPA association.
+Once more DIMMs are added a memory controller interleave must be
+decoded to determine the DPA associated with a given
+system-physical-address.  BLK capacity always has a 1:1 relationship
+with a single-DIMM's DPA range.
+
+DAX: File system extensions to bypass the page cache and block layer to
+mmap persistent memory, from a PMEM block device, directly into a
+process address space.
+
+BTT: Block Translation Table: Persistent memory is byte addressable.
+Existing software may have an expectation that the power-fail-atomicity
+of writes is at least one sector, 512 bytes.  The BTT is an indirection
+table with atomic update semantics to front a PMEM/BLK block device
+driver and present arbitrary atomic sector sizes.
+
+LABEL: Metadata stored on a DIMM device that partitions and identifies
+(persistently names) storage between PMEM and BLK.  It also partitions
+BLK storage to host BTTs with different parameters per BLK-partition.
+Note that traditional partition tables, GPT/MBR, are layered on top of a
+BLK or PMEM device.
+
+
+Overview
+--------
+
+The LIBNVDIMM subsystem provides support for three types of NVDIMMs, namely,
+PMEM, BLK, and NVDIMM devices that can simultaneously support both PMEM
+and BLK mode access.  These three modes of operation are described by
+the "NVDIMM Firmware Interface Table" (NFIT) in ACPI 6.  While the LIBNVDIMM
+implementation is generic and supports pre-NFIT platforms, it was guided
+by the superset of capabilities need to support this ACPI 6 definition
+for NVDIMM resources.  The bulk of the kernel implementation is in place
+to handle the case where DPA accessible via PMEM is aliased with DPA
+accessible via BLK.  When that occurs a LABEL is needed to reserve DPA
+for exclusive access via one mode a time.
+
+Supporting Documents
+ACPI 6: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
+NVDIMM Namespace: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
+DSM Interface Example: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
+Driver Writer's Guide: http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf
+
+Git Trees
+LIBNVDIMM: https://git.kernel.org/cgit/linux/kernel/git/djbw/nvdimm.git
+LIBNDCTL: https://github.com/pmem/ndctl.git
+PMEM: https://github.com/01org/prd
+
+
+LIBNVDIMM PMEM and BLK
+------------------
+
+Prior to the arrival of the NFIT, non-volatile memory was described to a
+system in various ad-hoc ways.  Usually only the bare minimum was
+provided, namely, a single system-physical-address range where writes
+are expected to be durable after a system power loss.  Now, the NFIT
+specification standardizes not only the description of PMEM, but also
+BLK and platform message-passing entry points for control and
+configuration.
+
+For each NVDIMM access method (PMEM, BLK), LIBNVDIMM provides a block
+device driver:
+
+    1. PMEM (nd_pmem.ko): Drives a system-physical-address range.  This
+    range is contiguous in system memory and may be interleaved (hardware
+    memory controller striped) across multiple DIMMs.  When interleaved the
+    platform may optionally provide details of which DIMMs are participating
+    in the interleave.
+
+    Note that while LIBNVDIMM describes system-physical-address ranges that may
+    alias with BLK access as ND_NAMESPACE_PMEM ranges and those without
+    alias as ND_NAMESPACE_IO ranges, to the nd_pmem driver there is no
+    distinction.  The different device-types are an implementation detail
+    that userspace can exploit to implement policies like "only interface
+    with address ranges from certain DIMMs".  It is worth noting that when
+    aliasing is present and a DIMM lacks a label, then no block device can
+    be created by default as userspace needs to do at least one allocation
+    of DPA to the PMEM range.  In contrast ND_NAMESPACE_IO ranges, once
+    registered, can be immediately attached to nd_pmem.
+
+    2. BLK (nd_blk.ko): This driver performs I/O using a set of platform
+    defined apertures.  A set of apertures will all access just one DIMM.
+    Multiple windows allow multiple concurrent accesses, much like
+    tagged-command-queuing, and would likely be used by different threads or
+    different CPUs.
+
+    The NFIT specification defines a standard format for a BLK-aperture, but
+    the spec also allows for vendor specific layouts, and non-NFIT BLK
+    implementations may other designs for BLK I/O.  For this reason "nd_blk"
+    calls back into platform-specific code to perform the I/O.  One such
+    implementation is defined in the "Driver Writer's Guide" and "DSM
+    Interface Example".
+
+
+Why BLK?
+--------
+
+While PMEM provides direct byte-addressable CPU-load/store access to
+NVDIMM storage, it does not provide the best system RAS (recovery,
+availability, and serviceability) model.  An access to a corrupted
+system-physical-address address causes a cpu exception while an access
+to a corrupted address through an BLK-aperture causes that block window
+to raise an error status in a register.  The latter is more aligned with
+the standard error model that host-bus-adapter attached disks present.
+Also, if an administrator ever wants to replace a memory it is easier to
+service a system at DIMM module boundaries.  Compare this to PMEM where
+data could be interleaved in an opaque hardware specific manner across
+several DIMMs.
+
+PMEM vs BLK
+BLK-apertures solve this RAS problem, but their presence is also the
+major contributing factor to the complexity of the ND subsystem.  They
+complicate the implementation because PMEM and BLK alias in DPA space.
+Any given DIMM's DPA-range may contribute to one or more
+system-physical-address sets of interleaved DIMMs, *and* may also be
+accessed in its entirety through its BLK-aperture.  Accessing a DPA
+through a system-physical-address while simultaneously accessing the
+same DPA through a BLK-aperture has undefined results.  For this reason,
+DIMMs with this dual interface configuration include a DSM function to
+store/retrieve a LABEL.  The LABEL effectively partitions the DPA-space
+into exclusive system-physical-address and BLK-aperture accessible
+regions.  For simplicity a DIMM is allowed a PMEM "region" per each
+interleave set in which it is a member.  The remaining DPA space can be
+carved into an arbitrary number of BLK devices with discontiguous
+extents.
+
+BLK-REGIONs, PMEM-REGIONs, Atomic Sectors, and DAX
+--------------------------------------------------
+
+One of the few
+reasons to allow multiple BLK namespaces per REGION is so that each
+BLK-namespace can be configured with a BTT with unique atomic sector
+sizes.  While a PMEM device can host a BTT the LABEL specification does
+not provide for a sector size to be specified for a PMEM namespace.
+This is due to the expectation that the primary usage model for PMEM is
+via DAX, and the BTT is incompatible with DAX.  However, for the cases
+where an application or filesystem still needs atomic sector update
+guarantees it can register a BTT on a PMEM device or partition.  See
+LIBNVDIMM/NDCTL: Block Translation Table "btt"
+
+
+Example NVDIMM Platform
+-----------------------
+
+For the remainder of this document the following diagram will be
+referenced for any example sysfs layouts.
+
+
+                             (a)               (b)           DIMM   BLK-REGION
+          +-------------------+--------+--------+--------+
++------+  |       pm0.0       | blk2.0 | pm1.0  | blk2.1 |    0      region2
+| imc0 +--+- - - region0- - - +--------+        +--------+
++--+---+  |       pm0.0       | blk3.0 | pm1.0  | blk3.1 |    1      region3
+   |      +-------------------+--------v        v--------+
++--+---+                               |                 |
+| cpu0 |                                     region1
++--+---+                               |                 |
+   |      +----------------------------^        ^--------+
++--+---+  |           blk4.0           | pm1.0  | blk4.0 |    2      region4
+| imc1 +--+----------------------------|        +--------+
++------+  |           blk5.0           | pm1.0  | blk5.0 |    3      region5
+          +----------------------------+--------+--------+
+
+In this platform we have four DIMMs and two memory controllers in one
+socket.  Each unique interface (BLK or PMEM) to DPA space is identified
+by a region device with a dynamically assigned id (REGION0 - REGION5).
+
+    1. The first portion of DIMM0 and DIMM1 are interleaved as REGION0. A
+    single PMEM namespace is created in the REGION0-SPA-range that spans
+    DIMM0 and DIMM1 with a user-specified name of "pm0.0". Some of that
+    interleaved system-physical-address range is reclaimed as BLK-aperture
+    accessed space starting at DPA-offset (a) into each DIMM.  In that
+    reclaimed space we create two BLK-aperture "namespaces" from REGION2 and
+    REGION3 where "blk2.0" and "blk3.0" are just human readable names that
+    could be set to any user-desired name in the LABEL.
+
+    2. In the last portion of DIMM0 and DIMM1 we have an interleaved
+    system-physical-address range, REGION1, that spans those two DIMMs as
+    well as DIMM2 and DIMM3.  Some of REGION1 allocated to a PMEM namespace
+    named "pm1.0" the rest is reclaimed in 4 BLK-aperture namespaces (for
+    each DIMM in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+    "blk5.0".
+
+    3. The portion of DIMM2 and DIMM3 that do not participate in the REGION1
+    interleaved system-physical-address range (i.e. the DPA address below
+    offset (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+    Note, that this example shows that BLK-aperture namespaces don't need to
+    be contiguous in DPA-space.
+
+    This bus is provided by the kernel under the device
+    /sys/devices/platform/nfit_test.0 when CONFIG_NFIT_TEST is enabled and
+    the nfit_test.ko module is loaded.  This not only test LIBNVDIMM but the
+    acpi_nfit.ko driver as well.
+
+
+LIBNVDIMM Kernel Device Model and LIBNDCTL Userspace API
+----------------------------------------------------
+
+What follows is a description of the LIBNVDIMM sysfs layout and a
+corresponding object hierarchy diagram as viewed through the LIBNDCTL
+api.  The example sysfs paths and diagrams are relative to the Example
+NVDIMM Platform which is also the LIBNVDIMM bus used in the LIBNDCTL unit
+test.
+
+LIBNDCTL: Context
+Every api call in the LIBNDCTL library requires a context that holds the
+logging parameters and other library instance state.  The library is
+based on the libabc template:
+https://git.kernel.org/cgit/linux/kernel/git/kay/libabc.git/
+
+LIBNDCTL: instantiate a new library context example
+
+	struct ndctl_ctx *ctx;
+
+	if (ndctl_new(&ctx) == 0)
+		return ctx;
+	else
+		return NULL;
+
+LIBNVDIMM/LIBNDCTL: Bus
+-------------------
+
+A bus has a 1:1 relationship with an NFIT.  The current expectation for
+ACPI based systems is that there is only ever one platform-global NFIT.
+That said, it is trivial to register multiple NFITs, the specification
+does not preclude it.  The infrastructure supports multiple busses and
+we we use this capability to test multiple NFIT configurations in the
+unit test.
+
+LIBNVDIMM: control class device in /sys/class
+
+This character device accepts DSM messages to be passed to DIMM
+identified by its NFIT handle.
+
+	/sys/class/nd/ndctl0
+	|-- dev
+	|-- device -> ../../../ndbus0
+	|-- subsystem -> ../../../../../../../class/nd
+
+
+
+LIBNVDIMM: bus
+
+	struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
+	       struct nvdimm_bus_descriptor *nfit_desc);
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- commands
+	|-- nd
+	|-- nfit
+	|-- nmem0
+	|-- nmem1
+	|-- nmem2
+	|-- nmem3
+	|-- power
+	|-- provider
+	|-- region0
+	|-- region1
+	|-- region2
+	|-- region3
+	|-- region4
+	|-- region5
+	|-- uevent
+	`-- wait_probe
+
+LIBNDCTL: bus enumeration example
+Find the bus handle that describes the bus from Example NVDIMM Platform
+
+	static struct ndctl_bus *get_bus_by_provider(struct ndctl_ctx *ctx,
+			const char *provider)
+	{
+		struct ndctl_bus *bus;
+
+		ndctl_bus_foreach(ctx, bus)
+			if (strcmp(provider, ndctl_bus_get_provider(bus)) == 0)
+				return bus;
+
+		return NULL;
+	}
+
+	bus = get_bus_by_provider(ctx, "nfit_test.0");
+
+
+LIBNVDIMM/LIBNDCTL: DIMM (NMEM)
+---------------------------
+
+The DIMM device provides a character device for sending commands to
+hardware, and it is a container for LABELs.  If the DIMM is defined by
+NFIT then an optional 'nfit' attribute sub-directory is available to add
+NFIT-specifics.
+
+Note that the kernel device name for "DIMMs" is "nmemX".  The NFIT
+describes these devices via "Memory Device to System Physical Address
+Range Mapping Structure", and there is no requirement that they actually
+be physical DIMMs, so we use a more generic name.
+
+LIBNVDIMM: DIMM (NMEM)
+
+	struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+			const struct attribute_group **groups, unsigned long flags,
+			unsigned long *dsm_mask);
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- nmem0
+	|   |-- available_slots
+	|   |-- commands
+	|   |-- dev
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nvdimm
+	|   |-- modalias
+	|   |-- nfit
+	|   |   |-- device
+	|   |   |-- format
+	|   |   |-- handle
+	|   |   |-- phys_id
+	|   |   |-- rev_id
+	|   |   |-- serial
+	|   |   `-- vendor
+	|   |-- state
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- nmem1
+	[..]
+
+
+LIBNDCTL: DIMM enumeration example
+
+Note, in this example we are assuming NFIT-defined DIMMs which are
+identified by an "nfit_handle" a 32-bit value where:
+Bit 3:0 DIMM number within the memory channel
+Bit 7:4 memory channel number
+Bit 11:8 memory controller ID
+Bit 15:12 socket ID (within scope of a Node controller if node controller is present)
+Bit 27:16 Node Controller ID
+Bit 31:28 Reserved
+
+	static struct ndctl_dimm *get_dimm_by_handle(struct ndctl_bus *bus,
+	       unsigned int handle)
+	{
+		struct ndctl_dimm *dimm;
+
+		ndctl_dimm_foreach(bus, dimm)
+			if (ndctl_dimm_get_handle(dimm) == handle)
+				return dimm;
+
+		return NULL;
+	}
+
+	#define DIMM_HANDLE(n, s, i, c, d) \
+		(((n & 0xfff) << 16) | ((s & 0xf) << 12) | ((i & 0xf) << 8) \
+		 | ((c & 0xf) << 4) | (d & 0xf))
+
+	dimm = get_dimm_by_handle(bus, DIMM_HANDLE(0, 0, 0, 0, 0));
+
+LIBNVDIMM/LIBNDCTL: Region
+----------------------
+
+A generic REGION device is registered for each PMEM range orBLK-aperture
+set.  Per the example there are 6 regions: 2 PMEM and 4 BLK-aperture
+sets on the "nfit_test.0" bus.  The primary role of regions are to be a
+container of "mappings".  A mapping is a tuple of <DIMM,
+DPA-start-offset, length>.
+
+LIBNVDIMM provides a built-in driver for these REGION devices.  This driver
+is responsible for reconciling the aliased DPA mappings across all
+regions, parsing the LABEL, if present, and then emitting NAMESPACE
+devices with the resolved/exclusive DPA-boundaries for the nd_pmem or
+nd_blk device driver to consume.
+
+In addition to the generic attributes of "mapping"s, "interleave_ways"
+and "size" the REGION device also exports some convenience attributes.
+"nstype" indicates the integer type of namespace-device this region
+emits, "devtype" duplicates the DEVTYPE variable stored by udev at the
+'add' event, "modalias" duplicates the MODALIAS variable stored by udev
+at the 'add' event, and finally, the optional "spa_index" is provided in
+the case where the region is defined by a SPA.
+
+LIBNVDIMM: region
+
+	struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+	struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+			struct nd_region_desc *ndr_desc);
+
+	/sys/devices/platform/nfit_test.0/ndbus0
+	|-- region0
+	|   |-- available_size
+	|   |-- btt0
+	|   |-- btt_seed
+	|   |-- devtype
+	|   |-- driver -> ../../../../../bus/nd/drivers/nd_region
+	|   |-- init_namespaces
+	|   |-- mapping0
+	|   |-- mapping1
+	|   |-- mappings
+	|   |-- modalias
+	|   |-- namespace0.0
+	|   |-- namespace_seed
+	|   |-- numa_node
+	|   |-- nfit
+	|   |   `-- spa_index
+	|   |-- nstype
+	|   |-- set_cookie
+	|   |-- size
+	|   |-- subsystem -> ../../../../../bus/nd
+	|   `-- uevent
+	|-- region1
+	[..]
+
+LIBNDCTL: region enumeration example
+
+Sample region retrieval routines based on NFIT-unique data like
+"spa_index" (interleave set id) for PMEM and "nfit_handle" (dimm id) for
+BLK.
+
+	static struct ndctl_region *get_pmem_region_by_spa_index(struct ndctl_bus *bus,
+			unsigned int spa_index)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_PMEM)
+				continue;
+			if (ndctl_region_get_spa_index(region) == spa_index)
+				return region;
+		}
+		return NULL;
+	}
+
+	static struct ndctl_region *get_blk_region_by_dimm_handle(struct ndctl_bus *bus,
+			unsigned int handle)
+	{
+		struct ndctl_region *region;
+
+		ndctl_region_foreach(bus, region) {
+			struct ndctl_mapping *map;
+
+			if (ndctl_region_get_type(region) != ND_DEVICE_REGION_BLOCK)
+				continue;
+			ndctl_mapping_foreach(region, map) {
+				struct ndctl_dimm *dimm = ndctl_mapping_get_dimm(map);
+
+				if (ndctl_dimm_get_handle(dimm) == handle)
+					return region;
+			}
+		}
+		return NULL;
+	}
+
+
+Why Not Encode the Region Type into the Region Name?
+----------------------------------------------------
+
+At first glance it seems since NFIT defines just PMEM and BLK interface
+types that we should simply name REGION devices with something derived
+from those type names.  However, the ND subsystem explicitly keeps the
+REGION name generic and expects userspace to always consider the
+region-attributes for 4 reasons:
+
+    1. There are already more than two REGION and "namespace" types.  For
+    PMEM there are two subtypes.  As mentioned previously we have PMEM where
+    the constituent DIMM devices are known and anonymous PMEM.  For BLK
+    regions the NFIT specification already anticipates vendor specific
+    implementations.  The exact distinction of what a region contains is in
+    the region-attributes not the region-name or the region-devtype.
+
+    2. A region with zero child-namespaces is a possible configuration.  For
+    example, the NFIT allows for a DCR to be published without a
+    corresponding BLK-aperture.  This equates to a DIMM that can only accept
+    control/configuration messages, but no i/o through a descendant block
+    device.  Again, this "type" is advertised in the attributes ('mappings'
+    == 0) and the name does not tell you much.
+
+    3. What if a third major interface type arises in the future?  Outside
+    of vendor specific implementations, it's not difficult to envision a
+    third class of interface type beyond BLK and PMEM.  With a generic name
+    for the REGION level of the device-hierarchy old userspace
+    implementations can still make sense of new kernel advertised
+    region-types.  Userspace can always rely on the generic region
+    attributes like "mappings", "size", etc and the expected child devices
+    named "namespace".  This generic format of the device-model hierarchy
+    allows the LIBNVDIMM and LIBNDCTL implementations to be more uniform and
+    future-proof.
+
+    4. There are more robust mechanisms for determining the major type of a
+    region than a device name.  See the next section, How Do I Determine the
+    Major Type of a Region?
+
+How Do I Determine the Major Type of a Region?
+----------------------------------------------
+
+Outside of the blanket recommendation of "use libndctl", or simply
+looking at the kernel header (/usr/include/linux/ndctl.h) to decode the
+"nstype" integer attribute, here are some other options.
+
+    1. module alias lookup:
+
+    The whole point of region/namespace device type differentiation is to
+    decide which block-device driver will attach to a given LIBNVDIMM namespace.
+    One can simply use the modalias to lookup the resulting module.  It's
+    important to note that this method is robust in the presence of a
+    vendor-specific driver down the road.  If a vendor-specific
+    implementation wants to supplant the standard nd_blk driver it can with
+    minimal impact to the rest of LIBNVDIMM.
+
+    In fact, a vendor may also want to have a vendor-specific region-driver
+    (outside of nd_region).  For example, if a vendor defined its own LABEL
+    format it would need its own region driver to parse that LABEL and emit
+    the resulting namespaces.  The output from module resolution is more
+    accurate than a region-name or region-devtype.
+
+    2. udev:
+
+    The kernel "devtype" is registered in the udev database
+    # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region0
+    P: /devices/platform/nfit_test.0/ndbus0/region0
+    E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region0
+    E: DEVTYPE=nd_pmem
+    E: MODALIAS=nd:t2
+    E: SUBSYSTEM=nd
+
+    # udevadm info --path=/devices/platform/nfit_test.0/ndbus0/region4
+    P: /devices/platform/nfit_test.0/ndbus0/region4
+    E: DEVPATH=/devices/platform/nfit_test.0/ndbus0/region4
+    E: DEVTYPE=nd_blk
+    E: MODALIAS=nd:t3
+    E: SUBSYSTEM=nd
+
+    ...and is available as a region attribute, but keep in mind that the
+    "devtype" does not indicate sub-type variations and scripts should
+    really be understanding the other attributes.
+
+    3. type specific attributes:
+
+    As it currently stands a BLK-aperture region will never have a
+    "nfit/spa_index" attribute, but neither will a non-NFIT PMEM region.  A
+    BLK region with a "mappings" value of 0 is, as mentioned above, a DIMM
+    that does not allow I/O.  A PMEM region with a "mappings" value of zero
+    is a simple system-physical-address range.
+
+
+LIBNVDIMM/LIBNDCTL: Namespace
+-------------------------
+
+A REGION, after resolving DPA aliasing and LABEL specified boundaries,
+surfaces one or more "namespace" devices.  The arrival of a "namespace"
+device currently triggers either the nd_blk or nd_pmem driver to load
+and register a disk/block device.
+
+LIBNVDIMM: namespace
+Here is a sample layout from the three major types of NAMESPACE where
+namespace0.0 represents DIMM-info-backed PMEM (note that it has a 'uuid'
+attribute), namespace2.0 represents a BLK namespace (note it has a
+'sector_size' attribute) that, and namespace6.0 represents an anonymous
+PMEM namespace (note that has no 'uuid' attribute due to not support a
+LABEL).
+
+	/sys/devices/platform/nfit_test.0/ndbus0/region0/namespace0.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.0/ndbus0/region2/namespace2.0
+	|-- alt_name
+	|-- devtype
+	|-- dpa_extents
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	|-- uevent
+	`-- uuid
+	/sys/devices/platform/nfit_test.1/ndbus1/region6/namespace6.0
+	|-- block
+	|   `-- pmem0
+	|-- devtype
+	|-- driver -> ../../../../../../bus/nd/drivers/pmem
+	|-- force_raw
+	|-- modalias
+	|-- numa_node
+	|-- resource
+	|-- size
+	|-- subsystem -> ../../../../../../bus/nd
+	|-- type
+	`-- uevent
+
+LIBNDCTL: namespace enumeration example
+Namespaces are indexed relative to their parent region, example below.
+These indexes are mostly static from boot to boot, but subsystem makes
+no guarantees in this regard.  For a static namespace identifier use its
+'uuid' attribute.
+
+static struct ndctl_namespace *get_namespace_by_id(struct ndctl_region *region,
+                unsigned int id)
+{
+        struct ndctl_namespace *ndns;
+
+        ndctl_namespace_foreach(region, ndns)
+                if (ndctl_namespace_get_id(ndns) == id)
+                        return ndns;
+
+        return NULL;
+}
+
+LIBNDCTL: namespace creation example
+Idle namespaces are automatically created by the kernel if a given
+region has enough available capacity to create a new namespace.
+Namespace instantiation involves finding an idle namespace and
+configuring it.  For the most part the setting of namespace attributes
+can occur in any order, the only constraint is that 'uuid' must be set
+before 'size'.  This enables the kernel to track DPA allocations
+internally with a static identifier.
+
+static int configure_namespace(struct ndctl_region *region,
+                struct ndctl_namespace *ndns,
+                struct namespace_parameters *parameters)
+{
+        char devname[50];
+
+        snprintf(devname, sizeof(devname), "namespace%d.%d",
+                        ndctl_region_get_id(region), paramaters->id);
+
+        ndctl_namespace_set_alt_name(ndns, devname);
+        /* 'uuid' must be set prior to setting size! */
+        ndctl_namespace_set_uuid(ndns, paramaters->uuid);
+        ndctl_namespace_set_size(ndns, paramaters->size);
+        /* unlike pmem namespaces, blk namespaces have a sector size */
+        if (parameters->lbasize)
+                ndctl_namespace_set_sector_size(ndns, parameters->lbasize);
+        ndctl_namespace_enable(ndns);
+}
+
+
+Why the Term "namespace"?
+
+    1. Why not "volume" for instance?  "volume" ran the risk of confusing ND
+    as a volume manager like device-mapper.
+
+    2. The term originated to describe the sub-devices that can be created
+    within a NVME controller (see the nvme specification:
+    http://www.nvmexpress.org/specifications/), and NFIT namespaces are
+    meant to parallel the capabilities and configurability of
+    NVME-namespaces.
+
+
+LIBNVDIMM/LIBNDCTL: Block Translation Table "btt"
+---------------------------------------------
+
+A BTT (design document: http://pmem.io/2014/09/23/btt.html) is a stacked
+block device driver that fronts either the whole block device or a
+partition of a block device emitted by either a PMEM or BLK NAMESPACE.
+
+LIBNVDIMM: btt layout
+Every region will start out with at least one BTT device which is the
+seed device.  To activate it set the "namespace", "uuid", and
+"sector_size" attributes and then bind the device to the nd_pmem or
+nd_blk driver depending on the region type.
+
+	/sys/devices/platform/nfit_test.1/ndbus0/region0/btt0/
+	|-- namespace
+	|-- delete
+	|-- devtype
+	|-- modalias
+	|-- numa_node
+	|-- sector_size
+	|-- subsystem -> ../../../../../bus/nd
+	|-- uevent
+	`-- uuid
+
+LIBNDCTL: btt creation example
+Similar to namespaces an idle BTT device is automatically created per
+region.  Each time this "seed" btt device is configured and enabled a new
+seed is created.  Creating a BTT configuration involves two steps of
+finding and idle BTT and assigning it to consume a PMEM or BLK namespace.
+
+	static struct ndctl_btt *get_idle_btt(struct ndctl_region *region)
+	{
+		struct ndctl_btt *btt;
+
+		ndctl_btt_foreach(region, btt)
+			if (!ndctl_btt_is_enabled(btt)
+					&& !ndctl_btt_is_configured(btt))
+				return btt;
+
+		return NULL;
+	}
+
+	static int configure_btt(struct ndctl_region *region,
+			struct btt_parameters *parameters)
+	{
+		btt = get_idle_btt(region);
+
+		ndctl_btt_set_uuid(btt, parameters->uuid);
+		ndctl_btt_set_sector_size(btt, parameters->sector_size);
+		ndctl_btt_set_namespace(btt, parameters->ndns);
+		/* turn off raw mode device */
+		ndctl_namespace_disable(parameters->ndns);
+		/* turn on btt access */
+		ndctl_btt_enable(btt);
+	}
+
+Once instantiated a new inactive btt seed device will appear underneath
+the region.
+
+Once a "namespace" is removed from a BTT that instance of the BTT device
+will be deleted or otherwise reset to default values.  This deletion is
+only at the device model level.  In order to destroy a BTT the "info
+block" needs to be destroyed.  Note, that to destroy a BTT the media
+needs to be written in raw mode.  By default, the kernel will autodetect
+the presence of a BTT and disable raw mode.  This autodetect behavior
+can be suppressed by enabling raw mode for the namespace via the
+ndctl_namespace_set_raw_mode() api.
+
+
+Summary LIBNDCTL Diagram
+------------------------
+
+For the given example above, here is the view of the objects as seen by the LIBNDCTL api:
+            +---+
+            |CTX|    +---------+   +--------------+  +---------------+
+            +-+-+  +-> REGION0 +---> NAMESPACE0.0 +--> PMEM8 "pm0.0" |
+              |    | +---------+   +--------------+  +---------------+
++-------+     |    | +---------+   +--------------+  +---------------+
+| DIMM0 <-+   |    +-> REGION1 +---> NAMESPACE1.0 +--> PMEM6 "pm1.0" |
++-------+ |   |    | +---------+   +--------------+  +---------------+
+| DIMM1 <-+ +-v--+ | +---------+   +--------------+  +---------------+
++-------+ +-+BUS0+---> REGION2 +-+-> NAMESPACE2.0 +--> ND6  "blk2.0" |
+| DIMM2 <-+ +----+ | +---------+ | +--------------+  +----------------------+
++-------+ |        |             +-> NAMESPACE2.1 +--> ND5  "blk2.1" | BTT2 |
+| DIMM3 <-+        |               +--------------+  +----------------------+
++-------+          | +---------+   +--------------+  +---------------+
+                   +-> REGION3 +-+-> NAMESPACE3.0 +--> ND4  "blk3.0" |
+                   | +---------+ | +--------------+  +----------------------+
+                   |             +-> NAMESPACE3.1 +--> ND3  "blk3.1" | BTT1 |
+                   |               +--------------+  +----------------------+
+                   | +---------+   +--------------+  +---------------+
+                   +-> REGION4 +---> NAMESPACE4.0 +--> ND2  "blk4.0" |
+                   | +---------+   +--------------+  +---------------+
+                   | +---------+   +--------------+  +----------------------+
+                   +-> REGION5 +---> NAMESPACE5.0 +--> ND1  "blk5.0" | BTT0 |
+                     +---------+   +--------------+  +---------------+------+
+
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 67e0b863d2af..0e6b09150aad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6102,6 +6102,39 @@ M:	Sasha Levin <sasha.levin@oracle.com>
 S:	Maintained
 F:	tools/lib/lockdep/
 
+LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
+M:	Dan Williams <dan.j.williams@intel.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/*
+F:	include/linux/nd.h
+F:	include/linux/libnvdimm.h
+F:	include/uapi/linux/ndctl.h
+
+LIBNVDIMM BLK: MMIO-APERTURE DRIVER
+M:	Ross Zwisler <ross.zwisler@linux.intel.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/blk.c
+F:	drivers/nvdimm/region_devs.c
+F:	drivers/acpi/nfit*
+
+LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
+M:	Vishal Verma <vishal.l.verma@intel.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/btt*
+
+LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
+M:	Ross Zwisler <ross.zwisler@linux.intel.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/pmem.c
+
 LINUX FOR IBM pSERIES (RS/6000)
 M:	Paul Mackerras <paulus@au.ibm.com>
 W:	http://www.ibm.com/linux/ltc/projects/ppc
@@ -8363,12 +8396,6 @@ S:	Maintained
 F:	Documentation/blockdev/ramdisk.txt
 F:	drivers/block/brd.c
 
-PERSISTENT MEMORY DRIVER
-M:	Ross Zwisler <ross.zwisler@linux.intel.com>
-L:	linux-nvdimm@lists.01.org
-S:	Supported
-F:	drivers/block/pmem.c
-
 RANDOM NUMBER DRIVER
 M:	"Theodore Ts'o" <tytso@mit.edu>
 S:	Maintained
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ab21e0d58278..9d4aa18f2a82 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -158,6 +158,7 @@ static __init int is_reserve_region(efi_memory_desc_t *md)
 	case EFI_BOOT_SERVICES_CODE:
 	case EFI_BOOT_SERVICES_DATA:
 	case EFI_CONVENTIONAL_MEMORY:
+	case EFI_PERSISTENT_MEMORY:
 		return 0;
 	default:
 		break;
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 47e962f7ed5a..caae3f4e4341 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1222,6 +1222,10 @@ efi_initialize_iomem_resources(struct resource *code_resource,
 				flags |= IORESOURCE_DISABLED;
 				break;
 
+			case EFI_PERSISTENT_MEMORY:
+				name = "Persistent Memory";
+				break;
+
 			case EFI_RESERVED_TYPE:
 			case EFI_RUNTIME_SERVICES_CODE:
 			case EFI_RUNTIME_SERVICES_DATA:
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fcf0ade7e91..d05a42357ef0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -27,6 +27,7 @@ config X86
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_PMEM_API
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
@@ -1419,6 +1420,9 @@ source "mm/Kconfig"
 
 config X86_PMEM_LEGACY
 	bool "Support non-standard NVDIMMs and ADR protected memory"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	select LIBNVDIMM
 	help
 	  Treat memory marked using the non-standard e820 type of 12 as used
 	  by the Intel Sandy Bridge-EP reference BIOS as protected memory.
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 48304b89b601..2c82bd150d43 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1224,6 +1224,10 @@ static efi_status_t setup_e820(struct boot_params *params,
 			e820_type = E820_NVS;
 			break;
 
+		case EFI_PERSISTENT_MEMORY:
+			e820_type = E820_PMEM;
+			break;
+
 		default:
 			continue;
 		}
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b6f7457d12e4..9bf3ea14b9f0 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -4,6 +4,7 @@
 /* Caches aren't brain-dead on the intel. */
 #include <asm-generic/cacheflush.h>
 #include <asm/special_insns.h>
+#include <asm/uaccess.h>
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
@@ -108,4 +109,75 @@ static inline int rodata_test(void)
 }
 #endif
 
+#ifdef ARCH_HAS_NOCACHE_UACCESS
+
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	int unwritten;
+
+	/*
+	 * We are copying between two kernel buffers, if
+	 * __copy_from_user_inatomic_nocache() returns an error (page
+	 * fault) we would have already reported a general protection fault
+	 * before the WARN+BUG.
+	 */
+	unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+			(void __user *) src, n);
+	if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+				__func__, dst, src, unwritten))
+		BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+	/*
+	 * wmb() to 'sfence' all previous writes such that they are
+	 * architecturally visible to 'pcommit'.  Note, that we've
+	 * already arranged for pmem writes to avoid the cache via
+	 * arch_memcpy_to_pmem().
+	 */
+	wmb();
+	pcommit_sfence();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+#ifdef CONFIG_X86_64
+	/*
+	 * We require that wmb() be an 'sfence', that is only guaranteed on
+	 * 64-bit builds
+	 */
+	return static_cpu_has(X86_FEATURE_PCOMMIT);
+#else
+	return false;
+#endif
+}
+#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
+extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
+extern void arch_wmb_pmem(void);
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 83ec9b1d77cc..cc9c61bc1abe 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -248,6 +248,12 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
+static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
+	unsigned long size)
+{
+	return (void __force __pmem *) ioremap_cache(offset, size);
+}
+
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 960a8a9dc4ab..0f457e6eab18 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -32,6 +32,7 @@
 #define E820_ACPI	3
 #define E820_NVS	4
 #define E820_UNUSABLE	5
+#define E820_PMEM	7
 
 /*
  * This is a non-standardized way to represent ADR or NVDIMM regions that
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c8dda42cb6a3..a102564d08eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -149,6 +149,7 @@ static void __init e820_print_type(u32 type)
 	case E820_UNUSABLE:
 		printk(KERN_CONT "unusable");
 		break;
+	case E820_PMEM:
 	case E820_PRAM:
 		printk(KERN_CONT "persistent (type %u)", type);
 		break;
@@ -918,11 +919,32 @@ static inline const char *e820_type_to_string(int e820_type)
 	case E820_ACPI:	return "ACPI Tables";
 	case E820_NVS:	return "ACPI Non-volatile Storage";
 	case E820_UNUSABLE:	return "Unusable memory";
-	case E820_PRAM: return "Persistent RAM";
+	case E820_PRAM: return "Persistent Memory (legacy)";
+	case E820_PMEM: return "Persistent Memory";
 	default:	return "reserved";
 	}
 }
 
+static bool do_mark_busy(u32 type, struct resource *res)
+{
+	/* this is the legacy bios/dos rom-shadow + mmio region */
+	if (res->start < (1ULL<<20))
+		return true;
+
+	/*
+	 * Treat persistent memory like device memory, i.e. reserve it
+	 * for exclusive use of a driver
+	 */
+	switch (type) {
+	case E820_RESERVED:
+	case E820_PRAM:
+	case E820_PMEM:
+		return false;
+	default:
+		return true;
+	}
+}
+
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -952,9 +974,7 @@ void __init e820_reserve_resources(void)
 		 * pci device BAR resource and insert them later in
 		 * pcibios_resource_survey()
 		 */
-		if (((e820.map[i].type != E820_RESERVED) &&
-		     (e820.map[i].type != E820_PRAM)) ||
-		     res->start < (1ULL<<20)) {
+		if (do_mark_busy(e820.map[i].type, res)) {
 			res->flags |= IORESOURCE_BUSY;
 			insert_resource(&iomem_resource, res);
 		}
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 3420c874ddc5..64f90f53bb85 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -1,53 +1,82 @@
 /*
  * Copyright (c) 2015, Christoph Hellwig.
+ * Copyright (c) 2015, Intel Corporation.
  */
-#include <linux/memblock.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
 #include <asm/e820.h>
-#include <asm/page_types.h>
-#include <asm/setup.h>
 
-static __init void register_pmem_device(struct resource *res)
+static void e820_pmem_release(struct device *dev)
 {
-	struct platform_device *pdev;
-	int error;
+	struct nvdimm_bus *nvdimm_bus = dev->platform_data;
 
-	pdev = platform_device_alloc("pmem", PLATFORM_DEVID_AUTO);
-	if (!pdev)
-		return;
+	if (nvdimm_bus)
+		nvdimm_bus_unregister(nvdimm_bus);
+}
 
-	error = platform_device_add_resources(pdev, res, 1);
-	if (error)
-		goto out_put_pdev;
+static struct platform_device e820_pmem = {
+	.name = "e820_pmem",
+	.id = -1,
+	.dev = {
+		.release = e820_pmem_release,
+	},
+};
 
-	error = platform_device_add(pdev);
-	if (error)
-		goto out_put_pdev;
-	return;
+static const struct attribute_group *e820_pmem_attribute_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
 
-out_put_pdev:
-	dev_warn(&pdev->dev, "failed to add 'pmem' (persistent memory) device!\n");
-	platform_device_put(pdev);
-}
+static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
 
-static __init int register_pmem_devices(void)
+static __init int register_e820_pmem(void)
 {
-	int i;
+	static struct nvdimm_bus_descriptor nd_desc;
+	struct device *dev = &e820_pmem.dev;
+	struct nvdimm_bus *nvdimm_bus;
+	int rc, i;
+
+	rc = platform_device_register(&e820_pmem);
+	if (rc)
+		return rc;
+
+	nd_desc.attr_groups = e820_pmem_attribute_groups;
+	nd_desc.provider_name = "e820";
+	nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
+	if (!nvdimm_bus)
+		goto err;
+	dev->platform_data = nvdimm_bus;
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
+		struct resource res = {
+			.flags	= IORESOURCE_MEM,
+			.start	= ei->addr,
+			.end	= ei->addr + ei->size - 1,
+		};
+		struct nd_region_desc ndr_desc;
+
+		if (ei->type != E820_PRAM)
+			continue;
 
-		if (ei->type == E820_PRAM) {
-			struct resource res = {
-				.flags	= IORESOURCE_MEM,
-				.start	= ei->addr,
-				.end	= ei->addr + ei->size - 1,
-			};
-			register_pmem_device(&res);
-		}
+		memset(&ndr_desc, 0, sizeof(ndr_desc));
+		ndr_desc.res = &res;
+		ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+		ndr_desc.numa_node = NUMA_NO_NODE;
+		if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+			goto err;
 	}
 
 	return 0;
+
+ err:
+	dev_err(dev, "failed to register legacy persistent memory ranges\n");
+	platform_device_unregister(&e820_pmem);
+	return -ENXIO;
 }
-device_initcall(register_pmem_devices);
+device_initcall(register_e820_pmem);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c1c382c58c60..cfba30f27392 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -174,6 +174,9 @@ static void __init do_add_efi_memmap(void)
 		case EFI_UNUSABLE_MEMORY:
 			e820_type = E820_UNUSABLE;
 			break;
+		case EFI_PERSISTENT_MEMORY:
+			e820_type = E820_PMEM;
+			break;
 		default:
 			/*
 			 * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
diff --git a/drivers/Kconfig b/drivers/Kconfig
index c0cc96bab9e7..6e973b8e3a3b 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -182,4 +182,6 @@ source "drivers/thunderbolt/Kconfig"
 
 source "drivers/android/Kconfig"
 
+source "drivers/nvdimm/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 9a02fb7c5106..b64b49f6e01b 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_FB_INTEL)          += video/fbdev/intelfb/
 
 obj-$(CONFIG_PARPORT)		+= parport/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
+obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 35da507411a0..f15db002be8e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -386,6 +386,32 @@ config ACPI_REDUCED_HARDWARE_ONLY
 
 	  If you are unsure what to do, do not enable this option.
 
+config ACPI_NFIT
+	tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	select LIBNVDIMM
+	help
+	  Infrastructure to probe ACPI 6 compliant platforms for
+	  NVDIMMs (NFIT) and register a libnvdimm device tree.  In
+	  addition to storage devices this also enables libnvdimm to pass
+	  ACPI._DSM messages for platform/dimm configuration.
+
+	  To compile this driver as a module, choose M here:
+	  the module will be called nfit.
+
+config ACPI_NFIT_DEBUG
+	bool "NFIT DSM debug"
+	depends on ACPI_NFIT
+	depends on DYNAMIC_DEBUG
+	default n
+	help
+	  Enabling this option causes the nfit driver to dump the
+	  input and output buffers of _DSM operations on the ACPI0012
+	  device and its children.  This can be very verbose, so leave
+	  it disabled unless you are debugging a hardware / firmware
+	  issue.
+
 source "drivers/acpi/apei/Kconfig"
 
 config ACPI_EXTLOG
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 73d840bef455..8321430d7f24 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT)	+= pci_slot.o
 obj-$(CONFIG_ACPI_PROCESSOR)	+= processor.o
 obj-y				+= container.o
 obj-$(CONFIG_ACPI_THERMAL)	+= thermal.o
+obj-$(CONFIG_ACPI_NFIT)		+= nfit.o
 obj-y				+= acpi_memhotplug.o
 obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
 obj-$(CONFIG_ACPI_BATTERY)	+= battery.o
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
new file mode 100644
index 000000000000..2161fa178c8d
--- /dev/null
+++ b/drivers/acpi/nfit.c
@@ -0,0 +1,1587 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/list_sort.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/list.h>
+#include <linux/acpi.h>
+#include <linux/sort.h>
+#include <linux/io.h>
+#include "nfit.h"
+
+/*
+ * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
+ * irrelevant.
+ */
+#include <asm-generic/io-64-nonatomic-hi-lo.h>
+
+static bool force_enable_dimms;
+module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+
+static u8 nfit_uuid[NFIT_UUID_MAX][16];
+
+const u8 *to_nfit_uuid(enum nfit_uuids id)
+{
+	return nfit_uuid[id];
+}
+EXPORT_SYMBOL(to_nfit_uuid);
+
+static struct acpi_nfit_desc *to_acpi_nfit_desc(
+		struct nvdimm_bus_descriptor *nd_desc)
+{
+	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+	/*
+	 * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
+	 * acpi_device.
+	 */
+	if (!nd_desc->provider_name
+			|| strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
+		return NULL;
+
+	return to_acpi_device(acpi_desc->dev);
+}
+
+static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int buf_len)
+{
+	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+	const struct nd_cmd_desc *desc = NULL;
+	union acpi_object in_obj, in_buf, *out_obj;
+	struct device *dev = acpi_desc->dev;
+	const char *cmd_name, *dimm_name;
+	unsigned long dsm_mask;
+	acpi_handle handle;
+	const u8 *uuid;
+	u32 offset;
+	int rc, i;
+
+	if (nvdimm) {
+		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+		struct acpi_device *adev = nfit_mem->adev;
+
+		if (!adev)
+			return -ENOTTY;
+		dimm_name = nvdimm_name(nvdimm);
+		cmd_name = nvdimm_cmd_name(cmd);
+		dsm_mask = nfit_mem->dsm_mask;
+		desc = nd_cmd_dimm_desc(cmd);
+		uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+		handle = adev->handle;
+	} else {
+		struct acpi_device *adev = to_acpi_dev(acpi_desc);
+
+		cmd_name = nvdimm_bus_cmd_name(cmd);
+		dsm_mask = nd_desc->dsm_mask;
+		desc = nd_cmd_bus_desc(cmd);
+		uuid = to_nfit_uuid(NFIT_DEV_BUS);
+		handle = adev->handle;
+		dimm_name = "bus";
+	}
+
+	if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
+		return -ENOTTY;
+
+	if (!test_bit(cmd, &dsm_mask))
+		return -ENOTTY;
+
+	in_obj.type = ACPI_TYPE_PACKAGE;
+	in_obj.package.count = 1;
+	in_obj.package.elements = &in_buf;
+	in_buf.type = ACPI_TYPE_BUFFER;
+	in_buf.buffer.pointer = buf;
+	in_buf.buffer.length = 0;
+
+	/* libnvdimm has already validated the input envelope */
+	for (i = 0; i < desc->in_num; i++)
+		in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
+				i, buf);
+
+	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+		dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__,
+				dimm_name, cmd_name, in_buf.buffer.length);
+		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+				4, in_buf.buffer.pointer, min_t(u32, 128,
+					in_buf.buffer.length), true);
+	}
+
+	out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj);
+	if (!out_obj) {
+		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
+				cmd_name);
+		return -EINVAL;
+	}
+
+	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
+		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
+				__func__, dimm_name, cmd_name, out_obj->type);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+		dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
+				dimm_name, cmd_name, out_obj->buffer.length);
+		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+				4, out_obj->buffer.pointer, min_t(u32, 128,
+					out_obj->buffer.length), true);
+	}
+
+	for (i = 0, offset = 0; i < desc->out_num; i++) {
+		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
+				(u32 *) out_obj->buffer.pointer);
+
+		if (offset + out_size > out_obj->buffer.length) {
+			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			break;
+		}
+
+		if (in_buf.buffer.length + offset + out_size > buf_len) {
+			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			rc = -ENXIO;
+			goto out;
+		}
+		memcpy(buf + in_buf.buffer.length + offset,
+				out_obj->buffer.pointer + offset, out_size);
+		offset += out_size;
+	}
+	if (offset + in_buf.buffer.length < buf_len) {
+		if (i >= 1) {
+			/*
+			 * status valid, return the number of bytes left
+			 * unfilled in the output buffer
+			 */
+			rc = buf_len - offset - in_buf.buffer.length;
+		} else {
+			dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
+					__func__, dimm_name, cmd_name, buf_len,
+					offset);
+			rc = -ENXIO;
+		}
+	} else
+		rc = 0;
+
+ out:
+	ACPI_FREE(out_obj);
+
+	return rc;
+}
+
+static const char *spa_type_name(u16 type)
+{
+	static const char *to_name[] = {
+		[NFIT_SPA_VOLATILE] = "volatile",
+		[NFIT_SPA_PM] = "pmem",
+		[NFIT_SPA_DCR] = "dimm-control-region",
+		[NFIT_SPA_BDW] = "block-data-window",
+		[NFIT_SPA_VDISK] = "volatile-disk",
+		[NFIT_SPA_VCD] = "volatile-cd",
+		[NFIT_SPA_PDISK] = "persistent-disk",
+		[NFIT_SPA_PCD] = "persistent-cd",
+
+	};
+
+	if (type > NFIT_SPA_PCD)
+		return "unknown";
+
+	return to_name[type];
+}
+
+static int nfit_spa_type(struct acpi_nfit_system_address *spa)
+{
+	int i;
+
+	for (i = 0; i < NFIT_UUID_MAX; i++)
+		if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0)
+			return i;
+	return -1;
+}
+
+static bool add_spa(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa),
+			GFP_KERNEL);
+
+	if (!nfit_spa)
+		return false;
+	INIT_LIST_HEAD(&nfit_spa->list);
+	nfit_spa->spa = spa;
+	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
+	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+			spa->range_index,
+			spa_type_name(nfit_spa_type(spa)));
+	return true;
+}
+
+static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_memory_map *memdev)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_memdev *nfit_memdev = devm_kzalloc(dev,
+			sizeof(*nfit_memdev), GFP_KERNEL);
+
+	if (!nfit_memdev)
+		return false;
+	INIT_LIST_HEAD(&nfit_memdev->list);
+	nfit_memdev->memdev = memdev;
+	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
+	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
+			__func__, memdev->device_handle, memdev->range_index,
+			memdev->region_index);
+	return true;
+}
+
+static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_control_region *dcr)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr),
+			GFP_KERNEL);
+
+	if (!nfit_dcr)
+		return false;
+	INIT_LIST_HEAD(&nfit_dcr->list);
+	nfit_dcr->dcr = dcr;
+	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
+	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+			dcr->region_index, dcr->windows);
+	return true;
+}
+
+static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_data_region *bdw)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw),
+			GFP_KERNEL);
+
+	if (!nfit_bdw)
+		return false;
+	INIT_LIST_HEAD(&nfit_bdw->list);
+	nfit_bdw->bdw = bdw;
+	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
+	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+			bdw->region_index, bdw->windows);
+	return true;
+}
+
+static bool add_idt(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_interleave *idt)
+{
+	struct device *dev = acpi_desc->dev;
+	struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt),
+			GFP_KERNEL);
+
+	if (!nfit_idt)
+		return false;
+	INIT_LIST_HEAD(&nfit_idt->list);
+	nfit_idt->idt = idt;
+	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
+	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+			idt->interleave_index, idt->line_count);
+	return true;
+}
+
+static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table,
+		const void *end)
+{
+	struct device *dev = acpi_desc->dev;
+	struct acpi_nfit_header *hdr;
+	void *err = ERR_PTR(-ENOMEM);
+
+	if (table >= end)
+		return NULL;
+
+	hdr = table;
+	switch (hdr->type) {
+	case ACPI_NFIT_TYPE_SYSTEM_ADDRESS:
+		if (!add_spa(acpi_desc, table))
+			return err;
+		break;
+	case ACPI_NFIT_TYPE_MEMORY_MAP:
+		if (!add_memdev(acpi_desc, table))
+			return err;
+		break;
+	case ACPI_NFIT_TYPE_CONTROL_REGION:
+		if (!add_dcr(acpi_desc, table))
+			return err;
+		break;
+	case ACPI_NFIT_TYPE_DATA_REGION:
+		if (!add_bdw(acpi_desc, table))
+			return err;
+		break;
+	case ACPI_NFIT_TYPE_INTERLEAVE:
+		if (!add_idt(acpi_desc, table))
+			return err;
+		break;
+	case ACPI_NFIT_TYPE_FLUSH_ADDRESS:
+		dev_dbg(dev, "%s: flush\n", __func__);
+		break;
+	case ACPI_NFIT_TYPE_SMBIOS:
+		dev_dbg(dev, "%s: smbios\n", __func__);
+		break;
+	default:
+		dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
+		break;
+	}
+
+	return table + hdr->length;
+}
+
+static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_mem *nfit_mem)
+{
+	u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+	u16 dcr = nfit_mem->dcr->region_index;
+	struct nfit_spa *nfit_spa;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		u16 range_index = nfit_spa->spa->range_index;
+		int type = nfit_spa_type(nfit_spa->spa);
+		struct nfit_memdev *nfit_memdev;
+
+		if (type != NFIT_SPA_BDW)
+			continue;
+
+		list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+			if (nfit_memdev->memdev->range_index != range_index)
+				continue;
+			if (nfit_memdev->memdev->device_handle != device_handle)
+				continue;
+			if (nfit_memdev->memdev->region_index != dcr)
+				continue;
+
+			nfit_mem->spa_bdw = nfit_spa->spa;
+			return;
+		}
+	}
+
+	dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n",
+			nfit_mem->spa_dcr->range_index);
+	nfit_mem->bdw = NULL;
+}
+
+static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
+{
+	u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
+	struct nfit_memdev *nfit_memdev;
+	struct nfit_dcr *nfit_dcr;
+	struct nfit_bdw *nfit_bdw;
+	struct nfit_idt *nfit_idt;
+	u16 idt_idx, range_index;
+
+	list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+		if (nfit_dcr->dcr->region_index != dcr)
+			continue;
+		nfit_mem->dcr = nfit_dcr->dcr;
+		break;
+	}
+
+	if (!nfit_mem->dcr) {
+		dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
+				spa->range_index, __to_nfit_memdev(nfit_mem)
+				? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
+		return -ENODEV;
+	}
+
+	/*
+	 * We've found enough to create an nvdimm, optionally
+	 * find an associated BDW
+	 */
+	list_add(&nfit_mem->list, &acpi_desc->dimms);
+
+	list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
+		if (nfit_bdw->bdw->region_index != dcr)
+			continue;
+		nfit_mem->bdw = nfit_bdw->bdw;
+		break;
+	}
+
+	if (!nfit_mem->bdw)
+		return 0;
+
+	nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
+
+	if (!nfit_mem->spa_bdw)
+		return 0;
+
+	range_index = nfit_mem->spa_bdw->range_index;
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+		if (nfit_memdev->memdev->range_index != range_index ||
+				nfit_memdev->memdev->region_index != dcr)
+			continue;
+		nfit_mem->memdev_bdw = nfit_memdev->memdev;
+		idt_idx = nfit_memdev->memdev->interleave_index;
+		list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+			if (nfit_idt->idt->interleave_index != idt_idx)
+				continue;
+			nfit_mem->idt_bdw = nfit_idt->idt;
+			break;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	struct nfit_mem *nfit_mem, *found;
+	struct nfit_memdev *nfit_memdev;
+	int type = nfit_spa_type(spa);
+	u16 dcr;
+
+	switch (type) {
+	case NFIT_SPA_DCR:
+	case NFIT_SPA_PM:
+		break;
+	default:
+		return 0;
+	}
+
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+		int rc;
+
+		if (nfit_memdev->memdev->range_index != spa->range_index)
+			continue;
+		found = NULL;
+		dcr = nfit_memdev->memdev->region_index;
+		list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+			if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
+				found = nfit_mem;
+				break;
+			}
+
+		if (found)
+			nfit_mem = found;
+		else {
+			nfit_mem = devm_kzalloc(acpi_desc->dev,
+					sizeof(*nfit_mem), GFP_KERNEL);
+			if (!nfit_mem)
+				return -ENOMEM;
+			INIT_LIST_HEAD(&nfit_mem->list);
+		}
+
+		if (type == NFIT_SPA_DCR) {
+			struct nfit_idt *nfit_idt;
+			u16 idt_idx;
+
+			/* multiple dimms may share a SPA when interleaved */
+			nfit_mem->spa_dcr = spa;
+			nfit_mem->memdev_dcr = nfit_memdev->memdev;
+			idt_idx = nfit_memdev->memdev->interleave_index;
+			list_for_each_entry(nfit_idt, &acpi_desc->idts, list) {
+				if (nfit_idt->idt->interleave_index != idt_idx)
+					continue;
+				nfit_mem->idt_dcr = nfit_idt->idt;
+				break;
+			}
+		} else {
+			/*
+			 * A single dimm may belong to multiple SPA-PM
+			 * ranges, record at least one in addition to
+			 * any SPA-DCR range.
+			 */
+			nfit_mem->memdev_pmem = nfit_memdev->memdev;
+		}
+
+		if (found)
+			continue;
+
+		rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
+{
+	struct nfit_mem *a = container_of(_a, typeof(*a), list);
+	struct nfit_mem *b = container_of(_b, typeof(*b), list);
+	u32 handleA, handleB;
+
+	handleA = __to_nfit_memdev(a)->device_handle;
+	handleB = __to_nfit_memdev(b)->device_handle;
+	if (handleA < handleB)
+		return -1;
+	else if (handleA > handleB)
+		return 1;
+	return 0;
+}
+
+static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nfit_spa *nfit_spa;
+
+	/*
+	 * For each SPA-DCR or SPA-PMEM address range find its
+	 * corresponding MEMDEV(s).  From each MEMDEV find the
+	 * corresponding DCR.  Then, if we're operating on a SPA-DCR,
+	 * try to find a SPA-BDW and a corresponding BDW that references
+	 * the DCR.  Throw it all into an nfit_mem object.  Note, that
+	 * BDWs are optional.
+	 */
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		int rc;
+
+		rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
+		if (rc)
+			return rc;
+	}
+
+	list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
+
+	return 0;
+}
+
+static ssize_t revision_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+	return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision);
+}
+static DEVICE_ATTR_RO(revision);
+
+static struct attribute *acpi_nfit_attributes[] = {
+	&dev_attr_revision.attr,
+	NULL,
+};
+
+static struct attribute_group acpi_nfit_attribute_group = {
+	.name = "nfit",
+	.attrs = acpi_nfit_attributes,
+};
+
+const struct attribute_group *acpi_nfit_attribute_groups[] = {
+	&nvdimm_bus_attribute_group,
+	&acpi_nfit_attribute_group,
+	NULL,
+};
+EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups);
+
+static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+	return __to_nfit_memdev(nfit_mem);
+}
+
+static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+
+	return nfit_mem->dcr;
+}
+
+static ssize_t handle_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+	return sprintf(buf, "%#x\n", memdev->device_handle);
+}
+static DEVICE_ATTR_RO(handle);
+
+static ssize_t phys_id_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev);
+
+	return sprintf(buf, "%#x\n", memdev->physical_id);
+}
+static DEVICE_ATTR_RO(phys_id);
+
+static ssize_t vendor_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "%#x\n", dcr->vendor_id);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t rev_id_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "%#x\n", dcr->revision_id);
+}
+static DEVICE_ATTR_RO(rev_id);
+
+static ssize_t device_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "%#x\n", dcr->device_id);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t format_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "%#x\n", dcr->code);
+}
+static DEVICE_ATTR_RO(format);
+
+static ssize_t serial_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
+
+	return sprintf(buf, "%#x\n", dcr->serial_number);
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t flags_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u16 flags = to_nfit_memdev(dev)->flags;
+
+	return sprintf(buf, "%s%s%s%s%s\n",
+			flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "",
+			flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "",
+			flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "",
+			flags & ACPI_NFIT_MEM_ARMED ? "arm " : "",
+			flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : "");
+}
+static DEVICE_ATTR_RO(flags);
+
+static struct attribute *acpi_nfit_dimm_attributes[] = {
+	&dev_attr_handle.attr,
+	&dev_attr_phys_id.attr,
+	&dev_attr_vendor.attr,
+	&dev_attr_device.attr,
+	&dev_attr_format.attr,
+	&dev_attr_serial.attr,
+	&dev_attr_rev_id.attr,
+	&dev_attr_flags.attr,
+	NULL,
+};
+
+static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (to_nfit_dcr(dev))
+		return a->mode;
+	else
+		return 0;
+}
+
+static struct attribute_group acpi_nfit_dimm_attribute_group = {
+	.name = "nfit",
+	.attrs = acpi_nfit_dimm_attributes,
+	.is_visible = acpi_nfit_dimm_attr_visible,
+};
+
+static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
+	&nvdimm_attribute_group,
+	&nd_device_attribute_group,
+	&acpi_nfit_dimm_attribute_group,
+	NULL,
+};
+
+static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
+		u32 device_handle)
+{
+	struct nfit_mem *nfit_mem;
+
+	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
+		if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle)
+			return nfit_mem->nvdimm;
+
+	return NULL;
+}
+
+static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_mem *nfit_mem, u32 device_handle)
+{
+	struct acpi_device *adev, *adev_dimm;
+	struct device *dev = acpi_desc->dev;
+	const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+	unsigned long long sta;
+	int i, rc = -ENODEV;
+	acpi_status status;
+
+	nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
+	adev = to_acpi_dev(acpi_desc);
+	if (!adev)
+		return 0;
+
+	adev_dimm = acpi_find_child_device(adev, device_handle, false);
+	nfit_mem->adev = adev_dimm;
+	if (!adev_dimm) {
+		dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
+				device_handle);
+		return force_enable_dimms ? 0 : -ENODEV;
+	}
+
+	status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
+	if (status == AE_NOT_FOUND) {
+		dev_dbg(dev, "%s missing _STA, assuming enabled...\n",
+				dev_name(&adev_dimm->dev));
+		rc = 0;
+	} else if (ACPI_FAILURE(status))
+		dev_err(dev, "%s failed to retrieve_STA, disabling...\n",
+				dev_name(&adev_dimm->dev));
+	else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0)
+		dev_info(dev, "%s disabled by firmware\n",
+				dev_name(&adev_dimm->dev));
+	else
+		rc = 0;
+
+	for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
+		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+			set_bit(i, &nfit_mem->dsm_mask);
+
+	return force_enable_dimms ? 0 : rc;
+}
+
+static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nfit_mem *nfit_mem;
+	int dimm_count = 0;
+
+	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+		struct nvdimm *nvdimm;
+		unsigned long flags = 0;
+		u32 device_handle;
+		u16 mem_flags;
+		int rc;
+
+		device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+		nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
+		if (nvdimm) {
+			/*
+			 * If for some reason we find multiple DCRs the
+			 * first one wins
+			 */
+			dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n",
+					nvdimm_name(nvdimm));
+			continue;
+		}
+
+		if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+			flags |= NDD_ALIASING;
+
+		mem_flags = __to_nfit_memdev(nfit_mem)->flags;
+		if (mem_flags & ACPI_NFIT_MEM_ARMED)
+			flags |= NDD_UNARMED;
+
+		rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
+		if (rc)
+			continue;
+
+		nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
+				acpi_nfit_dimm_attribute_groups,
+				flags, &nfit_mem->dsm_mask);
+		if (!nvdimm)
+			return -ENOMEM;
+
+		nfit_mem->nvdimm = nvdimm;
+		dimm_count++;
+
+		if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
+			continue;
+
+		dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n",
+				nvdimm_name(nvdimm),
+			mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "",
+			mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "",
+			mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "",
+			mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : "");
+
+	}
+
+	return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+}
+
+static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+	struct acpi_device *adev;
+	int i;
+
+	adev = to_acpi_dev(acpi_desc);
+	if (!adev)
+		return;
+
+	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++)
+		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+			set_bit(i, &nd_desc->dsm_mask);
+}
+
+static ssize_t range_index_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region);
+
+	return sprintf(buf, "%d\n", nfit_spa->spa->range_index);
+}
+static DEVICE_ATTR_RO(range_index);
+
+static struct attribute *acpi_nfit_region_attributes[] = {
+	&dev_attr_range_index.attr,
+	NULL,
+};
+
+static struct attribute_group acpi_nfit_region_attribute_group = {
+	.name = "nfit",
+	.attrs = acpi_nfit_region_attributes,
+};
+
+static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
+	&nd_region_attribute_group,
+	&nd_mapping_attribute_group,
+	&nd_device_attribute_group,
+	&nd_numa_attribute_group,
+	&acpi_nfit_region_attribute_group,
+	NULL,
+};
+
+/* enough info to uniquely specify an interleave set */
+struct nfit_set_info {
+	struct nfit_set_info_map {
+		u64 region_offset;
+		u32 serial_number;
+		u32 pad;
+	} mapping[0];
+};
+
+static size_t sizeof_nfit_set_info(int num_mappings)
+{
+	return sizeof(struct nfit_set_info)
+		+ num_mappings * sizeof(struct nfit_set_info_map);
+}
+
+static int cmp_map(const void *m0, const void *m1)
+{
+	const struct nfit_set_info_map *map0 = m0;
+	const struct nfit_set_info_map *map1 = m1;
+
+	return memcmp(&map0->region_offset, &map1->region_offset,
+			sizeof(u64));
+}
+
+/* Retrieve the nth entry referencing this spa */
+static struct acpi_nfit_memory_map *memdev_from_spa(
+		struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
+{
+	struct nfit_memdev *nfit_memdev;
+
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list)
+		if (nfit_memdev->memdev->range_index == range_index)
+			if (n-- == 0)
+				return nfit_memdev->memdev;
+	return NULL;
+}
+
+static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
+		struct nd_region_desc *ndr_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	int i, spa_type = nfit_spa_type(spa);
+	struct device *dev = acpi_desc->dev;
+	struct nd_interleave_set *nd_set;
+	u16 nr = ndr_desc->num_mappings;
+	struct nfit_set_info *info;
+
+	if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
+		/* pass */;
+	else
+		return 0;
+
+	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
+	if (!nd_set)
+		return -ENOMEM;
+
+	info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	for (i = 0; i < nr; i++) {
+		struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+		struct nfit_set_info_map *map = &info->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+		struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
+				spa->range_index, i);
+
+		if (!memdev || !nfit_mem->dcr) {
+			dev_err(dev, "%s: failed to find DCR\n", __func__);
+			return -ENODEV;
+		}
+
+		map->region_offset = memdev->region_offset;
+		map->serial_number = nfit_mem->dcr->serial_number;
+	}
+
+	sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
+			cmp_map, NULL);
+	nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
+	ndr_desc->nd_set = nd_set;
+	devm_kfree(dev, info);
+
+	return 0;
+}
+
+static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio)
+{
+	struct acpi_nfit_interleave *idt = mmio->idt;
+	u32 sub_line_offset, line_index, line_offset;
+	u64 line_no, table_skip_count, table_offset;
+
+	line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset);
+	table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index);
+	line_offset = idt->line_offset[line_index]
+		* mmio->line_size;
+	table_offset = table_skip_count * mmio->table_size;
+
+	return mmio->base_offset + line_offset + table_offset + sub_line_offset;
+}
+
+static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
+{
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+	u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+
+	if (mmio->num_lines)
+		offset = to_interleave_offset(offset, mmio);
+
+	return readq(mmio->base + offset);
+}
+
+static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
+		resource_size_t dpa, unsigned int len, unsigned int write)
+{
+	u64 cmd, offset;
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
+
+	enum {
+		BCW_OFFSET_MASK = (1ULL << 48)-1,
+		BCW_LEN_SHIFT = 48,
+		BCW_LEN_MASK = (1ULL << 8) - 1,
+		BCW_CMD_SHIFT = 56,
+	};
+
+	cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK;
+	len = len >> L1_CACHE_SHIFT;
+	cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT;
+	cmd |= ((u64) write) << BCW_CMD_SHIFT;
+
+	offset = nfit_blk->cmd_offset + mmio->size * bw;
+	if (mmio->num_lines)
+		offset = to_interleave_offset(offset, mmio);
+
+	writeq(cmd, mmio->base + offset);
+	/* FIXME: conditionally perform read-back if mandated by firmware */
+}
+
+static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
+		resource_size_t dpa, void *iobuf, size_t len, int rw,
+		unsigned int lane)
+{
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+	unsigned int copied = 0;
+	u64 base_offset;
+	int rc;
+
+	base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES
+		+ lane * mmio->size;
+	/* TODO: non-temporal access, flush hints, cache management etc... */
+	write_blk_ctl(nfit_blk, lane, dpa, len, rw);
+	while (len) {
+		unsigned int c;
+		u64 offset;
+
+		if (mmio->num_lines) {
+			u32 line_offset;
+
+			offset = to_interleave_offset(base_offset + copied,
+					mmio);
+			div_u64_rem(offset, mmio->line_size, &line_offset);
+			c = min_t(size_t, len, mmio->line_size - line_offset);
+		} else {
+			offset = base_offset + nfit_blk->bdw_offset;
+			c = len;
+		}
+
+		if (rw)
+			memcpy(mmio->aperture + offset, iobuf + copied, c);
+		else
+			memcpy(iobuf + copied, mmio->aperture + offset, c);
+
+		copied += c;
+		len -= c;
+	}
+	rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0;
+	return rc;
+}
+
+static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr,
+		resource_size_t dpa, void *iobuf, u64 len, int rw)
+{
+	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+	struct nd_region *nd_region = nfit_blk->nd_region;
+	unsigned int lane, copied = 0;
+	int rc = 0;
+
+	lane = nd_region_acquire_lane(nd_region);
+	while (len) {
+		u64 c = min(len, mmio->size);
+
+		rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied,
+				iobuf + copied, c, rw, lane);
+		if (rc)
+			break;
+
+		copied += c;
+		len -= c;
+	}
+	nd_region_release_lane(nd_region, lane);
+
+	return rc;
+}
+
+static void nfit_spa_mapping_release(struct kref *kref)
+{
+	struct nfit_spa_mapping *spa_map = to_spa_map(kref);
+	struct acpi_nfit_system_address *spa = spa_map->spa;
+	struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc;
+
+	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
+	dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
+	iounmap(spa_map->iomem);
+	release_mem_region(spa->address, spa->length);
+	list_del(&spa_map->list);
+	kfree(spa_map);
+}
+
+static struct nfit_spa_mapping *find_spa_mapping(
+		struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	struct nfit_spa_mapping *spa_map;
+
+	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
+	list_for_each_entry(spa_map, &acpi_desc->spa_maps, list)
+		if (spa_map->spa == spa)
+			return spa_map;
+
+	return NULL;
+}
+
+static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	struct nfit_spa_mapping *spa_map;
+
+	mutex_lock(&acpi_desc->spa_map_mutex);
+	spa_map = find_spa_mapping(acpi_desc, spa);
+
+	if (spa_map)
+		kref_put(&spa_map->kref, nfit_spa_mapping_release);
+	mutex_unlock(&acpi_desc->spa_map_mutex);
+}
+
+static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	resource_size_t start = spa->address;
+	resource_size_t n = spa->length;
+	struct nfit_spa_mapping *spa_map;
+	struct resource *res;
+
+	WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
+
+	spa_map = find_spa_mapping(acpi_desc, spa);
+	if (spa_map) {
+		kref_get(&spa_map->kref);
+		return spa_map->iomem;
+	}
+
+	spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
+	if (!spa_map)
+		return NULL;
+
+	INIT_LIST_HEAD(&spa_map->list);
+	spa_map->spa = spa;
+	kref_init(&spa_map->kref);
+	spa_map->acpi_desc = acpi_desc;
+
+	res = request_mem_region(start, n, dev_name(acpi_desc->dev));
+	if (!res)
+		goto err_mem;
+
+	/* TODO: cacheability based on the spa type */
+	spa_map->iomem = ioremap_nocache(start, n);
+	if (!spa_map->iomem)
+		goto err_map;
+
+	list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
+	return spa_map->iomem;
+
+ err_map:
+	release_mem_region(start, n);
+ err_mem:
+	kfree(spa_map);
+	return NULL;
+}
+
+/**
+ * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges
+ * @nvdimm_bus: NFIT-bus that provided the spa table entry
+ * @nfit_spa: spa table to map
+ *
+ * In the case where block-data-window apertures and
+ * dimm-control-regions are interleaved they will end up sharing a
+ * single request_mem_region() + ioremap() for the address range.  In
+ * the style of devm nfit_spa_map() mappings are automatically dropped
+ * when all region devices referencing the same mapping are disabled /
+ * unbound.
+ */
+static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
+		struct acpi_nfit_system_address *spa)
+{
+	void __iomem *iomem;
+
+	mutex_lock(&acpi_desc->spa_map_mutex);
+	iomem = __nfit_spa_map(acpi_desc, spa);
+	mutex_unlock(&acpi_desc->spa_map_mutex);
+
+	return iomem;
+}
+
+static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio,
+		struct acpi_nfit_interleave *idt, u16 interleave_ways)
+{
+	if (idt) {
+		mmio->num_lines = idt->line_count;
+		mmio->line_size = idt->line_size;
+		if (interleave_ways == 0)
+			return -ENXIO;
+		mmio->table_size = mmio->num_lines * interleave_ways
+			* mmio->line_size;
+	}
+
+	return 0;
+}
+
+static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
+		struct device *dev)
+{
+	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+	struct nfit_blk_mmio *mmio;
+	struct nfit_blk *nfit_blk;
+	struct nfit_mem *nfit_mem;
+	struct nvdimm *nvdimm;
+	int rc;
+
+	nvdimm = nd_blk_region_to_dimm(ndbr);
+	nfit_mem = nvdimm_provider_data(nvdimm);
+	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
+		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+				nfit_mem ? "" : " nfit_mem",
+				nfit_mem->dcr ? "" : " dcr",
+				nfit_mem->bdw ? "" : " bdw");
+		return -ENXIO;
+	}
+
+	nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL);
+	if (!nfit_blk)
+		return -ENOMEM;
+	nd_blk_region_set_provider_data(ndbr, nfit_blk);
+	nfit_blk->nd_region = to_nd_region(dev);
+
+	/* map block aperture memory */
+	nfit_blk->bdw_offset = nfit_mem->bdw->offset;
+	mmio = &nfit_blk->mmio[BDW];
+	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw);
+	if (!mmio->base) {
+		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+				nvdimm_name(nvdimm));
+		return -ENOMEM;
+	}
+	mmio->size = nfit_mem->bdw->size;
+	mmio->base_offset = nfit_mem->memdev_bdw->region_offset;
+	mmio->idt = nfit_mem->idt_bdw;
+	mmio->spa = nfit_mem->spa_bdw;
+	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
+			nfit_mem->memdev_bdw->interleave_ways);
+	if (rc) {
+		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
+				__func__, nvdimm_name(nvdimm));
+		return rc;
+	}
+
+	/* map block control memory */
+	nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
+	nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
+	mmio = &nfit_blk->mmio[DCR];
+	mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr);
+	if (!mmio->base) {
+		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+				nvdimm_name(nvdimm));
+		return -ENOMEM;
+	}
+	mmio->size = nfit_mem->dcr->window_size;
+	mmio->base_offset = nfit_mem->memdev_dcr->region_offset;
+	mmio->idt = nfit_mem->idt_dcr;
+	mmio->spa = nfit_mem->spa_dcr;
+	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
+			nfit_mem->memdev_dcr->interleave_ways);
+	if (rc) {
+		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
+				__func__, nvdimm_name(nvdimm));
+		return rc;
+	}
+
+	if (mmio->line_size == 0)
+		return 0;
+
+	if ((u32) nfit_blk->cmd_offset % mmio->line_size
+			+ 8 > mmio->line_size) {
+		dev_dbg(dev, "cmd_offset crosses interleave boundary\n");
+		return -ENXIO;
+	} else if ((u32) nfit_blk->stat_offset % mmio->line_size
+			+ 8 > mmio->line_size) {
+		dev_dbg(dev, "stat_offset crosses interleave boundary\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
+		struct device *dev)
+{
+	struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+	struct nd_blk_region *ndbr = to_nd_blk_region(dev);
+	struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr);
+	int i;
+
+	if (!nfit_blk)
+		return; /* never enabled */
+
+	/* auto-free BLK spa mappings */
+	for (i = 0; i < 2; i++) {
+		struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
+
+		if (mmio->base)
+			nfit_spa_unmap(acpi_desc, mmio->spa);
+	}
+	nd_blk_region_set_provider_data(ndbr, NULL);
+	/* devm will free nfit_blk */
+}
+
+static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
+		struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+		struct acpi_nfit_memory_map *memdev,
+		struct acpi_nfit_system_address *spa)
+{
+	struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
+			memdev->device_handle);
+	struct nd_blk_region_desc *ndbr_desc;
+	struct nfit_mem *nfit_mem;
+	int blk_valid = 0;
+
+	if (!nvdimm) {
+		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
+				spa->range_index, memdev->device_handle);
+		return -ENODEV;
+	}
+
+	nd_mapping->nvdimm = nvdimm;
+	switch (nfit_spa_type(spa)) {
+	case NFIT_SPA_PM:
+	case NFIT_SPA_VOLATILE:
+		nd_mapping->start = memdev->address;
+		nd_mapping->size = memdev->region_size;
+		break;
+	case NFIT_SPA_DCR:
+		nfit_mem = nvdimm_provider_data(nvdimm);
+		if (!nfit_mem || !nfit_mem->bdw) {
+			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
+					spa->range_index, nvdimm_name(nvdimm));
+		} else {
+			nd_mapping->size = nfit_mem->bdw->capacity;
+			nd_mapping->start = nfit_mem->bdw->start_address;
+			ndr_desc->num_lanes = nfit_mem->bdw->windows;
+			blk_valid = 1;
+		}
+
+		ndr_desc->nd_mapping = nd_mapping;
+		ndr_desc->num_mappings = blk_valid;
+		ndbr_desc = to_blk_region_desc(ndr_desc);
+		ndbr_desc->enable = acpi_nfit_blk_region_enable;
+		ndbr_desc->disable = acpi_nfit_blk_region_disable;
+		ndbr_desc->do_io = acpi_desc->blk_do_io;
+		if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc))
+			return -ENOMEM;
+		break;
+	}
+
+	return 0;
+}
+
+static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_blk_region_desc ndbr_desc;
+	struct nd_region_desc *ndr_desc;
+	struct nfit_memdev *nfit_memdev;
+	struct nvdimm_bus *nvdimm_bus;
+	struct resource res;
+	int count = 0, rc;
+
+	if (spa->range_index == 0) {
+		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
+				__func__);
+		return 0;
+	}
+
+	memset(&res, 0, sizeof(res));
+	memset(&nd_mappings, 0, sizeof(nd_mappings));
+	memset(&ndbr_desc, 0, sizeof(ndbr_desc));
+	res.start = spa->address;
+	res.end = res.start + spa->length - 1;
+	ndr_desc = &ndbr_desc.ndr_desc;
+	ndr_desc->res = &res;
+	ndr_desc->provider_data = nfit_spa;
+	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
+	if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+		ndr_desc->numa_node = acpi_map_pxm_to_online_node(
+						spa->proximity_domain);
+	else
+		ndr_desc->numa_node = NUMA_NO_NODE;
+
+	list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
+		struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
+		struct nd_mapping *nd_mapping;
+
+		if (memdev->range_index != spa->range_index)
+			continue;
+		if (count >= ND_MAX_MAPPINGS) {
+			dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n",
+					spa->range_index, ND_MAX_MAPPINGS);
+			return -ENXIO;
+		}
+		nd_mapping = &nd_mappings[count++];
+		rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+				memdev, spa);
+		if (rc)
+			return rc;
+	}
+
+	ndr_desc->nd_mapping = nd_mappings;
+	ndr_desc->num_mappings = count;
+	rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
+	if (rc)
+		return rc;
+
+	nvdimm_bus = acpi_desc->nvdimm_bus;
+	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
+		if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
+			return -ENOMEM;
+	} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
+		if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nfit_spa *nfit_spa;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		int rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
+{
+	struct device *dev = acpi_desc->dev;
+	const void *end;
+	u8 *data;
+	int rc;
+
+	INIT_LIST_HEAD(&acpi_desc->spa_maps);
+	INIT_LIST_HEAD(&acpi_desc->spas);
+	INIT_LIST_HEAD(&acpi_desc->dcrs);
+	INIT_LIST_HEAD(&acpi_desc->bdws);
+	INIT_LIST_HEAD(&acpi_desc->idts);
+	INIT_LIST_HEAD(&acpi_desc->memdevs);
+	INIT_LIST_HEAD(&acpi_desc->dimms);
+	mutex_init(&acpi_desc->spa_map_mutex);
+
+	data = (u8 *) acpi_desc->nfit;
+	end = data + sz;
+	data += sizeof(struct acpi_table_nfit);
+	while (!IS_ERR_OR_NULL(data))
+		data = add_table(acpi_desc, data, end);
+
+	if (IS_ERR(data)) {
+		dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
+				PTR_ERR(data));
+		return PTR_ERR(data);
+	}
+
+	if (nfit_mem_init(acpi_desc) != 0)
+		return -ENOMEM;
+
+	acpi_nfit_init_dsms(acpi_desc);
+
+	rc = acpi_nfit_register_dimms(acpi_desc);
+	if (rc)
+		return rc;
+
+	return acpi_nfit_register_regions(acpi_desc);
+}
+EXPORT_SYMBOL_GPL(acpi_nfit_init);
+
+static int acpi_nfit_add(struct acpi_device *adev)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct acpi_nfit_desc *acpi_desc;
+	struct device *dev = &adev->dev;
+	struct acpi_table_header *tbl;
+	acpi_status status = AE_OK;
+	acpi_size sz;
+	int rc;
+
+	status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "failed to find NFIT\n");
+		return -ENXIO;
+	}
+
+	acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+	if (!acpi_desc)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, acpi_desc);
+	acpi_desc->dev = dev;
+	acpi_desc->nfit = (struct acpi_table_nfit *) tbl;
+	acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io;
+	nd_desc = &acpi_desc->nd_desc;
+	nd_desc->provider_name = "ACPI.NFIT";
+	nd_desc->ndctl = acpi_nfit_ctl;
+	nd_desc->attr_groups = acpi_nfit_attribute_groups;
+
+	acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc);
+	if (!acpi_desc->nvdimm_bus)
+		return -ENXIO;
+
+	rc = acpi_nfit_init(acpi_desc, sz);
+	if (rc) {
+		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+		return rc;
+	}
+	return 0;
+}
+
+static int acpi_nfit_remove(struct acpi_device *adev)
+{
+	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+
+	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+	return 0;
+}
+
+static const struct acpi_device_id acpi_nfit_ids[] = {
+	{ "ACPI0012", 0 },
+	{ "", 0 },
+};
+MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids);
+
+static struct acpi_driver acpi_nfit_driver = {
+	.name = KBUILD_MODNAME,
+	.ids = acpi_nfit_ids,
+	.ops = {
+		.add = acpi_nfit_add,
+		.remove = acpi_nfit_remove,
+	},
+};
+
+static __init int nfit_init(void)
+{
+	BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
+	BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
+
+	acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]);
+	acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]);
+	acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]);
+	acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]);
+	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]);
+	acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]);
+	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]);
+	acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]);
+	acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
+	acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+
+	return acpi_bus_register_driver(&acpi_nfit_driver);
+}
+
+static __exit void nfit_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_nfit_driver);
+}
+
+module_init(nfit_init);
+module_exit(nfit_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
new file mode 100644
index 000000000000..81f2e8c5a79c
--- /dev/null
+++ b/drivers/acpi/nfit.h
@@ -0,0 +1,158 @@
+/*
+ * NVDIMM Firmware Interface Table - NFIT
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_H__
+#define __NFIT_H__
+#include <linux/libnvdimm.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/acpi.h>
+#include <acpi/acuuid.h>
+
+#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba"
+#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66"
+#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
+		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
+		| ACPI_NFIT_MEM_ARMED)
+
+enum nfit_uuids {
+	NFIT_SPA_VOLATILE,
+	NFIT_SPA_PM,
+	NFIT_SPA_DCR,
+	NFIT_SPA_BDW,
+	NFIT_SPA_VDISK,
+	NFIT_SPA_VCD,
+	NFIT_SPA_PDISK,
+	NFIT_SPA_PCD,
+	NFIT_DEV_BUS,
+	NFIT_DEV_DIMM,
+	NFIT_UUID_MAX,
+};
+
+struct nfit_spa {
+	struct acpi_nfit_system_address *spa;
+	struct list_head list;
+};
+
+struct nfit_dcr {
+	struct acpi_nfit_control_region *dcr;
+	struct list_head list;
+};
+
+struct nfit_bdw {
+	struct acpi_nfit_data_region *bdw;
+	struct list_head list;
+};
+
+struct nfit_idt {
+	struct acpi_nfit_interleave *idt;
+	struct list_head list;
+};
+
+struct nfit_memdev {
+	struct acpi_nfit_memory_map *memdev;
+	struct list_head list;
+};
+
+/* assembled tables for a given dimm/memory-device */
+struct nfit_mem {
+	struct nvdimm *nvdimm;
+	struct acpi_nfit_memory_map *memdev_dcr;
+	struct acpi_nfit_memory_map *memdev_pmem;
+	struct acpi_nfit_memory_map *memdev_bdw;
+	struct acpi_nfit_control_region *dcr;
+	struct acpi_nfit_data_region *bdw;
+	struct acpi_nfit_system_address *spa_dcr;
+	struct acpi_nfit_system_address *spa_bdw;
+	struct acpi_nfit_interleave *idt_dcr;
+	struct acpi_nfit_interleave *idt_bdw;
+	struct list_head list;
+	struct acpi_device *adev;
+	unsigned long dsm_mask;
+};
+
+struct acpi_nfit_desc {
+	struct nvdimm_bus_descriptor nd_desc;
+	struct acpi_table_nfit *nfit;
+	struct mutex spa_map_mutex;
+	struct list_head spa_maps;
+	struct list_head memdevs;
+	struct list_head dimms;
+	struct list_head spas;
+	struct list_head dcrs;
+	struct list_head bdws;
+	struct list_head idts;
+	struct nvdimm_bus *nvdimm_bus;
+	struct device *dev;
+	unsigned long dimm_dsm_force_en;
+	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+			void *iobuf, u64 len, int rw);
+};
+
+enum nd_blk_mmio_selector {
+	BDW,
+	DCR,
+};
+
+struct nfit_blk {
+	struct nfit_blk_mmio {
+		union {
+			void __iomem *base;
+			void *aperture;
+		};
+		u64 size;
+		u64 base_offset;
+		u32 line_size;
+		u32 num_lines;
+		u32 table_size;
+		struct acpi_nfit_interleave *idt;
+		struct acpi_nfit_system_address *spa;
+	} mmio[2];
+	struct nd_region *nd_region;
+	u64 bdw_offset; /* post interleave offset */
+	u64 stat_offset;
+	u64 cmd_offset;
+};
+
+struct nfit_spa_mapping {
+	struct acpi_nfit_desc *acpi_desc;
+	struct acpi_nfit_system_address *spa;
+	struct list_head list;
+	struct kref kref;
+	void __iomem *iomem;
+};
+
+static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
+{
+	return container_of(kref, struct nfit_spa_mapping, kref);
+}
+
+static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
+		struct nfit_mem *nfit_mem)
+{
+	if (nfit_mem->memdev_dcr)
+		return nfit_mem->memdev_dcr;
+	return nfit_mem->memdev_pmem;
+}
+
+static inline struct acpi_nfit_desc *to_acpi_desc(
+		struct nvdimm_bus_descriptor *nd_desc)
+{
+	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+const u8 *to_nfit_uuid(enum nfit_uuids id);
+int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
+extern const struct attribute_group *acpi_nfit_attribute_groups[];
+#endif /* __NFIT_H__ */
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 1333cbdc3ea2..acaa3b4ea504 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -29,6 +29,8 @@
 #include <linux/errno.h>
 #include <linux/acpi.h>
 #include <linux/numa.h>
+#include <linux/nodemask.h>
+#include <linux/topology.h>
 
 #define PREFIX "ACPI: "
 
@@ -70,7 +72,12 @@ static void __acpi_map_pxm_to_node(int pxm, int node)
 
 int acpi_map_pxm_to_node(int pxm)
 {
-	int node = pxm_to_node_map[pxm];
+	int node;
+
+	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
+		return NUMA_NO_NODE;
+
+	node = pxm_to_node_map[pxm];
 
 	if (node == NUMA_NO_NODE) {
 		if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
@@ -83,6 +90,45 @@ int acpi_map_pxm_to_node(int pxm)
 	return node;
 }
 
+/**
+ * acpi_map_pxm_to_online_node - Map proximity ID to online node
+ * @pxm: ACPI proximity ID
+ *
+ * This is similar to acpi_map_pxm_to_node(), but always returns an online
+ * node.  When the mapped node from a given proximity ID is offline, it
+ * looks up the node distance table and returns the nearest online node.
+ *
+ * ACPI device drivers, which are called after the NUMA initialization has
+ * completed in the kernel, can call this interface to obtain their device
+ * NUMA topology from ACPI tables.  Such drivers do not have to deal with
+ * offline nodes.  A node may be offline when a device proximity ID is
+ * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
+ * "numa=off" on x86.
+ */
+int acpi_map_pxm_to_online_node(int pxm)
+{
+	int node, n, dist, min_dist;
+
+	node = acpi_map_pxm_to_node(pxm);
+
+	if (node == NUMA_NO_NODE)
+		node = 0;
+
+	if (!node_online(node)) {
+		min_dist = INT_MAX;
+		for_each_online_node(n) {
+			dist = node_distance(node, n);
+			if (dist < min_dist) {
+				min_dist = dist;
+				node = n;
+			}
+		}
+	}
+
+	return node;
+}
+EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
+
 static void __init
 acpi_table_print_srat_entry(struct acpi_subtable_header *header)
 {
@@ -328,8 +374,6 @@ int acpi_get_node(acpi_handle handle)
 	int pxm;
 
 	pxm = acpi_get_pxm(handle);
-	if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
-		return NUMA_NO_NODE;
 
 	return acpi_map_pxm_to_node(pxm);
 }
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 3ccef9eba6f9..1b8094d4d7af 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX
 	  and will prevent RAM block device backing store memory from being
 	  allocated from highmem (only a problem for highmem systems).
 
-config BLK_DEV_PMEM
-	tristate "Persistent memory block device support"
-	depends on HAS_IOMEM
-	help
-	  Saying Y here will allow you to use a contiguous range of reserved
-	  memory as one or more persistent block devices.
-
-	  To compile this driver as a module, choose M here: the module will be
-	  called 'pmem'.
-
-	  If unsure, say N.
-
 config CDROM_PKTCDVD
 	tristate "Packet writing on CD/DVD media"
 	depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9cc6c18a1c7e..02b688d1438d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM)		+= ps3vram.o
 obj-$(CONFIG_ATARI_FLOPPY)	+= ataflop.o
 obj-$(CONFIG_AMIGA_Z2RAM)	+= z2ram.o
 obj-$(CONFIG_BLK_DEV_RAM)	+= brd.o
-obj-$(CONFIG_BLK_DEV_PMEM)	+= pmem.o
 obj-$(CONFIG_BLK_DEV_LOOP)	+= loop.o
 obj-$(CONFIG_BLK_CPQ_DA)	+= cpqarray.o
 obj-$(CONFIG_BLK_CPQ_CISS_DA)  += cciss.o
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
new file mode 100644
index 000000000000..72226acb5c0f
--- /dev/null
+++ b/drivers/nvdimm/Kconfig
@@ -0,0 +1,68 @@
+menuconfig LIBNVDIMM
+	tristate "NVDIMM (Non-Volatile Memory Device) Support"
+	depends on PHYS_ADDR_T_64BIT
+	depends on BLK_DEV
+	help
+	  Generic support for non-volatile memory devices including
+	  ACPI-6-NFIT defined resources.  On platforms that define an
+	  NFIT, or otherwise can discover NVDIMM resources, a libnvdimm
+	  bus is registered to advertise PMEM (persistent memory)
+	  namespaces (/dev/pmemX) and BLK (sliding mmio window(s))
+	  namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a
+	  memory resource that may span multiple DIMMs and support DAX
+	  (see CONFIG_DAX).  A BLK namespace refers to an NVDIMM control
+	  region which exposes an mmio register set for windowed access
+	  mode to non-volatile memory.
+
+if LIBNVDIMM
+
+config BLK_DEV_PMEM
+	tristate "PMEM: Persistent memory block device support"
+	default LIBNVDIMM
+	depends on HAS_IOMEM
+	select ND_BTT if BTT
+	help
+	  Memory ranges for PMEM are described by either an NFIT
+	  (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
+	  non-standard OEM-specific E820 memory type (type-12, see
+	  CONFIG_X86_PMEM_LEGACY), or it is manually specified by the
+	  'memmap=nn[KMG]!ss[KMG]' kernel command line (see
+	  Documentation/kernel-parameters.txt).  This driver converts
+	  these persistent memory ranges into block devices that are
+	  capable of DAX (direct-access) file system mappings.  See
+	  Documentation/nvdimm/nvdimm.txt for more details.
+
+	  Say Y if you want to use an NVDIMM
+
+config ND_BLK
+	tristate "BLK: Block data window (aperture) device support"
+	default LIBNVDIMM
+	select ND_BTT if BTT
+	help
+	  Support NVDIMMs, or other devices, that implement a BLK-mode
+	  access capability.  BLK-mode access uses memory-mapped-i/o
+	  apertures to access persistent media.
+
+	  Say Y if your platform firmware emits an ACPI.NFIT table
+	  (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
+	  capabilities.
+
+config ND_BTT
+	tristate
+
+config BTT
+	bool "BTT: Block Translation Table (atomic sector updates)"
+	default y if LIBNVDIMM
+	help
+	  The Block Translation Table (BTT) provides atomic sector
+	  update semantics for persistent memory devices, so that
+	  applications that rely on sector writes not being torn (a
+	  guarantee that typical disks provide) can continue to do so.
+	  The BTT manifests itself as an alternate personality for an
+	  NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX,
+	  ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys,
+	  etc...).
+
+	  Select Y if unsure
+
+endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
new file mode 100644
index 000000000000..594bb97c867a
--- /dev/null
+++ b/drivers/nvdimm/Makefile
@@ -0,0 +1,20 @@
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+
+nd_pmem-y := pmem.o
+
+nd_btt-y := btt.o
+
+nd_blk-y := blk.o
+
+libnvdimm-y := core.o
+libnvdimm-y += bus.o
+libnvdimm-y += dimm_devs.o
+libnvdimm-y += dimm.o
+libnvdimm-y += region_devs.o
+libnvdimm-y += region.o
+libnvdimm-y += namespace_devs.o
+libnvdimm-y += label.o
+libnvdimm-$(CONFIG_BTT) += btt_devs.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
new file mode 100644
index 000000000000..4f97b248c236
--- /dev/null
+++ b/drivers/nvdimm/blk.c
@@ -0,0 +1,384 @@
+/*
+ * NVDIMM Block Window Driver
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/nd.h>
+#include <linux/sizes.h>
+#include "nd.h"
+
+struct nd_blk_device {
+	struct request_queue *queue;
+	struct gendisk *disk;
+	struct nd_namespace_blk *nsblk;
+	struct nd_blk_region *ndbr;
+	size_t disk_size;
+	u32 sector_size;
+	u32 internal_lbasize;
+};
+
+static int nd_blk_major;
+
+static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
+{
+	return blk_dev->nsblk->lbasize - blk_dev->sector_size;
+}
+
+static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
+				resource_size_t ns_offset, unsigned int len)
+{
+	int i;
+
+	for (i = 0; i < nsblk->num_resources; i++) {
+		if (ns_offset < resource_size(nsblk->res[i])) {
+			if (ns_offset + len > resource_size(nsblk->res[i])) {
+				dev_WARN_ONCE(&nsblk->common.dev, 1,
+					"illegal request\n");
+				return SIZE_MAX;
+			}
+			return nsblk->res[i]->start + ns_offset;
+		}
+		ns_offset -= resource_size(nsblk->res[i]);
+	}
+
+	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
+	return SIZE_MAX;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
+				struct bio_integrity_payload *bip, u64 lba,
+				int rw)
+{
+	unsigned int len = nd_blk_meta_size(blk_dev);
+	resource_size_t	dev_offset, ns_offset;
+	struct nd_namespace_blk *nsblk;
+	struct nd_blk_region *ndbr;
+	int err = 0;
+
+	nsblk = blk_dev->nsblk;
+	ndbr = blk_dev->ndbr;
+	ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
+	dev_offset = to_dev_offset(nsblk, ns_offset, len);
+	if (dev_offset == SIZE_MAX)
+		return -EIO;
+
+	while (len) {
+		unsigned int cur_len;
+		struct bio_vec bv;
+		void *iobuf;
+
+		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
+		/*
+		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
+		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
+		 * can use those directly
+		 */
+
+		cur_len = min(len, bv.bv_len);
+		iobuf = kmap_atomic(bv.bv_page);
+		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
+				cur_len, rw);
+		kunmap_atomic(iobuf);
+		if (err)
+			return err;
+
+		len -= cur_len;
+		dev_offset += cur_len;
+		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+	}
+
+	return err;
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
+				struct bio_integrity_payload *bip, u64 lba,
+				int rw)
+{
+	return 0;
+}
+#endif
+
+static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
+			struct bio_integrity_payload *bip, struct page *page,
+			unsigned int len, unsigned int off, int rw,
+			sector_t sector)
+{
+	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	resource_size_t	dev_offset, ns_offset;
+	int err = 0;
+	void *iobuf;
+	u64 lba;
+
+	while (len) {
+		unsigned int cur_len;
+
+		/*
+		 * If we don't have an integrity payload, we don't have to
+		 * split the bvec into sectors, as this would cause unnecessary
+		 * Block Window setup/move steps. the do_io routine is capable
+		 * of handling len <= PAGE_SIZE.
+		 */
+		cur_len = bip ? min(len, blk_dev->sector_size) : len;
+
+		lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
+		ns_offset = lba * blk_dev->internal_lbasize;
+		dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
+		if (dev_offset == SIZE_MAX)
+			return -EIO;
+
+		iobuf = kmap_atomic(page);
+		err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
+		kunmap_atomic(iobuf);
+		if (err)
+			return err;
+
+		if (bip) {
+			err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
+			if (err)
+				return err;
+		}
+		len -= cur_len;
+		off += cur_len;
+		sector += blk_dev->sector_size >> SECTOR_SHIFT;
+	}
+
+	return err;
+}
+
+static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct block_device *bdev = bio->bi_bdev;
+	struct gendisk *disk = bdev->bd_disk;
+	struct bio_integrity_payload *bip;
+	struct nd_blk_device *blk_dev;
+	struct bvec_iter iter;
+	unsigned long start;
+	struct bio_vec bvec;
+	int err = 0, rw;
+	bool do_acct;
+
+	/*
+	 * bio_integrity_enabled also checks if the bio already has an
+	 * integrity payload attached. If it does, we *don't* do a
+	 * bio_integrity_prep here - the payload has been generated by
+	 * another kernel subsystem, and we just pass it through.
+	 */
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+		err = -EIO;
+		goto out;
+	}
+
+	bip = bio_integrity(bio);
+	blk_dev = disk->private_data;
+	rw = bio_data_dir(bio);
+	do_acct = nd_iostat_start(bio, &start);
+	bio_for_each_segment(bvec, bio, iter) {
+		unsigned int len = bvec.bv_len;
+
+		BUG_ON(len > PAGE_SIZE);
+		err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
+					bvec.bv_offset, rw, iter.bi_sector);
+		if (err) {
+			dev_info(&blk_dev->nsblk->common.dev,
+					"io error in %s sector %lld, len %d,\n",
+					(rw == READ) ? "READ" : "WRITE",
+					(unsigned long long) iter.bi_sector, len);
+			break;
+		}
+	}
+	if (do_acct)
+		nd_iostat_end(bio, start);
+
+ out:
+	bio_endio(bio, err);
+}
+
+static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *iobuf, size_t n, int rw)
+{
+	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
+	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
+	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	resource_size_t	dev_offset;
+
+	dev_offset = to_dev_offset(nsblk, offset, n);
+
+	if (unlikely(offset + n > blk_dev->disk_size)) {
+		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
+		return -EFAULT;
+	}
+
+	if (dev_offset == SIZE_MAX)
+		return -EIO;
+
+	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
+}
+
+static const struct block_device_operations nd_blk_fops = {
+	.owner = THIS_MODULE,
+	.revalidate_disk = nvdimm_revalidate_disk,
+};
+
+static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
+		struct nd_blk_device *blk_dev)
+{
+	resource_size_t available_disk_size;
+	struct gendisk *disk;
+	u64 internal_nlba;
+
+	internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
+	available_disk_size = internal_nlba * blk_dev->sector_size;
+
+	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
+	if (!blk_dev->queue)
+		return -ENOMEM;
+
+	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
+	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
+	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
+	blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);
+
+	disk = blk_dev->disk = alloc_disk(0);
+	if (!disk) {
+		blk_cleanup_queue(blk_dev->queue);
+		return -ENOMEM;
+	}
+
+	disk->driverfs_dev	= &ndns->dev;
+	disk->major		= nd_blk_major;
+	disk->first_minor	= 0;
+	disk->fops		= &nd_blk_fops;
+	disk->private_data	= blk_dev;
+	disk->queue		= blk_dev->queue;
+	disk->flags		= GENHD_FL_EXT_DEVT;
+	nvdimm_namespace_disk_name(ndns, disk->disk_name);
+	set_capacity(disk, 0);
+	add_disk(disk);
+
+	if (nd_blk_meta_size(blk_dev)) {
+		int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));
+
+		if (rc) {
+			del_gendisk(disk);
+			put_disk(disk);
+			blk_cleanup_queue(blk_dev->queue);
+			return rc;
+		}
+	}
+
+	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
+	revalidate_disk(disk);
+	return 0;
+}
+
+static int nd_blk_probe(struct device *dev)
+{
+	struct nd_namespace_common *ndns;
+	struct nd_namespace_blk *nsblk;
+	struct nd_blk_device *blk_dev;
+	int rc;
+
+	ndns = nvdimm_namespace_common_probe(dev);
+	if (IS_ERR(ndns))
+		return PTR_ERR(ndns);
+
+	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
+	if (!blk_dev)
+		return -ENOMEM;
+
+	nsblk = to_nd_namespace_blk(&ndns->dev);
+	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
+	blk_dev->ndbr = to_nd_blk_region(dev->parent);
+	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
+	blk_dev->internal_lbasize = roundup(nsblk->lbasize,
+						INT_LBASIZE_ALIGNMENT);
+	blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
+	dev_set_drvdata(dev, blk_dev);
+
+	ndns->rw_bytes = nd_blk_rw_bytes;
+	if (is_nd_btt(dev))
+		rc = nvdimm_namespace_attach_btt(ndns);
+	else if (nd_btt_probe(ndns, blk_dev) == 0) {
+		/* we'll come back as btt-blk */
+		rc = -ENXIO;
+	} else
+		rc = nd_blk_attach_disk(ndns, blk_dev);
+	if (rc)
+		kfree(blk_dev);
+	return rc;
+}
+
+static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
+{
+	del_gendisk(blk_dev->disk);
+	put_disk(blk_dev->disk);
+	blk_cleanup_queue(blk_dev->queue);
+}
+
+static int nd_blk_remove(struct device *dev)
+{
+	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);
+
+	if (is_nd_btt(dev))
+		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+	else
+		nd_blk_detach_disk(blk_dev);
+	kfree(blk_dev);
+
+	return 0;
+}
+
+static struct nd_device_driver nd_blk_driver = {
+	.probe = nd_blk_probe,
+	.remove = nd_blk_remove,
+	.drv = {
+		.name = "nd_blk",
+	},
+	.type = ND_DRIVER_NAMESPACE_BLK,
+};
+
+static int __init nd_blk_init(void)
+{
+	int rc;
+
+	rc = register_blkdev(0, "nd_blk");
+	if (rc < 0)
+		return rc;
+
+	nd_blk_major = rc;
+	rc = nd_driver_register(&nd_blk_driver);
+
+	if (rc < 0)
+		unregister_blkdev(nd_blk_major, "nd_blk");
+
+	return rc;
+}
+
+static void __exit nd_blk_exit(void)
+{
+	driver_unregister(&nd_blk_driver.drv);
+	unregister_blkdev(nd_blk_major, "nd_blk");
+}
+
+MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
+module_init(nd_blk_init);
+module_exit(nd_blk_exit);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
new file mode 100644
index 000000000000..411c7b2bb37a
--- /dev/null
+++ b/drivers/nvdimm/btt.c
@@ -0,0 +1,1479 @@
+/*
+ * Block Translation Table
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/highmem.h>
+#include <linux/debugfs.h>
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/ndctl.h>
+#include <linux/fs.h>
+#include <linux/nd.h>
+#include "btt.h"
+#include "nd.h"
+
+enum log_ent_request {
+	LOG_NEW_ENT = 0,
+	LOG_OLD_ENT
+};
+
+static int btt_major;
+
+static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
+		void *buf, size_t n)
+{
+	struct nd_btt *nd_btt = arena->nd_btt;
+	struct nd_namespace_common *ndns = nd_btt->ndns;
+
+	/* arena offsets are 4K from the base of the device */
+	offset += SZ_4K;
+	return nvdimm_read_bytes(ndns, offset, buf, n);
+}
+
+static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
+		void *buf, size_t n)
+{
+	struct nd_btt *nd_btt = arena->nd_btt;
+	struct nd_namespace_common *ndns = nd_btt->ndns;
+
+	/* arena offsets are 4K from the base of the device */
+	offset += SZ_4K;
+	return nvdimm_write_bytes(ndns, offset, buf, n);
+}
+
+static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
+{
+	int ret;
+
+	ret = arena_write_bytes(arena, arena->info2off, super,
+			sizeof(struct btt_sb));
+	if (ret)
+		return ret;
+
+	return arena_write_bytes(arena, arena->infooff, super,
+			sizeof(struct btt_sb));
+}
+
+static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
+{
+	WARN_ON(!super);
+	return arena_read_bytes(arena, arena->infooff, super,
+			sizeof(struct btt_sb));
+}
+
+/*
+ * 'raw' version of btt_map write
+ * Assumptions:
+ *   mapping is in little-endian
+ *   mapping contains 'E' and 'Z' flags as desired
+ */
+static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
+{
+	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
+
+	WARN_ON(lba >= arena->external_nlba);
+	return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
+}
+
+static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
+			u32 z_flag, u32 e_flag)
+{
+	u32 ze;
+	__le32 mapping_le;
+
+	/*
+	 * This 'mapping' is supposed to be just the LBA mapping, without
+	 * any flags set, so strip the flag bits.
+	 */
+	mapping &= MAP_LBA_MASK;
+
+	ze = (z_flag << 1) + e_flag;
+	switch (ze) {
+	case 0:
+		/*
+		 * We want to set neither of the Z or E flags, and
+		 * in the actual layout, this means setting the bit
+		 * positions of both to '1' to indicate a 'normal'
+		 * map entry
+		 */
+		mapping |= MAP_ENT_NORMAL;
+		break;
+	case 1:
+		mapping |= (1 << MAP_ERR_SHIFT);
+		break;
+	case 2:
+		mapping |= (1 << MAP_TRIM_SHIFT);
+		break;
+	default:
+		/*
+		 * The case where Z and E are both sent in as '1' could be
+		 * construed as a valid 'normal' case, but we decide not to,
+		 * to avoid confusion
+		 */
+		WARN_ONCE(1, "Invalid use of Z and E flags\n");
+		return -EIO;
+	}
+
+	mapping_le = cpu_to_le32(mapping);
+	return __btt_map_write(arena, lba, mapping_le);
+}
+
+static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
+			int *trim, int *error)
+{
+	int ret;
+	__le32 in;
+	u32 raw_mapping, postmap, ze, z_flag, e_flag;
+	u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
+
+	WARN_ON(lba >= arena->external_nlba);
+
+	ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
+	if (ret)
+		return ret;
+
+	raw_mapping = le32_to_cpu(in);
+
+	z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
+	e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
+	ze = (z_flag << 1) + e_flag;
+	postmap = raw_mapping & MAP_LBA_MASK;
+
+	/* Reuse the {z,e}_flag variables for *trim and *error */
+	z_flag = 0;
+	e_flag = 0;
+
+	switch (ze) {
+	case 0:
+		/* Initial state. Return postmap = premap */
+		*mapping = lba;
+		break;
+	case 1:
+		*mapping = postmap;
+		e_flag = 1;
+		break;
+	case 2:
+		*mapping = postmap;
+		z_flag = 1;
+		break;
+	case 3:
+		*mapping = postmap;
+		break;
+	default:
+		return -EIO;
+	}
+
+	if (trim)
+		*trim = z_flag;
+	if (error)
+		*error = e_flag;
+
+	return ret;
+}
+
+static int btt_log_read_pair(struct arena_info *arena, u32 lane,
+			struct log_entry *ent)
+{
+	WARN_ON(!ent);
+	return arena_read_bytes(arena,
+			arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
+			2 * LOG_ENT_SIZE);
+}
+
+static struct dentry *debugfs_root;
+
+static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
+				int idx)
+{
+	char dirname[32];
+	struct dentry *d;
+
+	/* If for some reason, parent bttN was not created, exit */
+	if (!parent)
+		return;
+
+	snprintf(dirname, 32, "arena%d", idx);
+	d = debugfs_create_dir(dirname, parent);
+	if (IS_ERR_OR_NULL(d))
+		return;
+	a->debugfs_dir = d;
+
+	debugfs_create_x64("size", S_IRUGO, d, &a->size);
+	debugfs_create_x64("external_lba_start", S_IRUGO, d,
+				&a->external_lba_start);
+	debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
+	debugfs_create_u32("internal_lbasize", S_IRUGO, d,
+				&a->internal_lbasize);
+	debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
+	debugfs_create_u32("external_lbasize", S_IRUGO, d,
+				&a->external_lbasize);
+	debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
+	debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
+	debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
+	debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
+	debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
+	debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
+	debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
+	debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
+	debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
+	debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+}
+
+static void btt_debugfs_init(struct btt *btt)
+{
+	int i = 0;
+	struct arena_info *arena;
+
+	btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
+						debugfs_root);
+	if (IS_ERR_OR_NULL(btt->debugfs_dir))
+		return;
+
+	list_for_each_entry(arena, &btt->arena_list, list) {
+		arena_debugfs_init(arena, btt->debugfs_dir, i);
+		i++;
+	}
+}
+
+/*
+ * This function accepts two log entries, and uses the
+ * sequence number to find the 'older' entry.
+ * It also updates the sequence number in this old entry to
+ * make it the 'new' one if the mark_flag is set.
+ * Finally, it returns which of the entries was the older one.
+ *
+ * TODO The logic feels a bit kludge-y. make it better..
+ */
+static int btt_log_get_old(struct log_entry *ent)
+{
+	int old;
+
+	/*
+	 * the first ever time this is seen, the entry goes into [0]
+	 * the next time, the following logic works out to put this
+	 * (next) entry into [1]
+	 */
+	if (ent[0].seq == 0) {
+		ent[0].seq = cpu_to_le32(1);
+		return 0;
+	}
+
+	if (ent[0].seq == ent[1].seq)
+		return -EINVAL;
+	if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+		return -EINVAL;
+
+	if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
+		if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+			old = 0;
+		else
+			old = 1;
+	} else {
+		if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+			old = 1;
+		else
+			old = 0;
+	}
+
+	return old;
+}
+
+static struct device *to_dev(struct arena_info *arena)
+{
+	return &arena->nd_btt->dev;
+}
+
+/*
+ * This function copies the desired (old/new) log entry into ent if
+ * it is not NULL. It returns the sub-slot number (0 or 1)
+ * where the desired log entry was found. Negative return values
+ * indicate errors.
+ */
+static int btt_log_read(struct arena_info *arena, u32 lane,
+			struct log_entry *ent, int old_flag)
+{
+	int ret;
+	int old_ent, ret_ent;
+	struct log_entry log[2];
+
+	ret = btt_log_read_pair(arena, lane, log);
+	if (ret)
+		return -EIO;
+
+	old_ent = btt_log_get_old(log);
+	if (old_ent < 0 || old_ent > 1) {
+		dev_info(to_dev(arena),
+				"log corruption (%d): lane %d seq [%d, %d]\n",
+			old_ent, lane, log[0].seq, log[1].seq);
+		/* TODO set error state? */
+		return -EIO;
+	}
+
+	ret_ent = (old_flag ? old_ent : (1 - old_ent));
+
+	if (ent != NULL)
+		memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+
+	return ret_ent;
+}
+
+/*
+ * This function commits a log entry to media
+ * It does _not_ prepare the freelist entry for the next write
+ * btt_flog_write is the wrapper for updating the freelist elements
+ */
+static int __btt_log_write(struct arena_info *arena, u32 lane,
+			u32 sub, struct log_entry *ent)
+{
+	int ret;
+	/*
+	 * Ignore the padding in log_entry for calculating log_half.
+	 * The entry is 'committed' when we write the sequence number,
+	 * and we want to ensure that that is the last thing written.
+	 * We don't bother writing the padding as that would be extra
+	 * media wear and write amplification
+	 */
+	unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
+	u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+	void *src = ent;
+
+	/* split the 16B write into atomic, durable halves */
+	ret = arena_write_bytes(arena, ns_off, src, log_half);
+	if (ret)
+		return ret;
+
+	ns_off += log_half;
+	src += log_half;
+	return arena_write_bytes(arena, ns_off, src, log_half);
+}
+
+static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
+			struct log_entry *ent)
+{
+	int ret;
+
+	ret = __btt_log_write(arena, lane, sub, ent);
+	if (ret)
+		return ret;
+
+	/* prepare the next free entry */
+	arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
+	if (++(arena->freelist[lane].seq) == 4)
+		arena->freelist[lane].seq = 1;
+	arena->freelist[lane].block = le32_to_cpu(ent->old_map);
+
+	return ret;
+}
+
+/*
+ * This function initializes the BTT map to the initial state, which is
+ * all-zeroes, and indicates an identity mapping
+ */
+static int btt_map_init(struct arena_info *arena)
+{
+	int ret = -EINVAL;
+	void *zerobuf;
+	size_t offset = 0;
+	size_t chunk_size = SZ_2M;
+	size_t mapsize = arena->logoff - arena->mapoff;
+
+	zerobuf = kzalloc(chunk_size, GFP_KERNEL);
+	if (!zerobuf)
+		return -ENOMEM;
+
+	while (mapsize) {
+		size_t size = min(mapsize, chunk_size);
+
+		ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
+				size);
+		if (ret)
+			goto free;
+
+		offset += size;
+		mapsize -= size;
+		cond_resched();
+	}
+
+ free:
+	kfree(zerobuf);
+	return ret;
+}
+
+/*
+ * This function initializes the BTT log with 'fake' entries pointing
+ * to the initial reserved set of blocks as being free
+ */
+static int btt_log_init(struct arena_info *arena)
+{
+	int ret;
+	u32 i;
+	struct log_entry log, zerolog;
+
+	memset(&zerolog, 0, sizeof(zerolog));
+
+	for (i = 0; i < arena->nfree; i++) {
+		log.lba = cpu_to_le32(i);
+		log.old_map = cpu_to_le32(arena->external_nlba + i);
+		log.new_map = cpu_to_le32(arena->external_nlba + i);
+		log.seq = cpu_to_le32(LOG_SEQ_INIT);
+		ret = __btt_log_write(arena, i, 0, &log);
+		if (ret)
+			return ret;
+		ret = __btt_log_write(arena, i, 1, &zerolog);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int btt_freelist_init(struct arena_info *arena)
+{
+	int old, new, ret;
+	u32 i, map_entry;
+	struct log_entry log_new, log_old;
+
+	arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
+					GFP_KERNEL);
+	if (!arena->freelist)
+		return -ENOMEM;
+
+	for (i = 0; i < arena->nfree; i++) {
+		old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT);
+		if (old < 0)
+			return old;
+
+		new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
+		if (new < 0)
+			return new;
+
+		/* sub points to the next one to be overwritten */
+		arena->freelist[i].sub = 1 - new;
+		arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
+		arena->freelist[i].block = le32_to_cpu(log_new.old_map);
+
+		/* This implies a newly created or untouched flog entry */
+		if (log_new.old_map == log_new.new_map)
+			continue;
+
+		/* Check if map recovery is needed */
+		ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
+				NULL, NULL);
+		if (ret)
+			return ret;
+		if ((le32_to_cpu(log_new.new_map) != map_entry) &&
+				(le32_to_cpu(log_new.old_map) == map_entry)) {
+			/*
+			 * Last transaction wrote the flog, but wasn't able
+			 * to complete the map write. So fix up the map.
+			 */
+			ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
+					le32_to_cpu(log_new.new_map), 0, 0);
+			if (ret)
+				return ret;
+		}
+
+	}
+
+	return 0;
+}
+
+static int btt_rtt_init(struct arena_info *arena)
+{
+	arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
+	if (arena->rtt == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int btt_maplocks_init(struct arena_info *arena)
+{
+	u32 i;
+
+	arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
+				GFP_KERNEL);
+	if (!arena->map_locks)
+		return -ENOMEM;
+
+	for (i = 0; i < arena->nfree; i++)
+		spin_lock_init(&arena->map_locks[i].lock);
+
+	return 0;
+}
+
+static struct arena_info *alloc_arena(struct btt *btt, size_t size,
+				size_t start, size_t arena_off)
+{
+	struct arena_info *arena;
+	u64 logsize, mapsize, datasize;
+	u64 available = size;
+
+	arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
+	if (!arena)
+		return NULL;
+	arena->nd_btt = btt->nd_btt;
+
+	if (!size)
+		return arena;
+
+	arena->size = size;
+	arena->external_lba_start = start;
+	arena->external_lbasize = btt->lbasize;
+	arena->internal_lbasize = roundup(arena->external_lbasize,
+					INT_LBASIZE_ALIGNMENT);
+	arena->nfree = BTT_DEFAULT_NFREE;
+	arena->version_major = 1;
+	arena->version_minor = 1;
+
+	if (available % BTT_PG_SIZE)
+		available -= (available % BTT_PG_SIZE);
+
+	/* Two pages are reserved for the super block and its copy */
+	available -= 2 * BTT_PG_SIZE;
+
+	/* The log takes a fixed amount of space based on nfree */
+	logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
+				BTT_PG_SIZE);
+	available -= logsize;
+
+	/* Calculate optimal split between map and data area */
+	arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
+			arena->internal_lbasize + MAP_ENT_SIZE);
+	arena->external_nlba = arena->internal_nlba - arena->nfree;
+
+	mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
+	datasize = available - mapsize;
+
+	/* 'Absolute' values, relative to start of storage space */
+	arena->infooff = arena_off;
+	arena->dataoff = arena->infooff + BTT_PG_SIZE;
+	arena->mapoff = arena->dataoff + datasize;
+	arena->logoff = arena->mapoff + mapsize;
+	arena->info2off = arena->logoff + logsize;
+	return arena;
+}
+
+static void free_arenas(struct btt *btt)
+{
+	struct arena_info *arena, *next;
+
+	list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
+		list_del(&arena->list);
+		kfree(arena->rtt);
+		kfree(arena->map_locks);
+		kfree(arena->freelist);
+		debugfs_remove_recursive(arena->debugfs_dir);
+		kfree(arena);
+	}
+}
+
+/*
+ * This function checks if the metadata layout is valid and error free
+ */
+static int arena_is_valid(struct arena_info *arena, struct btt_sb *super,
+				u8 *uuid, u32 lbasize)
+{
+	u64 checksum;
+
+	if (memcmp(super->uuid, uuid, 16))
+		return 0;
+
+	checksum = le64_to_cpu(super->checksum);
+	super->checksum = 0;
+	if (checksum != nd_btt_sb_checksum(super))
+		return 0;
+	super->checksum = cpu_to_le64(checksum);
+
+	if (lbasize != le32_to_cpu(super->external_lbasize))
+		return 0;
+
+	/* TODO: figure out action for this */
+	if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
+		dev_info(to_dev(arena), "Found arena with an error flag\n");
+
+	return 1;
+}
+
+/*
+ * This function reads an existing valid btt superblock and
+ * populates the corresponding arena_info struct
+ */
+static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
+				u64 arena_off)
+{
+	arena->internal_nlba = le32_to_cpu(super->internal_nlba);
+	arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
+	arena->external_nlba = le32_to_cpu(super->external_nlba);
+	arena->external_lbasize = le32_to_cpu(super->external_lbasize);
+	arena->nfree = le32_to_cpu(super->nfree);
+	arena->version_major = le16_to_cpu(super->version_major);
+	arena->version_minor = le16_to_cpu(super->version_minor);
+
+	arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
+			le64_to_cpu(super->nextoff));
+	arena->infooff = arena_off;
+	arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
+	arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
+	arena->logoff = arena_off + le64_to_cpu(super->logoff);
+	arena->info2off = arena_off + le64_to_cpu(super->info2off);
+
+	arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) :
+			(arena->info2off - arena->infooff + BTT_PG_SIZE);
+
+	arena->flags = le32_to_cpu(super->flags);
+}
+
+static int discover_arenas(struct btt *btt)
+{
+	int ret = 0;
+	struct arena_info *arena;
+	struct btt_sb *super;
+	size_t remaining = btt->rawsize;
+	u64 cur_nlba = 0;
+	size_t cur_off = 0;
+	int num_arenas = 0;
+
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super)
+		return -ENOMEM;
+
+	while (remaining) {
+		/* Alloc memory for arena */
+		arena = alloc_arena(btt, 0, 0, 0);
+		if (!arena) {
+			ret = -ENOMEM;
+			goto out_super;
+		}
+
+		arena->infooff = cur_off;
+		ret = btt_info_read(arena, super);
+		if (ret)
+			goto out;
+
+		if (!arena_is_valid(arena, super, btt->nd_btt->uuid,
+				btt->lbasize)) {
+			if (remaining == btt->rawsize) {
+				btt->init_state = INIT_NOTFOUND;
+				dev_info(to_dev(arena), "No existing arenas\n");
+				goto out;
+			} else {
+				dev_info(to_dev(arena),
+						"Found corrupted metadata!\n");
+				ret = -ENODEV;
+				goto out;
+			}
+		}
+
+		arena->external_lba_start = cur_nlba;
+		parse_arena_meta(arena, super, cur_off);
+
+		ret = btt_freelist_init(arena);
+		if (ret)
+			goto out;
+
+		ret = btt_rtt_init(arena);
+		if (ret)
+			goto out;
+
+		ret = btt_maplocks_init(arena);
+		if (ret)
+			goto out;
+
+		list_add_tail(&arena->list, &btt->arena_list);
+
+		remaining -= arena->size;
+		cur_off += arena->size;
+		cur_nlba += arena->external_nlba;
+		num_arenas++;
+
+		if (arena->nextoff == 0)
+			break;
+	}
+	btt->num_arenas = num_arenas;
+	btt->nlba = cur_nlba;
+	btt->init_state = INIT_READY;
+
+	kfree(super);
+	return ret;
+
+ out:
+	kfree(arena);
+	free_arenas(btt);
+ out_super:
+	kfree(super);
+	return ret;
+}
+
+static int create_arenas(struct btt *btt)
+{
+	size_t remaining = btt->rawsize;
+	size_t cur_off = 0;
+
+	while (remaining) {
+		struct arena_info *arena;
+		size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);
+
+		remaining -= arena_size;
+		if (arena_size < ARENA_MIN_SIZE)
+			break;
+
+		arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
+		if (!arena) {
+			free_arenas(btt);
+			return -ENOMEM;
+		}
+		btt->nlba += arena->external_nlba;
+		if (remaining >= ARENA_MIN_SIZE)
+			arena->nextoff = arena->size;
+		else
+			arena->nextoff = 0;
+		cur_off += arena_size;
+		list_add_tail(&arena->list, &btt->arena_list);
+	}
+
+	return 0;
+}
+
+/*
+ * This function completes arena initialization by writing
+ * all the metadata.
+ * It is only called for an uninitialized arena when a write
+ * to that arena occurs for the first time.
+ */
+static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
+{
+	int ret;
+	struct btt_sb *super;
+
+	ret = btt_map_init(arena);
+	if (ret)
+		return ret;
+
+	ret = btt_log_init(arena);
+	if (ret)
+		return ret;
+
+	super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
+	if (!super)
+		return -ENOMEM;
+
+	strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
+	memcpy(super->uuid, uuid, 16);
+	super->flags = cpu_to_le32(arena->flags);
+	super->version_major = cpu_to_le16(arena->version_major);
+	super->version_minor = cpu_to_le16(arena->version_minor);
+	super->external_lbasize = cpu_to_le32(arena->external_lbasize);
+	super->external_nlba = cpu_to_le32(arena->external_nlba);
+	super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
+	super->internal_nlba = cpu_to_le32(arena->internal_nlba);
+	super->nfree = cpu_to_le32(arena->nfree);
+	super->infosize = cpu_to_le32(sizeof(struct btt_sb));
+	super->nextoff = cpu_to_le64(arena->nextoff);
+	/*
+	 * Subtract arena->infooff (arena start) so numbers are relative
+	 * to 'this' arena
+	 */
+	super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
+	super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
+	super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
+	super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
+
+	super->flags = 0;
+	super->checksum = cpu_to_le64(nd_btt_sb_checksum(super));
+
+	ret = btt_info_write(arena, super);
+
+	kfree(super);
+	return ret;
+}
+
+/*
+ * This function completes the initialization for the BTT namespace
+ * such that it is ready to accept IOs
+ */
+static int btt_meta_init(struct btt *btt)
+{
+	int ret = 0;
+	struct arena_info *arena;
+
+	mutex_lock(&btt->init_lock);
+	list_for_each_entry(arena, &btt->arena_list, list) {
+		ret = btt_arena_write_layout(arena, btt->nd_btt->uuid);
+		if (ret)
+			goto unlock;
+
+		ret = btt_freelist_init(arena);
+		if (ret)
+			goto unlock;
+
+		ret = btt_rtt_init(arena);
+		if (ret)
+			goto unlock;
+
+		ret = btt_maplocks_init(arena);
+		if (ret)
+			goto unlock;
+	}
+
+	btt->init_state = INIT_READY;
+
+ unlock:
+	mutex_unlock(&btt->init_lock);
+	return ret;
+}
+
+static u32 btt_meta_size(struct btt *btt)
+{
+	return btt->lbasize - btt->sector_size;
+}
+
+/*
+ * This function calculates the arena in which the given LBA lies
+ * by doing a linear walk. This is acceptable since we expect only
+ * a few arenas. If we have backing devices that get much larger,
+ * we can construct a balanced binary tree of arenas at init time
+ * so that this range search becomes faster.
+ */
+static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
+				struct arena_info **arena)
+{
+	struct arena_info *arena_list;
+	__u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);
+
+	list_for_each_entry(arena_list, &btt->arena_list, list) {
+		if (lba < arena_list->external_nlba) {
+			*arena = arena_list;
+			*premap = lba;
+			return 0;
+		}
+		lba -= arena_list->external_nlba;
+	}
+
+	return -EIO;
+}
+
+/*
+ * The following (lock_map, unlock_map) are mostly just to improve
+ * readability, since they index into an array of locks
+ */
+static void lock_map(struct arena_info *arena, u32 premap)
+		__acquires(&arena->map_locks[idx].lock)
+{
+	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
+
+	spin_lock(&arena->map_locks[idx].lock);
+}
+
+static void unlock_map(struct arena_info *arena, u32 premap)
+		__releases(&arena->map_locks[idx].lock)
+{
+	u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
+
+	spin_unlock(&arena->map_locks[idx].lock);
+}
+
+static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
+{
+	return arena->dataoff + ((u64)lba * arena->internal_lbasize);
+}
+
+static int btt_data_read(struct arena_info *arena, struct page *page,
+			unsigned int off, u32 lba, u32 len)
+{
+	int ret;
+	u64 nsoff = to_namespace_offset(arena, lba);
+	void *mem = kmap_atomic(page);
+
+	ret = arena_read_bytes(arena, nsoff, mem + off, len);
+	kunmap_atomic(mem);
+
+	return ret;
+}
+
+static int btt_data_write(struct arena_info *arena, u32 lba,
+			struct page *page, unsigned int off, u32 len)
+{
+	int ret;
+	u64 nsoff = to_namespace_offset(arena, lba);
+	void *mem = kmap_atomic(page);
+
+	ret = arena_write_bytes(arena, nsoff, mem + off, len);
+	kunmap_atomic(mem);
+
+	return ret;
+}
+
+static void zero_fill_data(struct page *page, unsigned int off, u32 len)
+{
+	void *mem = kmap_atomic(page);
+
+	memset(mem + off, 0, len);
+	kunmap_atomic(mem);
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
+			struct arena_info *arena, u32 postmap, int rw)
+{
+	unsigned int len = btt_meta_size(btt);
+	u64 meta_nsoff;
+	int ret = 0;
+
+	if (bip == NULL)
+		return 0;
+
+	meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;
+
+	while (len) {
+		unsigned int cur_len;
+		struct bio_vec bv;
+		void *mem;
+
+		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
+		/*
+		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
+		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
+		 * can use those directly
+		 */
+
+		cur_len = min(len, bv.bv_len);
+		mem = kmap_atomic(bv.bv_page);
+		if (rw)
+			ret = arena_write_bytes(arena, meta_nsoff,
+					mem + bv.bv_offset, cur_len);
+		else
+			ret = arena_read_bytes(arena, meta_nsoff,
+					mem + bv.bv_offset, cur_len);
+
+		kunmap_atomic(mem);
+		if (ret)
+			return ret;
+
+		len -= cur_len;
+		meta_nsoff += cur_len;
+		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
+	}
+
+	return ret;
+}
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
+			struct arena_info *arena, u32 postmap, int rw)
+{
+	return 0;
+}
+#endif
+
+static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
+			struct page *page, unsigned int off, sector_t sector,
+			unsigned int len)
+{
+	int ret = 0;
+	int t_flag, e_flag;
+	struct arena_info *arena = NULL;
+	u32 lane = 0, premap, postmap;
+
+	while (len) {
+		u32 cur_len;
+
+		lane = nd_region_acquire_lane(btt->nd_region);
+
+		ret = lba_to_arena(btt, sector, &premap, &arena);
+		if (ret)
+			goto out_lane;
+
+		cur_len = min(btt->sector_size, len);
+
+		ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
+		if (ret)
+			goto out_lane;
+
+		/*
+		 * We loop to make sure that the post map LBA didn't change
+		 * from under us between writing the RTT and doing the actual
+		 * read.
+		 */
+		while (1) {
+			u32 new_map;
+
+			if (t_flag) {
+				zero_fill_data(page, off, cur_len);
+				goto out_lane;
+			}
+
+			if (e_flag) {
+				ret = -EIO;
+				goto out_lane;
+			}
+
+			arena->rtt[lane] = RTT_VALID | postmap;
+			/*
+			 * Barrier to make sure this write is not reordered
+			 * to do the verification map_read before the RTT store
+			 */
+			barrier();
+
+			ret = btt_map_read(arena, premap, &new_map, &t_flag,
+						&e_flag);
+			if (ret)
+				goto out_rtt;
+
+			if (postmap == new_map)
+				break;
+
+			postmap = new_map;
+		}
+
+		ret = btt_data_read(arena, page, off, postmap, cur_len);
+		if (ret)
+			goto out_rtt;
+
+		if (bip) {
+			ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
+			if (ret)
+				goto out_rtt;
+		}
+
+		arena->rtt[lane] = RTT_INVALID;
+		nd_region_release_lane(btt->nd_region, lane);
+
+		len -= cur_len;
+		off += cur_len;
+		sector += btt->sector_size >> SECTOR_SHIFT;
+	}
+
+	return 0;
+
+ out_rtt:
+	arena->rtt[lane] = RTT_INVALID;
+ out_lane:
+	nd_region_release_lane(btt->nd_region, lane);
+	return ret;
+}
+
+static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
+			sector_t sector, struct page *page, unsigned int off,
+			unsigned int len)
+{
+	int ret = 0;
+	struct arena_info *arena = NULL;
+	u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
+	struct log_entry log;
+	int sub;
+
+	while (len) {
+		u32 cur_len;
+
+		lane = nd_region_acquire_lane(btt->nd_region);
+
+		ret = lba_to_arena(btt, sector, &premap, &arena);
+		if (ret)
+			goto out_lane;
+		cur_len = min(btt->sector_size, len);
+
+		if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
+			ret = -EIO;
+			goto out_lane;
+		}
+
+		new_postmap = arena->freelist[lane].block;
+
+		/* Wait if the new block is being read from */
+		for (i = 0; i < arena->nfree; i++)
+			while (arena->rtt[i] == (RTT_VALID | new_postmap))
+				cpu_relax();
+
+
+		if (new_postmap >= arena->internal_nlba) {
+			ret = -EIO;
+			goto out_lane;
+		}
+
+		ret = btt_data_write(arena, new_postmap, page, off, cur_len);
+		if (ret)
+			goto out_lane;
+
+		if (bip) {
+			ret = btt_rw_integrity(btt, bip, arena, new_postmap,
+						WRITE);
+			if (ret)
+				goto out_lane;
+		}
+
+		lock_map(arena, premap);
+		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
+		if (ret)
+			goto out_map;
+		if (old_postmap >= arena->internal_nlba) {
+			ret = -EIO;
+			goto out_map;
+		}
+
+		log.lba = cpu_to_le32(premap);
+		log.old_map = cpu_to_le32(old_postmap);
+		log.new_map = cpu_to_le32(new_postmap);
+		log.seq = cpu_to_le32(arena->freelist[lane].seq);
+		sub = arena->freelist[lane].sub;
+		ret = btt_flog_write(arena, lane, sub, &log);
+		if (ret)
+			goto out_map;
+
+		ret = btt_map_write(arena, premap, new_postmap, 0, 0);
+		if (ret)
+			goto out_map;
+
+		unlock_map(arena, premap);
+		nd_region_release_lane(btt->nd_region, lane);
+
+		len -= cur_len;
+		off += cur_len;
+		sector += btt->sector_size >> SECTOR_SHIFT;
+	}
+
+	return 0;
+
+ out_map:
+	unlock_map(arena, premap);
+ out_lane:
+	nd_region_release_lane(btt->nd_region, lane);
+	return ret;
+}
+
+static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
+			struct page *page, unsigned int len, unsigned int off,
+			int rw, sector_t sector)
+{
+	int ret;
+
+	if (rw == READ) {
+		ret = btt_read_pg(btt, bip, page, off, sector, len);
+		flush_dcache_page(page);
+	} else {
+		flush_dcache_page(page);
+		ret = btt_write_pg(btt, bip, sector, page, off, len);
+	}
+
+	return ret;
+}
+
+static void btt_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct btt *btt = q->queuedata;
+	struct bvec_iter iter;
+	unsigned long start;
+	struct bio_vec bvec;
+	int err = 0, rw;
+	bool do_acct;
+
+	/*
+	 * bio_integrity_enabled also checks if the bio already has an
+	 * integrity payload attached. If it does, we *don't* do a
+	 * bio_integrity_prep here - the payload has been generated by
+	 * another kernel subsystem, and we just pass it through.
+	 */
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+		err = -EIO;
+		goto out;
+	}
+
+	do_acct = nd_iostat_start(bio, &start);
+	rw = bio_data_dir(bio);
+	bio_for_each_segment(bvec, bio, iter) {
+		unsigned int len = bvec.bv_len;
+
+		BUG_ON(len > PAGE_SIZE);
+		/* Make sure len is in multiples of sector size. */
+		/* XXX is this right? */
+		BUG_ON(len < btt->sector_size);
+		BUG_ON(len % btt->sector_size);
+
+		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
+				rw, iter.bi_sector);
+		if (err) {
+			dev_info(&btt->nd_btt->dev,
+					"io error in %s sector %lld, len %d,\n",
+					(rw == READ) ? "READ" : "WRITE",
+					(unsigned long long) iter.bi_sector, len);
+			break;
+		}
+	}
+	if (do_acct)
+		nd_iostat_end(bio, start);
+
+out:
+	bio_endio(bio, err);
+}
+
+static int btt_rw_page(struct block_device *bdev, sector_t sector,
+		struct page *page, int rw)
+{
+	struct btt *btt = bdev->bd_disk->private_data;
+
+	btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	page_endio(page, rw & WRITE, 0);
+	return 0;
+}
+
+
+static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
+{
+	/* some standard values */
+	geo->heads = 1 << 6;
+	geo->sectors = 1 << 5;
+	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+	return 0;
+}
+
+static const struct block_device_operations btt_fops = {
+	.owner =		THIS_MODULE,
+	.rw_page =		btt_rw_page,
+	.getgeo =		btt_getgeo,
+	.revalidate_disk =	nvdimm_revalidate_disk,
+};
+
+static int btt_blk_init(struct btt *btt)
+{
+	struct nd_btt *nd_btt = btt->nd_btt;
+	struct nd_namespace_common *ndns = nd_btt->ndns;
+
+	/* create a new disk and request queue for btt */
+	btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
+	if (!btt->btt_queue)
+		return -ENOMEM;
+
+	btt->btt_disk = alloc_disk(0);
+	if (!btt->btt_disk) {
+		blk_cleanup_queue(btt->btt_queue);
+		return -ENOMEM;
+	}
+
+	nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
+	btt->btt_disk->driverfs_dev = &btt->nd_btt->dev;
+	btt->btt_disk->major = btt_major;
+	btt->btt_disk->first_minor = 0;
+	btt->btt_disk->fops = &btt_fops;
+	btt->btt_disk->private_data = btt;
+	btt->btt_disk->queue = btt->btt_queue;
+	btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
+
+	blk_queue_make_request(btt->btt_queue, btt_make_request);
+	blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
+	blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
+	blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
+	btt->btt_queue->queuedata = btt;
+
+	set_capacity(btt->btt_disk, 0);
+	add_disk(btt->btt_disk);
+	if (btt_meta_size(btt)) {
+		int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
+
+		if (rc) {
+			del_gendisk(btt->btt_disk);
+			put_disk(btt->btt_disk);
+			blk_cleanup_queue(btt->btt_queue);
+			return rc;
+		}
+	}
+	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+	revalidate_disk(btt->btt_disk);
+
+	return 0;
+}
+
+static void btt_blk_cleanup(struct btt *btt)
+{
+	blk_integrity_unregister(btt->btt_disk);
+	del_gendisk(btt->btt_disk);
+	put_disk(btt->btt_disk);
+	blk_cleanup_queue(btt->btt_queue);
+}
+
+/**
+ * btt_init - initialize a block translation table for the given device
+ * @nd_btt:	device with BTT geometry and backing device info
+ * @rawsize:	raw size in bytes of the backing device
+ * @lbasize:	lba size of the backing device
+ * @uuid:	A uuid for the backing device - this is stored on media
+ * @maxlane:	maximum number of parallel requests the device can handle
+ *
+ * Initialize a Block Translation Table on a backing device to provide
+ * single sector power fail atomicity.
+ *
+ * Context:
+ * Might sleep.
+ *
+ * Returns:
+ * Pointer to a new struct btt on success, NULL on failure.
+ */
+static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
+		u32 lbasize, u8 *uuid, struct nd_region *nd_region)
+{
+	int ret;
+	struct btt *btt;
+	struct device *dev = &nd_btt->dev;
+
+	btt = kzalloc(sizeof(struct btt), GFP_KERNEL);
+	if (!btt)
+		return NULL;
+
+	btt->nd_btt = nd_btt;
+	btt->rawsize = rawsize;
+	btt->lbasize = lbasize;
+	btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
+	INIT_LIST_HEAD(&btt->arena_list);
+	mutex_init(&btt->init_lock);
+	btt->nd_region = nd_region;
+
+	ret = discover_arenas(btt);
+	if (ret) {
+		dev_err(dev, "init: error in arena_discover: %d\n", ret);
+		goto out_free;
+	}
+
+	if (btt->init_state != INIT_READY && nd_region->ro) {
+		dev_info(dev, "%s is read-only, unable to init btt metadata\n",
+				dev_name(&nd_region->dev));
+		goto out_free;
+	} else if (btt->init_state != INIT_READY) {
+		btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
+			((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
+		dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
+				btt->num_arenas, rawsize);
+
+		ret = create_arenas(btt);
+		if (ret) {
+			dev_info(dev, "init: create_arenas: %d\n", ret);
+			goto out_free;
+		}
+
+		ret = btt_meta_init(btt);
+		if (ret) {
+			dev_err(dev, "init: error in meta_init: %d\n", ret);
+			goto out_free;
+		}
+	}
+
+	ret = btt_blk_init(btt);
+	if (ret) {
+		dev_err(dev, "init: error in blk_init: %d\n", ret);
+		goto out_free;
+	}
+
+	btt_debugfs_init(btt);
+
+	return btt;
+
+ out_free:
+	kfree(btt);
+	return NULL;
+}
+
+/**
+ * btt_fini - de-initialize a BTT
+ * @btt:	the BTT handle that was generated by btt_init
+ *
+ * De-initialize a Block Translation Table on device removal
+ *
+ * Context:
+ * Might sleep.
+ */
+static void btt_fini(struct btt *btt)
+{
+	if (btt) {
+		btt_blk_cleanup(btt);
+		free_arenas(btt);
+		debugfs_remove_recursive(btt->debugfs_dir);
+		kfree(btt);
+	}
+}
+
+int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
+{
+	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
+	struct nd_region *nd_region;
+	struct btt *btt;
+	size_t rawsize;
+
+	if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize)
+		return -ENODEV;
+
+	rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
+	if (rawsize < ARENA_MIN_SIZE) {
+		return -ENXIO;
+	}
+	nd_region = to_nd_region(nd_btt->dev.parent);
+	btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
+			nd_region);
+	if (!btt)
+		return -ENOMEM;
+	nd_btt->btt = btt;
+
+	return 0;
+}
+EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
+
+int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
+{
+	struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
+	struct btt *btt = nd_btt->btt;
+
+	btt_fini(btt);
+	nd_btt->btt = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(nvdimm_namespace_detach_btt);
+
+static int __init nd_btt_init(void)
+{
+	int rc;
+
+	BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
+
+	btt_major = register_blkdev(0, "btt");
+	if (btt_major < 0)
+		return btt_major;
+
+	debugfs_root = debugfs_create_dir("btt", NULL);
+	if (IS_ERR_OR_NULL(debugfs_root)) {
+		rc = -ENXIO;
+		goto err_debugfs;
+	}
+
+	return 0;
+
+ err_debugfs:
+	unregister_blkdev(btt_major, "btt");
+
+	return rc;
+}
+
+static void __exit nd_btt_exit(void)
+{
+	debugfs_remove_recursive(debugfs_root);
+	unregister_blkdev(btt_major, "btt");
+}
+
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
+MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+module_init(nd_btt_init);
+module_exit(nd_btt_exit);
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
new file mode 100644
index 000000000000..75b0d80a6bd9
--- /dev/null
+++ b/drivers/nvdimm/btt.h
@@ -0,0 +1,185 @@
+/*
+ * Block Translation Table library
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_BTT_H
+#define _LINUX_BTT_H
+
+#include <linux/types.h>
+
+#define BTT_SIG_LEN 16
+#define BTT_SIG "BTT_ARENA_INFO\0"
+#define MAP_ENT_SIZE 4
+#define MAP_TRIM_SHIFT 31
+#define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT)
+#define MAP_ERR_SHIFT 30
+#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
+#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
+#define MAP_ENT_NORMAL 0xC0000000
+#define LOG_ENT_SIZE sizeof(struct log_entry)
+#define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
+#define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */
+#define RTT_VALID (1UL << 31)
+#define RTT_INVALID 0
+#define BTT_PG_SIZE 4096
+#define BTT_DEFAULT_NFREE ND_MAX_LANES
+#define LOG_SEQ_INIT 1
+
+#define IB_FLAG_ERROR 0x00000001
+#define IB_FLAG_ERROR_MASK 0x00000001
+
+enum btt_init_state {
+	INIT_UNCHECKED = 0,
+	INIT_NOTFOUND,
+	INIT_READY
+};
+
+struct log_entry {
+	__le32 lba;
+	__le32 old_map;
+	__le32 new_map;
+	__le32 seq;
+	__le64 padding[2];
+};
+
+struct btt_sb {
+	u8 signature[BTT_SIG_LEN];
+	u8 uuid[16];
+	u8 parent_uuid[16];
+	__le32 flags;
+	__le16 version_major;
+	__le16 version_minor;
+	__le32 external_lbasize;
+	__le32 external_nlba;
+	__le32 internal_lbasize;
+	__le32 internal_nlba;
+	__le32 nfree;
+	__le32 infosize;
+	__le64 nextoff;
+	__le64 dataoff;
+	__le64 mapoff;
+	__le64 logoff;
+	__le64 info2off;
+	u8 padding[3968];
+	__le64 checksum;
+};
+
+struct free_entry {
+	u32 block;
+	u8 sub;
+	u8 seq;
+};
+
+struct aligned_lock {
+	union {
+		spinlock_t lock;
+		u8 cacheline_padding[L1_CACHE_BYTES];
+	};
+};
+
+/**
+ * struct arena_info - handle for an arena
+ * @size:		Size in bytes this arena occupies on the raw device.
+ *			This includes arena metadata.
+ * @external_lba_start:	The first external LBA in this arena.
+ * @internal_nlba:	Number of internal blocks available in the arena
+ *			including nfree reserved blocks
+ * @internal_lbasize:	Internal and external lba sizes may be different as
+ *			we can round up 'odd' external lbasizes such as 520B
+ *			to be aligned.
+ * @external_nlba:	Number of blocks contributed by the arena to the number
+ *			reported to upper layers. (internal_nlba - nfree)
+ * @external_lbasize:	LBA size as exposed to upper layers.
+ * @nfree:		A reserve number of 'free' blocks that is used to
+ *			handle incoming writes.
+ * @version_major:	Metadata layout version major.
+ * @version_minor:	Metadata layout version minor.
+ * @nextoff:		Offset in bytes to the start of the next arena.
+ * @infooff:		Offset in bytes to the info block of this arena.
+ * @dataoff:		Offset in bytes to the data area of this arena.
+ * @mapoff:		Offset in bytes to the map area of this arena.
+ * @logoff:		Offset in bytes to the log area of this arena.
+ * @info2off:		Offset in bytes to the backup info block of this arena.
+ * @freelist:		Pointer to in-memory list of free blocks
+ * @rtt:		Pointer to in-memory "Read Tracking Table"
+ * @map_locks:		Spinlocks protecting concurrent map writes
+ * @nd_btt:		Pointer to parent nd_btt structure.
+ * @list:		List head for list of arenas
+ * @debugfs_dir:	Debugfs dentry
+ * @flags:		Arena flags - may signify error states.
+ *
+ * arena_info is a per-arena handle. Once an arena is narrowed down for an
+ * IO, this struct is passed around for the duration of the IO.
+ */
+struct arena_info {
+	u64 size;			/* Total bytes for this arena */
+	u64 external_lba_start;
+	u32 internal_nlba;
+	u32 internal_lbasize;
+	u32 external_nlba;
+	u32 external_lbasize;
+	u32 nfree;
+	u16 version_major;
+	u16 version_minor;
+	/* Byte offsets to the different on-media structures */
+	u64 nextoff;
+	u64 infooff;
+	u64 dataoff;
+	u64 mapoff;
+	u64 logoff;
+	u64 info2off;
+	/* Pointers to other in-memory structures for this arena */
+	struct free_entry *freelist;
+	u32 *rtt;
+	struct aligned_lock *map_locks;
+	struct nd_btt *nd_btt;
+	struct list_head list;
+	struct dentry *debugfs_dir;
+	/* Arena flags */
+	u32 flags;
+};
+
+/**
+ * struct btt - handle for a BTT instance
+ * @btt_disk:		Pointer to the gendisk for BTT device
+ * @btt_queue:		Pointer to the request queue for the BTT device
+ * @arena_list:		Head of the list of arenas
+ * @debugfs_dir:	Debugfs dentry
+ * @nd_btt:		Parent nd_btt struct
+ * @nlba:		Number of logical blocks exposed to the	upper layers
+ *			after removing the amount of space needed by metadata
+ * @rawsize:		Total size in bytes of the available backing device
+ * @lbasize:		LBA size as requested and presented to upper layers.
+ *			This is sector_size + size of any metadata.
+ * @sector_size:	The Linux sector size - 512 or 4096
+ * @lanes:		Per-lane spinlocks
+ * @init_lock:		Mutex used for the BTT initialization
+ * @init_state:		Flag describing the initialization state for the BTT
+ * @num_arenas:		Number of arenas in the BTT instance
+ */
+struct btt {
+	struct gendisk *btt_disk;
+	struct request_queue *btt_queue;
+	struct list_head arena_list;
+	struct dentry *debugfs_dir;
+	struct nd_btt *nd_btt;
+	u64 nlba;
+	unsigned long long rawsize;
+	u32 lbasize;
+	u32 sector_size;
+	struct nd_region *nd_region;
+	struct mutex init_lock;
+	int init_state;
+	int num_arenas;
+};
+#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
new file mode 100644
index 000000000000..6ac8c0fea3ec
--- /dev/null
+++ b/drivers/nvdimm/btt_devs.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "btt.h"
+#include "nd.h"
+
+static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
+{
+	struct nd_namespace_common *ndns = nd_btt->ndns;
+
+	dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
+			|| ndns->claim != &nd_btt->dev,
+			"%s: invalid claim\n", __func__);
+	ndns->claim = NULL;
+	nd_btt->ndns = NULL;
+	put_device(&ndns->dev);
+}
+
+static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
+{
+	struct nd_namespace_common *ndns = nd_btt->ndns;
+
+	if (!ndns)
+		return;
+	get_device(&ndns->dev);
+	device_lock(&ndns->dev);
+	__nd_btt_detach_ndns(nd_btt);
+	device_unlock(&ndns->dev);
+	put_device(&ndns->dev);
+}
+
+static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
+		struct nd_namespace_common *ndns)
+{
+	if (ndns->claim)
+		return false;
+	dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
+			|| nd_btt->ndns,
+			"%s: invalid claim\n", __func__);
+	ndns->claim = &nd_btt->dev;
+	nd_btt->ndns = ndns;
+	get_device(&ndns->dev);
+	return true;
+}
+
+static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
+		struct nd_namespace_common *ndns)
+{
+	bool claimed;
+
+	device_lock(&ndns->dev);
+	claimed = __nd_btt_attach_ndns(nd_btt, ndns);
+	device_unlock(&ndns->dev);
+	return claimed;
+}
+
+static void nd_btt_release(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	dev_dbg(dev, "%s\n", __func__);
+	nd_btt_detach_ndns(nd_btt);
+	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
+	kfree(nd_btt->uuid);
+	kfree(nd_btt);
+}
+
+static struct device_type nd_btt_device_type = {
+	.name = "nd_btt",
+	.release = nd_btt_release,
+};
+
+bool is_nd_btt(struct device *dev)
+{
+	return dev->type == &nd_btt_device_type;
+}
+EXPORT_SYMBOL(is_nd_btt);
+
+struct nd_btt *to_nd_btt(struct device *dev)
+{
+	struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev);
+
+	WARN_ON(!is_nd_btt(dev));
+	return nd_btt;
+}
+EXPORT_SYMBOL(to_nd_btt);
+
+static const unsigned long btt_lbasize_supported[] = { 512, 520, 528,
+	4096, 4104, 4160, 4224, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize,
+			btt_lbasize_supported);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_btt->uuid)
+		return sprintf(buf, "%pUb\n", nd_btt->uuid);
+	return sprintf(buf, "\n");
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t namespace_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	rc = sprintf(buf, "%s\n", nd_btt->ndns
+			? dev_name(&nd_btt->ndns->dev) : "");
+	nvdimm_bus_unlock(dev);
+	return rc;
+}
+
+static int namespace_match(struct device *dev, void *data)
+{
+	char *name = data;
+
+	return strcmp(name, dev_name(dev)) == 0;
+}
+
+static bool is_nd_btt_idle(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+
+	if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver)
+		return false;
+	return true;
+}
+
+static ssize_t __namespace_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	struct nd_namespace_common *ndns;
+	struct device *found;
+	char *name;
+
+	if (dev->driver) {
+		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		return -EBUSY;
+	}
+
+	name = kstrndup(buf, len, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+	strim(name);
+
+	if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
+		/* pass */;
+	else {
+		len = -EINVAL;
+		goto out;
+	}
+
+	ndns = nd_btt->ndns;
+	if (strcmp(name, "") == 0) {
+		/* detach the namespace and destroy / reset the btt device */
+		nd_btt_detach_ndns(nd_btt);
+		if (is_nd_btt_idle(dev))
+			nd_device_unregister(dev, ND_ASYNC);
+		else {
+			nd_btt->lbasize = 0;
+			kfree(nd_btt->uuid);
+			nd_btt->uuid = NULL;
+		}
+		goto out;
+	} else if (ndns) {
+		dev_dbg(dev, "namespace already set to: %s\n",
+				dev_name(&ndns->dev));
+		len = -EBUSY;
+		goto out;
+	}
+
+	found = device_find_child(dev->parent, name, namespace_match);
+	if (!found) {
+		dev_dbg(dev, "'%s' not found under %s\n", name,
+				dev_name(dev->parent));
+		len = -ENODEV;
+		goto out;
+	}
+
+	ndns = to_ndns(found);
+	if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
+		dev_dbg(dev, "%s too small to host btt\n", name);
+		len = -ENXIO;
+		goto out_attach;
+	}
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
+	if (!nd_btt_attach_ndns(nd_btt, ndns)) {
+		dev_dbg(dev, "%s already claimed\n",
+				dev_name(&ndns->dev));
+		len = -EBUSY;
+	}
+
+ out_attach:
+	put_device(&ndns->dev); /* from device_find_child */
+ out:
+	kfree(name);
+	return len;
+}
+
+static ssize_t namespace_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	device_lock(dev);
+	rc = __namespace_store(dev, attr, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(namespace);
+
+static struct attribute *nd_btt_attributes[] = {
+	&dev_attr_sector_size.attr,
+	&dev_attr_namespace.attr,
+	&dev_attr_uuid.attr,
+	NULL,
+};
+
+static struct attribute_group nd_btt_attribute_group = {
+	.attrs = nd_btt_attributes,
+};
+
+static const struct attribute_group *nd_btt_attribute_groups[] = {
+	&nd_btt_attribute_group,
+	&nd_device_attribute_group,
+	&nd_numa_attribute_group,
+	NULL,
+};
+
+static struct device *__nd_btt_create(struct nd_region *nd_region,
+		unsigned long lbasize, u8 *uuid,
+		struct nd_namespace_common *ndns)
+{
+	struct nd_btt *nd_btt;
+	struct device *dev;
+
+	nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL);
+	if (!nd_btt)
+		return NULL;
+
+	nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL);
+	if (nd_btt->id < 0) {
+		kfree(nd_btt);
+		return NULL;
+	}
+
+	nd_btt->lbasize = lbasize;
+	if (uuid)
+		uuid = kmemdup(uuid, 16, GFP_KERNEL);
+	nd_btt->uuid = uuid;
+	dev = &nd_btt->dev;
+	dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id);
+	dev->parent = &nd_region->dev;
+	dev->type = &nd_btt_device_type;
+	dev->groups = nd_btt_attribute_groups;
+	device_initialize(&nd_btt->dev);
+	if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
+		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
+				__func__, dev_name(ndns->claim));
+		put_device(dev);
+		return NULL;
+	}
+	return dev;
+}
+
+struct device *nd_btt_create(struct nd_region *nd_region)
+{
+	struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
+
+	if (dev)
+		__nd_device_register(dev);
+	return dev;
+}
+
+/*
+ * nd_btt_sb_checksum: compute checksum for btt info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+{
+	u64 sum;
+	__le64 sum_save;
+
+	sum_save = btt_sb->checksum;
+	btt_sb->checksum = 0;
+	sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
+	btt_sb->checksum = sum_save;
+	return sum;
+}
+EXPORT_SYMBOL(nd_btt_sb_checksum);
+
+static int __nd_btt_probe(struct nd_btt *nd_btt,
+		struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
+{
+	u64 checksum;
+
+	if (!btt_sb || !ndns || !nd_btt)
+		return -ENODEV;
+
+	if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb)))
+		return -ENXIO;
+
+	if (nvdimm_namespace_capacity(ndns) < SZ_16M)
+		return -ENXIO;
+
+	if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+		return -ENODEV;
+
+	checksum = le64_to_cpu(btt_sb->checksum);
+	btt_sb->checksum = 0;
+	if (checksum != nd_btt_sb_checksum(btt_sb))
+		return -ENODEV;
+	btt_sb->checksum = cpu_to_le64(checksum);
+
+	nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
+	nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
+	if (!nd_btt->uuid)
+		return -ENOMEM;
+
+	__nd_device_register(&nd_btt->dev);
+
+	return 0;
+}
+
+int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+	int rc;
+	struct device *dev;
+	struct btt_sb *btt_sb;
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+
+	if (ndns->force_raw)
+		return -ENODEV;
+
+	nvdimm_bus_lock(&ndns->dev);
+	dev = __nd_btt_create(nd_region, 0, NULL, ndns);
+	nvdimm_bus_unlock(&ndns->dev);
+	if (!dev)
+		return -ENOMEM;
+	dev_set_drvdata(dev, drvdata);
+	btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL);
+	rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb);
+	kfree(btt_sb);
+	dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
+			rc == 0 ? dev_name(dev) : "<none>");
+	if (rc < 0) {
+		__nd_btt_detach_ndns(to_nd_btt(dev));
+		put_device(dev);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_btt_probe);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
new file mode 100644
index 000000000000..8eb22c0ca7ce
--- /dev/null
+++ b/drivers/nvdimm/bus.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/fcntl.h>
+#include <linux/async.h>
+#include <linux/genhd.h>
+#include <linux/ndctl.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/nd.h>
+#include "nd-core.h"
+#include "nd.h"
+
+int nvdimm_major;
+static int nvdimm_bus_major;
+static struct class *nd_class;
+
+static int to_nd_device_type(struct device *dev)
+{
+	if (is_nvdimm(dev))
+		return ND_DEVICE_DIMM;
+	else if (is_nd_pmem(dev))
+		return ND_DEVICE_REGION_PMEM;
+	else if (is_nd_blk(dev))
+		return ND_DEVICE_REGION_BLK;
+	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
+		return nd_region_to_nstype(to_nd_region(dev->parent));
+
+	return 0;
+}
+
+static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	/*
+	 * Ensure that region devices always have their numa node set as
+	 * early as possible.
+	 */
+	if (is_nd_pmem(dev) || is_nd_blk(dev))
+		set_dev_node(dev, to_nd_region(dev)->numa_node);
+	return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
+			to_nd_device_type(dev));
+}
+
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+	return test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
+static struct module *to_bus_provider(struct device *dev)
+{
+	/* pin bus providers while regions are enabled */
+	if (is_nd_pmem(dev) || is_nd_blk(dev)) {
+		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+		return nvdimm_bus->module;
+	}
+	return NULL;
+}
+
+static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus)
+{
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	nvdimm_bus->probe_active++;
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
+{
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	if (--nvdimm_bus->probe_active == 0)
+		wake_up(&nvdimm_bus->probe_wait);
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+
+static int nvdimm_bus_probe(struct device *dev)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct module *provider = to_bus_provider(dev);
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	int rc;
+
+	if (!try_module_get(provider))
+		return -ENXIO;
+
+	nvdimm_bus_probe_start(nvdimm_bus);
+	rc = nd_drv->probe(dev);
+	if (rc == 0)
+		nd_region_probe_success(nvdimm_bus, dev);
+	else
+		nd_region_disable(nvdimm_bus, dev);
+	nvdimm_bus_probe_end(nvdimm_bus);
+
+	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
+			dev_name(dev), rc);
+
+	if (rc != 0)
+		module_put(provider);
+	return rc;
+}
+
+static int nvdimm_bus_remove(struct device *dev)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct module *provider = to_bus_provider(dev);
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	int rc;
+
+	rc = nd_drv->remove(dev);
+	nd_region_disable(nvdimm_bus, dev);
+
+	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
+			dev_name(dev), rc);
+	module_put(provider);
+	return rc;
+}
+
+static struct bus_type nvdimm_bus_type = {
+	.name = "nd",
+	.uevent = nvdimm_bus_uevent,
+	.match = nvdimm_bus_match,
+	.probe = nvdimm_bus_probe,
+	.remove = nvdimm_bus_remove,
+};
+
+static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
+
+void nd_synchronize(void)
+{
+	async_synchronize_full_domain(&nd_async_domain);
+}
+EXPORT_SYMBOL_GPL(nd_synchronize);
+
+static void nd_async_device_register(void *d, async_cookie_t cookie)
+{
+	struct device *dev = d;
+
+	if (device_add(dev) != 0) {
+		dev_err(dev, "%s: failed\n", __func__);
+		put_device(dev);
+	}
+	put_device(dev);
+}
+
+static void nd_async_device_unregister(void *d, async_cookie_t cookie)
+{
+	struct device *dev = d;
+
+	/* flush bus operations before delete */
+	nvdimm_bus_lock(dev);
+	nvdimm_bus_unlock(dev);
+
+	device_unregister(dev);
+	put_device(dev);
+}
+
+void __nd_device_register(struct device *dev)
+{
+	dev->bus = &nvdimm_bus_type;
+	get_device(dev);
+	async_schedule_domain(nd_async_device_register, dev,
+			&nd_async_domain);
+}
+
+void nd_device_register(struct device *dev)
+{
+	device_initialize(dev);
+	__nd_device_register(dev);
+}
+EXPORT_SYMBOL(nd_device_register);
+
+void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
+{
+	switch (mode) {
+	case ND_ASYNC:
+		get_device(dev);
+		async_schedule_domain(nd_async_device_unregister, dev,
+				&nd_async_domain);
+		break;
+	case ND_SYNC:
+		nd_synchronize();
+		device_unregister(dev);
+		break;
+	}
+}
+EXPORT_SYMBOL(nd_device_unregister);
+
+/**
+ * __nd_driver_register() - register a region or a namespace driver
+ * @nd_drv: driver to register
+ * @owner: automatically set by nd_driver_register() macro
+ * @mod_name: automatically set by nd_driver_register() macro
+ */
+int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
+		const char *mod_name)
+{
+	struct device_driver *drv = &nd_drv->drv;
+
+	if (!nd_drv->type) {
+		pr_debug("driver type bitmask not set (%pf)\n",
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	if (!nd_drv->probe || !nd_drv->remove) {
+		pr_debug("->probe() and ->remove() must be specified\n");
+		return -EINVAL;
+	}
+
+	drv->bus = &nvdimm_bus_type;
+	drv->owner = owner;
+	drv->mod_name = mod_name;
+
+	return driver_register(drv);
+}
+EXPORT_SYMBOL(__nd_driver_register);
+
+int nvdimm_revalidate_disk(struct gendisk *disk)
+{
+	struct device *dev = disk->driverfs_dev;
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	const char *pol = nd_region->ro ? "only" : "write";
+
+	if (nd_region->ro == get_disk_ro(disk))
+		return 0;
+
+	dev_info(dev, "%s read-%s, marking %s read-%s\n",
+			dev_name(&nd_region->dev), pol, disk->disk_name, pol);
+	set_disk_ro(disk, nd_region->ro);
+
+	return 0;
+
+}
+EXPORT_SYMBOL(nvdimm_revalidate_disk);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
+			to_nd_device_type(dev));
+}
+static DEVICE_ATTR_RO(modalias);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static struct attribute *nd_device_attributes[] = {
+	&dev_attr_modalias.attr,
+	&dev_attr_devtype.attr,
+	NULL,
+};
+
+/**
+ * nd_device_attribute_group - generic attributes for all devices on an nd bus
+ */
+struct attribute_group nd_device_attribute_group = {
+	.attrs = nd_device_attributes,
+};
+EXPORT_SYMBOL_GPL(nd_device_attribute_group);
+
+static ssize_t numa_node_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_node(dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
+static struct attribute *nd_numa_attributes[] = {
+	&dev_attr_numa_node.attr,
+	NULL,
+};
+
+static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
+		int n)
+{
+	if (!IS_ENABLED(CONFIG_NUMA))
+		return 0;
+
+	return a->mode;
+}
+
+/**
+ * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
+ */
+struct attribute_group nd_numa_attribute_group = {
+	.attrs = nd_numa_attributes,
+	.is_visible = nd_numa_attr_visible,
+};
+EXPORT_SYMBOL_GPL(nd_numa_attribute_group);
+
+int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
+{
+	dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
+	struct device *dev;
+
+	dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
+			"ndctl%d", nvdimm_bus->id);
+
+	if (IS_ERR(dev)) {
+		dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
+				nvdimm_bus->id, PTR_ERR(dev));
+		return PTR_ERR(dev);
+	}
+	return 0;
+}
+
+void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
+{
+	device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
+}
+
+static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
+	[ND_CMD_IMPLEMENTED] = { },
+	[ND_CMD_SMART] = {
+		.out_num = 2,
+		.out_sizes = { 4, 8, },
+	},
+	[ND_CMD_SMART_THRESHOLD] = {
+		.out_num = 2,
+		.out_sizes = { 4, 8, },
+	},
+	[ND_CMD_DIMM_FLAGS] = {
+		.out_num = 2,
+		.out_sizes = { 4, 4 },
+	},
+	[ND_CMD_GET_CONFIG_SIZE] = {
+		.out_num = 3,
+		.out_sizes = { 4, 4, 4, },
+	},
+	[ND_CMD_GET_CONFIG_DATA] = {
+		.in_num = 2,
+		.in_sizes = { 4, 4, },
+		.out_num = 2,
+		.out_sizes = { 4, UINT_MAX, },
+	},
+	[ND_CMD_SET_CONFIG_DATA] = {
+		.in_num = 3,
+		.in_sizes = { 4, 4, UINT_MAX, },
+		.out_num = 1,
+		.out_sizes = { 4, },
+	},
+	[ND_CMD_VENDOR] = {
+		.in_num = 3,
+		.in_sizes = { 4, 4, UINT_MAX, },
+		.out_num = 3,
+		.out_sizes = { 4, 4, UINT_MAX, },
+	},
+};
+
+const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
+{
+	if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
+		return &__nd_cmd_dimm_descs[cmd];
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);
+
+static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
+	[ND_CMD_IMPLEMENTED] = { },
+	[ND_CMD_ARS_CAP] = {
+		.in_num = 2,
+		.in_sizes = { 8, 8, },
+		.out_num = 2,
+		.out_sizes = { 4, 4, },
+	},
+	[ND_CMD_ARS_START] = {
+		.in_num = 4,
+		.in_sizes = { 8, 8, 2, 6, },
+		.out_num = 1,
+		.out_sizes = { 4, },
+	},
+	[ND_CMD_ARS_STATUS] = {
+		.out_num = 2,
+		.out_sizes = { 4, UINT_MAX, },
+	},
+};
+
+const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
+{
+	if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
+		return &__nd_cmd_bus_descs[cmd];
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);
+
+u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, void *buf)
+{
+	if (idx >= desc->in_num)
+		return UINT_MAX;
+
+	if (desc->in_sizes[idx] < UINT_MAX)
+		return desc->in_sizes[idx];
+
+	if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
+		struct nd_cmd_set_config_hdr *hdr = buf;
+
+		return hdr->in_length;
+	} else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
+		struct nd_cmd_vendor_hdr *hdr = buf;
+
+		return hdr->in_length;
+	}
+
+	return UINT_MAX;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_in_size);
+
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+		const u32 *out_field)
+{
+	if (idx >= desc->out_num)
+		return UINT_MAX;
+
+	if (desc->out_sizes[idx] < UINT_MAX)
+		return desc->out_sizes[idx];
+
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
+		return in_field[1];
+	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
+		return out_field[1];
+	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
+		return ND_CMD_ARS_STATUS_MAX;
+
+	return UINT_MAX;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_out_size);
+
+void wait_nvdimm_bus_probe_idle(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	do {
+		if (nvdimm_bus->probe_active == 0)
+			break;
+		nvdimm_bus_unlock(&nvdimm_bus->dev);
+		wait_event(nvdimm_bus->probe_wait,
+				nvdimm_bus->probe_active == 0);
+		nvdimm_bus_lock(&nvdimm_bus->dev);
+	} while (true);
+}
+
+/* set_config requires an idle interleave set */
+static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd)
+{
+	struct nvdimm_bus *nvdimm_bus;
+
+	if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
+		return 0;
+
+	nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
+	wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
+
+	if (atomic_read(&nvdimm->busy))
+		return -EBUSY;
+	return 0;
+}
+
+static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+		int read_only, unsigned int ioctl_cmd, unsigned long arg)
+{
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+	size_t buf_len = 0, in_len = 0, out_len = 0;
+	static char out_env[ND_CMD_MAX_ENVELOPE];
+	static char in_env[ND_CMD_MAX_ENVELOPE];
+	const struct nd_cmd_desc *desc = NULL;
+	unsigned int cmd = _IOC_NR(ioctl_cmd);
+	void __user *p = (void __user *) arg;
+	struct device *dev = &nvdimm_bus->dev;
+	const char *cmd_name, *dimm_name;
+	unsigned long dsm_mask;
+	void *buf;
+	int rc, i;
+
+	if (nvdimm) {
+		desc = nd_cmd_dimm_desc(cmd);
+		cmd_name = nvdimm_cmd_name(cmd);
+		dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0;
+		dimm_name = dev_name(&nvdimm->dev);
+	} else {
+		desc = nd_cmd_bus_desc(cmd);
+		cmd_name = nvdimm_bus_cmd_name(cmd);
+		dsm_mask = nd_desc->dsm_mask;
+		dimm_name = "bus";
+	}
+
+	if (!desc || (desc->out_num + desc->in_num == 0) ||
+			!test_bit(cmd, &dsm_mask))
+		return -ENOTTY;
+
+	/* fail write commands (when read-only) */
+	if (read_only)
+		switch (ioctl_cmd) {
+		case ND_IOCTL_VENDOR:
+		case ND_IOCTL_SET_CONFIG_DATA:
+		case ND_IOCTL_ARS_START:
+			dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+					nvdimm ? nvdimm_cmd_name(cmd)
+					: nvdimm_bus_cmd_name(cmd));
+			return -EPERM;
+		default:
+			break;
+		}
+
+	/* process an input envelope */
+	for (i = 0; i < desc->in_num; i++) {
+		u32 in_size, copy;
+
+		in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
+		if (in_size == UINT_MAX) {
+			dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			return -ENXIO;
+		}
+		if (!access_ok(VERIFY_READ, p + in_len, in_size))
+			return -EFAULT;
+		if (in_len < sizeof(in_env))
+			copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+		else
+			copy = 0;
+		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
+			return -EFAULT;
+		in_len += in_size;
+	}
+
+	/* process an output envelope */
+	for (i = 0; i < desc->out_num; i++) {
+		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
+				(u32 *) in_env, (u32 *) out_env);
+		u32 copy;
+
+		if (out_size == UINT_MAX) {
+			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			return -EFAULT;
+		}
+		if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size))
+			return -EFAULT;
+		if (out_len < sizeof(out_env))
+			copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+		else
+			copy = 0;
+		if (copy && copy_from_user(&out_env[out_len],
+					p + in_len + out_len, copy))
+			return -EFAULT;
+		out_len += out_size;
+	}
+
+	buf_len = out_len + in_len;
+	if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len)))
+		return -EFAULT;
+
+	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
+		dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__,
+				dimm_name, cmd_name, buf_len,
+				ND_IOCTL_MAX_BUFLEN);
+		return -EINVAL;
+	}
+
+	buf = vmalloc(buf_len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, p, buf_len)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	nvdimm_bus_lock(&nvdimm_bus->dev);
+	rc = nd_cmd_clear_to_send(nvdimm, cmd);
+	if (rc)
+		goto out_unlock;
+
+	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len);
+	if (rc < 0)
+		goto out_unlock;
+	if (copy_to_user(p, buf, buf_len))
+		rc = -EFAULT;
+ out_unlock:
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+ out:
+	vfree(buf);
+	return rc;
+}
+
+static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long id = (long) file->private_data;
+	int rc = -ENXIO, read_only;
+	struct nvdimm_bus *nvdimm_bus;
+
+	read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
+		if (nvdimm_bus->id == id) {
+			rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg);
+			break;
+		}
+	}
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	return rc;
+}
+
+static int match_dimm(struct device *dev, void *data)
+{
+	long id = (long) data;
+
+	if (is_nvdimm(dev)) {
+		struct nvdimm *nvdimm = to_nvdimm(dev);
+
+		return nvdimm->id == id;
+	}
+
+	return 0;
+}
+
+static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int rc = -ENXIO, read_only;
+	struct nvdimm_bus *nvdimm_bus;
+
+	read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
+		struct device *dev = device_find_child(&nvdimm_bus->dev,
+				file->private_data, match_dimm);
+		struct nvdimm *nvdimm;
+
+		if (!dev)
+			continue;
+
+		nvdimm = to_nvdimm(dev);
+		rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg);
+		put_device(dev);
+		break;
+	}
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	return rc;
+}
+
+static int nd_open(struct inode *inode, struct file *file)
+{
+	long minor = iminor(inode);
+
+	file->private_data = (void *) minor;
+	return 0;
+}
+
+static const struct file_operations nvdimm_bus_fops = {
+	.owner = THIS_MODULE,
+	.open = nd_open,
+	.unlocked_ioctl = nd_ioctl,
+	.compat_ioctl = nd_ioctl,
+	.llseek = noop_llseek,
+};
+
+static const struct file_operations nvdimm_fops = {
+	.owner = THIS_MODULE,
+	.open = nd_open,
+	.unlocked_ioctl = nvdimm_ioctl,
+	.compat_ioctl = nvdimm_ioctl,
+	.llseek = noop_llseek,
+};
+
+int __init nvdimm_bus_init(void)
+{
+	int rc;
+
+	rc = bus_register(&nvdimm_bus_type);
+	if (rc)
+		return rc;
+
+	rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
+	if (rc < 0)
+		goto err_bus_chrdev;
+	nvdimm_bus_major = rc;
+
+	rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
+	if (rc < 0)
+		goto err_dimm_chrdev;
+	nvdimm_major = rc;
+
+	nd_class = class_create(THIS_MODULE, "nd");
+	if (IS_ERR(nd_class))
+		goto err_class;
+
+	return 0;
+
+ err_class:
+	unregister_chrdev(nvdimm_major, "dimmctl");
+ err_dimm_chrdev:
+	unregister_chrdev(nvdimm_bus_major, "ndctl");
+ err_bus_chrdev:
+	bus_unregister(&nvdimm_bus_type);
+
+	return rc;
+}
+
+void nvdimm_bus_exit(void)
+{
+	class_destroy(nd_class);
+	unregister_chrdev(nvdimm_bus_major, "ndctl");
+	unregister_chrdev(nvdimm_major, "dimmctl");
+	bus_unregister(&nvdimm_bus_type);
+}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
new file mode 100644
index 000000000000..cb62ec6a12d0
--- /dev/null
+++ b/drivers/nvdimm/core.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/libnvdimm.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/ctype.h>
+#include <linux/ndctl.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include "nd-core.h"
+#include "nd.h"
+
+LIST_HEAD(nvdimm_bus_list);
+DEFINE_MUTEX(nvdimm_bus_list_mutex);
+static DEFINE_IDA(nd_ida);
+
+void nvdimm_bus_lock(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return;
+	mutex_lock(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(nvdimm_bus_lock);
+
+void nvdimm_bus_unlock(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return;
+	mutex_unlock(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(nvdimm_bus_unlock);
+
+bool is_nvdimm_bus_locked(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return false;
+	return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(is_nvdimm_bus_locked);
+
+u64 nd_fletcher64(void *addr, size_t len, bool le)
+{
+	u32 *buf = addr;
+	u32 lo32 = 0;
+	u64 hi32 = 0;
+	int i;
+
+	for (i = 0; i < len / sizeof(u32); i++) {
+		lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i];
+		hi32 += lo32;
+	}
+
+	return hi32 << 32 | lo32;
+}
+EXPORT_SYMBOL_GPL(nd_fletcher64);
+
+static void nvdimm_bus_release(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus;
+
+	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+	ida_simple_remove(&nd_ida, nvdimm_bus->id);
+	kfree(nvdimm_bus);
+}
+
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus;
+
+	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
+	WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
+	return nvdimm_bus;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm_bus);
+
+struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
+{
+	/* struct nvdimm_bus definition is private to libnvdimm */
+	return nvdimm_bus->nd_desc;
+}
+EXPORT_SYMBOL_GPL(to_nd_desc);
+
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
+{
+	struct device *dev;
+
+	for (dev = nd_dev; dev; dev = dev->parent)
+		if (dev->release == nvdimm_bus_release)
+			break;
+	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
+	if (dev)
+		return to_nvdimm_bus(dev);
+	return NULL;
+}
+
+static bool is_uuid_sep(char sep)
+{
+	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
+		return true;
+	return false;
+}
+
+static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
+		size_t len)
+{
+	const char *str = buf;
+	u8 uuid[16];
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
+			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					__func__, i, str - buf, str[0],
+					str + 1 - buf, str[1]);
+			return -EINVAL;
+		}
+
+		uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
+		str += 2;
+		if (is_uuid_sep(*str))
+			str++;
+	}
+
+	memcpy(uuid_out, uuid, sizeof(uuid));
+	return 0;
+}
+
+/**
+ * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
+ * @dev: container device for the uuid property
+ * @uuid_out: uuid buffer to replace
+ * @buf: raw sysfs buffer to parse
+ *
+ * Enforce that uuids can only be changed while the device is disabled
+ * (driver detached)
+ * LOCKING: expects device_lock() is held on entry
+ */
+int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+		size_t len)
+{
+	u8 uuid[16];
+	int rc;
+
+	if (dev->driver)
+		return -EBUSY;
+
+	rc = nd_uuid_parse(dev, uuid, buf, len);
+	if (rc)
+		return rc;
+
+	kfree(*uuid_out);
+	*uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
+	if (!(*uuid_out))
+		return -ENOMEM;
+
+	return 0;
+}
+
+ssize_t nd_sector_size_show(unsigned long current_lbasize,
+		const unsigned long *supported, char *buf)
+{
+	ssize_t len = 0;
+	int i;
+
+	for (i = 0; supported[i]; i++)
+		if (current_lbasize == supported[i])
+			len += sprintf(buf + len, "[%ld] ", supported[i]);
+		else
+			len += sprintf(buf + len, "%ld ", supported[i]);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+ssize_t nd_sector_size_store(struct device *dev, const char *buf,
+		unsigned long *current_lbasize, const unsigned long *supported)
+{
+	unsigned long lbasize;
+	int rc, i;
+
+	if (dev->driver)
+		return -EBUSY;
+
+	rc = kstrtoul(buf, 0, &lbasize);
+	if (rc)
+		return rc;
+
+	for (i = 0; supported[i]; i++)
+		if (lbasize == supported[i])
+			break;
+
+	if (supported[i]) {
+		*current_lbasize = lbasize;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
+
+void __nd_iostat_start(struct bio *bio, unsigned long *start)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	const int rw = bio_data_dir(bio);
+	int cpu = part_stat_lock();
+
+	*start = jiffies;
+	part_round_stats(cpu, &disk->part0);
+	part_stat_inc(cpu, &disk->part0, ios[rw]);
+	part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
+	part_inc_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(__nd_iostat_start);
+
+void nd_iostat_end(struct bio *bio, unsigned long start)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	unsigned long duration = jiffies - start;
+	const int rw = bio_data_dir(bio);
+	int cpu = part_stat_lock();
+
+	part_stat_add(cpu, &disk->part0, ticks[rw], duration);
+	part_round_stats(cpu, &disk->part0);
+	part_dec_in_flight(&disk->part0, rw);
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(nd_iostat_end);
+
+static ssize_t commands_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int cmd, len = 0;
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+
+	for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
+		len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+static DEVICE_ATTR_RO(commands);
+
+static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
+{
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+	struct device *parent = nvdimm_bus->dev.parent;
+
+	if (nd_desc->provider_name)
+		return nd_desc->provider_name;
+	else if (parent)
+		return dev_name(parent);
+	else
+		return "unknown";
+}
+
+static ssize_t provider_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+
+	return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus));
+}
+static DEVICE_ATTR_RO(provider);
+
+static int flush_namespaces(struct device *dev, void *data)
+{
+	device_lock(dev);
+	device_unlock(dev);
+	return 0;
+}
+
+static int flush_regions_dimms(struct device *dev, void *data)
+{
+	device_lock(dev);
+	device_unlock(dev);
+	device_for_each_child(dev, NULL, flush_namespaces);
+	return 0;
+}
+
+static ssize_t wait_probe_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	nd_synchronize();
+	device_for_each_child(dev, NULL, flush_regions_dimms);
+	return sprintf(buf, "1\n");
+}
+static DEVICE_ATTR_RO(wait_probe);
+
+static struct attribute *nvdimm_bus_attributes[] = {
+	&dev_attr_commands.attr,
+	&dev_attr_wait_probe.attr,
+	&dev_attr_provider.attr,
+	NULL,
+};
+
+struct attribute_group nvdimm_bus_attribute_group = {
+	.attrs = nvdimm_bus_attributes,
+};
+EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
+
+struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nd_desc, struct module *module)
+{
+	struct nvdimm_bus *nvdimm_bus;
+	int rc;
+
+	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
+	if (!nvdimm_bus)
+		return NULL;
+	INIT_LIST_HEAD(&nvdimm_bus->list);
+	init_waitqueue_head(&nvdimm_bus->probe_wait);
+	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+	mutex_init(&nvdimm_bus->reconfig_mutex);
+	if (nvdimm_bus->id < 0) {
+		kfree(nvdimm_bus);
+		return NULL;
+	}
+	nvdimm_bus->nd_desc = nd_desc;
+	nvdimm_bus->module = module;
+	nvdimm_bus->dev.parent = parent;
+	nvdimm_bus->dev.release = nvdimm_bus_release;
+	nvdimm_bus->dev.groups = nd_desc->attr_groups;
+	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+	rc = device_register(&nvdimm_bus->dev);
+	if (rc) {
+		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
+		goto err;
+	}
+
+	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
+	if (rc)
+		goto err;
+
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	return nvdimm_bus;
+ err:
+	put_device(&nvdimm_bus->dev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
+
+static int child_unregister(struct device *dev, void *data)
+{
+	/*
+	 * the singular ndctl class device per bus needs to be
+	 * "device_destroy"ed, so skip it here
+	 *
+	 * i.e. remove classless children
+	 */
+	if (dev->class)
+		/* pass */;
+	else
+		nd_device_unregister(dev, ND_SYNC);
+	return 0;
+}
+
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
+{
+	if (!nvdimm_bus)
+		return;
+
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_del_init(&nvdimm_bus->list);
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	nd_synchronize();
+	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+	nvdimm_bus_destroy_ndctl(nvdimm_bus);
+
+	device_unregister(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter)
+{
+	return 0;
+}
+
+int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
+{
+	struct blk_integrity integrity = {
+		.name = "ND-PI-NOP",
+		.generate_fn = nd_pi_nop_generate_verify,
+		.verify_fn = nd_pi_nop_generate_verify,
+		.tuple_size = meta_size,
+		.tag_size = meta_size,
+	};
+	int ret;
+
+	if (meta_size == 0)
+		return 0;
+
+	ret = blk_integrity_register(disk, &integrity);
+	if (ret)
+		return ret;
+
+	blk_queue_max_integrity_segments(disk->queue, 1);
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_integrity_init);
+
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
+{
+	return 0;
+}
+EXPORT_SYMBOL(nd_integrity_init);
+
+#endif
+
+static __init int libnvdimm_init(void)
+{
+	int rc;
+
+	rc = nvdimm_bus_init();
+	if (rc)
+		return rc;
+	rc = nvdimm_init();
+	if (rc)
+		goto err_dimm;
+	rc = nd_region_init();
+	if (rc)
+		goto err_region;
+	return 0;
+ err_region:
+	nvdimm_exit();
+ err_dimm:
+	nvdimm_bus_exit();
+	return rc;
+}
+
+static __exit void libnvdimm_exit(void)
+{
+	WARN_ON(!list_empty(&nvdimm_bus_list));
+	nd_region_exit();
+	nvdimm_exit();
+	nvdimm_bus_exit();
+}
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+subsys_initcall(libnvdimm_init);
+module_exit(libnvdimm_exit);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
new file mode 100644
index 000000000000..71d12bb67339
--- /dev/null
+++ b/drivers/nvdimm/dimm.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/nd.h>
+#include "label.h"
+#include "nd.h"
+
+static int nvdimm_probe(struct device *dev)
+{
+	struct nvdimm_drvdata *ndd;
+	int rc;
+
+	ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
+	if (!ndd)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, ndd);
+	ndd->dpa.name = dev_name(dev);
+	ndd->ns_current = -1;
+	ndd->ns_next = -1;
+	ndd->dpa.start = 0;
+	ndd->dpa.end = -1;
+	ndd->dev = dev;
+	get_device(dev);
+	kref_init(&ndd->kref);
+
+	rc = nvdimm_init_nsarea(ndd);
+	if (rc)
+		goto err;
+
+	rc = nvdimm_init_config_data(ndd);
+	if (rc)
+		goto err;
+
+	dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
+
+	nvdimm_bus_lock(dev);
+	ndd->ns_current = nd_label_validate(ndd);
+	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
+	nd_label_copy(ndd, to_next_namespace_index(ndd),
+			to_current_namespace_index(ndd));
+	rc = nd_label_reserve_dpa(ndd);
+	nvdimm_bus_unlock(dev);
+
+	if (rc)
+		goto err;
+
+	return 0;
+
+ err:
+	put_ndd(ndd);
+	return rc;
+}
+
+static int nvdimm_remove(struct device *dev)
+{
+	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+
+	nvdimm_bus_lock(dev);
+	dev_set_drvdata(dev, NULL);
+	nvdimm_bus_unlock(dev);
+	put_ndd(ndd);
+
+	return 0;
+}
+
+static struct nd_device_driver nvdimm_driver = {
+	.probe = nvdimm_probe,
+	.remove = nvdimm_remove,
+	.drv = {
+		.name = "nvdimm",
+	},
+	.type = ND_DRIVER_DIMM,
+};
+
+int __init nvdimm_init(void)
+{
+	return nd_driver_register(&nvdimm_driver);
+}
+
+void nvdimm_exit(void)
+{
+	driver_unregister(&nvdimm_driver.drv);
+}
+
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
new file mode 100644
index 000000000000..c05eb807d674
--- /dev/null
+++ b/drivers/nvdimm/dimm_devs.c
@@ -0,0 +1,551 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "label.h"
+#include "nd.h"
+
+static DEFINE_IDA(dimm_ida);
+
+/*
+ * Retrieve bus and dimm handle and return if this bus supports
+ * get_config_data commands
+ */
+static int __validate_dimm(struct nvdimm_drvdata *ndd)
+{
+	struct nvdimm *nvdimm;
+
+	if (!ndd)
+		return -EINVAL;
+
+	nvdimm = to_nvdimm(ndd->dev);
+
+	if (!nvdimm->dsm_mask)
+		return -ENXIO;
+	if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask))
+		return -ENXIO;
+
+	return 0;
+}
+
+static int validate_dimm(struct nvdimm_drvdata *ndd)
+{
+	int rc = __validate_dimm(ndd);
+
+	if (rc && ndd)
+		dev_dbg(ndd->dev, "%pf: %s error: %d\n",
+				__builtin_return_address(0), __func__, rc);
+	return rc;
+}
+
+/**
+ * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area
+ * @nvdimm: dimm to initialize
+ */
+int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
+{
+	struct nd_cmd_get_config_size *cmd = &ndd->nsarea;
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nvdimm_bus_descriptor *nd_desc;
+	int rc = validate_dimm(ndd);
+
+	if (rc)
+		return rc;
+
+	if (cmd->config_size)
+		return 0; /* already valid */
+
+	memset(cmd, 0, sizeof(*cmd));
+	nd_desc = nvdimm_bus->nd_desc;
+	return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+			ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd));
+}
+
+int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nd_cmd_get_config_data_hdr *cmd;
+	struct nvdimm_bus_descriptor *nd_desc;
+	int rc = validate_dimm(ndd);
+	u32 max_cmd_size, config_size;
+	size_t offset;
+
+	if (rc)
+		return rc;
+
+	if (ndd->data)
+		return 0;
+
+	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
+			|| ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
+		dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
+				ndd->nsarea.max_xfer, ndd->nsarea.config_size);
+		return -ENXIO;
+	}
+
+	ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
+	if (!ndd->data)
+		ndd->data = vmalloc(ndd->nsarea.config_size);
+
+	if (!ndd->data)
+		return -ENOMEM;
+
+	max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
+	cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	nd_desc = nvdimm_bus->nd_desc;
+	for (config_size = ndd->nsarea.config_size, offset = 0;
+			config_size; config_size -= cmd->in_length,
+			offset += cmd->in_length) {
+		cmd->in_length = min(config_size, max_cmd_size);
+		cmd->in_offset = offset;
+		rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+				ND_CMD_GET_CONFIG_DATA, cmd,
+				cmd->in_length + sizeof(*cmd));
+		if (rc || cmd->status) {
+			rc = -ENXIO;
+			break;
+		}
+		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
+	}
+	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	kfree(cmd);
+
+	return rc;
+}
+
+int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
+		void *buf, size_t len)
+{
+	int rc = validate_dimm(ndd);
+	size_t max_cmd_size, buf_offset;
+	struct nd_cmd_set_config_hdr *cmd;
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+
+	if (rc)
+		return rc;
+
+	if (!ndd->data)
+		return -ENXIO;
+
+	if (offset + len > ndd->nsarea.config_size)
+		return -ENXIO;
+
+	max_cmd_size = min_t(u32, PAGE_SIZE, len);
+	max_cmd_size = min_t(u32, max_cmd_size, ndd->nsarea.max_xfer);
+	cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	for (buf_offset = 0; len; len -= cmd->in_length,
+			buf_offset += cmd->in_length) {
+		size_t cmd_size;
+		u32 *status;
+
+		cmd->in_offset = offset + buf_offset;
+		cmd->in_length = min(max_cmd_size, len);
+		memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length);
+
+		/* status is output in the last 4-bytes of the command buffer */
+		cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32);
+		status = ((void *) cmd) + cmd_size - sizeof(u32);
+
+		rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+				ND_CMD_SET_CONFIG_DATA, cmd, cmd_size);
+		if (rc || *status) {
+			rc = rc ? rc : -ENXIO;
+			break;
+		}
+	}
+	kfree(cmd);
+
+	return rc;
+}
+
+static void nvdimm_release(struct device *dev)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	ida_simple_remove(&dimm_ida, nvdimm->id);
+	kfree(nvdimm);
+}
+
+static struct device_type nvdimm_device_type = {
+	.name = "nvdimm",
+	.release = nvdimm_release,
+};
+
+bool is_nvdimm(struct device *dev)
+{
+	return dev->type == &nvdimm_device_type;
+}
+
+struct nvdimm *to_nvdimm(struct device *dev)
+{
+	struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev);
+
+	WARN_ON(!is_nvdimm(dev));
+	return nvdimm;
+}
+EXPORT_SYMBOL_GPL(to_nvdimm);
+
+struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
+{
+	struct nd_region *nd_region = &ndbr->nd_region;
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+
+	return nd_mapping->nvdimm;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
+
+struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
+{
+	struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
+
+	return dev_get_drvdata(&nvdimm->dev);
+}
+EXPORT_SYMBOL(to_ndd);
+
+void nvdimm_drvdata_release(struct kref *kref)
+{
+	struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref);
+	struct device *dev = ndd->dev;
+	struct resource *res, *_r;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	nvdimm_bus_lock(dev);
+	for_each_dpa_resource_safe(ndd, res, _r)
+		nvdimm_free_dpa(ndd, res);
+	nvdimm_bus_unlock(dev);
+
+	if (ndd->data && is_vmalloc_addr(ndd->data))
+		vfree(ndd->data);
+	else
+		kfree(ndd->data);
+	kfree(ndd);
+	put_device(dev);
+}
+
+void get_ndd(struct nvdimm_drvdata *ndd)
+{
+	kref_get(&ndd->kref);
+}
+
+void put_ndd(struct nvdimm_drvdata *ndd)
+{
+	if (ndd)
+		kref_put(&ndd->kref, nvdimm_drvdata_release);
+}
+
+const char *nvdimm_name(struct nvdimm *nvdimm)
+{
+	return dev_name(&nvdimm->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_name);
+
+void *nvdimm_provider_data(struct nvdimm *nvdimm)
+{
+	if (nvdimm)
+		return nvdimm->provider_data;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nvdimm_provider_data);
+
+static ssize_t commands_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	int cmd, len = 0;
+
+	if (!nvdimm->dsm_mask)
+		return sprintf(buf, "\n");
+
+	for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG)
+		len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd));
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+static DEVICE_ATTR_RO(commands);
+
+static ssize_t state_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	/*
+	 * The state may be in the process of changing, userspace should
+	 * quiesce probing if it wants a static answer
+	 */
+	nvdimm_bus_lock(dev);
+	nvdimm_bus_unlock(dev);
+	return sprintf(buf, "%s\n", atomic_read(&nvdimm->busy)
+			? "active" : "idle");
+}
+static DEVICE_ATTR_RO(state);
+
+static ssize_t available_slots_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+	ssize_t rc;
+	u32 nfree;
+
+	if (!ndd)
+		return -ENXIO;
+
+	nvdimm_bus_lock(dev);
+	nfree = nd_label_nfree(ndd);
+	if (nfree - 1 > nfree) {
+		dev_WARN_ONCE(dev, 1, "we ate our last label?\n");
+		nfree = 0;
+	} else
+		nfree--;
+	rc = sprintf(buf, "%d\n", nfree);
+	nvdimm_bus_unlock(dev);
+	return rc;
+}
+static DEVICE_ATTR_RO(available_slots);
+
+static struct attribute *nvdimm_attributes[] = {
+	&dev_attr_state.attr,
+	&dev_attr_commands.attr,
+	&dev_attr_available_slots.attr,
+	NULL,
+};
+
+struct attribute_group nvdimm_attribute_group = {
+	.attrs = nvdimm_attributes,
+};
+EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
+
+struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+		const struct attribute_group **groups, unsigned long flags,
+		unsigned long *dsm_mask)
+{
+	struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
+	struct device *dev;
+
+	if (!nvdimm)
+		return NULL;
+
+	nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL);
+	if (nvdimm->id < 0) {
+		kfree(nvdimm);
+		return NULL;
+	}
+	nvdimm->provider_data = provider_data;
+	nvdimm->flags = flags;
+	nvdimm->dsm_mask = dsm_mask;
+	atomic_set(&nvdimm->busy, 0);
+	dev = &nvdimm->dev;
+	dev_set_name(dev, "nmem%d", nvdimm->id);
+	dev->parent = &nvdimm_bus->dev;
+	dev->type = &nvdimm_device_type;
+	dev->devt = MKDEV(nvdimm_major, nvdimm->id);
+	dev->groups = groups;
+	nd_device_register(dev);
+
+	return nvdimm;
+}
+EXPORT_SYMBOL_GPL(nvdimm_create);
+
+/**
+ * nd_blk_available_dpa - account the unused dpa of BLK region
+ * @nd_mapping: container of dpa-resource-root + labels
+ *
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ */
+resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+{
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	resource_size_t map_end, busy = 0, available;
+	struct resource *res;
+
+	if (!ndd)
+		return 0;
+
+	map_end = nd_mapping->start + nd_mapping->size - 1;
+	for_each_dpa_resource(ndd, res)
+		if (res->start >= nd_mapping->start && res->start < map_end) {
+			resource_size_t end = min(map_end, res->end);
+
+			busy += end - res->start + 1;
+		} else if (res->end >= nd_mapping->start
+				&& res->end <= map_end) {
+			busy += res->end - nd_mapping->start;
+		} else if (nd_mapping->start > res->start
+				&& nd_mapping->start < res->end) {
+			/* total eclipse of the BLK region mapping */
+			busy += nd_mapping->size;
+		}
+
+	available = map_end - nd_mapping->start + 1;
+	if (busy < available)
+		return available - busy;
+	return 0;
+}
+
+/**
+ * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
+ * @nd_mapping: container of dpa-resource-root + labels
+ * @nd_region: constrain available space check to this reference region
+ * @overlap: calculate available space assuming this level of overlap
+ *
+ * Validate that a PMEM label, if present, aligns with the start of an
+ * interleave set and truncate the available size at the lowest BLK
+ * overlap point.
+ *
+ * The expectation is that this routine is called multiple times as it
+ * probes for the largest BLK encroachment for any single member DIMM of
+ * the interleave set.  Once that value is determined the PMEM-limit for
+ * the set can be established.
+ */
+resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, resource_size_t *overlap)
+{
+	resource_size_t map_start, map_end, busy = 0, available, blk_start;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct resource *res;
+	const char *reason;
+
+	if (!ndd)
+		return 0;
+
+	map_start = nd_mapping->start;
+	map_end = map_start + nd_mapping->size - 1;
+	blk_start = max(map_start, map_end + 1 - *overlap);
+	for_each_dpa_resource(ndd, res)
+		if (res->start >= map_start && res->start < map_end) {
+			if (strncmp(res->name, "blk", 3) == 0)
+				blk_start = min(blk_start, res->start);
+			else if (res->start != map_start) {
+				reason = "misaligned to iset";
+				goto err;
+			} else {
+				if (busy) {
+					reason = "duplicate overlapping PMEM reservations?";
+					goto err;
+				}
+				busy += resource_size(res);
+				continue;
+			}
+		} else if (res->end >= map_start && res->end <= map_end) {
+			if (strncmp(res->name, "blk", 3) == 0) {
+				/*
+				 * If a BLK allocation overlaps the start of
+				 * PMEM the entire interleave set may now only
+				 * be used for BLK.
+				 */
+				blk_start = map_start;
+			} else {
+				reason = "misaligned to iset";
+				goto err;
+			}
+		} else if (map_start > res->start && map_start < res->end) {
+			/* total eclipse of the mapping */
+			busy += nd_mapping->size;
+			blk_start = map_start;
+		}
+
+	*overlap = map_end + 1 - blk_start;
+	available = blk_start - map_start;
+	if (busy < available)
+		return available - busy;
+	return 0;
+
+ err:
+	/*
+	 * Something is wrong, PMEM must align with the start of the
+	 * interleave set, and there can only be one allocation per set.
+	 */
+	nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
+	return 0;
+}
+
+void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
+{
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
+	kfree(res->name);
+	__release_region(&ndd->dpa, res->start, resource_size(res));
+}
+
+struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id, resource_size_t start,
+		resource_size_t n)
+{
+	char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL);
+	struct resource *res;
+
+	if (!name)
+		return NULL;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
+	res = __request_region(&ndd->dpa, start, n, name, 0);
+	if (!res)
+		kfree(name);
+	return res;
+}
+
+/**
+ * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
+ * @nvdimm: container of dpa-resource-root + labels
+ * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
+ */
+resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id)
+{
+	resource_size_t allocated = 0;
+	struct resource *res;
+
+	for_each_dpa_resource(ndd, res)
+		if (strcmp(res->name, label_id->id) == 0)
+			allocated += resource_size(res);
+
+	return allocated;
+}
+
+static int count_dimms(struct device *dev, void *c)
+{
+	int *count = c;
+
+	if (is_nvdimm(dev))
+		(*count)++;
+	return 0;
+}
+
+int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
+{
+	int count = 0;
+	/* Flush any possible dimm registration failures */
+	nd_synchronize();
+
+	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
+	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	if (count != dimm_count)
+		return -ENXIO;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
new file mode 100644
index 000000000000..96526dcfdd37
--- /dev/null
+++ b/drivers/nvdimm/label.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include "nd-core.h"
+#include "label.h"
+#include "nd.h"
+
+static u32 best_seq(u32 a, u32 b)
+{
+	a &= NSINDEX_SEQ_MASK;
+	b &= NSINDEX_SEQ_MASK;
+
+	if (a == 0 || a == b)
+		return b;
+	else if (b == 0)
+		return a;
+	else if (nd_inc_seq(a) == b)
+		return b;
+	else
+		return a;
+}
+
+size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
+{
+	u32 index_span;
+
+	if (ndd->nsindex_size)
+		return ndd->nsindex_size;
+
+	/*
+	 * The minimum index space is 512 bytes, with that amount of
+	 * index we can describe ~1400 labels which is less than a byte
+	 * of overhead per label.  Round up to a byte of overhead per
+	 * label and determine the size of the index region.  Yes, this
+	 * starts to waste space at larger config_sizes, but it's
+	 * unlikely we'll ever see anything but 128K.
+	 */
+	index_span = ndd->nsarea.config_size / 129;
+	index_span /= NSINDEX_ALIGN * 2;
+	ndd->nsindex_size = index_span * NSINDEX_ALIGN;
+
+	return ndd->nsindex_size;
+}
+
+int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
+{
+	return ndd->nsarea.config_size / 129;
+}
+
+int nd_label_validate(struct nvdimm_drvdata *ndd)
+{
+	/*
+	 * On media label format consists of two index blocks followed
+	 * by an array of labels.  None of these structures are ever
+	 * updated in place.  A sequence number tracks the current
+	 * active index and the next one to write, while labels are
+	 * written to free slots.
+	 *
+	 *     +------------+
+	 *     |            |
+	 *     |  nsindex0  |
+	 *     |            |
+	 *     +------------+
+	 *     |            |
+	 *     |  nsindex1  |
+	 *     |            |
+	 *     +------------+
+	 *     |   label0   |
+	 *     +------------+
+	 *     |   label1   |
+	 *     +------------+
+	 *     |            |
+	 *      ....nslot...
+	 *     |            |
+	 *     +------------+
+	 *     |   labelN   |
+	 *     +------------+
+	 */
+	struct nd_namespace_index *nsindex[] = {
+		to_namespace_index(ndd, 0),
+		to_namespace_index(ndd, 1),
+	};
+	const int num_index = ARRAY_SIZE(nsindex);
+	struct device *dev = ndd->dev;
+	bool valid[2] = { 0 };
+	int i, num_valid = 0;
+	u32 seq;
+
+	for (i = 0; i < num_index; i++) {
+		u32 nslot;
+		u8 sig[NSINDEX_SIG_LEN];
+		u64 sum_save, sum, size;
+
+		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
+		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
+			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
+					__func__, i);
+			continue;
+		}
+		sum_save = __le64_to_cpu(nsindex[i]->checksum);
+		nsindex[i]->checksum = __cpu_to_le64(0);
+		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
+		nsindex[i]->checksum = __cpu_to_le64(sum_save);
+		if (sum != sum_save) {
+			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
+					__func__, i);
+			continue;
+		}
+
+		seq = __le32_to_cpu(nsindex[i]->seq);
+		if ((seq & NSINDEX_SEQ_MASK) == 0) {
+			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
+					__func__, i, seq);
+			continue;
+		}
+
+		/* sanity check the index against expected values */
+		if (__le64_to_cpu(nsindex[i]->myoff)
+				!= i * sizeof_namespace_index(ndd)) {
+			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
+					__func__, i, (unsigned long long)
+					__le64_to_cpu(nsindex[i]->myoff));
+			continue;
+		}
+		if (__le64_to_cpu(nsindex[i]->otheroff)
+				!= (!i) * sizeof_namespace_index(ndd)) {
+			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
+					__func__, i, (unsigned long long)
+					__le64_to_cpu(nsindex[i]->otheroff));
+			continue;
+		}
+
+		size = __le64_to_cpu(nsindex[i]->mysize);
+		if (size > sizeof_namespace_index(ndd)
+				|| size < sizeof(struct nd_namespace_index)) {
+			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
+					__func__, i, size);
+			continue;
+		}
+
+		nslot = __le32_to_cpu(nsindex[i]->nslot);
+		if (nslot * sizeof(struct nd_namespace_label)
+				+ 2 * sizeof_namespace_index(ndd)
+				> ndd->nsarea.config_size) {
+			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
+					__func__, i, nslot,
+					ndd->nsarea.config_size);
+			continue;
+		}
+		valid[i] = true;
+		num_valid++;
+	}
+
+	switch (num_valid) {
+	case 0:
+		break;
+	case 1:
+		for (i = 0; i < num_index; i++)
+			if (valid[i])
+				return i;
+		/* can't have num_valid > 0 but valid[] = { false, false } */
+		WARN_ON(1);
+		break;
+	default:
+		/* pick the best index... */
+		seq = best_seq(__le32_to_cpu(nsindex[0]->seq),
+				__le32_to_cpu(nsindex[1]->seq));
+		if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK))
+			return 1;
+		else
+			return 0;
+		break;
+	}
+
+	return -1;
+}
+
+void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
+		struct nd_namespace_index *src)
+{
+	if (dst && src)
+		/* pass */;
+	else
+		return;
+
+	memcpy(dst, src, sizeof_namespace_index(ndd));
+}
+
+static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
+{
+	void *base = to_namespace_index(ndd, 0);
+
+	return base + 2 * sizeof_namespace_index(ndd);
+}
+
+static int to_slot(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_label *nd_label)
+{
+	return nd_label - nd_label_base(ndd);
+}
+
+#define for_each_clear_bit_le(bit, addr, size) \
+	for ((bit) = find_next_zero_bit_le((addr), (size), 0);  \
+	     (bit) < (size);                                    \
+	     (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1))
+
+/**
+ * preamble_index - common variable initialization for nd_label_* routines
+ * @ndd: dimm container for the relevant label set
+ * @idx: namespace_index index
+ * @nsindex_out: on return set to the currently active namespace index
+ * @free: on return set to the free label bitmap in the index
+ * @nslot: on return set to the number of slots in the label space
+ */
+static bool preamble_index(struct nvdimm_drvdata *ndd, int idx,
+		struct nd_namespace_index **nsindex_out,
+		unsigned long **free, u32 *nslot)
+{
+	struct nd_namespace_index *nsindex;
+
+	nsindex = to_namespace_index(ndd, idx);
+	if (nsindex == NULL)
+		return false;
+
+	*free = (unsigned long *) nsindex->free;
+	*nslot = __le32_to_cpu(nsindex->nslot);
+	*nsindex_out = nsindex;
+
+	return true;
+}
+
+char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
+{
+	if (!label_id || !uuid)
+		return NULL;
+	snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb",
+			flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid);
+	return label_id->id;
+}
+
+static bool preamble_current(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_index **nsindex,
+		unsigned long **free, u32 *nslot)
+{
+	return preamble_index(ndd, ndd->ns_current, nsindex,
+			free, nslot);
+}
+
+static bool preamble_next(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_index **nsindex,
+		unsigned long **free, u32 *nslot)
+{
+	return preamble_index(ndd, ndd->ns_next, nsindex,
+			free, nslot);
+}
+
+static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
+{
+	/* check that we are written where we expect to be written */
+	if (slot != __le32_to_cpu(nd_label->slot))
+		return false;
+
+	/* check that DPA allocations are page aligned */
+	if ((__le64_to_cpu(nd_label->dpa)
+				| __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
+		return false;
+
+	return true;
+}
+
+int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return 0; /* no label, nothing to reserve */
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+		struct nd_region *nd_region = NULL;
+		u8 label_uuid[NSLABEL_UUID_LEN];
+		struct nd_label_id label_id;
+		struct resource *res;
+		u32 flags;
+
+		nd_label = nd_label_base(ndd) + slot;
+
+		if (!slot_valid(nd_label, slot))
+			continue;
+
+		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		flags = __le32_to_cpu(nd_label->flags);
+		nd_label_gen_id(&label_id, label_uuid, flags);
+		res = nvdimm_allocate_dpa(ndd, &label_id,
+				__le64_to_cpu(nd_label->dpa),
+				__le64_to_cpu(nd_label->rawsize));
+		nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
+		if (!res)
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+int nd_label_active_count(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+	int count = 0;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return 0;
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+
+		nd_label = nd_label_base(ndd) + slot;
+
+		if (!slot_valid(nd_label, slot)) {
+			u32 label_slot = __le32_to_cpu(nd_label->slot);
+			u64 size = __le64_to_cpu(nd_label->rawsize);
+			u64 dpa = __le64_to_cpu(nd_label->dpa);
+
+			dev_dbg(ndd->dev,
+				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					__func__, slot, label_slot, dpa, size);
+			continue;
+		}
+		count++;
+	}
+	return count;
+}
+
+struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return NULL;
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+
+		nd_label = nd_label_base(ndd) + slot;
+		if (!slot_valid(nd_label, slot))
+			continue;
+
+		if (n-- == 0)
+			return nd_label_base(ndd) + slot;
+	}
+
+	return NULL;
+}
+
+u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!preamble_next(ndd, &nsindex, &free, &nslot))
+		return UINT_MAX;
+
+	WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
+
+	slot = find_next_bit_le(free, nslot, 0);
+	if (slot == nslot)
+		return UINT_MAX;
+
+	clear_bit_le(slot, free);
+
+	return slot;
+}
+
+bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot;
+
+	if (!preamble_next(ndd, &nsindex, &free, &nslot))
+		return false;
+
+	WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
+
+	if (slot < nslot)
+		return !test_and_set_bit_le(slot, free);
+	return false;
+}
+
+u32 nd_label_nfree(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot;
+
+	WARN_ON(!is_nvdimm_bus_locked(ndd->dev));
+
+	if (!preamble_next(ndd, &nsindex, &free, &nslot))
+		return nvdimm_num_label_slots(ndd);
+
+	return bitmap_weight(free, nslot);
+}
+
+static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq,
+		unsigned long flags)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long offset;
+	u64 checksum;
+	u32 nslot;
+	int rc;
+
+	nsindex = to_namespace_index(ndd, index);
+	if (flags & ND_NSINDEX_INIT)
+		nslot = nvdimm_num_label_slots(ndd);
+	else
+		nslot = __le32_to_cpu(nsindex->nslot);
+
+	memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN);
+	nsindex->flags = __cpu_to_le32(0);
+	nsindex->seq = __cpu_to_le32(seq);
+	offset = (unsigned long) nsindex
+		- (unsigned long) to_namespace_index(ndd, 0);
+	nsindex->myoff = __cpu_to_le64(offset);
+	nsindex->mysize = __cpu_to_le64(sizeof_namespace_index(ndd));
+	offset = (unsigned long) to_namespace_index(ndd,
+			nd_label_next_nsindex(index))
+		- (unsigned long) to_namespace_index(ndd, 0);
+	nsindex->otheroff = __cpu_to_le64(offset);
+	offset = (unsigned long) nd_label_base(ndd)
+		- (unsigned long) to_namespace_index(ndd, 0);
+	nsindex->labeloff = __cpu_to_le64(offset);
+	nsindex->nslot = __cpu_to_le32(nslot);
+	nsindex->major = __cpu_to_le16(1);
+	nsindex->minor = __cpu_to_le16(1);
+	nsindex->checksum = __cpu_to_le64(0);
+	if (flags & ND_NSINDEX_INIT) {
+		unsigned long *free = (unsigned long *) nsindex->free;
+		u32 nfree = ALIGN(nslot, BITS_PER_LONG);
+		int last_bits, i;
+
+		memset(nsindex->free, 0xff, nfree / 8);
+		for (i = 0, last_bits = nfree - nslot; i < last_bits; i++)
+			clear_bit_le(nslot + i, free);
+	}
+	checksum = nd_fletcher64(nsindex, sizeof_namespace_index(ndd), 1);
+	nsindex->checksum = __cpu_to_le64(checksum);
+	rc = nvdimm_set_config_data(ndd, __le64_to_cpu(nsindex->myoff),
+			nsindex, sizeof_namespace_index(ndd));
+	if (rc < 0)
+		return rc;
+
+	if (flags & ND_NSINDEX_INIT)
+		return 0;
+
+	/* copy the index we just wrote to the new 'next' */
+	WARN_ON(index != ndd->ns_next);
+	nd_label_copy(ndd, to_current_namespace_index(ndd), nsindex);
+	ndd->ns_current = nd_label_next_nsindex(ndd->ns_current);
+	ndd->ns_next = nd_label_next_nsindex(ndd->ns_next);
+	WARN_ON(ndd->ns_current == ndd->ns_next);
+
+	return 0;
+}
+
+static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_label *nd_label)
+{
+	return (unsigned long) nd_label
+		- (unsigned long) to_namespace_index(ndd, 0);
+}
+
+static int __pmem_label_update(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
+		int pos)
+{
+	u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_namespace_label *victim_label;
+	struct nd_namespace_label *nd_label;
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+	size_t offset;
+	int rc;
+
+	if (!preamble_next(ndd, &nsindex, &free, &nslot))
+		return -ENXIO;
+
+	/* allocate and write the label to the staging (next) index */
+	slot = nd_label_alloc_slot(ndd);
+	if (slot == UINT_MAX)
+		return -ENXIO;
+	dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+
+	nd_label = nd_label_base(ndd) + slot;
+	memset(nd_label, 0, sizeof(struct nd_namespace_label));
+	memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
+	if (nspm->alt_name)
+		memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
+	nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING);
+	nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
+	nd_label->position = __cpu_to_le16(pos);
+	nd_label->isetcookie = __cpu_to_le64(cookie);
+	rawsize = div_u64(resource_size(&nspm->nsio.res),
+			nd_region->ndr_mappings);
+	nd_label->rawsize = __cpu_to_le64(rawsize);
+	nd_label->dpa = __cpu_to_le64(nd_mapping->start);
+	nd_label->slot = __cpu_to_le32(slot);
+
+	/* update label */
+	offset = nd_label_offset(ndd, nd_label);
+	rc = nvdimm_set_config_data(ndd, offset, nd_label,
+			sizeof(struct nd_namespace_label));
+	if (rc < 0)
+		return rc;
+
+	/* Garbage collect the previous label */
+	victim_label = nd_mapping->labels[0];
+	if (victim_label) {
+		slot = to_slot(ndd, victim_label);
+		nd_label_free_slot(ndd, slot);
+		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+	}
+
+	/* update index */
+	rc = nd_label_write_index(ndd, ndd->ns_next,
+			nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
+	if (rc < 0)
+		return rc;
+
+	nd_mapping->labels[0] = nd_label;
+
+	return 0;
+}
+
+static void del_label(struct nd_mapping *nd_mapping, int l)
+{
+	struct nd_namespace_label *next_label, *nd_label;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	unsigned int slot;
+	int j;
+
+	nd_label = nd_mapping->labels[l];
+	slot = to_slot(ndd, nd_label);
+	dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot);
+
+	for (j = l; (next_label = nd_mapping->labels[j + 1]); j++)
+		nd_mapping->labels[j] = next_label;
+	nd_mapping->labels[j] = NULL;
+}
+
+static bool is_old_resource(struct resource *res, struct resource **list, int n)
+{
+	int i;
+
+	if (res->flags & DPA_RESOURCE_ADJUSTED)
+		return false;
+	for (i = 0; i < n; i++)
+		if (res == list[i])
+			return true;
+	return false;
+}
+
+static struct resource *to_resource(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_label *nd_label)
+{
+	struct resource *res;
+
+	for_each_dpa_resource(ndd, res) {
+		if (res->start != __le64_to_cpu(nd_label->dpa))
+			continue;
+		if (resource_size(res) != __le64_to_cpu(nd_label->rawsize))
+			continue;
+		return res;
+	}
+
+	return NULL;
+}
+
+/*
+ * 1/ Account all the labels that can be freed after this update
+ * 2/ Allocate and write the label to the staging (next) index
+ * 3/ Record the resources in the namespace device
+ */
+static int __blk_label_update(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
+		int num_labels)
+{
+	int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_namespace_label *nd_label;
+	struct nd_namespace_index *nsindex;
+	unsigned long *free, *victim_map = NULL;
+	struct resource *res, **old_res_list;
+	struct nd_label_id label_id;
+	u8 uuid[NSLABEL_UUID_LEN];
+	u32 nslot, slot;
+
+	if (!preamble_next(ndd, &nsindex, &free, &nslot))
+		return -ENXIO;
+
+	old_res_list = nsblk->res;
+	nfree = nd_label_nfree(ndd);
+	old_num_resources = nsblk->num_resources;
+	nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+
+	/*
+	 * We need to loop over the old resources a few times, which seems a
+	 * bit inefficient, but we need to know that we have the label
+	 * space before we start mutating the tracking structures.
+	 * Otherwise the recovery method of last resort for userspace is
+	 * disable and re-enable the parent region.
+	 */
+	alloc = 0;
+	for_each_dpa_resource(ndd, res) {
+		if (strcmp(res->name, label_id.id) != 0)
+			continue;
+		if (!is_old_resource(res, old_res_list, old_num_resources))
+			alloc++;
+	}
+
+	victims = 0;
+	if (old_num_resources) {
+		/* convert old local-label-map to dimm-slot victim-map */
+		victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long),
+				GFP_KERNEL);
+		if (!victim_map)
+			return -ENOMEM;
+
+		/* mark unused labels for garbage collection */
+		for_each_clear_bit_le(slot, free, nslot) {
+			nd_label = nd_label_base(ndd) + slot;
+			memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+			if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+				continue;
+			res = to_resource(ndd, nd_label);
+			if (res && is_old_resource(res, old_res_list,
+						old_num_resources))
+				continue;
+			slot = to_slot(ndd, nd_label);
+			set_bit(slot, victim_map);
+			victims++;
+		}
+	}
+
+	/* don't allow updates that consume the last label */
+	if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
+		dev_info(&nsblk->common.dev, "insufficient label space\n");
+		kfree(victim_map);
+		return -ENOSPC;
+	}
+	/* from here on we need to abort on error */
+
+
+	/* assign all resources to the namespace before writing the labels */
+	nsblk->res = NULL;
+	nsblk->num_resources = 0;
+	for_each_dpa_resource(ndd, res) {
+		if (strcmp(res->name, label_id.id) != 0)
+			continue;
+		if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) {
+			rc = -ENOMEM;
+			goto abort;
+		}
+	}
+
+	for (i = 0; i < nsblk->num_resources; i++) {
+		size_t offset;
+
+		res = nsblk->res[i];
+		if (is_old_resource(res, old_res_list, old_num_resources))
+			continue; /* carry-over */
+		slot = nd_label_alloc_slot(ndd);
+		if (slot == UINT_MAX)
+			goto abort;
+		dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+
+		nd_label = nd_label_base(ndd) + slot;
+		memset(nd_label, 0, sizeof(struct nd_namespace_label));
+		memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
+		if (nsblk->alt_name)
+			memcpy(nd_label->name, nsblk->alt_name,
+					NSLABEL_NAME_LEN);
+		nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
+		nd_label->nlabel = __cpu_to_le16(0); /* N/A */
+		nd_label->position = __cpu_to_le16(0); /* N/A */
+		nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
+		nd_label->dpa = __cpu_to_le64(res->start);
+		nd_label->rawsize = __cpu_to_le64(resource_size(res));
+		nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
+		nd_label->slot = __cpu_to_le32(slot);
+
+		/* update label */
+		offset = nd_label_offset(ndd, nd_label);
+		rc = nvdimm_set_config_data(ndd, offset, nd_label,
+				sizeof(struct nd_namespace_label));
+		if (rc < 0)
+			goto abort;
+	}
+
+	/* free up now unused slots in the new index */
+	for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
+		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		nd_label_free_slot(ndd, slot);
+	}
+
+	/* update index */
+	rc = nd_label_write_index(ndd, ndd->ns_next,
+			nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
+	if (rc)
+		goto abort;
+
+	/*
+	 * Now that the on-dimm labels are up to date, fix up the tracking
+	 * entries in nd_mapping->labels
+	 */
+	nlabel = 0;
+	for_each_label(l, nd_label, nd_mapping->labels) {
+		nlabel++;
+		memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+			continue;
+		nlabel--;
+		del_label(nd_mapping, l);
+		l--; /* retry with the new label at this index */
+	}
+	if (nlabel + nsblk->num_resources > num_labels) {
+		/*
+		 * Bug, we can't end up with more resources than
+		 * available labels
+		 */
+		WARN_ON_ONCE(1);
+		rc = -ENXIO;
+		goto out;
+	}
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		nd_label = nd_label_base(ndd) + slot;
+		memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+			continue;
+		res = to_resource(ndd, nd_label);
+		res->flags &= ~DPA_RESOURCE_ADJUSTED;
+		dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
+				l, slot);
+		nd_mapping->labels[l++] = nd_label;
+	}
+	nd_mapping->labels[l] = NULL;
+
+ out:
+	kfree(old_res_list);
+	kfree(victim_map);
+	return rc;
+
+ abort:
+	/*
+	 * 1/ repair the allocated label bitmap in the index
+	 * 2/ restore the resource list
+	 */
+	nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd));
+	kfree(nsblk->res);
+	nsblk->res = old_res_list;
+	nsblk->num_resources = old_num_resources;
+	old_res_list = NULL;
+	goto out;
+}
+
+static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
+{
+	int i, l, old_num_labels = 0;
+	struct nd_namespace_index *nsindex;
+	struct nd_namespace_label *nd_label;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *);
+
+	for_each_label(l, nd_label, nd_mapping->labels)
+		old_num_labels++;
+
+	/*
+	 * We need to preserve all the old labels for the mapping so
+	 * they can be garbage collected after writing the new labels.
+	 */
+	if (num_labels > old_num_labels) {
+		struct nd_namespace_label **labels;
+
+		labels = krealloc(nd_mapping->labels, size, GFP_KERNEL);
+		if (!labels)
+			return -ENOMEM;
+		nd_mapping->labels = labels;
+	}
+	if (!nd_mapping->labels)
+		return -ENOMEM;
+
+	for (i = old_num_labels; i <= num_labels; i++)
+		nd_mapping->labels[i] = NULL;
+
+	if (ndd->ns_current == -1 || ndd->ns_next == -1)
+		/* pass */;
+	else
+		return max(num_labels, old_num_labels);
+
+	nsindex = to_namespace_index(ndd, 0);
+	memset(nsindex, 0, ndd->nsarea.config_size);
+	for (i = 0; i < 2; i++) {
+		int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT);
+
+		if (rc)
+			return rc;
+	}
+	ndd->ns_next = 1;
+	ndd->ns_current = 0;
+
+	return max(num_labels, old_num_labels);
+}
+
+static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
+{
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_namespace_label *nd_label;
+	struct nd_namespace_index *nsindex;
+	u8 label_uuid[NSLABEL_UUID_LEN];
+	int l, num_freed = 0;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!uuid)
+		return 0;
+
+	/* no index || no labels == nothing to delete */
+	if (!preamble_next(ndd, &nsindex, &free, &nslot)
+			|| !nd_mapping->labels)
+		return 0;
+
+	for_each_label(l, nd_label, nd_mapping->labels) {
+		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
+			continue;
+		slot = to_slot(ndd, nd_label);
+		nd_label_free_slot(ndd, slot);
+		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		del_label(nd_mapping, l);
+		num_freed++;
+		l--; /* retry with new label at this index */
+	}
+
+	if (num_freed > l) {
+		/*
+		 * num_freed will only ever be > l when we delete the last
+		 * label
+		 */
+		kfree(nd_mapping->labels);
+		nd_mapping->labels = NULL;
+		dev_dbg(ndd->dev, "%s: no more labels\n", __func__);
+	}
+
+	return nd_label_write_index(ndd, ndd->ns_next,
+			nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
+}
+
+int nd_pmem_namespace_label_update(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm, resource_size_t size)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		int rc;
+
+		if (size == 0) {
+			rc = del_labels(nd_mapping, nspm->uuid);
+			if (rc)
+				return rc;
+			continue;
+		}
+
+		rc = init_labels(nd_mapping, 1);
+		if (rc < 0)
+			return rc;
+
+		rc = __pmem_label_update(nd_region, nd_mapping, nspm, i);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+int nd_blk_namespace_label_update(struct nd_region *nd_region,
+		struct nd_namespace_blk *nsblk, resource_size_t size)
+{
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct resource *res;
+	int count = 0;
+
+	if (size == 0)
+		return del_labels(nd_mapping, nsblk->uuid);
+
+	for_each_dpa_resource(to_ndd(nd_mapping), res)
+		count++;
+
+	count = init_labels(nd_mapping, count);
+	if (count < 0)
+		return count;
+
+	return __blk_label_update(nd_region, nd_mapping, nsblk, count);
+}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
new file mode 100644
index 000000000000..a59ef6eef2a3
--- /dev/null
+++ b/drivers/nvdimm/label.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LABEL_H__
+#define __LABEL_H__
+
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/io.h>
+
+enum {
+	NSINDEX_SIG_LEN = 16,
+	NSINDEX_ALIGN = 256,
+	NSINDEX_SEQ_MASK = 0x3,
+	NSLABEL_UUID_LEN = 16,
+	NSLABEL_NAME_LEN = 64,
+	NSLABEL_FLAG_ROLABEL = 0x1,  /* read-only label */
+	NSLABEL_FLAG_LOCAL = 0x2,    /* DIMM-local namespace */
+	NSLABEL_FLAG_BTT = 0x4,      /* namespace contains a BTT */
+	NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */
+	BTT_ALIGN = 4096,            /* all btt structures */
+	BTTINFO_SIG_LEN = 16,
+	BTTINFO_UUID_LEN = 16,
+	BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
+	BTTINFO_MAJOR_VERSION = 1,
+	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_ID_SIZE = 50,
+	ND_NSINDEX_INIT = 0x1,
+};
+
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
+/**
+ * struct nd_namespace_index - label set superblock
+ * @sig: NAMESPACE_INDEX\0
+ * @flags: placeholder
+ * @seq: sequence number for this index
+ * @myoff: offset of this index in label area
+ * @mysize: size of this index struct
+ * @otheroff: offset of other index
+ * @labeloff: offset of first label slot
+ * @nslot: total number of label slots
+ * @major: label area major version
+ * @minor: label area minor version
+ * @checksum: fletcher64 of all fields
+ * @free[0]: bitmap, nlabel bits
+ *
+ * The size of free[] is rounded up so the total struct size is a
+ * multiple of NSINDEX_ALIGN bytes.  Any bits this allocates beyond
+ * nlabel bits must be zero.
+ */
+struct nd_namespace_index {
+	u8 sig[NSINDEX_SIG_LEN];
+	__le32 flags;
+	__le32 seq;
+	__le64 myoff;
+	__le64 mysize;
+	__le64 otheroff;
+	__le64 labeloff;
+	__le32 nslot;
+	__le16 major;
+	__le16 minor;
+	__le64 checksum;
+	u8 free[0];
+};
+
+/**
+ * struct nd_namespace_label - namespace superblock
+ * @uuid: UUID per RFC 4122
+ * @name: optional name (NULL-terminated)
+ * @flags: see NSLABEL_FLAG_*
+ * @nlabel: num labels to describe this ns
+ * @position: labels position in set
+ * @isetcookie: interleave set cookie
+ * @lbasize: LBA size in bytes or 0 for pmem
+ * @dpa: DPA of NVM range on this DIMM
+ * @rawsize: size of namespace
+ * @slot: slot of this label in label area
+ * @unused: must be zero
+ */
+struct nd_namespace_label {
+	u8 uuid[NSLABEL_UUID_LEN];
+	u8 name[NSLABEL_NAME_LEN];
+	__le32 flags;
+	__le16 nlabel;
+	__le16 position;
+	__le64 isetcookie;
+	__le64 lbasize;
+	__le64 dpa;
+	__le64 rawsize;
+	__le32 slot;
+	__le32 unused;
+};
+
+/**
+ * struct nd_label_id - identifier string for dpa allocation
+ * @id: "{blk|pmem}-<namespace uuid>"
+ */
+struct nd_label_id {
+	char id[ND_LABEL_ID_SIZE];
+};
+
+/*
+ * If the 'best' index is invalid, so is the 'next' index.  Otherwise,
+ * the next index is MOD(index+1, 2)
+ */
+static inline int nd_label_next_nsindex(int index)
+{
+	if (index < 0)
+		return -1;
+
+	return (index + 1) % 2;
+}
+
+struct nvdimm_drvdata;
+int nd_label_validate(struct nvdimm_drvdata *ndd);
+void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
+		struct nd_namespace_index *src);
+size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
+int nd_label_active_count(struct nvdimm_drvdata *ndd);
+struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
+u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
+bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
+u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
+struct nd_region;
+struct nd_namespace_pmem;
+struct nd_namespace_blk;
+int nd_pmem_namespace_label_update(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm, resource_size_t size);
+int nd_blk_namespace_label_update(struct nd_region *nd_region,
+		struct nd_namespace_blk *nsblk, resource_size_t size);
+#endif /* __LABEL_H__ */
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
new file mode 100644
index 000000000000..fef0dd80d4ad
--- /dev/null
+++ b/drivers/nvdimm/namespace_devs.c
@@ -0,0 +1,1870 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/nd.h>
+#include "nd-core.h"
+#include "nd.h"
+
+static void namespace_io_release(struct device *dev)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+	kfree(nsio);
+}
+
+static void namespace_pmem_release(struct device *dev)
+{
+	struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+	kfree(nspm->alt_name);
+	kfree(nspm->uuid);
+	kfree(nspm);
+}
+
+static void namespace_blk_release(struct device *dev)
+{
+	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+
+	if (nsblk->id >= 0)
+		ida_simple_remove(&nd_region->ns_ida, nsblk->id);
+	kfree(nsblk->alt_name);
+	kfree(nsblk->uuid);
+	kfree(nsblk->res);
+	kfree(nsblk);
+}
+
+static struct device_type namespace_io_device_type = {
+	.name = "nd_namespace_io",
+	.release = namespace_io_release,
+};
+
+static struct device_type namespace_pmem_device_type = {
+	.name = "nd_namespace_pmem",
+	.release = namespace_pmem_release,
+};
+
+static struct device_type namespace_blk_device_type = {
+	.name = "nd_namespace_blk",
+	.release = namespace_blk_release,
+};
+
+static bool is_namespace_pmem(struct device *dev)
+{
+	return dev ? dev->type == &namespace_pmem_device_type : false;
+}
+
+static bool is_namespace_blk(struct device *dev)
+{
+	return dev ? dev->type == &namespace_blk_device_type : false;
+}
+
+static bool is_namespace_io(struct device *dev)
+{
+	return dev ? dev->type == &namespace_io_device_type : false;
+}
+
+const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
+		char *name)
+{
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+	const char *suffix = "";
+
+	if (ndns->claim && is_nd_btt(ndns->claim))
+		suffix = "s";
+
+	if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev))
+		sprintf(name, "pmem%d%s", nd_region->id, suffix);
+	else if (is_namespace_blk(&ndns->dev)) {
+		struct nd_namespace_blk *nsblk;
+
+		nsblk = to_nd_namespace_blk(&ndns->dev);
+		sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix);
+	} else {
+		return NULL;
+	}
+
+	return name;
+}
+EXPORT_SYMBOL(nvdimm_namespace_disk_name);
+
+static ssize_t nstype_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+
+	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
+}
+static DEVICE_ATTR_RO(nstype);
+
+static ssize_t __alt_name_store(struct device *dev, const char *buf,
+		const size_t len)
+{
+	char *input, *pos, *alt_name, **ns_altname;
+	ssize_t rc;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_altname = &nspm->alt_name;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		ns_altname = &nsblk->alt_name;
+	} else
+		return -ENXIO;
+
+	if (dev->driver || to_ndns(dev)->claim)
+		return -EBUSY;
+
+	input = kmemdup(buf, len + 1, GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	input[len] = '\0';
+	pos = strim(input);
+	if (strlen(pos) + 1 > NSLABEL_NAME_LEN) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL);
+	if (!alt_name) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	kfree(*ns_altname);
+	*ns_altname = alt_name;
+	sprintf(*ns_altname, "%s", pos);
+	rc = len;
+
+out:
+	kfree(input);
+	return rc;
+}
+
+static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
+{
+	struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_label_id label_id;
+	resource_size_t size = 0;
+	struct resource *res;
+
+	if (!nsblk->uuid)
+		return 0;
+	nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+	for_each_dpa_resource(ndd, res)
+		if (strcmp(res->name, label_id.id) == 0)
+			size += resource_size(res);
+	return size;
+}
+
+static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
+{
+	struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct nd_label_id label_id;
+	struct resource *res;
+	int count, i;
+
+	if (!nsblk->uuid || !nsblk->lbasize || !ndd)
+		return false;
+
+	count = 0;
+	nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+	for_each_dpa_resource(ndd, res) {
+		if (strcmp(res->name, label_id.id) != 0)
+			continue;
+		/*
+		 * Resources with unacknoweldged adjustments indicate a
+		 * failure to update labels
+		 */
+		if (res->flags & DPA_RESOURCE_ADJUSTED)
+			return false;
+		count++;
+	}
+
+	/* These values match after a successful label update */
+	if (count != nsblk->num_resources)
+		return false;
+
+	for (i = 0; i < nsblk->num_resources; i++) {
+		struct resource *found = NULL;
+
+		for_each_dpa_resource(ndd, res)
+			if (res == nsblk->res[i]) {
+				found = res;
+				break;
+			}
+		/* stale resource */
+		if (!found)
+			return false;
+	}
+
+	return true;
+}
+
+resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
+{
+	resource_size_t size;
+
+	nvdimm_bus_lock(&nsblk->common.dev);
+	size = __nd_namespace_blk_validate(nsblk);
+	nvdimm_bus_unlock(&nsblk->common.dev);
+
+	return size;
+}
+EXPORT_SYMBOL(nd_namespace_blk_validate);
+
+
+static int nd_namespace_label_update(struct nd_region *nd_region,
+		struct device *dev)
+{
+	dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim,
+			"namespace must be idle during label update\n");
+	if (dev->driver || to_ndns(dev)->claim)
+		return 0;
+
+	/*
+	 * Only allow label writes that will result in a valid namespace
+	 * or deletion of an existing namespace.
+	 */
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+		resource_size_t size = resource_size(&nspm->nsio.res);
+
+		if (size == 0 && nspm->uuid)
+			/* delete allocation */;
+		else if (!nspm->uuid)
+			return 0;
+
+		return nd_pmem_namespace_label_update(nd_region, nspm, size);
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+		resource_size_t size = nd_namespace_blk_size(nsblk);
+
+		if (size == 0 && nsblk->uuid)
+			/* delete allocation */;
+		else if (!nsblk->uuid || !nsblk->lbasize)
+			return 0;
+
+		return nd_blk_namespace_label_update(nd_region, nsblk, size);
+	} else
+		return -ENXIO;
+}
+
+static ssize_t alt_name_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = __alt_name_store(dev, buf, len);
+	if (rc >= 0)
+		rc = nd_namespace_label_update(nd_region, dev);
+	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc < 0 ? rc : len;
+}
+
+static ssize_t alt_name_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	char *ns_altname;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_altname = nspm->alt_name;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		ns_altname = nsblk->alt_name;
+	} else
+		return -ENXIO;
+
+	return sprintf(buf, "%s\n", ns_altname ? ns_altname : "");
+}
+static DEVICE_ATTR_RW(alt_name);
+
+static int scan_free(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
+		resource_size_t n)
+{
+	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	int rc = 0;
+
+	while (n) {
+		struct resource *res, *last;
+		resource_size_t new_start;
+
+		last = NULL;
+		for_each_dpa_resource(ndd, res)
+			if (strcmp(res->name, label_id->id) == 0)
+				last = res;
+		res = last;
+		if (!res)
+			return 0;
+
+		if (n >= resource_size(res)) {
+			n -= resource_size(res);
+			nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc);
+			nvdimm_free_dpa(ndd, res);
+			/* retry with last resource deleted */
+			continue;
+		}
+
+		/*
+		 * Keep BLK allocations relegated to high DPA as much as
+		 * possible
+		 */
+		if (is_blk)
+			new_start = res->start + n;
+		else
+			new_start = res->start;
+
+		rc = adjust_resource(res, new_start, resource_size(res) - n);
+		if (rc == 0)
+			res->flags |= DPA_RESOURCE_ADJUSTED;
+		nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * shrink_dpa_allocation - for each dimm in region free n bytes for label_id
+ * @nd_region: the set of dimms to reclaim @n bytes from
+ * @label_id: unique identifier for the namespace consuming this dpa range
+ * @n: number of bytes per-dimm to release
+ *
+ * Assumes resources are ordered.  Starting from the end try to
+ * adjust_resource() the allocation to @n, but if @n is larger than the
+ * allocation delete it and find the 'new' last allocation in the label
+ * set.
+ */
+static int shrink_dpa_allocation(struct nd_region *nd_region,
+		struct nd_label_id *label_id, resource_size_t n)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		int rc;
+
+		rc = scan_free(nd_region, nd_mapping, label_id, n);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
+		struct nd_region *nd_region, struct nd_mapping *nd_mapping,
+		resource_size_t n)
+{
+	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	resource_size_t first_dpa;
+	struct resource *res;
+	int rc = 0;
+
+	/* allocate blk from highest dpa first */
+	if (is_blk)
+		first_dpa = nd_mapping->start + nd_mapping->size - n;
+	else
+		first_dpa = nd_mapping->start;
+
+	/* first resource allocation for this label-id or dimm */
+	res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n);
+	if (!res)
+		rc = -EBUSY;
+
+	nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc);
+	return rc ? n : 0;
+}
+
+static bool space_valid(bool is_pmem, bool is_reserve,
+		struct nd_label_id *label_id, struct resource *res)
+{
+	/*
+	 * For BLK-space any space is valid, for PMEM-space, it must be
+	 * contiguous with an existing allocation unless we are
+	 * reserving pmem.
+	 */
+	if (is_reserve || !is_pmem)
+		return true;
+	if (!res || strcmp(res->name, label_id->id) == 0)
+		return true;
+	return false;
+}
+
+enum alloc_loc {
+	ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER,
+};
+
+static resource_size_t scan_allocate(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
+		resource_size_t n)
+{
+	resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
+	bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
+	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	const resource_size_t to_allocate = n;
+	struct resource *res;
+	int first;
+
+ retry:
+	first = 0;
+	for_each_dpa_resource(ndd, res) {
+		resource_size_t allocate, available = 0, free_start, free_end;
+		struct resource *next = res->sibling, *new_res = NULL;
+		enum alloc_loc loc = ALLOC_ERR;
+		const char *action;
+		int rc = 0;
+
+		/* ignore resources outside this nd_mapping */
+		if (res->start > mapping_end)
+			continue;
+		if (res->end < nd_mapping->start)
+			continue;
+
+		/* space at the beginning of the mapping */
+		if (!first++ && res->start > nd_mapping->start) {
+			free_start = nd_mapping->start;
+			available = res->start - free_start;
+			if (space_valid(is_pmem, is_reserve, label_id, NULL))
+				loc = ALLOC_BEFORE;
+		}
+
+		/* space between allocations */
+		if (!loc && next) {
+			free_start = res->start + resource_size(res);
+			free_end = min(mapping_end, next->start - 1);
+			if (space_valid(is_pmem, is_reserve, label_id, res)
+					&& free_start < free_end) {
+				available = free_end + 1 - free_start;
+				loc = ALLOC_MID;
+			}
+		}
+
+		/* space at the end of the mapping */
+		if (!loc && !next) {
+			free_start = res->start + resource_size(res);
+			free_end = mapping_end;
+			if (space_valid(is_pmem, is_reserve, label_id, res)
+					&& free_start < free_end) {
+				available = free_end + 1 - free_start;
+				loc = ALLOC_AFTER;
+			}
+		}
+
+		if (!loc || !available)
+			continue;
+		allocate = min(available, n);
+		switch (loc) {
+		case ALLOC_BEFORE:
+			if (strcmp(res->name, label_id->id) == 0) {
+				/* adjust current resource up */
+				if (is_pmem && !is_reserve)
+					return n;
+				rc = adjust_resource(res, res->start - allocate,
+						resource_size(res) + allocate);
+				action = "cur grow up";
+			} else
+				action = "allocate";
+			break;
+		case ALLOC_MID:
+			if (strcmp(next->name, label_id->id) == 0) {
+				/* adjust next resource up */
+				if (is_pmem && !is_reserve)
+					return n;
+				rc = adjust_resource(next, next->start
+						- allocate, resource_size(next)
+						+ allocate);
+				new_res = next;
+				action = "next grow up";
+			} else if (strcmp(res->name, label_id->id) == 0) {
+				action = "grow down";
+			} else
+				action = "allocate";
+			break;
+		case ALLOC_AFTER:
+			if (strcmp(res->name, label_id->id) == 0)
+				action = "grow down";
+			else
+				action = "allocate";
+			break;
+		default:
+			return n;
+		}
+
+		if (strcmp(action, "allocate") == 0) {
+			/* BLK allocate bottom up */
+			if (!is_pmem)
+				free_start += available - allocate;
+			else if (!is_reserve && free_start != nd_mapping->start)
+				return n;
+
+			new_res = nvdimm_allocate_dpa(ndd, label_id,
+					free_start, allocate);
+			if (!new_res)
+				rc = -EBUSY;
+		} else if (strcmp(action, "grow down") == 0) {
+			/* adjust current resource down */
+			rc = adjust_resource(res, res->start, resource_size(res)
+					+ allocate);
+			if (rc == 0)
+				res->flags |= DPA_RESOURCE_ADJUSTED;
+		}
+
+		if (!new_res)
+			new_res = res;
+
+		nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n",
+				action, loc, rc);
+
+		if (rc)
+			return n;
+
+		n -= allocate;
+		if (n) {
+			/*
+			 * Retry scan with newly inserted resources.
+			 * For example, if we did an ALLOC_BEFORE
+			 * insertion there may also have been space
+			 * available for an ALLOC_AFTER insertion, so we
+			 * need to check this same resource again
+			 */
+			goto retry;
+		} else
+			return 0;
+	}
+
+	/*
+	 * If we allocated nothing in the BLK case it may be because we are in
+	 * an initial "pmem-reserve pass".  Only do an initial BLK allocation
+	 * when none of the DPA space is reserved.
+	 */
+	if ((is_pmem || !ndd->dpa.child) && n == to_allocate)
+		return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
+	return n;
+}
+
+static int merge_dpa(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_label_id *label_id)
+{
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct resource *res;
+
+	if (strncmp("pmem", label_id->id, 4) == 0)
+		return 0;
+ retry:
+	for_each_dpa_resource(ndd, res) {
+		int rc;
+		struct resource *next = res->sibling;
+		resource_size_t end = res->start + resource_size(res);
+
+		if (!next || strcmp(res->name, label_id->id) != 0
+				|| strcmp(next->name, label_id->id) != 0
+				|| end != next->start)
+			continue;
+		end += resource_size(next);
+		nvdimm_free_dpa(ndd, next);
+		rc = adjust_resource(res, res->start, end - res->start);
+		nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc);
+		if (rc)
+			return rc;
+		res->flags |= DPA_RESOURCE_ADJUSTED;
+		goto retry;
+	}
+
+	return 0;
+}
+
+static int __reserve_free_pmem(struct device *dev, void *data)
+{
+	struct nvdimm *nvdimm = data;
+	struct nd_region *nd_region;
+	struct nd_label_id label_id;
+	int i;
+
+	if (!is_nd_pmem(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	if (nd_region->ndr_mappings == 0)
+		return 0;
+
+	memset(&label_id, 0, sizeof(label_id));
+	strcat(label_id.id, "pmem-reserve");
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		resource_size_t n, rem = 0;
+
+		if (nd_mapping->nvdimm != nvdimm)
+			continue;
+
+		n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem);
+		if (n == 0)
+			return 0;
+		rem = scan_allocate(nd_region, nd_mapping, &label_id, n);
+		dev_WARN_ONCE(&nd_region->dev, rem,
+				"pmem reserve underrun: %#llx of %#llx bytes\n",
+				(unsigned long long) n - rem,
+				(unsigned long long) n);
+		return rem ? -ENXIO : 0;
+	}
+
+	return 0;
+}
+
+static void release_free_pmem(struct nvdimm_bus *nvdimm_bus,
+		struct nd_mapping *nd_mapping)
+{
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct resource *res, *_res;
+
+	for_each_dpa_resource_safe(ndd, res, _res)
+		if (strcmp(res->name, "pmem-reserve") == 0)
+			nvdimm_free_dpa(ndd, res);
+}
+
+static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
+		struct nd_mapping *nd_mapping)
+{
+	struct nvdimm *nvdimm = nd_mapping->nvdimm;
+	int rc;
+
+	rc = device_for_each_child(&nvdimm_bus->dev, nvdimm,
+			__reserve_free_pmem);
+	if (rc)
+		release_free_pmem(nvdimm_bus, nd_mapping);
+	return rc;
+}
+
+/**
+ * grow_dpa_allocation - for each dimm allocate n bytes for @label_id
+ * @nd_region: the set of dimms to allocate @n more bytes from
+ * @label_id: unique identifier for the namespace consuming this dpa range
+ * @n: number of bytes per-dimm to add to the existing allocation
+ *
+ * Assumes resources are ordered.  For BLK regions, first consume
+ * BLK-only available DPA free space, then consume PMEM-aliased DPA
+ * space starting at the highest DPA.  For PMEM regions start
+ * allocations from the start of an interleave set and end at the first
+ * BLK allocation or the end of the interleave set, whichever comes
+ * first.
+ */
+static int grow_dpa_allocation(struct nd_region *nd_region,
+		struct nd_label_id *label_id, resource_size_t n)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		resource_size_t rem = n;
+		int rc, j;
+
+		/*
+		 * In the BLK case try once with all unallocated PMEM
+		 * reserved, and once without
+		 */
+		for (j = is_pmem; j < 2; j++) {
+			bool blk_only = j == 0;
+
+			if (blk_only) {
+				rc = reserve_free_pmem(nvdimm_bus, nd_mapping);
+				if (rc)
+					return rc;
+			}
+			rem = scan_allocate(nd_region, nd_mapping,
+					label_id, rem);
+			if (blk_only)
+				release_free_pmem(nvdimm_bus, nd_mapping);
+
+			/* try again and allow encroachments into PMEM */
+			if (rem == 0)
+				break;
+		}
+
+		dev_WARN_ONCE(&nd_region->dev, rem,
+				"allocation underrun: %#llx of %#llx bytes\n",
+				(unsigned long long) n - rem,
+				(unsigned long long) n);
+		if (rem)
+			return -ENXIO;
+
+		rc = merge_dpa(nd_region, nd_mapping, label_id);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm, resource_size_t size)
+{
+	struct resource *res = &nspm->nsio.res;
+
+	res->start = nd_region->ndr_start;
+	res->end = nd_region->ndr_start + size - 1;
+}
+
+static ssize_t __size_store(struct device *dev, unsigned long long val)
+{
+	resource_size_t allocated = 0, available = 0;
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_mapping *nd_mapping;
+	struct nvdimm_drvdata *ndd;
+	struct nd_label_id label_id;
+	u32 flags = 0, remainder;
+	u8 *uuid = NULL;
+	int rc, i;
+
+	if (dev->driver || to_ndns(dev)->claim)
+		return -EBUSY;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		uuid = nsblk->uuid;
+		flags = NSLABEL_FLAG_LOCAL;
+	}
+
+	/*
+	 * We need a uuid for the allocation-label and dimm(s) on which
+	 * to store the label.
+	 */
+	if (!uuid || nd_region->ndr_mappings == 0)
+		return -ENXIO;
+
+	div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
+	if (remainder) {
+		dev_dbg(dev, "%llu is not %dK aligned\n", val,
+				(SZ_4K * nd_region->ndr_mappings) / SZ_1K);
+		return -EINVAL;
+	}
+
+	nd_label_gen_id(&label_id, uuid, flags);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		nd_mapping = &nd_region->mapping[i];
+		ndd = to_ndd(nd_mapping);
+
+		/*
+		 * All dimms in an interleave set, or the base dimm for a blk
+		 * region, need to be enabled for the size to be changed.
+		 */
+		if (!ndd)
+			return -ENXIO;
+
+		allocated += nvdimm_allocated_dpa(ndd, &label_id);
+	}
+	available = nd_region_available_dpa(nd_region);
+
+	if (val > available + allocated)
+		return -ENOSPC;
+
+	if (val == allocated)
+		return 0;
+
+	val = div_u64(val, nd_region->ndr_mappings);
+	allocated = div_u64(allocated, nd_region->ndr_mappings);
+	if (val < allocated)
+		rc = shrink_dpa_allocation(nd_region, &label_id,
+				allocated - val);
+	else
+		rc = grow_dpa_allocation(nd_region, &label_id, val - allocated);
+
+	if (rc)
+		return rc;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		nd_namespace_pmem_set_size(nd_region, nspm,
+				val * nd_region->ndr_mappings);
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		/*
+		 * Try to delete the namespace if we deleted all of its
+		 * allocation, this is not the seed device for the
+		 * region, and it is not actively claimed by a btt
+		 * instance.
+		 */
+		if (val == 0 && nd_region->ns_seed != dev
+				&& !nsblk->common.claim)
+			nd_device_unregister(dev, ND_ASYNC);
+	}
+
+	return rc;
+}
+
+static ssize_t size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	unsigned long long val;
+	u8 **uuid = NULL;
+	int rc;
+
+	rc = kstrtoull(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = __size_store(dev, val);
+	if (rc >= 0)
+		rc = nd_namespace_label_update(nd_region, dev);
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = &nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		uuid = &nsblk->uuid;
+	}
+
+	if (rc == 0 && val == 0 && uuid) {
+		/* setting size zero == 'delete namespace' */
+		kfree(*uuid);
+		*uuid = NULL;
+	}
+
+	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
+			? "fail" : "success", rc);
+
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc < 0 ? rc : len;
+}
+
+resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
+{
+	struct device *dev = &ndns->dev;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		return resource_size(&nspm->nsio.res);
+	} else if (is_namespace_blk(dev)) {
+		return nd_namespace_blk_size(to_nd_namespace_blk(dev));
+	} else if (is_namespace_io(dev)) {
+		struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+		return resource_size(&nsio->res);
+	} else
+		WARN_ONCE(1, "unknown namespace type\n");
+	return 0;
+}
+
+resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
+{
+	resource_size_t size;
+
+	nvdimm_bus_lock(&ndns->dev);
+	size = __nvdimm_namespace_capacity(ndns);
+	nvdimm_bus_unlock(&ndns->dev);
+
+	return size;
+}
+EXPORT_SYMBOL(nvdimm_namespace_capacity);
+
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%llu\n", (unsigned long long)
+			nvdimm_namespace_capacity(to_ndns(dev)));
+}
+static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u8 *uuid;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		uuid = nsblk->uuid;
+	} else
+		return -ENXIO;
+
+	if (uuid)
+		return sprintf(buf, "%pUb\n", uuid);
+	return sprintf(buf, "\n");
+}
+
+/**
+ * namespace_update_uuid - check for a unique uuid and whether we're "renaming"
+ * @nd_region: parent region so we can updates all dimms in the set
+ * @dev: namespace type for generating label_id
+ * @new_uuid: incoming uuid
+ * @old_uuid: reference to the uuid storage location in the namespace object
+ */
+static int namespace_update_uuid(struct nd_region *nd_region,
+		struct device *dev, u8 *new_uuid, u8 **old_uuid)
+{
+	u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0;
+	struct nd_label_id old_label_id;
+	struct nd_label_id new_label_id;
+	int i;
+
+	if (!nd_is_uuid_unique(dev, new_uuid))
+		return -EINVAL;
+
+	if (*old_uuid == NULL)
+		goto out;
+
+	/*
+	 * If we've already written a label with this uuid, then it's
+	 * too late to rename because we can't reliably update the uuid
+	 * without losing the old namespace.  Userspace must delete this
+	 * namespace to abandon the old uuid.
+	 */
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+		/*
+		 * This check by itself is sufficient because old_uuid
+		 * would be NULL above if this uuid did not exist in the
+		 * currently written set.
+		 *
+		 * FIXME: can we delete uuid with zero dpa allocated?
+		 */
+		if (nd_mapping->labels)
+			return -EBUSY;
+	}
+
+	nd_label_gen_id(&old_label_id, *old_uuid, flags);
+	nd_label_gen_id(&new_label_id, new_uuid, flags);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct resource *res;
+
+		for_each_dpa_resource(ndd, res)
+			if (strcmp(res->name, old_label_id.id) == 0)
+				sprintf((void *) res->name, "%s",
+						new_label_id.id);
+	}
+	kfree(*old_uuid);
+ out:
+	*old_uuid = new_uuid;
+	return 0;
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	u8 *uuid = NULL;
+	ssize_t rc = 0;
+	u8 **ns_uuid;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_uuid = &nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		ns_uuid = &nsblk->uuid;
+	} else
+		return -ENXIO;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	if (to_ndns(dev)->claim)
+		rc = -EBUSY;
+	if (rc >= 0)
+		rc = nd_uuid_store(dev, &uuid, buf, len);
+	if (rc >= 0)
+		rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid);
+	if (rc >= 0)
+		rc = nd_namespace_label_update(nd_region, dev);
+	else
+		kfree(uuid);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc < 0 ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t resource_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct resource *res;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		res = &nspm->nsio.res;
+	} else if (is_namespace_io(dev)) {
+		struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+		res = &nsio->res;
+	} else
+		return -ENXIO;
+
+	/* no address to convey if the namespace has no allocation */
+	if (resource_size(res) == 0)
+		return -ENXIO;
+	return sprintf(buf, "%#llx\n", (unsigned long long) res->start);
+}
+static DEVICE_ATTR_RO(resource);
+
+static const unsigned long ns_lbasize_supported[] = { 512, 520, 528,
+	4096, 4104, 4160, 4224, 0 };
+
+static ssize_t sector_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+	if (!is_namespace_blk(dev))
+		return -ENXIO;
+
+	return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf);
+}
+
+static ssize_t sector_size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	ssize_t rc = 0;
+
+	if (!is_namespace_blk(dev))
+		return -ENXIO;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	if (to_ndns(dev)->claim)
+		rc = -EBUSY;
+	if (rc >= 0)
+		rc = nd_sector_size_store(dev, buf, &nsblk->lbasize,
+				ns_lbasize_supported);
+	if (rc >= 0)
+		rc = nd_namespace_label_update(nd_region, dev);
+	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
+			rc, rc < 0 ? "tried" : "wrote", buf,
+			buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(sector_size);
+
+static ssize_t dpa_extents_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_label_id label_id;
+	int count = 0, i;
+	u8 *uuid = NULL;
+	u32 flags = 0;
+
+	nvdimm_bus_lock(dev);
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = nspm->uuid;
+		flags = 0;
+	} else if (is_namespace_blk(dev)) {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		uuid = nsblk->uuid;
+		flags = NSLABEL_FLAG_LOCAL;
+	}
+
+	if (!uuid)
+		goto out;
+
+	nd_label_gen_id(&label_id, uuid, flags);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct resource *res;
+
+		for_each_dpa_resource(ndd, res)
+			if (strcmp(res->name, label_id.id) == 0)
+				count++;
+	}
+ out:
+	nvdimm_bus_unlock(dev);
+
+	return sprintf(buf, "%d\n", count);
+}
+static DEVICE_ATTR_RO(dpa_extents);
+
+static ssize_t holder_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_namespace_common *ndns = to_ndns(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(holder);
+
+static ssize_t force_raw_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	bool force_raw;
+	int rc = strtobool(buf, &force_raw);
+
+	if (rc)
+		return rc;
+
+	to_ndns(dev)->force_raw = force_raw;
+	return len;
+}
+
+static ssize_t force_raw_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", to_ndns(dev)->force_raw);
+}
+static DEVICE_ATTR_RW(force_raw);
+
+static struct attribute *nd_namespace_attributes[] = {
+	&dev_attr_nstype.attr,
+	&dev_attr_size.attr,
+	&dev_attr_uuid.attr,
+	&dev_attr_holder.attr,
+	&dev_attr_resource.attr,
+	&dev_attr_alt_name.attr,
+	&dev_attr_force_raw.attr,
+	&dev_attr_sector_size.attr,
+	&dev_attr_dpa_extents.attr,
+	NULL,
+};
+
+static umode_t namespace_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (a == &dev_attr_resource.attr) {
+		if (is_namespace_blk(dev))
+			return 0;
+		return a->mode;
+	}
+
+	if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
+		if (a == &dev_attr_size.attr)
+			return S_IWUSR | S_IRUGO;
+
+		if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr)
+			return 0;
+
+		return a->mode;
+	}
+
+	if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
+			|| a == &dev_attr_holder.attr
+			|| a == &dev_attr_force_raw.attr)
+		return a->mode;
+
+	return 0;
+}
+
+static struct attribute_group nd_namespace_attribute_group = {
+	.attrs = nd_namespace_attributes,
+	.is_visible = namespace_visible,
+};
+
+static const struct attribute_group *nd_namespace_attribute_groups[] = {
+	&nd_device_attribute_group,
+	&nd_namespace_attribute_group,
+	&nd_numa_attribute_group,
+	NULL,
+};
+
+struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
+{
+	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
+	struct nd_namespace_common *ndns;
+	resource_size_t size;
+
+	if (nd_btt) {
+		ndns = nd_btt->ndns;
+		if (!ndns)
+			return ERR_PTR(-ENODEV);
+
+		/*
+		 * Flush any in-progess probes / removals in the driver
+		 * for the raw personality of this namespace.
+		 */
+		device_lock(&ndns->dev);
+		device_unlock(&ndns->dev);
+		if (ndns->dev.driver) {
+			dev_dbg(&ndns->dev, "is active, can't bind %s\n",
+					dev_name(&nd_btt->dev));
+			return ERR_PTR(-EBUSY);
+		}
+		if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev,
+					"host (%s) vs claim (%s) mismatch\n",
+					dev_name(&nd_btt->dev),
+					dev_name(ndns->claim)))
+			return ERR_PTR(-ENXIO);
+	} else {
+		ndns = to_ndns(dev);
+		if (ndns->claim) {
+			dev_dbg(dev, "claimed by %s, failing probe\n",
+				dev_name(ndns->claim));
+
+			return ERR_PTR(-ENXIO);
+		}
+	}
+
+	size = nvdimm_namespace_capacity(ndns);
+	if (size < ND_MIN_NAMESPACE_SIZE) {
+		dev_dbg(&ndns->dev, "%pa, too small must be at least %#x\n",
+				&size, ND_MIN_NAMESPACE_SIZE);
+		return ERR_PTR(-ENODEV);
+	}
+
+	if (is_namespace_pmem(&ndns->dev)) {
+		struct nd_namespace_pmem *nspm;
+
+		nspm = to_nd_namespace_pmem(&ndns->dev);
+		if (!nspm->uuid) {
+			dev_dbg(&ndns->dev, "%s: uuid not set\n", __func__);
+			return ERR_PTR(-ENODEV);
+		}
+	} else if (is_namespace_blk(&ndns->dev)) {
+		struct nd_namespace_blk *nsblk;
+
+		nsblk = to_nd_namespace_blk(&ndns->dev);
+		if (!nd_namespace_blk_validate(nsblk))
+			return ERR_PTR(-ENODEV);
+	}
+
+	return ndns;
+}
+EXPORT_SYMBOL(nvdimm_namespace_common_probe);
+
+static struct device **create_namespace_io(struct nd_region *nd_region)
+{
+	struct nd_namespace_io *nsio;
+	struct device *dev, **devs;
+	struct resource *res;
+
+	nsio = kzalloc(sizeof(*nsio), GFP_KERNEL);
+	if (!nsio)
+		return NULL;
+
+	devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
+	if (!devs) {
+		kfree(nsio);
+		return NULL;
+	}
+
+	dev = &nsio->common.dev;
+	dev->type = &namespace_io_device_type;
+	dev->parent = &nd_region->dev;
+	res = &nsio->res;
+	res->name = dev_name(&nd_region->dev);
+	res->flags = IORESOURCE_MEM;
+	res->start = nd_region->ndr_start;
+	res->end = res->start + nd_region->ndr_size - 1;
+
+	devs[0] = dev;
+	return devs;
+}
+
+static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
+		u64 cookie, u16 pos)
+{
+	struct nd_namespace_label *found = NULL;
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *nd_label;
+		bool found_uuid = false;
+		int l;
+
+		for_each_label(l, nd_label, nd_mapping->labels) {
+			u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+			u16 position = __le16_to_cpu(nd_label->position);
+			u16 nlabel = __le16_to_cpu(nd_label->nlabel);
+
+			if (isetcookie != cookie)
+				continue;
+
+			if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
+				continue;
+
+			if (found_uuid) {
+				dev_dbg(to_ndd(nd_mapping)->dev,
+						"%s duplicate entry for uuid\n",
+						__func__);
+				return false;
+			}
+			found_uuid = true;
+			if (nlabel != nd_region->ndr_mappings)
+				continue;
+			if (position != pos)
+				continue;
+			found = nd_label;
+			break;
+		}
+		if (found)
+			break;
+	}
+	return found != NULL;
+}
+
+static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
+{
+	struct nd_namespace_label *select = NULL;
+	int i;
+
+	if (!pmem_id)
+		return -ENODEV;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *nd_label;
+		u64 hw_start, hw_end, pmem_start, pmem_end;
+		int l;
+
+		for_each_label(l, nd_label, nd_mapping->labels)
+			if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
+				break;
+
+		if (!nd_label) {
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		select = nd_label;
+		/*
+		 * Check that this label is compliant with the dpa
+		 * range published in NFIT
+		 */
+		hw_start = nd_mapping->start;
+		hw_end = hw_start + nd_mapping->size;
+		pmem_start = __le64_to_cpu(select->dpa);
+		pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
+		if (pmem_start == hw_start && pmem_end <= hw_end)
+			/* pass */;
+		else
+			return -EINVAL;
+
+		nd_mapping->labels[0] = select;
+		nd_mapping->labels[1] = NULL;
+	}
+	return 0;
+}
+
+/**
+ * find_pmem_label_set - validate interleave set labelling, retrieve label0
+ * @nd_region: region with mappings to validate
+ */
+static int find_pmem_label_set(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm)
+{
+	u64 cookie = nd_region_interleave_set_cookie(nd_region);
+	struct nd_namespace_label *nd_label;
+	u8 select_id[NSLABEL_UUID_LEN];
+	resource_size_t size = 0;
+	u8 *pmem_id = NULL;
+	int rc = -ENODEV, l;
+	u16 i;
+
+	if (cookie == 0)
+		return -ENXIO;
+
+	/*
+	 * Find a complete set of labels by uuid.  By definition we can start
+	 * with any mapping as the reference label
+	 */
+	for_each_label(l, nd_label, nd_region->mapping[0].labels) {
+		u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+
+		if (isetcookie != cookie)
+			continue;
+
+		for (i = 0; nd_region->ndr_mappings; i++)
+			if (!has_uuid_at_pos(nd_region, nd_label->uuid,
+						cookie, i))
+				break;
+		if (i < nd_region->ndr_mappings) {
+			/*
+			 * Give up if we don't find an instance of a
+			 * uuid at each position (from 0 to
+			 * nd_region->ndr_mappings - 1), or if we find a
+			 * dimm with two instances of the same uuid.
+			 */
+			rc = -EINVAL;
+			goto err;
+		} else if (pmem_id) {
+			/*
+			 * If there is more than one valid uuid set, we
+			 * need userspace to clean this up.
+			 */
+			rc = -EBUSY;
+			goto err;
+		}
+		memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
+		pmem_id = select_id;
+	}
+
+	/*
+	 * Fix up each mapping's 'labels' to have the validated pmem label for
+	 * that position at labels[0], and NULL at labels[1].  In the process,
+	 * check that the namespace aligns with interleave-set.  We know
+	 * that it does not overlap with any blk namespaces by virtue of
+	 * the dimm being enabled (i.e. nd_label_reserve_dpa()
+	 * succeeded).
+	 */
+	rc = select_pmem_id(nd_region, pmem_id);
+	if (rc)
+		goto err;
+
+	/* Calculate total size and populate namespace properties from label0 */
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *label0 = nd_mapping->labels[0];
+
+		size += __le64_to_cpu(label0->rawsize);
+		if (__le16_to_cpu(label0->position) != 0)
+			continue;
+		WARN_ON(nspm->alt_name || nspm->uuid);
+		nspm->alt_name = kmemdup((void __force *) label0->name,
+				NSLABEL_NAME_LEN, GFP_KERNEL);
+		nspm->uuid = kmemdup((void __force *) label0->uuid,
+				NSLABEL_UUID_LEN, GFP_KERNEL);
+	}
+
+	if (!nspm->alt_name || !nspm->uuid) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	nd_namespace_pmem_set_size(nd_region, nspm, size);
+
+	return 0;
+ err:
+	switch (rc) {
+	case -EINVAL:
+		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		break;
+	case -ENODEV:
+		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		break;
+	default:
+		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
+				__func__, rc);
+		break;
+	}
+	return rc;
+}
+
+static struct device **create_namespace_pmem(struct nd_region *nd_region)
+{
+	struct nd_namespace_pmem *nspm;
+	struct device *dev, **devs;
+	struct resource *res;
+	int rc;
+
+	nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+	if (!nspm)
+		return NULL;
+
+	dev = &nspm->nsio.common.dev;
+	dev->type = &namespace_pmem_device_type;
+	dev->parent = &nd_region->dev;
+	res = &nspm->nsio.res;
+	res->name = dev_name(&nd_region->dev);
+	res->flags = IORESOURCE_MEM;
+	rc = find_pmem_label_set(nd_region, nspm);
+	if (rc == -ENODEV) {
+		int i;
+
+		/* Pass, try to permit namespace creation... */
+		for (i = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+			kfree(nd_mapping->labels);
+			nd_mapping->labels = NULL;
+		}
+
+		/* Publish a zero-sized namespace for userspace to configure. */
+		nd_namespace_pmem_set_size(nd_region, nspm, 0);
+
+		rc = 0;
+	} else if (rc)
+		goto err;
+
+	devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
+	if (!devs)
+		goto err;
+
+	devs[0] = dev;
+	return devs;
+
+ err:
+	namespace_pmem_release(&nspm->nsio.common.dev);
+	return NULL;
+}
+
+struct resource *nsblk_add_resource(struct nd_region *nd_region,
+		struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
+		resource_size_t start)
+{
+	struct nd_label_id label_id;
+	struct resource *res;
+
+	nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+	res = krealloc(nsblk->res,
+			sizeof(void *) * (nsblk->num_resources + 1),
+			GFP_KERNEL);
+	if (!res)
+		return NULL;
+	nsblk->res = (struct resource **) res;
+	for_each_dpa_resource(ndd, res)
+		if (strcmp(res->name, label_id.id) == 0
+				&& res->start == start) {
+			nsblk->res[nsblk->num_resources++] = res;
+			return res;
+		}
+	return NULL;
+}
+
+static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
+{
+	struct nd_namespace_blk *nsblk;
+	struct device *dev;
+
+	if (!is_nd_blk(&nd_region->dev))
+		return NULL;
+
+	nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+	if (!nsblk)
+		return NULL;
+
+	dev = &nsblk->common.dev;
+	dev->type = &namespace_blk_device_type;
+	nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
+	if (nsblk->id < 0) {
+		kfree(nsblk);
+		return NULL;
+	}
+	dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id);
+	dev->parent = &nd_region->dev;
+	dev->groups = nd_namespace_attribute_groups;
+
+	return &nsblk->common.dev;
+}
+
+void nd_region_create_blk_seed(struct nd_region *nd_region)
+{
+	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
+	nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+	/*
+	 * Seed creation failures are not fatal, provisioning is simply
+	 * disabled until memory becomes available
+	 */
+	if (!nd_region->ns_seed)
+		dev_err(&nd_region->dev, "failed to create blk namespace\n");
+	else
+		nd_device_register(nd_region->ns_seed);
+}
+
+void nd_region_create_btt_seed(struct nd_region *nd_region)
+{
+	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
+	nd_region->btt_seed = nd_btt_create(nd_region);
+	/*
+	 * Seed creation failures are not fatal, provisioning is simply
+	 * disabled until memory becomes available
+	 */
+	if (!nd_region->btt_seed)
+		dev_err(&nd_region->dev, "failed to create btt namespace\n");
+}
+
+static struct device **create_namespace_blk(struct nd_region *nd_region)
+{
+	struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+	struct nd_namespace_label *nd_label;
+	struct device *dev, **devs = NULL;
+	struct nd_namespace_blk *nsblk;
+	struct nvdimm_drvdata *ndd;
+	int i, l, count = 0;
+	struct resource *res;
+
+	if (nd_region->ndr_mappings == 0)
+		return NULL;
+
+	ndd = to_ndd(nd_mapping);
+	for_each_label(l, nd_label, nd_mapping->labels) {
+		u32 flags = __le32_to_cpu(nd_label->flags);
+		char *name[NSLABEL_NAME_LEN];
+		struct device **__devs;
+
+		if (flags & NSLABEL_FLAG_LOCAL)
+			/* pass */;
+		else
+			continue;
+
+		for (i = 0; i < count; i++) {
+			nsblk = to_nd_namespace_blk(devs[i]);
+			if (memcmp(nsblk->uuid, nd_label->uuid,
+						NSLABEL_UUID_LEN) == 0) {
+				res = nsblk_add_resource(nd_region, ndd, nsblk,
+						__le64_to_cpu(nd_label->dpa));
+				if (!res)
+					goto err;
+				nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
+					dev_name(&nsblk->common.dev));
+				break;
+			}
+		}
+		if (i < count)
+			continue;
+		__devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
+		if (!__devs)
+			goto err;
+		memcpy(__devs, devs, sizeof(dev) * count);
+		kfree(devs);
+		devs = __devs;
+
+		nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+		if (!nsblk)
+			goto err;
+		dev = &nsblk->common.dev;
+		dev->type = &namespace_blk_device_type;
+		dev->parent = &nd_region->dev;
+		dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
+		devs[count++] = dev;
+		nsblk->id = -1;
+		nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
+		nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
+				GFP_KERNEL);
+		if (!nsblk->uuid)
+			goto err;
+		memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+		if (name[0])
+			nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
+					GFP_KERNEL);
+		res = nsblk_add_resource(nd_region, ndd, nsblk,
+				__le64_to_cpu(nd_label->dpa));
+		if (!res)
+			goto err;
+		nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
+				dev_name(&nsblk->common.dev));
+	}
+
+	dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
+			__func__, count, count == 1 ? "" : "s");
+
+	if (count == 0) {
+		/* Publish a zero-sized namespace for userspace to configure. */
+		for (i = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+			kfree(nd_mapping->labels);
+			nd_mapping->labels = NULL;
+		}
+
+		devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
+		if (!devs)
+			goto err;
+		nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+		if (!nsblk)
+			goto err;
+		dev = &nsblk->common.dev;
+		dev->type = &namespace_blk_device_type;
+		dev->parent = &nd_region->dev;
+		devs[count++] = dev;
+	}
+
+	return devs;
+
+err:
+	for (i = 0; i < count; i++) {
+		nsblk = to_nd_namespace_blk(devs[i]);
+		namespace_blk_release(&nsblk->common.dev);
+	}
+	kfree(devs);
+	return NULL;
+}
+
+static int init_active_labels(struct nd_region *nd_region)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+		int count, j;
+
+		/*
+		 * If the dimm is disabled then prevent the region from
+		 * being activated if it aliases DPA.
+		 */
+		if (!ndd) {
+			if ((nvdimm->flags & NDD_ALIASING) == 0)
+				return 0;
+			dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n",
+					dev_name(&nd_mapping->nvdimm->dev));
+			return -ENXIO;
+		}
+		nd_mapping->ndd = ndd;
+		atomic_inc(&nvdimm->busy);
+		get_ndd(ndd);
+
+		count = nd_label_active_count(ndd);
+		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		if (!count)
+			continue;
+		nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
+				GFP_KERNEL);
+		if (!nd_mapping->labels)
+			return -ENOMEM;
+		for (j = 0; j < count; j++) {
+			struct nd_namespace_label *label;
+
+			label = nd_label_active(ndd, j);
+			nd_mapping->labels[j] = label;
+		}
+	}
+
+	return 0;
+}
+
+int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
+{
+	struct device **devs = NULL;
+	int i, rc = 0, type;
+
+	*err = 0;
+	nvdimm_bus_lock(&nd_region->dev);
+	rc = init_active_labels(nd_region);
+	if (rc) {
+		nvdimm_bus_unlock(&nd_region->dev);
+		return rc;
+	}
+
+	type = nd_region_to_nstype(nd_region);
+	switch (type) {
+	case ND_DEVICE_NAMESPACE_IO:
+		devs = create_namespace_io(nd_region);
+		break;
+	case ND_DEVICE_NAMESPACE_PMEM:
+		devs = create_namespace_pmem(nd_region);
+		break;
+	case ND_DEVICE_NAMESPACE_BLK:
+		devs = create_namespace_blk(nd_region);
+		break;
+	default:
+		break;
+	}
+	nvdimm_bus_unlock(&nd_region->dev);
+
+	if (!devs)
+		return -ENODEV;
+
+	for (i = 0; devs[i]; i++) {
+		struct device *dev = devs[i];
+		int id;
+
+		if (type == ND_DEVICE_NAMESPACE_BLK) {
+			struct nd_namespace_blk *nsblk;
+
+			nsblk = to_nd_namespace_blk(dev);
+			id = ida_simple_get(&nd_region->ns_ida, 0, 0,
+					GFP_KERNEL);
+			nsblk->id = id;
+		} else
+			id = i;
+
+		if (id < 0)
+			break;
+		dev_set_name(dev, "namespace%d.%d", nd_region->id, id);
+		dev->groups = nd_namespace_attribute_groups;
+		nd_device_register(dev);
+	}
+	if (i)
+		nd_region->ns_seed = devs[0];
+
+	if (devs[i]) {
+		int j;
+
+		for (j = i; devs[j]; j++) {
+			struct device *dev = devs[j];
+
+			device_initialize(dev);
+			put_device(dev);
+		}
+		*err = j - i;
+		/*
+		 * All of the namespaces we tried to register failed, so
+		 * fail region activation.
+		 */
+		if (*err == 0)
+			rc = -ENODEV;
+	}
+	kfree(devs);
+
+	if (rc == -ENODEV)
+		return rc;
+
+	return i;
+}
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
new file mode 100644
index 000000000000..e1970c71ad1c
--- /dev/null
+++ b/drivers/nvdimm/nd-core.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ND_CORE_H__
+#define __ND_CORE_H__
+#include <linux/libnvdimm.h>
+#include <linux/device.h>
+#include <linux/libnvdimm.h>
+#include <linux/sizes.h>
+#include <linux/mutex.h>
+#include <linux/nd.h>
+
+extern struct list_head nvdimm_bus_list;
+extern struct mutex nvdimm_bus_list_mutex;
+extern int nvdimm_major;
+
+struct nvdimm_bus {
+	struct nvdimm_bus_descriptor *nd_desc;
+	wait_queue_head_t probe_wait;
+	struct module *module;
+	struct list_head list;
+	struct device dev;
+	int id, probe_active;
+	struct mutex reconfig_mutex;
+};
+
+struct nvdimm {
+	unsigned long flags;
+	void *provider_data;
+	unsigned long *dsm_mask;
+	struct device dev;
+	atomic_t busy;
+	int id;
+};
+
+bool is_nvdimm(struct device *dev);
+bool is_nd_pmem(struct device *dev);
+bool is_nd_blk(struct device *dev);
+struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
+int __init nvdimm_bus_init(void);
+void nvdimm_bus_exit(void);
+void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+struct nd_region;
+void nd_region_create_blk_seed(struct nd_region *nd_region);
+void nd_region_create_btt_seed(struct nd_region *nd_region);
+void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
+void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
+void nd_synchronize(void);
+int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
+int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
+int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
+void __nd_device_register(struct device *dev);
+int nd_match_dimm(struct device *dev, void *data);
+struct nd_label_id;
+char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
+bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
+struct nd_region;
+struct nvdimm_drvdata;
+struct nd_mapping;
+resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, resource_size_t *overlap);
+resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
+resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
+resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id);
+struct nd_mapping;
+struct resource *nsblk_add_resource(struct nd_region *nd_region,
+		struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
+		resource_size_t start);
+int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
+void get_ndd(struct nvdimm_drvdata *ndd);
+resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
+#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
new file mode 100644
index 000000000000..c41f53e74277
--- /dev/null
+++ b/drivers/nvdimm/nd.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ND_H__
+#define __ND_H__
+#include <linux/libnvdimm.h>
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/types.h>
+#include "label.h"
+
+enum {
+	/*
+	 * Limits the maximum number of block apertures a dimm can
+	 * support and is an input to the geometry/on-disk-format of a
+	 * BTT instance
+	 */
+	ND_MAX_LANES = 256,
+	SECTOR_SHIFT = 9,
+	INT_LBASIZE_ALIGNMENT = 64,
+};
+
+struct nvdimm_drvdata {
+	struct device *dev;
+	int nsindex_size;
+	struct nd_cmd_get_config_size nsarea;
+	void *data;
+	int ns_current, ns_next;
+	struct resource dpa;
+	struct kref kref;
+};
+
+struct nd_region_namespaces {
+	int count;
+	int active;
+};
+
+static inline struct nd_namespace_index *to_namespace_index(
+		struct nvdimm_drvdata *ndd, int i)
+{
+	if (i < 0)
+		return NULL;
+
+	return ndd->data + sizeof_namespace_index(ndd) * i;
+}
+
+static inline struct nd_namespace_index *to_current_namespace_index(
+		struct nvdimm_drvdata *ndd)
+{
+	return to_namespace_index(ndd, ndd->ns_current);
+}
+
+static inline struct nd_namespace_index *to_next_namespace_index(
+		struct nvdimm_drvdata *ndd)
+{
+	return to_namespace_index(ndd, ndd->ns_next);
+}
+
+#define nd_dbg_dpa(r, d, res, fmt, arg...) \
+	dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
+		(r) ? dev_name((d)->dev) : "", res ? res->name : "null", \
+		(unsigned long long) (res ? resource_size(res) : 0), \
+		(unsigned long long) (res ? res->start : 0), ##arg)
+
+#define for_each_label(l, label, labels) \
+	for (l = 0; (label = labels ? labels[l] : NULL); l++)
+
+#define for_each_dpa_resource(ndd, res) \
+	for (res = (ndd)->dpa.child; res; res = res->sibling)
+
+#define for_each_dpa_resource_safe(ndd, res, next) \
+	for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
+			res; res = next, next = next ? next->sibling : NULL)
+
+struct nd_percpu_lane {
+	int count;
+	spinlock_t lock;
+};
+
+struct nd_region {
+	struct device dev;
+	struct ida ns_ida;
+	struct ida btt_ida;
+	struct device *ns_seed;
+	struct device *btt_seed;
+	u16 ndr_mappings;
+	u64 ndr_size;
+	u64 ndr_start;
+	int id, num_lanes, ro, numa_node;
+	void *provider_data;
+	struct nd_interleave_set *nd_set;
+	struct nd_percpu_lane __percpu *lane;
+	struct nd_mapping mapping[0];
+};
+
+struct nd_blk_region {
+	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+			void *iobuf, u64 len, int rw);
+	void *blk_provider_data;
+	struct nd_region nd_region;
+};
+
+/*
+ * Lookup next in the repeating sequence of 01, 10, and 11.
+ */
+static inline unsigned nd_inc_seq(unsigned seq)
+{
+	static const unsigned next[] = { 0, 2, 3, 1 };
+
+	return next[seq & 3];
+}
+
+struct btt;
+struct nd_btt {
+	struct device dev;
+	struct nd_namespace_common *ndns;
+	struct btt *btt;
+	unsigned long lbasize;
+	u8 *uuid;
+	int id;
+};
+
+enum nd_async_mode {
+	ND_SYNC,
+	ND_ASYNC,
+};
+
+int nd_integrity_init(struct gendisk *disk, unsigned long meta_size);
+void wait_nvdimm_bus_probe_idle(struct device *dev);
+void nd_device_register(struct device *dev);
+void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
+int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+		size_t len);
+ssize_t nd_sector_size_show(unsigned long current_lbasize,
+		const unsigned long *supported, char *buf);
+ssize_t nd_sector_size_store(struct device *dev, const char *buf,
+		unsigned long *current_lbasize, const unsigned long *supported);
+int __init nvdimm_init(void);
+int __init nd_region_init(void);
+void nvdimm_exit(void);
+void nd_region_exit(void);
+struct nvdimm;
+struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
+int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
+int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
+		void *buf, size_t len);
+struct nd_btt *to_nd_btt(struct device *dev);
+struct btt_sb;
+u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
+#if IS_ENABLED(CONFIG_BTT)
+int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
+bool is_nd_btt(struct device *dev);
+struct device *nd_btt_create(struct nd_region *nd_region);
+#else
+static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+	return -ENODEV;
+}
+
+static inline bool is_nd_btt(struct device *dev)
+{
+	return false;
+}
+
+static inline struct device *nd_btt_create(struct nd_region *nd_region)
+{
+	return NULL;
+}
+
+#endif
+struct nd_region *to_nd_region(struct device *dev);
+int nd_region_to_nstype(struct nd_region *nd_region);
+int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
+void nvdimm_bus_lock(struct device *dev);
+void nvdimm_bus_unlock(struct device *dev);
+bool is_nvdimm_bus_locked(struct device *dev);
+int nvdimm_revalidate_disk(struct gendisk *disk);
+void nvdimm_drvdata_release(struct kref *kref);
+void put_ndd(struct nvdimm_drvdata *ndd);
+int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
+void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
+struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id, resource_size_t start,
+		resource_size_t n);
+resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
+struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev);
+int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
+int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
+const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
+		char *name);
+int nd_blk_region_init(struct nd_region *nd_region);
+void __nd_iostat_start(struct bio *bio, unsigned long *start);
+static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+	if (!blk_queue_io_stat(disk->queue))
+		return false;
+
+	__nd_iostat_start(bio, start);
+	return true;
+}
+void nd_iostat_end(struct bio *bio, unsigned long start);
+resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
+#endif /* __ND_H__ */
diff --git a/drivers/block/pmem.c b/drivers/nvdimm/pmem.c
index 095dfaadcaa5..ade9eb917a4d 100644
--- a/drivers/block/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -1,7 +1,7 @@
 /*
  * Persistent Memory Driver
  *
- * Copyright (c) 2014, Intel Corporation.
+ * Copyright (c) 2014-2015, Intel Corporation.
  * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
  * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
  *
@@ -23,8 +23,9 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-
-#define PMEM_MINORS		16
+#include <linux/pmem.h>
+#include <linux/nd.h>
+#include "nd.h"
 
 struct pmem_device {
 	struct request_queue	*pmem_queue;
@@ -32,12 +33,11 @@ struct pmem_device {
 
 	/* One contiguous memory region per device */
 	phys_addr_t		phys_addr;
-	void			*virt_addr;
+	void __pmem		*virt_addr;
 	size_t			size;
 };
 
 static int pmem_major;
-static atomic_t pmem_index;
 
 static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, int rw,
@@ -45,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 {
 	void *mem = kmap_atomic(page);
 	size_t pmem_off = sector << 9;
+	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (rw == READ) {
-		memcpy(mem + off, pmem->virt_addr + pmem_off, len);
+		memcpy_from_pmem(mem + off, pmem_addr, len);
 		flush_dcache_page(page);
 	} else {
 		flush_dcache_page(page);
-		memcpy(pmem->virt_addr + pmem_off, mem + off, len);
+		memcpy_to_pmem(pmem_addr, mem + off, len);
 	}
 
 	kunmap_atomic(mem);
@@ -59,31 +60,24 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 
 static void pmem_make_request(struct request_queue *q, struct bio *bio)
 {
-	struct block_device *bdev = bio->bi_bdev;
-	struct pmem_device *pmem = bdev->bd_disk->private_data;
-	int rw;
+	bool do_acct;
+	unsigned long start;
 	struct bio_vec bvec;
-	sector_t sector;
 	struct bvec_iter iter;
-	int err = 0;
-
-	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
-		err = -EIO;
-		goto out;
-	}
-
-	BUG_ON(bio->bi_rw & REQ_DISCARD);
+	struct block_device *bdev = bio->bi_bdev;
+	struct pmem_device *pmem = bdev->bd_disk->private_data;
 
-	rw = bio_data_dir(bio);
-	sector = bio->bi_iter.bi_sector;
-	bio_for_each_segment(bvec, bio, iter) {
+	do_acct = nd_iostat_start(bio, &start);
+	bio_for_each_segment(bvec, bio, iter)
 		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
-			     rw, sector);
-		sector += bvec.bv_len >> 9;
-	}
+				bio_data_dir(bio), iter.bi_sector);
+	if (do_acct)
+		nd_iostat_end(bio, start);
 
-out:
-	bio_endio(bio, err);
+	if (bio_data_dir(bio))
+		wmb_pmem();
+
+	bio_endio(bio, 0);
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
@@ -106,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector,
 	if (!pmem)
 		return -ENODEV;
 
-	*kaddr = pmem->virt_addr + offset;
+	/* FIXME convert DAX to comprehend that this mapping has a lifetime */
+	*kaddr = (void __force *) pmem->virt_addr + offset;
 	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
 
 	return pmem->size - offset;
@@ -116,124 +111,165 @@ static const struct block_device_operations pmem_fops = {
 	.owner =		THIS_MODULE,
 	.rw_page =		pmem_rw_page,
 	.direct_access =	pmem_direct_access,
+	.revalidate_disk =	nvdimm_revalidate_disk,
 };
 
-static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
+static struct pmem_device *pmem_alloc(struct device *dev,
+		struct resource *res, int id)
 {
 	struct pmem_device *pmem;
-	struct gendisk *disk;
-	int idx, err;
 
-	err = -ENOMEM;
 	pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
 	if (!pmem)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
+	if (!arch_has_pmem_api())
+		dev_warn(dev, "unable to guarantee persistence of writes\n");
+
+	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
+		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
+				&pmem->phys_addr, pmem->size);
+		kfree(pmem);
+		return ERR_PTR(-EBUSY);
+	}
 
-	err = -EINVAL;
-	if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
-		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
-		goto out_free_dev;
+	pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
+	if (!pmem->virt_addr) {
+		release_mem_region(pmem->phys_addr, pmem->size);
+		kfree(pmem);
+		return ERR_PTR(-ENXIO);
 	}
 
-	/*
-	 * Map the memory as write-through, as we can't write back the contents
-	 * of the CPU caches in case of a crash.
-	 */
-	err = -ENOMEM;
-	pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size);
-	if (!pmem->virt_addr)
-		goto out_release_region;
+	return pmem;
+}
+
+static void pmem_detach_disk(struct pmem_device *pmem)
+{
+	del_gendisk(pmem->pmem_disk);
+	put_disk(pmem->pmem_disk);
+	blk_cleanup_queue(pmem->pmem_queue);
+}
+
+static int pmem_attach_disk(struct nd_namespace_common *ndns,
+		struct pmem_device *pmem)
+{
+	struct gendisk *disk;
 
 	pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!pmem->pmem_queue)
-		goto out_unmap;
+		return -ENOMEM;
 
 	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
-	blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
+	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
 	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
 
-	disk = alloc_disk(PMEM_MINORS);
-	if (!disk)
-		goto out_free_queue;
-
-	idx = atomic_inc_return(&pmem_index) - 1;
+	disk = alloc_disk(0);
+	if (!disk) {
+		blk_cleanup_queue(pmem->pmem_queue);
+		return -ENOMEM;
+	}
 
 	disk->major		= pmem_major;
-	disk->first_minor	= PMEM_MINORS * idx;
+	disk->first_minor	= 0;
 	disk->fops		= &pmem_fops;
 	disk->private_data	= pmem;
 	disk->queue		= pmem->pmem_queue;
 	disk->flags		= GENHD_FL_EXT_DEVT;
-	sprintf(disk->disk_name, "pmem%d", idx);
-	disk->driverfs_dev = dev;
+	nvdimm_namespace_disk_name(ndns, disk->disk_name);
+	disk->driverfs_dev = &ndns->dev;
 	set_capacity(disk, pmem->size >> 9);
 	pmem->pmem_disk = disk;
 
 	add_disk(disk);
+	revalidate_disk(disk);
 
-	return pmem;
+	return 0;
+}
 
-out_free_queue:
-	blk_cleanup_queue(pmem->pmem_queue);
-out_unmap:
-	iounmap(pmem->virt_addr);
-out_release_region:
-	release_mem_region(pmem->phys_addr, pmem->size);
-out_free_dev:
-	kfree(pmem);
-out:
-	return ERR_PTR(err);
+static int pmem_rw_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size, int rw)
+{
+	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);
+
+	if (unlikely(offset + size > pmem->size)) {
+		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
+		return -EFAULT;
+	}
+
+	if (rw == READ)
+		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
+	else {
+		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
+		wmb_pmem();
+	}
+
+	return 0;
 }
 
 static void pmem_free(struct pmem_device *pmem)
 {
-	del_gendisk(pmem->pmem_disk);
-	put_disk(pmem->pmem_disk);
-	blk_cleanup_queue(pmem->pmem_queue);
-	iounmap(pmem->virt_addr);
+	memunmap_pmem(pmem->virt_addr);
 	release_mem_region(pmem->phys_addr, pmem->size);
 	kfree(pmem);
 }
 
-static int pmem_probe(struct platform_device *pdev)
+static int nd_pmem_probe(struct device *dev)
 {
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_namespace_common *ndns;
+	struct nd_namespace_io *nsio;
 	struct pmem_device *pmem;
-	struct resource *res;
-
-	if (WARN_ON(pdev->num_resources > 1))
-		return -ENXIO;
+	int rc;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENXIO;
+	ndns = nvdimm_namespace_common_probe(dev);
+	if (IS_ERR(ndns))
+		return PTR_ERR(ndns);
 
-	pmem = pmem_alloc(&pdev->dev, res);
+	nsio = to_nd_namespace_io(&ndns->dev);
+	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
 
-	platform_set_drvdata(pdev, pmem);
-
-	return 0;
+	dev_set_drvdata(dev, pmem);
+	ndns->rw_bytes = pmem_rw_bytes;
+	if (is_nd_btt(dev))
+		rc = nvdimm_namespace_attach_btt(ndns);
+	else if (nd_btt_probe(ndns, pmem) == 0) {
+		/* we'll come back as btt-pmem */
+		rc = -ENXIO;
+	} else
+		rc = pmem_attach_disk(ndns, pmem);
+	if (rc)
+		pmem_free(pmem);
+	return rc;
 }
 
-static int pmem_remove(struct platform_device *pdev)
+static int nd_pmem_remove(struct device *dev)
 {
-	struct pmem_device *pmem = platform_get_drvdata(pdev);
+	struct pmem_device *pmem = dev_get_drvdata(dev);
 
+	if (is_nd_btt(dev))
+		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+	else
+		pmem_detach_disk(pmem);
 	pmem_free(pmem);
+
 	return 0;
 }
 
-static struct platform_driver pmem_driver = {
-	.probe		= pmem_probe,
-	.remove		= pmem_remove,
-	.driver		= {
-		.owner	= THIS_MODULE,
-		.name	= "pmem",
+MODULE_ALIAS("pmem");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
+static struct nd_device_driver nd_pmem_driver = {
+	.probe = nd_pmem_probe,
+	.remove = nd_pmem_remove,
+	.drv = {
+		.name = "nd_pmem",
 	},
+	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
 static int __init pmem_init(void)
@@ -244,16 +280,19 @@ static int __init pmem_init(void)
 	if (pmem_major < 0)
 		return pmem_major;
 
-	error = platform_driver_register(&pmem_driver);
-	if (error)
+	error = nd_driver_register(&nd_pmem_driver);
+	if (error) {
 		unregister_blkdev(pmem_major, "pmem");
-	return error;
+		return error;
+	}
+
+	return 0;
 }
 module_init(pmem_init);
 
 static void pmem_exit(void)
 {
-	platform_driver_unregister(&pmem_driver);
+	driver_unregister(&nd_pmem_driver.drv);
 	unregister_blkdev(pmem_major, "pmem");
 }
 module_exit(pmem_exit);
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
new file mode 100644
index 000000000000..f28f78ccff19
--- /dev/null
+++ b/drivers/nvdimm/region.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/nd.h>
+#include "nd.h"
+
+static int nd_region_probe(struct device *dev)
+{
+	int err, rc;
+	static unsigned long once;
+	struct nd_region_namespaces *num_ns;
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	if (nd_region->num_lanes > num_online_cpus()
+			&& nd_region->num_lanes < num_possible_cpus()
+			&& !test_and_set_bit(0, &once)) {
+		dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
+				num_online_cpus(), nd_region->num_lanes,
+				num_possible_cpus());
+		dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
+				nd_region->num_lanes);
+	}
+
+	rc = nd_blk_region_init(nd_region);
+	if (rc)
+		return rc;
+
+	rc = nd_region_register_namespaces(nd_region, &err);
+	num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
+	if (!num_ns)
+		return -ENOMEM;
+
+	if (rc < 0)
+		return rc;
+
+	num_ns->active = rc;
+	num_ns->count = rc + err;
+	dev_set_drvdata(dev, num_ns);
+
+	if (rc && err && rc == err)
+		return -ENODEV;
+
+	nd_region->btt_seed = nd_btt_create(nd_region);
+	if (err == 0)
+		return 0;
+
+	/*
+	 * Given multiple namespaces per region, we do not want to
+	 * disable all the successfully registered peer namespaces upon
+	 * a single registration failure.  If userspace is missing a
+	 * namespace that it expects it can disable/re-enable the region
+	 * to retry discovery after correcting the failure.
+	 * <regionX>/namespaces returns the current
+	 * "<async-registered>/<total>" namespace count.
+	 */
+	dev_err(dev, "failed to register %d namespace%s, continuing...\n",
+			err, err == 1 ? "" : "s");
+	return 0;
+}
+
+static int child_unregister(struct device *dev, void *data)
+{
+	nd_device_unregister(dev, ND_SYNC);
+	return 0;
+}
+
+static int nd_region_remove(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	/* flush attribute readers and disable */
+	nvdimm_bus_lock(dev);
+	nd_region->ns_seed = NULL;
+	nd_region->btt_seed = NULL;
+	dev_set_drvdata(dev, NULL);
+	nvdimm_bus_unlock(dev);
+
+	device_for_each_child(dev, NULL, child_unregister);
+	return 0;
+}
+
+static struct nd_device_driver nd_region_driver = {
+	.probe = nd_region_probe,
+	.remove = nd_region_remove,
+	.drv = {
+		.name = "nd_region",
+	},
+	.type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM,
+};
+
+int __init nd_region_init(void)
+{
+	return nd_driver_register(&nd_region_driver);
+}
+
+void nd_region_exit(void)
+{
+	driver_unregister(&nd_region_driver.drv);
+}
+
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM);
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
new file mode 100644
index 000000000000..a5233422f9dc
--- /dev/null
+++ b/drivers/nvdimm/region_devs.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include "nd-core.h"
+#include "nd.h"
+
+static DEFINE_IDA(region_ida);
+
+static void nd_region_release(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	u16 i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		put_device(&nvdimm->dev);
+	}
+	free_percpu(nd_region->lane);
+	ida_simple_remove(&region_ida, nd_region->id);
+	if (is_nd_blk(dev))
+		kfree(to_nd_blk_region(dev));
+	else
+		kfree(nd_region);
+}
+
+static struct device_type nd_blk_device_type = {
+	.name = "nd_blk",
+	.release = nd_region_release,
+};
+
+static struct device_type nd_pmem_device_type = {
+	.name = "nd_pmem",
+	.release = nd_region_release,
+};
+
+static struct device_type nd_volatile_device_type = {
+	.name = "nd_volatile",
+	.release = nd_region_release,
+};
+
+bool is_nd_pmem(struct device *dev)
+{
+	return dev ? dev->type == &nd_pmem_device_type : false;
+}
+
+bool is_nd_blk(struct device *dev)
+{
+	return dev ? dev->type == &nd_blk_device_type : false;
+}
+
+struct nd_region *to_nd_region(struct device *dev)
+{
+	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
+
+	WARN_ON(dev->type->release != nd_region_release);
+	return nd_region;
+}
+EXPORT_SYMBOL_GPL(to_nd_region);
+
+struct nd_blk_region *to_nd_blk_region(struct device *dev)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	WARN_ON(!is_nd_blk(dev));
+	return container_of(nd_region, struct nd_blk_region, nd_region);
+}
+EXPORT_SYMBOL_GPL(to_nd_blk_region);
+
+void *nd_region_provider_data(struct nd_region *nd_region)
+{
+	return nd_region->provider_data;
+}
+EXPORT_SYMBOL_GPL(nd_region_provider_data);
+
+void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
+{
+	return ndbr->blk_provider_data;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);
+
+void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
+{
+	ndbr->blk_provider_data = data;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
+
+/**
+ * nd_region_to_nstype() - region to an integer namespace type
+ * @nd_region: region-device to interrogate
+ *
+ * This is the 'nstype' attribute of a region as well, an input to the
+ * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
+ * namespace devices with namespace drivers.
+ */
+int nd_region_to_nstype(struct nd_region *nd_region)
+{
+	if (is_nd_pmem(&nd_region->dev)) {
+		u16 i, alias;
+
+		for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+			struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+			if (nvdimm->flags & NDD_ALIASING)
+				alias++;
+		}
+		if (alias)
+			return ND_DEVICE_NAMESPACE_PMEM;
+		else
+			return ND_DEVICE_NAMESPACE_IO;
+	} else if (is_nd_blk(&nd_region->dev)) {
+		return ND_DEVICE_NAMESPACE_BLK;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nd_region_to_nstype);
+
+static int is_uuid_busy(struct device *dev, void *data)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	u8 *uuid = data;
+
+	switch (nd_region_to_nstype(nd_region)) {
+	case ND_DEVICE_NAMESPACE_PMEM: {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		if (!nspm->uuid)
+			break;
+		if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0)
+			return -EBUSY;
+		break;
+	}
+	case ND_DEVICE_NAMESPACE_BLK: {
+		struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+		if (!nsblk->uuid)
+			break;
+		if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0)
+			return -EBUSY;
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int is_namespace_uuid_busy(struct device *dev, void *data)
+{
+	if (is_nd_pmem(dev) || is_nd_blk(dev))
+		return device_for_each_child(dev, data, is_uuid_busy);
+	return 0;
+}
+
+/**
+ * nd_is_uuid_unique - verify that no other namespace has @uuid
+ * @dev: any device on a nvdimm_bus
+ * @uuid: uuid to check
+ */
+bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return false;
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	if (device_for_each_child(&nvdimm_bus->dev, uuid,
+				is_namespace_uuid_busy) != 0)
+		return false;
+	return true;
+}
+
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	unsigned long long size = 0;
+
+	if (is_nd_pmem(dev)) {
+		size = nd_region->ndr_size;
+	} else if (nd_region->ndr_mappings == 1) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+
+		size = nd_mapping->size;
+	}
+
+	return sprintf(buf, "%llu\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static ssize_t mappings_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return sprintf(buf, "%d\n", nd_region->ndr_mappings);
+}
+static DEVICE_ATTR_RO(mappings);
+
+static ssize_t nstype_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
+}
+static DEVICE_ATTR_RO(nstype);
+
+static ssize_t set_cookie_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
+
+	if (is_nd_pmem(dev) && nd_set)
+		/* pass, should be precluded by region_visible */;
+	else
+		return -ENXIO;
+
+	return sprintf(buf, "%#llx\n", nd_set->cookie);
+}
+static DEVICE_ATTR_RO(set_cookie);
+
+resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
+{
+	resource_size_t blk_max_overlap = 0, available, overlap;
+	int i;
+
+	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
+
+ retry:
+	available = 0;
+	overlap = blk_max_overlap;
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+
+		/* if a dimm is disabled the available capacity is zero */
+		if (!ndd)
+			return 0;
+
+		if (is_nd_pmem(&nd_region->dev)) {
+			available += nd_pmem_available_dpa(nd_region,
+					nd_mapping, &overlap);
+			if (overlap > blk_max_overlap) {
+				blk_max_overlap = overlap;
+				goto retry;
+			}
+		} else if (is_nd_blk(&nd_region->dev)) {
+			available += nd_blk_available_dpa(nd_mapping);
+		}
+	}
+
+	return available;
+}
+
+static ssize_t available_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	unsigned long long available = 0;
+
+	/*
+	 * Flush in-flight updates and grab a snapshot of the available
+	 * size.  Of course, this value is potentially invalidated the
+	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
+	 * problem to not race itself.
+	 */
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	available = nd_region_available_dpa(nd_region);
+	nvdimm_bus_unlock(dev);
+
+	return sprintf(buf, "%llu\n", available);
+}
+static DEVICE_ATTR_RO(available_size);
+
+static ssize_t init_namespaces_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (num_ns)
+		rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+	else
+		rc = -ENXIO;
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(init_namespaces);
+
+static ssize_t namespace_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nd_region->ns_seed)
+		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+	return rc;
+}
+static DEVICE_ATTR_RO(namespace_seed);
+
+static ssize_t btt_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nd_region->btt_seed)
+		rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(btt_seed);
+
+static ssize_t read_only_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return sprintf(buf, "%d\n", nd_region->ro);
+}
+
+static ssize_t read_only_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	bool ro;
+	int rc = strtobool(buf, &ro);
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	if (rc)
+		return rc;
+
+	nd_region->ro = ro;
+	return len;
+}
+static DEVICE_ATTR_RW(read_only);
+
+static struct attribute *nd_region_attributes[] = {
+	&dev_attr_size.attr,
+	&dev_attr_nstype.attr,
+	&dev_attr_mappings.attr,
+	&dev_attr_btt_seed.attr,
+	&dev_attr_read_only.attr,
+	&dev_attr_set_cookie.attr,
+	&dev_attr_available_size.attr,
+	&dev_attr_namespace_seed.attr,
+	&dev_attr_init_namespaces.attr,
+	NULL,
+};
+
+static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, typeof(*dev), kobj);
+	struct nd_region *nd_region = to_nd_region(dev);
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
+	int type = nd_region_to_nstype(nd_region);
+
+	if (a != &dev_attr_set_cookie.attr
+			&& a != &dev_attr_available_size.attr)
+		return a->mode;
+
+	if ((type == ND_DEVICE_NAMESPACE_PMEM
+				|| type == ND_DEVICE_NAMESPACE_BLK)
+			&& a == &dev_attr_available_size.attr)
+		return a->mode;
+	else if (is_nd_pmem(dev) && nd_set)
+		return a->mode;
+
+	return 0;
+}
+
+struct attribute_group nd_region_attribute_group = {
+	.attrs = nd_region_attributes,
+	.is_visible = region_visible,
+};
+EXPORT_SYMBOL_GPL(nd_region_attribute_group);
+
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
+{
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
+
+	if (nd_set)
+		return nd_set->cookie;
+	return 0;
+}
+
+/*
+ * Upon successful probe/remove, take/release a reference on the
+ * associated interleave set (if present), and plant new btt + namespace
+ * seeds.  Also, on the removal of a BLK region, notify the provider to
+ * disable the region.
+ */
+static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
+		struct device *dev, bool probe)
+{
+	struct nd_region *nd_region;
+
+	if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
+		int i;
+
+		nd_region = to_nd_region(dev);
+		for (i = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+			struct nvdimm_drvdata *ndd = nd_mapping->ndd;
+			struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+			kfree(nd_mapping->labels);
+			nd_mapping->labels = NULL;
+			put_ndd(ndd);
+			nd_mapping->ndd = NULL;
+			if (ndd)
+				atomic_dec(&nvdimm->busy);
+		}
+
+		if (is_nd_pmem(dev))
+			return;
+
+		to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
+	}
+	if (dev->parent && is_nd_blk(dev->parent) && probe) {
+		nd_region = to_nd_region(dev->parent);
+		nvdimm_bus_lock(dev);
+		if (nd_region->ns_seed == dev)
+			nd_region_create_blk_seed(nd_region);
+		nvdimm_bus_unlock(dev);
+	}
+	if (is_nd_btt(dev) && probe) {
+		nd_region = to_nd_region(dev->parent);
+		nvdimm_bus_lock(dev);
+		if (nd_region->btt_seed == dev)
+			nd_region_create_btt_seed(nd_region);
+		nvdimm_bus_unlock(dev);
+	}
+}
+
+void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev)
+{
+	nd_region_notify_driver_action(nvdimm_bus, dev, true);
+}
+
+void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev)
+{
+	nd_region_notify_driver_action(nvdimm_bus, dev, false);
+}
+
+static ssize_t mappingN(struct device *dev, char *buf, int n)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	struct nd_mapping *nd_mapping;
+	struct nvdimm *nvdimm;
+
+	if (n >= nd_region->ndr_mappings)
+		return -ENXIO;
+	nd_mapping = &nd_region->mapping[n];
+	nvdimm = nd_mapping->nvdimm;
+
+	return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev),
+			nd_mapping->start, nd_mapping->size);
+}
+
+#define REGION_MAPPING(idx) \
+static ssize_t mapping##idx##_show(struct device *dev,		\
+		struct device_attribute *attr, char *buf)	\
+{								\
+	return mappingN(dev, buf, idx);				\
+}								\
+static DEVICE_ATTR_RO(mapping##idx)
+
+/*
+ * 32 should be enough for a while, even in the presence of socket
+ * interleave a 32-way interleave set is a degenerate case.
+ */
+REGION_MAPPING(0);
+REGION_MAPPING(1);
+REGION_MAPPING(2);
+REGION_MAPPING(3);
+REGION_MAPPING(4);
+REGION_MAPPING(5);
+REGION_MAPPING(6);
+REGION_MAPPING(7);
+REGION_MAPPING(8);
+REGION_MAPPING(9);
+REGION_MAPPING(10);
+REGION_MAPPING(11);
+REGION_MAPPING(12);
+REGION_MAPPING(13);
+REGION_MAPPING(14);
+REGION_MAPPING(15);
+REGION_MAPPING(16);
+REGION_MAPPING(17);
+REGION_MAPPING(18);
+REGION_MAPPING(19);
+REGION_MAPPING(20);
+REGION_MAPPING(21);
+REGION_MAPPING(22);
+REGION_MAPPING(23);
+REGION_MAPPING(24);
+REGION_MAPPING(25);
+REGION_MAPPING(26);
+REGION_MAPPING(27);
+REGION_MAPPING(28);
+REGION_MAPPING(29);
+REGION_MAPPING(30);
+REGION_MAPPING(31);
+
+static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	if (n < nd_region->ndr_mappings)
+		return a->mode;
+	return 0;
+}
+
+static struct attribute *mapping_attributes[] = {
+	&dev_attr_mapping0.attr,
+	&dev_attr_mapping1.attr,
+	&dev_attr_mapping2.attr,
+	&dev_attr_mapping3.attr,
+	&dev_attr_mapping4.attr,
+	&dev_attr_mapping5.attr,
+	&dev_attr_mapping6.attr,
+	&dev_attr_mapping7.attr,
+	&dev_attr_mapping8.attr,
+	&dev_attr_mapping9.attr,
+	&dev_attr_mapping10.attr,
+	&dev_attr_mapping11.attr,
+	&dev_attr_mapping12.attr,
+	&dev_attr_mapping13.attr,
+	&dev_attr_mapping14.attr,
+	&dev_attr_mapping15.attr,
+	&dev_attr_mapping16.attr,
+	&dev_attr_mapping17.attr,
+	&dev_attr_mapping18.attr,
+	&dev_attr_mapping19.attr,
+	&dev_attr_mapping20.attr,
+	&dev_attr_mapping21.attr,
+	&dev_attr_mapping22.attr,
+	&dev_attr_mapping23.attr,
+	&dev_attr_mapping24.attr,
+	&dev_attr_mapping25.attr,
+	&dev_attr_mapping26.attr,
+	&dev_attr_mapping27.attr,
+	&dev_attr_mapping28.attr,
+	&dev_attr_mapping29.attr,
+	&dev_attr_mapping30.attr,
+	&dev_attr_mapping31.attr,
+	NULL,
+};
+
+struct attribute_group nd_mapping_attribute_group = {
+	.is_visible = mapping_visible,
+	.attrs = mapping_attributes,
+};
+EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);
+
+int nd_blk_region_init(struct nd_region *nd_region)
+{
+	struct device *dev = &nd_region->dev;
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!is_nd_blk(dev))
+		return 0;
+
+	if (nd_region->ndr_mappings < 1) {
+		dev_err(dev, "invalid BLK region\n");
+		return -ENXIO;
+	}
+
+	return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
+}
+
+/**
+ * nd_region_acquire_lane - allocate and lock a lane
+ * @nd_region: region id and number of lanes possible
+ *
+ * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
+ * We optimize for the common case where there are 256 lanes, one
+ * per-cpu.  For larger systems we need to lock to share lanes.  For now
+ * this implementation assumes the cost of maintaining an allocator for
+ * free lanes is on the order of the lock hold time, so it implements a
+ * static lane = cpu % num_lanes mapping.
+ *
+ * In the case of a BTT instance on top of a BLK namespace a lane may be
+ * acquired recursively.  We lock on the first instance.
+ *
+ * In the case of a BTT instance on top of PMEM, we only acquire a lane
+ * for the BTT metadata updates.
+ */
+unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
+{
+	unsigned int cpu, lane;
+
+	cpu = get_cpu();
+	if (nd_region->num_lanes < nr_cpu_ids) {
+		struct nd_percpu_lane *ndl_lock, *ndl_count;
+
+		lane = cpu % nd_region->num_lanes;
+		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+		if (ndl_count->count++ == 0)
+			spin_lock(&ndl_lock->lock);
+	} else
+		lane = cpu;
+
+	return lane;
+}
+EXPORT_SYMBOL(nd_region_acquire_lane);
+
+void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
+{
+	if (nd_region->num_lanes < nr_cpu_ids) {
+		unsigned int cpu = get_cpu();
+		struct nd_percpu_lane *ndl_lock, *ndl_count;
+
+		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
+		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
+		if (--ndl_count->count == 0)
+			spin_unlock(&ndl_lock->lock);
+		put_cpu();
+	}
+	put_cpu();
+}
+EXPORT_SYMBOL(nd_region_release_lane);
+
+static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc, struct device_type *dev_type,
+		const char *caller)
+{
+	struct nd_region *nd_region;
+	struct device *dev;
+	void *region_buf;
+	unsigned int i;
+	int ro = 0;
+
+	for (i = 0; i < ndr_desc->num_mappings; i++) {
+		struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		if ((nd_mapping->start | nd_mapping->size) % SZ_4K) {
+			dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
+					caller, dev_name(&nvdimm->dev), i);
+
+			return NULL;
+		}
+
+		if (nvdimm->flags & NDD_UNARMED)
+			ro = 1;
+	}
+
+	if (dev_type == &nd_blk_device_type) {
+		struct nd_blk_region_desc *ndbr_desc;
+		struct nd_blk_region *ndbr;
+
+		ndbr_desc = to_blk_region_desc(ndr_desc);
+		ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
+				* ndr_desc->num_mappings,
+				GFP_KERNEL);
+		if (ndbr) {
+			nd_region = &ndbr->nd_region;
+			ndbr->enable = ndbr_desc->enable;
+			ndbr->disable = ndbr_desc->disable;
+			ndbr->do_io = ndbr_desc->do_io;
+		}
+		region_buf = ndbr;
+	} else {
+		nd_region = kzalloc(sizeof(struct nd_region)
+				+ sizeof(struct nd_mapping)
+				* ndr_desc->num_mappings,
+				GFP_KERNEL);
+		region_buf = nd_region;
+	}
+
+	if (!region_buf)
+		return NULL;
+	nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL);
+	if (nd_region->id < 0)
+		goto err_id;
+
+	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
+	if (!nd_region->lane)
+		goto err_percpu;
+
+        for (i = 0; i < nr_cpu_ids; i++) {
+		struct nd_percpu_lane *ndl;
+
+		ndl = per_cpu_ptr(nd_region->lane, i);
+		spin_lock_init(&ndl->lock);
+		ndl->count = 0;
+	}
+
+	memcpy(nd_region->mapping, ndr_desc->nd_mapping,
+			sizeof(struct nd_mapping) * ndr_desc->num_mappings);
+	for (i = 0; i < ndr_desc->num_mappings; i++) {
+		struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+		get_device(&nvdimm->dev);
+	}
+	nd_region->ndr_mappings = ndr_desc->num_mappings;
+	nd_region->provider_data = ndr_desc->provider_data;
+	nd_region->nd_set = ndr_desc->nd_set;
+	nd_region->num_lanes = ndr_desc->num_lanes;
+	nd_region->ro = ro;
+	nd_region->numa_node = ndr_desc->numa_node;
+	ida_init(&nd_region->ns_ida);
+	ida_init(&nd_region->btt_ida);
+	dev = &nd_region->dev;
+	dev_set_name(dev, "region%d", nd_region->id);
+	dev->parent = &nvdimm_bus->dev;
+	dev->type = dev_type;
+	dev->groups = ndr_desc->attr_groups;
+	nd_region->ndr_size = resource_size(ndr_desc->res);
+	nd_region->ndr_start = ndr_desc->res->start;
+	nd_device_register(dev);
+
+	return nd_region;
+
+ err_percpu:
+	ida_simple_remove(&region_ida, nd_region->id);
+ err_id:
+	kfree(region_buf);
+	return NULL;
+}
+
+struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc)
+{
+	ndr_desc->num_lanes = ND_MAX_LANES;
+	return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
+			__func__);
+}
+EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
+
+struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc)
+{
+	if (ndr_desc->num_mappings > 1)
+		return NULL;
+	ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
+	return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
+			__func__);
+}
+EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);
+
+struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc)
+{
+	ndr_desc->num_lanes = ND_MAX_LANES;
+	return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
+			__func__);
+}
+EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index f04c873a7365..b155d32db766 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -377,7 +377,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 			struct page *page)
 {
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
-	if (!ops->rw_page)
+	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
 	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
 }
@@ -408,7 +408,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 	int result;
 	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
 	const struct block_device_operations *ops = bdev->bd_disk->fops;
-	if (!ops->rw_page)
+	if (!ops->rw_page || bdev_get_integrity(bdev))
 		return -EOPNOTSUPP;
 	set_page_writeback(page);
 	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c187817471fb..1618cdfb38c7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -261,8 +261,13 @@ extern void acpi_osi_setup(char *str);
 extern bool acpi_osi_is_win8(void);
 
 #ifdef CONFIG_ACPI_NUMA
+int acpi_map_pxm_to_online_node(int pxm);
 int acpi_get_node(acpi_handle handle);
 #else
+static inline int acpi_map_pxm_to_online_node(int pxm)
+{
+	return 0;
+}
 static inline int acpi_get_node(acpi_handle handle)
 {
 	return 0;
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 05be2352fef8..26fc8bc77f85 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -21,6 +21,7 @@
 # define __rcu		__attribute__((noderef, address_space(4)))
 #else
 # define __rcu
+# define __pmem		__attribute__((noderef, address_space(5)))
 #endif
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
@@ -42,6 +43,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __cond_lock(x,c) (c)
 # define __percpu
 # define __rcu
+# define __pmem
 #endif
 
 /* Indirect macros required for expanded argument pasting, eg. __LINE__. */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f19efe4eb3f..85ef051ac6fb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -85,7 +85,8 @@ typedef	struct {
 #define EFI_MEMORY_MAPPED_IO		11
 #define EFI_MEMORY_MAPPED_IO_PORT_SPACE	12
 #define EFI_PAL_CODE			13
-#define EFI_MAX_MEMORY_TYPE		14
+#define EFI_PERSISTENT_MEMORY		14
+#define EFI_MAX_MEMORY_TYPE		15
 
 /* Attribute values: */
 #define EFI_MEMORY_UC		((u64)0x0000000000000001ULL)	/* uncached */
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
new file mode 100644
index 000000000000..75e3af01ee32
--- /dev/null
+++ b/include/linux/libnvdimm.h
@@ -0,0 +1,151 @@
+/*
+ * libnvdimm - Non-volatile-memory Devices Subsystem
+ *
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LIBNVDIMM_H__
+#define __LIBNVDIMM_H__
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+enum {
+	/* when a dimm supports both PMEM and BLK access a label is required */
+	NDD_ALIASING = 1 << 0,
+	/* unarmed memory devices may not persist writes */
+	NDD_UNARMED = 1 << 1,
+
+	/* need to set a limit somewhere, but yes, this is likely overkill */
+	ND_IOCTL_MAX_BUFLEN = SZ_4M,
+	ND_CMD_MAX_ELEM = 4,
+	ND_CMD_MAX_ENVELOPE = 16,
+	ND_CMD_ARS_STATUS_MAX = SZ_4K,
+	ND_MAX_MAPPINGS = 32,
+
+	/* mark newly adjusted resources as requiring a label update */
+	DPA_RESOURCE_ADJUSTED = 1 << 0,
+};
+
+extern struct attribute_group nvdimm_bus_attribute_group;
+extern struct attribute_group nvdimm_attribute_group;
+extern struct attribute_group nd_device_attribute_group;
+extern struct attribute_group nd_numa_attribute_group;
+extern struct attribute_group nd_region_attribute_group;
+extern struct attribute_group nd_mapping_attribute_group;
+
+struct nvdimm;
+struct nvdimm_bus_descriptor;
+typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int buf_len);
+
+struct nd_namespace_label;
+struct nvdimm_drvdata;
+struct nd_mapping {
+	struct nvdimm *nvdimm;
+	struct nd_namespace_label **labels;
+	u64 start;
+	u64 size;
+	/*
+	 * @ndd is for private use at region enable / disable time for
+	 * get_ndd() + put_ndd(), all other nd_mapping to ndd
+	 * conversions use to_ndd() which respects enabled state of the
+	 * nvdimm.
+	 */
+	struct nvdimm_drvdata *ndd;
+};
+
+struct nvdimm_bus_descriptor {
+	const struct attribute_group **attr_groups;
+	unsigned long dsm_mask;
+	char *provider_name;
+	ndctl_fn ndctl;
+};
+
+struct nd_cmd_desc {
+	int in_num;
+	int out_num;
+	u32 in_sizes[ND_CMD_MAX_ELEM];
+	int out_sizes[ND_CMD_MAX_ELEM];
+};
+
+struct nd_interleave_set {
+	u64 cookie;
+};
+
+struct nd_region_desc {
+	struct resource *res;
+	struct nd_mapping *nd_mapping;
+	u16 num_mappings;
+	const struct attribute_group **attr_groups;
+	struct nd_interleave_set *nd_set;
+	void *provider_data;
+	int num_lanes;
+	int numa_node;
+};
+
+struct nvdimm_bus;
+struct module;
+struct device;
+struct nd_blk_region;
+struct nd_blk_region_desc {
+	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+	int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+			void *iobuf, u64 len, int rw);
+	struct nd_region_desc ndr_desc;
+};
+
+static inline struct nd_blk_region_desc *to_blk_region_desc(
+		struct nd_region_desc *ndr_desc)
+{
+	return container_of(ndr_desc, struct nd_blk_region_desc, ndr_desc);
+
+}
+
+struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
+#define nvdimm_bus_register(parent, desc) \
+	__nvdimm_bus_register(parent, desc, THIS_MODULE)
+void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
+struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
+struct nvdimm *to_nvdimm(struct device *dev);
+struct nd_region *to_nd_region(struct device *dev);
+struct nd_blk_region *to_nd_blk_region(struct device *dev);
+struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
+const char *nvdimm_name(struct nvdimm *nvdimm);
+void *nvdimm_provider_data(struct nvdimm *nvdimm);
+struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
+		const struct attribute_group **groups, unsigned long flags,
+		unsigned long *dsm_mask);
+const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
+const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
+u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, void *buf);
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+		const u32 *out_field);
+int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count);
+struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
+struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
+struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
+		struct nd_region_desc *ndr_desc);
+void *nd_region_provider_data(struct nd_region *nd_region);
+void *nd_blk_region_provider_data(struct nd_blk_region *ndbr);
+void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data);
+struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
+unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
+void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
+u64 nd_fletcher64(void *addr, size_t len, bool le);
+#endif /* __LIBNVDIMM_H__ */
diff --git a/include/linux/nd.h b/include/linux/nd.h
new file mode 100644
index 000000000000..507e47c86737
--- /dev/null
+++ b/include/linux/nd.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LINUX_ND_H__
+#define __LINUX_ND_H__
+#include <linux/fs.h>
+#include <linux/ndctl.h>
+#include <linux/device.h>
+
+struct nd_device_driver {
+	struct device_driver drv;
+	unsigned long type;
+	int (*probe)(struct device *dev);
+	int (*remove)(struct device *dev);
+};
+
+static inline struct nd_device_driver *to_nd_device_driver(
+		struct device_driver *drv)
+{
+	return container_of(drv, struct nd_device_driver, drv);
+};
+
+/**
+ * struct nd_namespace_common - core infrastructure of a namespace
+ * @force_raw: ignore other personalities for the namespace (e.g. btt)
+ * @dev: device model node
+ * @claim: when set a another personality has taken ownership of the namespace
+ * @rw_bytes: access the raw namespace capacity with byte-aligned transfers
+ */
+struct nd_namespace_common {
+	int force_raw;
+	struct device dev;
+	struct device *claim;
+	int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset,
+			void *buf, size_t size, int rw);
+};
+
+static inline struct nd_namespace_common *to_ndns(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_common, dev);
+}
+
+/**
+ * struct nd_namespace_io - infrastructure for loading an nd_pmem instance
+ * @dev: namespace device created by the nd region driver
+ * @res: struct resource conversion of a NFIT SPA table
+ */
+struct nd_namespace_io {
+	struct nd_namespace_common common;
+	struct resource res;
+};
+
+/**
+ * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory
+ * @nsio: device and system physical address range to drive
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ */
+struct nd_namespace_pmem {
+	struct nd_namespace_io nsio;
+	char *alt_name;
+	u8 *uuid;
+};
+
+/**
+ * struct nd_namespace_blk - namespace for dimm-bounded persistent memory
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ * @id: ida allocated id
+ * @lbasize: blk namespaces have a native sector size when btt not present
+ * @num_resources: number of dpa extents to claim
+ * @res: discontiguous dpa extents for given dimm
+ */
+struct nd_namespace_blk {
+	struct nd_namespace_common common;
+	char *alt_name;
+	u8 *uuid;
+	int id;
+	unsigned long lbasize;
+	int num_resources;
+	struct resource **res;
+};
+
+static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_io, common.dev);
+}
+
+static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+	return container_of(nsio, struct nd_namespace_pmem, nsio);
+}
+
+static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_blk, common.dev);
+}
+
+/**
+ * nvdimm_read_bytes() - synchronously read bytes from an nvdimm namespace
+ * @ndns: device to read
+ * @offset: namespace-relative starting offset
+ * @buf: buffer to fill
+ * @size: transfer length
+ *
+ * @buf is up-to-date upon return from this routine.
+ */
+static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size)
+{
+	return ndns->rw_bytes(ndns, offset, buf, size, READ);
+}
+
+/**
+ * nvdimm_write_bytes() - synchronously write bytes to an nvdimm namespace
+ * @ndns: device to read
+ * @offset: namespace-relative starting offset
+ * @buf: buffer to drain
+ * @size: transfer length
+ *
+ * NVDIMM Namepaces disks do not implement sectors internally.  Depending on
+ * the @ndns, the contents of @buf may be in cpu cache, platform buffers,
+ * or on backing memory media upon return from this routine.  Flushing
+ * to media is handled internal to the @ndns driver, if at all.
+ */
+static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *buf, size_t size)
+{
+	return ndns->rw_bytes(ndns, offset, buf, size, WRITE);
+}
+
+#define MODULE_ALIAS_ND_DEVICE(type) \
+	MODULE_ALIAS("nd:t" __stringify(type) "*")
+#define ND_DEVICE_MODALIAS_FMT "nd:t%d"
+
+int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
+		struct module *module, const char *mod_name);
+#define nd_driver_register(driver) \
+	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#endif /* __LINUX_ND_H__ */
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
new file mode 100644
index 000000000000..d2114045a6c4
--- /dev/null
+++ b/include/linux/pmem.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __PMEM_H__
+#define __PMEM_H__
+
+#include <linux/io.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+#include <asm/cacheflush.h>
+#else
+static inline void arch_wmb_pmem(void)
+{
+	BUG();
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+	return false;
+}
+
+static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	return NULL;
+}
+
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t n)
+{
+	BUG();
+}
+#endif
+
+/*
+ * Architectures that define ARCH_HAS_PMEM_API must provide
+ * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(),
+ * arch_wmb_pmem(), and __arch_has_wmb_pmem().
+ */
+
+static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
+{
+	memcpy(dst, (void __force const *) src, size);
+}
+
+static inline void memunmap_pmem(void __pmem *addr)
+{
+	iounmap((void __force __iomem *) addr);
+}
+
+/**
+ * arch_has_wmb_pmem - true if wmb_pmem() ensures durability
+ *
+ * For a given cpu implementation within an architecture it is possible
+ * that wmb_pmem() resolves to a nop.  In the case this returns
+ * false, pmem api users are unable to ensure durability and may want to
+ * fall back to a different data consistency model, or otherwise notify
+ * the user.
+ */
+static inline bool arch_has_wmb_pmem(void)
+{
+	if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
+		return __arch_has_wmb_pmem();
+	return false;
+}
+
+static inline bool arch_has_pmem_api(void)
+{
+	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem();
+}
+
+/*
+ * These defaults seek to offer decent performance and minimize the
+ * window between i/o completion and writes being durable on media.
+ * However, it is undefined / architecture specific whether
+ * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
+ * making data durable relative to i/o completion.
+ */
+static void default_memcpy_to_pmem(void __pmem *dst, const void *src,
+		size_t size)
+{
+	memcpy((void __force *) dst, src, size);
+}
+
+static void __pmem *default_memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	return (void __pmem __force *)ioremap_wt(offset, size);
+}
+
+/**
+ * memremap_pmem - map physical persistent memory for pmem api
+ * @offset: physical address of persistent memory
+ * @size: size of the mapping
+ *
+ * Establish a mapping of the architecture specific memory type expected
+ * by memcpy_to_pmem() and wmb_pmem().  For example, it may be
+ * the case that an uncacheable or writethrough mapping is sufficient,
+ * or a writeback mapping provided memcpy_to_pmem() and
+ * wmb_pmem() arrange for the data to be written through the
+ * cache to persistent media.
+ */
+static inline void __pmem *memremap_pmem(resource_size_t offset,
+		unsigned long size)
+{
+	if (arch_has_pmem_api())
+		return arch_memremap_pmem(offset, size);
+	return default_memremap_pmem(offset, size);
+}
+
+/**
+ * memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Perform a memory copy that results in the destination of the copy
+ * being effectively evicted from, or never written to, the processor
+ * cache hierarchy after the copy completes.  After memcpy_to_pmem()
+ * data may still reside in cpu or platform buffers, so this operation
+ * must be followed by a wmb_pmem().
+ */
+static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
+{
+	if (arch_has_pmem_api())
+		arch_memcpy_to_pmem(dst, src, n);
+	else
+		default_memcpy_to_pmem(dst, src, n);
+}
+
+/**
+ * wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of memcpy_to_pmem() operations this drains data from
+ * cpu write buffers and any platform (memory controller) buffers to
+ * ensure that written data is durable on persistent memory media.
+ */
+static inline void wmb_pmem(void)
+{
+	if (arch_has_pmem_api())
+		arch_wmb_pmem();
+}
+#endif /* __PMEM_H__ */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index c1c23f19d4a2..1ff9942718fe 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -272,6 +272,7 @@ header-y += ncp_fs.h
 header-y += ncp.h
 header-y += ncp_mount.h
 header-y += ncp_no.h
+header-y += ndctl.h
 header-y += neighbour.h
 header-y += netconf.h
 header-y += netdevice.h
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
new file mode 100644
index 000000000000..2b94ea2287bb
--- /dev/null
+++ b/include/uapi/linux/ndctl.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#ifndef __NDCTL_H__
+#define __NDCTL_H__
+
+#include <linux/types.h>
+
+struct nd_cmd_smart {
+	__u32 status;
+	__u8 data[128];
+} __packed;
+
+struct nd_cmd_smart_threshold {
+	__u32 status;
+	__u8 data[8];
+} __packed;
+
+struct nd_cmd_dimm_flags {
+	__u32 status;
+	__u32 flags;
+} __packed;
+
+struct nd_cmd_get_config_size {
+	__u32 status;
+	__u32 config_size;
+	__u32 max_xfer;
+} __packed;
+
+struct nd_cmd_get_config_data_hdr {
+	__u32 in_offset;
+	__u32 in_length;
+	__u32 status;
+	__u8 out_buf[0];
+} __packed;
+
+struct nd_cmd_set_config_hdr {
+	__u32 in_offset;
+	__u32 in_length;
+	__u8 in_buf[0];
+} __packed;
+
+struct nd_cmd_vendor_hdr {
+	__u32 opcode;
+	__u32 in_length;
+	__u8 in_buf[0];
+} __packed;
+
+struct nd_cmd_vendor_tail {
+	__u32 status;
+	__u32 out_length;
+	__u8 out_buf[0];
+} __packed;
+
+struct nd_cmd_ars_cap {
+	__u64 address;
+	__u64 length;
+	__u32 status;
+	__u32 max_ars_out;
+} __packed;
+
+struct nd_cmd_ars_start {
+	__u64 address;
+	__u64 length;
+	__u16 type;
+	__u8 reserved[6];
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_status {
+	__u32 status;
+	__u32 out_length;
+	__u64 address;
+	__u64 length;
+	__u16 type;
+	__u32 num_records;
+	struct nd_ars_record {
+		__u32 handle;
+		__u32 flags;
+		__u64 err_address;
+		__u64 mask;
+	} __packed records[0];
+} __packed;
+
+enum {
+	ND_CMD_IMPLEMENTED = 0,
+
+	/* bus commands */
+	ND_CMD_ARS_CAP = 1,
+	ND_CMD_ARS_START = 2,
+	ND_CMD_ARS_STATUS = 3,
+
+	/* per-dimm commands */
+	ND_CMD_SMART = 1,
+	ND_CMD_SMART_THRESHOLD = 2,
+	ND_CMD_DIMM_FLAGS = 3,
+	ND_CMD_GET_CONFIG_SIZE = 4,
+	ND_CMD_GET_CONFIG_DATA = 5,
+	ND_CMD_SET_CONFIG_DATA = 6,
+	ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7,
+	ND_CMD_VENDOR_EFFECT_LOG = 8,
+	ND_CMD_VENDOR = 9,
+};
+
+static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
+{
+	static const char * const names[] = {
+		[ND_CMD_ARS_CAP] = "ars_cap",
+		[ND_CMD_ARS_START] = "ars_start",
+		[ND_CMD_ARS_STATUS] = "ars_status",
+	};
+
+	if (cmd < ARRAY_SIZE(names) && names[cmd])
+		return names[cmd];
+	return "unknown";
+}
+
+static inline const char *nvdimm_cmd_name(unsigned cmd)
+{
+	static const char * const names[] = {
+		[ND_CMD_SMART] = "smart",
+		[ND_CMD_SMART_THRESHOLD] = "smart_thresh",
+		[ND_CMD_DIMM_FLAGS] = "flags",
+		[ND_CMD_GET_CONFIG_SIZE] = "get_size",
+		[ND_CMD_GET_CONFIG_DATA] = "get_data",
+		[ND_CMD_SET_CONFIG_DATA] = "set_data",
+		[ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size",
+		[ND_CMD_VENDOR_EFFECT_LOG] = "effect_log",
+		[ND_CMD_VENDOR] = "vendor",
+	};
+
+	if (cmd < ARRAY_SIZE(names) && names[cmd])
+		return names[cmd];
+	return "unknown";
+}
+
+#define ND_IOCTL 'N'
+
+#define ND_IOCTL_SMART			_IOWR(ND_IOCTL, ND_CMD_SMART,\
+					struct nd_cmd_smart)
+
+#define ND_IOCTL_SMART_THRESHOLD	_IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
+					struct nd_cmd_smart_threshold)
+
+#define ND_IOCTL_DIMM_FLAGS		_IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
+					struct nd_cmd_dimm_flags)
+
+#define ND_IOCTL_GET_CONFIG_SIZE	_IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_SIZE,\
+					struct nd_cmd_get_config_size)
+
+#define ND_IOCTL_GET_CONFIG_DATA	_IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_DATA,\
+					struct nd_cmd_get_config_data_hdr)
+
+#define ND_IOCTL_SET_CONFIG_DATA	_IOWR(ND_IOCTL, ND_CMD_SET_CONFIG_DATA,\
+					struct nd_cmd_set_config_hdr)
+
+#define ND_IOCTL_VENDOR			_IOWR(ND_IOCTL, ND_CMD_VENDOR,\
+					struct nd_cmd_vendor_hdr)
+
+#define ND_IOCTL_ARS_CAP		_IOWR(ND_IOCTL, ND_CMD_ARS_CAP,\
+					struct nd_cmd_ars_cap)
+
+#define ND_IOCTL_ARS_START		_IOWR(ND_IOCTL, ND_CMD_ARS_START,\
+					struct nd_cmd_ars_start)
+
+#define ND_IOCTL_ARS_STATUS		_IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\
+					struct nd_cmd_ars_status)
+
+#define ND_DEVICE_DIMM 1            /* nd_dimm: container for "config data" */
+#define ND_DEVICE_REGION_PMEM 2     /* nd_region: (parent of PMEM namespaces) */
+#define ND_DEVICE_REGION_BLK 3      /* nd_region: (parent of BLK namespaces) */
+#define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
+#define ND_DEVICE_NAMESPACE_PMEM 5  /* PMEM namespace (may alias with BLK) */
+#define ND_DEVICE_NAMESPACE_BLK 6   /* BLK namespace (may alias with PMEM) */
+
+enum nd_driver_flags {
+	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
+	ND_DRIVER_REGION_PMEM     = 1 << ND_DEVICE_REGION_PMEM,
+	ND_DRIVER_REGION_BLK      = 1 << ND_DEVICE_REGION_BLK,
+	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
+	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
+	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
+};
+
+enum {
+	ND_MIN_NAMESPACE_SIZE = 0x00400000,
+};
+#endif /* __NDCTL_H__ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 34e332b8d326..3a2ef67db6c7 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -528,4 +528,7 @@ source "lib/fonts/Kconfig"
 config ARCH_HAS_SG_CHAIN
 	def_bool n
 
+config ARCH_HAS_PMEM_API
+	bool
+
 endmenu
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
new file mode 100644
index 000000000000..8e9b64520ec1
--- /dev/null
+++ b/tools/testing/nvdimm/Kbuild
@@ -0,0 +1,40 @@
+ldflags-y += --wrap=ioremap_cache
+ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=__request_region
+ldflags-y += --wrap=__release_region
+
+DRIVERS := ../../../drivers
+NVDIMM_SRC := $(DRIVERS)/nvdimm
+ACPI_SRC := $(DRIVERS)/acpi
+
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_ACPI_NFIT) += nfit.o
+
+nfit-y := $(ACPI_SRC)/nfit.o
+nfit-y += config_check.o
+
+nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += config_check.o
+
+nd_btt-y := $(NVDIMM_SRC)/btt.o
+nd_btt-y += config_check.o
+
+nd_blk-y := $(NVDIMM_SRC)/blk.o
+nd_blk-y += config_check.o
+
+libnvdimm-y := $(NVDIMM_SRC)/core.o
+libnvdimm-y += $(NVDIMM_SRC)/bus.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm.o
+libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/region.o
+libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-y += config_check.o
+
+obj-m += test/
diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile
new file mode 100644
index 000000000000..3dfe024b4e7e
--- /dev/null
+++ b/tools/testing/nvdimm/Makefile
@@ -0,0 +1,7 @@
+KDIR ?= ../../../
+
+default:
+	$(MAKE) -C $(KDIR) M=$$PWD
+
+install: default
+	$(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
new file mode 100644
index 000000000000..f2c7615554eb
--- /dev/null
+++ b/tools/testing/nvdimm/config_check.c
@@ -0,0 +1,15 @@
+#include <linux/kconfig.h>
+#include <linux/bug.h>
+
+void check(void)
+{
+	/*
+	 * These kconfig symbols must be set to "m" for nfit_test to
+	 * load and operate.
+	 */
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+}
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
new file mode 100644
index 000000000000..9241064970fe
--- /dev/null
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -0,0 +1,8 @@
+ccflags-y := -I$(src)/../../../../drivers/nvdimm/
+ccflags-y += -I$(src)/../../../../drivers/acpi/
+
+obj-m += nfit_test.o
+obj-m += nfit_test_iomap.o
+
+nfit_test-y := nfit.o
+nfit_test_iomap-y := iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
new file mode 100644
index 000000000000..c85a6f6ba559
--- /dev/null
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/rculist.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include "nfit_test.h"
+
+static LIST_HEAD(iomap_head);
+
+static struct iomap_ops {
+	nfit_test_lookup_fn nfit_test_lookup;
+	struct list_head list;
+} iomap_ops = {
+	.list = LIST_HEAD_INIT(iomap_ops.list),
+};
+
+void nfit_test_setup(nfit_test_lookup_fn lookup)
+{
+	iomap_ops.nfit_test_lookup = lookup;
+	list_add_rcu(&iomap_ops.list, &iomap_head);
+}
+EXPORT_SYMBOL(nfit_test_setup);
+
+void nfit_test_teardown(void)
+{
+	list_del_rcu(&iomap_ops.list);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(nfit_test_teardown);
+
+static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+{
+	struct iomap_ops *ops;
+
+	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+	if (ops)
+		return ops->nfit_test_lookup(resource);
+	return NULL;
+}
+
+void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
+{
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res(offset);
+	rcu_read_unlock();
+	if (nfit_res)
+		return (void __iomem *) nfit_res->buf + offset
+			- nfit_res->res->start;
+	return fallback_fn(offset, size);
+}
+
+void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_cache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_cache);
+
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_nocache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
+
+void __wrap_iounmap(volatile void __iomem *addr)
+{
+	struct nfit_test_resource *nfit_res;
+
+	rcu_read_lock();
+	nfit_res = get_nfit_res((unsigned long) addr);
+	rcu_read_unlock();
+	if (nfit_res)
+		return;
+	return iounmap(addr);
+}
+EXPORT_SYMBOL(__wrap_iounmap);
+
+struct resource *__wrap___request_region(struct resource *parent,
+		resource_size_t start, resource_size_t n, const char *name,
+		int flags)
+{
+	struct nfit_test_resource *nfit_res;
+
+	if (parent == &iomem_resource) {
+		rcu_read_lock();
+		nfit_res = get_nfit_res(start);
+		rcu_read_unlock();
+		if (nfit_res) {
+			struct resource *res = nfit_res->res + 1;
+
+			if (start + n > nfit_res->res->start
+					+ resource_size(nfit_res->res)) {
+				pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
+						__func__, start, n,
+						nfit_res->res);
+				return NULL;
+			}
+
+			res->start = start;
+			res->end = start + n - 1;
+			res->name = name;
+			res->flags = resource_type(parent);
+			res->flags |= IORESOURCE_BUSY | flags;
+			pr_debug("%s: %pr\n", __func__, res);
+			return res;
+		}
+	}
+	return __request_region(parent, start, n, name, flags);
+}
+EXPORT_SYMBOL(__wrap___request_region);
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+				resource_size_t n)
+{
+	struct nfit_test_resource *nfit_res;
+
+	if (parent == &iomem_resource) {
+		rcu_read_lock();
+		nfit_res = get_nfit_res(start);
+		rcu_read_unlock();
+		if (nfit_res) {
+			struct resource *res = nfit_res->res + 1;
+
+			if (start != res->start || resource_size(res) != n)
+				pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
+						__func__, start, n, res);
+			else
+				memset(res, 0, sizeof(*res));
+			return;
+		}
+	}
+	__release_region(parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___release_region);
+
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
new file mode 100644
index 000000000000..4b69b8368de0
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -0,0 +1,1116 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/libnvdimm.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <nfit.h>
+#include <nd.h>
+#include "nfit_test.h"
+
+/*
+ * Generate an NFIT table to describe the following topology:
+ *
+ * BUS0: Interleaved PMEM regions, and aliasing with BLK regions
+ *
+ *                     (a)                       (b)            DIMM   BLK-REGION
+ *           +----------+--------------+----------+---------+
+ * +------+  |  blk2.0  |     pm0.0    |  blk2.1  |  pm1.0  |    0      region2
+ * | imc0 +--+- - - - - region0 - - - -+----------+         +
+ * +--+---+  |  blk3.0  |     pm0.0    |  blk3.1  |  pm1.0  |    1      region3
+ *    |      +----------+--------------v----------v         v
+ * +--+---+                            |                    |
+ * | cpu0 |                                    region1
+ * +--+---+                            |                    |
+ *    |      +-------------------------^----------^         ^
+ * +--+---+  |                 blk4.0             |  pm1.0  |    2      region4
+ * | imc1 +--+-------------------------+----------+         +
+ * +------+  |                 blk5.0             |  pm1.0  |    3      region5
+ *           +-------------------------+----------+-+-------+
+ *
+ * *) In this layout we have four dimms and two memory controllers in one
+ *    socket.  Each unique interface (BLK or PMEM) to DPA space
+ *    is identified by a region device with a dynamically assigned id.
+ *
+ * *) The first portion of dimm0 and dimm1 are interleaved as REGION0.
+ *    A single PMEM namespace "pm0.0" is created using half of the
+ *    REGION0 SPA-range.  REGION0 spans dimm0 and dimm1.  PMEM namespace
+ *    allocate from from the bottom of a region.  The unallocated
+ *    portion of REGION0 aliases with REGION2 and REGION3.  That
+ *    unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and
+ *    "blk3.0") starting at the base of each DIMM to offset (a) in those
+ *    DIMMs.  "pm0.0", "blk2.0" and "blk3.0" are free-form readable
+ *    names that can be assigned to a namespace.
+ *
+ * *) In the last portion of dimm0 and dimm1 we have an interleaved
+ *    SPA range, REGION1, that spans those two dimms as well as dimm2
+ *    and dimm3.  Some of REGION1 allocated to a PMEM namespace named
+ *    "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each
+ *    dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ *    "blk5.0".
+ *
+ * *) The portion of dimm2 and dimm3 that do not participate in the
+ *    REGION1 interleaved SPA range (i.e. the DPA address below offset
+ *    (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+ *    Note, that BLK namespaces need not be contiguous in DPA-space, and
+ *    can consume aliased capacity from multiple interleave sets.
+ *
+ * BUS1: Legacy NVDIMM (single contiguous range)
+ *
+ *  region2
+ * +---------------------+
+ * |---------------------|
+ * ||       pm2.0       ||
+ * |---------------------|
+ * +---------------------+
+ *
+ * *) A NFIT-table may describe a simple system-physical-address range
+ *    with no BLK aliasing.  This type of region may optionally
+ *    reference an NVDIMM.
+ */
+enum {
+	NUM_PM  = 2,
+	NUM_DCR = 4,
+	NUM_BDW = NUM_DCR,
+	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
+	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+	DIMM_SIZE = SZ_32M,
+	LABEL_SIZE = SZ_128K,
+	SPA0_SIZE = DIMM_SIZE,
+	SPA1_SIZE = DIMM_SIZE*2,
+	SPA2_SIZE = DIMM_SIZE,
+	BDW_SIZE = 64 << 8,
+	DCR_SIZE = 12,
+	NUM_NFITS = 2, /* permit testing multiple NFITs per system */
+};
+
+struct nfit_test_dcr {
+	__le64 bdw_addr;
+	__le32 bdw_status;
+	__u8 aperature[BDW_SIZE];
+};
+
+#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
+	(((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
+	 | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
+
+static u32 handle[NUM_DCR] = {
+	[0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
+	[1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
+	[2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
+	[3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
+};
+
+struct nfit_test {
+	struct acpi_nfit_desc acpi_desc;
+	struct platform_device pdev;
+	struct list_head resources;
+	void *nfit_buf;
+	dma_addr_t nfit_dma;
+	size_t nfit_size;
+	int num_dcr;
+	int num_pm;
+	void **dimm;
+	dma_addr_t *dimm_dma;
+	void **label;
+	dma_addr_t *label_dma;
+	void **spa_set;
+	dma_addr_t *spa_set_dma;
+	struct nfit_test_dcr **dcr;
+	dma_addr_t *dcr_dma;
+	int (*alloc)(struct nfit_test *t);
+	void (*setup)(struct nfit_test *t);
+};
+
+static struct nfit_test *to_nfit_test(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return container_of(pdev, struct nfit_test, pdev);
+}
+
+static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int buf_len)
+{
+	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	int i, rc;
+
+	if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
+		return -ENXIO;
+
+	/* lookup label space for the given dimm */
+	for (i = 0; i < ARRAY_SIZE(handle); i++)
+		if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+			break;
+	if (i >= ARRAY_SIZE(handle))
+		return -ENXIO;
+
+	switch (cmd) {
+	case ND_CMD_GET_CONFIG_SIZE: {
+		struct nd_cmd_get_config_size *nd_cmd = buf;
+
+		if (buf_len < sizeof(*nd_cmd))
+			return -EINVAL;
+		nd_cmd->status = 0;
+		nd_cmd->config_size = LABEL_SIZE;
+		nd_cmd->max_xfer = SZ_4K;
+		rc = 0;
+		break;
+	}
+	case ND_CMD_GET_CONFIG_DATA: {
+		struct nd_cmd_get_config_data_hdr *nd_cmd = buf;
+		unsigned int len, offset = nd_cmd->in_offset;
+
+		if (buf_len < sizeof(*nd_cmd))
+			return -EINVAL;
+		if (offset >= LABEL_SIZE)
+			return -EINVAL;
+		if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+			return -EINVAL;
+
+		nd_cmd->status = 0;
+		len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+		memcpy(nd_cmd->out_buf, t->label[i] + offset, len);
+		rc = buf_len - sizeof(*nd_cmd) - len;
+		break;
+	}
+	case ND_CMD_SET_CONFIG_DATA: {
+		struct nd_cmd_set_config_hdr *nd_cmd = buf;
+		unsigned int len, offset = nd_cmd->in_offset;
+		u32 *status;
+
+		if (buf_len < sizeof(*nd_cmd))
+			return -EINVAL;
+		if (offset >= LABEL_SIZE)
+			return -EINVAL;
+		if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+			return -EINVAL;
+
+		status = buf + nd_cmd->in_length + sizeof(*nd_cmd);
+		*status = 0;
+		len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+		memcpy(t->label[i] + offset, nd_cmd->in_buf, len);
+		rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+		break;
+	}
+	default:
+		return -ENOTTY;
+	}
+
+	return rc;
+}
+
+static DEFINE_SPINLOCK(nfit_test_lock);
+static struct nfit_test *instances[NUM_NFITS];
+
+static void release_nfit_res(void *data)
+{
+	struct nfit_test_resource *nfit_res = data;
+	struct resource *res = nfit_res->res;
+
+	spin_lock(&nfit_test_lock);
+	list_del(&nfit_res->list);
+	spin_unlock(&nfit_test_lock);
+
+	if (is_vmalloc_addr(nfit_res->buf))
+		vfree(nfit_res->buf);
+	else
+		dma_free_coherent(nfit_res->dev, resource_size(res),
+				nfit_res->buf, res->start);
+	kfree(res);
+	kfree(nfit_res);
+}
+
+static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
+		void *buf)
+{
+	struct device *dev = &t->pdev.dev;
+	struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
+	struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
+			GFP_KERNEL);
+	int rc;
+
+	if (!res || !buf || !nfit_res)
+		goto err;
+	rc = devm_add_action(dev, release_nfit_res, nfit_res);
+	if (rc)
+		goto err;
+	INIT_LIST_HEAD(&nfit_res->list);
+	memset(buf, 0, size);
+	nfit_res->dev = dev;
+	nfit_res->buf = buf;
+	nfit_res->res = res;
+	res->start = *dma;
+	res->end = *dma + size - 1;
+	res->name = "NFIT";
+	spin_lock(&nfit_test_lock);
+	list_add(&nfit_res->list, &t->resources);
+	spin_unlock(&nfit_test_lock);
+
+	return nfit_res->buf;
+ err:
+	if (buf && !is_vmalloc_addr(buf))
+		dma_free_coherent(dev, size, buf, *dma);
+	else if (buf)
+		vfree(buf);
+	kfree(res);
+	kfree(nfit_res);
+	return NULL;
+}
+
+static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
+{
+	void *buf = vmalloc(size);
+
+	*dma = (unsigned long) buf;
+	return __test_alloc(t, size, dma, buf);
+}
+
+static void *test_alloc_coherent(struct nfit_test *t, size_t size,
+		dma_addr_t *dma)
+{
+	struct device *dev = &t->pdev.dev;
+	void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
+
+	return __test_alloc(t, size, dma, buf);
+}
+
+static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(instances); i++) {
+		struct nfit_test_resource *n, *nfit_res = NULL;
+		struct nfit_test *t = instances[i];
+
+		if (!t)
+			continue;
+		spin_lock(&nfit_test_lock);
+		list_for_each_entry(n, &t->resources, list) {
+			if (addr >= n->res->start && (addr < n->res->start
+						+ resource_size(n->res))) {
+				nfit_res = n;
+				break;
+			} else if (addr >= (unsigned long) n->buf
+					&& (addr < (unsigned long) n->buf
+						+ resource_size(n->res))) {
+				nfit_res = n;
+				break;
+			}
+		}
+		spin_unlock(&nfit_test_lock);
+		if (nfit_res)
+			return nfit_res;
+	}
+
+	return NULL;
+}
+
+static int nfit_test0_alloc(struct nfit_test *t)
+{
+	size_t nfit_size = sizeof(struct acpi_table_nfit)
+			+ sizeof(struct acpi_nfit_system_address) * NUM_SPA
+			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
+			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW;
+	int i;
+
+	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+	if (!t->nfit_buf)
+		return -ENOMEM;
+	t->nfit_size = nfit_size;
+
+	t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
+	if (!t->spa_set[0])
+		return -ENOMEM;
+
+	t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
+	if (!t->spa_set[1])
+		return -ENOMEM;
+
+	for (i = 0; i < NUM_DCR; i++) {
+		t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
+		if (!t->dimm[i])
+			return -ENOMEM;
+
+		t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+		if (!t->label[i])
+			return -ENOMEM;
+		sprintf(t->label[i], "label%d", i);
+	}
+
+	for (i = 0; i < NUM_DCR; i++) {
+		t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
+		if (!t->dcr[i])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int nfit_test1_alloc(struct nfit_test *t)
+{
+	size_t nfit_size = sizeof(struct acpi_table_nfit)
+		+ sizeof(struct acpi_nfit_system_address)
+		+ sizeof(struct acpi_nfit_memory_map)
+		+ sizeof(struct acpi_nfit_control_region);
+
+	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+	if (!t->nfit_buf)
+		return -ENOMEM;
+	t->nfit_size = nfit_size;
+
+	t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
+	if (!t->spa_set[0])
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size)
+{
+	memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4);
+	nfit->header.length = size;
+	nfit->header.revision = 1;
+	memcpy(nfit->header.oem_id, "LIBND", 6);
+	memcpy(nfit->header.oem_table_id, "TEST", 5);
+	nfit->header.oem_revision = 1;
+	memcpy(nfit->header.asl_compiler_id, "TST", 4);
+	nfit->header.asl_compiler_revision = 1;
+}
+
+static void nfit_test0_setup(struct nfit_test *t)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct acpi_nfit_desc *acpi_desc;
+	struct acpi_nfit_memory_map *memdev;
+	void *nfit_buf = t->nfit_buf;
+	size_t size = t->nfit_size;
+	struct acpi_nfit_system_address *spa;
+	struct acpi_nfit_control_region *dcr;
+	struct acpi_nfit_data_region *bdw;
+	unsigned int offset;
+
+	nfit_test_init_header(nfit_buf, size);
+
+	/*
+	 * spa0 (interleave first half of dimm0 and dimm1, note storage
+	 * does not actually alias the related block-data-window
+	 * regions)
+	 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit);
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 0+1;
+	spa->address = t->spa_set_dma[0];
+	spa->length = SPA0_SIZE;
+
+	/*
+	 * spa1 (interleave last half of the 4 DIMMS, note storage
+	 * does not actually alias the related block-data-window
+	 * regions)
+	 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa);
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 1+1;
+	spa->address = t->spa_set_dma[1];
+	spa->length = SPA1_SIZE;
+
+	/* spa2 (dcr0) dimm0 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 2+1;
+	spa->address = t->dcr_dma[0];
+	spa->length = DCR_SIZE;
+
+	/* spa3 (dcr1) dimm1 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 3+1;
+	spa->address = t->dcr_dma[1];
+	spa->length = DCR_SIZE;
+
+	/* spa4 (dcr2) dimm2 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 4+1;
+	spa->address = t->dcr_dma[2];
+	spa->length = DCR_SIZE;
+
+	/* spa5 (dcr3) dimm3 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 5+1;
+	spa->address = t->dcr_dma[3];
+	spa->length = DCR_SIZE;
+
+	/* spa6 (bdw for dcr0) dimm0 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 6+1;
+	spa->address = t->dimm_dma[0];
+	spa->length = DIMM_SIZE;
+
+	/* spa7 (bdw for dcr1) dimm1 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 7+1;
+	spa->address = t->dimm_dma[1];
+	spa->length = DIMM_SIZE;
+
+	/* spa8 (bdw for dcr2) dimm2 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 8+1;
+	spa->address = t->dimm_dma[2];
+	spa->length = DIMM_SIZE;
+
+	/* spa9 (bdw for dcr3) dimm3 */
+	spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 9+1;
+	spa->address = t->dimm_dma[3];
+	spa->length = DIMM_SIZE;
+
+	offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10;
+	/* mem-region0 (spa0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = SPA0_SIZE/2;
+	memdev->region_offset = t->spa_set_dma[0];
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 2;
+
+	/* mem-region1 (spa0, dimm1) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = SPA0_SIZE/2;
+	memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 2;
+
+	/* mem-region2 (spa1, dimm0) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 1;
+	memdev->range_index = 1+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = t->spa_set_dma[1];
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+
+	/* mem-region3 (spa1, dimm1) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 1;
+	memdev->range_index = 1+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4;
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+
+	/* mem-region4 (spa1, dimm2) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 1+1;
+	memdev->region_index = 2+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4;
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+
+	/* mem-region5 (spa1, dimm3) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 1+1;
+	memdev->region_index = 3+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4;
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+
+	/* mem-region6 (spa/dcr0, dimm0) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 2+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region7 (spa/dcr1, dimm1) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 3+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region8 (spa/dcr2, dimm2) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 4+1;
+	memdev->region_index = 2+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region9 (spa/dcr3, dimm3) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 5+1;
+	memdev->region_index = 3+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region10 (spa/bdw0, dimm0) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 6+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region11 (spa/bdw1, dimm1) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 7+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region12 (spa/bdw2, dimm2) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 8+1;
+	memdev->region_index = 2+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	/* mem-region13 (spa/dcr3, dimm3) */
+	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 9+1;
+	memdev->region_index = 3+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+
+	offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
+	/* dcr-descriptor0 */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->region_index = 0+1;
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->serial_number = ~handle[0];
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+
+	/* dcr-descriptor1 */
+	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->region_index = 1+1;
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->serial_number = ~handle[1];
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+
+	/* dcr-descriptor2 */
+	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->region_index = 2+1;
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->serial_number = ~handle[2];
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+
+	/* dcr-descriptor3 */
+	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->region_index = 3+1;
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->serial_number = ~handle[3];
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+
+	offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
+	/* bdw0 (spa/dcr0, dimm0) */
+	bdw = nfit_buf + offset;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->region_index = 0+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+
+	/* bdw1 (spa/dcr1, dimm1) */
+	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->region_index = 1+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+
+	/* bdw2 (spa/dcr2, dimm2) */
+	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->region_index = 2+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+
+	/* bdw3 (spa/dcr3, dimm3) */
+	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->region_index = 3+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+
+	acpi_desc = &t->acpi_desc;
+	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
+	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+	nd_desc = &acpi_desc->nd_desc;
+	nd_desc->ndctl = nfit_test_ctl;
+}
+
+static void nfit_test1_setup(struct nfit_test *t)
+{
+	size_t size = t->nfit_size, offset;
+	void *nfit_buf = t->nfit_buf;
+	struct acpi_nfit_memory_map *memdev;
+	struct acpi_nfit_control_region *dcr;
+	struct acpi_nfit_system_address *spa;
+
+	nfit_test_init_header(nfit_buf, size);
+
+	offset = sizeof(struct acpi_table_nfit);
+	/* spa0 (flat range with no bdw aliasing) */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 0+1;
+	spa->address = t->spa_set_dma[0];
+	spa->length = SPA2_SIZE;
+
+	offset += sizeof(*spa);
+	/* mem-region0 (spa0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = 0;
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = SPA2_SIZE;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
+		| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
+		| ACPI_NFIT_MEM_ARMED;
+
+	offset += sizeof(*memdev);
+	/* dcr-descriptor0 */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->region_index = 0+1;
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->serial_number = ~0;
+	dcr->code = 0x201;
+	dcr->windows = 0;
+	dcr->window_size = 0;
+	dcr->command_offset = 0;
+	dcr->command_size = 0;
+	dcr->status_offset = 0;
+	dcr->status_size = 0;
+}
+
+static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
+		void *iobuf, u64 len, int rw)
+{
+	struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+	struct nd_region *nd_region = &ndbr->nd_region;
+	unsigned int lane;
+
+	lane = nd_region_acquire_lane(nd_region);
+	if (rw)
+		memcpy(mmio->base + dpa, iobuf, len);
+	else
+		memcpy(iobuf, mmio->base + dpa, len);
+	nd_region_release_lane(nd_region, lane);
+
+	return 0;
+}
+
+static int nfit_test_probe(struct platform_device *pdev)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct acpi_nfit_desc *acpi_desc;
+	struct device *dev = &pdev->dev;
+	struct nfit_test *nfit_test;
+	int rc;
+
+	nfit_test = to_nfit_test(&pdev->dev);
+
+	/* common alloc */
+	if (nfit_test->num_dcr) {
+		int num = nfit_test->num_dcr;
+
+		nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+				GFP_KERNEL);
+		nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->label_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		nfit_test->dcr = devm_kcalloc(dev, num,
+				sizeof(struct nfit_test_dcr *), GFP_KERNEL);
+		nfit_test->dcr_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
+				&& nfit_test->label_dma && nfit_test->dcr
+				&& nfit_test->dcr_dma)
+			/* pass */;
+		else
+			return -ENOMEM;
+	}
+
+	if (nfit_test->num_pm) {
+		int num = nfit_test->num_pm;
+
+		nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->spa_set_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		if (nfit_test->spa_set && nfit_test->spa_set_dma)
+			/* pass */;
+		else
+			return -ENOMEM;
+	}
+
+	/* per-nfit specific alloc */
+	if (nfit_test->alloc(nfit_test))
+		return -ENOMEM;
+
+	nfit_test->setup(nfit_test);
+	acpi_desc = &nfit_test->acpi_desc;
+	acpi_desc->dev = &pdev->dev;
+	acpi_desc->nfit = nfit_test->nfit_buf;
+	acpi_desc->blk_do_io = nfit_test_blk_do_io;
+	nd_desc = &acpi_desc->nd_desc;
+	nd_desc->attr_groups = acpi_nfit_attribute_groups;
+	acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
+	if (!acpi_desc->nvdimm_bus)
+		return -ENXIO;
+
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
+	if (rc) {
+		nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+		return rc;
+	}
+
+	return 0;
+}
+
+static int nfit_test_remove(struct platform_device *pdev)
+{
+	struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
+	struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
+
+	nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
+
+	return 0;
+}
+
+static void nfit_test_release(struct device *dev)
+{
+	struct nfit_test *nfit_test = to_nfit_test(dev);
+
+	kfree(nfit_test);
+}
+
+static const struct platform_device_id nfit_test_id[] = {
+	{ KBUILD_MODNAME },
+	{ },
+};
+
+static struct platform_driver nfit_test_driver = {
+	.probe = nfit_test_probe,
+	.remove = nfit_test_remove,
+	.driver = {
+		.name = KBUILD_MODNAME,
+	},
+	.id_table = nfit_test_id,
+};
+
+#ifdef CONFIG_CMA_SIZE_MBYTES
+#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
+#else
+#define CMA_SIZE_MBYTES 0
+#endif
+
+static __init int nfit_test_init(void)
+{
+	int rc, i;
+
+	nfit_test_setup(nfit_test_lookup);
+
+	for (i = 0; i < NUM_NFITS; i++) {
+		struct nfit_test *nfit_test;
+		struct platform_device *pdev;
+		static int once;
+
+		nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
+		if (!nfit_test) {
+			rc = -ENOMEM;
+			goto err_register;
+		}
+		INIT_LIST_HEAD(&nfit_test->resources);
+		switch (i) {
+		case 0:
+			nfit_test->num_pm = NUM_PM;
+			nfit_test->num_dcr = NUM_DCR;
+			nfit_test->alloc = nfit_test0_alloc;
+			nfit_test->setup = nfit_test0_setup;
+			break;
+		case 1:
+			nfit_test->num_pm = 1;
+			nfit_test->alloc = nfit_test1_alloc;
+			nfit_test->setup = nfit_test1_setup;
+			break;
+		default:
+			rc = -EINVAL;
+			goto err_register;
+		}
+		pdev = &nfit_test->pdev;
+		pdev->name = KBUILD_MODNAME;
+		pdev->id = i;
+		pdev->dev.release = nfit_test_release;
+		rc = platform_device_register(pdev);
+		if (rc) {
+			put_device(&pdev->dev);
+			goto err_register;
+		}
+
+		rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+		if (rc)
+			goto err_register;
+
+		instances[i] = nfit_test;
+
+		if (!once++) {
+			dma_addr_t dma;
+			void *buf;
+
+			buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
+					GFP_KERNEL);
+			if (!buf) {
+				rc = -ENOMEM;
+				dev_warn(&pdev->dev, "need 128M of free cma\n");
+				goto err_register;
+			}
+			dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
+		}
+	}
+
+	rc = platform_driver_register(&nfit_test_driver);
+	if (rc)
+		goto err_register;
+	return 0;
+
+ err_register:
+	for (i = 0; i < NUM_NFITS; i++)
+		if (instances[i])
+			platform_device_unregister(&instances[i]->pdev);
+	nfit_test_teardown();
+	return rc;
+}
+
+static __exit void nfit_test_exit(void)
+{
+	int i;
+
+	platform_driver_unregister(&nfit_test_driver);
+	for (i = 0; i < NUM_NFITS; i++)
+		platform_device_unregister(&instances[i]->pdev);
+	nfit_test_teardown();
+}
+
+module_init(nfit_test_init);
+module_exit(nfit_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
new file mode 100644
index 000000000000..96c5e16d7db9
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_TEST_H__
+#define __NFIT_TEST_H__
+
+struct nfit_test_resource {
+	struct list_head list;
+	struct resource *res;
+	struct device *dev;
+	void *buf;
+};
+
+typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
+		unsigned long size);
+void __wrap_iounmap(volatile void __iomem *addr);
+void nfit_test_setup(nfit_test_lookup_fn lookup);
+void nfit_test_teardown(void);
+#endif