diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/Kconfig | 2 | ||||
-rw-r--r-- | drivers/Makefile | 1 | ||||
-rw-r--r-- | drivers/dax/Kconfig | 26 | ||||
-rw-r--r-- | drivers/dax/Makefile | 4 | ||||
-rw-r--r-- | drivers/dax/dax.c | 575 | ||||
-rw-r--r-- | drivers/dax/dax.h | 24 | ||||
-rw-r--r-- | drivers/dax/pmem.c | 158 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 9 | ||||
-rw-r--r-- | drivers/nvdimm/claim.c | 23 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 3 | ||||
-rw-r--r-- | drivers/nvdimm/dax_devs.c | 35 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 5 | ||||
-rw-r--r-- | drivers/nvdimm/nd-core.h | 3 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 11 | ||||
-rw-r--r-- | drivers/nvdimm/pfn.h | 1 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 40 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 3 | ||||
-rw-r--r-- | drivers/nvdimm/region_devs.c | 5 |
18 files changed, 893 insertions, 35 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index d2ac339de85f..8298eab84a6f 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -190,6 +190,8 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/dax/Kconfig" + source "drivers/nvmem/Kconfig" source "drivers/hwtracing/stm/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 8f5d076baeb0..0b6f3d60193d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ +obj-$(CONFIG_DEV_DAX) += dax/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig new file mode 100644 index 000000000000..cedab7572de3 --- /dev/null +++ b/drivers/dax/Kconfig @@ -0,0 +1,26 @@ +menuconfig DEV_DAX + tristate "DAX: direct access to differentiated memory" + default m if NVDIMM_DAX + depends on TRANSPARENT_HUGEPAGE + help + Support raw access to differentiated (persistence, bandwidth, + latency...) memory via an mmap(2) capable character + device. Platform firmware or a device driver may identify a + platform memory resource that is differentiated from the + baseline memory pool. Mappings of a /dev/daxX.Y device impose + restrictions that make the mapping behavior deterministic. + +if DEV_DAX + +config DEV_DAX_PMEM + tristate "PMEM DAX: direct access to persistent memory" + depends on NVDIMM_DAX + default DEV_DAX + help + Support raw access to persistent memory. Note that this + driver consumes memory ranges allocated and exported by the + libnvdimm sub-system. + + Say Y if unsure + +endif diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile new file mode 100644 index 000000000000..27c54e38478a --- /dev/null +++ b/drivers/dax/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o + +dax_pmem-y := pmem.o diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c new file mode 100644 index 000000000000..b891a129b275 --- /dev/null +++ b/drivers/dax/dax.c @@ -0,0 +1,575 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/pagemap.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pfn_t.h> +#include <linux/slab.h> +#include <linux/dax.h> +#include <linux/fs.h> +#include <linux/mm.h> + +static int dax_major; +static struct class *dax_class; +static DEFINE_IDA(dax_minor_ida); + +/** + * struct dax_region - mapping infrastructure for dax devices + * @id: kernel-wide unique region for a memory range + * @base: linear address corresponding to @res + * @kref: to pin while other agents have a need to do lookups + * @dev: parent device backing this region + * @align: allocation and mapping alignment for child dax devices + * @res: physical address range of the region + * @pfn_flags: identify whether the pfns are paged back or not + */ +struct dax_region { + int id; + struct ida ida; + void *base; + struct kref kref; + struct device *dev; + unsigned int align; + struct resource res; + unsigned long pfn_flags; +}; + +/** + * struct dax_dev - subdivision of a dax region + * @region - parent region + * @dev - device backing the character device + * @kref - enable this data to be tracked in filp->private_data + * @alive - !alive + rcu grace period == no new mappings can be established + * @id - child id in the region + * @num_resources - number of physical address extents in this device + * @res - array of physical address ranges + */ +struct dax_dev { + struct dax_region *region; + struct device *dev; + struct kref kref; + bool alive; + int id; + int num_resources; + struct resource res[0]; +}; + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + +static void dax_dev_free(struct kref *kref) +{ + struct dax_dev *dax_dev; + + dax_dev = container_of(kref, struct dax_dev, kref); + dax_region_put(dax_dev->region); + kfree(dax_dev); +} + +static void dax_dev_put(struct dax_dev *dax_dev) +{ + kref_put(&dax_dev->kref, dax_dev_free); +} + +struct dax_region *alloc_dax_region(struct device *parent, int region_id, + struct resource *res, unsigned int align, void *addr, + unsigned long pfn_flags) +{ + struct dax_region *dax_region; + + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + + if (!dax_region) + return NULL; + + memcpy(&dax_region->res, res, sizeof(*res)); + dax_region->pfn_flags = pfn_flags; + kref_init(&dax_region->kref); + dax_region->id = region_id; + ida_init(&dax_region->ida); + dax_region->align = align; + dax_region->dev = parent; + dax_region->base = addr; + + return dax_region; +} +EXPORT_SYMBOL_GPL(alloc_dax_region); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_dev *dax_dev = dev_get_drvdata(dev); + unsigned long long size = 0; + int i; + + for (i = 0; i < dax_dev->num_resources; i++) + size += resource_size(&dax_dev->res[i]); + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *dax_device_attributes[] = { + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group dax_device_attribute_group = { + .attrs = dax_device_attributes, +}; + +static const struct attribute_group *dax_attribute_groups[] = { + &dax_device_attribute_group, + NULL, +}; + +static void unregister_dax_dev(void *_dev) +{ + struct device *dev = _dev; + struct dax_dev *dax_dev = dev_get_drvdata(dev); + struct dax_region *dax_region = dax_dev->region; + + dev_dbg(dev, "%s\n", __func__); + + /* + * Note, rcu is not protecting the liveness of dax_dev, rcu is + * ensuring that any fault handlers that might have seen + * dax_dev->alive == true, have completed. Any fault handlers + * that start after synchronize_rcu() has started will abort + * upon seeing dax_dev->alive == false. + */ + dax_dev->alive = false; + synchronize_rcu(); + + get_device(dev); + device_unregister(dev); + ida_simple_remove(&dax_region->ida, dax_dev->id); + ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); + put_device(dev); + dax_dev_put(dax_dev); +} + +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count) +{ + struct device *parent = dax_region->dev; + struct dax_dev *dax_dev; + struct device *dev; + int rc, minor; + dev_t dev_t; + + dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); + if (!dax_dev) + return -ENOMEM; + memcpy(dax_dev->res, res, sizeof(*res) * count); + dax_dev->num_resources = count; + kref_init(&dax_dev->kref); + dax_dev->alive = true; + dax_dev->region = dax_region; + kref_get(&dax_region->kref); + + dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); + if (dax_dev->id < 0) { + rc = dax_dev->id; + goto err_id; + } + + minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); + if (minor < 0) { + rc = minor; + goto err_minor; + } + + dev_t = MKDEV(dax_major, minor); + dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev, + dax_attribute_groups, "dax%d.%d", dax_region->id, + dax_dev->id); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto err_create; + } + dax_dev->dev = dev; + + rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); + if (rc) { + unregister_dax_dev(dev); + return rc; + } + + return 0; + + err_create: + ida_simple_remove(&dax_minor_ida, minor); + err_minor: + ida_simple_remove(&dax_region->ida, dax_dev->id); + err_id: + dax_dev_put(dax_dev); + + return rc; +} +EXPORT_SYMBOL_GPL(devm_create_dax_dev); + +/* return an unmapped area aligned to the dax region specified alignment */ +static unsigned long dax_dev_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + unsigned long off, off_end, off_align, len_align, addr_align, align; + struct dax_dev *dax_dev = filp ? filp->private_data : NULL; + struct dax_region *dax_region; + + if (!dax_dev || addr) + goto out; + + dax_region = dax_dev->region; + align = dax_region->align; + off = pgoff << PAGE_SHIFT; + off_end = off + len; + off_align = round_up(off, align); + + if ((off_end <= off_align) || ((off_end - off_align) < align)) + goto out; + + len_align = len + align; + if ((off + len_align) < off) + goto out; + + addr_align = current->mm->get_unmapped_area(filp, addr, len_align, + pgoff, flags); + if (!IS_ERR_VALUE(addr_align)) { + addr_align += (off - addr_align) & (align - 1); + return addr_align; + } + out: + return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); +} + +static int __match_devt(struct device *dev, const void *data) +{ + const dev_t *devt = data; + + return dev->devt == *devt; +} + +static struct device *dax_dev_find(dev_t dev_t) +{ + return class_find_device(dax_class, NULL, &dev_t, __match_devt); +} + +static int dax_dev_open(struct inode *inode, struct file *filp) +{ + struct dax_dev *dax_dev = NULL; + struct device *dev; + + dev = dax_dev_find(inode->i_rdev); + if (!dev) + return -ENXIO; + + device_lock(dev); + dax_dev = dev_get_drvdata(dev); + if (dax_dev) { + dev_dbg(dev, "%s\n", __func__); + filp->private_data = dax_dev; + kref_get(&dax_dev->kref); + inode->i_flags = S_DAX; + } + device_unlock(dev); + + if (!dax_dev) { + put_device(dev); + return -ENXIO; + } + return 0; +} + +static int dax_dev_release(struct inode *inode, struct file *filp) +{ + struct dax_dev *dax_dev = filp->private_data; + struct device *dev = dax_dev->dev; + + dev_dbg(dax_dev->dev, "%s\n", __func__); + dax_dev_put(dax_dev); + put_device(dev); + + return 0; +} + +static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma, + const char *func) +{ + struct dax_region *dax_region = dax_dev->region; + struct device *dev = dax_dev->dev; + unsigned long mask; + + if (!dax_dev->alive) + return -ENXIO; + + /* prevent private / writable mappings from being established */ + if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) { + dev_info(dev, "%s: %s: fail, attempted private mapping\n", + current->comm, func); + return -EINVAL; + } + + mask = dax_region->align - 1; + if (vma->vm_start & mask || vma->vm_end & mask) { + dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", + current->comm, func, vma->vm_start, vma->vm_end, + mask); + return -EINVAL; + } + + if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV + && (vma->vm_flags & VM_DONTCOPY) == 0) { + dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n", + current->comm, func); + return -EINVAL; + } + + if (!vma_is_dax(vma)) { + dev_info(dev, "%s: %s: fail, vma is not DAX capable\n", + current->comm, func); + return -EINVAL; + } + + return 0; +} + +static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff, + unsigned long size) +{ + struct resource *res; + phys_addr_t phys; + int i; + + for (i = 0; i < dax_dev->num_resources; i++) { + res = &dax_dev->res[i]; + phys = pgoff * PAGE_SIZE + res->start; + if (phys >= res->start && phys <= res->end) + break; + pgoff -= PHYS_PFN(resource_size(res)); + } + + if (i < dax_dev->num_resources) { + res = &dax_dev->res[i]; + if (phys + size - 1 <= res->end) + return phys; + } + + return -1; +} + +static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + unsigned long vaddr = (unsigned long) vmf->virtual_address; + struct device *dev = dax_dev->dev; + struct dax_region *dax_region; + int rc = VM_FAULT_SIGBUS; + phys_addr_t phys; + pfn_t pfn; + + if (check_vma(dax_dev, vma, __func__)) + return VM_FAULT_SIGBUS; + + dax_region = dax_dev->region; + if (dax_region->align > PAGE_SIZE) { + dev_dbg(dev, "%s: alignment > fault size\n", __func__); + return VM_FAULT_SIGBUS; + } + + phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); + if (phys == -1) { + dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + vmf->pgoff); + return VM_FAULT_SIGBUS; + } + + pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + + rc = vm_insert_mixed(vma, vaddr, pfn); + + if (rc == -ENOMEM) + return VM_FAULT_OOM; + if (rc < 0 && rc != -EBUSY) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int rc; + struct file *filp = vma->vm_file; + struct dax_dev *dax_dev = filp->private_data; + + dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, + current->comm, (vmf->flags & FAULT_FLAG_WRITE) + ? "write" : "read", vma->vm_start, vma->vm_end); + rcu_read_lock(); + rc = __dax_dev_fault(dax_dev, vma, vmf); + rcu_read_unlock(); + + return rc; +} + +static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, + struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, + unsigned int flags) +{ + unsigned long pmd_addr = addr & PMD_MASK; + struct device *dev = dax_dev->dev; + struct dax_region *dax_region; + phys_addr_t phys; + pgoff_t pgoff; + pfn_t pfn; + + if (check_vma(dax_dev, vma, __func__)) + return VM_FAULT_SIGBUS; + + dax_region = dax_dev->region; + if (dax_region->align > PMD_SIZE) { + dev_dbg(dev, "%s: alignment > fault size\n", __func__); + return VM_FAULT_SIGBUS; + } + + /* dax pmd mappings require pfn_t_devmap() */ + if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { + dev_dbg(dev, "%s: alignment > fault size\n", __func__); + return VM_FAULT_SIGBUS; + } + + pgoff = linear_page_index(vma, pmd_addr); + phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE); + if (phys == -1) { + dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + pgoff); + return VM_FAULT_SIGBUS; + } + + pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); + + return vmf_insert_pfn_pmd(vma, addr, pmd, pfn, + flags & FAULT_FLAG_WRITE); +} + +static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, unsigned int flags) +{ + int rc; + struct file *filp = vma->vm_file; + struct dax_dev *dax_dev = filp->private_data; + + dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__, + current->comm, (flags & FAULT_FLAG_WRITE) + ? "write" : "read", vma->vm_start, vma->vm_end); + + rcu_read_lock(); + rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); + rcu_read_unlock(); + + return rc; +} + +static void dax_dev_vm_open(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct dax_dev *dax_dev = filp->private_data; + + dev_dbg(dax_dev->dev, "%s\n", __func__); + kref_get(&dax_dev->kref); +} + +static void dax_dev_vm_close(struct vm_area_struct *vma) +{ + struct file *filp = vma->vm_file; + struct dax_dev *dax_dev = filp->private_data; + + dev_dbg(dax_dev->dev, "%s\n", __func__); + dax_dev_put(dax_dev); +} + +static const struct vm_operations_struct dax_dev_vm_ops = { + .fault = dax_dev_fault, + .pmd_fault = dax_dev_pmd_fault, + .open = dax_dev_vm_open, + .close = dax_dev_vm_close, +}; + +static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct dax_dev *dax_dev = filp->private_data; + int rc; + + dev_dbg(dax_dev->dev, "%s\n", __func__); + + rc = check_vma(dax_dev, vma, __func__); + if (rc) + return rc; + + kref_get(&dax_dev->kref); + vma->vm_ops = &dax_dev_vm_ops; + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; + return 0; + +} + +static const struct file_operations dax_fops = { + .llseek = noop_llseek, + .owner = THIS_MODULE, + .open = dax_dev_open, + .release = dax_dev_release, + .get_unmapped_area = dax_dev_get_unmapped_area, + .mmap = dax_dev_mmap, +}; + +static int __init dax_init(void) +{ + int rc; + + rc = register_chrdev(0, "dax", &dax_fops); + if (rc < 0) + return rc; + dax_major = rc; + + dax_class = class_create(THIS_MODULE, "dax"); + if (IS_ERR(dax_class)) { + unregister_chrdev(dax_major, "dax"); + return PTR_ERR(dax_class); + } + + return 0; +} + +static void __exit dax_exit(void) +{ + class_destroy(dax_class); + unregister_chrdev(dax_major, "dax"); + ida_destroy(&dax_minor_ida); +} + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +subsys_initcall(dax_init); +module_exit(dax_exit); diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h new file mode 100644 index 000000000000..d8b8f1f25054 --- /dev/null +++ b/drivers/dax/dax.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __DAX_H__ +#define __DAX_H__ +struct device; +struct resource; +struct dax_region; +void dax_region_put(struct dax_region *dax_region); +struct dax_region *alloc_dax_region(struct device *parent, + int region_id, struct resource *res, unsigned int align, + void *addr, unsigned long flags); +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count); +#endif /* __DAX_H__ */ diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c new file mode 100644 index 000000000000..55d510e36cd1 --- /dev/null +++ b/drivers/dax/pmem.c @@ -0,0 +1,158 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/percpu-refcount.h> +#include <linux/memremap.h> +#include <linux/module.h> +#include <linux/pfn_t.h> +#include "../nvdimm/pfn.h" +#include "../nvdimm/nd.h" +#include "dax.h" + +struct dax_pmem { + struct device *dev; + struct percpu_ref ref; + struct completion cmp; +}; + +struct dax_pmem *to_dax_pmem(struct percpu_ref *ref) +{ + return container_of(ref, struct dax_pmem, ref); +} + +static void dax_pmem_percpu_release(struct percpu_ref *ref) +{ + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + complete(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_exit(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_exit(ref); + wait_for_completion(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_kill(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_kill(ref); +} + +static int dax_pmem_probe(struct device *dev) +{ + int rc; + void *addr; + struct resource res; + struct nd_pfn_sb *pfn_sb; + struct dax_pmem *dax_pmem; + struct nd_region *nd_region; + struct nd_namespace_io *nsio; + struct dax_region *dax_region; + struct nd_namespace_common *ndns; + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct vmem_altmap __altmap, *altmap = NULL; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + nsio = to_nd_namespace_io(&ndns->dev); + + /* parse the 'pfn' info block via ->rw_bytes */ + devm_nsio_enable(dev, nsio); + altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + devm_nsio_disable(dev, nsio); + + pfn_sb = nd_pfn->pfn_sb; + + if (!devm_request_mem_region(dev, nsio->res.start, + resource_size(&nsio->res), dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); + return -EBUSY; + } + + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + + dax_pmem->dev = dev; + init_completion(&dax_pmem->cmp); + rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, + GFP_KERNEL); + if (rc) + return rc; + + rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_exit(&dax_pmem->ref); + return rc; + } + + addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_kill(&dax_pmem->ref); + return rc; + } + + nd_region = to_nd_region(dev->parent); + dax_region = alloc_dax_region(dev, nd_region->id, &res, + le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + /* TODO: support for subdividing a dax region... */ + rc = devm_create_dax_dev(dax_region, &res, 1); + + /* child dax_dev instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + + return rc; +} + +static struct nd_device_driver dax_pmem_driver = { + .probe = dax_pmem_probe, + .drv = { + .name = "dax_pmem", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_init(void) +{ + return nd_driver_register(&dax_pmem_driver); +} +module_init(dax_pmem_init); + +static void __exit dax_pmem_exit(void) +{ + driver_unregister(&dax_pmem_driver.drv); +} +module_exit(dax_pmem_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 04c2c3fda1ab..f085f8bceae8 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -124,9 +124,10 @@ static int nvdimm_bus_remove(struct device *dev) struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - int rc; + int rc = 0; - rc = nd_drv->remove(dev); + if (nd_drv->remove) + rc = nd_drv->remove(dev); nd_region_disable(nvdimm_bus, dev); dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, @@ -296,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, return -EINVAL; } - if (!nd_drv->probe || !nd_drv->remove) { - pr_debug("->probe() and ->remove() must be specified\n"); + if (!nd_drv->probe) { + pr_debug("%s ->probe() must be specified\n", mod_name); return -EINVAL; } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 5f53db59a058..8b2e3c4fb0ad 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -93,6 +93,25 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) return true; } +struct nd_pfn *to_nd_pfn_safe(struct device *dev) +{ + /* + * pfn device attributes are re-used by dax device instances, so we + * need to be careful to correct device-to-nd_pfn conversion. + */ + if (is_nd_pfn(dev)) + return to_nd_pfn(dev); + + if (is_nd_dax(dev)) { + struct nd_dax *nd_dax = to_nd_dax(dev); + + return &nd_dax->nd_pfn; + } + + WARN_ON(1); + return NULL; +} + static void nd_detach_and_reset(struct device *dev, struct nd_namespace_common **_ndns) { @@ -106,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev, nd_btt->lbasize = 0; kfree(nd_btt->uuid); nd_btt->uuid = NULL; - } else if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + } else if (is_nd_pfn(dev) || is_nd_dax(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); kfree(nd_pfn->uuid); nd_pfn->uuid = NULL; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index e8688a13cf4f..be89764315c2 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void) nd_region_exit(); nvdimm_exit(); nvdimm_bus_exit(); + nd_region_devs_exit(); + nvdimm_devs_exit(); + ida_destroy(&nd_ida); } MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index f90f7549e7f4..45fa82cae87c 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/mm.h> #include "nd-core.h" +#include "pfn.h" #include "nd.h" static void nd_dax_release(struct device *dev) @@ -97,3 +98,37 @@ struct device *nd_dax_create(struct nd_region *nd_region) __nd_device_register(dev); return dev; } + +int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) +{ + int rc; + struct nd_dax *nd_dax; + struct device *dax_dev; + struct nd_pfn *nd_pfn; + struct nd_pfn_sb *pfn_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + nd_dax = nd_dax_alloc(nd_region); + nd_pfn = &nd_dax->nd_pfn; + dax_dev = nd_pfn_devinit(nd_pfn, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dax_dev) + return -ENOMEM; + pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn, DAX_SIG); + dev_dbg(dev, "%s: dax: %s\n", __func__, + rc == 0 ? dev_name(dax_dev) : "<none>"); + if (rc < 0) { + __nd_detach_ndns(dax_dev, &nd_pfn->ndns); + put_device(dax_dev); + } else + __nd_device_register(dax_dev); + + return rc; +} +EXPORT_SYMBOL(nd_dax_probe); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 79a35a02053c..bbde28d3dec5 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -552,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) return 0; } EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); + +void __exit nvdimm_devs_exit(void) +{ + ida_destroy(&dimm_ida); +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 86d985ccce82..284cdaa268cf 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -49,6 +49,8 @@ bool is_nd_blk(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); +void nvdimm_devs_exit(void); +void nd_region_devs_exit(void); void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); struct nd_region; void nd_region_create_blk_seed(struct nd_region *nd_region); @@ -92,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, ssize_t nd_namespace_store(struct device *dev, struct nd_namespace_common **_ndns, const char *buf, size_t len); +struct nd_pfn *to_nd_pfn_safe(struct device *dev); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 46910b8f32b1..d0ac93c31dda 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -232,7 +232,7 @@ bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns); -int nd_pfn_validate(struct nd_pfn *nd_pfn); +int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig); extern struct attribute_group nd_pfn_attribute_group; #else static inline int nd_pfn_probe(struct device *dev, @@ -251,7 +251,7 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region) return NULL; } -static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) +static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { return -ENODEV; } @@ -259,9 +259,16 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) struct nd_dax *to_nd_dax(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_DAX) +int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_dax(struct device *dev); struct device *nd_dax_create(struct nd_region *nd_region); #else +static inline int nd_dax_probe(struct device *dev, + struct nd_namespace_common *ndns) +{ + return -ENODEV; +} + static inline bool is_nd_dax(struct device *dev) { return false; diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index 9d2704c83fa7..dde9853453d3 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -19,6 +19,7 @@ #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" +#define DAX_SIG "NVDIMM_DAX_INFO\0" struct nd_pfn_sb { u8 signature[PFN_SIG_LEN]; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 2248056d29e7..f7718ec685fa 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -54,25 +54,6 @@ struct nd_pfn *to_nd_pfn(struct device *dev) } EXPORT_SYMBOL(to_nd_pfn); -static struct nd_pfn *to_nd_pfn_safe(struct device *dev) -{ - /* - * pfn device attributes are re-used by dax device instances, so we - * need to be careful to correct device-to-nd_pfn conversion. - */ - if (is_nd_pfn(dev)) - return to_nd_pfn(dev); - - if (is_nd_dax(dev)) { - struct nd_dax *nd_dax = to_nd_dax(dev); - - return &nd_dax->nd_pfn; - } - - WARN_ON(1); - return NULL; -} - static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -360,7 +341,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) return dev; } -int nd_pfn_validate(struct nd_pfn *nd_pfn) +int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; struct nd_namespace_io *nsio; @@ -377,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) return -ENXIO; - if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) + if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) return -ENODEV; checksum = le64_to_cpu(pfn_sb->checksum); @@ -416,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) return -ENODEV; } + if (nd_pfn->align == 0) + nd_pfn->align = le32_to_cpu(pfn_sb->align); if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", nd_pfn->align, nvdimm_namespace_capacity(ndns)); @@ -436,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) return -EBUSY; } - nd_pfn->align = le32_to_cpu(pfn_sb->align); - if (!is_power_of_2(offset) || offset < PAGE_SIZE) { + if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align)) + || !IS_ALIGNED(offset, PAGE_SIZE)) { dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", offset); return -ENXIO; @@ -467,7 +450,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; - rc = nd_pfn_validate(nd_pfn); + rc = nd_pfn_validate(nd_pfn, PFN_SIG); dev_dbg(dev, "%s: pfn: %s\n", __func__, rc == 0 ? dev_name(pfn_dev) : "<none>"); if (rc < 0) { @@ -552,6 +535,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; + const char *sig; u64 checksum; int rc; @@ -560,7 +544,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) return -ENOMEM; nd_pfn->pfn_sb = pfn_sb; - rc = nd_pfn_validate(nd_pfn); + if (is_nd_dax(&nd_pfn->dev)) + sig = DAX_SIG; + else + sig = PFN_SIG; + rc = nd_pfn_validate(nd_pfn, sig); if (rc != -ENODEV) return rc; @@ -635,7 +623,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); - memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); + memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d9a0dbc2d023..042baec56931 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -320,7 +320,8 @@ static int nd_pmem_probe(struct device *dev) return pmem_attach_disk(dev, ndns); /* if we find a valid info-block we'll come back as that personality */ - if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) + if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0 + || nd_dax_probe(dev, ndns) == 0) return -ENXIO; /* ...otherwise we're just a raw pmem device */ diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 9e1b054e0e61..40fcfea26fbb 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -793,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, __func__); } EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); + +void __exit nd_region_devs_exit(void) +{ + ida_destroy(®ion_ida); +} |