Diffstat (limited to 'drivers')
73 files changed, 14097 insertions, 853 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig index c0cc96bab9e7..6e973b8e3a3b 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -182,4 +182,6 @@ source "drivers/thunderbolt/Kconfig" source "drivers/android/Kconfig" +source "drivers/nvdimm/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 9a02fb7c5106..b64b49f6e01b 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ +obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 35da507411a0..f15db002be8e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -386,6 +386,32 @@ config ACPI_REDUCED_HARDWARE_ONLY If you are unsure what to do, do not enable this option. +config ACPI_NFIT + tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + select LIBNVDIMM + help + Infrastructure to probe ACPI 6 compliant platforms for + NVDIMMs (NFIT) and register a libnvdimm device tree. In + addition to storage devices this also enables libnvdimm to pass + ACPI._DSM messages for platform/dimm configuration. + + To compile this driver as a module, choose M here: + the module will be called nfit. + +config ACPI_NFIT_DEBUG + bool "NFIT DSM debug" + depends on ACPI_NFIT + depends on DYNAMIC_DEBUG + default n + help + Enabling this option causes the nfit driver to dump the + input and output buffers of _DSM operations on the ACPI0012 + device and its children. This can be very verbose, so leave + it disabled unless you are debugging a hardware / firmware + issue. + source "drivers/acpi/apei/Kconfig" config ACPI_EXTLOG diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 73d840bef455..8321430d7f24 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-y += container.o obj-$(CONFIG_ACPI_THERMAL) += thermal.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o obj-y += acpi_memhotplug.o obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o obj-$(CONFIG_ACPI_BATTERY) += battery.o diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c new file mode 100644 index 000000000000..2161fa178c8d --- /dev/null +++ b/drivers/acpi/nfit.c @@ -0,0 +1,1587 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/list_sort.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/list.h> +#include <linux/acpi.h> +#include <linux/sort.h> +#include <linux/io.h> +#include "nfit.h" + +/* + * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is + * irrelevant. 
+ */ +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +static bool force_enable_dimms; +module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); + +static u8 nfit_uuid[NFIT_UUID_MAX][16]; + +const u8 *to_nfit_uuid(enum nfit_uuids id) +{ + return nfit_uuid[id]; +} +EXPORT_SYMBOL(to_nfit_uuid); + +static struct acpi_nfit_desc *to_acpi_nfit_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + + /* + * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct + * acpi_device. + */ + if (!nd_desc->provider_name + || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0) + return NULL; + + return to_acpi_device(acpi_desc->dev); +} + +static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + const struct nd_cmd_desc *desc = NULL; + union acpi_object in_obj, in_buf, *out_obj; + struct device *dev = acpi_desc->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + acpi_handle handle; + const u8 *uuid; + u32 offset; + int rc, i; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_device *adev = nfit_mem->adev; + + if (!adev) + return -ENOTTY; + dimm_name = nvdimm_name(nvdimm); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nfit_mem->dsm_mask; + desc = nd_cmd_dimm_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_DIMM); + handle = adev->handle; + } else { + struct acpi_device *adev = to_acpi_dev(acpi_desc); + + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + desc = nd_cmd_bus_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_BUS); + handle = adev->handle; + dimm_name = "bus"; + } + + if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) + return -ENOTTY; + + if (!test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_BUFFER; + in_buf.buffer.pointer = buf; + in_buf.buffer.length = 0; + + /* libnvdimm has already validated the input envelope */ + for (i = 0; i < desc->in_num; i++) + in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc, + i, buf); + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__, + dimm_name, cmd_name, in_buf.buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, in_buf.buffer.pointer, min_t(u32, 128, + in_buf.buffer.length), true); + } + + out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj); + if (!out_obj) { + dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, + cmd_name); + return -EINVAL; + } + + if (out_obj->package.type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", + __func__, dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, + dimm_name, cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, out_obj->buffer.pointer, min_t(u32, 128, + out_obj->buffer.length), true); + } + + for (i = 0, offset = 0; i < desc->out_num; i++) { + u32 out_size = 
nd_cmd_out_size(nvdimm, cmd, desc, i, buf, + (u32 *) out_obj->buffer.pointer); + + if (offset + out_size > out_obj->buffer.length) { + dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + break; + } + + if (in_buf.buffer.length + offset + out_size > buf_len) { + dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + rc = -ENXIO; + goto out; + } + memcpy(buf + in_buf.buffer.length + offset, + out_obj->buffer.pointer + offset, out_size); + offset += out_size; + } + if (offset + in_buf.buffer.length < buf_len) { + if (i >= 1) { + /* + * status valid, return the number of bytes left + * unfilled in the output buffer + */ + rc = buf_len - offset - in_buf.buffer.length; + } else { + dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", + __func__, dimm_name, cmd_name, buf_len, + offset); + rc = -ENXIO; + } + } else + rc = 0; + + out: + ACPI_FREE(out_obj); + + return rc; +} + +static const char *spa_type_name(u16 type) +{ + static const char *to_name[] = { + [NFIT_SPA_VOLATILE] = "volatile", + [NFIT_SPA_PM] = "pmem", + [NFIT_SPA_DCR] = "dimm-control-region", + [NFIT_SPA_BDW] = "block-data-window", + [NFIT_SPA_VDISK] = "volatile-disk", + [NFIT_SPA_VCD] = "volatile-cd", + [NFIT_SPA_PDISK] = "persistent-disk", + [NFIT_SPA_PCD] = "persistent-cd", + + }; + + if (type > NFIT_SPA_PCD) + return "unknown"; + + return to_name[type]; +} + +static int nfit_spa_type(struct acpi_nfit_system_address *spa) +{ + int i; + + for (i = 0; i < NFIT_UUID_MAX; i++) + if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0) + return i; + return -1; +} + +static bool add_spa(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct device *dev = acpi_desc->dev; + struct nfit_spa *nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa), + GFP_KERNEL); + + if (!nfit_spa) + return false; + INIT_LIST_HEAD(&nfit_spa->list); + nfit_spa->spa = spa; + list_add_tail(&nfit_spa->list, &acpi_desc->spas); + dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, + spa->range_index, + spa_type_name(nfit_spa_type(spa))); + return true; +} + +static bool add_memdev(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_memory_map *memdev) +{ + struct device *dev = acpi_desc->dev; + struct nfit_memdev *nfit_memdev = devm_kzalloc(dev, + sizeof(*nfit_memdev), GFP_KERNEL); + + if (!nfit_memdev) + return false; + INIT_LIST_HEAD(&nfit_memdev->list); + nfit_memdev->memdev = memdev; + list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); + dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n", + __func__, memdev->device_handle, memdev->range_index, + memdev->region_index); + return true; +} + +static bool add_dcr(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_control_region *dcr) +{ + struct device *dev = acpi_desc->dev; + struct nfit_dcr *nfit_dcr = devm_kzalloc(dev, sizeof(*nfit_dcr), + GFP_KERNEL); + + if (!nfit_dcr) + return false; + INIT_LIST_HEAD(&nfit_dcr->list); + nfit_dcr->dcr = dcr; + list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); + dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, + dcr->region_index, dcr->windows); + return true; +} + +static bool add_bdw(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_data_region *bdw) +{ + struct device *dev = acpi_desc->dev; + struct nfit_bdw *nfit_bdw = devm_kzalloc(dev, sizeof(*nfit_bdw), + GFP_KERNEL); + + if (!nfit_bdw) + return false; + INIT_LIST_HEAD(&nfit_bdw->list); + nfit_bdw->bdw = bdw; + list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); + 
dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, + bdw->region_index, bdw->windows); + return true; +} + +static bool add_idt(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_interleave *idt) +{ + struct device *dev = acpi_desc->dev; + struct nfit_idt *nfit_idt = devm_kzalloc(dev, sizeof(*nfit_idt), + GFP_KERNEL); + + if (!nfit_idt) + return false; + INIT_LIST_HEAD(&nfit_idt->list); + nfit_idt->idt = idt; + list_add_tail(&nfit_idt->list, &acpi_desc->idts); + dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__, + idt->interleave_index, idt->line_count); + return true; +} + +static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, + const void *end) +{ + struct device *dev = acpi_desc->dev; + struct acpi_nfit_header *hdr; + void *err = ERR_PTR(-ENOMEM); + + if (table >= end) + return NULL; + + hdr = table; + switch (hdr->type) { + case ACPI_NFIT_TYPE_SYSTEM_ADDRESS: + if (!add_spa(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_MEMORY_MAP: + if (!add_memdev(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_CONTROL_REGION: + if (!add_dcr(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_DATA_REGION: + if (!add_bdw(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_INTERLEAVE: + if (!add_idt(acpi_desc, table)) + return err; + break; + case ACPI_NFIT_TYPE_FLUSH_ADDRESS: + dev_dbg(dev, "%s: flush\n", __func__); + break; + case ACPI_NFIT_TYPE_SMBIOS: + dev_dbg(dev, "%s: smbios\n", __func__); + break; + default: + dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); + break; + } + + return table + hdr->length; +} + +static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem) +{ + u32 device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + u16 dcr = nfit_mem->dcr->region_index; + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + u16 range_index = nfit_spa->spa->range_index; + int type = nfit_spa_type(nfit_spa->spa); + struct nfit_memdev *nfit_memdev; + + if (type != NFIT_SPA_BDW) + continue; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index) + continue; + if (nfit_memdev->memdev->device_handle != device_handle) + continue; + if (nfit_memdev->memdev->region_index != dcr) + continue; + + nfit_mem->spa_bdw = nfit_spa->spa; + return; + } + } + + dev_dbg(acpi_desc->dev, "SPA-BDW not found for SPA-DCR %d\n", + nfit_mem->spa_dcr->range_index); + nfit_mem->bdw = NULL; +} + +static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa) +{ + u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; + struct nfit_memdev *nfit_memdev; + struct nfit_dcr *nfit_dcr; + struct nfit_bdw *nfit_bdw; + struct nfit_idt *nfit_idt; + u16 idt_idx, range_index; + + list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) { + if (nfit_dcr->dcr->region_index != dcr) + continue; + nfit_mem->dcr = nfit_dcr->dcr; + break; + } + + if (!nfit_mem->dcr) { + dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n", + spa->range_index, __to_nfit_memdev(nfit_mem) + ? "" : " MEMDEV", nfit_mem->dcr ? 
"" : " DCR"); + return -ENODEV; + } + + /* + * We've found enough to create an nvdimm, optionally + * find an associated BDW + */ + list_add(&nfit_mem->list, &acpi_desc->dimms); + + list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) { + if (nfit_bdw->bdw->region_index != dcr) + continue; + nfit_mem->bdw = nfit_bdw->bdw; + break; + } + + if (!nfit_mem->bdw) + return 0; + + nfit_mem_find_spa_bdw(acpi_desc, nfit_mem); + + if (!nfit_mem->spa_bdw) + return 0; + + range_index = nfit_mem->spa_bdw->range_index; + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + if (nfit_memdev->memdev->range_index != range_index || + nfit_memdev->memdev->region_index != dcr) + continue; + nfit_mem->memdev_bdw = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_bdw = nfit_idt->idt; + break; + } + break; + } + + return 0; +} + +static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_mem *nfit_mem, *found; + struct nfit_memdev *nfit_memdev; + int type = nfit_spa_type(spa); + u16 dcr; + + switch (type) { + case NFIT_SPA_DCR: + case NFIT_SPA_PM: + break; + default: + return 0; + } + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + int rc; + + if (nfit_memdev->memdev->range_index != spa->range_index) + continue; + found = NULL; + dcr = nfit_memdev->memdev->region_index; + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->region_index == dcr) { + found = nfit_mem; + break; + } + + if (found) + nfit_mem = found; + else { + nfit_mem = devm_kzalloc(acpi_desc->dev, + sizeof(*nfit_mem), GFP_KERNEL); + if (!nfit_mem) + return -ENOMEM; + INIT_LIST_HEAD(&nfit_mem->list); + } + + if (type == NFIT_SPA_DCR) { + struct nfit_idt *nfit_idt; + u16 idt_idx; + + /* multiple dimms may share a SPA when interleaved */ + nfit_mem->spa_dcr = spa; + nfit_mem->memdev_dcr = nfit_memdev->memdev; + idt_idx = nfit_memdev->memdev->interleave_index; + list_for_each_entry(nfit_idt, &acpi_desc->idts, list) { + if (nfit_idt->idt->interleave_index != idt_idx) + continue; + nfit_mem->idt_dcr = nfit_idt->idt; + break; + } + } else { + /* + * A single dimm may belong to multiple SPA-PM + * ranges, record at least one in addition to + * any SPA-DCR range. + */ + nfit_mem->memdev_pmem = nfit_memdev->memdev; + } + + if (found) + continue; + + rc = nfit_mem_add(acpi_desc, nfit_mem, spa); + if (rc) + return rc; + } + + return 0; +} + +static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b) +{ + struct nfit_mem *a = container_of(_a, typeof(*a), list); + struct nfit_mem *b = container_of(_b, typeof(*b), list); + u32 handleA, handleB; + + handleA = __to_nfit_memdev(a)->device_handle; + handleB = __to_nfit_memdev(b)->device_handle; + if (handleA < handleB) + return -1; + else if (handleA > handleB) + return 1; + return 0; +} + +static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + /* + * For each SPA-DCR or SPA-PMEM address range find its + * corresponding MEMDEV(s). From each MEMDEV find the + * corresponding DCR. Then, if we're operating on a SPA-DCR, + * try to find a SPA-BDW and a corresponding BDW that references + * the DCR. Throw it all into an nfit_mem object. Note, that + * BDWs are optional. 
+ */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc; + + rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa); + if (rc) + return rc; + } + + list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp); + + return 0; +} + +static ssize_t revision_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + + return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision); +} +static DEVICE_ATTR_RO(revision); + +static struct attribute *acpi_nfit_attributes[] = { + &dev_attr_revision.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_attributes, +}; + +const struct attribute_group *acpi_nfit_attribute_groups[] = { + &nvdimm_bus_attribute_group, + &acpi_nfit_attribute_group, + NULL, +}; +EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); + +static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return __to_nfit_memdev(nfit_mem); +} + +static struct acpi_nfit_control_region *to_nfit_dcr(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + return nfit_mem->dcr; +} + +static ssize_t handle_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->device_handle); +} +static DEVICE_ATTR_RO(handle); + +static ssize_t phys_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_memory_map *memdev = to_nfit_memdev(dev); + + return sprintf(buf, "%#x\n", memdev->physical_id); +} +static DEVICE_ATTR_RO(phys_id); + +static ssize_t vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t rev_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->revision_id); +} +static DEVICE_ATTR_RO(rev_id); + +static ssize_t device_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->device_id); +} +static DEVICE_ATTR_RO(device); + +static ssize_t format_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->code); +} +static DEVICE_ATTR_RO(format); + +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); + + return sprintf(buf, "%#x\n", dcr->serial_number); +} +static DEVICE_ATTR_RO(serial); + +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 flags = to_nfit_memdev(dev)->flags; + + return sprintf(buf, "%s%s%s%s%s\n", + flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + flags & ACPI_NFIT_MEM_ARMED ? 
"arm " : "", + flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart " : ""); +} +static DEVICE_ATTR_RO(flags); + +static struct attribute *acpi_nfit_dimm_attributes[] = { + &dev_attr_handle.attr, + &dev_attr_phys_id.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, + &dev_attr_format.attr, + &dev_attr_serial.attr, + &dev_attr_rev_id.attr, + &dev_attr_flags.attr, + NULL, +}; + +static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (to_nfit_dcr(dev)) + return a->mode; + else + return 0; +} + +static struct attribute_group acpi_nfit_dimm_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_dimm_attributes, + .is_visible = acpi_nfit_dimm_attr_visible, +}; + +static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { + &nvdimm_attribute_group, + &nd_device_attribute_group, + &acpi_nfit_dimm_attribute_group, + NULL, +}; + +static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, + u32 device_handle) +{ + struct nfit_mem *nfit_mem; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) + if (__to_nfit_memdev(nfit_mem)->device_handle == device_handle) + return nfit_mem->nvdimm; + + return NULL; +} + +static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, u32 device_handle) +{ + struct acpi_device *adev, *adev_dimm; + struct device *dev = acpi_desc->dev; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); + unsigned long long sta; + int i, rc = -ENODEV; + acpi_status status; + + nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + adev = to_acpi_dev(acpi_desc); + if (!adev) + return 0; + + adev_dimm = acpi_find_child_device(adev, device_handle, false); + nfit_mem->adev = adev_dimm; + if (!adev_dimm) { + dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", + device_handle); + return force_enable_dimms ? 0 : -ENODEV; + } + + status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) { + dev_dbg(dev, "%s missing _STA, assuming enabled...\n", + dev_name(&adev_dimm->dev)); + rc = 0; + } else if (ACPI_FAILURE(status)) + dev_err(dev, "%s failed to retrieve_STA, disabling...\n", + dev_name(&adev_dimm->dev)); + else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) + dev_info(dev, "%s disabled by firmware\n", + dev_name(&adev_dimm->dev)); + else + rc = 0; + + for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) + if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nfit_mem->dsm_mask); + + return force_enable_dimms ? 
0 : rc; +} + +static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_mem *nfit_mem; + int dimm_count = 0; + + list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + struct nvdimm *nvdimm; + unsigned long flags = 0; + u32 device_handle; + u16 mem_flags; + int rc; + + device_handle = __to_nfit_memdev(nfit_mem)->device_handle; + nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); + if (nvdimm) { + /* + * If for some reason we find multiple DCRs the + * first one wins + */ + dev_err(acpi_desc->dev, "duplicate DCR detected: %s\n", + nvdimm_name(nvdimm)); + continue; + } + + if (nfit_mem->bdw && nfit_mem->memdev_pmem) + flags |= NDD_ALIASING; + + mem_flags = __to_nfit_memdev(nfit_mem)->flags; + if (mem_flags & ACPI_NFIT_MEM_ARMED) + flags |= NDD_UNARMED; + + rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle); + if (rc) + continue; + + nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, + acpi_nfit_dimm_attribute_groups, + flags, &nfit_mem->dsm_mask); + if (!nvdimm) + return -ENOMEM; + + nfit_mem->nvdimm = nvdimm; + dimm_count++; + + if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0) + continue; + + dev_info(acpi_desc->dev, "%s: failed: %s%s%s%s\n", + nvdimm_name(nvdimm), + mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save " : "", + mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore " : "", + mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush " : "", + mem_flags & ACPI_NFIT_MEM_ARMED ? "arm " : ""); + + } + + return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); +} + +static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS); + struct acpi_device *adev; + int i; + + adev = to_acpi_dev(acpi_desc); + if (!adev) + return; + + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nd_desc->dsm_mask); +} + +static ssize_t range_index_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nfit_spa *nfit_spa = nd_region_provider_data(nd_region); + + return sprintf(buf, "%d\n", nfit_spa->spa->range_index); +} +static DEVICE_ATTR_RO(range_index); + +static struct attribute *acpi_nfit_region_attributes[] = { + &dev_attr_range_index.attr, + NULL, +}; + +static struct attribute_group acpi_nfit_region_attribute_group = { + .name = "nfit", + .attrs = acpi_nfit_region_attributes, +}; + +static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_mapping_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + &acpi_nfit_region_attribute_group, + NULL, +}; + +/* enough info to uniquely specify an interleave set */ +struct nfit_set_info { + struct nfit_set_info_map { + u64 region_offset; + u32 serial_number; + u32 pad; + } mapping[0]; +}; + +static size_t sizeof_nfit_set_info(int num_mappings) +{ + return sizeof(struct nfit_set_info) + + num_mappings * sizeof(struct nfit_set_info_map); +} + +static int cmp_map(const void *m0, const void *m1) +{ + const struct nfit_set_info_map *map0 = m0; + const struct nfit_set_info_map *map1 = m1; + + return memcmp(&map0->region_offset, &map1->region_offset, + sizeof(u64)); +} + +/* Retrieve the nth entry referencing this spa */ +static struct acpi_nfit_memory_map *memdev_from_spa( + struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) +{ + struct nfit_memdev *nfit_memdev; 
+ + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) + if (nfit_memdev->memdev->range_index == range_index) + if (n-- == 0) + return nfit_memdev->memdev; + return NULL; +} + +static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc, + struct acpi_nfit_system_address *spa) +{ + int i, spa_type = nfit_spa_type(spa); + struct device *dev = acpi_desc->dev; + struct nd_interleave_set *nd_set; + u16 nr = ndr_desc->num_mappings; + struct nfit_set_info *info; + + if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) + /* pass */; + else + return 0; + + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + + info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); + if (!info) + return -ENOMEM; + for (i = 0; i < nr; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nfit_set_info_map *map = &info->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, + spa->range_index, i); + + if (!memdev || !nfit_mem->dcr) { + dev_err(dev, "%s: failed to find DCR\n", __func__); + return -ENODEV; + } + + map->region_offset = memdev->region_offset; + map->serial_number = nfit_mem->dcr->serial_number; + } + + sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), + cmp_map, NULL); + nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + ndr_desc->nd_set = nd_set; + devm_kfree(dev, info); + + return 0; +} + +static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) +{ + struct acpi_nfit_interleave *idt = mmio->idt; + u32 sub_line_offset, line_index, line_offset; + u64 line_no, table_skip_count, table_offset; + + line_no = div_u64_rem(offset, mmio->line_size, &sub_line_offset); + table_skip_count = div_u64_rem(line_no, mmio->num_lines, &line_index); + line_offset = idt->line_offset[line_index] + * mmio->line_size; + table_offset = table_skip_count * mmio->table_size; + + return mmio->base_offset + line_offset + table_offset + sub_line_offset; +} + +static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + u64 offset = nfit_blk->stat_offset + mmio->size * bw; + + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + return readq(mmio->base + offset); +} + +static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, + resource_size_t dpa, unsigned int len, unsigned int write) +{ + u64 cmd, offset; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; + + enum { + BCW_OFFSET_MASK = (1ULL << 48)-1, + BCW_LEN_SHIFT = 48, + BCW_LEN_MASK = (1ULL << 8) - 1, + BCW_CMD_SHIFT = 56, + }; + + cmd = (dpa >> L1_CACHE_SHIFT) & BCW_OFFSET_MASK; + len = len >> L1_CACHE_SHIFT; + cmd |= ((u64) len & BCW_LEN_MASK) << BCW_LEN_SHIFT; + cmd |= ((u64) write) << BCW_CMD_SHIFT; + + offset = nfit_blk->cmd_offset + mmio->size * bw; + if (mmio->num_lines) + offset = to_interleave_offset(offset, mmio); + + writeq(cmd, mmio->base + offset); + /* FIXME: conditionally perform read-back if mandated by firmware */ +} + +static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, + resource_size_t dpa, void *iobuf, size_t len, int rw, + unsigned int lane) +{ + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + unsigned int copied = 0; + u64 base_offset; + int rc; + + base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + + lane * mmio->size; + /* 
TODO: non-temporal access, flush hints, cache management etc... */ + write_blk_ctl(nfit_blk, lane, dpa, len, rw); + while (len) { + unsigned int c; + u64 offset; + + if (mmio->num_lines) { + u32 line_offset; + + offset = to_interleave_offset(base_offset + copied, + mmio); + div_u64_rem(offset, mmio->line_size, &line_offset); + c = min_t(size_t, len, mmio->line_size - line_offset); + } else { + offset = base_offset + nfit_blk->bdw_offset; + c = len; + } + + if (rw) + memcpy(mmio->aperture + offset, iobuf + copied, c); + else + memcpy(iobuf + copied, mmio->aperture + offset, c); + + copied += c; + len -= c; + } + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; + return rc; +} + +static int acpi_nfit_blk_region_do_io(struct nd_blk_region *ndbr, + resource_size_t dpa, void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = nfit_blk->nd_region; + unsigned int lane, copied = 0; + int rc = 0; + + lane = nd_region_acquire_lane(nd_region); + while (len) { + u64 c = min(len, mmio->size); + + rc = acpi_nfit_blk_single_io(nfit_blk, dpa + copied, + iobuf + copied, c, rw, lane); + if (rc) + break; + + copied += c; + len -= c; + } + nd_region_release_lane(nd_region, lane); + + return rc; +} + +static void nfit_spa_mapping_release(struct kref *kref) +{ + struct nfit_spa_mapping *spa_map = to_spa_map(kref); + struct acpi_nfit_system_address *spa = spa_map->spa; + struct acpi_nfit_desc *acpi_desc = spa_map->acpi_desc; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); + iounmap(spa_map->iomem); + release_mem_region(spa->address, spa->length); + list_del(&spa_map->list); + kfree(spa_map); +} + +static struct nfit_spa_mapping *find_spa_mapping( + struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + list_for_each_entry(spa_map, &acpi_desc->spa_maps, list) + if (spa_map->spa == spa) + return spa_map; + + return NULL; +} + +static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + struct nfit_spa_mapping *spa_map; + + mutex_lock(&acpi_desc->spa_map_mutex); + spa_map = find_spa_mapping(acpi_desc, spa); + + if (spa_map) + kref_put(&spa_map->kref, nfit_spa_mapping_release); + mutex_unlock(&acpi_desc->spa_map_mutex); +} + +static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + resource_size_t start = spa->address; + resource_size_t n = spa->length; + struct nfit_spa_mapping *spa_map; + struct resource *res; + + WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); + + spa_map = find_spa_mapping(acpi_desc, spa); + if (spa_map) { + kref_get(&spa_map->kref); + return spa_map->iomem; + } + + spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); + if (!spa_map) + return NULL; + + INIT_LIST_HEAD(&spa_map->list); + spa_map->spa = spa; + kref_init(&spa_map->kref); + spa_map->acpi_desc = acpi_desc; + + res = request_mem_region(start, n, dev_name(acpi_desc->dev)); + if (!res) + goto err_mem; + + /* TODO: cacheability based on the spa type */ + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) + goto err_map; + + list_add_tail(&spa_map->list, &acpi_desc->spa_maps); + return spa_map->iomem; + + err_map: + release_mem_region(start, n); + err_mem: + kfree(spa_map); + return NULL; +} + +/** + * 
nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges + * @nvdimm_bus: NFIT-bus that provided the spa table entry + * @nfit_spa: spa table to map + * + * In the case where block-data-window apertures and + * dimm-control-regions are interleaved they will end up sharing a + * single request_mem_region() + ioremap() for the address range. In + * the style of devm nfit_spa_map() mappings are automatically dropped + * when all region devices referencing the same mapping are disabled / + * unbound. + */ +static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_system_address *spa) +{ + void __iomem *iomem; + + mutex_lock(&acpi_desc->spa_map_mutex); + iomem = __nfit_spa_map(acpi_desc, spa); + mutex_unlock(&acpi_desc->spa_map_mutex); + + return iomem; +} + +static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, + struct acpi_nfit_interleave *idt, u16 interleave_ways) +{ + if (idt) { + mmio->num_lines = idt->line_count; + mmio->line_size = idt->line_size; + if (interleave_ways == 0) + return -ENXIO; + mmio->table_size = mmio->num_lines * interleave_ways + * mmio->line_size; + } + + return 0; +} + +static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk_mmio *mmio; + struct nfit_blk *nfit_blk; + struct nfit_mem *nfit_mem; + struct nvdimm *nvdimm; + int rc; + + nvdimm = nd_blk_region_to_dimm(ndbr); + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { + dev_dbg(dev, "%s: missing%s%s%s\n", __func__, + nfit_mem ? "" : " nfit_mem", + nfit_mem->dcr ? "" : " dcr", + nfit_mem->bdw ? 
"" : " bdw"); + return -ENXIO; + } + + nfit_blk = devm_kzalloc(dev, sizeof(*nfit_blk), GFP_KERNEL); + if (!nfit_blk) + return -ENOMEM; + nd_blk_region_set_provider_data(ndbr, nfit_blk); + nfit_blk->nd_region = to_nd_region(dev); + + /* map block aperture memory */ + nfit_blk->bdw_offset = nfit_mem->bdw->offset; + mmio = &nfit_blk->mmio[BDW]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->bdw->size; + mmio->base_offset = nfit_mem->memdev_bdw->region_offset; + mmio->idt = nfit_mem->idt_bdw; + mmio->spa = nfit_mem->spa_bdw; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw, + nfit_mem->memdev_bdw->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init bdw interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + /* map block control memory */ + nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; + nfit_blk->stat_offset = nfit_mem->dcr->status_offset; + mmio = &nfit_blk->mmio[DCR]; + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + if (!mmio->base) { + dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, + nvdimm_name(nvdimm)); + return -ENOMEM; + } + mmio->size = nfit_mem->dcr->window_size; + mmio->base_offset = nfit_mem->memdev_dcr->region_offset; + mmio->idt = nfit_mem->idt_dcr; + mmio->spa = nfit_mem->spa_dcr; + rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr, + nfit_mem->memdev_dcr->interleave_ways); + if (rc) { + dev_dbg(dev, "%s: %s failed to init dcr interleave\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + + if (mmio->line_size == 0) + return 0; + + if ((u32) nfit_blk->cmd_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "cmd_offset crosses interleave boundary\n"); + return -ENXIO; + } else if ((u32) nfit_blk->stat_offset % mmio->line_size + + 8 > mmio->line_size) { + dev_dbg(dev, "stat_offset crosses interleave boundary\n"); + return -ENXIO; + } + + return 0; +} + +static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, + struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_blk *nfit_blk = nd_blk_region_provider_data(ndbr); + int i; + + if (!nfit_blk) + return; /* never enabled */ + + /* auto-free BLK spa mappings */ + for (i = 0; i < 2; i++) { + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; + + if (mmio->base) + nfit_spa_unmap(acpi_desc, mmio->spa); + } + nd_blk_region_set_provider_data(ndbr, NULL); + /* devm will free nfit_blk */ +} + +static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, + struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, + struct acpi_nfit_memory_map *memdev, + struct acpi_nfit_system_address *spa) +{ + struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, + memdev->device_handle); + struct nd_blk_region_desc *ndbr_desc; + struct nfit_mem *nfit_mem; + int blk_valid = 0; + + if (!nvdimm) { + dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", + spa->range_index, memdev->device_handle); + return -ENODEV; + } + + nd_mapping->nvdimm = nvdimm; + switch (nfit_spa_type(spa)) { + case NFIT_SPA_PM: + case NFIT_SPA_VOLATILE: + nd_mapping->start = memdev->address; + nd_mapping->size = memdev->region_size; + break; + case NFIT_SPA_DCR: + nfit_mem = nvdimm_provider_data(nvdimm); + if (!nfit_mem || !nfit_mem->bdw) { + 
dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", + spa->range_index, nvdimm_name(nvdimm)); + } else { + nd_mapping->size = nfit_mem->bdw->capacity; + nd_mapping->start = nfit_mem->bdw->start_address; + ndr_desc->num_lanes = nfit_mem->bdw->windows; + blk_valid = 1; + } + + ndr_desc->nd_mapping = nd_mapping; + ndr_desc->num_mappings = blk_valid; + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr_desc->enable = acpi_nfit_blk_region_enable; + ndbr_desc->disable = acpi_nfit_blk_region_disable; + ndbr_desc->do_io = acpi_desc->blk_do_io; + if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + return -ENOMEM; + break; + } + + return 0; +} + +static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS]; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nd_blk_region_desc ndbr_desc; + struct nd_region_desc *ndr_desc; + struct nfit_memdev *nfit_memdev; + struct nvdimm_bus *nvdimm_bus; + struct resource res; + int count = 0, rc; + + if (spa->range_index == 0) { + dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", + __func__); + return 0; + } + + memset(&res, 0, sizeof(res)); + memset(&nd_mappings, 0, sizeof(nd_mappings)); + memset(&ndbr_desc, 0, sizeof(ndbr_desc)); + res.start = spa->address; + res.end = res.start + spa->length - 1; + ndr_desc = &ndbr_desc.ndr_desc; + ndr_desc->res = &res; + ndr_desc->provider_data = nfit_spa; + ndr_desc->attr_groups = acpi_nfit_region_attribute_groups; + if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) + ndr_desc->numa_node = acpi_map_pxm_to_online_node( + spa->proximity_domain); + else + ndr_desc->numa_node = NUMA_NO_NODE; + + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { + struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; + struct nd_mapping *nd_mapping; + + if (memdev->range_index != spa->range_index) + continue; + if (count >= ND_MAX_MAPPINGS) { + dev_err(acpi_desc->dev, "spa%d exceeds max mappings %d\n", + spa->range_index, ND_MAX_MAPPINGS); + return -ENXIO; + } + nd_mapping = &nd_mappings[count++]; + rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, + memdev, spa); + if (rc) + return rc; + } + + ndr_desc->nd_mapping = nd_mappings; + ndr_desc->num_mappings = count; + rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); + if (rc) + return rc; + + nvdimm_bus = acpi_desc->nvdimm_bus; + if (nfit_spa_type(spa) == NFIT_SPA_PM) { + if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { + if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) + return -ENOMEM; + } + return 0; +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + + if (rc) + return rc; + } + return 0; +} + +int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) +{ + struct device *dev = acpi_desc->dev; + const void *end; + u8 *data; + int rc; + + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + + data = (u8 *) acpi_desc->nfit; + end = data + sz; + data += sizeof(struct acpi_table_nfit); + while (!IS_ERR_OR_NULL(data)) + data 
= add_table(acpi_desc, data, end); + + if (IS_ERR(data)) { + dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__, + PTR_ERR(data)); + return PTR_ERR(data); + } + + if (nfit_mem_init(acpi_desc) != 0) + return -ENOMEM; + + acpi_nfit_init_dsms(acpi_desc); + + rc = acpi_nfit_register_dimms(acpi_desc); + if (rc) + return rc; + + return acpi_nfit_register_regions(acpi_desc); +} +EXPORT_SYMBOL_GPL(acpi_nfit_init); + +static int acpi_nfit_add(struct acpi_device *adev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &adev->dev; + struct acpi_table_header *tbl; + acpi_status status = AE_OK; + acpi_size sz; + int rc; + + status = acpi_get_table_with_size("NFIT", 0, &tbl, &sz); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to find NFIT\n"); + return -ENXIO; + } + + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + + dev_set_drvdata(dev, acpi_desc); + acpi_desc->dev = dev; + acpi_desc->nfit = (struct acpi_table_nfit *) tbl; + acpi_desc->blk_do_io = acpi_nfit_blk_region_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->provider_name = "ACPI.NFIT"; + nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + rc = acpi_nfit_init(acpi_desc, sz); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + return 0; +} + +static int acpi_nfit_remove(struct acpi_device *adev) +{ + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return 0; +} + +static const struct acpi_device_id acpi_nfit_ids[] = { + { "ACPI0012", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, acpi_nfit_ids); + +static struct acpi_driver acpi_nfit_driver = { + .name = KBUILD_MODNAME, + .ids = acpi_nfit_ids, + .ops = { + .add = acpi_nfit_add, + .remove = acpi_nfit_remove, + }, +}; + +static __init int nfit_init(void) +{ + BUILD_BUG_ON(sizeof(struct acpi_table_nfit) != 40); + BUILD_BUG_ON(sizeof(struct acpi_nfit_system_address) != 56); + BUILD_BUG_ON(sizeof(struct acpi_nfit_memory_map) != 48); + BUILD_BUG_ON(sizeof(struct acpi_nfit_interleave) != 20); + BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); + BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); + BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); + + acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]); + acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]); + acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]); + acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]); + acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]); + acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]); + acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); + acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + + return acpi_bus_register_driver(&acpi_nfit_driver); +} + +static __exit void nfit_exit(void) +{ + acpi_bus_unregister_driver(&acpi_nfit_driver); +} + +module_init(nfit_init); +module_exit(nfit_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h new file mode 100644 index 000000000000..81f2e8c5a79c --- /dev/null +++ 
b/drivers/acpi/nfit.h @@ -0,0 +1,158 @@ +/* + * NVDIMM Firmware Interface Table - NFIT + * + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_H__ +#define __NFIT_H__ +#include <linux/libnvdimm.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include <linux/acpi.h> +#include <acpi/acuuid.h> + +#define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" +#define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" +#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ + | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ + | ACPI_NFIT_MEM_ARMED) + +enum nfit_uuids { + NFIT_SPA_VOLATILE, + NFIT_SPA_PM, + NFIT_SPA_DCR, + NFIT_SPA_BDW, + NFIT_SPA_VDISK, + NFIT_SPA_VCD, + NFIT_SPA_PDISK, + NFIT_SPA_PCD, + NFIT_DEV_BUS, + NFIT_DEV_DIMM, + NFIT_UUID_MAX, +}; + +struct nfit_spa { + struct acpi_nfit_system_address *spa; + struct list_head list; +}; + +struct nfit_dcr { + struct acpi_nfit_control_region *dcr; + struct list_head list; +}; + +struct nfit_bdw { + struct acpi_nfit_data_region *bdw; + struct list_head list; +}; + +struct nfit_idt { + struct acpi_nfit_interleave *idt; + struct list_head list; +}; + +struct nfit_memdev { + struct acpi_nfit_memory_map *memdev; + struct list_head list; +}; + +/* assembled tables for a given dimm/memory-device */ +struct nfit_mem { + struct nvdimm *nvdimm; + struct acpi_nfit_memory_map *memdev_dcr; + struct acpi_nfit_memory_map *memdev_pmem; + struct acpi_nfit_memory_map *memdev_bdw; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + struct acpi_nfit_system_address *spa_dcr; + struct acpi_nfit_system_address *spa_bdw; + struct acpi_nfit_interleave *idt_dcr; + struct acpi_nfit_interleave *idt_bdw; + struct list_head list; + struct acpi_device *adev; + unsigned long dsm_mask; +}; + +struct acpi_nfit_desc { + struct nvdimm_bus_descriptor nd_desc; + struct acpi_table_nfit *nfit; + struct mutex spa_map_mutex; + struct list_head spa_maps; + struct list_head memdevs; + struct list_head dimms; + struct list_head spas; + struct list_head dcrs; + struct list_head bdws; + struct list_head idts; + struct nvdimm_bus *nvdimm_bus; + struct device *dev; + unsigned long dimm_dsm_force_en; + int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); +}; + +enum nd_blk_mmio_selector { + BDW, + DCR, +}; + +struct nfit_blk { + struct nfit_blk_mmio { + union { + void __iomem *base; + void *aperture; + }; + u64 size; + u64 base_offset; + u32 line_size; + u32 num_lines; + u32 table_size; + struct acpi_nfit_interleave *idt; + struct acpi_nfit_system_address *spa; + } mmio[2]; + struct nd_region *nd_region; + u64 bdw_offset; /* post interleave offset */ + u64 stat_offset; + u64 cmd_offset; +}; + +struct nfit_spa_mapping { + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_system_address *spa; + struct list_head list; + struct kref kref; + void __iomem *iomem; +}; + +static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) +{ + return container_of(kref, struct nfit_spa_mapping, kref); +} + +static 
inline struct acpi_nfit_memory_map *__to_nfit_memdev( + struct nfit_mem *nfit_mem) +{ + if (nfit_mem->memdev_dcr) + return nfit_mem->memdev_dcr; + return nfit_mem->memdev_pmem; +} + +static inline struct acpi_nfit_desc *to_acpi_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +const u8 *to_nfit_uuid(enum nfit_uuids id); +int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); +extern const struct attribute_group *acpi_nfit_attribute_groups[]; +#endif /* __NFIT_H__ */ diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 1333cbdc3ea2..acaa3b4ea504 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -29,6 +29,8 @@ #include <linux/errno.h> #include <linux/acpi.h> #include <linux/numa.h> +#include <linux/nodemask.h> +#include <linux/topology.h> #define PREFIX "ACPI: " @@ -70,7 +72,12 @@ static void __acpi_map_pxm_to_node(int pxm, int node) int acpi_map_pxm_to_node(int pxm) { - int node = pxm_to_node_map[pxm]; + int node; + + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) + return NUMA_NO_NODE; + + node = pxm_to_node_map[pxm]; if (node == NUMA_NO_NODE) { if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) @@ -83,6 +90,45 @@ int acpi_map_pxm_to_node(int pxm) return node; } +/** + * acpi_map_pxm_to_online_node - Map proximity ID to online node + * @pxm: ACPI proximity ID + * + * This is similar to acpi_map_pxm_to_node(), but always returns an online + * node. When the mapped node from a given proximity ID is offline, it + * looks up the node distance table and returns the nearest online node. + * + * ACPI device drivers, which are called after the NUMA initialization has + * completed in the kernel, can call this interface to obtain their device + * NUMA topology from ACPI tables. Such drivers do not have to deal with + * offline nodes. A node may be offline when a device proximity ID is + * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. + * "numa=off" on x86. + */ +int acpi_map_pxm_to_online_node(int pxm) +{ + int node, n, dist, min_dist; + + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE) + node = 0; + + if (!node_online(node)) { + min_dist = INT_MAX; + for_each_online_node(n) { + dist = node_distance(node, n); + if (dist < min_dist) { + min_dist = dist; + node = n; + } + } + } + + return node; +} +EXPORT_SYMBOL(acpi_map_pxm_to_online_node); + static void __init acpi_table_print_srat_entry(struct acpi_subtable_header *header) { @@ -328,8 +374,6 @@ int acpi_get_node(acpi_handle handle) int pxm; pxm = acpi_get_pxm(handle); - if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) - return NUMA_NO_NODE; return acpi_map_pxm_to_node(pxm); } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 3ccef9eba6f9..1b8094d4d7af 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX and will prevent RAM block device backing store memory from being allocated from highmem (only a problem for highmem systems). -config BLK_DEV_PMEM - tristate "Persistent memory block device support" - depends on HAS_IOMEM - help - Saying Y here will allow you to use a contiguous range of reserved - memory as one or more persistent block devices. - - To compile this driver as a module, choose M here: the module will be - called 'pmem'. - - If unsure, say N. 
- config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media" depends on !UML diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9cc6c18a1c7e..02b688d1438d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o -obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e5112714188f..34338d7438f5 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -193,6 +193,13 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, return 0; } +static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + nvmeq->tags = NULL; +} + static int nvme_admin_init_request(void *data, struct request *req, unsigned int hctx_idx, unsigned int rq_idx, unsigned int numa_node) @@ -606,7 +613,10 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, return; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { - req->errors = status; + if (cmd_rq->ctx == CMD_CTX_CANCELLED) + req->errors = -EINTR; + else + req->errors = status; } else { req->errors = nvme_error_status(status); } @@ -1161,12 +1171,13 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.cns = cpu_to_le32(1), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify; + c.identify.cns = cpu_to_le32(1); + *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1181,12 +1192,13 @@ int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id) int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, struct nvme_id_ns **id) { - struct nvme_command c = { - .identify.opcode = nvme_admin_identify, - .identify.nsid = cpu_to_le32(nsid), - }; + struct nvme_command c = { }; int error; + /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ + c.identify.opcode = nvme_admin_identify, + c.identify.nsid = cpu_to_le32(nsid), + *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); if (!*id) return -ENOMEM; @@ -1230,14 +1242,14 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log) { - struct nvme_command c = { - .common.opcode = nvme_admin_get_log_page, - .common.nsid = cpu_to_le32(0xFFFFFFFF), - .common.cdw10[0] = cpu_to_le32( + struct nvme_command c = { }; + int error; + + c.common.opcode = nvme_admin_get_log_page, + c.common.nsid = cpu_to_le32(0xFFFFFFFF), + c.common.cdw10[0] = cpu_to_le32( (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) | NVME_LOG_SMART), - }; - int error; *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL); if (!*log) @@ -1606,6 +1618,7 @@ static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, + .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, .timeout = nvme_timeout, }; @@ -1648,6 +1661,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev 
*dev) } if (!blk_get_queue(dev->admin_q)) { nvme_dev_remove_admin(dev); + dev->admin_q = NULL; return -ENODEV; } } else @@ -2349,19 +2363,20 @@ static int nvme_dev_add(struct nvme_dev *dev) } kfree(ctrl); - dev->tagset.ops = &nvme_mq_ops; - dev->tagset.nr_hw_queues = dev->online_queues - 1; - dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(dev->dev); - dev->tagset.queue_depth = + if (!dev->tagset.tags) { + dev->tagset.ops = &nvme_mq_ops; + dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.timeout = NVME_IO_TIMEOUT; + dev->tagset.numa_node = dev_to_node(dev->dev); + dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - dev->tagset.cmd_size = nvme_cmd_size(dev); - dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; - dev->tagset.driver_data = dev; - - if (blk_mq_alloc_tag_set(&dev->tagset)) - return 0; + dev->tagset.cmd_size = nvme_cmd_size(dev); + dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; + dev->tagset.driver_data = dev; + if (blk_mq_alloc_tag_set(&dev->tagset)) + return 0; + } schedule_work(&dev->scan_work); return 0; } @@ -2734,8 +2749,10 @@ static void nvme_free_dev(struct kref *kref) put_device(dev->device); nvme_free_namespaces(dev); nvme_release_instance(dev); - blk_mq_free_tag_set(&dev->tagset); - blk_put_queue(dev->admin_q); + if (dev->tagset.tags) + blk_mq_free_tag_set(&dev->tagset); + if (dev->admin_q) + blk_put_queue(dev->admin_q); kfree(dev->queues); kfree(dev->entry); kfree(dev); @@ -2866,6 +2883,9 @@ static int nvme_dev_start(struct nvme_dev *dev) free_tags: nvme_dev_remove_admin(dev); + blk_put_queue(dev->admin_q); + dev->admin_q = NULL; + dev->queues[0]->tags = NULL; disable: nvme_disable_queue(dev, 0); nvme_dev_list_remove(dev); @@ -2907,25 +2927,43 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); - schedule_work(&dev->scan_work); + nvme_dev_add(dev); nvme_set_irq_hints(dev); } return 0; } +static void nvme_dead_ctrl(struct nvme_dev *dev) +{ + dev_warn(dev->dev, "Device failed to resume\n"); + kref_get(&dev->kref); + if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", + dev->instance))) { + dev_err(dev->dev, + "Failed to start controller remove task\n"); + kref_put(&dev->kref, nvme_free_dev); + } +} + static void nvme_dev_reset(struct nvme_dev *dev) { + bool in_probe = work_busy(&dev->probe_work); + nvme_dev_shutdown(dev); - if (nvme_dev_resume(dev)) { - dev_warn(dev->dev, "Device failed to resume\n"); - kref_get(&dev->kref); - if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", - dev->instance))) { - dev_err(dev->dev, - "Failed to start controller remove task\n"); - kref_put(&dev->kref, nvme_free_dev); - } + + /* Synchronize with device probe so that work will see failure status + * and exit gracefully without trying to schedule another reset */ + flush_work(&dev->probe_work); + + /* Fail this device if reset occured during probe to avoid + * infinite initialization loops. 
*/ + if (in_probe) { + nvme_dead_ctrl(dev); + return; } + /* Schedule device resume asynchronously so the reset work is available + * to cleanup errors that may occur during reinitialization */ + schedule_work(&dev->probe_work); } static void nvme_reset_failed_dev(struct work_struct *ws) @@ -2957,6 +2995,7 @@ static int nvme_reset(struct nvme_dev *dev) if (!ret) { flush_work(&dev->reset_work); + flush_work(&dev->probe_work); return 0; } @@ -3053,26 +3092,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static void nvme_async_probe(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); - int result; - result = nvme_dev_start(dev); - if (result) - goto reset; - - if (dev->online_queues > 1) - result = nvme_dev_add(dev); - if (result) - goto reset; - - nvme_set_irq_hints(dev); - return; - reset: - spin_lock(&dev_list_lock); - if (!work_busy(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &dev->reset_work); - } - spin_unlock(&dev_list_lock); + if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) + nvme_dead_ctrl(dev); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -3104,8 +3126,8 @@ static void nvme_remove(struct pci_dev *pdev) flush_work(&dev->reset_work); flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); - nvme_dev_shutdown(dev); nvme_dev_remove(dev); + nvme_dev_shutdown(dev); nvme_dev_remove_admin(dev); device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); nvme_free_queues(dev, 0); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 713fc9ff1149..2126842fb6e8 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants, "Maximum number of grants to map persistently"); /* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); +/* * The LRU mechanism to clean the lists of persistent grants needs to * be executed periodically. The time interval between consecutive executions * of the purge mechanism is set in ms. @@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER; + } + rc = xen_blkif_interface_init(); if (rc) goto failed_init; diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f620b5d3f77c..8ccc49d01c8e 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -44,6 +44,7 @@ #include <xen/interface/io/blkif.h> #include <xen/interface/io/protocols.h> +extern unsigned int xen_blkif_max_ring_order; /* * This is the maximum number of segments that would be allowed in indirect * requests. This value will also be passed to the frontend. 
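Illustrative sketch, not part of the series: with multi-page rings the number of preallocated pending requests scales with the ring size negotiated over xenstore (the backend advertises "max-ring-page-order", the frontend answers with "ring-page-order" plus one "ring-ref%u" per page). The helper below is hypothetical; XEN_BLKIF_REQS_PER_PAGE is the per-page constant introduced in the next hunk.

static unsigned int example_blkif_nr_reqs(unsigned int ring_page_order)
{
	unsigned int nr_grefs = 1 << ring_page_order;	/* pages / grant refs in the shared ring */

	return nr_grefs * XEN_BLKIF_REQS_PER_PAGE;	/* 32 requests per ring page */
}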
@@ -248,7 +249,7 @@ struct backend_info; #define PERSISTENT_GNT_WAS_ACTIVE 1 /* Number of requests that we can fit in a ring */ -#define XEN_BLKIF_REQS 32 +#define XEN_BLKIF_REQS_PER_PAGE 32 struct persistent_gnt { struct page *page; @@ -320,6 +321,7 @@ struct xen_blkif { struct work_struct free_work; /* Thread shutdown wait queue. */ wait_queue_head_t shutdown_wq; + unsigned int nr_ring_pages; }; struct seg_buf { diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6ab69ad61ee1..deb3f001791f 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -25,6 +25,7 @@ /* Enlarge the array size in order to fully show blkback name. */ #define BLKBACK_NAME_LEN (20) +#define RINGREF_NAME_LEN (20) struct backend_info { struct xenbus_device *dev; @@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif) static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; - struct pending_req *req, *n; - int i, j; BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST); @@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) INIT_LIST_HEAD(&blkif->pending_free); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); - - for (i = 0; i < XEN_BLKIF_REQS; i++) { - req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) - goto fail; - list_add_tail(&req->free_list, - &blkif->pending_free); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - req->segments[j] = kzalloc(sizeof(*req->segments[0]), - GFP_KERNEL); - if (!req->segments[j]) - goto fail; - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), - GFP_KERNEL); - if (!req->indirect_pages[j]) - goto fail; - } - } spin_lock_init(&blkif->pending_free_lock); init_waitqueue_head(&blkif->pending_free_wq); init_waitqueue_head(&blkif->shutdown_wq); return blkif; - -fail: - list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { - list_del(&req->free_list); - for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { - if (!req->segments[j]) - break; - kfree(req->segments[j]); - } - for (j = 0; j < MAX_INDIRECT_PAGES; j++) { - if (!req->indirect_pages[j]) - break; - kfree(req->indirect_pages[j]); - } - kfree(req); - } - - kmem_cache_free(xen_blkif_cachep, blkif); - - return ERR_PTR(-ENOMEM); } -static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, - unsigned int evtchn) +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, + unsigned int nr_grefs, unsigned int evtchn) { int err; @@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, if (blkif->irq) return 0; - err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1, + err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs, &blkif->blk_ring); if (err < 0) return err; @@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref, { struct blkif_sring *sring; sring = (struct blkif_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_sring *sring_x86_32; sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs); break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_sring *sring_x86_64; sring_x86_64 = (struct 
blkif_x86_64_sring *)blkif->blk_ring; - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs); break; } default: @@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) i++; } - WARN_ON(i != XEN_BLKIF_REQS); + WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); kmem_cache_free(xen_blkif_cachep, blkif); } @@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, if (err) goto fail; + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u", + xen_blkif_max_ring_order); + if (err) + pr_warn("%s write out 'max-ring-page-order' failed\n", __func__); + err = xenbus_switch_state(dev, XenbusStateInitWait); if (err) goto fail; @@ -860,22 +824,66 @@ again: static int connect_ring(struct backend_info *be) { struct xenbus_device *dev = be->dev; - unsigned long ring_ref; - unsigned int evtchn; + unsigned int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int evtchn, nr_grefs, ring_page_order; unsigned int pers_grants; char protocol[64] = ""; - int err; + struct pending_req *req, *n; + int err, i, j; pr_debug("%s %s\n", __func__, dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", - &ring_ref, "event-channel", "%u", &evtchn, NULL); - if (err) { - xenbus_dev_fatal(dev, err, - "reading %s/ring-ref and event-channel", + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", + &evtchn); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/event-channel", dev->otherend); return err; } + pr_info("event-channel %u\n", evtchn); + + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", + &ring_page_order); + if (err != 1) { + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", + "%u", &ring_ref[0]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", + dev->otherend); + return err; + } + nr_grefs = 1; + pr_info("%s:using single page: ring-ref %d\n", dev->otherend, + ring_ref[0]); + } else { + unsigned int i; + + if (ring_page_order > xen_blkif_max_ring_order) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", + dev->otherend, ring_page_order, + xen_blkif_max_ring_order); + return err; + } + + nr_grefs = 1 << ring_page_order; + for (i = 0; i < nr_grefs; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name, + "%u", &ring_ref[i]); + if (err != 1) { + err = -EINVAL; + xenbus_dev_fatal(dev, err, "reading %s/%s", + dev->otherend, ring_ref_name); + return err; + } + pr_info("ring-ref%u: %u\n", i, ring_ref[i]); + } + } be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", @@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be) be->blkif->vbd.feature_gnt_persistent = pers_grants; be->blkif->vbd.overflow_max_grants = 0; + be->blkif->nr_ring_pages = nr_grefs; - pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol, + pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n", + nr_grefs, evtchn, be->blkif->blk_protocol, protocol, pers_grants ? 
"persistent grants" : ""); + for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) { + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto fail; + list_add_tail(&req->free_list, &be->blkif->pending_free); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL); + if (!req->segments[j]) + goto fail; + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]), + GFP_KERNEL); + if (!req->indirect_pages[j]) + goto fail; + } + } + /* Map the shared frame, irq etc. */ - err = xen_blkif_map(be->blkif, ring_ref, evtchn); + err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn); if (err) { - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", - ring_ref, evtchn); + xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn); return err; } return 0; + +fail: + list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) { + list_del(&req->free_list); + for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) { + if (!req->segments[j]) + break; + kfree(req->segments[j]); + } + for (j = 0; j < MAX_INDIRECT_PAGES; j++) { + if (!req->indirect_pages[j]) + break; + kfree(req->indirect_pages[j]); + } + kfree(req); + } + return -ENOMEM; } static const struct xenbus_device_id xen_blkbk_ids[] = { diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2c61cf8c6f61..fc770b7d3beb 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32; module_param_named(max, xen_blkif_max_segments, int, S_IRUGO); MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)"); -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) +/* + * Maximum order of pages to be used for the shared ring between front and + * backend, 4KB page granularity is used. + */ +static unsigned int xen_blkif_max_ring_order; +module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO); +MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); + +#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages) +#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES) +/* + * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 + * characters are enough. Define to 20 to keep consist with backend. + */ +#define RINGREF_NAME_LEN (20) /* * We have one of these per vbd, whether ide, scsi or 'other'. 
They @@ -114,13 +128,14 @@ struct blkfront_info int vdevice; blkif_vdev_t handle; enum blkif_state connected; - int ring_ref; + int ring_ref[XENBUS_MAX_RING_PAGES]; + unsigned int nr_ring_pages; struct blkif_front_ring ring; unsigned int evtchn, irq; struct request_queue *rq; struct work_struct work; struct gnttab_free_callback callback; - struct blk_shadow shadow[BLK_RING_SIZE]; + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; struct list_head grants; struct list_head indirect_pages; unsigned int persistent_gnts_c; @@ -139,8 +154,6 @@ static unsigned int nr_minors; static unsigned long *minors; static DEFINE_SPINLOCK(minor_lock); -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 @@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info); static int get_id_from_freelist(struct blkfront_info *info) { unsigned long free = info->shadow_free; - BUG_ON(free >= BLK_RING_SIZE); + BUG_ON(free >= BLK_RING_SIZE(info)); info->shadow_free = info->shadow[free].req.u.rw.id; info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ return free; @@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* * Clear persistent grants present in requests already * on the shared ring @@ -1033,12 +1046,15 @@ free_shadow: flush_work(&info->work); /* Free resources associated with old device channel. */ - if (info->ring_ref != GRANT_INVALID_REF) { - gnttab_end_foreign_access(info->ring_ref, 0, - (unsigned long)info->ring.sring); - info->ring_ref = GRANT_INVALID_REF; - info->ring.sring = NULL; + for (i = 0; i < info->nr_ring_pages; i++) { + if (info->ring_ref[i] != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); + info->ring_ref[i] = GRANT_INVALID_REF; + } } + free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + info->ring.sring = NULL; + if (info->irq) unbind_from_irqhandler(info->irq, info); info->evtchn = info->irq = 0; @@ -1157,7 +1173,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. 
*/ - if (id >= BLK_RING_SIZE) { + if (id >= BLK_RING_SIZE(info)) { WARN(1, "%s: response to %s has incorrect id (%ld)\n", info->gd->disk_name, op_name(bret->operation), id); /* We can't safely get the 'struct request' as @@ -1245,26 +1261,30 @@ static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info) { struct blkif_sring *sring; - grant_ref_t gref; - int err; + int err, i; + unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE; + grant_ref_t gref[XENBUS_MAX_RING_PAGES]; - info->ring_ref = GRANT_INVALID_REF; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, + get_order(ring_size)); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; } SHARED_RING_INIT(sring); - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + FRONT_RING_INIT(&info->ring, sring, ring_size); - err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref); + err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_page((unsigned long)sring); + free_pages((unsigned long)sring, get_order(ring_size)); info->ring.sring = NULL; goto fail; } - info->ring_ref = gref; + for (i = 0; i < info->nr_ring_pages; i++) + info->ring_ref[i] = gref[i]; err = xenbus_alloc_evtchn(dev, &info->evtchn); if (err) @@ -1292,7 +1312,18 @@ static int talk_to_blkback(struct xenbus_device *dev, { const char *message = NULL; struct xenbus_transaction xbt; - int err; + int err, i; + unsigned int max_page_order = 0; + unsigned int ring_page_order = 0; + + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "max-ring-page-order", "%u", &max_page_order); + if (err != 1) + info->nr_ring_pages = 1; + else { + ring_page_order = min(xen_blkif_max_ring_order, max_page_order); + info->nr_ring_pages = 1 << ring_page_order; + } /* Create shared ring, alloc event channel. */ err = setup_blkring(dev, info); @@ -1306,11 +1337,32 @@ again: goto destroy_blkring; } - err = xenbus_printf(xbt, dev->nodename, - "ring-ref", "%u", info->ring_ref); - if (err) { - message = "writing ring-ref"; - goto abort_transaction; + if (info->nr_ring_pages == 1) { + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref[0]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } else { + err = xenbus_printf(xbt, dev->nodename, + "ring-page-order", "%u", ring_page_order); + if (err) { + message = "writing ring-page-order"; + goto abort_transaction; + } + + for (i = 0; i < info->nr_ring_pages; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_printf(xbt, dev->nodename, ring_ref_name, + "%u", info->ring_ref[i]); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + } } err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", info->evtchn); @@ -1338,6 +1390,9 @@ again: goto destroy_blkring; } + for (i = 0; i < BLK_RING_SIZE(info); i++) + info->shadow[i].req.u.rw.id = i+1; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; xenbus_switch_state(dev, XenbusStateInitialised); return 0; @@ -1361,7 +1416,7 @@ again: static int blkfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { - int err, vdevice, i; + int err, vdevice; struct blkfront_info *info; /* FIXME: Use dynamic device id if this is not set. 
*/ @@ -1422,21 +1477,10 @@ static int blkfront_probe(struct xenbus_device *dev, info->connected = BLKIF_STATE_DISCONNECTED; INIT_WORK(&info->work, blkif_restart_queue); - for (i = 0; i < BLK_RING_SIZE; i++) - info->shadow[i].req.u.rw.id = i+1; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); - err = talk_to_blkback(dev, info); - if (err) { - kfree(info); - dev_set_drvdata(&dev->dev, NULL); - return err; - } - return 0; } @@ -1476,10 +1520,10 @@ static int blkif_recover(struct blkfront_info *info) /* Stage 2: Set up free list. */ memset(&info->shadow, 0, sizeof(info->shadow)); - for (i = 0; i < BLK_RING_SIZE; i++) + for (i = 0; i < BLK_RING_SIZE(info); i++) info->shadow[i].req.u.rw.id = i+1; info->shadow_free = info->ring.req_prod_pvt; - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; + info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; rc = blkfront_setup_indirect(info); if (rc) { @@ -1491,7 +1535,7 @@ static int blkif_recover(struct blkfront_info *info) blk_queue_max_segments(info->rq, segs); bio_list_init(&bio_list); INIT_LIST_HEAD(&requests); - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { /* Not in use? */ if (!copy[i].request) continue; @@ -1697,7 +1741,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) segs = info->max_indirect_segments; } - err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); + err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info)); if (err) goto out_of_memory; @@ -1707,7 +1751,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) * grants, we need to allocate a set of pages that can be * used for mapping indirect grefs */ - int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE; + int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info); BUG_ON(!list_empty(&info->indirect_pages)); for (i = 0; i < num; i++) { @@ -1718,7 +1762,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) } } - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { info->shadow[i].grants_used = kzalloc( sizeof(info->shadow[i].grants_used[0]) * segs, GFP_NOIO); @@ -1740,7 +1784,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info) return 0; out_of_memory: - for (i = 0; i < BLK_RING_SIZE; i++) { + for (i = 0; i < BLK_RING_SIZE(info); i++) { kfree(info->shadow[i].grants_used); info->shadow[i].grants_used = NULL; kfree(info->shadow[i].sg); @@ -1906,8 +1950,15 @@ static void blkback_changed(struct xenbus_device *dev, dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); switch (backend_state) { - case XenbusStateInitialising: case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (talk_to_blkback(dev, info)) { + kfree(info); + dev_set_drvdata(&dev->dev, NULL); + break; + } + case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateReconfiguring: case XenbusStateReconfigured: @@ -2091,6 +2142,12 @@ static int __init xlblk_init(void) if (!xen_domain()) return -ENODEV; + if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) { + pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", + xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER); + xen_blkif_max_ring_order = 0; + } + if (!xen_has_pv_disk_devices()) return -ENODEV; diff --git a/drivers/dma/Kconfig 
b/drivers/dma/Kconfig index bda2cb06dc7a..88d474b78076 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -162,6 +162,17 @@ config MX3_IPU_IRQS To avoid bloating the irq_desc[] array we allocate a sufficient number of IRQ slots and map them dynamically to specific sources. +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. + config TXX9_DMAC tristate "Toshiba TXx9 SoC DMA support" depends on MACH_TX49XX || MACH_TX39XX @@ -245,6 +256,9 @@ config TI_EDMA Enable support for the TI EDMA controller. This DMA engine is found on TI DaVinci and AM33xx parts. +config TI_DMA_CROSSBAR + bool + config ARCH_HAS_ASYNC_TX_FIND_CHANNEL bool @@ -330,6 +344,7 @@ config DMA_OMAP depends on ARCH_OMAP select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX config DMA_BCM2835 tristate "BCM2835 DMA engine support" diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index 69f77d5ba53b..6a4d6f2827da 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o obj-$(CONFIG_IMX_DMA) += imx-dma.o obj-$(CONFIG_MXS_DMA) += mxs-dma.o +obj-$(CONFIG_PXA_DMA) += pxa_dma.o obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_SIRF_DMA) += sirf-dma.o obj-$(CONFIG_TI_EDMA) += edma.o @@ -38,6 +39,7 @@ obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 49d396ec06e5..5de3cf453f35 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -474,7 +474,7 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, u32 val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK); + PL080_CONFIG_TC_IRQ_MASK); writel(val, ch->reg_config); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 57b2141ddddc..59892126d175 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -247,6 +247,10 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); + channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); @@ -635,6 +639,104 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) } /** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct 
data_chunk *first = xt->sgl; + struct at_desc *desc = NULL; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + dev_info(chan2dev(chan), + "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. + */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, + xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = ATC_SRC_WIDTH(dwidth) | + ATC_DST_WIDTH(dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_PIP + | ATC_DST_PIP + | ATC_FC_MEM2MEM; + + /* create the transfer */ + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), + "%s: couldn't allocate our descriptor\n", __func__); + return NULL; + } + + desc->lli.saddr = xt->src_start; + desc->lli.daddr = xt->dst_start; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + desc->txd.cookie = -EBUSY; + desc->total_len = desc->len = len; + desc->tx_width = dwidth; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &desc->txd; +} + +/** * atc_prep_dma_memcpy - prepare a memcpy operation * @chan: the channel to prepare operation on * @dest: operation virtual destination address @@ -1609,6 +1711,7 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1713,6 +1816,9 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.dev = &pdev->dev; /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 2727ca560572..bc8d5ebedd19 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -196,6 +196,11 @@ struct at_desc { size_t len; u32 tx_width; size_t total_len; + + /* 
Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; }; static inline struct at_desc * diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 7992164ea9ec..cf1213de7865 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -235,6 +235,10 @@ struct at_xdmac_lld { dma_addr_t mbr_sa; /* Source Address Member */ dma_addr_t mbr_da; /* Destination Address Member */ u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; @@ -358,6 +362,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, if (at_xdmac_chan_is_cyclic(atchan)) { reg = AT_XDMAC_CNDC_NDVIEW_NDV1; at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); + } else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) { + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; } else { /* * No need to write AT_XDMAC_CC reg, it will be done when the @@ -465,6 +471,33 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) return desc; } +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) { @@ -656,19 +689,14 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ - | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -748,7 +776,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | AT_XDMAC_MBR_UBC_NDE | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), @@ -756,12 +783,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. 
*/ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -783,6 +806,215 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, return &first->tx_dma_desc; } +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
+ */ + u32 chan_cc = AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%d, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=0x%08x, mbr_da=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_sa, desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + struct data_chunk *chunk, *prev_chunk = NULL; + dma_addr_t dst_addr, src_addr; + size_t dst_skip, src_skip, len = 0; + size_t prev_dst_icg = 0, prev_src_icg = 0; + int i; + + if (!xt || (xt->numf != 1) || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=0x%08x, dest=0x%08x, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, xt->src_start, xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + for (i = 0; i < xt->frame_size; i++) { + struct at_xdmac_desc *desc; + size_t src_icg, dst_icg; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + __func__, chunk->size, src_icg, dst_icg); + + /* + * Handle the case where we just have the same + * transfer to setup, we can just increase the + * block number and reuse the same descriptor. 
+ */ + if (prev_chunk && prev && + (prev_chunk->size == chunk->size) && + (prev_src_icg == src_icg) && + (prev_dst_icg == dst_icg)) { + dev_dbg(chan2dev(chan), + "%s: same configuration that the previous chunk, merging the descriptors...\n", + __func__); + at_xdmac_increment_block_count(chan, prev); + continue; + } + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev_chunk = chunk; + prev_dst_icg = dst_icg; + prev_src_icg = src_icg; + prev = desc; + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static struct dma_async_tx_descriptor * at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -814,24 +1046,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(!len)) return NULL; - /* - * Check address alignment to select the greater data width we can use. - * Some XDMAC implementations don't provide dword transfer, in this - * case selecting dword has the same behavior as selecting word transfers. - */ - if (!((src_addr | dst_addr) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); /* Prepare descriptors. */ while (remaining_size) { @@ -861,19 +1076,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); /* Check remaining length and change data width if needed. */ - if (!((src_addr | dst_addr | xfer_size) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else if ((src_addr | dst_addr | xfer_size) & 1) { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); ublen = xfer_size >> dwidth; @@ -884,7 +1088,6 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | (remaining_size ? 
AT_XDMAC_MBR_UBC_NDE : 0) | ublen; desc->lld.mbr_cfg = chan_cc; @@ -893,12 +1096,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n", - __func__, prev, prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -915,6 +1114,93 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return &first->tx_dma_desc; } +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. + */ + u32 chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = value; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=0x%08x, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=0x%08x, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -1445,7 +1731,9 @@ static int at_xdmac_probe(struct platform_device *pdev) } dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + 
dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1458,7 +1746,9 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_tx_status = at_xdmac_tx_status; atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 3ddfd1f6c23c..4a4cce15f25d 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -267,6 +267,13 @@ static void dma_chan_put(struct dma_chan *chan) /* This channel is not in use anymore, free it */ if (!chan->client_count && chan->device->device_free_chan_resources) chan->device->device_free_chan_resources(chan); + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -536,7 +543,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, } /** - * dma_request_slave_channel - try to get specific channel exclusively + * dma_get_slave_channel - try to get specific channel exclusively * @chan: target channel */ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) @@ -648,7 +655,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, EXPORT_SYMBOL_GPL(__dma_request_channel); /** - * dma_request_slave_channel - try to allocate an exclusive slave channel + * dma_request_slave_channel_reason - try to allocate an exclusive slave channel * @dev: pointer to client device structure * @name: slave channel name * @@ -836,6 +843,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 24e5290faa32..57ff46284f15 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1364,7 +1364,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) return ret; } -static struct platform_device_id ep93xx_dma_driver_ids[] = { +static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, { }, diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index 09e2842d15ec..915eec3cc279 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -881,10 +881,6 @@ static int fsl_edma_probe(struct platform_device *pdev) } - ret = fsl_edma_irq_init(pdev, fsl_edma); - if (ret) - return ret; - fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); @@ -900,6 +896,11 @@ static int fsl_edma_probe(struct 
platform_device *pdev) fsl_edma_chan_mux(fsl_chan, 0, false); } + edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR); + ret = fsl_edma_irq_init(pdev, fsl_edma); + if (ret) + return ret; + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index eed405976ea9..865501fcc67d 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -193,7 +193,7 @@ struct imxdma_filter_data { int request; }; -static struct platform_device_id imx_dma_devtype[] = { +static const struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", .driver_data = IMX1_DMA, diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 62bbd79338e0..77b6aab04f47 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -420,7 +420,7 @@ static struct sdma_driver_data sdma_imx6q = { .script_addrs = &sdma_script_imx6q, }; -static struct platform_device_id sdma_devtypes[] = { +static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", .driver_data = (unsigned long)&sdma_imx25, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1c56001df676..fbaf1ead2597 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -19,6 +19,7 @@ #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/clk.h> @@ -30,6 +31,11 @@ #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + static void mv_xor_issue_pending(struct dma_chan *chan); #define to_mv_xor_chan(chan) \ @@ -56,18 +62,30 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, - u32 next_desc_addr) +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; - BUG_ON(hw_desc->phy_next_desc); - hw_desc->phy_next_desc = next_desc_addr; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } } -static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) { struct mv_xor_desc *hw_desc = desc->hw_desc; - hw_desc->phy_next_desc = 0; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; } static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, @@ -104,7 +122,7 @@ static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) return intr_cause; } -static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) { u32 val; @@ -114,14 +132,14 @@ static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) { u32 val = 0xFFFF0000 >> (chan->idx * 16); writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_set_mode(struct mv_xor_chan *chan, - enum dma_transaction_type type) +static void mv_chan_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) { u32 
op_mode; u32 config = readl_relaxed(XOR_CONFIG(chan)); @@ -144,6 +162,25 @@ static void mv_set_mode(struct mv_xor_chan *chan, config &= ~0x7; config |= op_mode; + if (IS_ENABLED(__BIG_ENDIAN)) + config |= XOR_DESCRIPTOR_SWAP; + else + config &= ~XOR_DESCRIPTOR_SWAP; + + writel_relaxed(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan) +{ + u32 op_mode; + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + op_mode = XOR_OPERATION_MODE_IN_DESC; + + config &= ~0x7; + config |= op_mode; + #if defined(__BIG_ENDIAN) config |= XOR_DESCRIPTOR_SWAP; #else @@ -151,7 +188,6 @@ static void mv_set_mode(struct mv_xor_chan *chan, #endif writel_relaxed(config, XOR_CONFIG(chan)); - chan->current_type = type; } static void mv_chan_activate(struct mv_xor_chan *chan) @@ -171,28 +207,13 @@ static char mv_chan_is_busy(struct mv_xor_chan *chan) return (state == 1) ? 1 : 0; } -/** - * mv_xor_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &mv_chan->lock while calling this function - */ -static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *slot) -{ - dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n", - __func__, __LINE__, slot); - - slot->slot_used = 0; - -} - /* - * mv_xor_start_new_chain - program the engine to operate on new chain headed by - * sw_desc + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc * Caller must hold &mv_chan->lock while calling this function */ -static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *sw_desc) +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", __func__, __LINE__, sw_desc); @@ -205,8 +226,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, } static dma_cookie_t -mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) { BUG_ON(desc->async_tx.cookie < 0); @@ -230,93 +252,110 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, } static int -mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { - if (async_tx_test_ack(&iter->async_tx)) { - list_del(&iter->completed_node); - mv_xor_free_slots(mv_chan, iter); - } + if (async_tx_test_ack(&iter->async_tx)) + list_move_tail(&iter->node, &mv_chan->free_slots); } return 0; } static int -mv_xor_clean_slot(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan) +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", __func__, __LINE__, desc, desc->async_tx.flags); - list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations * until 'ack' is set */ - if (!async_tx_test_ack(&desc->async_tx)) { + if (!async_tx_test_ack(&desc->async_tx)) /* move this slot to the completed_slots */ - list_add_tail(&desc->completed_node, &mv_chan->completed_slots); - return 0; - } + list_move_tail(&desc->node, &mv_chan->completed_slots); + 
else + list_move_tail(&desc->node, &mv_chan->free_slots); - mv_xor_free_slots(mv_chan, desc); return 0; } /* This function must be called with the mv_xor_chan spinlock held */ -static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dma_cookie_t cookie = 0; int busy = mv_chan_is_busy(mv_chan); u32 current_desc = mv_chan_get_current_desc(mv_chan); - int seen_current = 0; + int current_cleaned = 0; + struct mv_xor_desc *hw_desc; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); - mv_xor_clean_completed_slots(mv_chan); + mv_chan_clean_completed_slots(mv_chan); /* free completed slots from the chain starting with * the oldest descriptor */ list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { - prefetch(_iter); - prefetch(&_iter->async_tx); + node) { - /* do not advance past the current descriptor loaded into the - * hardware channel, subsequent descriptors are either in - * process or have not been submitted - */ - if (seen_current) - break; + /* clean finished descriptors */ + hw_desc = iter->hw_desc; + if (hw_desc->status & XOR_DESC_SUCCESS) { + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); - /* stop the search if we reach the current descriptor and the - * channel is busy - */ - if (iter->async_tx.phys == current_desc) { - seen_current = 1; - if (busy) + /* done processing desc, clean slot */ + mv_desc_clean_slot(iter, mv_chan); + + /* break if we did cleaned the current */ + if (iter->async_tx.phys == current_desc) { + current_cleaned = 1; break; + } + } else { + if (iter->async_tx.phys == current_desc) { + current_cleaned = 0; + break; + } } - - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); - - if (mv_xor_clean_slot(iter, mv_chan)) - break; } if ((busy == 0) && !list_empty(&mv_chan->chain)) { - struct mv_xor_desc_slot *chain_head; - chain_head = list_entry(mv_chan->chain.next, - struct mv_xor_desc_slot, - chain_node); - - mv_xor_start_new_chain(mv_chan, chain_head); + if (current_cleaned) { + /* + * current descriptor cleaned and removed, run + * from list head + */ + iter = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + if (!list_is_last(&iter->node, &mv_chan->chain)) { + /* + * descriptors are still waiting after + * current, trigger them + */ + iter = list_entry(iter->node.next, + struct mv_xor_desc_slot, + node); + mv_chan_start_new_chain(mv_chan, iter); + } else { + /* + * some descriptors are still waiting + * to be cleaned + */ + tasklet_schedule(&mv_chan->irq_tasklet); + } + } } if (cookie > 0) @@ -328,56 +367,35 @@ static void mv_xor_tasklet(unsigned long data) struct mv_xor_chan *chan = (struct mv_xor_chan *) data; spin_lock_bh(&chan->lock); - mv_xor_slot_cleanup(chan); + mv_chan_slot_cleanup(chan); spin_unlock_bh(&chan->lock); } static struct mv_xor_desc_slot * -mv_xor_alloc_slot(struct mv_xor_chan *mv_chan) +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) { - struct mv_xor_desc_slot *iter, *_iter; - int retry = 0; + struct mv_xor_desc_slot *iter; - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - if (retry == 0) - iter = mv_chan->last_used; - else - iter = list_entry(&mv_chan->all_slots, - struct mv_xor_desc_slot, - slot_node); - - 
list_for_each_entry_safe_continue( - iter, _iter, &mv_chan->all_slots, slot_node) { - - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slot_used) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - continue; - } + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); /* pre-ack descriptor */ async_tx_ack(&iter->async_tx); - - iter->slot_used = 1; - INIT_LIST_HEAD(&iter->chain_node); iter->async_tx.cookie = -EBUSY; - mv_chan->last_used = iter; - mv_desc_clear_next_desc(iter); return iter; } - if (!retry++) - goto retry; + + spin_unlock_bh(&mv_chan->lock); /* try to free some slots if the allocation fails */ tasklet_schedule(&mv_chan->irq_tasklet); @@ -403,14 +421,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); if (list_empty(&mv_chan->chain)) - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + list_move_tail(&sw_desc->node, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, - chain_node); - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", &old_chain_tail->async_tx.phys); @@ -431,7 +449,7 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) } if (new_hw_chain) - mv_xor_start_new_chain(mv_chan, sw_desc); + mv_chan_start_new_chain(mv_chan, sw_desc); spin_unlock_bh(&mv_chan->lock); @@ -463,26 +481,20 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = mv_xor_tx_submit; - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->node); dma_desc = mv_chan->dma_desc_pool; slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; slot->idx = idx++; spin_lock_bh(&mv_chan->lock); mv_chan->slots_allocated = idx; - list_add_tail(&slot->slot_node, &mv_chan->all_slots); + list_add_tail(&slot->node, &mv_chan->free_slots); spin_unlock_bh(&mv_chan->lock); } - if (mv_chan->slots_allocated && !mv_chan->last_used) - mv_chan->last_used = list_entry(mv_chan->all_slots.next, - struct mv_xor_desc_slot, - slot_node); - dev_dbg(mv_chan_to_devp(mv_chan), - "allocated %d descriptor slots last_used: %p\n", - mv_chan->slots_allocated, mv_chan->last_used); + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); return mv_chan->slots_allocated ? 
: -ENOMEM; } @@ -503,16 +515,17 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, "%s src_cnt: %d len: %u dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); - spin_lock_bh(&mv_chan->lock); - sw_desc = mv_xor_alloc_slot(mv_chan); + sw_desc = mv_chan_alloc_slot(mv_chan); if (sw_desc) { sw_desc->type = DMA_XOR; sw_desc->async_tx.flags = flags; mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); while (src_cnt--) mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); } - spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan_to_devp(mv_chan), "%s sw_desc %p async_tx %p \n", __func__, sw_desc, &sw_desc->async_tx); @@ -556,25 +569,29 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { in_use_descs++; - list_del(&iter->chain_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { in_use_descs++; - list_del(&iter->completed_node); + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe_reverse( - iter, _iter, &mv_chan->all_slots, slot_node) { - list_del(&iter->slot_node); + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); kfree(iter); mv_chan->slots_allocated--; } - mv_chan->last_used = NULL; dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", __func__, mv_chan->slots_allocated); @@ -603,13 +620,13 @@ static enum dma_status mv_xor_status(struct dma_chan *chan, return ret; spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); spin_unlock_bh(&mv_chan->lock); return dma_cookie_status(chan, cookie, txstate); } -static void mv_dump_xor_regs(struct mv_xor_chan *chan) +static void mv_chan_dump_regs(struct mv_xor_chan *chan) { u32 val; @@ -632,8 +649,8 @@ static void mv_dump_xor_regs(struct mv_xor_chan *chan) dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); } -static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, - u32 intr_cause) +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) { if (intr_cause & XOR_INT_ERR_DECODE) { dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); @@ -643,7 +660,7 @@ static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", chan->idx, intr_cause); - mv_dump_xor_regs(chan); + mv_chan_dump_regs(chan); WARN_ON(1); } @@ -655,11 +672,11 @@ static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); if (intr_cause & XOR_INTR_ERRORS) - mv_xor_err_interrupt_handler(chan, intr_cause); + mv_chan_err_interrupt_handler(chan, intr_cause); tasklet_schedule(&chan->irq_tasklet); - mv_xor_device_clear_eoc_cause(chan); + mv_chan_clear_eoc_cause(chan); return IRQ_HANDLED; } @@ -678,7 +695,7 @@ static void mv_xor_issue_pending(struct dma_chan *chan) * Perform a transaction to verify the HW works. 
*/ -static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) { int i, ret; void *src, *dest; @@ -787,7 +804,7 @@ out: #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ static int -mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) { int i, src_idx, ret; struct page *dest; @@ -951,7 +968,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq) + int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -963,6 +980,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; + mv_chan->op_in_desc = op_in_desc; dma_dev = &mv_chan->dmadev; @@ -1014,7 +1032,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan); /* clear errors before enabling interrupts */ - mv_xor_device_clear_err_status(mv_chan); + mv_chan_clear_err_status(mv_chan); ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, 0, dev_name(&pdev->dev), mv_chan); @@ -1023,32 +1041,37 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan_unmask_interrupts(mv_chan); - mv_set_mode(mv_chan, DMA_XOR); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode_to_desc(mv_chan); + else + mv_chan_set_mode(mv_chan, DMA_XOR); spin_lock_init(&mv_chan->lock); INIT_LIST_HEAD(&mv_chan->chain); INIT_LIST_HEAD(&mv_chan->completed_slots); - INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); mv_chan->dmachan.device = dma_dev; dma_cookie_init(&mv_chan->dmachan); list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = mv_xor_memcpy_self_test(mv_chan); + ret = mv_chan_memcpy_self_test(mv_chan); dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); if (ret) goto err_free_irq; } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = mv_xor_xor_self_test(mv_chan); + ret = mv_chan_xor_self_test(mv_chan); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? 
"intr " : ""); @@ -1097,6 +1120,13 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + {}, +}; +MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; @@ -1104,6 +1134,7 @@ static int mv_xor_probe(struct platform_device *pdev) struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; int i, ret; + int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1148,11 +1179,15 @@ static int mv_xor_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *np; int i = 0; + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; + op_in_desc = (int)of_id->data; dma_cap_zero(cap_mask); if (of_property_read_bool(np, "dmacap,memcpy")) @@ -1169,7 +1204,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); + cap_mask, irq, op_in_desc); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1198,7 +1233,8 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); + cd->cap_mask, irq, + XOR_MODE_IN_REG); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; @@ -1244,14 +1280,6 @@ static int mv_xor_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", }, - {}, -}; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); -#endif - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, .remove = mv_xor_remove, diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 91958dba39a2..b7455b42137b 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -19,7 +19,7 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> -#define MV_XOR_POOL_SIZE PAGE_SIZE +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 @@ -30,7 +30,13 @@ /* Values for the XOR_CONFIG register */ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 #define XOR_DESCRIPTOR_SWAP BIT(14) +#define XOR_DESC_SUCCESS 0x40000000 + +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) @@ -88,13 +94,14 @@ struct mv_xor_device { * @mmr_base: memory mapped register base * @idx: the index of the xor channel * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver * @completed_slots: slots completed by HW but still need to be acked * @device: parent device * @common: common dmaengine channel object members - * @last_used: place holder for allocation to continue from where it left off - * @all_slots: complete domain of slots usable by the channel * @slots_allocated: records the actual size of the descriptor slot pool * @irq_tasklet: bottom half where mv_xor_slot_cleanup 
runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. */ struct mv_xor_chan { int pending; @@ -105,16 +112,17 @@ struct mv_xor_chan { int irq; enum dma_transaction_type current_type; struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; struct list_head completed_slots; dma_addr_t dma_desc_pool; void *dma_desc_pool_virt; size_t pool_size; struct dma_device dmadev; struct dma_chan dmachan; - struct mv_xor_desc_slot *last_used; - struct list_head all_slots; int slots_allocated; struct tasklet_struct irq_tasklet; + int op_in_desc; char dummy_src[MV_XOR_MIN_BYTE_COUNT]; char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; dma_addr_t dummy_src_addr, dummy_dst_addr; @@ -122,9 +130,7 @@ struct mv_xor_chan { /** * struct mv_xor_desc_slot - software descriptor - * @slot_node: node on the mv_xor_chan.all_slots list - * @chain_node: node on the mv_xor_chan.chain list - * @completed_node: node on the mv_xor_chan.completed_slots list + * @node: node on the mv_xor_chan lists * @hw_desc: virtual address of the hardware descriptor chain * @phys: hardware address of the hardware descriptor chain * @slot_used: slot in use or not @@ -133,12 +139,9 @@ struct mv_xor_chan { * @async_tx: support for the async_tx api */ struct mv_xor_desc_slot { - struct list_head slot_node; - struct list_head chain_node; - struct list_head completed_node; + struct list_head node; enum dma_transaction_type type; void *hw_desc; - u16 slot_used; u16 idx; struct dma_async_tx_descriptor async_tx; }; diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index 829ec686dac3..60de35251da5 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -170,7 +170,7 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static struct platform_device_id mxs_dma_ids[] = { +static const struct platform_device_id mxs_dma_ids[] = { { .name = "imx23-dma-apbh", .driver_data = (kernel_ulong_t) &mxs_dma_types[0], diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c index 88b77c98365d..2b5a198ac77e 100644 --- a/drivers/dma/nbpfaxi.c +++ b/drivers/dma/nbpfaxi.c @@ -1455,7 +1455,7 @@ static int nbpf_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id nbpf_ids[] = { +static const struct platform_device_id nbpf_ids[] = { {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index cbd4a8aff120..1e1f2986eba8 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -45,6 +45,50 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) } /** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. 
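A slave driver requesting a channel behind such a router needs no changes; the translation happens entirely inside of_dma_router_xlate(). A minimal consumer-side sketch (the client device, the "rx" name and fifo_phys are hypothetical; nothing here is specific to this patch):

    struct dma_slave_config cfg = {
            .direction      = DMA_DEV_TO_MEM,
            .src_addr       = fifo_phys,                 /* hypothetical device FIFO */
            .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
            .src_maxburst   = 8,
    };
    struct dma_chan *chan;

    chan = dma_request_slave_channel(dev, "rx");         /* resolved through the router */
    if (!chan)
            return -ENODEV;
    dmaengine_slave_config(chan, &cfg);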
+ */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) + return NULL; + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (chan) { + chan->router = ofdma->dma_router; + chan->route_data = route_data; + } else { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } + + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** * of_dma_controller_register - Register a DMA controller to DT DMA helpers * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle @@ -110,6 +154,51 @@ void of_dma_controller_free(struct device_node *np) EXPORT_SYMBOL_GPL(of_dma_controller_free); /** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. 
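On the provider side, a router driver fills a struct dma_router and hands its route-allocate callback to this helper from its probe. A minimal sketch; the xbar_* names are hypothetical, only of_dma_router_register() and the dev/route_free members of struct dma_router come from this patch, and includes and error paths are trimmed:

    static void xbar_route_free(struct device *dev, void *route_data)
    {
            /* undo whatever xbar_route_allocate() programmed */
            kfree(route_data);
    }

    static void *xbar_route_allocate(struct of_phandle_args *dma_spec,
                                     struct of_dma *ofdma)
    {
            /*
             * Rewrite dma_spec (np and args) so it points at the real DMA
             * controller, program the crossbar, and return router-private
             * data that route_free() gets back later.
             */
            return ERR_PTR(-EINVAL);        /* placeholder */
    }

    static int xbar_probe(struct platform_device *pdev)
    {
            struct dma_router *rtr;

            rtr = devm_kzalloc(&pdev->dev, sizeof(*rtr), GFP_KERNEL);
            if (!rtr)
                    return -ENOMEM;

            rtr->dev = &pdev->dev;
            rtr->route_free = xbar_route_free;

            return of_dma_router_register(pdev->dev.of_node,
                                          xbar_route_allocate, rtr);
    }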
+ */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** * of_dma_match_channel - Check if a DMA specifier matches name * @np: device node to look for DMA channels * @name: channel name to be matched diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 167dbaf65742..249445c8a4c6 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -22,6 +22,9 @@ #include "virt-dma.h" +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + struct omap_dmadev { struct dma_device ddev; spinlock_t lock; @@ -31,9 +34,10 @@ struct omap_dmadev { const struct omap_dma_reg *reg_map; struct omap_system_dma_plat_info *plat; bool legacy; + unsigned dma_requests; spinlock_t irq_lock; uint32_t irq_enable_mask; - struct omap_chan *lch_map[32]; + struct omap_chan *lch_map[OMAP_SDMA_CHANNELS]; }; struct omap_chan { @@ -362,7 +366,7 @@ static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, struct omap_sg *sg = d->sg + idx; unsigned cxsa, cxei, cxfi; - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CDSA; cxei = CDEI; cxfi = CDFI; @@ -408,7 +412,7 @@ static void omap_dma_start_desc(struct omap_chan *c) if (dma_omap1()) omap_dma_chan_write(c, CCR2, d->ccr >> 16); - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CSSA; cxei = CSEI; cxfi = CSFI; @@ -589,6 +593,7 @@ static void omap_dma_free_chan_resources(struct dma_chan *chan) omap_free_dma(c->dma_ch); dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig); + c->dma_sig = 0; } static size_t omap_dma_sg_size(struct omap_sg *sg) @@ -948,6 +953,51 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( return vchan_tx_prep(&c->vc, &d->vd, flags); } +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= 
CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct omap_chan *c = to_omap_dma_chan(chan); @@ -1037,7 +1087,7 @@ static int omap_dma_resume(struct dma_chan *chan) return 0; } -static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) +static int omap_dma_chan_init(struct omap_dmadev *od) { struct omap_chan *c; @@ -1046,7 +1096,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) return -ENOMEM; c->reg_map = od->reg_map; - c->dma_sig = dma_sig; c->vc.desc_free = omap_dma_desc_free; vchan_init(&c->vc, &od->ddev); INIT_LIST_HEAD(&c->node); @@ -1094,12 +1143,14 @@ static int omap_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; od->ddev.device_tx_status = omap_dma_tx_status; od->ddev.device_issue_pending = omap_dma_issue_pending; od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; od->ddev.device_config = omap_dma_slave_config; od->ddev.device_pause = omap_dma_pause; od->ddev.device_resume = omap_dma_resume; @@ -1116,8 +1167,17 @@ static int omap_dma_probe(struct platform_device *pdev) tasklet_init(&od->task, omap_dma_sched, (unsigned long)od); - for (i = 0; i < 127; i++) { - rc = omap_dma_chan_init(od, i); + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + for (i = 0; i < OMAP_SDMA_CHANNELS; i++) { + rc = omap_dma_chan_init(od); if (rc) { omap_dma_free(od); return rc; @@ -1208,10 +1268,14 @@ static struct platform_driver omap_dma_driver = { bool omap_dma_filter_fn(struct dma_chan *chan, void *param) { if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); unsigned req = *(unsigned *)param; - return req == c->dma_sig; + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } } return false; } diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 340f9e607cd8..f513f77b1d85 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -1424,8 +1424,8 @@ static int pl330_submit_req(struct pl330_thread *thrd, goto xfer_exit; if (ret > pl330->mcbufsz / 2) { - dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", - __func__, __LINE__); + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); ret = -ENOMEM; goto xfer_exit; } @@ -2584,12 +2584,14 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_dmac *pl330 = pch->dmac; + struct pl330_dmac *pl330; int burst; if (unlikely(!pch || !len)) return NULL; + pl330 = pch->dmac; + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c new file mode 100644 index 
000000000000..ddcbbf5cd9e9 --- /dev/null +++ b/drivers/dma/pxa_dma.c @@ -0,0 +1,1467 @@ +/* + * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/platform_data/mmp_dma.h> +#include <linux/dmapool.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of.h> +#include <linux/dma/pxa-dma.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. 
*/ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. + */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry *dbgfs_state; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static int 
dbg_show_requester_chan(struct seq_file *s, void *p) +{ + int pos = 0; + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + pos += seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return pos; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? 
+ "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root) + goto err_root; + + pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root, + pdev, &dbg_fops_state); + if (!pdev->dbgfs_state) + goto err_state; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_state), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + goto err_alloc; + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < pdev->nr_chans; i++) { + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); + if (!pdev->dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(pdev->dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(pdev->dbgfs_root); +err_root: + pr_err("pxad: debugfs is not available\n"); +} + +static void 
pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +/* + * In the transition phase where legacy pxa handling is done at the same time as + * mmp_dma, the DMA physical channel split between the 2 DMA providers is done + * through legacy_reserved. Legacy code reserves DMA channels by settings + * corresponding bits in legacy_reserved. + */ +static u32 legacy_reserved; +static u32 legacy_unavailable; + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + if (i < 32) + legacy_unavailable |= BIT(i); + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + int i; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (i = 0; i < 32; i++) + if (chan->phy == &pdev->phys[i]) + legacy_unavailable &= ~BIT(i); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct 
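The priority table in the lookup_phy() comment above comes from the expression (i & 0xf) >> 2 on the physical channel index; a quick check of a few values, using nothing beyond the code already shown:

    /*
     * i = 2  : (2  & 0xf) >> 2 = 0  -> highest priority (ch 0 - 3, 16 - 19)
     * i = 21 : (21 & 0xf) >> 2 = 1  ->                  (ch 4 - 7, 20 - 23)
     * i = 30 : (30 & 0xf) >> 2 = 3  -> lowest priority  (ch 12 - 15, 28 - 31)
     */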
pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. 
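The "completed after having been hot chained" condition in this comment is evaluated by is_desc_completed(), which in turn relies on the updater descriptor built by set_updater_desc() above: the updater copies the u32 sitting at its own ddadr offset (DDADR_STOP) over its own dtadr field, so the descriptor visibly mutates once the engine has executed it. A condensed restatement, where updater_dma stands for the updater's own DMA address (a name introduced here for readability, not one from the driver):

    /* struct pxad_desc_hw layout: ddadr @0, dsadr @4, dtadr @8, dcmd @12 */
    updater->ddadr = DDADR_STOP;            /* the value that will be copied  */
    updater->dsadr = updater_dma;           /* source: its own ddadr field    */
    updater->dtadr = updater_dma + 8;       /* target: its own dtadr field    */
    updater->dcmd  = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | sizeof(u32);

    /* once the engine has run the updater, dtadr has been overwritten */
    completed = (updater->dtadr != updater->dsadr + 8);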
+ */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + unsigned long flags; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d\n", + __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (is_desc_completed(vd)) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + BUG_ON(sw_desc->nb_desc == 0); + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + 
if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(sizeof(*sw_desc) + + nb_hw_desc * sizeof(struct pxad_desc_hw *), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? 
"cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + *dcmd = 0; + if (chan->cfg.direction == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; + } + if (chan->cfg.direction == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; + } + if (chan->cfg.direction == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; + + /* FIXME: drivers should be ported over to use the filter + * function. Once that's done, the following two lines can + * be removed. 
+ */ + if (chan->cfg.slave_id) + chan->drcmr = chan->cfg.slave_id; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? 
dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0.
+ */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + dma_async_device_unregister(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id const pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = 
dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + } + + return dma_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + struct resource *iores; + int ret, dma_channels = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + pdev->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + else if (pdata && pdata->dma_channels) + dma_channels = pdata->dma_channels; + else + dma_channels = 32; /* default 32 channel */ + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = 
pxad_probe, + .remove = pxad_remove, +}; + +bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} +EXPORT_SYMBOL_GPL(pxad_filter_fn); + +int pxad_toggle_reserved_channel(int legacy_channel) +{ + if (legacy_unavailable & (BIT(legacy_channel))) + return -EBUSY; + legacy_reserved ^= BIT(legacy_channel); + return 0; +} +EXPORT_SYMBOL_GPL(pxad_toggle_reserved_channel); + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 01dcaf21b988..17ccdfd28f37 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -1168,7 +1168,7 @@ static struct soc_data soc_s3c2443 = { .has_clocks = true, }; -static struct platform_device_id s3c24xx_dma_driver_ids[] = { +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { { .name = "s3c2410-dma", .driver_data = (kernel_ulong_t)&soc_s3c2410, diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index e0302c784ba4..7820d07e7bee 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -183,7 +183,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; - unsigned long modules[256 / BITS_PER_LONG]; + DECLARE_BITMAP(modules, 256); }; #define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c index 4fb99970a3ea..96ea3828c3eb 100644 --- a/drivers/dma/sh/shdma-r8a73a4.c +++ b/drivers/dma/sh/shdma-r8a73a4.c @@ -11,7 +11,7 @@ #include "shdma-arm.h" -const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; +static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; static const struct sh_dmae_slave_config dma_slaves[] = { { diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index a1afda43b8ef..8c5186cc9f63 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -23,8 +23,13 @@ #include "dmaengine.h" +#define SIRFSOC_DMA_VER_A7V1 1 +#define SIRFSOC_DMA_VER_A7V2 2 +#define SIRFSOC_DMA_VER_A6 4 + #define SIRFSOC_DMA_DESCRIPTORS 16 #define SIRFSOC_DMA_CHANNELS 16 +#define SIRFSOC_DMA_TABLE_NUM 256 #define SIRFSOC_DMA_CH_ADDR 0x00 #define SIRFSOC_DMA_CH_XLEN 0x04 @@ -35,15 +40,44 @@ #define SIRFSOC_DMA_CH_VALID 0x140 #define SIRFSOC_DMA_CH_INT 0x144 #define SIRFSOC_DMA_INT_EN 0x148 -#define SIRFSOC_DMA_INT_EN_CLR 0x14C +#define SIRFSOC_DMA_INT_EN_CLR 0x14C #define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 -#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x15C +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x154 +#define SIRFSOC_DMA_WIDTH_ATLAS7 0x10 +#define SIRFSOC_DMA_VALID_ATLAS7 0x14 +#define SIRFSOC_DMA_INT_ATLAS7 0x18 +#define SIRFSOC_DMA_INT_EN_ATLAS7 0x1c +#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7 0x20 +#define SIRFSOC_DMA_CUR_DATA_ADDR 0x34 +#define SIRFSOC_DMA_MUL_ATLAS7 0x38 +#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7 0x158 +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7 0x15C +#define SIRFSOC_DMA_IOBG_SCMD_EN 0x800 +#define SIRFSOC_DMA_EARLY_RESP_SET 0x818 +#define SIRFSOC_DMA_EARLY_RESP_CLR 0x81C #define SIRFSOC_DMA_MODE_CTRL_BIT 4 #define SIRFSOC_DMA_DIR_CTRL_BIT 5 +#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7 2 +#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7 3 +#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7 4 
+#define SIRFSOC_DMA_TAB_NUM_ATLAS7 7 +#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7 5 +#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7 25 +#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT 32 + +#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7 BIT(0) +#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7 BIT(1) +#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7 BIT(2) +#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7 BIT(3) +#define SIRFSOC_DMA_INT_INV_INT_ATLAS7 BIT(4) +#define SIRFSOC_DMA_INT_END_INT_ATLAS7 BIT(5) +#define SIRFSOC_DMA_INT_ALL_ATLAS7 0x3F /* xlen and dma_width register is in 4 bytes boundary */ #define SIRFSOC_DMA_WORD_LEN 4 +#define SIRFSOC_DMA_XLEN_MAX_V1 0x800 +#define SIRFSOC_DMA_XLEN_MAX_V2 0x1000 struct sirfsoc_dma_desc { struct dma_async_tx_descriptor desc; @@ -56,7 +90,9 @@ struct sirfsoc_dma_desc { int width; /* DMA width */ int dir; bool cyclic; /* is loop DMA? */ + bool chain; /* is chain DMA? */ u32 addr; /* DMA buffer address */ + u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */ }; struct sirfsoc_dma_chan { @@ -87,10 +123,25 @@ struct sirfsoc_dma { void __iomem *base; int irq; struct clk *clk; - bool is_marco; + int type; + void (*exec_desc)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); struct sirfsoc_dma_regs regs_save; }; +struct sirfsoc_dmadata { + void (*exec)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); + int type; +}; + +enum sirfsoc_dma_chain_flag { + SIRFSOC_DMA_CHAIN_NORMAL = 0x01, + SIRFSOC_DMA_CHAIN_PAUSE = 0x02, + SIRFSOC_DMA_CHAIN_LOOP = 0x03, + SIRFSOC_DMA_CHAIN_END = 0x04 +}; + #define DRV_NAME "sirfsoc_dma" static int sirfsoc_dma_runtime_suspend(struct device *dev); @@ -109,48 +160,105 @@ static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); } +static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + if (sdesc->chain) { + /* DMA v2 HW chain mode */ + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3, + base + SIRFSOC_DMA_CH_CTRL); + } else { + /* DMA v2 legacy mode */ + writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7); + writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)), + base + SIRFSOC_DMA_MUL_ATLAS7); + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + 0x3, base + SIRFSOC_DMA_CH_CTRL); + } + writel_relaxed(sdesc->chain ? 
SIRFSOC_DMA_INT_END_INT_ATLAS7 : + (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 | + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7), + base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) + writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); +} + +static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN); + writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7), + base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + } + +} + +static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL), + base + SIRFSOC_DMA_CH_LOOP_CTRL); + } + +} + /* Execute all queued DMA descriptors */ static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) { struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); int cid = schan->chan.chan_id; struct sirfsoc_dma_desc *sdesc = NULL; + void __iomem *base; /* * lock has been held by functions calling this, so we don't hold * lock again */ - + base = sdma->base; sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, - node); + node); /* Move the first queued descriptor to active list */ list_move_tail(&sdesc->node, &schan->active); - /* Start the DMA transfer */ - writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + - cid * 4); - writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | - (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), - sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_XLEN); - writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_YLEN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | - (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; - /* - * writel has an implict memory write barrier to make sure data is - * flushed into memory before starting DMA - */ - writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + /* Start the DMA transfer */ + sdma->exec_desc(sdesc, cid, schan->mode, base); - if (sdesc->cyclic) { - writel((1 << cid) | 1 << (cid + 16) | - 
readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + if (sdesc->cyclic) schan->happened_cyclic = schan->completed_cyclic = 0; - } } /* Interrupt handler */ @@ -160,27 +268,65 @@ static irqreturn_t sirfsoc_dma_irq(int irq, void *data) struct sirfsoc_dma_chan *schan; struct sirfsoc_dma_desc *sdesc = NULL; u32 is; + bool chain; int ch; + void __iomem *reg; + + switch (sdma->type) { + case SIRFSOC_DMA_VER_A6: + case SIRFSOC_DMA_VER_A7V1: + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + reg = sdma->base + SIRFSOC_DMA_CH_INT; + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, reg); + schan = &sdma->channels[ch]; + spin_lock(&schan->lock); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + spin_unlock(&schan->lock); + } + break; - is = readl(sdma->base + SIRFSOC_DMA_CH_INT); - while ((ch = fls(is) - 1) >= 0) { - is &= ~(1 << ch); - writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); - schan = &sdma->channels[ch]; + case SIRFSOC_DMA_VER_A7V2: + is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7); + reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7; + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg); + schan = &sdma->channels[0]; spin_lock(&schan->lock); - - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); if (!sdesc->cyclic) { - /* Execute queued descriptors */ - list_splice_tail_init(&schan->active, &schan->completed); - if (!list_empty(&schan->queued)) - sirfsoc_dma_execute(schan); - } else + chain = sdesc->chain; + if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) || + (!chain && + (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } + } else if (sdesc->cyclic && (is & + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7)) schan->happened_cyclic++; spin_unlock(&schan->lock); + break; + + default: + break; } /* Schedule tasklet */ @@ -227,16 +373,15 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) schan->chan.completed_cookie = last_cookie; spin_unlock_irqrestore(&schan->lock, flags); } else { - /* for cyclic channel, desc is always in active list */ - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - - if (!sdesc || (sdesc && !sdesc->cyclic)) { - /* without active cyclic DMA */ + if (list_empty(&schan->active)) { spin_unlock_irqrestore(&schan->lock, flags); continue; } + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + /* cyclic DMA */ happened_cyclic = schan->happened_cyclic; spin_unlock_irqrestore(&schan->lock, flags); @@ -307,20 +452,32 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) { - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & - ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - } else { + 
switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + default: + break; } - writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); - list_splice_tail_init(&schan->active, &schan->free); list_splice_tail_init(&schan->queued, &schan->free); @@ -338,13 +495,25 @@ static int sirfsoc_dma_pause_chan(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -359,14 +528,25 @@ static int sirfsoc_dma_resume_chan(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&schan->lock, flags); - - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - | ((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0x10001, + sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) | + ((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -473,14 +653,31 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, spin_lock_irqsave(&schan->lock, flags); - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * - (sdesc->width * SIRFSOC_DMA_WORD_LEN); + if (list_empty(&schan->active)) { + ret = dma_cookie_status(chan, cookie, txstate); + dma_set_residue(txstate, 0); + spin_unlock_irqrestore(&schan->lock, flags); + return ret; + } + sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node); + if (sdesc->cyclic) + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); 
+ else + dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN; ret = dma_cookie_status(chan, cookie, txstate); - dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) - << 2; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR); + } else { + dma_pos = readl_relaxed( + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2; + } + residue = dma_request_bytes - (dma_pos - sdesc->addr); dma_set_residue(txstate, residue); @@ -647,6 +844,7 @@ static int sirfsoc_dma_probe(struct platform_device *op) struct dma_device *dma; struct sirfsoc_dma *sdma; struct sirfsoc_dma_chan *schan; + struct sirfsoc_dmadata *data; struct resource res; ulong regs_start, regs_size; u32 id; @@ -657,9 +855,11 @@ static int sirfsoc_dma_probe(struct platform_device *op) dev_err(dev, "Memory exhausted!\n"); return -ENOMEM; } - - if (of_device_is_compatible(dn, "sirf,marco-dmac")) - sdma->is_marco = true; + data = (struct sirfsoc_dmadata *) + (of_match_device(op->dev.driver->of_match_table, + &op->dev)->data); + sdma->exec_desc = data->exec; + sdma->type = data->type; if (of_property_read_u32(dn, "cell-index", &id)) { dev_err(dev, "Fail to get DMAC index\n"); @@ -816,6 +1016,8 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; /* * if we were runtime-suspended before, resume to enable clock @@ -827,11 +1029,19 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) return ret; } + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + } + /* * DMA controller will lose all registers while suspending * so we need to save registers for active channels */ - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -841,7 +1051,7 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) save->ctrl[ch] = readl_relaxed(sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); } - save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN); + save->interrupt_en = readl_relaxed(sdma->base + int_offset); /* Disable clock */ sirfsoc_dma_runtime_suspend(dev); @@ -857,14 +1067,27 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; + u32 width_offset; /* Enable clock before accessing register */ ret = sirfsoc_dma_runtime_resume(dev); if (ret < 0) return ret; - writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN); - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + width_offset = SIRFSOC_DMA_WIDTH_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + width_offset = SIRFSOC_DMA_WIDTH_0; + } + + writel_relaxed(save->interrupt_en, sdma->base + int_offset); + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -872,15 +1095,21 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_desc, node); writel_relaxed(sdesc->width, - sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4); + sdma->base + width_offset + ch * 4); writel_relaxed(sdesc->xlen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN); writel_relaxed(sdesc->ylen, 
sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN); writel_relaxed(save->ctrl[ch], sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->addr >> 2, - sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + writel_relaxed(sdesc->addr, + sdma->base + SIRFSOC_DMA_CH_ADDR); + } else { + writel_relaxed(sdesc->addr >> 2, + sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + + } } /* if we were runtime-suspended before, suspend again */ @@ -896,9 +1125,25 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume) }; +struct sirfsoc_dmadata sirfsoc_dmadata_a6 = { + .exec = sirfsoc_dma_execute_hw_a6, + .type = SIRFSOC_DMA_VER_A6, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = { + .exec = sirfsoc_dma_execute_hw_a7v1, + .type = SIRFSOC_DMA_VER_A7V1, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = { + .exec = sirfsoc_dma_execute_hw_a7v2, + .type = SIRFSOC_DMA_VER_A7V2, +}; + static const struct of_device_id sirfsoc_dma_match[] = { - { .compatible = "sirf,prima2-dmac", }, - { .compatible = "sirf,marco-dmac", }, + { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,}, + { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,}, + { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,}, {}, }; @@ -925,7 +1170,7 @@ static void __exit sirfsoc_dma_exit(void) subsys_initcall(sirfsoc_dma_init); module_exit(sirfsoc_dma_exit); -MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, " - "Barry Song <baohua.song@csr.com>"); +MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>"); +MODULE_AUTHOR("Barry Song <baohua.song@csr.com>"); MODULE_DESCRIPTION("SIRFSOC DMA control driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 11e536586812..842ff97c2cfb 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -891,9 +891,21 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_vchans = 37, }; +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. + */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, { /* sentinel */ } }; diff --git a/drivers/dma/ti-dma-crossbar.c b/drivers/dma/ti-dma-crossbar.c new file mode 100644 index 000000000000..24f5ca2356bf --- /dev/null +++ b/drivers/dma/ti-dma-crossbar.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/io.h> +#include <linux/idr.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> + +#define TI_XBAR_OUTPUTS 127 +#define TI_XBAR_INPUTS 256 + +static DEFINE_IDR(map_idr); + +struct ti_dma_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ +}; + +struct ti_dma_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dma_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dma_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dma_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dma_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val); + idr_remove(&map_idr, map->xbar_out); + kfree(map); +} + +static void *ti_dma_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dma_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dma_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->xbar_out = idr_alloc(&map_idr, NULL, 0, xbar->dma_requests, + GFP_KERNEL); + map->xbar_in = (u16)dma_spec->args[0]; + + /* The DMA request is 1 based in sDMA */ + dma_spec->args[0] = map->xbar_out + 1; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dma_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +static int ti_dma_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device_node *dma_node; + struct ti_dma_xbar_data *xbar; + struct resource *res; + u32 safe_val; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_XBAR_OUTPUTS); + xbar->dma_requests = TI_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_XBAR_INPUTS); + xbar->xbar_requests = TI_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + iomem = 
devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dma_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, xbar->safe_val); + + ret = of_dma_router_register(node, ti_dma_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dma_xbar_write(xbar->iomem, i, i); + } + + return ret; +} + +static const struct of_device_id ti_dma_xbar_match[] = { + { .compatible = "ti,dra7-dma-crossbar" }, + {}, +}; + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = of_match_ptr(ti_dma_xbar_match), + }, + .probe = ti_dma_xbar_probe, +}; + +int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c index 6f80432a3f0a..7d2c17d8d30f 100644 --- a/drivers/dma/virt-dma.c +++ b/drivers/dma/virt-dma.c @@ -29,7 +29,7 @@ dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&vc->lock, flags); cookie = dma_cookie_assign(tx); - list_add_tail(&vd->node, &vc->desc_submitted); + list_move_tail(&vd->node, &vc->desc_submitted); spin_unlock_irqrestore(&vc->lock, flags); dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n", @@ -83,8 +83,10 @@ static void vchan_complete(unsigned long arg) cb_data = vd->tx.callback_param; list_del(&vd->node); - - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) + list_add(&vd->node, &vc->desc_allocated); + else + vc->desc_free(vd); if (cb) cb(cb_data); @@ -96,9 +98,13 @@ void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head) while (!list_empty(head)) { struct virt_dma_desc *vd = list_first_entry(head, struct virt_dma_desc, node); - list_del(&vd->node); - dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); - vc->desc_free(vd); + if (async_tx_test_ack(&vd->tx)) { + list_move_tail(&vd->node, &vc->desc_allocated); + } else { + dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd); + list_del(&vd->node); + vc->desc_free(vd); + } } } EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list); @@ -108,6 +114,7 @@ void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev) dma_cookie_init(&vc->chan); spin_lock_init(&vc->lock); + INIT_LIST_HEAD(&vc->desc_allocated); INIT_LIST_HEAD(&vc->desc_submitted); INIT_LIST_HEAD(&vc->desc_issued); INIT_LIST_HEAD(&vc->desc_completed); diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h index 181b95267866..189e75dbcb15 100644 --- a/drivers/dma/virt-dma.h +++ b/drivers/dma/virt-dma.h @@ -29,6 +29,7 @@ struct virt_dma_chan { spinlock_t lock; /* protected by vc.lock */ + struct list_head desc_allocated; struct list_head desc_submitted; struct list_head desc_issued; struct list_head desc_completed; @@ -55,11 +56,16 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan struct virt_dma_desc *vd, unsigned long tx_flags) { extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *); + unsigned long flags; dma_async_tx_descriptor_init(&vd->tx, &vc->chan); vd->tx.flags = tx_flags; vd->tx.tx_submit = vchan_tx_submit; + spin_lock_irqsave(&vc->lock, flags); + list_add_tail(&vd->node, &vc->desc_allocated); + spin_unlock_irqrestore(&vc->lock, flags); + return &vd->tx;
} @@ -122,7 +128,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) } /** - * vchan_get_all_descriptors - obtain all submitted and issued descriptors + * vchan_get_all_descriptors - obtain all allocated, submitted and issued + * descriptors * vc: virtual channel to get descriptors from * head: list of descriptors found * @@ -134,6 +141,7 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, struct list_head *head) { + list_splice_tail_init(&vc->desc_allocated, head); list_splice_tail_init(&vc->desc_submitted, head); list_splice_tail_init(&vc->desc_issued, head); list_splice_tail_init(&vc->desc_completed, head); @@ -141,11 +149,14 @@ static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc, static inline void vchan_free_chan_resources(struct virt_dma_chan *vc) { + struct virt_dma_desc *vd; unsigned long flags; LIST_HEAD(head); spin_lock_irqsave(&vc->lock, flags); vchan_get_all_descriptors(vc, &head); + list_for_each_entry(vd, &head, node) + async_tx_clear_ack(&vd->tx); spin_unlock_irqrestore(&vc->lock, flags); vchan_dma_desc_free_list(vc, &head); diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index f52e37502254..620fd55ec766 100755..100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -124,32 +124,8 @@ #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 -#define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 - -#define XGENE_DMA_DESC_NV_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) -#define XGENE_DMA_DESC_IN_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) -#define XGENE_DMA_DESC_RTYPE_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) -#define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ - (((u64 *)(m))[0] |= (v)) -#define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) -#define XGENE_DMA_DESC_C_SET(m) \ - (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) -#define XGENE_DMA_DESC_FLYBY_SET(m, v) \ - (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) -#define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ - (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) -#define XGENE_DMA_DESC_DR_SET(m) \ - (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) -#define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ - (((u64 *)(m))[3] |= (v)) -#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ - (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ @@ -158,14 +134,7 @@ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ -#define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL -#define XGENE_DMA_DESC_SET_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) -#define XGENE_DMA_DESC_IS_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 @@ -184,7 +153,7 @@ #define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 -#define XGENE_DMA_INVALID_LEN_CODE 0x7800 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL /* X-Gene DMA descriptor error codes */ #define ERR_DESC_AXI 0x01 @@ -214,10 +183,10 @@ 
#define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ -#define FLYBY_2SRC_XOR 0x8 -#define FLYBY_3SRC_XOR 0x9 -#define FLYBY_4SRC_XOR 0xA -#define FLYBY_5SRC_XOR 0xB +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) @@ -238,10 +207,10 @@ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { - u64 m0; - u64 m1; - u64 m2; - u64 m3; + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; }; enum xgene_dma_ring_cfgsize { @@ -388,18 +357,11 @@ static bool is_pq_enabled(struct xgene_dma *pdma) return !(val & XGENE_DMA_PQ_DISABLE_MASK); } -static void xgene_dma_cpu_to_le64(u64 *desc, int count) -{ - int i; - - for (i = 0; i < count; i++) - desc[i] = cpu_to_le64(desc[i]); -} - -static u16 xgene_dma_encode_len(u32 len) +static u64 xgene_dma_encode_len(size_t len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? - len : XGENE_DMA_16K_BUFFER_LEN_CODE; + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) @@ -424,34 +386,50 @@ static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) return XGENE_DMA_RING_DESC_CNT(ring_state); } -static void xgene_dma_set_src_buffer(void *ext8, size_t *len, +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? *len : XGENE_DMA_MAX_BYTE_CNT; - XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); - XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(void *ext8) +static void xgene_dma_invalidate_buffer(__le64 *ext8) { - XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); + *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); } -static void *xgene_dma_lookup_ext8(u64 *desc, int idx) +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { - return (idx % 2) ? 
(desc + idx - 1) : (desc + idx + 1); + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; } -static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) { - XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ - XGENE_DMA_DESC_IN_SET(desc); - XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); - XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, @@ -459,7 +437,7 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; int i; /* Get 1st descriptor */ @@ -467,23 +445,21 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(dst); /* Set 1st source address */ - xgene_dma_set_src_buffer(desc1 + 8, &len, &src); + xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - if (len <= 0) { - desc2 = NULL; - goto skip_additional_src; - } + if (!len) + return; /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) @@ -496,12 +472,6 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; - -skip_additional_src: - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - if (desc2) - xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, @@ -510,7 +480,7 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, u32 src_cnt, size_t *nbytes, const u8 *scf) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; size_t len = *nbytes; int i; @@ -521,28 +491,24 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); /* We have multiple source addresses, so need to set NV bit*/ - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set flyby opcode */ - XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; - xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : + xgene_dma_set_src_buffer((i == 0) ? 
&desc1->m1 : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); - XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); } - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - xgene_dma_cpu_to_le64(desc2, 4); - /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; @@ -738,7 +704,7 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { @@ -808,7 +774,8 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ - if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) break; if (++ring->head == ring->slots) @@ -842,7 +809,7 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ - XGENE_DMA_DESC_SET_EMPTY(desc_hw); + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); xgene_dma_run_tx_complete_actions(chan, desc_sw); @@ -889,7 +856,7 @@ static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) * @chan: X-Gene DMA channel * @list: the list to free * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) @@ -900,15 +867,6 @@ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, xgene_dma_clean_descriptor(chan, desc); } -static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, - struct list_head *list) -{ - struct xgene_dma_desc_sw *desc, *_desc; - - list_for_each_entry_safe(desc, _desc, list, node) - xgene_dma_clean_descriptor(chan, desc); -} - static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); @@ -985,7 +943,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1093,7 +1051,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1141,7 +1099,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1218,7 +1176,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1316,7 +1274,6 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) { void *ring_cfg = ring->state; u64 addr = ring->desc_paddr; - void *desc; u32 i, val; ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; @@ -1358,8 +1315,10 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) /* Set empty signature to DMA Rx ring descriptors */ for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + desc = &ring->desc_hw[i]; - XGENE_DMA_DESC_SET_EMPTY(desc); + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); } /* Enable DMA Rx ring interrupt */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 8d9f89b4519d..df92d30ca054 100644 --- a/drivers/md/md.c +++ 
b/drivers/md/md.c @@ -2628,13 +2628,14 @@ errors_show(struct md_rdev *rdev, char *page) static ssize_t errors_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&rdev->corrected_errors, n); - return len; - } - return -EINVAL; + unsigned int n; + int rv; + + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&rdev->corrected_errors, n); + return len; } static struct rdev_sysfs_entry rdev_errors = __ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store); @@ -2651,13 +2652,16 @@ slot_show(struct md_rdev *rdev, char *page) static ssize_t slot_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; + int slot; int err; - int slot = simple_strtoul(buf, &e, 10); + if (strncmp(buf, "none", 4)==0) slot = -1; - else if (e==buf || (*e && *e!= '\n')) - return -EINVAL; + else { + err = kstrtouint(buf, 10, (unsigned int *)&slot); + if (err < 0) + return err; + } if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also * updating the 'rd%d' link, and communicating @@ -3542,12 +3546,12 @@ layout_show(struct mddev *mddev, char *page) static ssize_t layout_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; int err; - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) return err; @@ -3591,12 +3595,12 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks); static ssize_t raid_disks_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; + unsigned int n; int err; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtouint(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3643,12 +3647,12 @@ chunk_size_show(struct mddev *mddev, char *page) static ssize_t chunk_size_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long n; int err; - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); - if (!*buf || (*e && *e != '\n')) - return -EINVAL; + err = kstrtoul(buf, 10, &n); + if (err < 0) + return err; err = mddev_lock(mddev); if (err) @@ -3686,19 +3690,24 @@ resync_start_show(struct mddev *mddev, char *page) static ssize_t resync_start_store(struct mddev *mddev, const char *buf, size_t len) { + unsigned long long n; int err; - char *e; - unsigned long long n = simple_strtoull(buf, &e, 10); + + if (cmd_match(buf, "none")) + n = MaxSector; + else { + err = kstrtoull(buf, 10, &n); + if (err < 0) + return err; + if (n != (sector_t)n) + return -EINVAL; + } err = mddev_lock(mddev); if (err) return err; if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) err = -EBUSY; - else if (cmd_match(buf, "none")) - n = MaxSector; - else if (!*buf || (*e && *e != '\n')) - err = -EINVAL; if (!err) { mddev->recovery_cp = n; @@ -3934,14 +3943,14 @@ max_corrected_read_errors_show(struct mddev *mddev, char *page) { static ssize_t max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long n = simple_strtoul(buf, &e, 10); + unsigned int n; + int rv; - if (*buf && (*e == 0 || *e == '\n')) { - atomic_set(&mddev->max_corr_read_errors, n); - return len; - } - return -EINVAL; + rv = kstrtouint(buf, 10, &n); + if (rv < 0) + return rv; + atomic_set(&mddev->max_corr_read_errors, 
n); + return len; } static struct md_sysfs_entry max_corr_read_errors = @@ -4003,8 +4012,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) else rdev = md_import_device(dev, -1, -1); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + mddev_unlock(mddev); return PTR_ERR(rdev); + } err = bind_rdev_to_array(rdev, mddev); out: if (err) @@ -4298,15 +4309,18 @@ sync_min_show(struct mddev *mddev, char *page) static ssize_t sync_min_store(struct mddev *mddev, const char *buf, size_t len) { - int min; - char *e; + unsigned int min; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_min = 0; - return len; + min = 0; + } else { + rv = kstrtouint(buf, 10, &min); + if (rv < 0) + return rv; + if (min == 0) + return -EINVAL; } - min = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || min <= 0) - return -EINVAL; mddev->sync_speed_min = min; return len; } @@ -4324,15 +4338,18 @@ sync_max_show(struct mddev *mddev, char *page) static ssize_t sync_max_store(struct mddev *mddev, const char *buf, size_t len) { - int max; - char *e; + unsigned int max; + int rv; + if (strncmp(buf, "system", 6)==0) { - mddev->sync_speed_max = 0; - return len; + max = 0; + } else { + rv = kstrtouint(buf, 10, &max); + if (rv < 0) + return rv; + if (max == 0) + return -EINVAL; } - max = simple_strtoul(buf, &e, 10); - if (buf == e || (*e && *e != '\n') || max <= 0) - return -EINVAL; mddev->sync_speed_max = max; return len; } @@ -4515,12 +4532,13 @@ suspend_lo_show(struct mddev *mddev, char *page) static ssize_t suspend_lo_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4557,12 +4575,13 @@ suspend_hi_show(struct mddev *mddev, char *page) static ssize_t suspend_hi_store(struct mddev *mddev, const char *buf, size_t len) { - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - unsigned long long old; + unsigned long long old, new; int err; - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); @@ -4604,11 +4623,13 @@ static ssize_t reshape_position_store(struct mddev *mddev, const char *buf, size_t len) { struct md_rdev *rdev; - char *e; + unsigned long long new; int err; - unsigned long long new = simple_strtoull(buf, &e, 10); - if (buf == e || (*e && *e != '\n')) + err = kstrtoull(buf, 10, &new); + if (err < 0) + return err; + if (new != (sector_t)new) return -EINVAL; err = mddev_lock(mddev); if (err) @@ -5157,6 +5178,7 @@ int md_run(struct mddev *mddev) mddev_detach(mddev); if (mddev->private) pers->free(mddev, mddev->private); + mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); return err; @@ -5292,6 +5314,7 @@ static void md_clean(struct mddev *mddev) mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; + mddev->private = NULL; mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -5364,6 +5387,7 @@ static void __md_stop(struct mddev *mddev) mddev->pers = NULL; spin_unlock(&mddev->lock); pers->free(mddev, mddev->private); + mddev->private = NULL; if (pers->sync_request && mddev->to_remove == NULL) mddev->to_remove = &md_redundancy_group; module_put(pers->owner); @@ 
-6373,7 +6397,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->ctime != info->ctime || mddev->level != info->level || /* mddev->layout != info->layout || */ - !mddev->persistent != info->not_persistent|| + mddev->persistent != !info->not_persistent || mddev->chunk_sectors != info->chunk_size >> 9 || /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ ((state^info->state) & 0xfffffe00) @@ -8104,6 +8128,15 @@ void md_check_recovery(struct mddev *mddev) int spares = 0; if (mddev->ro) { + struct md_rdev *rdev; + if (!mddev->external && mddev->in_sync) + /* 'Blocked' flag not needed as failed devices + * will be recorded if array switched to read/write. + * Leaving it set will prevent the device + * from being removed. + */ + rdev_for_each(rdev, mddev) + clear_bit(Blocked, &rdev->flags); /* On a read-only array we can: * - remove failed devices * - add already-in_sync devices if the array itself @@ -9011,13 +9044,7 @@ static int get_ro(char *buffer, struct kernel_param *kp) } static int set_ro(const char *val, struct kernel_param *kp) { - char *e; - int num = simple_strtoul(val, &e, 10); - if (*val && (*e == '\0' || *e == '\n')) { - start_readonly = num; - return 0; - } - return -EINVAL; + return kstrtouint(val, 10, (unsigned int *)&start_readonly); } module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 188d8e9a6bdc..940f2f365461 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2099,17 +2099,10 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) tbio->bi_rw = WRITE; tbio->bi_private = r10_bio; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; - - for (j=0; j < vcnt ; j++) { - tbio->bi_io_vec[j].bv_offset = 0; - tbio->bi_io_vec[j].bv_len = PAGE_SIZE; - - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); - } tbio->bi_end_io = end_sync_write; + bio_copy_data(tbio, fbio); + d = r10_bio->devs[i].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); @@ -2124,17 +2117,14 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) * that are active */ for (i = 0; i < conf->copies; i++) { - int j, d; + int d; tbio = r10_bio->devs[i].repl_bio; if (!tbio || !tbio->bi_end_io) continue; if (r10_bio->devs[i].bio->bi_end_io != end_sync_write && r10_bio->devs[i].bio != fbio) - for (j = 0; j < vcnt; j++) - memcpy(page_address(tbio->bi_io_vec[j].bv_page), - page_address(fbio->bi_io_vec[j].bv_page), - PAGE_SIZE); + bio_copy_data(tbio, fbio); d = r10_bio->devs[i].devnum; atomic_inc(&r10_bio->remaining); md_sync_acct(conf->mirrors[d].replacement->bdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b6793d2e051f..59e44e99eef3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf, int hash) { int size; - bool do_wakeup = false; + unsigned long do_wakeup = 0; + int i = 0; unsigned long flags; if (hash == NR_STRIPE_HASH_LOCKS) { @@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf, !list_empty(list)) atomic_dec(&conf->empty_inactive_list_nr); list_splice_tail_init(list, conf->inactive_list + hash); - do_wakeup = true; + do_wakeup |= 1 << hash; spin_unlock_irqrestore(conf->hash_locks + hash, flags); } size--; hash--; } + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + if (do_wakeup & (1 << i)) + 
wake_up(&conf->wait_for_stripe[i]); + } + if (do_wakeup) { - wake_up(&conf->wait_for_stripe); + if (atomic_read(&conf->active_stripes) == 0) + wake_up(&conf->wait_for_quiescent); if (conf->retry_read_aligned) md_wakeup_thread(conf->mddev->thread); } @@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, spin_lock_irq(conf->hash_locks + hash); do { - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0 || noquiesce, *(conf->hash_locks + hash)); sh = __find_stripe(conf, sector, conf->generation - previous); if (!sh) { if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) { sh = get_free_stripe(conf, hash); - if (!sh && llist_empty(&conf->released_stripes) && - !test_bit(R5_DID_ALLOC, &conf->cache_state)) + if (!sh && !test_bit(R5_DID_ALLOC, + &conf->cache_state)) set_bit(R5_ALLOC_MORE, &conf->cache_state); } @@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector, if (!sh) { set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); - wait_event_lock_irq( - conf->wait_for_stripe, + wait_event_exclusive_cmd( + conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash) && (atomic_read(&conf->active_stripes) < (conf->max_nr_stripes * 3 / 4) || !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)), - *(conf->hash_locks + hash)); + spin_unlock_irq(conf->hash_locks + hash), + spin_lock_irq(conf->hash_locks + hash)); clear_bit(R5_INACTIVE_BLOCKED, &conf->cache_state); } else { @@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector, } } while (sh == NULL); + if (!list_empty(conf->inactive_list + hash)) + wake_up(&conf->wait_for_stripe[hash]); + spin_unlock_irq(conf->hash_locks + hash); return sh; } @@ -2177,7 +2188,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) cnt = 0; list_for_each_entry(nsh, &newstripes, lru) { lock_device_hash_lock(conf, hash); - wait_event_cmd(conf->wait_for_stripe, + wait_event_exclusive_cmd(conf->wait_for_stripe[hash], !list_empty(conf->inactive_list + hash), unlock_device_hash_lock(conf, hash), lock_device_hash_lock(conf, hash)); @@ -4760,7 +4771,7 @@ static void raid5_align_endio(struct bio *bi, int error) raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return; } @@ -4855,7 +4866,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) align_bi->bi_iter.bi_sector += rdev->data_offset; spin_lock_irq(&conf->device_lock); - wait_event_lock_irq(conf->wait_for_stripe, + wait_event_lock_irq(conf->wait_for_quiescent, conf->quiesce == 0, conf->device_lock); atomic_inc(&conf->active_aligned_reads); @@ -5699,7 +5710,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) bio_endio(raid_bio, 0); } if (atomic_dec_and_test(&conf->active_aligned_reads)) - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); return handled; } @@ -6433,7 +6444,10 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); - init_waitqueue_head(&conf->wait_for_stripe); + init_waitqueue_head(&conf->wait_for_quiescent); + for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { + init_waitqueue_head(&conf->wait_for_stripe[i]); + } init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->hold_list); @@ -7466,7 +7480,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) * active 
stripes can drain */ conf->quiesce = 2; - wait_event_cmd(conf->wait_for_stripe, + wait_event_cmd(conf->wait_for_quiescent, atomic_read(&conf->active_stripes) == 0 && atomic_read(&conf->active_aligned_reads) == 0, unlock_all_device_hash_locks_irq(conf), @@ -7480,7 +7494,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) case 0: /* re-enable writes */ lock_all_device_hash_locks_irq(conf); conf->quiesce = 0; - wake_up(&conf->wait_for_stripe); + wake_up(&conf->wait_for_quiescent); wake_up(&conf->wait_for_overlap); unlock_all_device_hash_locks_irq(conf); break; diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 896d603ad0da..02c3bf8fbfe7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -511,7 +511,8 @@ struct r5conf { struct list_head inactive_list[NR_STRIPE_HASH_LOCKS]; atomic_t empty_inactive_list_nr; struct llist_head released_stripes; - wait_queue_head_t wait_for_stripe; + wait_queue_head_t wait_for_quiescent; + wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS]; wait_queue_head_t wait_for_overlap; unsigned long cache_state; #define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked, diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig new file mode 100644 index 000000000000..72226acb5c0f --- /dev/null +++ b/drivers/nvdimm/Kconfig @@ -0,0 +1,68 @@ +menuconfig LIBNVDIMM + tristate "NVDIMM (Non-Volatile Memory Device) Support" + depends on PHYS_ADDR_T_64BIT + depends on BLK_DEV + help + Generic support for non-volatile memory devices including + ACPI-6-NFIT defined resources. On platforms that define an + NFIT, or otherwise can discover NVDIMM resources, a libnvdimm + bus is registered to advertise PMEM (persistent memory) + namespaces (/dev/pmemX) and BLK (sliding mmio window(s)) + namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a + memory resource that may span multiple DIMMs and support DAX + (see CONFIG_DAX). A BLK namespace refers to an NVDIMM control + region which exposes an mmio register set for windowed access + mode to non-volatile memory. + +if LIBNVDIMM + +config BLK_DEV_PMEM + tristate "PMEM: Persistent memory block device support" + default LIBNVDIMM + depends on HAS_IOMEM + select ND_BTT if BTT + help + Memory ranges for PMEM are described by either an NFIT + (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a + non-standard OEM-specific E820 memory type (type-12, see + CONFIG_X86_PMEM_LEGACY), or it is manually specified by the + 'memmap=nn[KMG]!ss[KMG]' kernel command line (see + Documentation/kernel-parameters.txt). This driver converts + these persistent memory ranges into block devices that are + capable of DAX (direct-access) file system mappings. See + Documentation/nvdimm/nvdimm.txt for more details. + + Say Y if you want to use an NVDIMM + +config ND_BLK + tristate "BLK: Block data window (aperture) device support" + default LIBNVDIMM + select ND_BTT if BTT + help + Support NVDIMMs, or other devices, that implement a BLK-mode + access capability. BLK-mode access uses memory-mapped-i/o + apertures to access persistent media. + + Say Y if your platform firmware emits an ACPI.NFIT table + (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode + capabilities. 
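Taken together, the entries in this Kconfig describe one stack: libnvdimm as the core, PMEM and BLK as the block drivers, and BTT as the optional atomic-sector personality. A minimal, purely illustrative .config fragment for enabling all of it (assuming the BLK_DEV and PHYS_ADDR_T_64BIT dependencies noted above are already satisfied; the =m versus =y choices are only an example):

CONFIG_ACPI_NFIT=m
CONFIG_LIBNVDIMM=m
CONFIG_BLK_DEV_PMEM=m
CONFIG_ND_BLK=m
CONFIG_BTT=y

CONFIG_ND_BTT does not need to be set by hand; it is pulled in through the 'select ND_BTT if BTT' statements on the PMEM and BLK drivers.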
+ +config ND_BTT + tristate + +config BTT + bool "BTT: Block Translation Table (atomic sector updates)" + default y if LIBNVDIMM + help + The Block Translation Table (BTT) provides atomic sector + update semantics for persistent memory devices, so that + applications that rely on sector writes not being torn (a + guarantee that typical disks provide) can continue to do so. + The BTT manifests itself as an alternate personality for an + NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX, + ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys, + etc...). + + Select Y if unsure + +endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile new file mode 100644 index 000000000000..594bb97c867a --- /dev/null +++ b/drivers/nvdimm/Makefile @@ -0,0 +1,20 @@ +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o + +nd_pmem-y := pmem.o + +nd_btt-y := btt.o + +nd_blk-y := blk.o + +libnvdimm-y := core.o +libnvdimm-y += bus.o +libnvdimm-y += dimm_devs.o +libnvdimm-y += dimm.o +libnvdimm-y += region_devs.o +libnvdimm-y += region.o +libnvdimm-y += namespace_devs.o +libnvdimm-y += label.o +libnvdimm-$(CONFIG_BTT) += btt_devs.o diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c new file mode 100644 index 000000000000..4f97b248c236 --- /dev/null +++ b/drivers/nvdimm/blk.c @@ -0,0 +1,384 @@ +/* + * NVDIMM Block Window Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#include <linux/blkdev.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/nd.h> +#include <linux/sizes.h> +#include "nd.h" + +struct nd_blk_device { + struct request_queue *queue; + struct gendisk *disk; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + size_t disk_size; + u32 sector_size; + u32 internal_lbasize; +}; + +static int nd_blk_major; + +static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +{ + return blk_dev->nsblk->lbasize - blk_dev->sector_size; +} + +static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, + resource_size_t ns_offset, unsigned int len) +{ + int i; + + for (i = 0; i < nsblk->num_resources; i++) { + if (ns_offset < resource_size(nsblk->res[i])) { + if (ns_offset + len > resource_size(nsblk->res[i])) { + dev_WARN_ONCE(&nsblk->common.dev, 1, + "illegal request\n"); + return SIZE_MAX; + } + return nsblk->res[i]->start + ns_offset; + } + ns_offset -= resource_size(nsblk->res[i]); + } + + dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); + return SIZE_MAX; +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + unsigned int len = nd_blk_meta_size(blk_dev); + resource_size_t dev_offset, ns_offset; + struct nd_namespace_blk *nsblk; + struct nd_blk_region *ndbr; + int err = 0; + + nsblk = blk_dev->nsblk; + ndbr = blk_dev->ndbr; + ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + dev_offset = to_dev_offset(nsblk, ns_offset, len); + if (dev_offset == SIZE_MAX) + return -EIO; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *iobuf; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + iobuf = kmap_atomic(bv.bv_page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, + cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + len -= cur_len; + dev_offset += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return err; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, u64 lba, + int rw) +{ + return 0; +} +#endif + +static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, + sector_t sector) +{ + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset, ns_offset; + int err = 0; + void *iobuf; + u64 lba; + + while (len) { + unsigned int cur_len; + + /* + * If we don't have an integrity payload, we don't have to + * split the bvec into sectors, as this would cause unnecessary + * Block Window setup/move steps. the do_io routine is capable + * of handling len <= PAGE_SIZE. + */ + cur_len = bip ? 
min(len, blk_dev->sector_size) : len; + + lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); + ns_offset = lba * blk_dev->internal_lbasize; + dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + if (dev_offset == SIZE_MAX) + return -EIO; + + iobuf = kmap_atomic(page); + err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); + kunmap_atomic(iobuf); + if (err) + return err; + + if (bip) { + err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + if (err) + return err; + } + len -= cur_len; + off += cur_len; + sector += blk_dev->sector_size >> SECTOR_SHIFT; + } + + return err; +} + +static void nd_blk_make_request(struct request_queue *q, struct bio *bio) +{ + struct block_device *bdev = bio->bi_bdev; + struct gendisk *disk = bdev->bd_disk; + struct bio_integrity_payload *bip; + struct nd_blk_device *blk_dev; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + bip = bio_integrity(bio); + blk_dev = disk->private_data; + rw = bio_data_dir(bio); + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); + if (err) { + dev_info(&blk_dev->nsblk->common.dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + + out: + bio_endio(bio, err); +} + +static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *iobuf, size_t n, int rw) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); + struct nd_namespace_blk *nsblk = blk_dev->nsblk; + struct nd_blk_region *ndbr = blk_dev->ndbr; + resource_size_t dev_offset; + + dev_offset = to_dev_offset(nsblk, offset, n); + + if (unlikely(offset + n > blk_dev->disk_size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (dev_offset == SIZE_MAX) + return -EIO; + + return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); +} + +static const struct block_device_operations nd_blk_fops = { + .owner = THIS_MODULE, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int nd_blk_attach_disk(struct nd_namespace_common *ndns, + struct nd_blk_device *blk_dev) +{ + resource_size_t available_disk_size; + struct gendisk *disk; + u64 internal_nlba; + + internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); + available_disk_size = internal_nlba * blk_dev->sector_size; + + blk_dev->queue = blk_alloc_queue(GFP_KERNEL); + if (!blk_dev->queue) + return -ENOMEM; + + blk_queue_make_request(blk_dev->queue, nd_blk_make_request); + blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); + blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); + blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); + + disk = blk_dev->disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(blk_dev->queue); + return -ENOMEM; + } + + disk->driverfs_dev = &ndns->dev; + disk->major = nd_blk_major; + 
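+ /* alloc_disk(0) plus GENHD_FL_EXT_DEVT below: minors come from the extended devt range */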
disk->first_minor = 0; + disk->fops = &nd_blk_fops; + disk->private_data = blk_dev; + disk->queue = blk_dev->queue; + disk->flags = GENHD_FL_EXT_DEVT; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + set_capacity(disk, 0); + add_disk(disk); + + if (nd_blk_meta_size(blk_dev)) { + int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + + if (rc) { + del_gendisk(disk); + put_disk(disk); + blk_cleanup_queue(blk_dev->queue); + return rc; + } + } + + set_capacity(disk, available_disk_size >> SECTOR_SHIFT); + revalidate_disk(disk); + return 0; +} + +static int nd_blk_probe(struct device *dev) +{ + struct nd_namespace_common *ndns; + struct nd_namespace_blk *nsblk; + struct nd_blk_device *blk_dev; + int rc; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + + blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); + if (!blk_dev) + return -ENOMEM; + + nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->disk_size = nvdimm_namespace_capacity(ndns); + blk_dev->ndbr = to_nd_blk_region(dev->parent); + blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); + blk_dev->internal_lbasize = roundup(nsblk->lbasize, + INT_LBASIZE_ALIGNMENT); + blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); + dev_set_drvdata(dev, blk_dev); + + ndns->rw_bytes = nd_blk_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, blk_dev) == 0) { + /* we'll come back as btt-blk */ + rc = -ENXIO; + } else + rc = nd_blk_attach_disk(ndns, blk_dev); + if (rc) + kfree(blk_dev); + return rc; +} + +static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) +{ + del_gendisk(blk_dev->disk); + put_disk(blk_dev->disk); + blk_cleanup_queue(blk_dev->queue); +} + +static int nd_blk_remove(struct device *dev) +{ + struct nd_blk_device *blk_dev = dev_get_drvdata(dev); + + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + nd_blk_detach_disk(blk_dev); + kfree(blk_dev); + + return 0; +} + +static struct nd_device_driver nd_blk_driver = { + .probe = nd_blk_probe, + .remove = nd_blk_remove, + .drv = { + .name = "nd_blk", + }, + .type = ND_DRIVER_NAMESPACE_BLK, +}; + +static int __init nd_blk_init(void) +{ + int rc; + + rc = register_blkdev(0, "nd_blk"); + if (rc < 0) + return rc; + + nd_blk_major = rc; + rc = nd_driver_register(&nd_blk_driver); + + if (rc < 0) + unregister_blkdev(nd_blk_major, "nd_blk"); + + return rc; +} + +static void __exit nd_blk_exit(void) +{ + driver_unregister(&nd_blk_driver.drv); + unregister_blkdev(nd_blk_major, "nd_blk"); +} + +MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); +module_init(nd_blk_init); +module_exit(nd_blk_exit); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c new file mode 100644 index 000000000000..411c7b2bb37a --- /dev/null +++ b/drivers/nvdimm/btt.c @@ -0,0 +1,1479 @@ +/* + * Block Translation Table + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ +#include <linux/highmem.h> +#include <linux/debugfs.h> +#include <linux/blkdev.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/fs.h> +#include <linux/nd.h> +#include "btt.h" +#include "nd.h" + +enum log_ent_request { + LOG_NEW_ENT = 0, + LOG_OLD_ENT +}; + +static int btt_major; + +static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_read_bytes(ndns, offset, buf, n); +} + +static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, + void *buf, size_t n) +{ + struct nd_btt *nd_btt = arena->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* arena offsets are 4K from the base of the device */ + offset += SZ_4K; + return nvdimm_write_bytes(ndns, offset, buf, n); +} + +static int btt_info_write(struct arena_info *arena, struct btt_sb *super) +{ + int ret; + + ret = arena_write_bytes(arena, arena->info2off, super, + sizeof(struct btt_sb)); + if (ret) + return ret; + + return arena_write_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +static int btt_info_read(struct arena_info *arena, struct btt_sb *super) +{ + WARN_ON(!super); + return arena_read_bytes(arena, arena->infooff, super, + sizeof(struct btt_sb)); +} + +/* + * 'raw' version of btt_map write + * Assumptions: + * mapping is in little-endian + * mapping contains 'E' and 'Z' flags as desired + */ +static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping) +{ + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE); +} + +static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping, + u32 z_flag, u32 e_flag) +{ + u32 ze; + __le32 mapping_le; + + /* + * This 'mapping' is supposed to be just the LBA mapping, without + * any flags set, so strip the flag bits. 
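+ * (The Z and E flag bits sit above MAP_LBA_MASK and are re-applied below.)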
+ */ + mapping &= MAP_LBA_MASK; + + ze = (z_flag << 1) + e_flag; + switch (ze) { + case 0: + /* + * We want to set neither of the Z or E flags, and + * in the actual layout, this means setting the bit + * positions of both to '1' to indicate a 'normal' + * map entry + */ + mapping |= MAP_ENT_NORMAL; + break; + case 1: + mapping |= (1 << MAP_ERR_SHIFT); + break; + case 2: + mapping |= (1 << MAP_TRIM_SHIFT); + break; + default: + /* + * The case where Z and E are both sent in as '1' could be + * construed as a valid 'normal' case, but we decide not to, + * to avoid confusion + */ + WARN_ONCE(1, "Invalid use of Z and E flags\n"); + return -EIO; + } + + mapping_le = cpu_to_le32(mapping); + return __btt_map_write(arena, lba, mapping_le); +} + +static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, + int *trim, int *error) +{ + int ret; + __le32 in; + u32 raw_mapping, postmap, ze, z_flag, e_flag; + u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE); + + WARN_ON(lba >= arena->external_nlba); + + ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE); + if (ret) + return ret; + + raw_mapping = le32_to_cpu(in); + + z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT; + e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT; + ze = (z_flag << 1) + e_flag; + postmap = raw_mapping & MAP_LBA_MASK; + + /* Reuse the {z,e}_flag variables for *trim and *error */ + z_flag = 0; + e_flag = 0; + + switch (ze) { + case 0: + /* Initial state. Return postmap = premap */ + *mapping = lba; + break; + case 1: + *mapping = postmap; + e_flag = 1; + break; + case 2: + *mapping = postmap; + z_flag = 1; + break; + case 3: + *mapping = postmap; + break; + default: + return -EIO; + } + + if (trim) + *trim = z_flag; + if (error) + *error = e_flag; + + return ret; +} + +static int btt_log_read_pair(struct arena_info *arena, u32 lane, + struct log_entry *ent) +{ + WARN_ON(!ent); + return arena_read_bytes(arena, + arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, + 2 * LOG_ENT_SIZE); +} + +static struct dentry *debugfs_root; + +static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, + int idx) +{ + char dirname[32]; + struct dentry *d; + + /* If for some reason, parent bttN was not created, exit */ + if (!parent) + return; + + snprintf(dirname, 32, "arena%d", idx); + d = debugfs_create_dir(dirname, parent); + if (IS_ERR_OR_NULL(d)) + return; + a->debugfs_dir = d; + + debugfs_create_x64("size", S_IRUGO, d, &a->size); + debugfs_create_x64("external_lba_start", S_IRUGO, d, + &a->external_lba_start); + debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba); + debugfs_create_u32("internal_lbasize", S_IRUGO, d, + &a->internal_lbasize); + debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba); + debugfs_create_u32("external_lbasize", S_IRUGO, d, + &a->external_lbasize); + debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree); + debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major); + debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor); + debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff); + debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff); + debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff); + debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff); + debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); + debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); + debugfs_create_x32("flags", S_IRUGO, d, &a->flags); +} + +static void btt_debugfs_init(struct btt *btt) +{ + int i = 0; + struct arena_info *arena; + + btt->debugfs_dir = 
debugfs_create_dir(dev_name(&btt->nd_btt->dev), + debugfs_root); + if (IS_ERR_OR_NULL(btt->debugfs_dir)) + return; + + list_for_each_entry(arena, &btt->arena_list, list) { + arena_debugfs_init(arena, btt->debugfs_dir, i); + i++; + } +} + +/* + * This function accepts two log entries, and uses the + * sequence number to find the 'older' entry. + * It also updates the sequence number in this old entry to + * make it the 'new' one if the mark_flag is set. + * Finally, it returns which of the entries was the older one. + * + * TODO The logic feels a bit kludge-y. make it better.. + */ +static int btt_log_get_old(struct log_entry *ent) +{ + int old; + + /* + * the first ever time this is seen, the entry goes into [0] + * the next time, the following logic works out to put this + * (next) entry into [1] + */ + if (ent[0].seq == 0) { + ent[0].seq = cpu_to_le32(1); + return 0; + } + + if (ent[0].seq == ent[1].seq) + return -EINVAL; + if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) + return -EINVAL; + + if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { + if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) + old = 0; + else + old = 1; + } else { + if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) + old = 1; + else + old = 0; + } + + return old; +} + +static struct device *to_dev(struct arena_info *arena) +{ + return &arena->nd_btt->dev; +} + +/* + * This function copies the desired (old/new) log entry into ent if + * it is not NULL. It returns the sub-slot number (0 or 1) + * where the desired log entry was found. Negative return values + * indicate errors. + */ +static int btt_log_read(struct arena_info *arena, u32 lane, + struct log_entry *ent, int old_flag) +{ + int ret; + int old_ent, ret_ent; + struct log_entry log[2]; + + ret = btt_log_read_pair(arena, lane, log); + if (ret) + return -EIO; + + old_ent = btt_log_get_old(log); + if (old_ent < 0 || old_ent > 1) { + dev_info(to_dev(arena), + "log corruption (%d): lane %d seq [%d, %d]\n", + old_ent, lane, log[0].seq, log[1].seq); + /* TODO set error state? */ + return -EIO; + } + + ret_ent = (old_flag ? old_ent : (1 - old_ent)); + + if (ent != NULL) + memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); + + return ret_ent; +} + +/* + * This function commits a log entry to media + * It does _not_ prepare the freelist entry for the next write + * btt_flog_write is the wrapper for updating the freelist elements + */ +static int __btt_log_write(struct arena_info *arena, u32 lane, + u32 sub, struct log_entry *ent) +{ + int ret; + /* + * Ignore the padding in log_entry for calculating log_half. + * The entry is 'committed' when we write the sequence number, + * and we want to ensure that that is the last thing written. 
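+ * (The sequence number sits in the second half of the entry, so it is covered by the second of the two writes below.)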
+ * We don't bother writing the padding as that would be extra + * media wear and write amplification + */ + unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; + u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); + void *src = ent; + + /* split the 16B write into atomic, durable halves */ + ret = arena_write_bytes(arena, ns_off, src, log_half); + if (ret) + return ret; + + ns_off += log_half; + src += log_half; + return arena_write_bytes(arena, ns_off, src, log_half); +} + +static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub, + struct log_entry *ent) +{ + int ret; + + ret = __btt_log_write(arena, lane, sub, ent); + if (ret) + return ret; + + /* prepare the next free entry */ + arena->freelist[lane].sub = 1 - arena->freelist[lane].sub; + if (++(arena->freelist[lane].seq) == 4) + arena->freelist[lane].seq = 1; + arena->freelist[lane].block = le32_to_cpu(ent->old_map); + + return ret; +} + +/* + * This function initializes the BTT map to the initial state, which is + * all-zeroes, and indicates an identity mapping + */ +static int btt_map_init(struct arena_info *arena) +{ + int ret = -EINVAL; + void *zerobuf; + size_t offset = 0; + size_t chunk_size = SZ_2M; + size_t mapsize = arena->logoff - arena->mapoff; + + zerobuf = kzalloc(chunk_size, GFP_KERNEL); + if (!zerobuf) + return -ENOMEM; + + while (mapsize) { + size_t size = min(mapsize, chunk_size); + + ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf, + size); + if (ret) + goto free; + + offset += size; + mapsize -= size; + cond_resched(); + } + + free: + kfree(zerobuf); + return ret; +} + +/* + * This function initializes the BTT log with 'fake' entries pointing + * to the initial reserved set of blocks as being free + */ +static int btt_log_init(struct arena_info *arena) +{ + int ret; + u32 i; + struct log_entry log, zerolog; + + memset(&zerolog, 0, sizeof(zerolog)); + + for (i = 0; i < arena->nfree; i++) { + log.lba = cpu_to_le32(i); + log.old_map = cpu_to_le32(arena->external_nlba + i); + log.new_map = cpu_to_le32(arena->external_nlba + i); + log.seq = cpu_to_le32(LOG_SEQ_INIT); + ret = __btt_log_write(arena, i, 0, &log); + if (ret) + return ret; + ret = __btt_log_write(arena, i, 1, &zerolog); + if (ret) + return ret; + } + + return 0; +} + +static int btt_freelist_init(struct arena_info *arena) +{ + int old, new, ret; + u32 i, map_entry; + struct log_entry log_new, log_old; + + arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), + GFP_KERNEL); + if (!arena->freelist) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) { + old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); + if (old < 0) + return old; + + new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); + if (new < 0) + return new; + + /* sub points to the next one to be overwritten */ + arena->freelist[i].sub = 1 - new; + arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); + arena->freelist[i].block = le32_to_cpu(log_new.old_map); + + /* This implies a newly created or untouched flog entry */ + if (log_new.old_map == log_new.new_map) + continue; + + /* Check if map recovery is needed */ + ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry, + NULL, NULL); + if (ret) + return ret; + if ((le32_to_cpu(log_new.new_map) != map_entry) && + (le32_to_cpu(log_new.old_map) == map_entry)) { + /* + * Last transaction wrote the flog, but wasn't able + * to complete the map write. So fix up the map. 
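+ * (the on-media map still holds old_map, so replay the write of new_map)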
+ */ + ret = btt_map_write(arena, le32_to_cpu(log_new.lba), + le32_to_cpu(log_new.new_map), 0, 0); + if (ret) + return ret; + } + + } + + return 0; +} + +static int btt_rtt_init(struct arena_info *arena) +{ + arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); + if (arena->rtt == NULL) + return -ENOMEM; + + return 0; +} + +static int btt_maplocks_init(struct arena_info *arena) +{ + u32 i; + + arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock), + GFP_KERNEL); + if (!arena->map_locks) + return -ENOMEM; + + for (i = 0; i < arena->nfree; i++) + spin_lock_init(&arena->map_locks[i].lock); + + return 0; +} + +static struct arena_info *alloc_arena(struct btt *btt, size_t size, + size_t start, size_t arena_off) +{ + struct arena_info *arena; + u64 logsize, mapsize, datasize; + u64 available = size; + + arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL); + if (!arena) + return NULL; + arena->nd_btt = btt->nd_btt; + + if (!size) + return arena; + + arena->size = size; + arena->external_lba_start = start; + arena->external_lbasize = btt->lbasize; + arena->internal_lbasize = roundup(arena->external_lbasize, + INT_LBASIZE_ALIGNMENT); + arena->nfree = BTT_DEFAULT_NFREE; + arena->version_major = 1; + arena->version_minor = 1; + + if (available % BTT_PG_SIZE) + available -= (available % BTT_PG_SIZE); + + /* Two pages are reserved for the super block and its copy */ + available -= 2 * BTT_PG_SIZE; + + /* The log takes a fixed amount of space based on nfree */ + logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), + BTT_PG_SIZE); + available -= logsize; + + /* Calculate optimal split between map and data area */ + arena->internal_nlba = div_u64(available - BTT_PG_SIZE, + arena->internal_lbasize + MAP_ENT_SIZE); + arena->external_nlba = arena->internal_nlba - arena->nfree; + + mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE); + datasize = available - mapsize; + + /* 'Absolute' values, relative to start of storage space */ + arena->infooff = arena_off; + arena->dataoff = arena->infooff + BTT_PG_SIZE; + arena->mapoff = arena->dataoff + datasize; + arena->logoff = arena->mapoff + mapsize; + arena->info2off = arena->logoff + logsize; + return arena; +} + +static void free_arenas(struct btt *btt) +{ + struct arena_info *arena, *next; + + list_for_each_entry_safe(arena, next, &btt->arena_list, list) { + list_del(&arena->list); + kfree(arena->rtt); + kfree(arena->map_locks); + kfree(arena->freelist); + debugfs_remove_recursive(arena->debugfs_dir); + kfree(arena); + } +} + +/* + * This function checks if the metadata layout is valid and error free + */ +static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, + u8 *uuid, u32 lbasize) +{ + u64 checksum; + + if (memcmp(super->uuid, uuid, 16)) + return 0; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_btt_sb_checksum(super)) + return 0; + super->checksum = cpu_to_le64(checksum); + + if (lbasize != le32_to_cpu(super->external_lbasize)) + return 0; + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(to_dev(arena), "Found arena with an error flag\n"); + + return 1; +} + +/* + * This function reads an existing valid btt superblock and + * populates the corresponding arena_info struct + */ +static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, + u64 arena_off) +{ + arena->internal_nlba = le32_to_cpu(super->internal_nlba); + arena->internal_lbasize = 
le32_to_cpu(super->internal_lbasize); + arena->external_nlba = le32_to_cpu(super->external_nlba); + arena->external_lbasize = le32_to_cpu(super->external_lbasize); + arena->nfree = le32_to_cpu(super->nfree); + arena->version_major = le16_to_cpu(super->version_major); + arena->version_minor = le16_to_cpu(super->version_minor); + + arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off + + le64_to_cpu(super->nextoff)); + arena->infooff = arena_off; + arena->dataoff = arena_off + le64_to_cpu(super->dataoff); + arena->mapoff = arena_off + le64_to_cpu(super->mapoff); + arena->logoff = arena_off + le64_to_cpu(super->logoff); + arena->info2off = arena_off + le64_to_cpu(super->info2off); + + arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : + (arena->info2off - arena->infooff + BTT_PG_SIZE); + + arena->flags = le32_to_cpu(super->flags); +} + +static int discover_arenas(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + struct btt_sb *super; + size_t remaining = btt->rawsize; + u64 cur_nlba = 0; + size_t cur_off = 0; + int num_arenas = 0; + + super = kzalloc(sizeof(*super), GFP_KERNEL); + if (!super) + return -ENOMEM; + + while (remaining) { + /* Alloc memory for arena */ + arena = alloc_arena(btt, 0, 0, 0); + if (!arena) { + ret = -ENOMEM; + goto out_super; + } + + arena->infooff = cur_off; + ret = btt_info_read(arena, super); + if (ret) + goto out; + + if (!arena_is_valid(arena, super, btt->nd_btt->uuid, + btt->lbasize)) { + if (remaining == btt->rawsize) { + btt->init_state = INIT_NOTFOUND; + dev_info(to_dev(arena), "No existing arenas\n"); + goto out; + } else { + dev_info(to_dev(arena), + "Found corrupted metadata!\n"); + ret = -ENODEV; + goto out; + } + } + + arena->external_lba_start = cur_nlba; + parse_arena_meta(arena, super, cur_off); + + ret = btt_freelist_init(arena); + if (ret) + goto out; + + ret = btt_rtt_init(arena); + if (ret) + goto out; + + ret = btt_maplocks_init(arena); + if (ret) + goto out; + + list_add_tail(&arena->list, &btt->arena_list); + + remaining -= arena->size; + cur_off += arena->size; + cur_nlba += arena->external_nlba; + num_arenas++; + + if (arena->nextoff == 0) + break; + } + btt->num_arenas = num_arenas; + btt->nlba = cur_nlba; + btt->init_state = INIT_READY; + + kfree(super); + return ret; + + out: + kfree(arena); + free_arenas(btt); + out_super: + kfree(super); + return ret; +} + +static int create_arenas(struct btt *btt) +{ + size_t remaining = btt->rawsize; + size_t cur_off = 0; + + while (remaining) { + struct arena_info *arena; + size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining); + + remaining -= arena_size; + if (arena_size < ARENA_MIN_SIZE) + break; + + arena = alloc_arena(btt, arena_size, btt->nlba, cur_off); + if (!arena) { + free_arenas(btt); + return -ENOMEM; + } + btt->nlba += arena->external_nlba; + if (remaining >= ARENA_MIN_SIZE) + arena->nextoff = arena->size; + else + arena->nextoff = 0; + cur_off += arena_size; + list_add_tail(&arena->list, &btt->arena_list); + } + + return 0; +} + +/* + * This function completes arena initialization by writing + * all the metadata. + * It is only called for an uninitialized arena when a write + * to that arena occurs for the first time. 
+ */ +static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +{ + int ret; + struct btt_sb *super; + + ret = btt_map_init(arena); + if (ret) + return ret; + + ret = btt_log_init(arena); + if (ret) + return ret; + + super = kzalloc(sizeof(struct btt_sb), GFP_NOIO); + if (!super) + return -ENOMEM; + + strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); + memcpy(super->uuid, uuid, 16); + super->flags = cpu_to_le32(arena->flags); + super->version_major = cpu_to_le16(arena->version_major); + super->version_minor = cpu_to_le16(arena->version_minor); + super->external_lbasize = cpu_to_le32(arena->external_lbasize); + super->external_nlba = cpu_to_le32(arena->external_nlba); + super->internal_lbasize = cpu_to_le32(arena->internal_lbasize); + super->internal_nlba = cpu_to_le32(arena->internal_nlba); + super->nfree = cpu_to_le32(arena->nfree); + super->infosize = cpu_to_le32(sizeof(struct btt_sb)); + super->nextoff = cpu_to_le64(arena->nextoff); + /* + * Subtract arena->infooff (arena start) so numbers are relative + * to 'this' arena + */ + super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff); + super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff); + super->logoff = cpu_to_le64(arena->logoff - arena->infooff); + super->info2off = cpu_to_le64(arena->info2off - arena->infooff); + + super->flags = 0; + super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + + ret = btt_info_write(arena, super); + + kfree(super); + return ret; +} + +/* + * This function completes the initialization for the BTT namespace + * such that it is ready to accept IOs + */ +static int btt_meta_init(struct btt *btt) +{ + int ret = 0; + struct arena_info *arena; + + mutex_lock(&btt->init_lock); + list_for_each_entry(arena, &btt->arena_list, list) { + ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + if (ret) + goto unlock; + + ret = btt_freelist_init(arena); + if (ret) + goto unlock; + + ret = btt_rtt_init(arena); + if (ret) + goto unlock; + + ret = btt_maplocks_init(arena); + if (ret) + goto unlock; + } + + btt->init_state = INIT_READY; + + unlock: + mutex_unlock(&btt->init_lock); + return ret; +} + +static u32 btt_meta_size(struct btt *btt) +{ + return btt->lbasize - btt->sector_size; +} + +/* + * This function calculates the arena in which the given LBA lies + * by doing a linear walk. This is acceptable since we expect only + * a few arenas. If we have backing devices that get much larger, + * we can construct a balanced binary tree of arenas at init time + * so that this range search becomes faster. 
+ */ +static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap, + struct arena_info **arena) +{ + struct arena_info *arena_list; + __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size); + + list_for_each_entry(arena_list, &btt->arena_list, list) { + if (lba < arena_list->external_nlba) { + *arena = arena_list; + *premap = lba; + return 0; + } + lba -= arena_list->external_nlba; + } + + return -EIO; +} + +/* + * The following (lock_map, unlock_map) are mostly just to improve + * readability, since they index into an array of locks + */ +static void lock_map(struct arena_info *arena, u32 premap) + __acquires(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_lock(&arena->map_locks[idx].lock); +} + +static void unlock_map(struct arena_info *arena, u32 premap) + __releases(&arena->map_locks[idx].lock) +{ + u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree; + + spin_unlock(&arena->map_locks[idx].lock); +} + +static u64 to_namespace_offset(struct arena_info *arena, u64 lba) +{ + return arena->dataoff + ((u64)lba * arena->internal_lbasize); +} + +static int btt_data_read(struct arena_info *arena, struct page *page, + unsigned int off, u32 lba, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_read_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static int btt_data_write(struct arena_info *arena, u32 lba, + struct page *page, unsigned int off, u32 len) +{ + int ret; + u64 nsoff = to_namespace_offset(arena, lba); + void *mem = kmap_atomic(page); + + ret = arena_write_bytes(arena, nsoff, mem + off, len); + kunmap_atomic(mem); + + return ret; +} + +static void zero_fill_data(struct page *page, unsigned int off, u32 len) +{ + void *mem = kmap_atomic(page); + + memset(mem + off, 0, len); + kunmap_atomic(mem); +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + unsigned int len = btt_meta_size(btt); + u64 meta_nsoff; + int ret = 0; + + if (bip == NULL) + return 0; + + meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size; + + while (len) { + unsigned int cur_len; + struct bio_vec bv; + void *mem; + + bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); + /* + * The 'bv' obtained from bvec_iter_bvec has its .bv_len and + * .bv_offset already adjusted for iter->bi_bvec_done, and we + * can use those directly + */ + + cur_len = min(len, bv.bv_len); + mem = kmap_atomic(bv.bv_page); + if (rw) + ret = arena_write_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + else + ret = arena_read_bytes(arena, meta_nsoff, + mem + bv.bv_offset, cur_len); + + kunmap_atomic(mem); + if (ret) + return ret; + + len -= cur_len; + meta_nsoff += cur_len; + bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); + } + + return ret; +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip, + struct arena_info *arena, u32 postmap, int rw) +{ + return 0; +} +#endif + +static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int off, sector_t sector, + unsigned int len) +{ + int ret = 0; + int t_flag, e_flag; + struct arena_info *arena = NULL; + u32 lane = 0, premap, postmap; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, 
sector, &premap, &arena); + if (ret) + goto out_lane; + + cur_len = min(btt->sector_size, len); + + ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag); + if (ret) + goto out_lane; + + /* + * We loop to make sure that the post map LBA didn't change + * from under us between writing the RTT and doing the actual + * read. + */ + while (1) { + u32 new_map; + + if (t_flag) { + zero_fill_data(page, off, cur_len); + goto out_lane; + } + + if (e_flag) { + ret = -EIO; + goto out_lane; + } + + arena->rtt[lane] = RTT_VALID | postmap; + /* + * Barrier to make sure this write is not reordered + * to do the verification map_read before the RTT store + */ + barrier(); + + ret = btt_map_read(arena, premap, &new_map, &t_flag, + &e_flag); + if (ret) + goto out_rtt; + + if (postmap == new_map) + break; + + postmap = new_map; + } + + ret = btt_data_read(arena, page, off, postmap, cur_len); + if (ret) + goto out_rtt; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, postmap, READ); + if (ret) + goto out_rtt; + } + + arena->rtt[lane] = RTT_INVALID; + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_rtt: + arena->rtt[lane] = RTT_INVALID; + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, + sector_t sector, struct page *page, unsigned int off, + unsigned int len) +{ + int ret = 0; + struct arena_info *arena = NULL; + u32 premap = 0, old_postmap, new_postmap, lane = 0, i; + struct log_entry log; + int sub; + + while (len) { + u32 cur_len; + + lane = nd_region_acquire_lane(btt->nd_region); + + ret = lba_to_arena(btt, sector, &premap, &arena); + if (ret) + goto out_lane; + cur_len = min(btt->sector_size, len); + + if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) { + ret = -EIO; + goto out_lane; + } + + new_postmap = arena->freelist[lane].block; + + /* Wait if the new block is being read from */ + for (i = 0; i < arena->nfree; i++) + while (arena->rtt[i] == (RTT_VALID | new_postmap)) + cpu_relax(); + + + if (new_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_lane; + } + + ret = btt_data_write(arena, new_postmap, page, off, cur_len); + if (ret) + goto out_lane; + + if (bip) { + ret = btt_rw_integrity(btt, bip, arena, new_postmap, + WRITE); + if (ret) + goto out_lane; + } + + lock_map(arena, premap); + ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL); + if (ret) + goto out_map; + if (old_postmap >= arena->internal_nlba) { + ret = -EIO; + goto out_map; + } + + log.lba = cpu_to_le32(premap); + log.old_map = cpu_to_le32(old_postmap); + log.new_map = cpu_to_le32(new_postmap); + log.seq = cpu_to_le32(arena->freelist[lane].seq); + sub = arena->freelist[lane].sub; + ret = btt_flog_write(arena, lane, sub, &log); + if (ret) + goto out_map; + + ret = btt_map_write(arena, premap, new_postmap, 0, 0); + if (ret) + goto out_map; + + unlock_map(arena, premap); + nd_region_release_lane(btt->nd_region, lane); + + len -= cur_len; + off += cur_len; + sector += btt->sector_size >> SECTOR_SHIFT; + } + + return 0; + + out_map: + unlock_map(arena, premap); + out_lane: + nd_region_release_lane(btt->nd_region, lane); + return ret; +} + +static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, + struct page *page, unsigned int len, unsigned int off, + int rw, sector_t sector) +{ + int ret; + + if (rw == READ) { + ret = btt_read_pg(btt, bip, page, off, sector, len); + 
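+ /* keep user mappings of the page coherent with the data just read */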
flush_dcache_page(page); + } else { + flush_dcache_page(page); + ret = btt_write_pg(btt, bip, sector, page, off, len); + } + + return ret; +} + +static void btt_make_request(struct request_queue *q, struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct btt *btt = q->queuedata; + struct bvec_iter iter; + unsigned long start; + struct bio_vec bvec; + int err = 0, rw; + bool do_acct; + + /* + * bio_integrity_enabled also checks if the bio already has an + * integrity payload attached. If it does, we *don't* do a + * bio_integrity_prep here - the payload has been generated by + * another kernel subsystem, and we just pass it through. + */ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + err = -EIO; + goto out; + } + + do_acct = nd_iostat_start(bio, &start); + rw = bio_data_dir(bio); + bio_for_each_segment(bvec, bio, iter) { + unsigned int len = bvec.bv_len; + + BUG_ON(len > PAGE_SIZE); + /* Make sure len is in multiples of sector size. */ + /* XXX is this right? */ + BUG_ON(len < btt->sector_size); + BUG_ON(len % btt->sector_size); + + err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, + rw, iter.bi_sector); + if (err) { + dev_info(&btt->nd_btt->dev, + "io error in %s sector %lld, len %d,\n", + (rw == READ) ? "READ" : "WRITE", + (unsigned long long) iter.bi_sector, len); + break; + } + } + if (do_acct) + nd_iostat_end(bio, start); + +out: + bio_endio(bio, err); +} + +static int btt_rw_page(struct block_device *bdev, sector_t sector, + struct page *page, int rw) +{ + struct btt *btt = bdev->bd_disk->private_data; + + btt_do_bvec(btt, NULL, page, PAGE_CACHE_SIZE, 0, rw, sector); + page_endio(page, rw & WRITE, 0); + return 0; +} + + +static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + /* some standard values */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + return 0; +} + +static const struct block_device_operations btt_fops = { + .owner = THIS_MODULE, + .rw_page = btt_rw_page, + .getgeo = btt_getgeo, + .revalidate_disk = nvdimm_revalidate_disk, +}; + +static int btt_blk_init(struct btt *btt) +{ + struct nd_btt *nd_btt = btt->nd_btt; + struct nd_namespace_common *ndns = nd_btt->ndns; + + /* create a new disk and request queue for btt */ + btt->btt_queue = blk_alloc_queue(GFP_KERNEL); + if (!btt->btt_queue) + return -ENOMEM; + + btt->btt_disk = alloc_disk(0); + if (!btt->btt_disk) { + blk_cleanup_queue(btt->btt_queue); + return -ENOMEM; + } + + nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); + btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; + btt->btt_disk->major = btt_major; + btt->btt_disk->first_minor = 0; + btt->btt_disk->fops = &btt_fops; + btt->btt_disk->private_data = btt; + btt->btt_disk->queue = btt->btt_queue; + btt->btt_disk->flags = GENHD_FL_EXT_DEVT; + + blk_queue_make_request(btt->btt_queue, btt_make_request); + blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); + blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); + blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); + btt->btt_queue->queuedata = btt; + + set_capacity(btt->btt_disk, 0); + add_disk(btt->btt_disk); + if (btt_meta_size(btt)) { + int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt)); + + if (rc) { + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); + return rc; + } + } + set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); + 
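+ /* the disk was added with zero capacity; re-read it now that the real size is known */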
revalidate_disk(btt->btt_disk); + + return 0; +} + +static void btt_blk_cleanup(struct btt *btt) +{ + blk_integrity_unregister(btt->btt_disk); + del_gendisk(btt->btt_disk); + put_disk(btt->btt_disk); + blk_cleanup_queue(btt->btt_queue); +} + +/** + * btt_init - initialize a block translation table for the given device + * @nd_btt: device with BTT geometry and backing device info + * @rawsize: raw size in bytes of the backing device + * @lbasize: lba size of the backing device + * @uuid: A uuid for the backing device - this is stored on media + * @maxlane: maximum number of parallel requests the device can handle + * + * Initialize a Block Translation Table on a backing device to provide + * single sector power fail atomicity. + * + * Context: + * Might sleep. + * + * Returns: + * Pointer to a new struct btt on success, NULL on failure. + */ +static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, + u32 lbasize, u8 *uuid, struct nd_region *nd_region) +{ + int ret; + struct btt *btt; + struct device *dev = &nd_btt->dev; + + btt = kzalloc(sizeof(struct btt), GFP_KERNEL); + if (!btt) + return NULL; + + btt->nd_btt = nd_btt; + btt->rawsize = rawsize; + btt->lbasize = lbasize; + btt->sector_size = ((lbasize >= 4096) ? 4096 : 512); + INIT_LIST_HEAD(&btt->arena_list); + mutex_init(&btt->init_lock); + btt->nd_region = nd_region; + + ret = discover_arenas(btt); + if (ret) { + dev_err(dev, "init: error in arena_discover: %d\n", ret); + goto out_free; + } + + if (btt->init_state != INIT_READY && nd_region->ro) { + dev_info(dev, "%s is read-only, unable to init btt metadata\n", + dev_name(&nd_region->dev)); + goto out_free; + } else if (btt->init_state != INIT_READY) { + btt->num_arenas = (rawsize / ARENA_MAX_SIZE) + + ((rawsize % ARENA_MAX_SIZE) ? 1 : 0); + dev_dbg(dev, "init: %d arenas for %llu rawsize\n", + btt->num_arenas, rawsize); + + ret = create_arenas(btt); + if (ret) { + dev_info(dev, "init: create_arenas: %d\n", ret); + goto out_free; + } + + ret = btt_meta_init(btt); + if (ret) { + dev_err(dev, "init: error in meta_init: %d\n", ret); + goto out_free; + } + } + + ret = btt_blk_init(btt); + if (ret) { + dev_err(dev, "init: error in blk_init: %d\n", ret); + goto out_free; + } + + btt_debugfs_init(btt); + + return btt; + + out_free: + kfree(btt); + return NULL; +} + +/** + * btt_fini - de-initialize a BTT + * @btt: the BTT handle that was generated by btt_init + * + * De-initialize a Block Translation Table on device removal + * + * Context: + * Might sleep. 
+ */ +static void btt_fini(struct btt *btt) +{ + if (btt) { + btt_blk_cleanup(btt); + free_arenas(btt); + debugfs_remove_recursive(btt->debugfs_dir); + kfree(btt); + } +} + +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct nd_region *nd_region; + struct btt *btt; + size_t rawsize; + + if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) + return -ENODEV; + + rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; + if (rawsize < ARENA_MIN_SIZE) { + return -ENXIO; + } + nd_region = to_nd_region(nd_btt->dev.parent); + btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid, + nd_region); + if (!btt) + return -ENOMEM; + nd_btt->btt = btt; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_attach_btt); + +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt = to_nd_btt(ndns->claim); + struct btt *btt = nd_btt->btt; + + btt_fini(btt); + nd_btt->btt = NULL; + + return 0; +} +EXPORT_SYMBOL(nvdimm_namespace_detach_btt); + +static int __init nd_btt_init(void) +{ + int rc; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + + btt_major = register_blkdev(0, "btt"); + if (btt_major < 0) + return btt_major; + + debugfs_root = debugfs_create_dir("btt", NULL); + if (IS_ERR_OR_NULL(debugfs_root)) { + rc = -ENXIO; + goto err_debugfs; + } + + return 0; + + err_debugfs: + unregister_blkdev(btt_major, "btt"); + + return rc; +} + +static void __exit nd_btt_exit(void) +{ + debugfs_remove_recursive(debugfs_root); + unregister_blkdev(btt_major, "btt"); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); +MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>"); +MODULE_LICENSE("GPL v2"); +module_init(nd_btt_init); +module_exit(nd_btt_exit); diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h new file mode 100644 index 000000000000..75b0d80a6bd9 --- /dev/null +++ b/drivers/nvdimm/btt.h @@ -0,0 +1,185 @@ +/* + * Block Translation Table library + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _LINUX_BTT_H +#define _LINUX_BTT_H + +#include <linux/types.h> + +#define BTT_SIG_LEN 16 +#define BTT_SIG "BTT_ARENA_INFO\0" +#define MAP_ENT_SIZE 4 +#define MAP_TRIM_SHIFT 31 +#define MAP_TRIM_MASK (1 << MAP_TRIM_SHIFT) +#define MAP_ERR_SHIFT 30 +#define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) +#define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) +#define MAP_ENT_NORMAL 0xC0000000 +#define LOG_ENT_SIZE sizeof(struct log_entry) +#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ +#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ +#define RTT_VALID (1UL << 31) +#define RTT_INVALID 0 +#define BTT_PG_SIZE 4096 +#define BTT_DEFAULT_NFREE ND_MAX_LANES +#define LOG_SEQ_INIT 1 + +#define IB_FLAG_ERROR 0x00000001 +#define IB_FLAG_ERROR_MASK 0x00000001 + +enum btt_init_state { + INIT_UNCHECKED = 0, + INIT_NOTFOUND, + INIT_READY +}; + +struct log_entry { + __le32 lba; + __le32 old_map; + __le32 new_map; + __le32 seq; + __le64 padding[2]; +}; + +struct btt_sb { + u8 signature[BTT_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le32 external_lbasize; + __le32 external_nlba; + __le32 internal_lbasize; + __le32 internal_nlba; + __le32 nfree; + __le32 infosize; + __le64 nextoff; + __le64 dataoff; + __le64 mapoff; + __le64 logoff; + __le64 info2off; + u8 padding[3968]; + __le64 checksum; +}; + +struct free_entry { + u32 block; + u8 sub; + u8 seq; +}; + +struct aligned_lock { + union { + spinlock_t lock; + u8 cacheline_padding[L1_CACHE_BYTES]; + }; +}; + +/** + * struct arena_info - handle for an arena + * @size: Size in bytes this arena occupies on the raw device. + * This includes arena metadata. + * @external_lba_start: The first external LBA in this arena. + * @internal_nlba: Number of internal blocks available in the arena + * including nfree reserved blocks + * @internal_lbasize: Internal and external lba sizes may be different as + * we can round up 'odd' external lbasizes such as 520B + * to be aligned. + * @external_nlba: Number of blocks contributed by the arena to the number + * reported to upper layers. (internal_nlba - nfree) + * @external_lbasize: LBA size as exposed to upper layers. + * @nfree: A reserve number of 'free' blocks that is used to + * handle incoming writes. + * @version_major: Metadata layout version major. + * @version_minor: Metadata layout version minor. + * @nextoff: Offset in bytes to the start of the next arena. + * @infooff: Offset in bytes to the info block of this arena. + * @dataoff: Offset in bytes to the data area of this arena. + * @mapoff: Offset in bytes to the map area of this arena. + * @logoff: Offset in bytes to the log area of this arena. + * @info2off: Offset in bytes to the backup info block of this arena. + * @freelist: Pointer to in-memory list of free blocks + * @rtt: Pointer to in-memory "Read Tracking Table" + * @map_locks: Spinlocks protecting concurrent map writes + * @nd_btt: Pointer to parent nd_btt structure. + * @list: List head for list of arenas + * @debugfs_dir: Debugfs dentry + * @flags: Arena flags - may signify error states. + * + * arena_info is a per-arena handle. Once an arena is narrowed down for an + * IO, this struct is passed around for the duration of the IO. 
+ */ +struct arena_info { + u64 size; /* Total bytes for this arena */ + u64 external_lba_start; + u32 internal_nlba; + u32 internal_lbasize; + u32 external_nlba; + u32 external_lbasize; + u32 nfree; + u16 version_major; + u16 version_minor; + /* Byte offsets to the different on-media structures */ + u64 nextoff; + u64 infooff; + u64 dataoff; + u64 mapoff; + u64 logoff; + u64 info2off; + /* Pointers to other in-memory structures for this arena */ + struct free_entry *freelist; + u32 *rtt; + struct aligned_lock *map_locks; + struct nd_btt *nd_btt; + struct list_head list; + struct dentry *debugfs_dir; + /* Arena flags */ + u32 flags; +}; + +/** + * struct btt - handle for a BTT instance + * @btt_disk: Pointer to the gendisk for BTT device + * @btt_queue: Pointer to the request queue for the BTT device + * @arena_list: Head of the list of arenas + * @debugfs_dir: Debugfs dentry + * @nd_btt: Parent nd_btt struct + * @nlba: Number of logical blocks exposed to the upper layers + * after removing the amount of space needed by metadata + * @rawsize: Total size in bytes of the available backing device + * @lbasize: LBA size as requested and presented to upper layers. + * This is sector_size + size of any metadata. + * @sector_size: The Linux sector size - 512 or 4096 + * @lanes: Per-lane spinlocks + * @init_lock: Mutex used for the BTT initialization + * @init_state: Flag describing the initialization state for the BTT + * @num_arenas: Number of arenas in the BTT instance + */ +struct btt { + struct gendisk *btt_disk; + struct request_queue *btt_queue; + struct list_head arena_list; + struct dentry *debugfs_dir; + struct nd_btt *nd_btt; + u64 nlba; + unsigned long long rawsize; + u32 lbasize; + u32 sector_size; + struct nd_region *nd_region; + struct mutex init_lock; + int init_state; + int num_arenas; +}; +#endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c new file mode 100644 index 000000000000..6ac8c0fea3ec --- /dev/null +++ b/drivers/nvdimm/btt_devs.c @@ -0,0 +1,425 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/genhd.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "btt.h" +#include "nd.h" + +static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != &nd_btt->dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + nd_btt->ndns = NULL; + put_device(&ndns->dev); +} + +static void nd_btt_detach_ndns(struct nd_btt *nd_btt) +{ + struct nd_namespace_common *ndns = nd_btt->ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_btt_detach_ndns(nd_btt); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + if (ndns->claim) + return false; + dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) + || nd_btt->ndns, + "%s: invalid claim\n", __func__); + ndns->claim = &nd_btt->dev; + nd_btt->ndns = ndns; + get_device(&ndns->dev); + return true; +} + +static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns) +{ + bool claimed; + + device_lock(&ndns->dev); + claimed = __nd_btt_attach_ndns(nd_btt, ndns); + device_unlock(&ndns->dev); + return claimed; +} + +static void nd_btt_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_btt_detach_ndns(nd_btt); + ida_simple_remove(&nd_region->btt_ida, nd_btt->id); + kfree(nd_btt->uuid); + kfree(nd_btt); +} + +static struct device_type nd_btt_device_type = { + .name = "nd_btt", + .release = nd_btt_release, +}; + +bool is_nd_btt(struct device *dev) +{ + return dev->type == &nd_btt_device_type; +} +EXPORT_SYMBOL(is_nd_btt); + +struct nd_btt *to_nd_btt(struct device *dev) +{ + struct nd_btt *nd_btt = container_of(dev, struct nd_btt, dev); + + WARN_ON(!is_nd_btt(dev)); + return nd_btt; +} +EXPORT_SYMBOL(to_nd_btt); + +static const unsigned long btt_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + return nd_sector_size_show(nd_btt->lbasize, btt_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + rc = nd_sector_size_store(dev, buf, &nd_btt->lbasize, + btt_lbasize_supported); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? 
rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_btt->uuid) + return sprintf(buf, "%pUb\n", nd_btt->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_btt->ndns + ? dev_name(&nd_btt->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_nd_btt_idle(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_btt *nd_btt = to_nd_btt(dev); + + if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) + return false; + return true; +} + +static ssize_t __namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_btt *nd_btt = to_nd_btt(dev); + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = nd_btt->ndns; + if (strcmp(name, "") == 0) { + /* detach the namespace and destroy / reset the btt device */ + nd_btt_detach_ndns(nd_btt); + if (is_nd_btt_idle(dev)) + nd_device_unregister(dev, ND_ASYNC); + else { + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host btt\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); + if (!nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = __namespace_store(dev, attr, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? 
"" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_btt_attributes[] = { + &dev_attr_sector_size.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_btt_attribute_group = { + .attrs = nd_btt_attributes, +}; + +static const struct attribute_group *nd_btt_attribute_groups[] = { + &nd_btt_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_btt_create(struct nd_region *nd_region, + unsigned long lbasize, u8 *uuid, + struct nd_namespace_common *ndns) +{ + struct nd_btt *nd_btt; + struct device *dev; + + nd_btt = kzalloc(sizeof(*nd_btt), GFP_KERNEL); + if (!nd_btt) + return NULL; + + nd_btt->id = ida_simple_get(&nd_region->btt_ida, 0, 0, GFP_KERNEL); + if (nd_btt->id < 0) { + kfree(nd_btt); + return NULL; + } + + nd_btt->lbasize = lbasize; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_btt->uuid = uuid; + dev = &nd_btt->dev; + dev_set_name(dev, "btt%d.%d", nd_region->id, nd_btt->id); + dev->parent = &nd_region->dev; + dev->type = &nd_btt_device_type; + dev->groups = nd_btt_attribute_groups; + device_initialize(&nd_btt->dev); + if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_btt_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +/* + * nd_btt_sb_checksum: compute checksum for btt info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) +{ + u64 sum; + __le64 sum_save; + + sum_save = btt_sb->checksum; + btt_sb->checksum = 0; + sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); + btt_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_btt_sb_checksum); + +static int __nd_btt_probe(struct nd_btt *nd_btt, + struct nd_namespace_common *ndns, struct btt_sb *btt_sb) +{ + u64 checksum; + + if (!btt_sb || !ndns || !nd_btt) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb))) + return -ENXIO; + + if (nvdimm_namespace_capacity(ndns) < SZ_16M) + return -ENXIO; + + if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(btt_sb->checksum); + btt_sb->checksum = 0; + if (checksum != nd_btt_sb_checksum(btt_sb)) + return -ENODEV; + btt_sb->checksum = cpu_to_le64(checksum); + + nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); + nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); + if (!nd_btt->uuid) + return -ENOMEM; + + __nd_device_register(&nd_btt->dev); + + return 0; +} + +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct btt_sb *btt_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_btt_create(nd_region, 0, NULL, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + btt_sb = kzalloc(sizeof(*btt_sb), GFP_KERNEL); + rc = __nd_btt_probe(to_nd_btt(dev), ndns, btt_sb); + kfree(btt_sb); + dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, + rc == 0 ? 
dev_name(dev) : "<none>"); + if (rc < 0) { + __nd_btt_detach_ndns(to_nd_btt(dev)); + put_device(dev); + } + + return rc; +} +EXPORT_SYMBOL(nd_btt_probe); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c new file mode 100644 index 000000000000..8eb22c0ca7ce --- /dev/null +++ b/drivers/nvdimm/bus.c @@ -0,0 +1,730 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/fcntl.h> +#include <linux/async.h> +#include <linux/genhd.h> +#include <linux/ndctl.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +int nvdimm_major; +static int nvdimm_bus_major; +static struct class *nd_class; + +static int to_nd_device_type(struct device *dev) +{ + if (is_nvdimm(dev)) + return ND_DEVICE_DIMM; + else if (is_nd_pmem(dev)) + return ND_DEVICE_REGION_PMEM; + else if (is_nd_blk(dev)) + return ND_DEVICE_REGION_BLK; + else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + return nd_region_to_nstype(to_nd_region(dev->parent)); + + return 0; +} + +static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + /* + * Ensure that region devices always have their numa node set as + * early as possible. 
+ */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + set_dev_node(dev, to_nd_region(dev)->numa_node); + return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, + to_nd_device_type(dev)); +} + +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + return test_bit(to_nd_device_type(dev), &nd_drv->type); +} + +static struct module *to_bus_provider(struct device *dev) +{ + /* pin bus providers while regions are enabled */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + return nvdimm_bus->module; + } + return NULL; +} + +static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + nvdimm_bus->probe_active++; + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus) +{ + nvdimm_bus_lock(&nvdimm_bus->dev); + if (--nvdimm_bus->probe_active == 0) + wake_up(&nvdimm_bus->probe_wait); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} + +static int nvdimm_bus_probe(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + if (!try_module_get(provider)) + return -ENXIO; + + nvdimm_bus_probe_start(nvdimm_bus); + rc = nd_drv->probe(dev); + if (rc == 0) + nd_region_probe_success(nvdimm_bus, dev); + else + nd_region_disable(nvdimm_bus, dev); + nvdimm_bus_probe_end(nvdimm_bus); + + dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + + if (rc != 0) + module_put(provider); + return rc; +} + +static int nvdimm_bus_remove(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->remove(dev); + nd_region_disable(nvdimm_bus, dev); + + dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + module_put(provider); + return rc; +} + +static struct bus_type nvdimm_bus_type = { + .name = "nd", + .uevent = nvdimm_bus_uevent, + .match = nvdimm_bus_match, + .probe = nvdimm_bus_probe, + .remove = nvdimm_bus_remove, +}; + +static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); + +void nd_synchronize(void) +{ + async_synchronize_full_domain(&nd_async_domain); +} +EXPORT_SYMBOL_GPL(nd_synchronize); + +static void nd_async_device_register(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + if (device_add(dev) != 0) { + dev_err(dev, "%s: failed\n", __func__); + put_device(dev); + } + put_device(dev); +} + +static void nd_async_device_unregister(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + /* flush bus operations before delete */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + + device_unregister(dev); + put_device(dev); +} + +void __nd_device_register(struct device *dev) +{ + dev->bus = &nvdimm_bus_type; + get_device(dev); + async_schedule_domain(nd_async_device_register, dev, + &nd_async_domain); +} + +void nd_device_register(struct device *dev) +{ + device_initialize(dev); + __nd_device_register(dev); +} +EXPORT_SYMBOL(nd_device_register); + +void nd_device_unregister(struct device *dev, enum nd_async_mode mode) +{ + switch (mode) { + case ND_ASYNC: + get_device(dev); + async_schedule_domain(nd_async_device_unregister, dev, + &nd_async_domain); + break; + case 
ND_SYNC: + nd_synchronize(); + device_unregister(dev); + break; + } +} +EXPORT_SYMBOL(nd_device_unregister); + +/** + * __nd_driver_register() - register a region or a namespace driver + * @nd_drv: driver to register + * @owner: automatically set by nd_driver_register() macro + * @mod_name: automatically set by nd_driver_register() macro + */ +int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &nd_drv->drv; + + if (!nd_drv->type) { + pr_debug("driver type bitmask not set (%pf)\n", + __builtin_return_address(0)); + return -EINVAL; + } + + if (!nd_drv->probe || !nd_drv->remove) { + pr_debug("->probe() and ->remove() must be specified\n"); + return -EINVAL; + } + + drv->bus = &nvdimm_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL(__nd_driver_register); + +int nvdimm_revalidate_disk(struct gendisk *disk) +{ + struct device *dev = disk->driverfs_dev; + struct nd_region *nd_region = to_nd_region(dev->parent); + const char *pol = nd_region->ro ? "only" : "write"; + + if (nd_region->ro == get_disk_ro(disk)) + return 0; + + dev_info(dev, "%s read-%s, marking %s read-%s\n", + dev_name(&nd_region->dev), pol, disk->disk_name, pol); + set_disk_ro(disk, nd_region->ro); + + return 0; + +} +EXPORT_SYMBOL(nvdimm_revalidate_disk); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n", + to_nd_device_type(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *nd_device_attributes[] = { + &dev_attr_modalias.attr, + &dev_attr_devtype.attr, + NULL, +}; + +/** + * nd_device_attribute_group - generic attributes for all devices on an nd bus + */ +struct attribute_group nd_device_attribute_group = { + .attrs = nd_device_attributes, +}; +EXPORT_SYMBOL_GPL(nd_device_attribute_group); + +static ssize_t numa_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + +static struct attribute *nd_numa_attributes[] = { + &dev_attr_numa_node.attr, + NULL, +}; + +static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA)) + return 0; + + return a->mode; +} + +/** + * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus + */ +struct attribute_group nd_numa_attribute_group = { + .attrs = nd_numa_attributes, + .is_visible = nd_numa_attr_visible, +}; +EXPORT_SYMBOL_GPL(nd_numa_attribute_group); + +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); + struct device *dev; + + dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, + "ndctl%d", nvdimm_bus->id); + + if (IS_ERR(dev)) { + dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", + nvdimm_bus->id, PTR_ERR(dev)); + return PTR_ERR(dev); + } + return 0; +} + +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) +{ + device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id)); +} + +static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_SMART] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + 
[ND_CMD_SMART_THRESHOLD] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_DIMM_FLAGS] = { + .out_num = 2, + .out_sizes = { 4, 4 }, + }, + [ND_CMD_GET_CONFIG_SIZE] = { + .out_num = 3, + .out_sizes = { 4, 4, 4, }, + }, + [ND_CMD_GET_CONFIG_DATA] = { + .in_num = 2, + .in_sizes = { 4, 4, }, + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, + [ND_CMD_SET_CONFIG_DATA] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_VENDOR] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 3, + .out_sizes = { 4, 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs)) + return &__nd_cmd_dimm_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc); + +static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_ARS_CAP] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 2, + .out_sizes = { 4, 4, }, + }, + [ND_CMD_ARS_START] = { + .in_num = 4, + .in_sizes = { 8, 8, 2, 6, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_ARS_STATUS] = { + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs)) + return &__nd_cmd_bus_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_bus_desc); + +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf) +{ + if (idx >= desc->in_num) + return UINT_MAX; + + if (desc->in_sizes[idx] < UINT_MAX) + return desc->in_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) { + struct nd_cmd_set_config_hdr *hdr = buf; + + return hdr->in_length; + } else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) { + struct nd_cmd_vendor_hdr *hdr = buf; + + return hdr->in_length; + } + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_in_size); + +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field) +{ + if (idx >= desc->out_num) + return UINT_MAX; + + if (desc->out_sizes[idx] < UINT_MAX) + return desc->out_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1) + return in_field[1]; + else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) + return out_field[1]; + else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1) + return ND_CMD_ARS_STATUS_MAX; + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_out_size); + +void wait_nvdimm_bus_probe_idle(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + do { + if (nvdimm_bus->probe_active == 0) + break; + nvdimm_bus_unlock(&nvdimm_bus->dev); + wait_event(nvdimm_bus->probe_wait, + nvdimm_bus->probe_active == 0); + nvdimm_bus_lock(&nvdimm_bus->dev); + } while (true); +} + +/* set_config requires an idle interleave set */ +static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +{ + struct nvdimm_bus *nvdimm_bus; + + if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) + return 0; + + nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); + + if (atomic_read(&nvdimm->busy)) + return -EBUSY; + return 0; +} + +static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, + int read_only, unsigned int ioctl_cmd, unsigned long arg) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + size_t buf_len = 0, in_len = 0, out_len = 0; + static char out_env[ND_CMD_MAX_ENVELOPE]; + 
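+ /*
+ * out_env / in_env are scratch copies of the fixed-size leading fields of
+ * the command payload; nd_cmd_out_size() / nd_cmd_in_size() inspect them
+ * to size any variable-length trailing field before the full buffer is
+ * copied in from user space.
+ */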
static char in_env[ND_CMD_MAX_ENVELOPE]; + const struct nd_cmd_desc *desc = NULL; + unsigned int cmd = _IOC_NR(ioctl_cmd); + void __user *p = (void __user *) arg; + struct device *dev = &nvdimm_bus->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + void *buf; + int rc, i; + + if (nvdimm) { + desc = nd_cmd_dimm_desc(cmd); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + dimm_name = dev_name(&nvdimm->dev); + } else { + desc = nd_cmd_bus_desc(cmd); + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + dimm_name = "bus"; + } + + if (!desc || (desc->out_num + desc->in_num == 0) || + !test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + /* fail write commands (when read-only) */ + if (read_only) + switch (ioctl_cmd) { + case ND_IOCTL_VENDOR: + case ND_IOCTL_SET_CONFIG_DATA: + case ND_IOCTL_ARS_START: + dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", + nvdimm ? nvdimm_cmd_name(cmd) + : nvdimm_bus_cmd_name(cmd)); + return -EPERM; + default: + break; + } + + /* process an input envelope */ + for (i = 0; i < desc->in_num; i++) { + u32 in_size, copy; + + in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env); + if (in_size == UINT_MAX) { + dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -ENXIO; + } + if (!access_ok(VERIFY_READ, p + in_len, in_size)) + return -EFAULT; + if (in_len < sizeof(in_env)) + copy = min_t(u32, sizeof(in_env) - in_len, in_size); + else + copy = 0; + if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) + return -EFAULT; + in_len += in_size; + } + + /* process an output envelope */ + for (i = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, + (u32 *) in_env, (u32 *) out_env); + u32 copy; + + if (out_size == UINT_MAX) { + dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -EFAULT; + } + if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) + return -EFAULT; + if (out_len < sizeof(out_env)) + copy = min_t(u32, sizeof(out_env) - out_len, out_size); + else + copy = 0; + if (copy && copy_from_user(&out_env[out_len], + p + in_len + out_len, copy)) + return -EFAULT; + out_len += out_size; + } + + buf_len = out_len + in_len; + if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) + return -EFAULT; + + if (buf_len > ND_IOCTL_MAX_BUFLEN) { + dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, + dimm_name, cmd_name, buf_len, + ND_IOCTL_MAX_BUFLEN); + return -EINVAL; + } + + buf = vmalloc(buf_len); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, p, buf_len)) { + rc = -EFAULT; + goto out; + } + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = nd_cmd_clear_to_send(nvdimm, cmd); + if (rc) + goto out_unlock; + + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + if (rc < 0) + goto out_unlock; + if (copy_to_user(p, buf, buf_len)) + rc = -EFAULT; + out_unlock: + nvdimm_bus_unlock(&nvdimm_bus->dev); + out: + vfree(buf); + return rc; +} + +static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + long id = (long) file->private_data; + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + if (nvdimm_bus->id == id) { + rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + break; + } + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; 
+} + +static int match_dimm(struct device *dev, void *data) +{ + long id = (long) data; + + if (is_nvdimm(dev)) { + struct nvdimm *nvdimm = to_nvdimm(dev); + + return nvdimm->id == id; + } + + return 0; +} + +static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + struct device *dev = device_find_child(&nvdimm_bus->dev, + file->private_data, match_dimm); + struct nvdimm *nvdimm; + + if (!dev) + continue; + + nvdimm = to_nvdimm(dev); + rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + put_device(dev); + break; + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int nd_open(struct inode *inode, struct file *file) +{ + long minor = iminor(inode); + + file->private_data = (void *) minor; + return 0; +} + +static const struct file_operations nvdimm_bus_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nd_ioctl, + .compat_ioctl = nd_ioctl, + .llseek = noop_llseek, +}; + +static const struct file_operations nvdimm_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nvdimm_ioctl, + .compat_ioctl = nvdimm_ioctl, + .llseek = noop_llseek, +}; + +int __init nvdimm_bus_init(void) +{ + int rc; + + rc = bus_register(&nvdimm_bus_type); + if (rc) + return rc; + + rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops); + if (rc < 0) + goto err_bus_chrdev; + nvdimm_bus_major = rc; + + rc = register_chrdev(0, "dimmctl", &nvdimm_fops); + if (rc < 0) + goto err_dimm_chrdev; + nvdimm_major = rc; + + nd_class = class_create(THIS_MODULE, "nd"); + if (IS_ERR(nd_class)) + goto err_class; + + return 0; + + err_class: + unregister_chrdev(nvdimm_major, "dimmctl"); + err_dimm_chrdev: + unregister_chrdev(nvdimm_bus_major, "ndctl"); + err_bus_chrdev: + bus_unregister(&nvdimm_bus_type); + + return rc; +} + +void nvdimm_bus_exit(void) +{ + class_destroy(nd_class); + unregister_chrdev(nvdimm_bus_major, "ndctl"); + unregister_chrdev(nvdimm_major, "dimmctl"); + bus_unregister(&nvdimm_bus_type); +} diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c new file mode 100644 index 000000000000..cb62ec6a12d0 --- /dev/null +++ b/drivers/nvdimm/core.c @@ -0,0 +1,465 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/libnvdimm.h> +#include <linux/export.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/ctype.h> +#include <linux/ndctl.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include "nd-core.h" +#include "nd.h" + +LIST_HEAD(nvdimm_bus_list); +DEFINE_MUTEX(nvdimm_bus_list_mutex); +static DEFINE_IDA(nd_ida); + +void nvdimm_bus_lock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_lock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_lock); + +void nvdimm_bus_unlock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_unlock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_unlock); + +bool is_nvdimm_bus_locked(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + return mutex_is_locked(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(is_nvdimm_bus_locked); + +u64 nd_fletcher64(void *addr, size_t len, bool le) +{ + u32 *buf = addr; + u32 lo32 = 0; + u64 hi32 = 0; + int i; + + for (i = 0; i < len / sizeof(u32); i++) { + lo32 += le ? le32_to_cpu((__le32) buf[i]) : buf[i]; + hi32 += lo32; + } + + return hi32 << 32 | lo32; +} +EXPORT_SYMBOL_GPL(nd_fletcher64); + +static void nvdimm_bus_release(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + ida_simple_remove(&nd_ida, nvdimm_bus->id); + kfree(nvdimm_bus); +} + +struct nvdimm_bus *to_nvdimm_bus(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus; + + nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); + WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); + return nvdimm_bus; +} +EXPORT_SYMBOL_GPL(to_nvdimm_bus); + +struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) +{ + /* struct nvdimm_bus definition is private to libnvdimm */ + return nvdimm_bus->nd_desc; +} +EXPORT_SYMBOL_GPL(to_nd_desc); + +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) +{ + struct device *dev; + + for (dev = nd_dev; dev; dev = dev->parent) + if (dev->release == nvdimm_bus_release) + break; + dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); + if (dev) + return to_nvdimm_bus(dev); + return NULL; +} + +static bool is_uuid_sep(char sep) +{ + if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') + return true; + return false; +} + +static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, + size_t len) +{ + const char *str = buf; + u8 uuid[16]; + int i; + + for (i = 0; i < 16; i++) { + if (!isxdigit(str[0]) || !isxdigit(str[1])) { + dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", + __func__, i, str - buf, str[0], + str + 1 - buf, str[1]); + return -EINVAL; + } + + uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); + str += 2; + if (is_uuid_sep(*str)) + str++; + } + + memcpy(uuid_out, uuid, sizeof(uuid)); + return 0; +} + +/** + * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes + * @dev: container device for the uuid property + * @uuid_out: uuid buffer to replace + * @buf: raw sysfs buffer to parse + * + * Enforce that uuids can only be changed while the device is disabled + * (driver detached) + * LOCKING: expects device_lock() is held on entry + */ +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len) +{ + u8 uuid[16]; + int rc; + + if (dev->driver) + return -EBUSY; + 
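+ /*
+ * Parse into an on-stack buffer first so that a malformed input string
+ * leaves the existing uuid untouched.
+ */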
+ rc = nd_uuid_parse(dev, uuid, buf, len); + if (rc) + return rc; + + kfree(*uuid_out); + *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); + if (!(*uuid_out)) + return -ENOMEM; + + return 0; +} + +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf) +{ + ssize_t len = 0; + int i; + + for (i = 0; supported[i]; i++) + if (current_lbasize == supported[i]) + len += sprintf(buf + len, "[%ld] ", supported[i]); + else + len += sprintf(buf + len, "%ld ", supported[i]); + len += sprintf(buf + len, "\n"); + return len; +} + +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported) +{ + unsigned long lbasize; + int rc, i; + + if (dev->driver) + return -EBUSY; + + rc = kstrtoul(buf, 0, &lbasize); + if (rc) + return rc; + + for (i = 0; supported[i]; i++) + if (lbasize == supported[i]) + break; + + if (supported[i]) { + *current_lbasize = lbasize; + return 0; + } else { + return -EINVAL; + } +} + +void __nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + *start = jiffies; + part_round_stats(cpu, &disk->part0); + part_stat_inc(cpu, &disk->part0, ios[rw]); + part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); + part_inc_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(__nd_iostat_start); + +void nd_iostat_end(struct bio *bio, unsigned long start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + unsigned long duration = jiffies - start; + const int rw = bio_data_dir(bio); + int cpu = part_stat_lock(); + + part_stat_add(cpu, &disk->part0, ticks[rw], duration); + part_round_stats(cpu, &disk->part0); + part_dec_in_flight(&disk->part0, rw); + part_stat_unlock(); +} +EXPORT_SYMBOL(nd_iostat_end); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cmd, len = 0; + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + struct device *parent = nvdimm_bus->dev.parent; + + if (nd_desc->provider_name) + return nd_desc->provider_name; + else if (parent) + return dev_name(parent); + else + return "unknown"; +} + +static ssize_t provider_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + + return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus)); +} +static DEVICE_ATTR_RO(provider); + +static int flush_namespaces(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + return 0; +} + +static int flush_regions_dimms(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + device_for_each_child(dev, NULL, flush_namespaces); + return 0; +} + +static ssize_t wait_probe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + nd_synchronize(); + device_for_each_child(dev, NULL, flush_regions_dimms); + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR_RO(wait_probe); + +static struct attribute *nvdimm_bus_attributes[] = { + 
&dev_attr_commands.attr, + &dev_attr_wait_probe.attr, + &dev_attr_provider.attr, + NULL, +}; + +struct attribute_group nvdimm_bus_attribute_group = { + .attrs = nvdimm_bus_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); + +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc, struct module *module) +{ + struct nvdimm_bus *nvdimm_bus; + int rc; + + nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); + if (!nvdimm_bus) + return NULL; + INIT_LIST_HEAD(&nvdimm_bus->list); + init_waitqueue_head(&nvdimm_bus->probe_wait); + nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); + if (nvdimm_bus->id < 0) { + kfree(nvdimm_bus); + return NULL; + } + nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->module = module; + nvdimm_bus->dev.parent = parent; + nvdimm_bus->dev.release = nvdimm_bus_release; + nvdimm_bus->dev.groups = nd_desc->attr_groups; + dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); + rc = device_register(&nvdimm_bus->dev); + if (rc) { + dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); + goto err; + } + + rc = nvdimm_bus_create_ndctl(nvdimm_bus); + if (rc) + goto err; + + mutex_lock(&nvdimm_bus_list_mutex); + list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list); + mutex_unlock(&nvdimm_bus_list_mutex); + + return nvdimm_bus; + err: + put_device(&nvdimm_bus->dev); + return NULL; +} +EXPORT_SYMBOL_GPL(__nvdimm_bus_register); + +static int child_unregister(struct device *dev, void *data) +{ + /* + * the singular ndctl class device per bus needs to be + * "device_destroy"ed, so skip it here + * + * i.e. remove classless children + */ + if (dev->class) + /* pass */; + else + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) +{ + if (!nvdimm_bus) + return; + + mutex_lock(&nvdimm_bus_list_mutex); + list_del_init(&nvdimm_bus->list); + mutex_unlock(&nvdimm_bus_list_mutex); + + nd_synchronize(); + device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + nvdimm_bus_destroy_ndctl(nvdimm_bus); + + device_unregister(&nvdimm_bus->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nd_pi_nop_generate_verify(struct blk_integrity_iter *iter) +{ + return 0; +} + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + struct blk_integrity integrity = { + .name = "ND-PI-NOP", + .generate_fn = nd_pi_nop_generate_verify, + .verify_fn = nd_pi_nop_generate_verify, + .tuple_size = meta_size, + .tag_size = meta_size, + }; + int ret; + + if (meta_size == 0) + return 0; + + ret = blk_integrity_register(disk, &integrity); + if (ret) + return ret; + + blk_queue_max_integrity_segments(disk->queue, 1); + + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) +{ + return 0; +} +EXPORT_SYMBOL(nd_integrity_init); + +#endif + +static __init int libnvdimm_init(void) +{ + int rc; + + rc = nvdimm_bus_init(); + if (rc) + return rc; + rc = nvdimm_init(); + if (rc) + goto err_dimm; + rc = nd_region_init(); + if (rc) + goto err_region; + return 0; + err_region: + nvdimm_exit(); + err_dimm: + nvdimm_bus_exit(); + return rc; +} + +static __exit void libnvdimm_exit(void) +{ + WARN_ON(!list_empty(&nvdimm_bus_list)); + nd_region_exit(); + nvdimm_exit(); + nvdimm_bus_exit(); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); 
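+/*
+ * Register the 'nd' bus type and the core drivers at subsys_initcall time
+ * so the bus exists before provider modules (for example the ACPI NFIT
+ * driver) register an nvdimm_bus from their own, later, initcalls.
+ */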
+subsys_initcall(libnvdimm_init); +module_exit(libnvdimm_exit); diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c new file mode 100644 index 000000000000..71d12bb67339 --- /dev/null +++ b/drivers/nvdimm/dimm.c @@ -0,0 +1,102 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/sizes.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/nd.h> +#include "label.h" +#include "nd.h" + +static int nvdimm_probe(struct device *dev) +{ + struct nvdimm_drvdata *ndd; + int rc; + + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); + if (!ndd) + return -ENOMEM; + + dev_set_drvdata(dev, ndd); + ndd->dpa.name = dev_name(dev); + ndd->ns_current = -1; + ndd->ns_next = -1; + ndd->dpa.start = 0; + ndd->dpa.end = -1; + ndd->dev = dev; + get_device(dev); + kref_init(&ndd->kref); + + rc = nvdimm_init_nsarea(ndd); + if (rc) + goto err; + + rc = nvdimm_init_config_data(ndd); + if (rc) + goto err; + + dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + + nvdimm_bus_lock(dev); + ndd->ns_current = nd_label_validate(ndd); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + nd_label_copy(ndd, to_next_namespace_index(ndd), + to_current_namespace_index(ndd)); + rc = nd_label_reserve_dpa(ndd); + nvdimm_bus_unlock(dev); + + if (rc) + goto err; + + return 0; + + err: + put_ndd(ndd); + return rc; +} + +static int nvdimm_remove(struct device *dev) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + put_ndd(ndd); + + return 0; +} + +static struct nd_device_driver nvdimm_driver = { + .probe = nvdimm_probe, + .remove = nvdimm_remove, + .drv = { + .name = "nvdimm", + }, + .type = ND_DRIVER_DIMM, +}; + +int __init nvdimm_init(void) +{ + return nd_driver_register(&nvdimm_driver); +} + +void nvdimm_exit(void) +{ + driver_unregister(&nvdimm_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c new file mode 100644 index 000000000000..c05eb807d674 --- /dev/null +++ b/drivers/nvdimm/dimm_devs.c @@ -0,0 +1,551 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static DEFINE_IDA(dimm_ida); + +/* + * Retrieve bus and dimm handle and return if this bus supports + * get_config_data commands + */ +static int __validate_dimm(struct nvdimm_drvdata *ndd) +{ + struct nvdimm *nvdimm; + + if (!ndd) + return -EINVAL; + + nvdimm = to_nvdimm(ndd->dev); + + if (!nvdimm->dsm_mask) + return -ENXIO; + if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + return -ENXIO; + + return 0; +} + +static int validate_dimm(struct nvdimm_drvdata *ndd) +{ + int rc = __validate_dimm(ndd); + + if (rc && ndd) + dev_dbg(ndd->dev, "%pf: %s error: %d\n", + __builtin_return_address(0), __func__, rc); + return rc; +} + +/** + * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area + * @nvdimm: dimm to initialize + */ +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) +{ + struct nd_cmd_get_config_size *cmd = &ndd->nsarea; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + + if (rc) + return rc; + + if (cmd->config_size) + return 0; /* already valid */ + + memset(cmd, 0, sizeof(*cmd)); + nd_desc = nvdimm_bus->nd_desc; + return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); +} + +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nd_cmd_get_config_data_hdr *cmd; + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + u32 max_cmd_size, config_size; + size_t offset; + + if (rc) + return rc; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 + || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { + dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); + return -ENXIO; + } + + ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); + if (!ndd->data) + ndd->data = vmalloc(ndd->nsarea.config_size); + + if (!ndd->data) + return -ENOMEM; + + max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + nd_desc = nvdimm_bus->nd_desc; + for (config_size = ndd->nsarea.config_size, offset = 0; + config_size; config_size -= cmd->in_length, + offset += cmd->in_length) { + cmd->in_length = min(config_size, max_cmd_size); + cmd->in_offset = offset; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_DATA, cmd, + cmd->in_length + sizeof(*cmd)); + if (rc || cmd->status) { + rc = -ENXIO; + break; + } + memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + } + dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + kfree(cmd); + + return rc; +} + +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len) +{ + int rc = validate_dimm(ndd); + size_t max_cmd_size, buf_offset; + struct nd_cmd_set_config_hdr *cmd; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + if (rc) + return rc; + + if (!ndd->data) + return -ENXIO; + + if (offset + len > ndd->nsarea.config_size) + return -ENXIO; + + max_cmd_size = min_t(u32, PAGE_SIZE, len); + max_cmd_size = 
min_t(u32, max_cmd_size, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd) + sizeof(u32), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + for (buf_offset = 0; len; len -= cmd->in_length, + buf_offset += cmd->in_length) { + size_t cmd_size; + u32 *status; + + cmd->in_offset = offset + buf_offset; + cmd->in_length = min(max_cmd_size, len); + memcpy(cmd->in_buf, buf + buf_offset, cmd->in_length); + + /* status is output in the last 4-bytes of the command buffer */ + cmd_size = sizeof(*cmd) + cmd->in_length + sizeof(u32); + status = ((void *) cmd) + cmd_size - sizeof(u32); + + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + if (rc || *status) { + rc = rc ? rc : -ENXIO; + break; + } + } + kfree(cmd); + + return rc; +} + +static void nvdimm_release(struct device *dev) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + ida_simple_remove(&dimm_ida, nvdimm->id); + kfree(nvdimm); +} + +static struct device_type nvdimm_device_type = { + .name = "nvdimm", + .release = nvdimm_release, +}; + +bool is_nvdimm(struct device *dev) +{ + return dev->type == &nvdimm_device_type; +} + +struct nvdimm *to_nvdimm(struct device *dev) +{ + struct nvdimm *nvdimm = container_of(dev, struct nvdimm, dev); + + WARN_ON(!is_nvdimm(dev)); + return nvdimm; +} +EXPORT_SYMBOL_GPL(to_nvdimm); + +struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) +{ + struct nd_region *nd_region = &ndbr->nd_region; + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + return nd_mapping->nvdimm; +} +EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); + +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + return dev_get_drvdata(&nvdimm->dev); +} +EXPORT_SYMBOL(to_ndd); + +void nvdimm_drvdata_release(struct kref *kref) +{ + struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); + struct device *dev = ndd->dev; + struct resource *res, *_r; + + dev_dbg(dev, "%s\n", __func__); + + nvdimm_bus_lock(dev); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); + put_device(dev); +} + +void get_ndd(struct nvdimm_drvdata *ndd) +{ + kref_get(&ndd->kref); +} + +void put_ndd(struct nvdimm_drvdata *ndd) +{ + if (ndd) + kref_put(&ndd->kref, nvdimm_drvdata_release); +} + +const char *nvdimm_name(struct nvdimm *nvdimm) +{ + return dev_name(&nvdimm->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_name); + +void *nvdimm_provider_data(struct nvdimm *nvdimm) +{ + if (nvdimm) + return nvdimm->provider_data; + return NULL; +} +EXPORT_SYMBOL_GPL(nvdimm_provider_data); + +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + int cmd, len = 0; + + if (!nvdimm->dsm_mask) + return sprintf(buf, "\n"); + + for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static ssize_t state_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + /* + * The state may be in the process of changing, userspace should + * quiesce probing if it wants a static answer + */ + nvdimm_bus_lock(dev); + nvdimm_bus_unlock(dev); + return sprintf(buf, "%s\n", 
atomic_read(&nvdimm->busy) + ? "active" : "idle"); +} +static DEVICE_ATTR_RO(state); + +static ssize_t available_slots_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + ssize_t rc; + u32 nfree; + + if (!ndd) + return -ENXIO; + + nvdimm_bus_lock(dev); + nfree = nd_label_nfree(ndd); + if (nfree - 1 > nfree) { + dev_WARN_ONCE(dev, 1, "we ate our last label?\n"); + nfree = 0; + } else + nfree--; + rc = sprintf(buf, "%d\n", nfree); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(available_slots); + +static struct attribute *nvdimm_attributes[] = { + &dev_attr_state.attr, + &dev_attr_commands.attr, + &dev_attr_available_slots.attr, + NULL, +}; + +struct attribute_group nvdimm_attribute_group = { + .attrs = nvdimm_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_attribute_group); + +struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask) +{ + struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); + struct device *dev; + + if (!nvdimm) + return NULL; + + nvdimm->id = ida_simple_get(&dimm_ida, 0, 0, GFP_KERNEL); + if (nvdimm->id < 0) { + kfree(nvdimm); + return NULL; + } + nvdimm->provider_data = provider_data; + nvdimm->flags = flags; + nvdimm->dsm_mask = dsm_mask; + atomic_set(&nvdimm->busy, 0); + dev = &nvdimm->dev; + dev_set_name(dev, "nmem%d", nvdimm->id); + dev->parent = &nvdimm_bus->dev; + dev->type = &nvdimm_device_type; + dev->devt = MKDEV(nvdimm_major, nvdimm->id); + dev->groups = groups; + nd_device_register(dev); + + return nvdimm; +} +EXPORT_SYMBOL_GPL(nvdimm_create); + +/** + * nd_blk_available_dpa - account the unused dpa of BLK region + * @nd_mapping: container of dpa-resource-root + labels + * + * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges. + */ +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t map_end, busy = 0, available; + struct resource *res; + + if (!ndd) + return 0; + + map_end = nd_mapping->start + nd_mapping->size - 1; + for_each_dpa_resource(ndd, res) + if (res->start >= nd_mapping->start && res->start < map_end) { + resource_size_t end = min(map_end, res->end); + + busy += end - res->start + 1; + } else if (res->end >= nd_mapping->start + && res->end <= map_end) { + busy += res->end - nd_mapping->start; + } else if (nd_mapping->start > res->start + && nd_mapping->start < res->end) { + /* total eclipse of the BLK region mapping */ + busy += nd_mapping->size; + } + + available = map_end - nd_mapping->start + 1; + if (busy < available) + return available - busy; + return 0; +} + +/** + * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa + * @nd_mapping: container of dpa-resource-root + labels + * @nd_region: constrain available space check to this reference region + * @overlap: calculate available space assuming this level of overlap + * + * Validate that a PMEM label, if present, aligns with the start of an + * interleave set and truncate the available size at the lowest BLK + * overlap point. + * + * The expectation is that this routine is called multiple times as it + * probes for the largest BLK encroachment for any single member DIMM of + * the interleave set. Once that value is determined the PMEM-limit for + * the set can be established. 
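+ * Returns: the number of bytes still available for a new PMEM allocation + * in this mapping, i.e. the interleave set extent truncated at the first + * BLK allocation, minus any existing PMEM allocation (0 if nothing remains + * or on error).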
+ */ +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap) +{ + resource_size_t map_start, map_end, busy = 0, available, blk_start; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + const char *reason; + + if (!ndd) + return 0; + + map_start = nd_mapping->start; + map_end = map_start + nd_mapping->size - 1; + blk_start = max(map_start, map_end + 1 - *overlap); + for_each_dpa_resource(ndd, res) + if (res->start >= map_start && res->start < map_end) { + if (strncmp(res->name, "blk", 3) == 0) + blk_start = min(blk_start, res->start); + else if (res->start != map_start) { + reason = "misaligned to iset"; + goto err; + } else { + if (busy) { + reason = "duplicate overlapping PMEM reservations?"; + goto err; + } + busy += resource_size(res); + continue; + } + } else if (res->end >= map_start && res->end <= map_end) { + if (strncmp(res->name, "blk", 3) == 0) { + /* + * If a BLK allocation overlaps the start of + * PMEM the entire interleave set may now only + * be used for BLK. + */ + blk_start = map_start; + } else { + reason = "misaligned to iset"; + goto err; + } + } else if (map_start > res->start && map_start < res->end) { + /* total eclipse of the mapping */ + busy += nd_mapping->size; + blk_start = map_start; + } + + *overlap = map_end + 1 - blk_start; + available = blk_start - map_start; + if (busy < available) + return available - busy; + return 0; + + err: + /* + * Something is wrong, PMEM must align with the start of the + * interleave set, and there can only be one allocation per set. + */ + nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); + return 0; +} + +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) +{ + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + kfree(res->name); + __release_region(&ndd->dpa, res->start, resource_size(res)); +} + +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n) +{ + char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); + struct resource *res; + + if (!name) + return NULL; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + res = __request_region(&ndd->dpa, start, n, name, 0); + if (!res) + kfree(name); + return res; +} + +/** + * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id + * @nvdimm: container of dpa-resource-root + labels + * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid> + */ +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id) +{ + resource_size_t allocated = 0; + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + allocated += resource_size(res); + + return allocated; +} + +static int count_dimms(struct device *dev, void *c) +{ + int *count = c; + + if (is_nvdimm(dev)) + (*count)++; + return 0; +} + +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) +{ + int count = 0; + /* Flush any possible dimm registration failures */ + nd_synchronize(); + + device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); + dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + if (count != dimm_count) + return -ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c new file mode 100644 index 000000000000..96526dcfdd37 --- /dev/null +++ b/drivers/nvdimm/label.c @@ -0,0 +1,927 @@ 
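Editor's illustration, not part of the patch: the namespace index blocks that label.c validates and writes below are protected by a Fletcher-64 checksum, computed through the libnvdimm core helper nd_fletcher64() (not shown in this hunk). A minimal userspace sketch of that style of checksum over 32-bit words, assuming a little-endian host, is shown here; the standalone fletcher64() name and form are hypothetical, for illustration only.

#include <stdint.h>
#include <stddef.h>

/* Fletcher-64 over 32-bit words; callers zero the on-media 'checksum'
 * field before summing and restore it afterwards, as nd_label_validate()
 * and nd_label_write_index() do with nd_fletcher64(). */
uint64_t fletcher64(const void *addr, size_t len)
{
	const uint32_t *buf = addr;
	uint32_t lo32 = 0;
	uint64_t hi32 = 0;
	size_t i;

	for (i = 0; i < len / sizeof(uint32_t); i++) {
		lo32 += buf[i];	/* running sum of words */
		hi32 += lo32;	/* running sum of running sums */
	}
	return hi32 << 32 | lo32;
}

Because hi32 accumulates the running lo32 sum, the result is position sensitive, so transposed words are caught in addition to simple bit flips.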
+/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/device.h> +#include <linux/ndctl.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) + return b; + else if (b == 0) + return a; + else if (nd_inc_seq(a) == b) + return b; + else + return a; +} + +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 index_span; + + if (ndd->nsindex_size) + return ndd->nsindex_size; + + /* + * The minimum index space is 512 bytes, with that amount of + * index we can describe ~1400 labels which is less than a byte + * of overhead per label. Round up to a byte of overhead per + * label and determine the size of the index region. Yes, this + * starts to waste space at larger config_sizes, but it's + * unlikely we'll ever see anything but 128K. + */ + index_span = ndd->nsarea.config_size / 129; + index_span /= NSINDEX_ALIGN * 2; + ndd->nsindex_size = index_span * NSINDEX_ALIGN; + + return ndd->nsindex_size; +} + +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) +{ + return ndd->nsarea.config_size / 129; +} + +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. + * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... 
+ * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct nd_namespace_index *nsindex[] = { + to_namespace_index(ndd, 0), + to_namespace_index(ndd, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + struct device *dev = ndd->dev; + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + dev_dbg(dev, "%s: nsindex%d signature invalid\n", + __func__, i); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); + nsindex[i]->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(dev, "%s: nsindex%d checksum invalid\n", + __func__, i); + continue; + } + + seq = __le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", + __func__, i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (__le64_to_cpu(nsindex[i]->myoff) + != i * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (__le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = __le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(ndd) + || size < sizeof(struct nd_namespace_index)) { + dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", + __func__, i, size); + continue; + } + + nslot = __le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof(struct nd_namespace_label) + + 2 * sizeof_namespace_index(ndd) + > ndd->nsarea.config_size) { + dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", + __func__, i, nslot, + ndd->nsarea.config_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) + return i; + /* can't have num_valid > 0 but valid[] = { false, false } */ + WARN_ON(1); + break; + default: + /* pick the best index... 
*/ + seq = best_seq(__le32_to_cpu(nsindex[0]->seq), + __le32_to_cpu(nsindex[1]->seq)); + if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) + return 1; + else + return 0; + break; + } + + return -1; +} + +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src) +{ + if (dst && src) + /* pass */; + else + return; + + memcpy(dst, src, sizeof_namespace_index(ndd)); +} + +static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) +{ + void *base = to_namespace_index(ndd, 0); + + return base + 2 * sizeof_namespace_index(ndd); +} + +static int to_slot(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return nd_label - nd_label_base(ndd); +} + +#define for_each_clear_bit_le(bit, addr, size) \ + for ((bit) = find_next_zero_bit_le((addr), (size), 0); \ + (bit) < (size); \ + (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1)) + +/** + * preamble_index - common variable initialization for nd_label_* routines + * @ndd: dimm container for the relevant label set + * @idx: namespace_index index + * @nsindex_out: on return set to the currently active namespace index + * @free: on return set to the free label bitmap in the index + * @nslot: on return set to the number of slots in the label space + */ +static bool preamble_index(struct nvdimm_drvdata *ndd, int idx, + struct nd_namespace_index **nsindex_out, + unsigned long **free, u32 *nslot) +{ + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, idx); + if (nsindex == NULL) + return false; + + *free = (unsigned long *) nsindex->free; + *nslot = __le32_to_cpu(nsindex->nslot); + *nsindex_out = nsindex; + + return true; +} + +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +{ + if (!label_id || !uuid) + return NULL; + snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", + flags & NSLABEL_FLAG_LOCAL ? 
"blk" : "pmem", uuid); + return label_id->id; +} + +static bool preamble_current(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_current, nsindex, + free, nslot); +} + +static bool preamble_next(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex, + unsigned long **free, u32 *nslot) +{ + return preamble_index(ndd, ndd->ns_next, nsindex, + free, nslot); +} + +static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +{ + /* check that we are written where we expect to be written */ + if (slot != __le32_to_cpu(nd_label->slot)) + return false; + + /* check that DPA allocations are page aligned */ + if ((__le64_to_cpu(nd_label->dpa) + | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) + return false; + + return true; +} + +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; /* no label, nothing to reserve */ + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + struct nd_region *nd_region = NULL; + u8 label_uuid[NSLABEL_UUID_LEN]; + struct nd_label_id label_id; + struct resource *res; + u32 flags; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) + continue; + + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + flags = __le32_to_cpu(nd_label->flags); + nd_label_gen_id(&label_id, label_uuid, flags); + res = nvdimm_allocate_dpa(ndd, &label_id, + __le64_to_cpu(nd_label->dpa), + __le64_to_cpu(nd_label->rawsize)); + nd_dbg_dpa(nd_region, ndd, res, "reserve\n"); + if (!res) + return -EBUSY; + } + + return 0; +} + +int nd_label_active_count(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + int count = 0; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) { + u32 label_slot = __le32_to_cpu(nd_label->slot); + u64 size = __le64_to_cpu(nd_label->rawsize); + u64 dpa = __le64_to_cpu(nd_label->dpa); + + dev_dbg(ndd->dev, + "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", + __func__, slot, label_slot, dpa, size); + continue; + } + count++; + } + return count; +} + +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return NULL; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + if (!slot_valid(nd_label, slot)) + continue; + + if (n-- == 0) + return nd_label_base(ndd) + slot; + } + + return NULL; +} + +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return UINT_MAX; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + slot = find_next_bit_le(free, nslot, 0); + if (slot == nslot) + return UINT_MAX; + + clear_bit_le(slot, free); + + return slot; +} + +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return 
false; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (slot < nslot) + return !test_and_set_bit_le(slot, free); + return false; +} + +u32 nd_label_nfree(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot; + + WARN_ON(!is_nvdimm_bus_locked(ndd->dev)); + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return nvdimm_num_label_slots(ndd); + + return bitmap_weight(free, nslot); +} + +static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, + unsigned long flags) +{ + struct nd_namespace_index *nsindex; + unsigned long offset; + u64 checksum; + u32 nslot; + int rc; + + nsindex = to_namespace_index(ndd, index); + if (flags & ND_NSINDEX_INIT) + nslot = nvdimm_num_label_slots(ndd); + else + nslot = __le32_to_cpu(nsindex->nslot); + + memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); + nsindex->flags = __cpu_to_le32(0); + nsindex->seq = __cpu_to_le32(seq); + offset = (unsigned long) nsindex + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->myoff = __cpu_to_le64(offset); + nsindex->mysize = __cpu_to_le64(sizeof_namespace_index(ndd)); + offset = (unsigned long) to_namespace_index(ndd, + nd_label_next_nsindex(index)) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->otheroff = __cpu_to_le64(offset); + offset = (unsigned long) nd_label_base(ndd) + - (unsigned long) to_namespace_index(ndd, 0); + nsindex->labeloff = __cpu_to_le64(offset); + nsindex->nslot = __cpu_to_le32(nslot); + nsindex->major = __cpu_to_le16(1); + nsindex->minor = __cpu_to_le16(1); + nsindex->checksum = __cpu_to_le64(0); + if (flags & ND_NSINDEX_INIT) { + unsigned long *free = (unsigned long *) nsindex->free; + u32 nfree = ALIGN(nslot, BITS_PER_LONG); + int last_bits, i; + + memset(nsindex->free, 0xff, nfree / 8); + for (i = 0, last_bits = nfree - nslot; i < last_bits; i++) + clear_bit_le(nslot + i, free); + } + checksum = nd_fletcher64(nsindex, sizeof_namespace_index(ndd), 1); + nsindex->checksum = __cpu_to_le64(checksum); + rc = nvdimm_set_config_data(ndd, __le64_to_cpu(nsindex->myoff), + nsindex, sizeof_namespace_index(ndd)); + if (rc < 0) + return rc; + + if (flags & ND_NSINDEX_INIT) + return 0; + + /* copy the index we just wrote to the new 'next' */ + WARN_ON(index != ndd->ns_next); + nd_label_copy(ndd, to_current_namespace_index(ndd), nsindex); + ndd->ns_current = nd_label_next_nsindex(ndd->ns_current); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_next); + WARN_ON(ndd->ns_current == ndd->ns_next); + + return 0; +} + +static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + return (unsigned long) nd_label + - (unsigned long) to_namespace_index(ndd, 0); +} + +static int __pmem_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, + int pos) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *victim_label; + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + size_t offset; + int rc; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + /* allocate and write the label to the staging (next) index */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + return -ENXIO; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + 
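/* fill the staging slot; the new label only goes live once the 'next' index is committed below */ +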
memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); + if (nspm->alt_name) + memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); + nd_label->position = __cpu_to_le16(pos); + nd_label->isetcookie = __cpu_to_le64(cookie); + rawsize = div_u64(resource_size(&nspm->nsio.res), + nd_region->ndr_mappings); + nd_label->rawsize = __cpu_to_le64(rawsize); + nd_label->dpa = __cpu_to_le64(nd_mapping->start); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + return rc; + + /* Garbage collect the previous label */ + victim_label = nd_mapping->labels[0]; + if (victim_label) { + slot = to_slot(ndd, victim_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc < 0) + return rc; + + nd_mapping->labels[0] = nd_label; + + return 0; +} + +static void del_label(struct nd_mapping *nd_mapping, int l) +{ + struct nd_namespace_label *next_label, *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + unsigned int slot; + int j; + + nd_label = nd_mapping->labels[l]; + slot = to_slot(ndd, nd_label); + dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot); + + for (j = l; (next_label = nd_mapping->labels[j + 1]); j++) + nd_mapping->labels[j] = next_label; + nd_mapping->labels[j] = NULL; +} + +static bool is_old_resource(struct resource *res, struct resource **list, int n) +{ + int i; + + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + for (i = 0; i < n; i++) + if (res == list[i]) + return true; + return false; +} + +static struct resource *to_resource(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label) +{ + struct resource *res; + + for_each_dpa_resource(ndd, res) { + if (res->start != __le64_to_cpu(nd_label->dpa)) + continue; + if (resource_size(res) != __le64_to_cpu(nd_label->rawsize)) + continue; + return res; + } + + return NULL; +} + +/* + * 1/ Account all the labels that can be freed after this update + * 2/ Allocate and write the label to the staging (next) index + * 3/ Record the resources in the namespace device + */ +static int __blk_label_update(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk, + int num_labels) +{ + int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + unsigned long *free, *victim_map = NULL; + struct resource *res, **old_res_list; + struct nd_label_id label_id; + u8 uuid[NSLABEL_UUID_LEN]; + u32 nslot, slot; + + if (!preamble_next(ndd, &nsindex, &free, &nslot)) + return -ENXIO; + + old_res_list = nsblk->res; + nfree = nd_label_nfree(ndd); + old_num_resources = nsblk->num_resources; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + + /* + * We need to loop over the old resources a few times, which seems a + * bit inefficient, but we need to know that we have the label + * space before we start mutating the tracking structures. + * Otherwise the recovery method of last resort for userspace is + * disable and re-enable the parent region. 
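+ * The passes below: count brand new allocations, build a victim map of + * stale label slots, write a label for each new resource, then commit the + * updated index.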
+ */ + alloc = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!is_old_resource(res, old_res_list, old_num_resources)) + alloc++; + } + + victims = 0; + if (old_num_resources) { + /* convert old local-label-map to dimm-slot victim-map */ + victim_map = kcalloc(BITS_TO_LONGS(nslot), sizeof(long), + GFP_KERNEL); + if (!victim_map) + return -ENOMEM; + + /* mark unused labels for garbage collection */ + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + if (res && is_old_resource(res, old_res_list, + old_num_resources)) + continue; + slot = to_slot(ndd, nd_label); + set_bit(slot, victim_map); + victims++; + } + } + + /* don't allow updates that consume the last label */ + if (nfree - alloc < 0 || nfree - alloc + victims < 1) { + dev_info(&nsblk->common.dev, "insufficient label space\n"); + kfree(victim_map); + return -ENOSPC; + } + /* from here on we need to abort on error */ + + + /* assign all resources to the namespace before writing the labels */ + nsblk->res = NULL; + nsblk->num_resources = 0; + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) { + rc = -ENOMEM; + goto abort; + } + } + + for (i = 0; i < nsblk->num_resources; i++) { + size_t offset; + + res = nsblk->res[i]; + if (is_old_resource(res, old_res_list, old_num_resources)) + continue; /* carry-over */ + slot = nd_label_alloc_slot(ndd); + if (slot == UINT_MAX) + goto abort; + dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + + nd_label = nd_label_base(ndd) + slot; + memset(nd_label, 0, sizeof(struct nd_namespace_label)); + memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); + if (nsblk->alt_name) + memcpy(nd_label->name, nsblk->alt_name, + NSLABEL_NAME_LEN); + nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); + nd_label->nlabel = __cpu_to_le16(0); /* N/A */ + nd_label->position = __cpu_to_le16(0); /* N/A */ + nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + nd_label->dpa = __cpu_to_le64(res->start); + nd_label->rawsize = __cpu_to_le64(resource_size(res)); + nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); + nd_label->slot = __cpu_to_le32(slot); + + /* update label */ + offset = nd_label_offset(ndd, nd_label); + rc = nvdimm_set_config_data(ndd, offset, nd_label, + sizeof(struct nd_namespace_label)); + if (rc < 0) + goto abort; + } + + /* free up now unused slots in the new index */ + for_each_set_bit(slot, victim_map, victim_map ? 
nslot : 0) { + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + nd_label_free_slot(ndd, slot); + } + + /* update index */ + rc = nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); + if (rc) + goto abort; + + /* + * Now that the on-dimm labels are up to date, fix up the tracking + * entries in nd_mapping->labels + */ + nlabel = 0; + for_each_label(l, nd_label, nd_mapping->labels) { + nlabel++; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + nlabel--; + del_label(nd_mapping, l); + l--; /* retry with the new label at this index */ + } + if (nlabel + nsblk->num_resources > num_labels) { + /* + * Bug, we can't end up with more resources than + * available labels + */ + WARN_ON_ONCE(1); + rc = -ENXIO; + goto out; + } + + for_each_clear_bit_le(slot, free, nslot) { + nd_label = nd_label_base(ndd) + slot; + memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) + continue; + res = to_resource(ndd, nd_label); + res->flags &= ~DPA_RESOURCE_ADJUSTED; + dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n", + l, slot); + nd_mapping->labels[l++] = nd_label; + } + nd_mapping->labels[l] = NULL; + + out: + kfree(old_res_list); + kfree(victim_map); + return rc; + + abort: + /* + * 1/ repair the allocated label bitmap in the index + * 2/ restore the resource list + */ + nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd)); + kfree(nsblk->res); + nsblk->res = old_res_list; + nsblk->num_resources = old_num_resources; + old_res_list = NULL; + goto out; +} + +static int init_labels(struct nd_mapping *nd_mapping, int num_labels) +{ + int i, l, old_num_labels = 0; + struct nd_namespace_index *nsindex; + struct nd_namespace_label *nd_label; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *); + + for_each_label(l, nd_label, nd_mapping->labels) + old_num_labels++; + + /* + * We need to preserve all the old labels for the mapping so + * they can be garbage collected after writing the new labels. 
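+ * If the dimm has no valid index block yet (ns_current or ns_next is -1), + * both index blocks are written from scratch at the end of this routine.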
+ */ + if (num_labels > old_num_labels) { + struct nd_namespace_label **labels; + + labels = krealloc(nd_mapping->labels, size, GFP_KERNEL); + if (!labels) + return -ENOMEM; + nd_mapping->labels = labels; + } + if (!nd_mapping->labels) + return -ENOMEM; + + for (i = old_num_labels; i <= num_labels; i++) + nd_mapping->labels[i] = NULL; + + if (ndd->ns_current == -1 || ndd->ns_next == -1) + /* pass */; + else + return max(num_labels, old_num_labels); + + nsindex = to_namespace_index(ndd, 0); + memset(nsindex, 0, ndd->nsarea.config_size); + for (i = 0; i < 2; i++) { + int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); + + if (rc) + return rc; + } + ndd->ns_next = 1; + ndd->ns_current = 0; + + return max(num_labels, old_num_labels); +} + +static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_label *nd_label; + struct nd_namespace_index *nsindex; + u8 label_uuid[NSLABEL_UUID_LEN]; + int l, num_freed = 0; + unsigned long *free; + u32 nslot, slot; + + if (!uuid) + return 0; + + /* no index || no labels == nothing to delete */ + if (!preamble_next(ndd, &nsindex, &free, &nslot) + || !nd_mapping->labels) + return 0; + + for_each_label(l, nd_label, nd_mapping->labels) { + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + slot = to_slot(ndd, nd_label); + nd_label_free_slot(ndd, slot); + dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + del_label(nd_mapping, l); + num_freed++; + l--; /* retry with new label at this index */ + } + + if (num_freed > l) { + /* + * num_freed will only ever be > l when we delete the last + * label + */ + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + dev_dbg(ndd->dev, "%s: no more labels\n", __func__); + } + + return nd_label_write_index(ndd, ndd->ns_next, + nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0); +} + +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + if (size == 0) { + rc = del_labels(nd_mapping, nspm->uuid); + if (rc) + return rc; + continue; + } + + rc = init_labels(nd_mapping, 1); + if (rc < 0) + return rc; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + if (rc) + return rc; + } + + return 0; +} + +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct resource *res; + int count = 0; + + if (size == 0) + return del_labels(nd_mapping, nsblk->uuid); + + for_each_dpa_resource(to_ndd(nd_mapping), res) + count++; + + count = init_labels(nd_mapping, count); + if (count < 0) + return count; + + return __blk_label_update(nd_region, nd_mapping, nsblk, count); +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h new file mode 100644 index 000000000000..a59ef6eef2a3 --- /dev/null +++ b/drivers/nvdimm/label.h @@ -0,0 +1,141 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LABEL_H__ +#define __LABEL_H__ + +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/io.h> + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ + BTT_ALIGN = 4096, /* all btt structures */ + BTTINFO_SIG_LEN = 16, + BTTINFO_UUID_LEN = 16, + BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ + BTTINFO_MAJOR_VERSION = 1, + ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_ID_SIZE = 50, + ND_NSINDEX_INIT = 0x1, +}; + +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +/** + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct nd_namespace_index { + u8 sig[NSINDEX_SIG_LEN]; + __le32 flags; + __le32 seq; + __le64 myoff; + __le64 mysize; + __le64 otheroff; + __le64 labeloff; + __le32 nslot; + __le16 major; + __le16 minor; + __le64 checksum; + u8 free[0]; +}; + +/** + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct nd_namespace_label { + u8 uuid[NSLABEL_UUID_LEN]; + u8 name[NSLABEL_NAME_LEN]; + __le32 flags; + __le16 nlabel; + __le16 position; + __le64 isetcookie; + __le64 lbasize; + __le64 dpa; + __le64 rawsize; + __le32 slot; + __le32 unused; +}; + +/** + * struct nd_label_id - identifier string for dpa allocation + * @id: "{blk|pmem}-<namespace uuid>" + */ +struct nd_label_id { + char id[ND_LABEL_ID_SIZE]; +}; + +/* + * If the 'best' index is invalid, so is the 'next' index. 
Otherwise, + * the next index is MOD(index+1, 2) + */ +static inline int nd_label_next_nsindex(int index) +{ + if (index < 0) + return -1; + + return (index + 1) % 2; +} + +struct nvdimm_drvdata; +int nd_label_validate(struct nvdimm_drvdata *ndd); +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src); +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +int nd_label_active_count(struct nvdimm_drvdata *ndd); +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); +u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); +bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); +u32 nd_label_nfree(struct nvdimm_drvdata *ndd); +struct nd_region; +struct nd_namespace_pmem; +struct nd_namespace_blk; +int nd_pmem_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size); +int nd_blk_namespace_label_update(struct nd_region *nd_region, + struct nd_namespace_blk *nsblk, resource_size_t size); +#endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c new file mode 100644 index 000000000000..fef0dd80d4ad --- /dev/null +++ b/drivers/nvdimm/namespace_devs.c @@ -0,0 +1,1870 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/module.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static void namespace_io_release(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + kfree(nsio); +} + +static void namespace_pmem_release(struct device *dev) +{ + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + kfree(nspm->alt_name); + kfree(nspm->uuid); + kfree(nspm); +} + +static void namespace_blk_release(struct device *dev) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (nsblk->id >= 0) + ida_simple_remove(&nd_region->ns_ida, nsblk->id); + kfree(nsblk->alt_name); + kfree(nsblk->uuid); + kfree(nsblk->res); + kfree(nsblk); +} + +static struct device_type namespace_io_device_type = { + .name = "nd_namespace_io", + .release = namespace_io_release, +}; + +static struct device_type namespace_pmem_device_type = { + .name = "nd_namespace_pmem", + .release = namespace_pmem_release, +}; + +static struct device_type namespace_blk_device_type = { + .name = "nd_namespace_blk", + .release = namespace_blk_release, +}; + +static bool is_namespace_pmem(struct device *dev) +{ + return dev ? dev->type == &namespace_pmem_device_type : false; +} + +static bool is_namespace_blk(struct device *dev) +{ + return dev ? dev->type == &namespace_blk_device_type : false; +} + +static bool is_namespace_io(struct device *dev) +{ + return dev ? 
dev->type == &namespace_io_device_type : false; +} + +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name) +{ + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + const char *suffix = ""; + + if (ndns->claim && is_nd_btt(ndns->claim)) + suffix = "s"; + + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) + sprintf(name, "pmem%d%s", nd_region->id, suffix); + else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + } else { + return NULL; + } + + return name; +} +EXPORT_SYMBOL(nvdimm_namespace_disk_name); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t __alt_name_store(struct device *dev, const char *buf, + const size_t len) +{ + char *input, *pos, *alt_name, **ns_altname; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = &nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = &nsblk->alt_name; + } else + return -ENXIO; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + input = kmemdup(buf, len + 1, GFP_KERNEL); + if (!input) + return -ENOMEM; + + input[len] = '\0'; + pos = strim(input); + if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { + rc = -EINVAL; + goto out; + } + + alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL); + if (!alt_name) { + rc = -ENOMEM; + goto out; + } + kfree(*ns_altname); + *ns_altname = alt_name; + sprintf(*ns_altname, "%s", pos); + rc = len; + +out: + kfree(input); + return rc; +} + +static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + resource_size_t size = 0; + struct resource *res; + + if (!nsblk->uuid) + return 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + size += resource_size(res); + return size; +} + +static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent); + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_label_id label_id; + struct resource *res; + int count, i; + + if (!nsblk->uuid || !nsblk->lbasize || !ndd) + return false; + + count = 0; + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + for_each_dpa_resource(ndd, res) { + if (strcmp(res->name, label_id.id) != 0) + continue; + /* + * Resources with unacknowledged adjustments indicate a + * failure to update labels + */ + if (res->flags & DPA_RESOURCE_ADJUSTED) + return false; + count++; + } + + /* These values match after a successful label update */ + if (count != nsblk->num_resources) + return false; + + for (i = 0; i < nsblk->num_resources; i++) { + struct resource *found = NULL; + + for_each_dpa_resource(ndd, res) + if (res == nsblk->res[i]) { + found = res; + break; + } + /* stale resource */ + if (!found) + return false; + } 
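+ /* every namespace resource was found in the dpa tree; the label set is consistent */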
+ + return true; +} + +resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk) +{ + resource_size_t size; + + nvdimm_bus_lock(&nsblk->common.dev); + size = __nd_namespace_blk_validate(nsblk); + nvdimm_bus_unlock(&nsblk->common.dev); + + return size; +} +EXPORT_SYMBOL(nd_namespace_blk_validate); + + +static int nd_namespace_label_update(struct nd_region *nd_region, + struct device *dev) +{ + dev_WARN_ONCE(dev, dev->driver || to_ndns(dev)->claim, + "namespace must be idle during label update\n"); + if (dev->driver || to_ndns(dev)->claim) + return 0; + + /* + * Only allow label writes that will result in a valid namespace + * or deletion of an existing namespace. + */ + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + resource_size_t size = resource_size(&nspm->nsio.res); + + if (size == 0 && nspm->uuid) + /* delete allocation */; + else if (!nspm->uuid) + return 0; + + return nd_pmem_namespace_label_update(nd_region, nspm, size); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + resource_size_t size = nd_namespace_blk_size(nsblk); + + if (size == 0 && nsblk->uuid) + /* delete allocation */; + else if (!nsblk->uuid || !nsblk->lbasize) + return 0; + + return nd_blk_namespace_label_update(nd_region, nsblk, size); + } else + return -ENXIO; +} + +static ssize_t alt_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __alt_name_store(dev, buf, len); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} + +static ssize_t alt_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char *ns_altname; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = nspm->alt_name; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_altname = nsblk->alt_name; + } else + return -ENXIO; + + return sprintf(buf, "%s\n", ns_altname ? 
ns_altname : ""); +} +static DEVICE_ATTR_RW(alt_name); + +static int scan_free(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int rc = 0; + + while (n) { + struct resource *res, *last; + resource_size_t new_start; + + last = NULL; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + last = res; + res = last; + if (!res) + return 0; + + if (n >= resource_size(res)) { + n -= resource_size(res); + nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc); + nvdimm_free_dpa(ndd, res); + /* retry with last resource deleted */ + continue; + } + + /* + * Keep BLK allocations relegated to high DPA as much as + * possible + */ + if (is_blk) + new_start = res->start + n; + else + new_start = res->start; + + rc = adjust_resource(res, new_start, resource_size(res) - n); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc); + break; + } + + return rc; +} + +/** + * shrink_dpa_allocation - for each dimm in region free n bytes for label_id + * @nd_region: the set of dimms to reclaim @n bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to release + * + * Assumes resources are ordered. Starting from the end try to + * adjust_resource() the allocation to @n, but if @n is larger than the + * allocation delete it and find the 'new' last allocation in the label + * set. + */ +static int shrink_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_free(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, + struct nd_region *nd_region, struct nd_mapping *nd_mapping, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t first_dpa; + struct resource *res; + int rc = 0; + + /* allocate blk from highest dpa first */ + if (is_blk) + first_dpa = nd_mapping->start + nd_mapping->size - n; + else + first_dpa = nd_mapping->start; + + /* first resource allocation for this label-id or dimm */ + res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n); + if (!res) + rc = -EBUSY; + + nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc); + return rc ? n : 0; +} + +static bool space_valid(bool is_pmem, bool is_reserve, + struct nd_label_id *label_id, struct resource *res) +{ + /* + * For BLK-space any space is valid, for PMEM-space, it must be + * contiguous with an existing allocation unless we are + * reserving pmem. 
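+ * (A NULL @res denotes the free space at the very start of the mapping.)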
+ */ + if (is_reserve || !is_pmem) + return true; + if (!res || strcmp(res->name, label_id->id) == 0) + return true; + return false; +} + +enum alloc_loc { + ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER, +}; + +static resource_size_t scan_allocate(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; + bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + const resource_size_t to_allocate = n; + struct resource *res; + int first; + + retry: + first = 0; + for_each_dpa_resource(ndd, res) { + resource_size_t allocate, available = 0, free_start, free_end; + struct resource *next = res->sibling, *new_res = NULL; + enum alloc_loc loc = ALLOC_ERR; + const char *action; + int rc = 0; + + /* ignore resources outside this nd_mapping */ + if (res->start > mapping_end) + continue; + if (res->end < nd_mapping->start) + continue; + + /* space at the beginning of the mapping */ + if (!first++ && res->start > nd_mapping->start) { + free_start = nd_mapping->start; + available = res->start - free_start; + if (space_valid(is_pmem, is_reserve, label_id, NULL)) + loc = ALLOC_BEFORE; + } + + /* space between allocations */ + if (!loc && next) { + free_start = res->start + resource_size(res); + free_end = min(mapping_end, next->start - 1); + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_MID; + } + } + + /* space at the end of the mapping */ + if (!loc && !next) { + free_start = res->start + resource_size(res); + free_end = mapping_end; + if (space_valid(is_pmem, is_reserve, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_AFTER; + } + } + + if (!loc || !available) + continue; + allocate = min(available, n); + switch (loc) { + case ALLOC_BEFORE: + if (strcmp(res->name, label_id->id) == 0) { + /* adjust current resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(res, res->start - allocate, + resource_size(res) + allocate); + action = "cur grow up"; + } else + action = "allocate"; + break; + case ALLOC_MID: + if (strcmp(next->name, label_id->id) == 0) { + /* adjust next resource up */ + if (is_pmem && !is_reserve) + return n; + rc = adjust_resource(next, next->start + - allocate, resource_size(next) + + allocate); + new_res = next; + action = "next grow up"; + } else if (strcmp(res->name, label_id->id) == 0) { + action = "grow down"; + } else + action = "allocate"; + break; + case ALLOC_AFTER: + if (strcmp(res->name, label_id->id) == 0) + action = "grow down"; + else + action = "allocate"; + break; + default: + return n; + } + + if (strcmp(action, "allocate") == 0) { + /* BLK allocate bottom up */ + if (!is_pmem) + free_start += available - allocate; + else if (!is_reserve && free_start != nd_mapping->start) + return n; + + new_res = nvdimm_allocate_dpa(ndd, label_id, + free_start, allocate); + if (!new_res) + rc = -EBUSY; + } else if (strcmp(action, "grow down") == 0) { + /* adjust current resource down */ + rc = adjust_resource(res, res->start, resource_size(res) + + allocate); + if (rc == 0) + res->flags |= DPA_RESOURCE_ADJUSTED; + } + + if (!new_res) + new_res = res; + + nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n", + action, loc, rc); + + if (rc) + return n; + + n -= allocate; + if 
(n) { + /* + * Retry scan with newly inserted resources. + * For example, if we did an ALLOC_BEFORE + * insertion there may also have been space + * available for an ALLOC_AFTER insertion, so we + * need to check this same resource again + */ + goto retry; + } else + return 0; + } + + /* + * If we allocated nothing in the BLK case it may be because we are in + * an initial "pmem-reserve pass". Only do an initial BLK allocation + * when none of the DPA space is reserved. + */ + if ((is_pmem || !ndd->dpa.child) && n == to_allocate) + return init_dpa_allocation(label_id, nd_region, nd_mapping, n); + return n; +} + +static int merge_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + if (strncmp("pmem", label_id->id, 4) == 0) + return 0; + retry: + for_each_dpa_resource(ndd, res) { + int rc; + struct resource *next = res->sibling; + resource_size_t end = res->start + resource_size(res); + + if (!next || strcmp(res->name, label_id->id) != 0 + || strcmp(next->name, label_id->id) != 0 + || end != next->start) + continue; + end += resource_size(next); + nvdimm_free_dpa(ndd, next); + rc = adjust_resource(res, res->start, end - res->start); + nd_dbg_dpa(nd_region, ndd, res, "merge %d\n", rc); + if (rc) + return rc; + res->flags |= DPA_RESOURCE_ADJUSTED; + goto retry; + } + + return 0; +} + +static int __reserve_free_pmem(struct device *dev, void *data) +{ + struct nvdimm *nvdimm = data; + struct nd_region *nd_region; + struct nd_label_id label_id; + int i; + + if (!is_nd_pmem(dev)) + return 0; + + nd_region = to_nd_region(dev); + if (nd_region->ndr_mappings == 0) + return 0; + + memset(&label_id, 0, sizeof(label_id)); + strcat(label_id.id, "pmem-reserve"); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t n, rem = 0; + + if (nd_mapping->nvdimm != nvdimm) + continue; + + n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem); + if (n == 0) + return 0; + rem = scan_allocate(nd_region, nd_mapping, &label_id, n); + dev_WARN_ONCE(&nd_region->dev, rem, + "pmem reserve underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + return rem ? -ENXIO : 0; + } + + return 0; +} + +static void release_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res, *_res; + + for_each_dpa_resource_safe(ndd, res, _res) + if (strcmp(res->name, "pmem-reserve") == 0) + nvdimm_free_dpa(ndd, res); +} + +static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus, + struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int rc; + + rc = device_for_each_child(&nvdimm_bus->dev, nvdimm, + __reserve_free_pmem); + if (rc) + release_free_pmem(nvdimm_bus, nd_mapping); + return rc; +} + +/** + * grow_dpa_allocation - for each dimm allocate n bytes for @label_id + * @nd_region: the set of dimms to allocate @n more bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to add to the existing allocation + * + * Assumes resources are ordered. For BLK regions, first consume + * BLK-only available DPA free space, then consume PMEM-aliased DPA + * space starting at the highest DPA. 
For PMEM regions start + * allocations from the start of an interleave set and end at the first + * BLK allocation or the end of the interleave set, whichever comes + * first. + */ +static int grow_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + resource_size_t rem = n; + int rc, j; + + /* + * In the BLK case try once with all unallocated PMEM + * reserved, and once without + */ + for (j = is_pmem; j < 2; j++) { + bool blk_only = j == 0; + + if (blk_only) { + rc = reserve_free_pmem(nvdimm_bus, nd_mapping); + if (rc) + return rc; + } + rem = scan_allocate(nd_region, nd_mapping, + label_id, rem); + if (blk_only) + release_free_pmem(nvdimm_bus, nd_mapping); + + /* try again and allow encroachments into PMEM */ + if (rem == 0) + break; + } + + dev_WARN_ONCE(&nd_region->dev, rem, + "allocation underrun: %#llx of %#llx bytes\n", + (unsigned long long) n - rem, + (unsigned long long) n); + if (rem) + return -ENXIO; + + rc = merge_dpa(nd_region, nd_mapping, label_id); + if (rc) + return rc; + } + + return 0; +} + +static void nd_namespace_pmem_set_size(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + struct resource *res = &nspm->nsio.res; + + res->start = nd_region->ndr_start; + res->end = nd_region->ndr_start + size - 1; +} + +static ssize_t __size_store(struct device *dev, unsigned long long val) +{ + resource_size_t allocated = 0, available = 0; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_mapping *nd_mapping; + struct nvdimm_drvdata *ndd; + struct nd_label_id label_id; + u32 flags = 0, remainder; + u8 *uuid = NULL; + int rc, i; + + if (dev->driver || to_ndns(dev)->claim) + return -EBUSY; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + /* + * We need a uuid for the allocation-label and dimm(s) on which + * to store the label. + */ + if (!uuid || nd_region->ndr_mappings == 0) + return -ENXIO; + + div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + if (remainder) { + dev_dbg(dev, "%llu is not %dK aligned\n", val, + (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + return -EINVAL; + } + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + ndd = to_ndd(nd_mapping); + + /* + * All dimms in an interleave set, or the base dimm for a blk + * region, need to be enabled for the size to be changed. 
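+ * (to_ndd() returns NULL for a disabled dimm, so a NULL ndd fails the + * resize with -ENXIO below)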
+ */ + if (!ndd) + return -ENXIO; + + allocated += nvdimm_allocated_dpa(ndd, &label_id); + } + available = nd_region_available_dpa(nd_region); + + if (val > available + allocated) + return -ENOSPC; + + if (val == allocated) + return 0; + + val = div_u64(val, nd_region->ndr_mappings); + allocated = div_u64(allocated, nd_region->ndr_mappings); + if (val < allocated) + rc = shrink_dpa_allocation(nd_region, &label_id, + allocated - val); + else + rc = grow_dpa_allocation(nd_region, &label_id, val - allocated); + + if (rc) + return rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + nd_namespace_pmem_set_size(nd_region, nspm, + val * nd_region->ndr_mappings); + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + /* + * Try to delete the namespace if we deleted all of its + * allocation, this is not the seed device for the + * region, and it is not actively claimed by a btt + * instance. + */ + if (val == 0 && nd_region->ns_seed != dev + && !nsblk->common.claim) + nd_device_unregister(dev, ND_ASYNC); + } + + return rc; +} + +static ssize_t size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + unsigned long long val; + u8 **uuid = NULL; + int rc; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __size_store(dev, val); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = &nsblk->uuid; + } + + if (rc == 0 && val == 0 && uuid) { + /* setting size zero == 'delete namespace' */ + kfree(*uuid); + *uuid = NULL; + } + + dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 + ? "fail" : "success", rc); + + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? 
rc : len; +} + +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + struct device *dev = &ndns->dev; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return resource_size(&nspm->nsio.res); + } else if (is_namespace_blk(dev)) { + return nd_namespace_blk_size(to_nd_namespace_blk(dev)); + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return resource_size(&nsio->res); + } else + WARN_ONCE(1, "unknown namespace type\n"); + return 0; +} + +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns) +{ + resource_size_t size; + + nvdimm_bus_lock(&ndns->dev); + size = __nvdimm_namespace_capacity(ndns); + nvdimm_bus_unlock(&ndns->dev); + + return size; +} +EXPORT_SYMBOL(nvdimm_namespace_capacity); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%llu\n", (unsigned long long) + nvdimm_namespace_capacity(to_ndns(dev))); +} +static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + } else + return -ENXIO; + + if (uuid) + return sprintf(buf, "%pUb\n", uuid); + return sprintf(buf, "\n"); +} + +/** + * namespace_update_uuid - check for a unique uuid and whether we're "renaming" + * @nd_region: parent region so we can updates all dimms in the set + * @dev: namespace type for generating label_id + * @new_uuid: incoming uuid + * @old_uuid: reference to the uuid storage location in the namespace object + */ +static int namespace_update_uuid(struct nd_region *nd_region, + struct device *dev, u8 *new_uuid, u8 **old_uuid) +{ + u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0; + struct nd_label_id old_label_id; + struct nd_label_id new_label_id; + int i; + + if (!nd_is_uuid_unique(dev, new_uuid)) + return -EINVAL; + + if (*old_uuid == NULL) + goto out; + + /* + * If we've already written a label with this uuid, then it's + * too late to rename because we can't reliably update the uuid + * without losing the old namespace. Userspace must delete this + * namespace to abandon the old uuid. + */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + /* + * This check by itself is sufficient because old_uuid + * would be NULL above if this uuid did not exist in the + * currently written set. + * + * FIXME: can we delete uuid with zero dpa allocated? 
+ */ + if (nd_mapping->labels) + return -EBUSY; + } + + nd_label_gen_id(&old_label_id, *old_uuid, flags); + nd_label_gen_id(&new_label_id, new_uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, old_label_id.id) == 0) + sprintf((void *) res->name, "%s", + new_label_id.id); + } + kfree(*old_uuid); + out: + *old_uuid = new_uuid; + return 0; +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = NULL; + ssize_t rc = 0; + u8 **ns_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + ns_uuid = &nsblk->uuid; + } else + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_uuid_store(dev, &uuid, buf, len); + if (rc >= 0) + rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + else + kfree(uuid); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct resource *res; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + res = &nspm->nsio.res; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + res = &nsio->res; + } else + return -ENXIO; + + /* no address to convey if the namespace has no allocation */ + if (resource_size(res) == 0) + return -ENXIO; + return sprintf(buf, "%#llx\n", (unsigned long long) res->start); +} +static DEVICE_ATTR_RO(resource); + +static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, + 4096, 4104, 4160, 4224, 0 }; + +static ssize_t sector_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!is_namespace_blk(dev)) + return -ENXIO; + + return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); +} + +static ssize_t sector_size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc = 0; + + if (!is_namespace_blk(dev)) + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (to_ndns(dev)->claim) + rc = -EBUSY; + if (rc >= 0) + rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, + ns_lbasize_supported); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, + rc, rc < 0 ? "tried" : "wrote", buf, + buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? 
rc : len; +} +static DEVICE_ATTR_RW(sector_size); + +static ssize_t dpa_extents_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_label_id label_id; + int count = 0, i; + u8 *uuid = NULL; + u32 flags = 0; + + nvdimm_bus_lock(dev); + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + flags = 0; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + uuid = nsblk->uuid; + flags = NSLABEL_FLAG_LOCAL; + } + + if (!uuid) + goto out; + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0) + count++; + } + out: + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%d\n", count); +} +static DEVICE_ATTR_RO(dpa_extents); + +static ssize_t holder_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_common *ndns = to_ndns(dev); + ssize_t rc; + + device_lock(dev); + rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(holder); + +static ssize_t force_raw_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool force_raw; + int rc = strtobool(buf, &force_raw); + + if (rc) + return rc; + + to_ndns(dev)->force_raw = force_raw; + return len; +} + +static ssize_t force_raw_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", to_ndns(dev)->force_raw); +} +static DEVICE_ATTR_RW(force_raw); + +static struct attribute *nd_namespace_attributes[] = { + &dev_attr_nstype.attr, + &dev_attr_size.attr, + &dev_attr_uuid.attr, + &dev_attr_holder.attr, + &dev_attr_resource.attr, + &dev_attr_alt_name.attr, + &dev_attr_force_raw.attr, + &dev_attr_sector_size.attr, + &dev_attr_dpa_extents.attr, + NULL, +}; + +static umode_t namespace_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_resource.attr) { + if (is_namespace_blk(dev)) + return 0; + return a->mode; + } + + if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { + if (a == &dev_attr_size.attr) + return S_IWUSR | S_IRUGO; + + if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr) + return 0; + + return a->mode; + } + + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr + || a == &dev_attr_holder.attr + || a == &dev_attr_force_raw.attr) + return a->mode; + + return 0; +} + +static struct attribute_group nd_namespace_attribute_group = { + .attrs = nd_namespace_attributes, + .is_visible = namespace_visible, +}; + +static const struct attribute_group *nd_namespace_attribute_groups[] = { + &nd_device_attribute_group, + &nd_namespace_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) +{ + struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_namespace_common *ndns; + resource_size_t size; + + if (nd_btt) { + ndns = nd_btt->ndns; + if (!ndns) + return ERR_PTR(-ENODEV); + + /* + * Flush any in-progess probes / removals in the driver + * for the raw personality of this namespace. 
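+ * (the back-to-back device_lock()/device_unlock() pair below simply + * waits for any in-flight probe or removal to complete before + * ndns->dev.driver is checked)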
+ */ + device_lock(&ndns->dev); + device_unlock(&ndns->dev); + if (ndns->dev.driver) { + dev_dbg(&ndns->dev, "is active, can't bind %s\n", + dev_name(&nd_btt->dev)); + return ERR_PTR(-EBUSY); + } + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + "host (%s) vs claim (%s) mismatch\n", + dev_name(&nd_btt->dev), + dev_name(ndns->claim))) + return ERR_PTR(-ENXIO); + } else { + ndns = to_ndns(dev); + if (ndns->claim) { + dev_dbg(dev, "claimed by %s, failing probe\n", + dev_name(ndns->claim)); + + return ERR_PTR(-ENXIO); + } + } + + size = nvdimm_namespace_capacity(ndns); + if (size < ND_MIN_NAMESPACE_SIZE) { + dev_dbg(&ndns->dev, "%pa, too small must be at least %#x\n", + &size, ND_MIN_NAMESPACE_SIZE); + return ERR_PTR(-ENODEV); + } + + if (is_namespace_pmem(&ndns->dev)) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(&ndns->dev); + if (!nspm->uuid) { + dev_dbg(&ndns->dev, "%s: uuid not set\n", __func__); + return ERR_PTR(-ENODEV); + } + } else if (is_namespace_blk(&ndns->dev)) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(&ndns->dev); + if (!nd_namespace_blk_validate(nsblk)) + return ERR_PTR(-ENODEV); + } + + return ndns; +} +EXPORT_SYMBOL(nvdimm_namespace_common_probe); + +static struct device **create_namespace_io(struct nd_region *nd_region) +{ + struct nd_namespace_io *nsio; + struct device *dev, **devs; + struct resource *res; + + nsio = kzalloc(sizeof(*nsio), GFP_KERNEL); + if (!nsio) + return NULL; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) { + kfree(nsio); + return NULL; + } + + dev = &nsio->common.dev; + dev->type = &namespace_io_device_type; + dev->parent = &nd_region->dev; + res = &nsio->res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + res->start = nd_region->ndr_start; + res->end = res->start + nd_region->ndr_size - 1; + + devs[0] = dev; + return devs; +} + +static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, + u64 cookie, u16 pos) +{ + struct nd_namespace_label *found = NULL; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + bool found_uuid = false; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + u16 position = __le16_to_cpu(nd_label->position); + u16 nlabel = __le16_to_cpu(nd_label->nlabel); + + if (isetcookie != cookie) + continue; + + if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + + if (found_uuid) { + dev_dbg(to_ndd(nd_mapping)->dev, + "%s duplicate entry for uuid\n", + __func__); + return false; + } + found_uuid = true; + if (nlabel != nd_region->ndr_mappings) + continue; + if (position != pos) + continue; + found = nd_label; + break; + } + if (found) + break; + } + return found != NULL; +} + +static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) +{ + struct nd_namespace_label *select = NULL; + int i; + + if (!pmem_id) + return -ENODEV; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + u64 hw_start, hw_end, pmem_start, pmem_end; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) + if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) + break; + + if (!nd_label) { + WARN_ON(1); + return -EINVAL; + } + + select = nd_label; + /* + * Check that this label is compliant with the dpa + * range published in NFIT + */ + 
hw_start = nd_mapping->start; + hw_end = hw_start + nd_mapping->size; + pmem_start = __le64_to_cpu(select->dpa); + pmem_end = pmem_start + __le64_to_cpu(select->rawsize); + if (pmem_start == hw_start && pmem_end <= hw_end) + /* pass */; + else + return -EINVAL; + + nd_mapping->labels[0] = select; + nd_mapping->labels[1] = NULL; + } + return 0; +} + +/** + * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * @nd_region: region with mappings to validate + */ +static int find_pmem_label_set(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region); + struct nd_namespace_label *nd_label; + u8 select_id[NSLABEL_UUID_LEN]; + resource_size_t size = 0; + u8 *pmem_id = NULL; + int rc = -ENODEV, l; + u16 i; + + if (cookie == 0) + return -ENXIO; + + /* + * Find a complete set of labels by uuid. By definition we can start + * with any mapping as the reference label + */ + for_each_label(l, nd_label, nd_region->mapping[0].labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + + if (isetcookie != cookie) + continue; + + for (i = 0; i < nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, + cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + /* + * Give up if we don't find an instance of a + * uuid at each position (from 0 to + * nd_region->ndr_mappings - 1), or if we find a + * dimm with two instances of the same uuid. + */ + rc = -EINVAL; + goto err; + } else if (pmem_id) { + /* + * If there is more than one valid uuid set, we + * need userspace to clean this up. + */ + rc = -EBUSY; + goto err; + } + memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); + pmem_id = select_id; + } + + /* + * Fix up each mapping's 'labels' to have the validated pmem label for + * that position at labels[0], and NULL at labels[1]. In the process, + * check that the namespace aligns with the interleave set. We know + * that it does not overlap with any blk namespaces by virtue of + * the dimm being enabled (i.e. nd_label_reserve_dpa() + * succeeded). 
+ */ + rc = select_pmem_id(nd_region, pmem_id); + if (rc) + goto err; + + /* Calculate total size and populate namespace properties from label0 */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *label0 = nd_mapping->labels[0]; + + size += __le64_to_cpu(label0->rawsize); + if (__le16_to_cpu(label0->position) != 0) + continue; + WARN_ON(nspm->alt_name || nspm->uuid); + nspm->alt_name = kmemdup((void __force *) label0->name, + NSLABEL_NAME_LEN, GFP_KERNEL); + nspm->uuid = kmemdup((void __force *) label0->uuid, + NSLABEL_UUID_LEN, GFP_KERNEL); + } + + if (!nspm->alt_name || !nspm->uuid) { + rc = -ENOMEM; + goto err; + } + + nd_namespace_pmem_set_size(nd_region, nspm, size); + + return 0; + err: + switch (rc) { + case -EINVAL: + dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + break; + case -ENODEV: + dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + break; + default: + dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", + __func__, rc); + break; + } + return rc; +} + +static struct device **create_namespace_pmem(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct device *dev, **devs; + struct resource *res; + int rc; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.common.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + rc = find_pmem_label_set(nd_region, nspm); + if (rc == -ENODEV) { + int i; + + /* Pass, try to permit namespace creation... */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + /* Publish a zero-sized namespace for userspace to configure. 
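+ * A zero size means no capacity has been provisioned yet; userspace + * can allocate DPA later by writing the namespace 'size' attribute.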
*/ + nd_namespace_pmem_set_size(nd_region, nspm, 0); + + rc = 0; + } else if (rc) + goto err; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) + goto err; + + devs[0] = dev; + return devs; + + err: + namespace_pmem_release(&nspm->nsio.common.dev); + return NULL; +} + +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start) +{ + struct nd_label_id label_id; + struct resource *res; + + nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL); + res = krealloc(nsblk->res, + sizeof(void *) * (nsblk->num_resources + 1), + GFP_KERNEL); + if (!res) + return NULL; + nsblk->res = (struct resource **) res; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id.id) == 0 + && res->start == start) { + nsblk->res[nsblk->num_resources++] = res; + return res; + } + return NULL; +} + +static struct device *nd_namespace_blk_create(struct nd_region *nd_region) +{ + struct nd_namespace_blk *nsblk; + struct device *dev; + + if (!is_nd_blk(&nd_region->dev)) + return NULL; + + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + return NULL; + + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL); + if (nsblk->id < 0) { + kfree(nsblk); + return NULL; + } + dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id); + dev->parent = &nd_region->dev; + dev->groups = nd_namespace_attribute_groups; + + return &nsblk->common.dev; +} + +void nd_region_create_blk_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->ns_seed = nd_namespace_blk_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->ns_seed) + dev_err(&nd_region->dev, "failed to create blk namespace\n"); + else + nd_device_register(nd_region->ns_seed); +} + +void nd_region_create_btt_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->btt_seed = nd_btt_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->btt_seed) + dev_err(&nd_region->dev, "failed to create btt namespace\n"); +} + +static struct device **create_namespace_blk(struct nd_region *nd_region) +{ + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nd_namespace_label *nd_label; + struct device *dev, **devs = NULL; + struct nd_namespace_blk *nsblk; + struct nvdimm_drvdata *ndd; + int i, l, count = 0; + struct resource *res; + + if (nd_region->ndr_mappings == 0) + return NULL; + + ndd = to_ndd(nd_mapping); + for_each_label(l, nd_label, nd_mapping->labels) { + u32 flags = __le32_to_cpu(nd_label->flags); + char *name[NSLABEL_NAME_LEN]; + struct device **__devs; + + if (flags & NSLABEL_FLAG_LOCAL) + /* pass */; + else + continue; + + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + if (memcmp(nsblk->uuid, nd_label->uuid, + NSLABEL_UUID_LEN) == 0) { + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + break; + } + } + if (i < count) + continue; + __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL); + if (!__devs) + goto err; + memcpy(__devs, devs, sizeof(dev) * count); + kfree(devs); + devs = __devs; + + nsblk 
= kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + dev_set_name(dev, "namespace%d.%d", nd_region->id, count); + devs[count++] = dev; + nsblk->id = -1; + nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); + nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, + GFP_KERNEL); + if (!nsblk->uuid) + goto err; + memcpy(name, nd_label->name, NSLABEL_NAME_LEN); + if (name[0]) + nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, + GFP_KERNEL); + res = nsblk_add_resource(nd_region, ndd, nsblk, + __le64_to_cpu(nd_label->dpa)); + if (!res) + goto err; + nd_dbg_dpa(nd_region, ndd, res, "%s assign\n", + dev_name(&nsblk->common.dev)); + } + + dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n", + __func__, count, count == 1 ? "" : "s"); + + if (count == 0) { + /* Publish a zero-sized namespace for userspace to configure. */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + devs = kcalloc(2, sizeof(dev), GFP_KERNEL); + if (!devs) + goto err; + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); + if (!nsblk) + goto err; + dev = &nsblk->common.dev; + dev->type = &namespace_blk_device_type; + dev->parent = &nd_region->dev; + devs[count++] = dev; + } + + return devs; + +err: + for (i = 0; i < count; i++) { + nsblk = to_nd_namespace_blk(devs[i]); + namespace_blk_release(&nsblk->common.dev); + } + kfree(devs); + return NULL; +} + +static int init_active_labels(struct nd_region *nd_region) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int count, j; + + /* + * If the dimm is disabled then prevent the region from + * being activated if it aliases DPA. 
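+ * A disabled dimm that does not set NDD_ALIASING is harmless here, so + * only the aliasing case fails the region probe with -ENXIO.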
+ */ + if (!ndd) { + if ((nvdimm->flags & NDD_ALIASING) == 0) + return 0; + dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", + dev_name(&nd_mapping->nvdimm->dev)); + return -ENXIO; + } + nd_mapping->ndd = ndd; + atomic_inc(&nvdimm->busy); + get_ndd(ndd); + + count = nd_label_active_count(ndd); + dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + if (!count) + continue; + nd_mapping->labels = kcalloc(count + 1, sizeof(void *), + GFP_KERNEL); + if (!nd_mapping->labels) + return -ENOMEM; + for (j = 0; j < count; j++) { + struct nd_namespace_label *label; + + label = nd_label_active(ndd, j); + nd_mapping->labels[j] = label; + } + } + + return 0; +} + +int nd_region_register_namespaces(struct nd_region *nd_region, int *err) +{ + struct device **devs = NULL; + int i, rc = 0, type; + + *err = 0; + nvdimm_bus_lock(&nd_region->dev); + rc = init_active_labels(nd_region); + if (rc) { + nvdimm_bus_unlock(&nd_region->dev); + return rc; + } + + type = nd_region_to_nstype(nd_region); + switch (type) { + case ND_DEVICE_NAMESPACE_IO: + devs = create_namespace_io(nd_region); + break; + case ND_DEVICE_NAMESPACE_PMEM: + devs = create_namespace_pmem(nd_region); + break; + case ND_DEVICE_NAMESPACE_BLK: + devs = create_namespace_blk(nd_region); + break; + default: + break; + } + nvdimm_bus_unlock(&nd_region->dev); + + if (!devs) + return -ENODEV; + + for (i = 0; devs[i]; i++) { + struct device *dev = devs[i]; + int id; + + if (type == ND_DEVICE_NAMESPACE_BLK) { + struct nd_namespace_blk *nsblk; + + nsblk = to_nd_namespace_blk(dev); + id = ida_simple_get(&nd_region->ns_ida, 0, 0, + GFP_KERNEL); + nsblk->id = id; + } else + id = i; + + if (id < 0) + break; + dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + dev->groups = nd_namespace_attribute_groups; + nd_device_register(dev); + } + if (i) + nd_region->ns_seed = devs[0]; + + if (devs[i]) { + int j; + + for (j = i; devs[j]; j++) { + struct device *dev = devs[j]; + + device_initialize(dev); + put_device(dev); + } + *err = j - i; + /* + * All of the namespaces we tried to register failed, so + * fail region activation. + */ + if (*err == 0) + rc = -ENODEV; + } + kfree(devs); + + if (rc == -ENODEV) + return rc; + + return i; +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h new file mode 100644 index 000000000000..e1970c71ad1c --- /dev/null +++ b/drivers/nvdimm/nd-core.h @@ -0,0 +1,83 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef __ND_CORE_H__ +#define __ND_CORE_H__ +#include <linux/libnvdimm.h> +#include <linux/device.h> +#include <linux/libnvdimm.h> +#include <linux/sizes.h> +#include <linux/mutex.h> +#include <linux/nd.h> + +extern struct list_head nvdimm_bus_list; +extern struct mutex nvdimm_bus_list_mutex; +extern int nvdimm_major; + +struct nvdimm_bus { + struct nvdimm_bus_descriptor *nd_desc; + wait_queue_head_t probe_wait; + struct module *module; + struct list_head list; + struct device dev; + int id, probe_active; + struct mutex reconfig_mutex; +}; + +struct nvdimm { + unsigned long flags; + void *provider_data; + unsigned long *dsm_mask; + struct device dev; + atomic_t busy; + int id; +}; + +bool is_nvdimm(struct device *dev); +bool is_nd_pmem(struct device *dev); +bool is_nd_blk(struct device *dev); +struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); +int __init nvdimm_bus_init(void); +void nvdimm_bus_exit(void); +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev); +struct nd_region; +void nd_region_create_blk_seed(struct nd_region *nd_region); +void nd_region_create_btt_seed(struct nd_region *nd_region); +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); +int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); +void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); +void nd_synchronize(void); +int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); +int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus); +void __nd_device_register(struct device *dev); +int nd_match_dimm(struct device *dev, void *data); +struct nd_label_id; +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags); +bool nd_is_uuid_unique(struct device *dev, u8 *uuid); +struct nd_region; +struct nvdimm_drvdata; +struct nd_mapping; +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap); +resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id); +struct nd_mapping; +struct resource *nsblk_add_resource(struct nd_region *nd_region, + struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk, + resource_size_t start); +int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); +void get_ndd(struct nvdimm_drvdata *ndd); +resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +#endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h new file mode 100644 index 000000000000..c41f53e74277 --- /dev/null +++ b/drivers/nvdimm/nd.h @@ -0,0 +1,220 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef __ND_H__ +#define __ND_H__ +#include <linux/libnvdimm.h> +#include <linux/blkdev.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/types.h> +#include "label.h" + +enum { + /* + * Limits the maximum number of block apertures a dimm can + * support and is an input to the geometry/on-disk-format of a + * BTT instance + */ + ND_MAX_LANES = 256, + SECTOR_SHIFT = 9, + INT_LBASIZE_ALIGNMENT = 64, +}; + +struct nvdimm_drvdata { + struct device *dev; + int nsindex_size; + struct nd_cmd_get_config_size nsarea; + void *data; + int ns_current, ns_next; + struct resource dpa; + struct kref kref; +}; + +struct nd_region_namespaces { + int count; + int active; +}; + +static inline struct nd_namespace_index *to_namespace_index( + struct nvdimm_drvdata *ndd, int i) +{ + if (i < 0) + return NULL; + + return ndd->data + sizeof_namespace_index(ndd) * i; +} + +static inline struct nd_namespace_index *to_current_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_current); +} + +static inline struct nd_namespace_index *to_next_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_next); +} + +#define nd_dbg_dpa(r, d, res, fmt, arg...) \ + dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ + (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ + (unsigned long long) (res ? resource_size(res) : 0), \ + (unsigned long long) (res ? res->start : 0), ##arg) + +#define for_each_label(l, label, labels) \ + for (l = 0; (label = labels ? labels[l] : NULL); l++) + +#define for_each_dpa_resource(ndd, res) \ + for (res = (ndd)->dpa.child; res; res = res->sibling) + +#define for_each_dpa_resource_safe(ndd, res, next) \ + for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ + res; res = next, next = next ? next->sibling : NULL) + +struct nd_percpu_lane { + int count; + spinlock_t lock; +}; + +struct nd_region { + struct device dev; + struct ida ns_ida; + struct ida btt_ida; + struct device *ns_seed; + struct device *btt_seed; + u16 ndr_mappings; + u64 ndr_size; + u64 ndr_start; + int id, num_lanes, ro, numa_node; + void *provider_data; + struct nd_interleave_set *nd_set; + struct nd_percpu_lane __percpu *lane; + struct nd_mapping mapping[0]; +}; + +struct nd_blk_region { + int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev); + int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw); + void *blk_provider_data; + struct nd_region nd_region; +}; + +/* + * Lookup next in the repeating sequence of 01, 10, and 11. 
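+ * For example, nd_inc_seq(1) == 2, nd_inc_seq(2) == 3 and + * nd_inc_seq(3) == 1, while the unused 0 entry maps back to 0.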
+ */ +static inline unsigned nd_inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} + +struct btt; +struct nd_btt { + struct device dev; + struct nd_namespace_common *ndns; + struct btt *btt; + unsigned long lbasize; + u8 *uuid; + int id; +}; + +enum nd_async_mode { + ND_SYNC, + ND_ASYNC, +}; + +int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); +void wait_nvdimm_bus_probe_idle(struct device *dev); +void nd_device_register(struct device *dev); +void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len); +ssize_t nd_sector_size_show(unsigned long current_lbasize, + const unsigned long *supported, char *buf); +ssize_t nd_sector_size_store(struct device *dev, const char *buf, + unsigned long *current_lbasize, const unsigned long *supported); +int __init nvdimm_init(void); +int __init nd_region_init(void); +void nvdimm_exit(void); +void nd_region_exit(void); +struct nvdimm; +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, + void *buf, size_t len); +struct nd_btt *to_nd_btt(struct device *dev); +struct btt_sb; +u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); +#if IS_ENABLED(CONFIG_BTT) +int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_btt(struct device *dev); +struct device *nd_btt_create(struct nd_region *nd_region); +#else +static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_btt(struct device *dev) +{ + return false; +} + +static inline struct device *nd_btt_create(struct nd_region *nd_region) +{ + return NULL; +} + +#endif +struct nd_region *to_nd_region(struct device *dev); +int nd_region_to_nstype(struct nd_region *nd_region); +int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +void nvdimm_bus_lock(struct device *dev); +void nvdimm_bus_unlock(struct device *dev); +bool is_nvdimm_bus_locked(struct device *dev); +int nvdimm_revalidate_disk(struct gendisk *disk); +void nvdimm_drvdata_release(struct kref *kref); +void put_ndd(struct nvdimm_drvdata *ndd); +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n); +resource_size_t nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev); +int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); +int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns); +const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, + char *name); +int nd_blk_region_init(struct nd_region *nd_region); +void __nd_iostat_start(struct bio *bio, unsigned long *start); +static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + + if (!blk_queue_io_stat(disk->queue)) + return false; + + __nd_iostat_start(bio, start); + return true; +} +void nd_iostat_end(struct bio *bio, unsigned long start); +resource_size_t 
nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +#endif /* __ND_H__ */ diff --git a/drivers/block/pmem.c b/drivers/nvdimm/pmem.c index 095dfaadcaa5..ade9eb917a4d 100644 --- a/drivers/block/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -1,7 +1,7 @@ /* * Persistent Memory Driver * - * Copyright (c) 2014, Intel Corporation. + * Copyright (c) 2014-2015, Intel Corporation. * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>. * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>. * @@ -23,8 +23,9 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> - -#define PMEM_MINORS 16 +#include <linux/pmem.h> +#include <linux/nd.h> +#include "nd.h" struct pmem_device { struct request_queue *pmem_queue; @@ -32,12 +33,11 @@ struct pmem_device { /* One contiguous memory region per device */ phys_addr_t phys_addr; - void *virt_addr; + void __pmem *virt_addr; size_t size; }; static int pmem_major; -static atomic_t pmem_index; static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, @@ -45,13 +45,14 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, { void *mem = kmap_atomic(page); size_t pmem_off = sector << 9; + void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { - memcpy(mem + off, pmem->virt_addr + pmem_off, len); + memcpy_from_pmem(mem + off, pmem_addr, len); flush_dcache_page(page); } else { flush_dcache_page(page); - memcpy(pmem->virt_addr + pmem_off, mem + off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); } kunmap_atomic(mem); @@ -59,31 +60,24 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, static void pmem_make_request(struct request_queue *q, struct bio *bio) { - struct block_device *bdev = bio->bi_bdev; - struct pmem_device *pmem = bdev->bd_disk->private_data; - int rw; + bool do_acct; + unsigned long start; struct bio_vec bvec; - sector_t sector; struct bvec_iter iter; - int err = 0; - - if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) { - err = -EIO; - goto out; - } - - BUG_ON(bio->bi_rw & REQ_DISCARD); + struct block_device *bdev = bio->bi_bdev; + struct pmem_device *pmem = bdev->bd_disk->private_data; - rw = bio_data_dir(bio); - sector = bio->bi_iter.bi_sector; - bio_for_each_segment(bvec, bio, iter) { + do_acct = nd_iostat_start(bio, &start); + bio_for_each_segment(bvec, bio, iter) pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset, - rw, sector); - sector += bvec.bv_len >> 9; - } + bio_data_dir(bio), iter.bi_sector); + if (do_acct) + nd_iostat_end(bio, start); -out: - bio_endio(bio, err); + if (bio_data_dir(bio)) + wmb_pmem(); + + bio_endio(bio, 0); } static int pmem_rw_page(struct block_device *bdev, sector_t sector, @@ -106,7 +100,8 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, if (!pmem) return -ENODEV; - *kaddr = pmem->virt_addr + offset; + /* FIXME convert DAX to comprehend that this mapping has a lifetime */ + *kaddr = (void __force *) pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; @@ -116,124 +111,165 @@ static const struct block_device_operations pmem_fops = { .owner = THIS_MODULE, .rw_page = pmem_rw_page, .direct_access = pmem_direct_access, + .revalidate_disk = nvdimm_revalidate_disk, }; -static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res) +static struct pmem_device *pmem_alloc(struct device *dev, + struct resource *res, int id) { struct pmem_device *pmem; - struct gendisk *disk; - int idx, 
err; - err = -ENOMEM; pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); if (!pmem) - goto out; + return ERR_PTR(-ENOMEM); pmem->phys_addr = res->start; pmem->size = resource_size(res); + if (!arch_has_pmem_api()) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + + if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", + &pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-EBUSY); + } - err = -EINVAL; - if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) { - dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - goto out_free_dev; + pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); + if (!pmem->virt_addr) { + release_mem_region(pmem->phys_addr, pmem->size); + kfree(pmem); + return ERR_PTR(-ENXIO); } - /* - * Map the memory as write-through, as we can't write back the contents - * of the CPU caches in case of a crash. - */ - err = -ENOMEM; - pmem->virt_addr = ioremap_wt(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) - goto out_release_region; + return pmem; +} + +static void pmem_detach_disk(struct pmem_device *pmem) +{ + del_gendisk(pmem->pmem_disk); + put_disk(pmem->pmem_disk); + blk_cleanup_queue(pmem->pmem_queue); +} + +static int pmem_attach_disk(struct nd_namespace_common *ndns, + struct pmem_device *pmem) +{ + struct gendisk *disk; pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL); if (!pmem->pmem_queue) - goto out_unmap; + return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); - blk_queue_max_hw_sectors(pmem->pmem_queue, 1024); + blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); - disk = alloc_disk(PMEM_MINORS); - if (!disk) - goto out_free_queue; - - idx = atomic_inc_return(&pmem_index) - 1; + disk = alloc_disk(0); + if (!disk) { + blk_cleanup_queue(pmem->pmem_queue); + return -ENOMEM; + } disk->major = pmem_major; - disk->first_minor = PMEM_MINORS * idx; + disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; - sprintf(disk->disk_name, "pmem%d", idx); - disk->driverfs_dev = dev; + nvdimm_namespace_disk_name(ndns, disk->disk_name); + disk->driverfs_dev = &ndns->dev; set_capacity(disk, pmem->size >> 9); pmem->pmem_disk = disk; add_disk(disk); + revalidate_disk(disk); - return pmem; + return 0; +} -out_free_queue: - blk_cleanup_queue(pmem->pmem_queue); -out_unmap: - iounmap(pmem->virt_addr); -out_release_region: - release_mem_region(pmem->phys_addr, pmem->size); -out_free_dev: - kfree(pmem); -out: - return ERR_PTR(err); +static int pmem_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct pmem_device *pmem = dev_get_drvdata(ndns->claim); + + if (unlikely(offset + size > pmem->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) + memcpy_from_pmem(buf, pmem->virt_addr + offset, size); + else { + memcpy_to_pmem(pmem->virt_addr + offset, buf, size); + wmb_pmem(); + } + + return 0; } static void pmem_free(struct pmem_device *pmem) { - del_gendisk(pmem->pmem_disk); - put_disk(pmem->pmem_disk); - blk_cleanup_queue(pmem->pmem_queue); - iounmap(pmem->virt_addr); + memunmap_pmem(pmem->virt_addr); release_mem_region(pmem->phys_addr, pmem->size); kfree(pmem); } -static int pmem_probe(struct 
platform_device *pdev) +static int nd_pmem_probe(struct device *dev) { + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns; + struct nd_namespace_io *nsio; struct pmem_device *pmem; - struct resource *res; - - if (WARN_ON(pdev->num_resources > 1)) - return -ENXIO; + int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENXIO; + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); - pmem = pmem_alloc(&pdev->dev, res); + nsio = to_nd_namespace_io(&ndns->dev); + pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); - platform_set_drvdata(pdev, pmem); - - return 0; + dev_set_drvdata(dev, pmem); + ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) + rc = nvdimm_namespace_attach_btt(ndns); + else if (nd_btt_probe(ndns, pmem) == 0) { + /* we'll come back as btt-pmem */ + rc = -ENXIO; + } else + rc = pmem_attach_disk(ndns, pmem); + if (rc) + pmem_free(pmem); + return rc; } -static int pmem_remove(struct platform_device *pdev) +static int nd_pmem_remove(struct device *dev) { - struct pmem_device *pmem = platform_get_drvdata(pdev); + struct pmem_device *pmem = dev_get_drvdata(dev); + if (is_nd_btt(dev)) + nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + else + pmem_detach_disk(pmem); pmem_free(pmem); + return 0; } -static struct platform_driver pmem_driver = { - .probe = pmem_probe, - .remove = pmem_remove, - .driver = { - .owner = THIS_MODULE, - .name = "pmem", +MODULE_ALIAS("pmem"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); +static struct nd_device_driver nd_pmem_driver = { + .probe = nd_pmem_probe, + .remove = nd_pmem_remove, + .drv = { + .name = "nd_pmem", }, + .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) @@ -244,16 +280,19 @@ static int __init pmem_init(void) if (pmem_major < 0) return pmem_major; - error = platform_driver_register(&pmem_driver); - if (error) + error = nd_driver_register(&nd_pmem_driver); + if (error) { unregister_blkdev(pmem_major, "pmem"); - return error; + return error; + } + + return 0; } module_init(pmem_init); static void pmem_exit(void) { - platform_driver_unregister(&pmem_driver); + driver_unregister(&nd_pmem_driver.drv); unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c new file mode 100644 index 000000000000..f28f78ccff19 --- /dev/null +++ b/drivers/nvdimm/region.c @@ -0,0 +1,114 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/nd.h> +#include "nd.h" + +static int nd_region_probe(struct device *dev) +{ + int err, rc; + static unsigned long once; + struct nd_region_namespaces *num_ns; + struct nd_region *nd_region = to_nd_region(dev); + + if (nd_region->num_lanes > num_online_cpus() + && nd_region->num_lanes < num_possible_cpus() + && !test_and_set_bit(0, &once)) { + dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", + num_online_cpus(), nd_region->num_lanes, + num_possible_cpus()); + dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", + nd_region->num_lanes); + } + + rc = nd_blk_region_init(nd_region); + if (rc) + return rc; + + rc = nd_region_register_namespaces(nd_region, &err); + num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); + if (!num_ns) + return -ENOMEM; + + if (rc < 0) + return rc; + + num_ns->active = rc; + num_ns->count = rc + err; + dev_set_drvdata(dev, num_ns); + + if (rc && err && rc == err) + return -ENODEV; + + nd_region->btt_seed = nd_btt_create(nd_region); + if (err == 0) + return 0; + + /* + * Given multiple namespaces per region, we do not want to + * disable all the successfully registered peer namespaces upon + * a single registration failure. If userspace is missing a + * namespace that it expects it can disable/re-enable the region + * to retry discovery after correcting the failure. + * <regionX>/namespaces returns the current + * "<async-registered>/<total>" namespace count. + */ + dev_err(dev, "failed to register %d namespace%s, continuing...\n", + err, err == 1 ? "" : "s"); + return 0; +} + +static int child_unregister(struct device *dev, void *data) +{ + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static int nd_region_remove(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + /* flush attribute readers and disable */ + nvdimm_bus_lock(dev); + nd_region->ns_seed = NULL; + nd_region->btt_seed = NULL; + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + + device_for_each_child(dev, NULL, child_unregister); + return 0; +} + +static struct nd_device_driver nd_region_driver = { + .probe = nd_region_probe, + .remove = nd_region_remove, + .drv = { + .name = "nd_region", + }, + .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM, +}; + +int __init nd_region_init(void) +{ + return nd_driver_register(&nd_region_driver); +} + +void nd_region_exit(void) +{ + driver_unregister(&nd_region_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c new file mode 100644 index 000000000000..a5233422f9dc --- /dev/null +++ b/drivers/nvdimm/region_devs.c @@ -0,0 +1,787 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include <linux/io.h> +#include <linux/nd.h> +#include "nd-core.h" +#include "nd.h" + +static DEFINE_IDA(region_ida); + +static void nd_region_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + u16 i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + put_device(&nvdimm->dev); + } + free_percpu(nd_region->lane); + ida_simple_remove(&region_ida, nd_region->id); + if (is_nd_blk(dev)) + kfree(to_nd_blk_region(dev)); + else + kfree(nd_region); +} + +static struct device_type nd_blk_device_type = { + .name = "nd_blk", + .release = nd_region_release, +}; + +static struct device_type nd_pmem_device_type = { + .name = "nd_pmem", + .release = nd_region_release, +}; + +static struct device_type nd_volatile_device_type = { + .name = "nd_volatile", + .release = nd_region_release, +}; + +bool is_nd_pmem(struct device *dev) +{ + return dev ? dev->type == &nd_pmem_device_type : false; +} + +bool is_nd_blk(struct device *dev) +{ + return dev ? dev->type == &nd_blk_device_type : false; +} + +struct nd_region *to_nd_region(struct device *dev) +{ + struct nd_region *nd_region = container_of(dev, struct nd_region, dev); + + WARN_ON(dev->type->release != nd_region_release); + return nd_region; +} +EXPORT_SYMBOL_GPL(to_nd_region); + +struct nd_blk_region *to_nd_blk_region(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev); + + WARN_ON(!is_nd_blk(dev)); + return container_of(nd_region, struct nd_blk_region, nd_region); +} +EXPORT_SYMBOL_GPL(to_nd_blk_region); + +void *nd_region_provider_data(struct nd_region *nd_region) +{ + return nd_region->provider_data; +} +EXPORT_SYMBOL_GPL(nd_region_provider_data); + +void *nd_blk_region_provider_data(struct nd_blk_region *ndbr) +{ + return ndbr->blk_provider_data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_provider_data); + +void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data) +{ + ndbr->blk_provider_data = data; +} +EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data); + +/** + * nd_region_to_nstype() - region to an integer namespace type + * @nd_region: region-device to interrogate + * + * This is the 'nstype' attribute of a region as well, an input to the + * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match + * namespace devices with namespace drivers. 
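+ * A PMEM region with at least one NDD_ALIASING dimm reports + * ND_DEVICE_NAMESPACE_PMEM, a PMEM region with no aliasing reports + * ND_DEVICE_NAMESPACE_IO, and a BLK region reports + * ND_DEVICE_NAMESPACE_BLK.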
+ */ +int nd_region_to_nstype(struct nd_region *nd_region) +{ + if (is_nd_pmem(&nd_region->dev)) { + u16 i, alias; + + for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (nvdimm->flags & NDD_ALIASING) + alias++; + } + if (alias) + return ND_DEVICE_NAMESPACE_PMEM; + else + return ND_DEVICE_NAMESPACE_IO; + } else if (is_nd_blk(&nd_region->dev)) { + return ND_DEVICE_NAMESPACE_BLK; + } + + return 0; +} +EXPORT_SYMBOL(nd_region_to_nstype); + +static int is_uuid_busy(struct device *dev, void *data) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = data; + + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_PMEM: { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) + break; + if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + case ND_DEVICE_NAMESPACE_BLK: { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + if (!nsblk->uuid) + break; + if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + default: + break; + } + + return 0; +} + +static int is_namespace_uuid_busy(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + return device_for_each_child(dev, data, is_uuid_busy); + return 0; +} + +/** + * nd_is_uuid_unique - verify that no other namespace has @uuid + * @dev: any device on a nvdimm_bus + * @uuid: uuid to check + */ +bool nd_is_uuid_unique(struct device *dev, u8 *uuid) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev)); + if (device_for_each_child(&nvdimm_bus->dev, uuid, + is_namespace_uuid_busy) != 0) + return false; + return true; +} + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long size = 0; + + if (is_nd_pmem(dev)) { + size = nd_region->ndr_size; + } else if (nd_region->ndr_mappings == 1) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + + size = nd_mapping->size; + } + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static ssize_t mappings_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ndr_mappings); +} +static DEVICE_ATTR_RO(mappings); + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t set_cookie_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (is_nd_pmem(dev) && nd_set) + /* pass, should be precluded by region_visible */; + else + return -ENXIO; + + return sprintf(buf, "%#llx\n", nd_set->cookie); +} +static DEVICE_ATTR_RO(set_cookie); + +resource_size_t nd_region_available_dpa(struct nd_region *nd_region) +{ + resource_size_t blk_max_overlap = 0, available, overlap; + int i; + + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + + retry: + available = 0; + overlap = blk_max_overlap; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = 
&nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + /* if a dimm is disabled the available capacity is zero */ + if (!ndd) + return 0; + + if (is_nd_pmem(&nd_region->dev)) { + available += nd_pmem_available_dpa(nd_region, + nd_mapping, &overlap); + if (overlap > blk_max_overlap) { + blk_max_overlap = overlap; + goto retry; + } + } else if (is_nd_blk(&nd_region->dev)) { + available += nd_blk_available_dpa(nd_mapping); + } + } + + return available; +} + +static ssize_t available_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long available = 0; + + /* + * Flush in-flight updates and grab a snapshot of the available + * size. Of course, this value is potentially invalidated the + * memory nvdimm_bus_lock() is dropped, but that's userspace's + * problem to not race itself. + */ + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + available = nd_region_available_dpa(nd_region); + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%llu\n", available); +} +static DEVICE_ATTR_RO(available_size); + +static ssize_t init_namespaces_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (num_ns) + rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + else + rc = -ENXIO; + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(init_namespaces); + +static ssize_t namespace_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->ns_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(namespace_seed); + +static ssize_t btt_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->btt_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(btt_seed); + +static ssize_t read_only_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region->ro); +} + +static ssize_t read_only_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool ro; + int rc = strtobool(buf, &ro); + struct nd_region *nd_region = to_nd_region(dev); + + if (rc) + return rc; + + nd_region->ro = ro; + return len; +} +static DEVICE_ATTR_RW(read_only); + +static struct attribute *nd_region_attributes[] = { + &dev_attr_size.attr, + &dev_attr_nstype.attr, + &dev_attr_mappings.attr, + &dev_attr_btt_seed.attr, + &dev_attr_read_only.attr, + &dev_attr_set_cookie.attr, + &dev_attr_available_size.attr, + &dev_attr_namespace_seed.attr, + &dev_attr_init_namespaces.attr, + NULL, +}; + +static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct nd_region *nd_region = to_nd_region(dev); + struct nd_interleave_set *nd_set = nd_region->nd_set; + int type = nd_region_to_nstype(nd_region); + + if (a != &dev_attr_set_cookie.attr + && a != &dev_attr_available_size.attr) + return a->mode; + 
+ if ((type == ND_DEVICE_NAMESPACE_PMEM + || type == ND_DEVICE_NAMESPACE_BLK) + && a == &dev_attr_available_size.attr) + return a->mode; + else if (is_nd_pmem(dev) && nd_set) + return a->mode; + + return 0; +} + +struct attribute_group nd_region_attribute_group = { + .attrs = nd_region_attributes, + .is_visible = region_visible, +}; +EXPORT_SYMBOL_GPL(nd_region_attribute_group); + +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->cookie; + return 0; +} + +/* + * Upon successful probe/remove, take/release a reference on the + * associated interleave set (if present), and plant new btt + namespace + * seeds. Also, on the removal of a BLK region, notify the provider to + * disable the region. + */ +static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, + struct device *dev, bool probe) +{ + struct nd_region *nd_region; + + if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { + int i; + + nd_region = to_nd_region(dev); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + put_ndd(ndd); + nd_mapping->ndd = NULL; + if (ndd) + atomic_dec(&nvdimm->busy); + } + + if (is_nd_pmem(dev)) + return; + + to_nd_blk_region(dev)->disable(nvdimm_bus, dev); + } + if (dev->parent && is_nd_blk(dev->parent) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->ns_seed == dev) + nd_region_create_blk_seed(nd_region); + nvdimm_bus_unlock(dev); + } + if (is_nd_btt(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->btt_seed == dev) + nd_region_create_btt_seed(nd_region); + nvdimm_bus_unlock(dev); + } +} + +void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, true); +} + +void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev) +{ + nd_region_notify_driver_action(nvdimm_bus, dev, false); +} + +static ssize_t mappingN(struct device *dev, char *buf, int n) +{ + struct nd_region *nd_region = to_nd_region(dev); + struct nd_mapping *nd_mapping; + struct nvdimm *nvdimm; + + if (n >= nd_region->ndr_mappings) + return -ENXIO; + nd_mapping = &nd_region->mapping[n]; + nvdimm = nd_mapping->nvdimm; + + return sprintf(buf, "%s,%llu,%llu\n", dev_name(&nvdimm->dev), + nd_mapping->start, nd_mapping->size); +} + +#define REGION_MAPPING(idx) \ +static ssize_t mapping##idx##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + return mappingN(dev, buf, idx); \ +} \ +static DEVICE_ATTR_RO(mapping##idx) + +/* + * 32 should be enough for a while, even in the presence of socket + * interleave a 32-way interleave set is a degenerate case. 
+ */ +REGION_MAPPING(0); +REGION_MAPPING(1); +REGION_MAPPING(2); +REGION_MAPPING(3); +REGION_MAPPING(4); +REGION_MAPPING(5); +REGION_MAPPING(6); +REGION_MAPPING(7); +REGION_MAPPING(8); +REGION_MAPPING(9); +REGION_MAPPING(10); +REGION_MAPPING(11); +REGION_MAPPING(12); +REGION_MAPPING(13); +REGION_MAPPING(14); +REGION_MAPPING(15); +REGION_MAPPING(16); +REGION_MAPPING(17); +REGION_MAPPING(18); +REGION_MAPPING(19); +REGION_MAPPING(20); +REGION_MAPPING(21); +REGION_MAPPING(22); +REGION_MAPPING(23); +REGION_MAPPING(24); +REGION_MAPPING(25); +REGION_MAPPING(26); +REGION_MAPPING(27); +REGION_MAPPING(28); +REGION_MAPPING(29); +REGION_MAPPING(30); +REGION_MAPPING(31); + +static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nd_region *nd_region = to_nd_region(dev); + + if (n < nd_region->ndr_mappings) + return a->mode; + return 0; +} + +static struct attribute *mapping_attributes[] = { + &dev_attr_mapping0.attr, + &dev_attr_mapping1.attr, + &dev_attr_mapping2.attr, + &dev_attr_mapping3.attr, + &dev_attr_mapping4.attr, + &dev_attr_mapping5.attr, + &dev_attr_mapping6.attr, + &dev_attr_mapping7.attr, + &dev_attr_mapping8.attr, + &dev_attr_mapping9.attr, + &dev_attr_mapping10.attr, + &dev_attr_mapping11.attr, + &dev_attr_mapping12.attr, + &dev_attr_mapping13.attr, + &dev_attr_mapping14.attr, + &dev_attr_mapping15.attr, + &dev_attr_mapping16.attr, + &dev_attr_mapping17.attr, + &dev_attr_mapping18.attr, + &dev_attr_mapping19.attr, + &dev_attr_mapping20.attr, + &dev_attr_mapping21.attr, + &dev_attr_mapping22.attr, + &dev_attr_mapping23.attr, + &dev_attr_mapping24.attr, + &dev_attr_mapping25.attr, + &dev_attr_mapping26.attr, + &dev_attr_mapping27.attr, + &dev_attr_mapping28.attr, + &dev_attr_mapping29.attr, + &dev_attr_mapping30.attr, + &dev_attr_mapping31.attr, + NULL, +}; + +struct attribute_group nd_mapping_attribute_group = { + .is_visible = mapping_visible, + .attrs = mapping_attributes, +}; +EXPORT_SYMBOL_GPL(nd_mapping_attribute_group); + +int nd_blk_region_init(struct nd_region *nd_region) +{ + struct device *dev = &nd_region->dev; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!is_nd_blk(dev)) + return 0; + + if (nd_region->ndr_mappings < 1) { + dev_err(dev, "invalid BLK region\n"); + return -ENXIO; + } + + return to_nd_blk_region(dev)->enable(nvdimm_bus, dev); +} + +/** + * nd_region_acquire_lane - allocate and lock a lane + * @nd_region: region id and number of lanes possible + * + * A lane correlates to a BLK-data-window and/or a log slot in the BTT. + * We optimize for the common case where there are 256 lanes, one + * per-cpu. For larger systems we need to lock to share lanes. For now + * this implementation assumes the cost of maintaining an allocator for + * free lanes is on the order of the lock hold time, so it implements a + * static lane = cpu % num_lanes mapping. + * + * In the case of a BTT instance on top of a BLK namespace a lane may be + * acquired recursively. We lock on the first instance. + * + * In the case of a BTT instance on top of PMEM, we only acquire a lane + * for the BTT metadata updates. 
+ */ +unsigned int nd_region_acquire_lane(struct nd_region *nd_region) +{ + unsigned int cpu, lane; + + cpu = get_cpu(); + if (nd_region->num_lanes < nr_cpu_ids) { + struct nd_percpu_lane *ndl_lock, *ndl_count; + + lane = cpu % nd_region->num_lanes; + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (ndl_count->count++ == 0) + spin_lock(&ndl_lock->lock); + } else + lane = cpu; + + return lane; +} +EXPORT_SYMBOL(nd_region_acquire_lane); + +void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane) +{ + if (nd_region->num_lanes < nr_cpu_ids) { + unsigned int cpu = get_cpu(); + struct nd_percpu_lane *ndl_lock, *ndl_count; + + ndl_count = per_cpu_ptr(nd_region->lane, cpu); + ndl_lock = per_cpu_ptr(nd_region->lane, lane); + if (--ndl_count->count == 0) + spin_unlock(&ndl_lock->lock); + put_cpu(); + } + put_cpu(); +} +EXPORT_SYMBOL(nd_region_release_lane); + +static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc, struct device_type *dev_type, + const char *caller) +{ + struct nd_region *nd_region; + struct device *dev; + void *region_buf; + unsigned int i; + int ro = 0; + + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if ((nd_mapping->start | nd_mapping->size) % SZ_4K) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n", + caller, dev_name(&nvdimm->dev), i); + + return NULL; + } + + if (nvdimm->flags & NDD_UNARMED) + ro = 1; + } + + if (dev_type == &nd_blk_device_type) { + struct nd_blk_region_desc *ndbr_desc; + struct nd_blk_region *ndbr; + + ndbr_desc = to_blk_region_desc(ndr_desc); + ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + if (ndbr) { + nd_region = &ndbr->nd_region; + ndbr->enable = ndbr_desc->enable; + ndbr->disable = ndbr_desc->disable; + ndbr->do_io = ndbr_desc->do_io; + } + region_buf = ndbr; + } else { + nd_region = kzalloc(sizeof(struct nd_region) + + sizeof(struct nd_mapping) + * ndr_desc->num_mappings, + GFP_KERNEL); + region_buf = nd_region; + } + + if (!region_buf) + return NULL; + nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL); + if (nd_region->id < 0) + goto err_id; + + nd_region->lane = alloc_percpu(struct nd_percpu_lane); + if (!nd_region->lane) + goto err_percpu; + + for (i = 0; i < nr_cpu_ids; i++) { + struct nd_percpu_lane *ndl; + + ndl = per_cpu_ptr(nd_region->lane, i); + spin_lock_init(&ndl->lock); + ndl->count = 0; + } + + memcpy(nd_region->mapping, ndr_desc->nd_mapping, + sizeof(struct nd_mapping) * ndr_desc->num_mappings); + for (i = 0; i < ndr_desc->num_mappings; i++) { + struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + get_device(&nvdimm->dev); + } + nd_region->ndr_mappings = ndr_desc->num_mappings; + nd_region->provider_data = ndr_desc->provider_data; + nd_region->nd_set = ndr_desc->nd_set; + nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->ro = ro; + nd_region->numa_node = ndr_desc->numa_node; + ida_init(&nd_region->ns_ida); + ida_init(&nd_region->btt_ida); + dev = &nd_region->dev; + dev_set_name(dev, "region%d", nd_region->id); + dev->parent = &nvdimm_bus->dev; + dev->type = dev_type; + dev->groups = ndr_desc->attr_groups; + nd_region->ndr_size = resource_size(ndr_desc->res); + nd_region->ndr_start = ndr_desc->res->start; + nd_device_register(dev); + + return nd_region; + + 
err_percpu: + ida_simple_remove(®ion_ida, nd_region->id); + err_id: + kfree(region_buf); + return NULL; +} + +struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create); + +struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + if (ndr_desc->num_mappings > 1) + return NULL; + ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES); + return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_blk_region_create); + +struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, + struct nd_region_desc *ndr_desc) +{ + ndr_desc->num_lanes = ND_MAX_LANES; + return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type, + __func__); +} +EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e9851add6f4e..964ad572aaee 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1056,19 +1056,21 @@ struct vfio_devices { static int vfio_pci_get_devs(struct pci_dev *pdev, void *data) { struct vfio_devices *devs = data; - struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); - - if (pci_drv != &vfio_pci_driver) - return -EBUSY; + struct vfio_device *device; if (devs->cur_index == devs->max_index) return -ENOSPC; - devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev); - if (!devs->devices[devs->cur_index]) + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) return -EINVAL; - devs->cur_index++; + if (pci_dev_driver(pdev) != &vfio_pci_driver) { + vfio_device_put(device); + return -EBUSY; + } + + devs->devices[devs->cur_index++] = device; return 0; } diff --git a/drivers/vfio/platform/Kconfig b/drivers/vfio/platform/Kconfig index 9a4403e2a36c..bb30128782aa 100644 --- a/drivers/vfio/platform/Kconfig +++ b/drivers/vfio/platform/Kconfig @@ -1,6 +1,6 @@ config VFIO_PLATFORM tristate "VFIO support for platform devices" - depends on VFIO && EVENTFD && ARM + depends on VFIO && EVENTFD && (ARM || ARM64) select VFIO_VIRQFD help Support for platform devices with VFIO. This is required to make @@ -18,3 +18,5 @@ config VFIO_AMBA framework. If you don't know what to do here, say N. + +source "drivers/vfio/platform/reset/Kconfig" diff --git a/drivers/vfio/platform/Makefile b/drivers/vfio/platform/Makefile index 81de144c0eaa..9ce8afe28450 100644 --- a/drivers/vfio/platform/Makefile +++ b/drivers/vfio/platform/Makefile @@ -2,7 +2,9 @@ vfio-platform-y := vfio_platform.o vfio_platform_common.o vfio_platform_irq.o obj-$(CONFIG_VFIO_PLATFORM) += vfio-platform.o +obj-$(CONFIG_VFIO_PLATFORM) += reset/ vfio-amba-y := vfio_amba.o obj-$(CONFIG_VFIO_AMBA) += vfio-amba.o +obj-$(CONFIG_VFIO_AMBA) += reset/ diff --git a/drivers/vfio/platform/reset/Kconfig b/drivers/vfio/platform/reset/Kconfig new file mode 100644 index 000000000000..746b96b0003b --- /dev/null +++ b/drivers/vfio/platform/reset/Kconfig @@ -0,0 +1,7 @@ +config VFIO_PLATFORM_CALXEDAXGMAC_RESET + tristate "VFIO support for calxeda xgmac reset" + depends on VFIO_PLATFORM + help + Enables the VFIO platform driver to handle reset for Calxeda xgmac + + If you don't know what to do here, say N. 
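For context, a minimal provider-side sketch of how the nvdimm_pmem_region_create() export above might be used. This is not part of the patch: the helper name, and the assumption that the caller has already registered the nvdimm_bus and nvdimm and resolved the persistent-memory range into 'res', are illustrative only; the declarations are assumed to come from <linux/libnvdimm.h> as they do for the in-tree providers.

/*
 * Illustrative sketch, not part of this patch: register a single-DIMM
 * PMEM region against an existing nvdimm_bus.  Field names mirror what
 * nd_region_create() consumes above.
 */
static struct nd_region *example_register_pmem(struct nvdimm_bus *bus,
		struct nvdimm *nvdimm, struct resource *res)
{
	struct nd_mapping mapping = {
		.nvdimm = nvdimm,
		.start = res->start,		/* must be 4K aligned */
		.size = resource_size(res),	/* must be 4K aligned */
	};
	struct nd_region_desc ndr_desc = {
		.res = res,
		.nd_mapping = &mapping,
		.num_mappings = 1,
		.numa_node = NUMA_NO_NODE,
	};

	/* returns NULL on failure, e.g. an unaligned mapping or ENOMEM */
	return nvdimm_pmem_region_create(bus, &ndr_desc);
}

Note that nd_region_create() copies the mapping array into the new region, so a stack-allocated descriptor like the one above is sufficient for the registration call.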
diff --git a/drivers/vfio/platform/reset/Makefile b/drivers/vfio/platform/reset/Makefile new file mode 100644 index 000000000000..2a486af9f8fa --- /dev/null +++ b/drivers/vfio/platform/reset/Makefile @@ -0,0 +1,5 @@ +vfio-platform-calxedaxgmac-y := vfio_platform_calxedaxgmac.o + +ccflags-y += -Idrivers/vfio/platform + +obj-$(CONFIG_VFIO_PLATFORM_CALXEDAXGMAC_RESET) += vfio-platform-calxedaxgmac.o diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c new file mode 100644 index 000000000000..619dc7d22082 --- /dev/null +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -0,0 +1,86 @@ +/* + * VFIO platform driver specialized for Calxeda xgmac reset + * reset code is inherited from calxeda xgmac native driver + * + * Copyright 2010-2011 Calxeda, Inc. + * Copyright (c) 2015 Linaro Ltd. + * www.linaro.org + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> + +#include "vfio_platform_private.h" + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Eric Auger <eric.auger@linaro.org>" +#define DRIVER_DESC "Reset support for Calxeda xgmac vfio platform device" + +#define CALXEDAXGMAC_COMPAT "calxeda,hb-xgmac" + +/* XGMAC Register definitions */ +#define XGMAC_CONTROL 0x00000000 /* MAC Configuration */ + +/* DMA Control and Status Registers */ +#define XGMAC_DMA_CONTROL 0x00000f18 /* Ctrl (Operational Mode) */ +#define XGMAC_DMA_INTR_ENA 0x00000f1c /* Interrupt Enable */ + +/* DMA Control registe defines */ +#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ +#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ + +/* Common MAC defines */ +#define MAC_ENABLE_TX 0x00000008 /* Transmitter Enable */ +#define MAC_ENABLE_RX 0x00000004 /* Receiver Enable */ + +static inline void xgmac_mac_disable(void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + XGMAC_DMA_CONTROL); + + value &= ~(DMA_CONTROL_ST | DMA_CONTROL_SR); + writel(value, ioaddr + XGMAC_DMA_CONTROL); + + value = readl(ioaddr + XGMAC_CONTROL); + value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX); + writel(value, ioaddr + XGMAC_CONTROL); +} + +int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) +{ + struct vfio_platform_region reg = vdev->regions[0]; + + if (!reg.ioaddr) { + reg.ioaddr = + ioremap_nocache(reg.addr, reg.size); + if (!reg.ioaddr) + return -ENOMEM; + } + + /* disable IRQ */ + writel(0, reg.ioaddr + XGMAC_DMA_INTR_ENA); + + /* Disable the MAC core */ + xgmac_mac_disable(reg.ioaddr); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_platform_calxedaxgmac_reset); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index abcff7a1aa66..e43efb5e92bf 100644 --- 
a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -25,6 +25,44 @@ static DEFINE_MUTEX(driver_lock); +static const struct vfio_platform_reset_combo reset_lookup_table[] = { + { + .compat = "calxeda,hb-xgmac", + .reset_function_name = "vfio_platform_calxedaxgmac_reset", + .module_name = "vfio-platform-calxedaxgmac", + }, +}; + +static void vfio_platform_get_reset(struct vfio_platform_device *vdev, + struct device *dev) +{ + const char *compat; + int (*reset)(struct vfio_platform_device *); + int ret, i; + + ret = device_property_read_string(dev, "compatible", &compat); + if (ret) + return; + + for (i = 0 ; i < ARRAY_SIZE(reset_lookup_table); i++) { + if (!strcmp(reset_lookup_table[i].compat, compat)) { + request_module(reset_lookup_table[i].module_name); + reset = __symbol_get( + reset_lookup_table[i].reset_function_name); + if (reset) { + vdev->reset = reset; + return; + } + } + } +} + +static void vfio_platform_put_reset(struct vfio_platform_device *vdev) +{ + if (vdev->reset) + symbol_put_addr(vdev->reset); +} + static int vfio_platform_regions_init(struct vfio_platform_device *vdev) { int cnt = 0, i; @@ -100,6 +138,8 @@ static void vfio_platform_release(void *device_data) mutex_lock(&driver_lock); if (!(--vdev->refcnt)) { + if (vdev->reset) + vdev->reset(vdev); vfio_platform_regions_cleanup(vdev); vfio_platform_irq_cleanup(vdev); } @@ -127,6 +167,9 @@ static int vfio_platform_open(void *device_data) ret = vfio_platform_irq_init(vdev); if (ret) goto err_irq; + + if (vdev->reset) + vdev->reset(vdev); } vdev->refcnt++; @@ -159,6 +202,8 @@ static long vfio_platform_ioctl(void *device_data, if (info.argsz < minsz) return -EINVAL; + if (vdev->reset) + vdev->flags |= VFIO_DEVICE_FLAGS_RESET; info.flags = vdev->flags; info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; @@ -252,8 +297,12 @@ static long vfio_platform_ioctl(void *device_data, return ret; - } else if (cmd == VFIO_DEVICE_RESET) - return -EINVAL; + } else if (cmd == VFIO_DEVICE_RESET) { + if (vdev->reset) + return vdev->reset(vdev); + else + return -EINVAL; + } return -ENOTTY; } @@ -502,6 +551,8 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, return ret; } + vfio_platform_get_reset(vdev, dev); + mutex_init(&vdev->igate); return 0; @@ -513,8 +564,11 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) struct vfio_platform_device *vdev; vdev = vfio_del_group_dev(dev); - if (vdev) + + if (vdev) { + vfio_platform_put_reset(vdev); iommu_group_put(dev->iommu_group); + } return vdev; } diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h index 5d31e0473406..1c9b3d59543c 100644 --- a/drivers/vfio/platform/vfio_platform_private.h +++ b/drivers/vfio/platform/vfio_platform_private.h @@ -67,6 +67,13 @@ struct vfio_platform_device { struct resource* (*get_resource)(struct vfio_platform_device *vdev, int i); int (*get_irq)(struct vfio_platform_device *vdev, int i); + int (*reset)(struct vfio_platform_device *vdev); +}; + +struct vfio_platform_reset_combo { + const char *compat; + const char *reset_function_name; + const char *module_name; }; extern int vfio_platform_probe_common(struct vfio_platform_device *vdev, diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index e1278fe04b1e..2fb29dfeffbd 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -661,18 +661,29 @@ int vfio_add_group_dev(struct device *dev, EXPORT_SYMBOL_GPL(vfio_add_group_dev); /** - * Get a 
reference to the vfio_device for a device that is known to - * be bound to a vfio driver. The driver implicitly holds a - * vfio_device reference between vfio_add_group_dev and - * vfio_del_group_dev. We can therefore use drvdata to increment - * that reference from the struct device. This additional - * reference must be released by calling vfio_device_put. + * Get a reference to the vfio_device for a device. Even if the + * caller thinks they own the device, they could be racing with a + * release call path, so we can't trust drvdata for the shortcut. + * Go the long way around, from the iommu_group to the vfio_group + * to the vfio_device. */ struct vfio_device *vfio_device_get_from_dev(struct device *dev) { - struct vfio_device *device = dev_get_drvdata(dev); + struct iommu_group *iommu_group; + struct vfio_group *group; + struct vfio_device *device; + + iommu_group = iommu_group_get(dev); + if (!iommu_group) + return NULL; - vfio_device_get(device); + group = vfio_group_get_from_iommu(iommu_group); + iommu_group_put(iommu_group); + if (!group) + return NULL; + + device = vfio_group_get_device(group, dev); + vfio_group_put(group); return device; }
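To make the reworked lookup path concrete, here is a usage sketch that mirrors the vfio_pci_get_devs() hunk earlier in this patch. The helper name and the expected_drv parameter are invented for illustration; only vfio_device_get_from_dev(), vfio_device_put() (from <linux/vfio.h>) and pci_dev_driver() (from <linux/pci.h>) are real interfaces.

/*
 * Illustrative only: take a vfio_device reference via the iommu_group
 * walk, then re-check the bound driver, since drvdata can no longer be
 * trusted as a shortcut.
 */
static int example_get_vfio_pci_device(struct pci_dev *pdev,
		struct pci_driver *expected_drv)
{
	struct vfio_device *device = vfio_device_get_from_dev(&pdev->dev);

	if (!device)
		return -EINVAL;	/* no iommu_group, vfio_group, or vfio_device */

	if (pci_dev_driver(pdev) != expected_drv) {
		/* raced with an unbind; drop the reference we just took */
		vfio_device_put(device);
		return -EBUSY;
	}

	/* ... use 'device' ... */
	vfio_device_put(device);
	return 0;
}

Because the reference now comes from the iommu_group walk rather than drvdata, the driver check performed after the get is what protects callers against a concurrent release/unbind.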