diff options
50 files changed, 1217 insertions, 788 deletions
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt new file mode 100644 index 000000000000..5cfa4f016a00 --- /dev/null +++ b/Documentation/devicetree/bindings/pmem/pmem-region.txt @@ -0,0 +1,65 @@ +Device-tree bindings for persistent memory regions +----------------------------------------------------- + +Persistent memory refers to a class of memory devices that are: + + a) Usable as main system memory (i.e. cacheable), and + b) Retain their contents across power failure. + +Given b) it is best to think of persistent memory as a kind of memory mapped +storage device. To ensure data integrity the operating system needs to manage +persistent regions separately to the normal memory pool. To aid with that this +binding provides a standardised interface for discovering where persistent +memory regions exist inside the physical address space. + +Bindings for the region nodes: +----------------------------- + +Required properties: + - compatible = "pmem-region" + + - reg = <base, size>; + The reg property should specify an address range that is + translatable to a system physical address range. This address + range should be mappable as normal system memory would be + (i.e. cacheable). + + If the reg property contains multiple address ranges + each address range will be treated as though it was specified + in a separate device node. Having multiple address ranges in a + node implies no special relationship between the two ranges. + +Optional properties: + - Any relevant NUMA associativity properties for the target platform. + + - volatile; This property indicates that this region is actually + backed by non-persistent memory. This lets the OS know that it + may skip the cache flushes required to ensure data is made + persistent after a write. + + If this property is absent then the OS must assume that the region + is backed by non-volatile memory. 
+ +Examples: +-------------------- + + /* + * This node specifies one 4KB region spanning from + * 0x5000 to 0x5fff that is backed by non-volatile memory. + */ + pmem@5000 { + compatible = "pmem-region"; + reg = <0x00005000 0x00001000>; + }; + + /* + * This node specifies two 4KB regions that are backed by + * volatile (normal) memory. + */ + pmem@6000 { + compatible = "pmem-region"; + reg = < 0x00006000 0x00001000 + 0x00008000 0x00001000 >; + volatile; + }; + diff --git a/MAINTAINERS b/MAINTAINERS index dd7ce9171ac0..00855ffc8de9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8048,6 +8048,14 @@ Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem* +LIBNVDIMM: DEVICETREE BINDINGS +M: Oliver O'Halloran <oohall@gmail.com> +L: linux-nvdimm@lists.01.org +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/of_pmem.c +F: Documentation/devicetree/bindings/pmem/pmem-region.txt + LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams <dan.j.williams@intel.com> L: linux-nvdimm@lists.01.org diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 516e23de5a3d..48fbb41af5d1 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -824,6 +824,9 @@ static int __init opal_init(void) /* Create i2c platform devices */ opal_pdev_init("ibm,opal-i2c"); + /* Handle non-volatile memory devices */ + opal_pdev_init("pmem-region"); + /* Setup a heatbeat thread if requested by OPAL */ opal_init_heartbeat(); diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 22a112b4f4d8..e2235ed3e4be 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -36,16 +36,6 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); -static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT; 
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds"); - -/* after three payloads of overflow, it's dead jim */ -static unsigned int scrub_overflow_abort = 3; -module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(scrub_overflow_abort, - "Number of times we overflow ARS results before abort"); - static bool disable_vendor_specific; module_param(disable_vendor_specific, bool, S_IRUGO); MODULE_PARM_DESC(disable_vendor_specific, @@ -60,6 +50,10 @@ module_param(default_dsm_family, int, S_IRUGO); MODULE_PARM_DESC(default_dsm_family, "Try this DSM type first when identifying NVDIMM family"); +static bool no_init_ars; +module_param(no_init_ars, bool, 0644); +MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time"); + LIST_HEAD(acpi_descs); DEFINE_MUTEX(acpi_desc_lock); @@ -197,7 +191,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd * In the _LSI, _LSR, _LSW case the locked status is * communicated via the read/write commands */ - if (nfit_mem->has_lsi) + if (nfit_mem->has_lsr) break; if (status >> 16 & ND_CONFIG_LOCKED) @@ -477,14 +471,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, in_buf.buffer.length = call_pkg->nd_size_in; } - dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n", - __func__, dimm_name, cmd, func, in_buf.buffer.length); + dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n", + dimm_name, cmd, func, in_buf.buffer.length); print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, in_buf.buffer.pointer, min_t(u32, 256, in_buf.buffer.length), true); /* call the BIOS, prefer the named methods over _DSM if available */ - if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi) + if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr) out_obj = acpi_label_info(handle); else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) { struct 
nd_cmd_get_config_data_hdr *p = buf; @@ -507,8 +501,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } if (!out_obj) { - dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, - cmd_name); + dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name); return -EINVAL; } @@ -529,13 +522,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, } if (out_obj->package.type != ACPI_TYPE_BUFFER) { - dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", - __func__, dimm_name, cmd_name, out_obj->type); + dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n", + dimm_name, cmd_name, out_obj->type); rc = -EINVAL; goto out; } - dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name, + dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name, cmd_name, out_obj->buffer.length); print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4, out_obj->buffer.pointer, @@ -547,14 +540,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, out_obj->buffer.length - offset); if (offset + out_size > out_obj->buffer.length) { - dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n", - __func__, dimm_name, cmd_name, i); + dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n", + dimm_name, cmd_name, i); break; } if (in_buf.buffer.length + offset + out_size > buf_len) { - dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n", - __func__, dimm_name, cmd_name, i); + dev_dbg(dev, "%s output overrun cmd: %s field: %d\n", + dimm_name, cmd_name, i); rc = -ENXIO; goto out; } @@ -656,7 +649,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_spa->list); memcpy(nfit_spa->spa, spa, sizeof(*spa)); list_add_tail(&nfit_spa->list, &acpi_desc->spas); - dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__, + dev_dbg(dev, "spa index: %d type: %s\n", spa->range_index, spa_type_name(nfit_spa_type(spa))); return true; 
@@ -685,8 +678,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_memdev->list); memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev)); list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs); - dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n", - __func__, memdev->device_handle, memdev->range_index, + dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n", + memdev->device_handle, memdev->range_index, memdev->region_index, memdev->flags); return true; } @@ -754,7 +747,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_dcr->list); memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr)); list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs); - dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__, + dev_dbg(dev, "dcr index: %d windows: %d\n", dcr->region_index, dcr->windows); return true; } @@ -781,7 +774,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_bdw->list); memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw)); list_add_tail(&nfit_bdw->list, &acpi_desc->bdws); - dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__, + dev_dbg(dev, "bdw dcr: %d windows: %d\n", bdw->region_index, bdw->windows); return true; } @@ -820,7 +813,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_idt->list); memcpy(nfit_idt->idt, idt, sizeof_idt(idt)); list_add_tail(&nfit_idt->list, &acpi_desc->idts); - dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__, + dev_dbg(dev, "idt index: %d num_lines: %d\n", idt->interleave_index, idt->line_count); return true; } @@ -860,7 +853,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc, INIT_LIST_HEAD(&nfit_flush->list); memcpy(nfit_flush->flush, flush, sizeof_flush(flush)); list_add_tail(&nfit_flush->list, &acpi_desc->flushes); - dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n", flush->device_handle, flush->hint_count); return true; } @@ 
-873,7 +866,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc, mask = (1 << (pcap->highest_capability + 1)) - 1; acpi_desc->platform_cap = pcap->capabilities & mask; - dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap); + dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap); return true; } @@ -920,7 +913,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, return err; break; case ACPI_NFIT_TYPE_SMBIOS: - dev_dbg(dev, "%s: smbios\n", __func__); + dev_dbg(dev, "smbios\n"); break; case ACPI_NFIT_TYPE_CAPABILITIES: if (!add_platform_cap(acpi_desc, table)) @@ -1277,8 +1270,11 @@ static ssize_t scrub_show(struct device *dev, if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + mutex_lock(&acpi_desc->init_mutex); rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, - (work_busy(&acpi_desc->work)) ? "+\n" : "\n"); + work_busy(&acpi_desc->dwork.work) + && !acpi_desc->cancel ? "+\n" : "\n"); + mutex_unlock(&acpi_desc->init_mutex); } device_unlock(dev); return rc; @@ -1648,7 +1644,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event) struct nfit_mem *nfit_mem; struct acpi_nfit_desc *acpi_desc; - dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__, + dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev), event); if (event != NFIT_NOTIFY_DIMM_HEALTH) { @@ -1681,12 +1677,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) device_unlock(dev->parent); } +static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) +{ + acpi_handle handle; + acpi_status status; + + status = acpi_get_handle(adev->handle, method, &handle); + + if (ACPI_SUCCESS(status)) + return true; + return false; +} + static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, u32 device_handle) { struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; - union acpi_object *obj; unsigned long dsm_mask; const guid_t *guid; int i; @@ -1759,25 
+1766,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); - obj = acpi_label_info(adev_dimm->handle); - if (obj) { - ACPI_FREE(obj); - nfit_mem->has_lsi = 1; - dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev)); - } - - obj = acpi_label_read(adev_dimm->handle, 0, 0); - if (obj) { - ACPI_FREE(obj); - nfit_mem->has_lsr = 1; + if (acpi_nvdimm_has_method(adev_dimm, "_LSI") + && acpi_nvdimm_has_method(adev_dimm, "_LSR")) { dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev)); + nfit_mem->has_lsr = true; } - obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL); - if (obj) { - ACPI_FREE(obj); - nfit_mem->has_lsw = 1; + if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) { dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev)); + nfit_mem->has_lsw = true; } return 0; @@ -1866,10 +1863,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } - if (nfit_mem->has_lsi) + if (nfit_mem->has_lsr) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); - if (nfit_mem->has_lsr) set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); + } if (nfit_mem->has_lsw) set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); @@ -2365,7 +2362,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nvdimm = nd_blk_region_to_dimm(ndbr); nfit_mem = nvdimm_provider_data(nvdimm); if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { - dev_dbg(dev, "%s: missing%s%s%s\n", __func__, + dev_dbg(dev, "missing%s%s%s\n", nfit_mem ? "" : " nfit_mem", (nfit_mem && nfit_mem->dcr) ? "" : " dcr", (nfit_mem && nfit_mem->bdw) ? 
"" : " bdw"); @@ -2384,7 +2381,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr)); if (!mmio->addr.base) { - dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, + dev_dbg(dev, "%s failed to map bdw\n", nvdimm_name(nvdimm)); return -ENOMEM; } @@ -2395,8 +2392,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw, nfit_mem->memdev_bdw->interleave_ways); if (rc) { - dev_dbg(dev, "%s: %s failed to init bdw interleave\n", - __func__, nvdimm_name(nvdimm)); + dev_dbg(dev, "%s failed to init bdw interleave\n", + nvdimm_name(nvdimm)); return rc; } @@ -2407,7 +2404,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address, nfit_mem->spa_dcr->length); if (!mmio->addr.base) { - dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, + dev_dbg(dev, "%s failed to map dcr\n", nvdimm_name(nvdimm)); return -ENOMEM; } @@ -2418,15 +2415,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr, nfit_mem->memdev_dcr->interleave_ways); if (rc) { - dev_dbg(dev, "%s: %s failed to init dcr interleave\n", - __func__, nvdimm_name(nvdimm)); + dev_dbg(dev, "%s failed to init dcr interleave\n", + nvdimm_name(nvdimm)); return rc; } rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); if (rc < 0) { - dev_dbg(dev, "%s: %s failed get DIMM flags\n", - __func__, nvdimm_name(nvdimm)); + dev_dbg(dev, "%s failed get DIMM flags\n", + nvdimm_name(nvdimm)); return rc; } @@ -2476,7 +2473,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa memset(&ars_start, 0, sizeof(ars_start)); ars_start.address = spa->address; ars_start.length = spa->length; - ars_start.flags = acpi_desc->ars_start_flags; 
+ if (test_bit(ARS_SHORT, &nfit_spa->ars_state)) + ars_start.flags = ND_ARS_RETURN_PREV_DATA; if (nfit_spa_type(spa) == NFIT_SPA_PM) ars_start.type = ND_ARS_PERSISTENT; else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) @@ -2518,16 +2516,62 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc) int rc, cmd_rc; rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status, - acpi_desc->ars_status_size, &cmd_rc); + acpi_desc->max_ars, &cmd_rc); if (rc < 0) return rc; return cmd_rc; } -static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc, - struct nd_cmd_ars_status *ars_status) +static void ars_complete(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nd_region *nd_region = nfit_spa->nd_region; + struct device *dev; + + if ((ars_status->address >= spa->address && ars_status->address + < spa->address + spa->length) + || (ars_status->address < spa->address)) { + /* + * Assume that if a scrub starts at an offset from the + * start of nfit_spa that we are in the continuation + * case. + * + * Otherwise, if the scrub covers the spa range, mark + * any pending request complete. + */ + if (ars_status->address + ars_status->length + >= spa->address + spa->length) + /* complete */; + else + return; + } else + return; + + if (test_bit(ARS_DONE, &nfit_spa->ars_state)) + return; + + if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state)) + return; + + if (nd_region) { + dev = nd_region_dev(nd_region); + nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON); + } else + dev = acpi_desc->dev; + + dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index, + test_bit(ARS_SHORT, &nfit_spa->ars_state) + ? 
"short" : "long"); + clear_bit(ARS_SHORT, &nfit_spa->ars_state); + set_bit(ARS_DONE, &nfit_spa->ars_state); +} + +static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; int rc; u32 i; @@ -2606,7 +2650,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; - int blk_valid = 0, rc; + int rc; if (!nvdimm) { dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", @@ -2626,15 +2670,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, if (!nfit_mem || !nfit_mem->bdw) { dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n", spa->range_index, nvdimm_name(nvdimm)); - } else { - mapping->size = nfit_mem->bdw->capacity; - mapping->start = nfit_mem->bdw->start_address; - ndr_desc->num_lanes = nfit_mem->bdw->windows; - blk_valid = 1; + break; } + mapping->size = nfit_mem->bdw->capacity; + mapping->start = nfit_mem->bdw->start_address; + ndr_desc->num_lanes = nfit_mem->bdw->windows; ndr_desc->mapping = mapping; - ndr_desc->num_mappings = blk_valid; + ndr_desc->num_mappings = 1; ndbr_desc = to_blk_region_desc(ndr_desc); ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->do_io = acpi_desc->blk_do_io; @@ -2682,8 +2725,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, return 0; if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) { - dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n", - __func__); + dev_dbg(acpi_desc->dev, "detected invalid spa index\n"); return 0; } @@ -2769,301 +2811,243 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, return rc; } -static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc, - u32 max_ars) +static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc) { struct device *dev = acpi_desc->dev; 
struct nd_cmd_ars_status *ars_status; - if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) { - memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size); + if (acpi_desc->ars_status) { + memset(acpi_desc->ars_status, 0, acpi_desc->max_ars); return 0; } - if (acpi_desc->ars_status) - devm_kfree(dev, acpi_desc->ars_status); - acpi_desc->ars_status = NULL; - ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL); + ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL); if (!ars_status) return -ENOMEM; acpi_desc->ars_status = ars_status; - acpi_desc->ars_status_size = max_ars; return 0; } -static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc, - struct nfit_spa *nfit_spa) +static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) { - struct acpi_nfit_system_address *spa = nfit_spa->spa; int rc; - if (!nfit_spa->max_ars) { - struct nd_cmd_ars_cap ars_cap; - - memset(&ars_cap, 0, sizeof(ars_cap)); - rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); - if (rc < 0) - return rc; - nfit_spa->max_ars = ars_cap.max_ars_out; - nfit_spa->clear_err_unit = ars_cap.clear_err_unit; - /* check that the supported scrub types match the spa type */ - if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE && - ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0) - return -ENOTTY; - else if (nfit_spa_type(spa) == NFIT_SPA_PM && - ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0) - return -ENOTTY; - } - - if (ars_status_alloc(acpi_desc, nfit_spa->max_ars)) + if (ars_status_alloc(acpi_desc)) return -ENOMEM; rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) return rc; - if (ars_status_process_records(acpi_desc, acpi_desc->ars_status)) + if (ars_status_process_records(acpi_desc)) return -ENOMEM; return 0; } -static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, - struct nfit_spa *nfit_spa) +static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa, + int *query_rc) { - struct acpi_nfit_system_address *spa = 
nfit_spa->spa; - unsigned int overflow_retry = scrub_overflow_abort; - u64 init_ars_start = 0, init_ars_len = 0; - struct device *dev = acpi_desc->dev; - unsigned int tmo = scrub_timeout; - int rc; + int rc = *query_rc; - if (!nfit_spa->ars_required || !nfit_spa->nd_region) - return; + if (no_init_ars) + return acpi_nfit_register_region(acpi_desc, nfit_spa); - rc = ars_start(acpi_desc, nfit_spa); - /* - * If we timed out the initial scan we'll still be busy here, - * and will wait another timeout before giving up permanently. - */ - if (rc < 0 && rc != -EBUSY) - return; - - do { - u64 ars_start, ars_len; - - if (acpi_desc->cancel) - break; - rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); - if (rc == -ENOTTY) - break; - if (rc == -EBUSY && !tmo) { - dev_warn(dev, "range %d ars timeout, aborting\n", - spa->range_index); - break; - } + set_bit(ARS_REQ, &nfit_spa->ars_state); + set_bit(ARS_SHORT, &nfit_spa->ars_state); + switch (rc) { + case 0: + case -EAGAIN: + rc = ars_start(acpi_desc, nfit_spa); if (rc == -EBUSY) { - /* - * Note, entries may be appended to the list - * while the lock is dropped, but the workqueue - * being active prevents entries being deleted / - * freed. - */ - mutex_unlock(&acpi_desc->init_mutex); - ssleep(1); - tmo--; - mutex_lock(&acpi_desc->init_mutex); - continue; - } - - /* we got some results, but there are more pending... 
*/ - if (rc == -ENOSPC && overflow_retry--) { - if (!init_ars_len) { - init_ars_len = acpi_desc->ars_status->length; - init_ars_start = acpi_desc->ars_status->address; - } - rc = ars_continue(acpi_desc); - } - - if (rc < 0) { - dev_warn(dev, "range %d ars continuation failed\n", - spa->range_index); + *query_rc = rc; break; - } - - if (init_ars_len) { - ars_start = init_ars_start; - ars_len = init_ars_len; + } else if (rc == 0) { + rc = acpi_nfit_query_poison(acpi_desc); } else { - ars_start = acpi_desc->ars_status->address; - ars_len = acpi_desc->ars_status->length; + set_bit(ARS_FAILED, &nfit_spa->ars_state); + break; } - dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n", - spa->range_index, ars_start, ars_len); - /* notify the region about new poison entries */ - nvdimm_region_notify(nfit_spa->nd_region, - NVDIMM_REVALIDATE_POISON); + if (rc == -EAGAIN) + clear_bit(ARS_SHORT, &nfit_spa->ars_state); + else if (rc == 0) + ars_complete(acpi_desc, nfit_spa); break; - } while (1); + case -EBUSY: + case -ENOSPC: + break; + default: + set_bit(ARS_FAILED, &nfit_spa->ars_state); + break; + } + + if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state)) + set_bit(ARS_REQ, &nfit_spa->ars_state); + + return acpi_nfit_register_region(acpi_desc, nfit_spa); } -static void acpi_nfit_scrub(struct work_struct *work) +static void ars_complete_all(struct acpi_nfit_desc *acpi_desc) { - struct device *dev; - u64 init_scrub_length = 0; struct nfit_spa *nfit_spa; - u64 init_scrub_address = 0; - bool init_ars_done = false; - struct acpi_nfit_desc *acpi_desc; - unsigned int tmo = scrub_timeout; - unsigned int overflow_retry = scrub_overflow_abort; - - acpi_desc = container_of(work, typeof(*acpi_desc), work); - dev = acpi_desc->dev; - - /* - * We scrub in 2 phases. The first phase waits for any platform - * firmware initiated scrubs to complete and then we go search for the - * affected spa regions to mark them scanned. 
In the second phase we - * initiate a directed scrub for every range that was not scrubbed in - * phase 1. If we're called for a 'rescan', we harmlessly pass through - * the first phase, but really only care about running phase 2, where - * regions can be notified of new poison. - */ - /* process platform firmware initiated scrubs */ - retry: - mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - struct nd_cmd_ars_status *ars_status; - struct acpi_nfit_system_address *spa; - u64 ars_start, ars_len; - int rc; - - if (acpi_desc->cancel) - break; - - if (nfit_spa->nd_region) + if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) continue; + ars_complete(acpi_desc, nfit_spa); + } +} - if (init_ars_done) { - /* - * No need to re-query, we're now just - * reconciling all the ranges covered by the - * initial scrub - */ - rc = 0; - } else - rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); - - if (rc == -ENOTTY) { - /* no ars capability, just register spa and move on */ - acpi_nfit_register_region(acpi_desc, nfit_spa); - continue; - } - - if (rc == -EBUSY && !tmo) { - /* fallthrough to directed scrub in phase 2 */ - dev_warn(dev, "timeout awaiting ars results, continuing...\n"); - break; - } else if (rc == -EBUSY) { - mutex_unlock(&acpi_desc->init_mutex); - ssleep(1); - tmo--; - goto retry; - } - - /* we got some results, but there are more pending... */ - if (rc == -ENOSPC && overflow_retry--) { - ars_status = acpi_desc->ars_status; - /* - * Record the original scrub range, so that we - * can recall all the ranges impacted by the - * initial scrub. 
- */ - if (!init_scrub_length) { - init_scrub_length = ars_status->length; - init_scrub_address = ars_status->address; - } - rc = ars_continue(acpi_desc); - if (rc == 0) { - mutex_unlock(&acpi_desc->init_mutex); - goto retry; - } - } +static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, + int query_rc) +{ + unsigned int tmo = acpi_desc->scrub_tmo; + struct device *dev = acpi_desc->dev; + struct nfit_spa *nfit_spa; - if (rc < 0) { - /* - * Initial scrub failed, we'll give it one more - * try below... - */ - break; - } + if (acpi_desc->cancel) + return 0; - /* We got some final results, record completed ranges */ - ars_status = acpi_desc->ars_status; - if (init_scrub_length) { - ars_start = init_scrub_address; - ars_len = ars_start + init_scrub_length; - } else { - ars_start = ars_status->address; - ars_len = ars_status->length; - } - spa = nfit_spa->spa; + if (query_rc == -EBUSY) { + dev_dbg(dev, "ARS: ARS busy\n"); + return min(30U * 60U, tmo * 2); + } + if (query_rc == -ENOSPC) { + dev_dbg(dev, "ARS: ARS continue\n"); + ars_continue(acpi_desc); + return 1; + } + if (query_rc && query_rc != -EAGAIN) { + unsigned long long addr, end; - if (!init_ars_done) { - init_ars_done = true; - dev_dbg(dev, "init scrub %#llx + %#llx complete\n", - ars_start, ars_len); - } - if (ars_start <= spa->address && ars_start + ars_len - >= spa->address + spa->length) - acpi_nfit_register_region(acpi_desc, nfit_spa); + addr = acpi_desc->ars_status->address; + end = addr + acpi_desc->ars_status->length; + dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end, + query_rc); } - /* - * For all the ranges not covered by an initial scrub we still - * want to see if there are errors, but it's ok to discover them - * asynchronously. - */ + ars_complete_all(acpi_desc); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - /* - * Flag all the ranges that still need scrubbing, but - * register them now to make data available. 
- */ - if (!nfit_spa->nd_region) { - nfit_spa->ars_required = 1; - acpi_nfit_register_region(acpi_desc, nfit_spa); + if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) + continue; + if (test_bit(ARS_REQ, &nfit_spa->ars_state)) { + int rc = ars_start(acpi_desc, nfit_spa); + + clear_bit(ARS_DONE, &nfit_spa->ars_state); + dev = nd_region_dev(nfit_spa->nd_region); + dev_dbg(dev, "ARS: range %d ARS start (%d)\n", + nfit_spa->spa->range_index, rc); + if (rc == 0 || rc == -EBUSY) + return 1; + dev_err(dev, "ARS: range %d ARS failed (%d)\n", + nfit_spa->spa->range_index, rc); + set_bit(ARS_FAILED, &nfit_spa->ars_state); } } - acpi_desc->init_complete = 1; + return 0; +} - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) - acpi_nfit_async_scrub(acpi_desc, nfit_spa); - acpi_desc->scrub_count++; - acpi_desc->ars_start_flags = 0; - if (acpi_desc->scrub_count_state) - sysfs_notify_dirent(acpi_desc->scrub_count_state); +static void acpi_nfit_scrub(struct work_struct *work) +{ + struct acpi_nfit_desc *acpi_desc; + unsigned int tmo; + int query_rc; + + acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work); + mutex_lock(&acpi_desc->init_mutex); + query_rc = acpi_nfit_query_poison(acpi_desc); + tmo = __acpi_nfit_scrub(acpi_desc, query_rc); + if (tmo) { + queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ); + acpi_desc->scrub_tmo = tmo; + } else { + acpi_desc->scrub_count++; + if (acpi_desc->scrub_count_state) + sysfs_notify_dirent(acpi_desc->scrub_count_state); + } + memset(acpi_desc->ars_status, 0, acpi_desc->max_ars); mutex_unlock(&acpi_desc->init_mutex); } +static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + int type = nfit_spa_type(nfit_spa->spa); + struct nd_cmd_ars_cap ars_cap; + int rc; + + memset(&ars_cap, 0, sizeof(ars_cap)); + rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); + if (rc < 0) + return; + /* check that the supported scrub types match the spa type */ + if (type == NFIT_SPA_VOLATILE && 
((ars_cap.status >> 16) + & ND_ARS_VOLATILE) == 0) + return; + if (type == NFIT_SPA_PM && ((ars_cap.status >> 16) + & ND_ARS_PERSISTENT) == 0) + return; + + nfit_spa->max_ars = ars_cap.max_ars_out; + nfit_spa->clear_err_unit = ars_cap.clear_err_unit; + acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars); + clear_bit(ARS_FAILED, &nfit_spa->ars_state); + set_bit(ARS_REQ, &nfit_spa->ars_state); +} + static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) { struct nfit_spa *nfit_spa; - int rc; + int rc, query_rc; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + set_bit(ARS_FAILED, &nfit_spa->ars_state); + switch (nfit_spa_type(nfit_spa->spa)) { + case NFIT_SPA_VOLATILE: + case NFIT_SPA_PM: + acpi_nfit_init_ars(acpi_desc, nfit_spa); + break; + } + } + + /* + * Reap any results that might be pending before starting new + * short requests. + */ + query_rc = acpi_nfit_query_poison(acpi_desc); + if (query_rc == 0) + ars_complete_all(acpi_desc); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) - if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { - /* BLK regions don't need to wait for ars results */ + switch (nfit_spa_type(nfit_spa->spa)) { + case NFIT_SPA_VOLATILE: + case NFIT_SPA_PM: + /* register regions and kick off initial ARS run */ + rc = ars_register(acpi_desc, nfit_spa, &query_rc); + if (rc) + return rc; + break; + case NFIT_SPA_BDW: + /* nothing to register */ + break; + case NFIT_SPA_DCR: + case NFIT_SPA_VDISK: + case NFIT_SPA_VCD: + case NFIT_SPA_PDISK: + case NFIT_SPA_PCD: + /* register known regions that don't support ARS */ rc = acpi_nfit_register_region(acpi_desc, nfit_spa); if (rc) return rc; + break; + default: + /* don't register unknown regions */ + break; } - acpi_desc->ars_start_flags = 0; - if (!acpi_desc->cancel) - queue_work(nfit_wq, &acpi_desc->work); + queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0); return 0; } @@ -3173,8 +3157,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, 
acpi_size sz) data = add_table(acpi_desc, &prev, data, end); if (IS_ERR(data)) { - dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__, - PTR_ERR(data)); + dev_dbg(dev, "nfit table parsing error: %ld\n", PTR_ERR(data)); rc = PTR_ERR(data); goto out_unlock; } @@ -3199,49 +3182,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); -struct acpi_nfit_flush_work { - struct work_struct work; - struct completion cmp; -}; - -static void flush_probe(struct work_struct *work) -{ - struct acpi_nfit_flush_work *flush; - - flush = container_of(work, typeof(*flush), work); - complete(&flush->cmp); -} - static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); struct device *dev = acpi_desc->dev; - struct acpi_nfit_flush_work flush; - int rc; - /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ device_lock(dev); device_unlock(dev); - /* bounce the init_mutex to make init_complete valid */ + /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); - if (acpi_desc->cancel || acpi_desc->init_complete) { - mutex_unlock(&acpi_desc->init_mutex); - return 0; - } - - /* - * Scrub work could take 10s of seconds, userspace may give up so we - * need to be interruptible while waiting. 
- */ - INIT_WORK_ONSTACK(&flush.work, flush_probe); - init_completion(&flush.cmp); - queue_work(nfit_wq, &flush.work); mutex_unlock(&acpi_desc->init_mutex); - rc = wait_for_completion_interruptible(&flush.cmp); - cancel_work_sync(&flush.work); - return rc; + return 0; } static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, @@ -3260,20 +3214,18 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, * just needs guarantees that any ars it initiates are not * interrupted by any intervening start reqeusts from userspace. */ - if (work_busy(&acpi_desc->work)) + if (work_busy(&acpi_desc->dwork.work)) return -EBUSY; return 0; } -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags) +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags) { struct device *dev = acpi_desc->dev; + int scheduled = 0, busy = 0; struct nfit_spa *nfit_spa; - if (work_busy(&acpi_desc->work)) - return -EBUSY; - mutex_lock(&acpi_desc->init_mutex); if (acpi_desc->cancel) { mutex_unlock(&acpi_desc->init_mutex); @@ -3281,19 +3233,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags) } list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - struct acpi_nfit_system_address *spa = nfit_spa->spa; + int type = nfit_spa_type(nfit_spa->spa); - if (nfit_spa_type(spa) != NFIT_SPA_PM) + if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE) + continue; + if (test_bit(ARS_FAILED, &nfit_spa->ars_state)) continue; - nfit_spa->ars_required = 1; + if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state)) + busy++; + else { + if (test_bit(ARS_SHORT, &flags)) + set_bit(ARS_SHORT, &nfit_spa->ars_state); + scheduled++; + } + } + if (scheduled) { + queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0); + dev_dbg(dev, "ars_scan triggered\n"); } - acpi_desc->ars_start_flags = flags; - queue_work(nfit_wq, &acpi_desc->work); - dev_dbg(dev, "%s: ars_scan triggered\n", __func__); mutex_unlock(&acpi_desc->init_mutex); - return 0; + if 
(scheduled) + return 0; + if (busy) + return -EBUSY; + return -ENOTTY; } void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) @@ -3320,7 +3285,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) INIT_LIST_HEAD(&acpi_desc->dimms); INIT_LIST_HEAD(&acpi_desc->list); mutex_init(&acpi_desc->init_mutex); - INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); + acpi_desc->scrub_tmo = 1; + INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub); } EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); @@ -3344,6 +3310,7 @@ void acpi_nfit_shutdown(void *data) mutex_lock(&acpi_desc->init_mutex); acpi_desc->cancel = 1; + cancel_delayed_work_sync(&acpi_desc->dwork); mutex_unlock(&acpi_desc->init_mutex); /* @@ -3397,8 +3364,8 @@ static int acpi_nfit_add(struct acpi_device *adev) rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer, obj->buffer.length); else - dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n", - __func__, (int) obj->type); + dev_dbg(dev, "invalid type %d, ignoring _FIT\n", + (int) obj->type); kfree(buf.pointer); } else /* skip over the lead-in header table */ @@ -3427,7 +3394,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) if (!dev->driver) { /* dev->driver may be null if we're being removed */ - dev_dbg(dev, "%s: no driver found for dev\n", __func__); + dev_dbg(dev, "no driver found for dev\n"); return; } @@ -3465,15 +3432,15 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); - u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? - 0 : ND_ARS_RETURN_PREV_DATA; + unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? 
+ 0 : 1 << ARS_SHORT; acpi_nfit_ars_rescan(acpi_desc, flags); } void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) { - dev_dbg(dev, "%s: event: 0x%x\n", __func__, event); + dev_dbg(dev, "event: 0x%x\n", event); switch (event) { case NFIT_NOTIFY_UPDATE: diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index b92921439657..e9626bf6ca29 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, if ((spa->address + spa->length - 1) < mce->addr) continue; found_match = 1; - dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n", - __func__, spa->range_index, spa->address, - spa->length); + dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n", + spa->range_index, spa->address, spa->length); /* * We can break at the first match because we're going * to rescan all the SPA ranges. There shouldn't be any diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 50d36e166d70..7d15856a739f 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -117,10 +117,17 @@ enum nfit_dimm_notifiers { NFIT_NOTIFY_DIMM_HEALTH = 0x81, }; +enum nfit_ars_state { + ARS_REQ, + ARS_DONE, + ARS_SHORT, + ARS_FAILED, +}; + struct nfit_spa { struct list_head list; struct nd_region *nd_region; - unsigned int ars_required:1; + unsigned long ars_state; u32 clear_err_unit; u32 max_ars; struct acpi_nfit_system_address spa[0]; @@ -171,9 +178,8 @@ struct nfit_mem { struct resource *flush_wpq; unsigned long dsm_mask; int family; - u32 has_lsi:1; - u32 has_lsr:1; - u32 has_lsw:1; + bool has_lsr; + bool has_lsw; }; struct acpi_nfit_desc { @@ -191,18 +197,18 @@ struct acpi_nfit_desc { struct device *dev; u8 ars_start_flags; struct nd_cmd_ars_status *ars_status; - size_t ars_status_size; - struct work_struct work; + struct delayed_work dwork; struct list_head list; struct kernfs_node *scrub_count_state; + unsigned int max_ars; unsigned int scrub_count; 
unsigned int scrub_mode; unsigned int cancel:1; - unsigned int init_complete:1; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_nfit_cmd_force_en; unsigned int platform_cap; + unsigned int scrub_tmo; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; @@ -244,7 +250,7 @@ struct nfit_blk { extern struct list_head acpi_descs; extern struct mutex acpi_desc_lock; -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags); +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags); #ifdef CONFIG_X86_MCE void nfit_mce_register(void); diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index b79aa8f7a497..e0700bf4893a 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -1,3 +1,7 @@ +config DAX_DRIVER + select DAX + bool + menuconfig DAX tristate "DAX: direct access to differentiated memory" select SRCU @@ -16,7 +20,6 @@ config DEV_DAX baseline memory pool. Mappings of a /dev/daxX.Y device impose restrictions that make the mapping behavior deterministic. 
- config DEV_DAX_PMEM tristate "PMEM DAX: direct access to persistent memory" depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 0b61f48f21a6..be8606457f27 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) dax_region = dev_dax->region; if (dax_region->align > PAGE_SIZE) { - dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", - __func__, dax_region->align, fault_size); + dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", + dax_region->align, fault_size); return VM_FAULT_SIGBUS; } @@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, - vmf->pgoff); + dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff); return VM_FAULT_SIGBUS; } @@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) dax_region = dev_dax->region; if (dax_region->align > PMD_SIZE) { - dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", - __func__, dax_region->align, fault_size); + dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", + dax_region->align, fault_size); return VM_FAULT_SIGBUS; } /* dax pmd mappings require pfn_t_devmap() */ if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { - dev_dbg(dev, "%s: region lacks devmap flags\n", __func__); + dev_dbg(dev, "region lacks devmap flags\n"); return VM_FAULT_SIGBUS; } @@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pmd_addr); phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, - pgoff); + dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff); return VM_FAULT_SIGBUS; } @@ 
-351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) dax_region = dev_dax->region; if (dax_region->align > PUD_SIZE) { - dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n", - __func__, dax_region->align, fault_size); + dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", + dax_region->align, fault_size); return VM_FAULT_SIGBUS; } /* dax pud mappings require pfn_t_devmap() */ if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) { - dev_dbg(dev, "%s: region lacks devmap flags\n", __func__); + dev_dbg(dev, "region lacks devmap flags\n"); return VM_FAULT_SIGBUS; } @@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pud_addr); phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, - pgoff); + dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff); return VM_FAULT_SIGBUS; } @@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf, struct file *filp = vmf->vma->vm_file; struct dev_dax *dev_dax = filp->private_data; - dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__, - current->comm, (vmf->flags & FAULT_FLAG_WRITE) - ? "write" : "read", + dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm, + (vmf->flags & FAULT_FLAG_WRITE) ? 
"write" : "read", vmf->vma->vm_start, vmf->vma->vm_end, pe_size); id = dax_read_lock(); @@ -460,7 +456,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma) struct dev_dax *dev_dax = filp->private_data; int rc, id; - dev_dbg(&dev_dax->dev, "%s\n", __func__); + dev_dbg(&dev_dax->dev, "trace\n"); /* * We lock to check dax_dev liveness and will re-check at @@ -518,7 +514,7 @@ static int dax_open(struct inode *inode, struct file *filp) struct inode *__dax_inode = dax_inode(dax_dev); struct dev_dax *dev_dax = dax_get_private(dax_dev); - dev_dbg(&dev_dax->dev, "%s\n", __func__); + dev_dbg(&dev_dax->dev, "trace\n"); inode->i_mapping = __dax_inode->i_mapping; inode->i_mapping->host = __dax_inode; filp->f_mapping = inode->i_mapping; @@ -533,7 +529,7 @@ static int dax_release(struct inode *inode, struct file *filp) { struct dev_dax *dev_dax = filp->private_data; - dev_dbg(&dev_dax->dev, "%s\n", __func__); + dev_dbg(&dev_dax->dev, "trace\n"); return 0; } @@ -575,7 +571,7 @@ static void unregister_dev_dax(void *dev) struct inode *inode = dax_inode(dax_dev); struct cdev *cdev = inode->i_cdev; - dev_dbg(dev, "%s\n", __func__); + dev_dbg(dev, "trace\n"); kill_dev_dax(dev_dax); cdev_device_del(cdev, dev); diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 31b6ecce4c64..fd49b24fd6af 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref) { struct dax_pmem *dax_pmem = to_dax_pmem(ref); - dev_dbg(dax_pmem->dev, "%s\n", __func__); + dev_dbg(dax_pmem->dev, "trace\n"); complete(&dax_pmem->cmp); } @@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data) struct percpu_ref *ref = data; struct dax_pmem *dax_pmem = to_dax_pmem(ref); - dev_dbg(dax_pmem->dev, "%s\n", __func__); + dev_dbg(dax_pmem->dev, "trace\n"); wait_for_completion(&dax_pmem->cmp); percpu_ref_exit(ref); } @@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data) struct percpu_ref *ref = data; struct dax_pmem 
*dax_pmem = to_dax_pmem(ref); - dev_dbg(dax_pmem->dev, "%s\n", __func__); + dev_dbg(dax_pmem->dev, "trace\n"); percpu_ref_kill(ref); } @@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = { .type = ND_DRIVER_DAX_PMEM, }; -static int __init dax_pmem_init(void) -{ - return nd_driver_register(&dax_pmem_driver); -} -module_init(dax_pmem_init); - -static void __exit dax_pmem_exit(void) -{ - driver_unregister(&dax_pmem_driver.drv); -} -module_exit(dax_pmem_exit); +module_nd_driver(dax_pmem_driver); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index ecdc292aa4e4..2b2332b605e4 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) return len < 0 ? len : -EIO; } - if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) - || pfn_t_devmap(pfn)) + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) { + /* + * An arch that has enabled the pmem api should also + * have its drivers support pfn_t_devmap() + * + * This is a developer warning and should not trigger in + * production. dax_flush() will crash since it depends + * on being able to do (page_address(pfn_to_page())). + */ + WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); + } else if (pfn_t_devmap(pfn)) { /* pass */; - else { + } else { pr_debug("VFS (%s): error: dax support not enabled\n", sb->s_id); return -EOPNOTSUPP; diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 2c8ac3688815..edff083f7c4e 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN config BLK_DEV_DM tristate "Device mapper support" select BLK_DEV_DM_BUILTIN - select DAX + depends on DAX || DAX=n ---help--- Device-mapper is a low level volume manager. It works by allowing people to specify mappings for ranges of logical sectors. 
Various diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 99297212eeec..775c06d953b7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti, return fn(ti, lc->dev, lc->start, ti->len, data); } +#if IS_ENABLED(CONFIG_DAX_DRIVER) static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { @@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +#else +#define linear_dax_direct_access NULL +#define linear_dax_copy_from_iter NULL +#endif + static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 9de072b7782a..c90c7c08a77f 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -611,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data) return 0; } -static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, - struct iov_iter *i) -{ - struct pending_block *block; - - if (!bytes) - return 0; - - block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); - if (!block) { - DMERR("Error allocating dax pending block"); - return -ENOMEM; - } - - block->data = kzalloc(bytes, GFP_KERNEL); - if (!block->data) { - DMERR("Error allocating dax data space"); - kfree(block); - return -ENOMEM; - } - - /* write data provided via the iterator */ - if (!copy_from_iter(block->data, bytes, i)) { - DMERR("Error copying dax data"); - kfree(block->data); - kfree(block); - return -EIO; - } - - /* rewind the iterator so that the block driver can use it */ - iov_iter_revert(i, bytes); - - block->datalen = bytes; - block->sector = bio_to_dev_sectors(lc, sector); - block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift; - - atomic_inc(&lc->pending_blocks); - 
spin_lock_irq(&lc->blocks_lock); - list_add_tail(&block->list, &lc->unflushed_blocks); - spin_unlock_irq(&lc->blocks_lock); - wake_up_process(lc->log_kthread); - - return 0; -} - static void log_writes_dtr(struct dm_target *ti) { struct log_writes_c *lc = ti->private; @@ -925,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->io_min = limits->physical_block_size; } +#if IS_ENABLED(CONFIG_DAX_DRIVER) +static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, + struct iov_iter *i) +{ + struct pending_block *block; + + if (!bytes) + return 0; + + block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); + if (!block) { + DMERR("Error allocating dax pending block"); + return -ENOMEM; + } + + block->data = kzalloc(bytes, GFP_KERNEL); + if (!block->data) { + DMERR("Error allocating dax data space"); + kfree(block); + return -ENOMEM; + } + + /* write data provided via the iterator */ + if (!copy_from_iter(block->data, bytes, i)) { + DMERR("Error copying dax data"); + kfree(block->data); + kfree(block); + return -EIO; + } + + /* rewind the iterator so that the block driver can use it */ + iov_iter_revert(i, bytes); + + block->datalen = bytes; + block->sector = bio_to_dev_sectors(lc, sector); + block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift; + + atomic_inc(&lc->pending_blocks); + spin_lock_irq(&lc->blocks_lock); + list_add_tail(&block->list, &lc->unflushed_blocks); + spin_unlock_irq(&lc->blocks_lock); + wake_up_process(lc->log_kthread); + + return 0; +} + static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { @@ -961,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } +#else +#define log_writes_dax_direct_access NULL +#define log_writes_dax_copy_from_iter NULL +#endif static struct target_type log_writes_target = { 
.name = "log-writes", diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index bb907cb3e60d..fe7fb9b1aec3 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -313,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } +#if IS_ENABLED(CONFIG_DAX_DRIVER) static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { @@ -353,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +#else +#define stripe_dax_direct_access NULL +#define stripe_dax_copy_from_iter NULL +#endif + /* * Stripe status: * diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5a81c47be4e4..4ea404dbcf0b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1826,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md) static struct mapped_device *alloc_dev(int minor) { int r, numa_node_id = dm_get_numa_node(); - struct dax_device *dax_dev; + struct dax_device *dax_dev = NULL; struct mapped_device *md; void *old_md; @@ -1892,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor) md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); - dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); - if (!dax_dev) - goto bad; + if (IS_ENABLED(CONFIG_DAX_DRIVER)) { + dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); + if (!dax_dev) + goto bad; + } md->dax_dev = dax_dev; add_disk_no_queue_reg(md->disk); diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index a65f2e1d9f53..85997184e047 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -20,7 +20,7 @@ if LIBNVDIMM config BLK_DEV_PMEM tristate "PMEM: Persistent memory block device support" default LIBNVDIMM - select DAX + select DAX_DRIVER select ND_BTT if BTT select ND_PFN if NVDIMM_PFN help @@ -102,4 +102,15 @@ config NVDIMM_DAX Select Y if unsure +config 
OF_PMEM + # FIXME: make tristate once OF_NUMA dependency removed + bool "Device-tree support for persistent memory regions" + depends on OF + default LIBNVDIMM + help + Allows regions of persistent memory to be described in the + device-tree. + + Select Y if unsure. + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 70d5f3ad9909..e8847045dac0 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o +obj-$(CONFIG_OF_PMEM) += of_pmem.o nd_pmem-y := pmem.o diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index d58925295aa7..795ad4ff35ca 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev) struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_btt *nd_btt = to_nd_btt(dev); - dev_dbg(dev, "%s\n", __func__); + dev_dbg(dev, "trace\n"); nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns); ida_simple_remove(&nd_region->btt_ida, nd_btt->id); kfree(nd_btt->uuid); @@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev, nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev, device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); device_unlock(dev); return rc ? 
rc : len; @@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev, device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->groups = nd_btt_attribute_groups; device_initialize(&nd_btt->dev); if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { - dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", - __func__, dev_name(ndns->claim)); + dev_dbg(&ndns->dev, "failed, already claimed by %s\n", + dev_name(ndns->claim)); put_device(dev); return NULL; } @@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) return -ENOMEM; btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL); rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb); - dev_dbg(dev, "%s: btt: %s\n", __func__, - rc == 0 ? dev_name(btt_dev) : "<none>"); + dev_dbg(dev, "btt: %s\n", rc == 0 ? 
dev_name(btt_dev) : "<none>"); if (rc < 0) { struct nd_btt *nd_btt = to_nd_btt(btt_dev); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 78eabc3a1ab1..a64023690cad 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.release = nvdimm_bus_release; nvdimm_bus->dev.groups = nd_desc->attr_groups; nvdimm_bus->dev.bus = &nvdimm_bus_type; + nvdimm_bus->dev.of_node = nd_desc->of_node; dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); rc = device_register(&nvdimm_bus->dev); if (rc) { @@ -984,8 +985,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, if (cmd == ND_CMD_CALL) { func = pkg.nd_command; - dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n", - __func__, dimm_name, pkg.nd_command, + dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n", + dimm_name, pkg.nd_command, in_len, out_len, buf_len); } @@ -996,8 +997,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, u32 copy; if (out_size == UINT_MAX) { - dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n", - __func__, dimm_name, cmd_name, i); + dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n", + dimm_name, cmd_name, i); return -EFAULT; } if (out_len < sizeof(out_env)) @@ -1012,9 +1013,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, buf_len = (u64) out_len + (u64) in_len; if (buf_len > ND_IOCTL_MAX_BUFLEN) { - dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__, - dimm_name, cmd_name, buf_len, - ND_IOCTL_MAX_BUFLEN); + dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name, + cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN); return -EINVAL; } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b2fc29b8279b..30852270484f 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev, char 
*name; if (dev->driver) { - dev_dbg(dev, "%s: -EBUSY\n", __func__); + dev_dbg(dev, "namespace already active\n"); return -EBUSY; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 1dc527660637..acce050856a8 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref) nvdimm_map = container_of(kref, struct nvdimm_map, kref); nvdimm_bus = nvdimm_map->nvdimm_bus; - dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset); + dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset); list_del(&nvdimm_map->list); if (nvdimm_map->flags) memunmap(nvdimm_map->mem); @@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, for (i = 0; i < 16; i++) { if (!isxdigit(str[0]) || !isxdigit(str[1])) { - dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", - __func__, i, str - buf, str[0], + dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n", + i, str - buf, str[0], str + 1 - buf, str[1]); return -EINVAL; } diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 1bf2bd318371..0453f49dc708 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev) struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; - dev_dbg(dev, "%s\n", __func__); + dev_dbg(dev, "trace\n"); nd_detach_ndns(dev, &nd_pfn->ndns); ida_simple_remove(&nd_region->dax_ida, nd_pfn->id); kfree(nd_pfn->uuid); @@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn->pfn_sb = pfn_sb; rc = nd_pfn_validate(nd_pfn, DAX_SIG); - dev_dbg(dev, "%s: dax: %s\n", __func__, - rc == 0 ? dev_name(dax_dev) : "<none>"); + dev_dbg(dev, "dax: %s\n", rc == 0 ? 
dev_name(dax_dev) : "<none>"); if (rc < 0) { nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index f8913b8124b6..233907889f96 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev) ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); nd_label_copy(ndd, to_next_namespace_index(ndd), to_current_namespace_index(ndd)); - rc = nd_label_reserve_dpa(ndd); - if (ndd->ns_current >= 0) - nvdimm_set_aliasing(dev); + if (ndd->ns_current >= 0) { + rc = nd_label_reserve_dpa(ndd); + if (rc == 0) + nvdimm_set_aliasing(dev); + } nvdimm_clear_locked(dev); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 097794d9f786..e00d45522b80 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) } memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); } - dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc); kfree(cmd); return rc; @@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref) struct device *dev = ndd->dev; struct resource *res, *_r; - dev_dbg(dev, "%s\n", __func__); - + dev_dbg(dev, "trace\n"); nvdimm_bus_lock(dev); for_each_dpa_resource_safe(ndd, res, _r) nvdimm_free_dpa(ndd, res); @@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); - dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + dev_dbg(&nvdimm_bus->dev, "count: %d\n", count); if (count != dimm_count) return -ENXIO; return 0; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index de66c02f6140..1d28cd656536 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -45,9 +45,27 @@ unsigned 
sizeof_namespace_label(struct nvdimm_drvdata *ndd) return ndd->nslabel_size; } +static size_t __sizeof_namespace_index(u32 nslot) +{ + return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8), + NSINDEX_ALIGN); +} + +static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd, + size_t index_size) +{ + return (ndd->nsarea.config_size - index_size * 2) / + sizeof_namespace_label(ndd); +} + int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) { - return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); + u32 tmp_nslot, n; + + tmp_nslot = ndd->nsarea.config_size / sizeof_namespace_label(ndd); + n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN; + + return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * n); } size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) @@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) u32 nslot, space, size; /* - * The minimum index space is 512 bytes, with that amount of - * index we can describe ~1400 labels which is less than a byte - * of overhead per label. Round up to a byte of overhead per - * label and determine the size of the index region. Yes, this - * starts to waste space at larger config_sizes, but it's - * unlikely we'll ever see anything but 128K. + * Per UEFI 2.7, the minimum size of the Label Storage Area is large + * enough to hold 2 index blocks and 2 labels. The minimum index + * block size is 256 bytes, and the minimum label size is 256 bytes. 
*/ nslot = nvdimm_num_label_slots(ndd); space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd); - size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8), - NSINDEX_ALIGN) * 2; - if (size <= space) + size = __sizeof_namespace_index(nslot) * 2; + if (size <= space && nslot >= 2) return size / 2; dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n", @@ -121,8 +135,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { - dev_dbg(dev, "%s: nsindex%d signature invalid\n", - __func__, i); + dev_dbg(dev, "nsindex%d signature invalid\n", i); continue; } @@ -135,8 +148,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) labelsize = 128; if (labelsize != sizeof_namespace_label(ndd)) { - dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n", - __func__, i, nsindex[i]->labelsize); + dev_dbg(dev, "nsindex%d labelsize %d invalid\n", + i, nsindex[i]->labelsize); continue; } @@ -145,30 +158,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); nsindex[i]->checksum = __cpu_to_le64(sum_save); if (sum != sum_save) { - dev_dbg(dev, "%s: nsindex%d checksum invalid\n", - __func__, i); + dev_dbg(dev, "nsindex%d checksum invalid\n", i); continue; } seq = __le32_to_cpu(nsindex[i]->seq); if ((seq & NSINDEX_SEQ_MASK) == 0) { - dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", - __func__, i, seq); + dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq); continue; } /* sanity check the index against expected values */ if (__le64_to_cpu(nsindex[i]->myoff) != i * sizeof_namespace_index(ndd)) { - dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", - __func__, i, (unsigned long long) + dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n", + i, (unsigned long long) __le64_to_cpu(nsindex[i]->myoff)); continue; } if 
(__le64_to_cpu(nsindex[i]->otheroff) != (!i) * sizeof_namespace_index(ndd)) { - dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", - __func__, i, (unsigned long long) + dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n", + i, (unsigned long long) __le64_to_cpu(nsindex[i]->otheroff)); continue; } @@ -176,8 +187,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) size = __le64_to_cpu(nsindex[i]->mysize); if (size > sizeof_namespace_index(ndd) || size < sizeof(struct nd_namespace_index)) { - dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", - __func__, i, size); + dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size); continue; } @@ -185,9 +195,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd) if (nslot * sizeof_namespace_label(ndd) + 2 * sizeof_namespace_index(ndd) > ndd->nsarea.config_size) { - dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", - __func__, i, nslot, - ndd->nsarea.config_size); + dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n", + i, nslot, ndd->nsarea.config_size); continue; } valid[i] = true; @@ -356,8 +365,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd, sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); nd_label->checksum = __cpu_to_le64(sum_save); if (sum != sum_save) { - dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n", - __func__, slot, sum); + dev_dbg(ndd->dev, "fail checksum. 
slot: %d expect: %#llx\n", + slot, sum); return false; } } @@ -422,8 +431,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd) u64 dpa = __le64_to_cpu(nd_label->dpa); dev_dbg(ndd->dev, - "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", - __func__, slot, label_slot, dpa, size); + "slot%d invalid slot: %d dpa: %llx size: %llx\n", + slot, label_slot, dpa, size); continue; } count++; @@ -650,7 +659,7 @@ static int __pmem_label_update(struct nd_region *nd_region, slot = nd_label_alloc_slot(ndd); if (slot == UINT_MAX) return -ENXIO; - dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + dev_dbg(ndd->dev, "allocated: %d\n", slot); nd_label = to_label(ndd, slot); memset(nd_label, 0, sizeof_namespace_label(ndd)); @@ -678,7 +687,7 @@ static int __pmem_label_update(struct nd_region *nd_region, sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); nd_label->checksum = __cpu_to_le64(sum); } - nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); + nd_dbg_dpa(nd_region, ndd, res, "\n"); /* update label */ offset = nd_label_offset(ndd, nd_label); @@ -700,7 +709,7 @@ static int __pmem_label_update(struct nd_region *nd_region, break; } if (victim) { - dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + dev_dbg(ndd->dev, "free: %d\n", slot); slot = to_slot(ndd, victim->label); nd_label_free_slot(ndd, slot); victim->label = NULL; @@ -868,7 +877,7 @@ static int __blk_label_update(struct nd_region *nd_region, slot = nd_label_alloc_slot(ndd); if (slot == UINT_MAX) goto abort; - dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); + dev_dbg(ndd->dev, "allocated: %d\n", slot); nd_label = to_label(ndd, slot); memset(nd_label, 0, sizeof_namespace_label(ndd)); @@ -928,7 +937,7 @@ static int __blk_label_update(struct nd_region *nd_region, /* free up now unused slots in the new index */ for_each_set_bit(slot, victim_map, victim_map ? 
nslot : 0) { - dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + dev_dbg(ndd->dev, "free: %d\n", slot); nd_label_free_slot(ndd, slot); } @@ -1092,7 +1101,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) active--; slot = to_slot(ndd, nd_label); nd_label_free_slot(ndd, slot); - dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot); + dev_dbg(ndd->dev, "free: %d\n", slot); list_move_tail(&label_ent->list, &list); label_ent->label = NULL; } @@ -1100,7 +1109,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) if (active == 0) { nd_mapping_free_labels(nd_mapping); - dev_dbg(ndd->dev, "%s: no more active labels\n", __func__); + dev_dbg(ndd->dev, "no more active labels\n"); } mutex_unlock(&nd_mapping->lock); diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 1ebf4d3d01ba..18bbe183b3a9 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -33,7 +33,7 @@ enum { BTTINFO_UUID_LEN = 16, BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ BTTINFO_MAJOR_VERSION = 1, - ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */ ND_LABEL_ID_SIZE = 50, ND_NSINDEX_INIT = 0x1, }; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 658ada497be0..28afdd668905 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev, rc = __alt_name_store(dev, buf, len); if (rc >= 0) rc = nd_namespace_label_update(nd_region, dev); - dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + dev_dbg(dev, "%s(%zd)\n", rc < 0 ? 
"fail " : "", rc); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val) if (uuid_not_set(uuid, dev, __func__)) return -ENXIO; if (nd_region->ndr_mappings == 0) { - dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__); + dev_dbg(dev, "not associated with dimm(s)\n"); return -ENXIO; } @@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev, *uuid = NULL; } - dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 - ? "fail" : "success", rc); + dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); else kfree(uuid); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev, rc = nd_size_select_store(dev, buf, lbasize, supported); if (rc >= 0) rc = nd_namespace_label_update(nd_region, dev); - dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, - rc, rc < 0 ? "tried" : "wrote", buf, - buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", + buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev, rc = __holder_class_store(dev, buf); if (rc >= 0) rc = nd_namespace_label_update(nd_region, dev); - dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + dev_dbg(dev, "%s(%zd)\n", rc < 0 ? 
"fail " : "", rc); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__)) return ERR_PTR(-ENODEV); if (!nsblk->lbasize) { - dev_dbg(&ndns->dev, "%s: sector size not set\n", - __func__); + dev_dbg(&ndns->dev, "sector size not set\n"); return ERR_PTR(-ENODEV); } if (!nd_namespace_blk_validate(nsblk)) @@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, } if (found_uuid) { - dev_dbg(ndd->dev, - "%s duplicate entry for uuid\n", - __func__); + dev_dbg(ndd->dev, "duplicate entry for uuid\n"); return false; } found_uuid = true; @@ -1926,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, } if (i < nd_region->ndr_mappings) { - struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]); + struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm; /* * Give up if we don't find an instance of a uuid at each @@ -1934,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, * find a dimm with two instances of the same uuid. 
*/ dev_err(&nd_region->dev, "%s missing label for %pUb\n", - dev_name(ndd->dev), nd_label->uuid); + nvdimm_name(nvdimm), nd_label->uuid); rc = -EINVAL; goto err; } @@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region, namespace_pmem_release(dev); switch (rc) { case -EINVAL: - dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + dev_dbg(&nd_region->dev, "invalid label(s)\n"); break; case -ENODEV: - dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + dev_dbg(&nd_region->dev, "label not found\n"); break; default: - dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", - __func__, rc); + dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc); break; } return ERR_PTR(rc); @@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region) } - dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n", - __func__, count, is_nd_blk(&nd_region->dev) + dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n", + count, is_nd_blk(&nd_region->dev) ? "blk" : "pmem", count == 1 ? 
"" : "s"); if (count == 0) { @@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region) get_ndd(ndd); count = nd_label_active_count(ndd); - dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + dev_dbg(ndd->dev, "count: %d\n", count); if (!count) continue; for (j = 0; j < count; j++) { diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 184e070d50a2..32e0364b48b9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -340,7 +340,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region) } #endif -struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c new file mode 100644 index 000000000000..85013bad35de --- /dev/null +++ b/drivers/nvdimm/of_pmem.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) "of_pmem: " fmt + +#include <linux/of_platform.h> +#include <linux/of_address.h> +#include <linux/libnvdimm.h> +#include <linux/module.h> +#include <linux/ioport.h> +#include <linux/slab.h> + +static const struct attribute_group *region_attr_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static const struct attribute_group *bus_attr_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +struct of_pmem_private { + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; +}; + +static int of_pmem_region_probe(struct platform_device *pdev) +{ + struct of_pmem_private *priv; + struct device_node *np; + struct nvdimm_bus *bus; + bool is_volatile; + int i; + + np = dev_of_node(&pdev->dev); + if (!np) + return -ENXIO; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus_desc.attr_groups = bus_attr_groups; + priv->bus_desc.provider_name = "of_pmem"; + 
priv->bus_desc.module = THIS_MODULE; + priv->bus_desc.of_node = np; + + priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc); + if (!bus) { + kfree(priv); + return -ENODEV; + } + platform_set_drvdata(pdev, priv); + + is_volatile = !!of_find_property(np, "volatile", NULL); + dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n", + is_volatile ? "volatile" : "non-volatile", np); + + for (i = 0; i < pdev->num_resources; i++) { + struct nd_region_desc ndr_desc; + struct nd_region *region; + + /* + * NB: libnvdimm copies the data from ndr_desc into its own + * structures so passing a stack pointer is fine. + */ + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.attr_groups = region_attr_groups; + ndr_desc.numa_node = of_node_to_nid(np); + ndr_desc.res = &pdev->resource[i]; + ndr_desc.of_node = np; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + + if (is_volatile) + region = nvdimm_volatile_region_create(bus, &ndr_desc); + else + region = nvdimm_pmem_region_create(bus, &ndr_desc); + + if (!region) + dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", + ndr_desc.res, np); + else + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", + ndr_desc.res, np); + } + + return 0; +} + +static int of_pmem_region_remove(struct platform_device *pdev) +{ + struct of_pmem_private *priv = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(priv->bus); + kfree(priv); + + return 0; +} + +static const struct of_device_id of_pmem_region_match[] = { + { .compatible = "pmem-region" }, + { }, +}; + +static struct platform_driver of_pmem_region_driver = { + .probe = of_pmem_region_probe, + .remove = of_pmem_region_remove, + .driver = { + .name = "of_pmem", + .owner = THIS_MODULE, + .of_match_table = of_pmem_region_match, + }, +}; + +module_platform_driver(of_pmem_region_driver); +MODULE_DEVICE_TABLE(of, of_pmem_region_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2f4d18752c97..30b08791597d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev) struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_pfn *nd_pfn = to_nd_pfn(dev); - dev_dbg(dev, "%s\n", __func__); + dev_dbg(dev, "trace\n"); nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); kfree(nd_pfn->uuid); @@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev, else rc = -EINVAL; } - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev, nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments()); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev, device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? "" : "\n"); device_unlock(dev); return rc ? rc : len; @@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev, device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); - dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, - rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, + buf[len - 1] == '\n' ? 
"" : "\n"); nvdimm_bus_unlock(dev); device_unlock(dev); @@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { - dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", - __func__, dev_name(ndns->claim)); + dev_dbg(&ndns->dev, "failed, already claimed by %s\n", + dev_name(ndns->claim)); put_device(dev); return NULL; } @@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; rc = nd_pfn_validate(nd_pfn, PFN_SIG); - dev_dbg(dev, "%s: pfn: %s\n", __func__, - rc == 0 ? dev_name(pfn_dev) : "<none>"); + dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); if (rc < 0) { nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5a96d30c294a..9d714926ecf5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, rc = BLK_STS_IOERR; if (cleared > 0 && cleared / 512) { cleared /= 512; - dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__, + dev_dbg(dev, "%#llx clear %ld sector%s\n", (unsigned long long) sector, cleared, cleared > 1 ? 
"s" : ""); badblocks_clear(&pmem->bb, sector, cleared); @@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = { .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; -static int __init pmem_init(void) -{ - return nd_driver_register(&nd_pmem_driver); -} -module_init(pmem_init); - -static void pmem_exit(void) -{ - driver_unregister(&nd_pmem_driver.drv); -} -module_exit(pmem_exit); +module_nd_driver(nd_pmem_driver); MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 034f0a07d627..b9ca0033cc99 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev) if (nd_region->num_lanes > num_online_cpus() && nd_region->num_lanes < num_possible_cpus() && !test_and_set_bit(0, &once)) { - dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", + dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n", num_online_cpus(), nd_region->num_lanes, num_possible_cpus()); - dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", + dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n", nd_region->num_lanes); } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 1593e1806b16..a612be6f019d 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev) } EXPORT_SYMBOL_GPL(to_nd_region); +struct device *nd_region_dev(struct nd_region *nd_region) +{ + if (!nd_region) + return NULL; + return &nd_region->dev; +} +EXPORT_SYMBOL_GPL(nd_region_dev); + struct nd_blk_region *to_nd_blk_region(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev); @@ -1014,6 +1022,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, dev->parent = 
&nvdimm_bus->dev; dev->type = dev_type; dev->groups = ndr_desc->attr_groups; + dev->of_node = ndr_desc->of_node; nd_region->ndr_size = resource_size(ndr_desc->res); nd_region->ndr_start = ndr_desc->res->start; nd_device_register(dev); diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 1444333210c7..9ac7574e3cfb 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -15,8 +15,8 @@ config BLK_DEV_XPRAM config DCSSBLK def_tristate m - select DAX select FS_DAX_LIMITED + select DAX_DRIVER prompt "DCSSBLK support" depends on S390 && BLOCK help diff --git a/fs/block_dev.c b/fs/block_dev.c index 7a506c55a993..7ec920e27065 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static int blkdev_writepages(struct address_space *mapping, struct writeback_control *wbc) { - if (dax_mapping(mapping)) { - struct block_device *bdev = I_BDEV(mapping->host); - - return dax_writeback_mapping_range(mapping, bdev, wbc); - } return generic_writepages(mapping, wbc); } @@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table); #define RADIX_DAX_ZERO_PAGE (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2)) #define RADIX_DAX_EMPTY (1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3)) -static unsigned long dax_radix_sector(void *entry) +static unsigned long dax_radix_pfn(void *entry) { return (unsigned long)entry >> RADIX_DAX_SHIFT; } -static void *dax_radix_locked_entry(sector_t sector, unsigned long flags) +static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags) { return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags | - ((unsigned long)sector << RADIX_DAX_SHIFT) | - RADIX_DAX_ENTRY_LOCK); + (pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK); } static unsigned int dax_radix_order(void *entry) @@ -299,6 +298,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping, dax_wake_mapping_entry_waiter(mapping, index, entry, false); } +static unsigned long 
dax_entry_size(void *entry) +{ + if (dax_is_zero_entry(entry)) + return 0; + else if (dax_is_empty_entry(entry)) + return 0; + else if (dax_is_pmd_entry(entry)) + return PMD_SIZE; + else + return PAGE_SIZE; +} + +static unsigned long dax_radix_end_pfn(void *entry) +{ + return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE; +} + +/* + * Iterate through all mapped pfns represented by an entry, i.e. skip + * 'empty' and 'zero' entries. + */ +#define for_each_mapped_pfn(entry, pfn) \ + for (pfn = dax_radix_pfn(entry); \ + pfn < dax_radix_end_pfn(entry); pfn++) + +static void dax_associate_entry(void *entry, struct address_space *mapping) +{ + unsigned long pfn; + + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return; + + for_each_mapped_pfn(entry, pfn) { + struct page *page = pfn_to_page(pfn); + + WARN_ON_ONCE(page->mapping); + page->mapping = mapping; + } +} + +static void dax_disassociate_entry(void *entry, struct address_space *mapping, + bool trunc) +{ + unsigned long pfn; + + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return; + + for_each_mapped_pfn(entry, pfn) { + struct page *page = pfn_to_page(pfn); + + WARN_ON_ONCE(trunc && page_ref_count(page) > 1); + WARN_ON_ONCE(page->mapping && page->mapping != mapping); + page->mapping = NULL; + } +} + /* * Find radix tree entry at given index. If it points to an exceptional entry, * return it with the radix tree entry locked. 
If the radix tree doesn't @@ -405,6 +461,7 @@ restart: } if (pmd_downgrade) { + dax_disassociate_entry(entry, mapping, false); radix_tree_delete(&mapping->page_tree, index); mapping->nrexceptional--; dax_wake_mapping_entry_waiter(mapping, index, entry, @@ -454,6 +511,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping, (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) goto out; + dax_disassociate_entry(entry, mapping, trunc); radix_tree_delete(page_tree, index); mapping->nrexceptional--; ret = 1; @@ -526,12 +584,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev, */ static void *dax_insert_mapping_entry(struct address_space *mapping, struct vm_fault *vmf, - void *entry, sector_t sector, + void *entry, pfn_t pfn_t, unsigned long flags, bool dirty) { struct radix_tree_root *page_tree = &mapping->page_tree; - void *new_entry; + unsigned long pfn = pfn_t_to_pfn(pfn_t); pgoff_t index = vmf->pgoff; + void *new_entry; if (dirty) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); @@ -546,7 +605,11 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, } spin_lock_irq(&mapping->tree_lock); - new_entry = dax_radix_locked_entry(sector, flags); + new_entry = dax_radix_locked_entry(pfn, flags); + if (dax_entry_size(entry) != dax_entry_size(new_entry)) { + dax_disassociate_entry(entry, mapping, false); + dax_associate_entry(new_entry, mapping); + } if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { /* @@ -657,17 +720,14 @@ unlock_pte: i_mmap_unlock_read(mapping); } -static int dax_writeback_one(struct block_device *bdev, - struct dax_device *dax_dev, struct address_space *mapping, - pgoff_t index, void *entry) +static int dax_writeback_one(struct dax_device *dax_dev, + struct address_space *mapping, pgoff_t index, void *entry) { struct radix_tree_root *page_tree = &mapping->page_tree; - void *entry2, **slot, *kaddr; - long 
ret = 0, id; - sector_t sector; - pgoff_t pgoff; + void *entry2, **slot; + unsigned long pfn; + long ret = 0; size_t size; - pfn_t pfn; /* * A page got tagged dirty in DAX mapping? Something is seriously @@ -683,10 +743,10 @@ static int dax_writeback_one(struct block_device *bdev, goto put_unlocked; /* * Entry got reallocated elsewhere? No need to writeback. We have to - * compare sectors as we must not bail out due to difference in lockbit + * compare pfns as we must not bail out due to difference in lockbit * or entry type. */ - if (dax_radix_sector(entry2) != dax_radix_sector(entry)) + if (dax_radix_pfn(entry2) != dax_radix_pfn(entry)) goto put_unlocked; if (WARN_ON_ONCE(dax_is_empty_entry(entry) || dax_is_zero_entry(entry))) { @@ -712,33 +772,15 @@ static int dax_writeback_one(struct block_device *bdev, /* * Even if dax_writeback_mapping_range() was given a wbc->range_start * in the middle of a PMD, the 'index' we are given will be aligned to - * the start index of the PMD, as will the sector we pull from - * 'entry'. This allows us to flush for PMD_SIZE and not have to - * worry about partial PMD writebacks. + * the start index of the PMD, as will the pfn we pull from 'entry'. + * This allows us to flush for PMD_SIZE and not have to worry about + * partial PMD writebacks. */ - sector = dax_radix_sector(entry); + pfn = dax_radix_pfn(entry); size = PAGE_SIZE << dax_radix_order(entry); - id = dax_read_lock(); - ret = bdev_dax_pgoff(bdev, sector, size, &pgoff); - if (ret) - goto dax_unlock; - - /* - * dax_direct_access() may sleep, so cannot hold tree_lock over - * its invocation. 
- */ - ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn); - if (ret < 0) - goto dax_unlock; - - if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) { - ret = -EIO; - goto dax_unlock; - } - - dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); - dax_flush(dax_dev, kaddr, size); + dax_mapping_entry_mkclean(mapping, index, pfn); + dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as @@ -749,8 +791,6 @@ static int dax_writeback_one(struct block_device *bdev, radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY); spin_unlock_irq(&mapping->tree_lock); trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT); - dax_unlock: - dax_read_unlock(id); put_locked_mapping_entry(mapping, index); return ret; @@ -808,8 +848,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, break; } - ret = dax_writeback_one(bdev, dax_dev, mapping, - indices[i], pvec.pages[i]); + ret = dax_writeback_one(dax_dev, mapping, indices[i], + pvec.pages[i]); if (ret < 0) { mapping_set_error(mapping, ret); goto out; @@ -877,6 +917,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry, int ret = VM_FAULT_NOPAGE; struct page *zero_page; void *entry2; + pfn_t pfn; zero_page = ZERO_PAGE(0); if (unlikely(!zero_page)) { @@ -884,14 +925,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry, goto out; } - entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0, + pfn = page_to_pfn_t(zero_page); + entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(entry2)) { ret = VM_FAULT_SIGBUS; goto out; } - vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page)); + vm_insert_mixed(vmf->vma, vaddr, pfn); out: trace_dax_load_hole(inode, vmf, ret); return ret; @@ -1200,8 +1242,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t 
*pfnp, if (error < 0) goto error_finish_iomap; - entry = dax_insert_mapping_entry(mapping, vmf, entry, - dax_iomap_sector(&iomap, pos), + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, 0, write && !sync); if (IS_ERR(entry)) { error = PTR_ERR(entry); @@ -1280,13 +1321,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap, void *ret = NULL; spinlock_t *ptl; pmd_t pmd_entry; + pfn_t pfn; zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm); if (unlikely(!zero_page)) goto fallback; - ret = dax_insert_mapping_entry(mapping, vmf, entry, 0, + pfn = page_to_pfn_t(zero_page); + ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false); if (IS_ERR(ret)) goto fallback; @@ -1409,8 +1452,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp, if (error < 0) goto finish_iomap; - entry = dax_insert_mapping_entry(mapping, vmf, entry, - dax_iomap_sector(&iomap, pos), + entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_PMD, write && !sync); if (IS_ERR(entry)) goto finish_iomap; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 032295e1d386..cc40802ddfa8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; /* inode.c */ +extern void ext2_set_file_ops(struct inode *inode); extern const struct address_space_operations ext2_aops; extern const struct address_space_operations ext2_nobh_aops; extern const struct iomap_ops ext2_iomap_ops; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 9b2ac55ac34f..1e01fabef130 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) loff_t offset = iocb->ki_pos; ssize_t ret; - if (WARN_ON_ONCE(IS_DAX(inode))) - return -EIO; - ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block); if (ret < 0 && iov_iter_rw(iter) == WRITE) 
ext2_write_failed(mapping, offset + count); @@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static int ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) { -#ifdef CONFIG_FS_DAX - if (dax_mapping(mapping)) { - return dax_writeback_mapping_range(mapping, - mapping->host->i_sb->s_bdev, - wbc); - } -#endif - return mpage_writepages(mapping, wbc, ext2_get_block); } +static int +ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + return dax_writeback_mapping_range(mapping, + mapping->host->i_sb->s_bdev, wbc); +} + const struct address_space_operations ext2_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, @@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = { .error_remove_page = generic_error_remove_page, }; +static const struct address_space_operations ext2_dax_aops = { + .writepages = ext2_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; + /* * Probably it should be a library function... search for first non-zero word * or memcmp with zero_page, whatever is better for particular architecture. 
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DAX; } +void ext2_set_file_ops(struct inode *inode) +{ + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &ext2_dax_aops; + else if (test_opt(inode->i_sb, NOBH)) + inode->i_mapping->a_ops = &ext2_nobh_aops; + else + inode->i_mapping->a_ops = &ext2_aops; +} + struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; @@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) ei->i_data[n] = raw_inode->i_block[n]; if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); } else if (S_ISDIR(inode->i_mode)) { inode->i_op = &ext2_dir_inode_operations; inode->i_fop = &ext2_dir_operations; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e078075dc66f..55f7caadb093 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); return ext2_add_nondir(dentry, inode); } @@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_ERR(inode)) return PTR_ERR(inode); - inode->i_op = &ext2_file_inode_operations; - if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - 
inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } + ext2_set_file_ops(inode); mark_inode_dirty(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 129205028300..1e50c5efae67 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping, percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - if (dax_mapping(mapping)) { - ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, - wbc); - goto out_writepages; - } - /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() @@ -2942,6 +2936,27 @@ out_writepages: return ret; } +static int ext4_dax_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + int ret; + long nr_to_write = wbc->nr_to_write; + struct inode *inode = mapping->host; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); + + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + + percpu_down_read(&sbi->s_journal_flag_rwsem); + trace_ext4_writepages(inode, wbc); + + ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); + trace_ext4_writepages_result(inode, wbc, ret, + nr_to_write - wbc->nr_to_write); + percpu_up_read(&sbi->s_journal_flag_rwsem); + return ret; +} + static int ext4_nonda_switch(struct super_block *sb) { s64 free_clusters, dirty_clusters; @@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (ext4_has_inline_data(inode)) return 0; - /* DAX uses iomap path now */ - if (WARN_ON_ONCE(IS_DAX(inode))) - return 0; - trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); @@ -3934,6 +3945,13 @@ static const struct 
address_space_operations ext4_da_aops = { .error_remove_page = generic_error_remove_page, }; +static const struct address_space_operations ext4_dax_aops = { + .writepages = ext4_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; + void ext4_set_aops(struct inode *inode) { switch (ext4_inode_journal_mode(inode)) { @@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode) default: BUG(); } - if (test_opt(inode->i_sb, DELALLOC)) + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &ext4_dax_aops; + else if (test_opt(inode->i_sb, DELALLOC)) inode->i_mapping->a_ops = &ext4_da_aops; else inode->i_mapping->a_ops = &ext4_aops; diff --git a/fs/libfs.c b/fs/libfs.c index 7ff3cb904acd..0fb590d79f30 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) } EXPORT_SYMBOL(noop_fsync); +int noop_set_page_dirty(struct page *page) +{ + /* + * Unlike __set_page_dirty_no_writeback that handles dirty page + * tracking in the page object, dax does all dirty tracking in + * the inode address_space in response to mkwrite faults. In the + * dax case we only need to worry about potentially dirty CPU + * caches, not dirty page cache pages to write back. + * + * This callback is defined to prevent fallback to + * __set_page_dirty_buffers() in set_page_dirty(). + */ + return 0; +} +EXPORT_SYMBOL_GPL(noop_set_page_dirty); + +void noop_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + /* + * There is no page cache to invalidate in the dax case, however + * we need this callback defined to prevent falling back to + * block_invalidatepage() in do_invalidatepage(). + */ +} +EXPORT_SYMBOL_GPL(noop_invalidatepage); + +ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + /* + * iomap based filesystems support direct I/O without need for + * this callback. 
However, it still needs to be set in + * inode->a_ops so that open/fcntl know that direct I/O is + * generally supported. + */ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(noop_direct_IO); + /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ void kfree_link(void *p) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 31f1f10eecd1..436a1de3fcdf 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1195,16 +1195,22 @@ xfs_vm_writepages( int ret; xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); - if (dax_mapping(mapping)) - return dax_writeback_mapping_range(mapping, - xfs_find_bdev_for_inode(mapping->host), wbc); - ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); if (wpc.ioend) ret = xfs_submit_ioend(wbc, wpc.ioend, ret); return ret; } +STATIC int +xfs_dax_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + return dax_writeback_mapping_range(mapping, + xfs_find_bdev_for_inode(mapping->host), wbc); +} + /* * Called to move a page into cleanable state - and from there * to be released. The page should already be clean. We always @@ -1367,17 +1373,6 @@ out_unlock: return error; } -STATIC ssize_t -xfs_vm_direct_IO( - struct kiocb *iocb, - struct iov_iter *iter) -{ - /* - * We just need the method present so that open/fcntl allow direct I/O. 
- */ - return -EINVAL; -} - STATIC sector_t xfs_vm_bmap( struct address_space *mapping, @@ -1500,8 +1495,15 @@ const struct address_space_operations xfs_address_space_operations = { .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .bmap = xfs_vm_bmap, - .direct_IO = xfs_vm_direct_IO, + .direct_IO = noop_direct_IO, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; + +const struct address_space_operations xfs_dax_aops = { + .writepages = xfs_dax_writepages, + .direct_IO = noop_direct_IO, + .set_page_dirty = noop_set_page_dirty, + .invalidatepage = noop_invalidatepage, +}; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index 88c85ea63da0..69346d460dfa 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -54,6 +54,7 @@ struct xfs_ioend { }; extern const struct address_space_operations xfs_address_space_operations; +extern const struct address_space_operations xfs_dax_aops; int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e0307fbff911..154725b1b813 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1285,7 +1285,10 @@ xfs_setup_iops( case S_IFREG: inode->i_op = &xfs_inode_operations; inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; + if (IS_DAX(inode)) + inode->i_mapping->a_ops = &xfs_dax_aops; + else + inode->i_mapping->a_ops = &xfs_address_space_operations; break; case S_IFDIR: if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) diff --git a/include/linux/dax.h b/include/linux/dax.h index 0185ecdae135..f9eb22ad341e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -26,18 +26,42 @@ extern struct attribute_group dax_attribute_group; #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); +struct dax_device *alloc_dax(void *private, const char *host, + const struct 
dax_operations *ops); void put_dax(struct dax_device *dax_dev); +void kill_dax(struct dax_device *dax_dev); +void dax_write_cache(struct dax_device *dax_dev, bool wc); +bool dax_write_cache_enabled(struct dax_device *dax_dev); #else static inline struct dax_device *dax_get_by_host(const char *host) { return NULL; } - +static inline struct dax_device *alloc_dax(void *private, const char *host, + const struct dax_operations *ops) +{ + /* + * Callers should check IS_ENABLED(CONFIG_DAX) to know if this + * NULL is an error or expected. + */ + return NULL; +} static inline void put_dax(struct dax_device *dax_dev) { } +static inline void kill_dax(struct dax_device *dax_dev) +{ +} +static inline void dax_write_cache(struct dax_device *dax_dev, bool wc) +{ +} +static inline bool dax_write_cache_enabled(struct dax_device *dax_dev) +{ + return false; +} #endif +struct writeback_control; int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int __bdev_dax_supported(struct super_block *sb, int blocksize); @@ -57,6 +81,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev) } struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); +int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc); #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { @@ -76,22 +102,23 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) { return NULL; } + +static inline int dax_writeback_mapping_range(struct address_space *mapping, + struct block_device *bdev, struct writeback_control *wbc) +{ + return -EOPNOTSUPP; +} #endif int dax_read_lock(void); void dax_read_unlock(int id); -struct dax_device *alloc_dax(void *private, const char *host, - const struct dax_operations *ops); bool dax_alive(struct dax_device *dax_dev); -void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device 
*dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); -void dax_write_cache(struct dax_device *dax_dev, bool wc); -bool dax_write_cache_enabled(struct dax_device *dax_dev); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); @@ -121,7 +148,4 @@ static inline bool dax_mapping(struct address_space *mapping) return mapping->host && IS_DAX(mapping->host); } -struct writeback_control; -int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc); #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index 1ee7f592e239..2aa02cad94d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3127,6 +3127,10 @@ extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); extern int noop_fsync(struct file *, loff_t, loff_t, int); +extern int noop_set_page_dirty(struct page *page); +extern void noop_invalidatepage(struct page *page, unsigned int offset, + unsigned int length); +extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index ff855ed965fb..097072c5a852 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); +struct device_node; struct nvdimm_bus_descriptor 
{ const struct attribute_group **attr_groups; unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; + struct device_node *of_node; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, @@ -123,6 +125,7 @@ struct nd_region_desc { int num_lanes; int numa_node; unsigned long flags; + struct device_node *of_node; }; struct device; @@ -164,6 +167,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); struct nd_region *to_nd_region(struct device *dev); +struct device *nd_region_dev(struct nd_region *nd_region); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); diff --git a/include/linux/nd.h b/include/linux/nd.h index 5dc6b695437d..43c181a6add5 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -180,6 +180,12 @@ struct nd_region; void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); +static inline void nd_driver_unregister(struct nd_device_driver *drv) +{ + driver_unregister(&drv->drv); +} #define nd_driver_register(driver) \ __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#define module_nd_driver(driver) \ + module_driver(driver, nd_driver_register, nd_driver_unregister) #endif /* __LINUX_ND_H__ */ diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 620fa78b3b1b..cb166be4918d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -104,7 +104,8 @@ enum { NUM_HINTS = 8, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, - NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset 
*/ + 4 /* spa1 iset */, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + + 4 /* spa1 iset */ + 1 /* spa11 iset */, DIMM_SIZE = SZ_32M, LABEL_SIZE = SZ_128K, SPA_VCD_SIZE = SZ_4M, @@ -153,6 +154,7 @@ struct nfit_test { void *nfit_buf; dma_addr_t nfit_dma; size_t nfit_size; + size_t nfit_filled; int dcr_idx; int num_dcr; int num_pm; @@ -709,7 +711,9 @@ static void smart_notify(struct device *bus_dev, >= thresh->media_temperature) || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) && smart->ctrl_temperature - >= thresh->ctrl_temperature)) { + >= thresh->ctrl_temperature) + || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH) + || (smart->shutdown_state != 0)) { device_lock(bus_dev); __acpi_nvdimm_notify(dimm_dev, 0x81); device_unlock(bus_dev); @@ -735,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold( return 0; } +static int nfit_test_cmd_smart_inject( + struct nd_intel_smart_inject *inj, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + if (buf_len != sizeof(*inj)) + return -EINVAL; + + if (inj->mtemp_enable) + smart->media_temperature = inj->media_temperature; + if (inj->spare_enable) + smart->spares = inj->spares; + if (inj->fatal_enable) + smart->health = ND_INTEL_SMART_FATAL_HEALTH; + if (inj->unsafe_shutdown_enable) { + smart->shutdown_state = 1; + smart->shutdown_count++; + } + inj->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + + return 0; +} + static void uc_error_notify(struct work_struct *work) { struct nfit_test *t = container_of(work, typeof(*t), work); @@ -935,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, t->dcr_idx], &t->smart[i - t->dcr_idx], &t->pdev.dev, t->dimm_dev[i]); + case ND_INTEL_SMART_INJECT: + return nfit_test_cmd_smart_inject(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); default: return -ENOTTY; } 
@@ -1222,7 +1259,7 @@ static void smart_init(struct nfit_test *t) | ND_INTEL_SMART_MTEMP_VALID, .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, .media_temperature = 23 * 16, - .ctrl_temperature = 30 * 16, + .ctrl_temperature = 25 * 16, .pmic_temperature = 40 * 16, .spares = 75, .alarm_flags = ND_INTEL_SMART_SPARE_TRIP @@ -1366,7 +1403,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; - unsigned int offset, i; + unsigned int offset = 0, i; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -1380,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* * spa1 (interleave last half of the 4 DIMMS, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; spa->length = SPA1_SIZE; + offset += spa->header.length; /* spa2 (dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa3 (dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 3; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa4 (dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 
4; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa5 (dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 5; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa6 (bdw for dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 6; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa7 (bdw for dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 7; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa8 (bdw for dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 8; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa9 (bdw for dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 9; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; spa->length = DIMM_SIZE; + offset 
+= spa->header.length; - offset = sizeof(*spa) * 10; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1481,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + offset += memdev->header.length; /* mem-region1 (spa0, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1497,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 2; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region2 (spa1, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1513,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region3 (spa1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1528,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region4 (spa1, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = 
handle[2]; @@ -1544,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region5 (spa1, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1559,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region6 (spa/dcr0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1574,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region7 (spa/dcr1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1589,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region8 (spa/dcr2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1604,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; 
+ offset += memdev->header.length; /* mem-region9 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1619,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region10 (spa/bdw0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1634,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region11 (spa/bdw1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1649,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region12 (spa/bdw2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1664,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region13 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev = nfit_buf + offset; 
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1680,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 0+1; dcr_common_init(dcr); dcr->serial_number = ~handle[0]; @@ -1696,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor1: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 1+1; dcr_common_init(dcr); dcr->serial_number = ~handle[1]; @@ -1711,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor2: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 2+1; dcr_common_init(dcr); dcr->serial_number = ~handle[2]; @@ -1726,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor3: blk */ - dcr = nfit_buf + offset + sizeof(struct 
acpi_nfit_control_region) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 3+1; dcr_common_init(dcr); dcr->serial_number = ~handle[3]; @@ -1741,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region) * 4; /* dcr-descriptor0: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1753,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor1: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1765,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor2: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1777,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor3: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ 
-1789,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[3]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 0+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw1 (spa/dcr1, dimm1) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 1+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw2 (spa/dcr2, dimm2) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 2+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw3 (spa/dcr3, dimm3) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 3+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region) * 4; /* 
flush0 (dimm0) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -1845,48 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); + offset += flush->header.length; /* flush1 (dimm1) */ - flush = nfit_buf + offset + flush_hint_size * 1; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[1]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); + offset += flush->header.length; /* flush2 (dimm2) */ - flush = nfit_buf + offset + flush_hint_size * 2; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[2]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); + offset += flush->header.length; /* flush3 (dimm3) */ - flush = nfit_buf + offset + flush_hint_size * 3; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[3]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + offset += flush->header.length; /* platform capabilities */ - pcap = nfit_buf + offset + flush_hint_size * 4; + pcap = nfit_buf + offset; pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; pcap->header.length = sizeof(*pcap); pcap->highest_capability = 1; pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | ACPI_NFIT_CAPABILITY_MEM_FLUSH; + offset += pcap->header.length; if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = 
ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 8+1; dcr_common_init(dcr); dcr->serial_number = ~handle[4]; @@ -1897,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region); /* dcr-descriptor4: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1909,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region); /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; @@ -1932,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 10+1; spa->address = t->dcr_dma[4]; spa->length = DCR_SIZE; + offset += spa->header.length; /* * spa11 (single-dimm interleave for hotplug, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + offset + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* spa12 (bdw for dcr4) 
dimm4 */ - spa = nfit_buf + offset + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = offset + sizeof(*spa) * 3; /* mem-region14 (spa/dcr4, dimm4) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1970,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - /* mem-region15 (spa0, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map); + /* mem-region15 (spa11, dimm4) */ + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -1987,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region16 (spa/bdw4, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -2003,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; /* flush3 (dimm4) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -2014,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[4] + i * sizeof(u64); + offset += flush->header.length; 
+ + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); } + t->nfit_filled = offset; + post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA0_SIZE); @@ -2026,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); @@ -2061,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA2_SIZE; + offset += spa->header.length; /* virtual cd region */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; spa->length = SPA_VCD_SIZE; + offset += spa->header.length; - offset += sizeof(*spa) * 2; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -2089,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED | ACPI_NFIT_MEM_NOT_ARMED; + offset += memdev->header.length; - offset += sizeof(*memdev); /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -2101,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[5]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - offset += dcr->header.length; + memdev = nfit_buf + offset; memdev->header.type = 
ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); @@ -2117,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + offset += memdev->header.length; /* dcr-descriptor1 */ - offset += sizeof(*memdev); dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, @@ -2129,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[6]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); + + t->nfit_filled = offset; post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA2_SIZE); @@ -2487,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev) nd_desc->ndctl = nfit_test_ctl; rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, - nfit_test->nfit_size); + nfit_test->nfit_filled); if (rc) return rc; diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 428344519cdf..33752e06ff8d 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_FW_FINISH_UPDATE 15 #define ND_INTEL_FW_FINISH_QUERY 16 #define ND_INTEL_SMART_SET_THRESHOLD 17 +#define ND_INTEL_SMART_INJECT 18 #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) #define ND_INTEL_SMART_SPARES_VALID (1 << 1) @@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) #define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) #define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) +#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0) +#define ND_INTEL_SMART_INJECT_SPARE (1 << 1) +#define ND_INTEL_SMART_INJECT_FATAL (1 << 2) +#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) struct nd_intel_smart { 
__u32 status; @@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold { __u32 status; } __packed; +struct nd_intel_smart_inject { + __u64 flags; + __u8 mtemp_enable; + __u16 media_temperature; + __u8 spare_enable; + __u8 spares; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + __u32 status; +} __packed; + #define INTEL_FW_STORAGE_SIZE 0x100000 #define INTEL_FW_MAX_SEND_LEN 0xFFEC #define INTEL_FW_QUERY_INTERVAL 250000 |