From 0776aa0e30aa31b2fad606457e9d3faf39d88314 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 8 Dec 2017 14:40:52 -0500 Subject: dm: ensure bio-based DM's bioset and io_pool support targets' maximum IOs alloc_multiple_bios() assumes it can allocate the requested number of bios but until now there was no guarantee that the mempools would be accommodating. Suggested-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index aaffd0c0ee9a..7b22cc8d30f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1079,7 +1079,8 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * { enum dm_queue_mode type = dm_table_get_type(t); unsigned per_io_data_size = 0; - struct dm_target *tgt; + unsigned min_pool_size = 0; + struct dm_target *ti; unsigned i; if (unlikely(type == DM_TYPE_NONE)) { @@ -1089,11 +1090,13 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * if (__table_type_bio_based(type)) for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - per_io_data_size = max(per_io_data_size, tgt->per_io_data_size); + ti = t->targets + i; + per_io_data_size = max(per_io_data_size, ti->per_io_data_size); + min_pool_size = max(min_pool_size, ti->num_flush_bios); } - t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_io_data_size); + t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, + per_io_data_size, min_pool_size); if (!t->mempools) return -ENOMEM; -- cgit v1.2.3 From ad3793fc3945173f64d82d05d3ecde41f6c0435c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 4 Dec 2017 23:28:32 -0500 Subject: dm: set QUEUE_FLAG_DAX accordingly in dm_table_set_restrictions() Rather than having DAX support be unique by setting it based on table type in dm_setup_md_queue(). 
Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 2 ++ drivers/md/dm.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7b22cc8d30f4..504e79bc3a55 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1823,6 +1823,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); + if (dm_table_supports_dax(t)) + queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4e7682afebfa..308d178fff73 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2034,9 +2034,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) case DM_TYPE_DAX_BIO_BASED: dm_init_normal_md_queue(md); blk_queue_make_request(md->queue, dm_make_request); - - if (type == DM_TYPE_DAX_BIO_BASED) - queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); -- cgit v1.2.3 From 22c11858e8002592c59ebb762e4e42dc634bf84f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 4 Dec 2017 21:07:37 -0500 Subject: dm: introduce DM_TYPE_NVME_BIO_BASED If dm_table_determine_type() establishes DM_TYPE_NVME_BIO_BASED then all devices in the DM table do not support partial completions. Also, the table has a single immutable target that doesn't require DM core to split bios. This will enable adding NVMe optimizations to bio-based DM. 
Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 54 ++++++++++++++++++++++++++++++++++++++----- drivers/md/dm.c | 2 ++ include/linux/device-mapper.h | 1 + 3 files changed, 51 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 504e79bc3a55..ad4ac294dd57 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -866,7 +866,8 @@ EXPORT_SYMBOL(dm_consume_args); static bool __table_type_bio_based(enum dm_queue_mode table_type) { return (table_type == DM_TYPE_BIO_BASED || - table_type == DM_TYPE_DAX_BIO_BASED); + table_type == DM_TYPE_DAX_BIO_BASED || + table_type == DM_TYPE_NVME_BIO_BASED); } static bool __table_type_request_based(enum dm_queue_mode table_type) @@ -909,6 +910,8 @@ static bool dm_table_supports_dax(struct dm_table *t) return true; } +static bool dm_table_does_not_support_partial_completion(struct dm_table *t); + static int dm_table_determine_type(struct dm_table *t) { unsigned i; @@ -923,6 +926,14 @@ static int dm_table_determine_type(struct dm_table *t) /* target already set the table's type */ if (t->type == DM_TYPE_BIO_BASED) return 0; + else if (t->type == DM_TYPE_NVME_BIO_BASED) { + if (!dm_table_does_not_support_partial_completion(t)) { + DMERR("nvme bio-based is only possible with devices" + " that don't support partial completion"); + return -EINVAL; + } + /* Fallthru, also verify all devices are blk-mq */ + } BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED); goto verify_rq_based; } @@ -937,8 +948,8 @@ static int dm_table_determine_type(struct dm_table *t) bio_based = 1; if (bio_based && request_based) { - DMWARN("Inconsistent table: different target types" - " can't be mixed up"); + DMERR("Inconsistent table: different target types" + " can't be mixed up"); return -EINVAL; } } @@ -959,8 +970,14 @@ static int dm_table_determine_type(struct dm_table *t) /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; if (dm_table_supports_dax(t) || - 
(list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) + (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; + } else if ((dm_table_get_immutable_target(t) && + dm_table_does_not_support_partial_completion(t)) || + (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED)) { + t->type = DM_TYPE_NVME_BIO_BASED; + goto verify_rq_based; + } return 0; } @@ -980,7 +997,8 @@ verify_rq_based: * (e.g. request completion process for partial completion.) */ if (t->num_targets > 1) { - DMWARN("Request-based dm doesn't support multiple targets yet"); + DMERR("%s DM doesn't support multiple targets", + t->type == DM_TYPE_NVME_BIO_BASED ? "nvme bio-based" : "request-based"); return -EINVAL; } @@ -997,6 +1015,15 @@ verify_rq_based: return 0; } + tgt = dm_table_get_immutable_target(t); + if (!tgt) { + DMERR("table load rejected: immutable target is required"); + return -EINVAL; + } else if (tgt->max_io_len) { + DMERR("table load rejected: immutable target that splits IO is not supported"); + return -EINVAL; + } + /* Non-request-stackable devices can't be used for request-based dm */ list_for_each_entry(dd, devices, list) { struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); @@ -1018,7 +1045,8 @@ verify_rq_based: } t->all_blk_mq = mq_count > 0; - if (t->type == DM_TYPE_MQ_REQUEST_BASED && !t->all_blk_mq) { + if (!t->all_blk_mq && + (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) { DMERR("table load rejected: all devices are not blk-mq request-stackable"); return -EINVAL; } @@ -1708,6 +1736,20 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return true; } +static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + char b[BDEVNAME_SIZE]; + + /* For now, NVMe devices are the only devices of this class */ + return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0); +} + +static bool 
dm_table_does_not_support_partial_completion(struct dm_table *t) +{ + return dm_table_all_devices_attribute(t, device_no_partial_completion); +} + static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index cbb4ae5051fc..a1bd7a6ff522 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2073,6 +2073,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: + case DM_TYPE_NVME_BIO_BASED: dm_init_normal_md_queue(md); blk_queue_make_request(md->queue, dm_make_request); break; @@ -2780,6 +2781,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu switch (type) { case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: + case DM_TYPE_NVME_BIO_BASED: pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0e518d2ee280..41ec228b02a6 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -28,6 +28,7 @@ enum dm_queue_mode { DM_TYPE_REQUEST_BASED = 2, DM_TYPE_MQ_REQUEST_BASED = 3, DM_TYPE_DAX_BIO_BASED = 4, + DM_TYPE_NVME_BIO_BASED = 5, }; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -- cgit v1.2.3 From eaa160ededfad7a38f7ee06dc1af2ced1b410ad8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 13 Jan 2018 14:33:30 -0500 Subject: dm table: fix NVMe bio-based dm_table_determine_type() validation The 'verify_rq_based:' code in dm_table_determine_type() was checking all devices in the DM table rather than only checking the data devices. Fix this by using the immutable target's iterate_devices method. 
Also, tweak the block of dm_table_determine_type() code that decides whether to upgrade from DM_TYPE_BIO_BASED to DM_TYPE_NVME_BIO_BASED so that it makes sure the immutable_target doesn't require splitting IOs. These changes have been verified to allow a "thin-pool" target whose data device is an NVMe device to be upgraded to DM_TYPE_NVME_BIO_BASED. Using the thin-pool in NVMe bio-based mode was verified to pass all the device-mapper-test-suite's "thin-provisioning" tests. Also verified that request-based DM multipath (with queue_mode "rq" and "mq") works as expected using the 'mptest' harness. Fixes: 22c11858e ("dm: introduce DM_TYPE_NVME_BIO_BASED") Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 57 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'drivers/md/dm-table.c') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index ad4ac294dd57..5fe7ec356c33 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -912,13 +912,31 @@ static bool dm_table_supports_dax(struct dm_table *t) static bool dm_table_does_not_support_partial_completion(struct dm_table *t); +struct verify_rq_based_data { + unsigned sq_count; + unsigned mq_count; +}; + +static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + struct verify_rq_based_data *v = data; + + if (q->mq_ops) + v->mq_count++; + else + v->sq_count++; + + return queue_is_rq_based(q); +} + static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - unsigned sq_count = 0, mq_count = 0; + struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0}; struct dm_target *tgt; - struct dm_dev_internal *dd; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); @@ -972,11 +990,15 @@ static int 
dm_table_determine_type(struct dm_table *t) if (dm_table_supports_dax(t) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; - } else if ((dm_table_get_immutable_target(t) && - dm_table_does_not_support_partial_completion(t)) || - (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED)) { - t->type = DM_TYPE_NVME_BIO_BASED; - goto verify_rq_based; + } else { + /* Check if upgrading to NVMe bio-based is valid or required */ + tgt = dm_table_get_immutable_target(t); + if (tgt && !tgt->max_io_len && dm_table_does_not_support_partial_completion(t)) { + t->type = DM_TYPE_NVME_BIO_BASED; + goto verify_rq_based; /* must be stacked directly on NVMe (blk-mq) */ + } else if (list_empty(devices) && live_md_type == DM_TYPE_NVME_BIO_BASED) { + t->type = DM_TYPE_NVME_BIO_BASED; + } } return 0; } @@ -1025,25 +1047,16 @@ verify_rq_based: } /* Non-request-stackable devices can't be used for request-based dm */ - list_for_each_entry(dd, devices, list) { - struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); - - if (!queue_is_rq_based(q)) { - DMERR("table load rejected: including" - " non-request-stackable devices"); - return -EINVAL; - } - - if (q->mq_ops) - mq_count++; - else - sq_count++; + if (!tgt->type->iterate_devices || + !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) { + DMERR("table load rejected: including non-request-stackable devices"); + return -EINVAL; } - if (sq_count && mq_count) { + if (v.sq_count && v.mq_count) { DMERR("table load rejected: not all devices are blk-mq request-stackable"); return -EINVAL; } - t->all_blk_mq = mq_count > 0; + t->all_blk_mq = v.mq_count > 0; if (!t->all_blk_mq && (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) { -- cgit v1.2.3